summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-11-22 00:29:05 (GMT)
committer: GitHub <noreply@github.com> 2024-11-22 00:29:05 (GMT)
commit: fd133d4f21cd7f5cbf6bcf332290ce52e5501167 (patch)
tree: f7d9aaa889d138dbb1a303f9c411f90acdda0c86
parent: e8bb05394164e7735f7a9de80a046953606a38eb (diff)
download: cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.zip
cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.tar.gz
cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.tar.bz2
GH-126601: `pathname2url()`: handle NTFS alternate data streams (#126760)
Adjust `pathname2url()` to encode embedded colon characters in Windows paths, rather than bailing out with an `OSError`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
-rw-r--r-- Doc/library/urllib.request.rst 5
-rw-r--r-- Lib/nturl2path.py 22
-rw-r--r-- Lib/test/test_urllib.py 5
-rw-r--r-- Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst 3
4 files changed, 21 insertions, 14 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index cdd58b8..e0831bf 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -152,6 +152,11 @@ The :mod:`urllib.request` module defines the following functions:
the path component of a URL. This does not produce a complete URL. The return
value will already be quoted using the :func:`~urllib.parse.quote` function.
+ .. versionchanged:: 3.14
+ On Windows, ``:`` characters not following a drive letter are quoted. In
+ previous versions, :exc:`OSError` was raised if a colon character was
+ found in any position other than the second character.
+
.. function:: url2pathname(path)
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 255eb2f..ed7880f 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -40,6 +40,7 @@ def pathname2url(p):
# C:\foo\bar\spam.foo
# becomes
# ///C:/foo/bar/spam.foo
+ import ntpath
import urllib.parse
# First, clean up some special forms. We are going to sacrifice
# the additional information anyway
@@ -48,16 +49,13 @@ def pathname2url(p):
p = p[4:]
if p[:4].upper() == 'UNC/':
p = '//' + p[4:]
- elif p[1:2] != ':':
- raise OSError('Bad path: ' + p)
- if not ':' in p:
- # No DOS drive specified, just quote the pathname
- return urllib.parse.quote(p)
- comp = p.split(':', maxsplit=2)
- if len(comp) != 2 or len(comp[0]) > 1:
- error = 'Bad path: ' + p
- raise OSError(error)
+ drive, tail = ntpath.splitdrive(p)
+ if drive[1:] == ':':
+ # DOS drive specified. Add three slashes to the start, producing
+ # an authority section with a zero-length authority, and a path
+ # section starting with a single slash.
+ drive = f'///{drive.upper()}'
- drive = urllib.parse.quote(comp[0].upper())
- tail = urllib.parse.quote(comp[1])
- return '///' + drive + ':' + tail
+ drive = urllib.parse.quote(drive, safe='/:')
+ tail = urllib.parse.quote(tail)
+ return drive + tail
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index c66b1c4..3e5dc25 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1429,8 +1429,9 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
- # Long drive letter
- self.assertRaises(IOError, fn, "XX:\\")
+ # NTFS alternate data streams
+ self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
+ self.assertEqual(fn('foo:bar'), 'foo%3Abar')
# No drive letter
self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
new file mode 100644
index 0000000..11e2b73
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.pathname2url` raised :exc:`OSError`
+when given a Windows path containing a colon character not following a
+drive letter, such as before an NTFS alternate data stream.