diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-11-22 00:29:05 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-22 00:29:05 (GMT) |
commit | fd133d4f21cd7f5cbf6bcf332290ce52e5501167 (patch) | |
tree | f7d9aaa889d138dbb1a303f9c411f90acdda0c86 | |
parent | e8bb05394164e7735f7a9de80a046953606a38eb (diff) | |
download | cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.zip cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.tar.gz cpython-fd133d4f21cd7f5cbf6bcf332290ce52e5501167.tar.bz2 |
GH-126601: `pathname2url()`: handle NTFS alternate data streams (#126760)
Adjust `pathname2url()` to encode embedded colon characters in Windows
paths, rather than bailing out with an `OSError`.
Co-authored-by: Steve Dower <steve.dower@microsoft.com>
-rw-r--r-- | Doc/library/urllib.request.rst | 5 | ||||
-rw-r--r-- | Lib/nturl2path.py | 22 | ||||
-rw-r--r-- | Lib/test/test_urllib.py | 5 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst | 3 |
4 files changed, 21 insertions, 14 deletions
```diff
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index cdd58b8..e0831bf 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -152,6 +152,11 @@ The :mod:`urllib.request` module defines the following functions:
    the path component of a URL. This does not produce a complete URL. The return
    value will already be quoted using the :func:`~urllib.parse.quote` function.
 
+   .. versionchanged:: 3.14
+      On Windows, ``:`` characters not following a drive letter are quoted. In
+      previous versions, :exc:`OSError` was raised if a colon character was
+      found in any position other than the second character.
+
 
 .. function:: url2pathname(path)
 
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 255eb2f..ed7880f 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -40,6 +40,7 @@ def pathname2url(p):
     # C:\foo\bar\spam.foo
     # becomes
     # ///C:/foo/bar/spam.foo
+    import ntpath
     import urllib.parse
     # First, clean up some special forms. We are going to sacrifice
     # the additional information anyway
@@ -48,16 +49,13 @@ def pathname2url(p):
         p = p[4:]
         if p[:4].upper() == 'UNC/':
             p = '//' + p[4:]
-        elif p[1:2] != ':':
-            raise OSError('Bad path: ' + p)
-    if not ':' in p:
-        # No DOS drive specified, just quote the pathname
-        return urllib.parse.quote(p)
-    comp = p.split(':', maxsplit=2)
-    if len(comp) != 2 or len(comp[0]) > 1:
-        error = 'Bad path: ' + p
-        raise OSError(error)
+    drive, tail = ntpath.splitdrive(p)
+    if drive[1:] == ':':
+        # DOS drive specified. Add three slashes to the start, producing
+        # an authority section with a zero-length authority, and a path
+        # section starting with a single slash.
+        drive = f'///{drive.upper()}'
 
-    drive = urllib.parse.quote(comp[0].upper())
-    tail = urllib.parse.quote(comp[1])
-    return '///' + drive + ':' + tail
+    drive = urllib.parse.quote(drive, safe='/:')
+    tail = urllib.parse.quote(tail)
+    return drive + tail
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index c66b1c4..3e5dc25 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1429,8 +1429,9 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
         self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
         self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
-        # Long drive letter
-        self.assertRaises(IOError, fn, "XX:\\")
+        # NTFS alternate data streams
+        self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
+        self.assertEqual(fn('foo:bar'), 'foo%3Abar')
         # No drive letter
         self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
         self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
new file mode 100644
index 0000000..11e2b73
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-20-05-09.gh-issue-126601.Nj7bA9.rst
@@ -0,0 +1,3 @@
+Fix issue where :func:`urllib.request.pathname2url` raised :exc:`OSError`
+when given a Windows path containing a colon character not following a
+drive letter, such as before an NTFS alternate data stream.
```