diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-11-25 19:59:20 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-25 19:59:20 (GMT) |
commit | 5bb059fe606983814a445e4dcf9e96fd7cb4951a (patch) | |
tree | 676161387dee7f7b757b947199db07f8b29da54f | |
parent | a2ee89968299fc4f0da4b5a4165025b941213ba5 (diff) | |
download | cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.zip cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.tar.gz cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.tar.bz2 |
GH-127236: `pathname2url()`: generate RFC 1738 URL for absolute POSIX path (#127194)
When handed an absolute Windows path such as `C:\foo` or `//server/share`,
the `urllib.request.pathname2url()` function returns a URL with an
authority section, such as `///C:/foo` or `//server/share` (or, before
GH-126205, `////server/share`). Only the `file:` scheme prefix is omitted.
But when handed an absolute POSIX path such as `/etc/hosts`, or a Windows
path of the same form (rooted but lacking a drive), the function returns a
URL without an authority section, such as `/etc/hosts`.
This patch corrects the discrepancy by adding a `//` prefix (an empty
authority) before drive-less, rooted paths when generating URLs, so that
`/etc/hosts` is now converted to `///etc/hosts`.
-rw-r--r-- | Doc/library/urllib.request.rst | 10 | ||||
-rw-r--r-- | Lib/nturl2path.py | 20 | ||||
-rw-r--r-- | Lib/test/test_urllib.py | 10 | ||||
-rw-r--r-- | Lib/urllib/request.py | 8 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst | 5 |
5 files changed, 33 insertions, 20 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 9055556..3c07dc4 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions: 'file:///C:/Program%20Files' .. versionchanged:: 3.14 - Windows drive letters are no longer converted to uppercase. + Paths beginning with a slash are converted to URLs with authority + sections. For example, the path ``/etc/hosts`` is converted to + the URL ``///etc/hosts``. .. versionchanged:: 3.14 - On Windows, ``:`` characters not following a drive letter are quoted. In - previous versions, :exc:`OSError` was raised if a colon character was - found in any position other than the second character. + Windows drive letters are no longer converted to uppercase, and ``:`` + characters not following a drive letter no longer cause an + :exc:`OSError` exception to be raised on Windows. .. function:: url2pathname(url) diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py index 01135d1..7e13ae3 100644 --- a/Lib/nturl2path.py +++ b/Lib/nturl2path.py @@ -55,13 +55,17 @@ def pathname2url(p): p = p[4:] if p[:4].upper() == 'UNC/': p = '//' + p[4:] - drive, tail = ntpath.splitdrive(p) - if drive[1:] == ':': - # DOS drive specified. Add three slashes to the start, producing - # an authority section with a zero-length authority, and a path - # section starting with a single slash. - drive = f'///{drive}' + drive, root, tail = ntpath.splitroot(p) + if drive: + if drive[1:] == ':': + # DOS drive specified. Add three slashes to the start, producing + # an authority section with a zero-length authority, and a path + # section starting with a single slash. + drive = f'///{drive}' + drive = urllib.parse.quote(drive, safe='/:') + elif root: + # Add explicitly empty authority to path beginning with one slash. 
+ root = f'//{root}' - drive = urllib.parse.quote(drive, safe='/:') tail = urllib.parse.quote(tail) - return drive + tail + return drive + root + tail diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index fe16bad..00e4699 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1434,7 +1434,7 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar') self.assertEqual(fn('foo:bar'), 'foo%3Abar') # No drive letter - self.assertEqual(fn("\\folder\\test\\"), '/folder/test/') + self.assertEqual(fn("\\folder\\test\\"), '///folder/test/') self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/') self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/') self.assertEqual(fn('\\\\some\\share\\'), '//some/share/') @@ -1447,7 +1447,7 @@ class Pathname_Tests(unittest.TestCase): self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir') # Round-tripping urls = ['///C:', - '/folder/test/', + '///folder/test/', '///C:/foo/bar/spam.foo'] for url in urls: self.assertEqual(fn(urllib.request.url2pathname(url)), url) @@ -1456,12 +1456,12 @@ class Pathname_Tests(unittest.TestCase): 'test specific to POSIX pathnames') def test_pathname2url_posix(self): fn = urllib.request.pathname2url - self.assertEqual(fn('/'), '/') - self.assertEqual(fn('/a/b.c'), '/a/b.c') + self.assertEqual(fn('/'), '///') + self.assertEqual(fn('/a/b.c'), '///a/b.c') self.assertEqual(fn('//a/b.c'), '////a/b.c') self.assertEqual(fn('///a/b.c'), '/////a/b.c') self.assertEqual(fn('////a/b.c'), '//////a/b.c') - self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c') + self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c') @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII') def test_pathname2url_nonascii(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 9e55543..1fcaa89 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1667,9 +1667,11 @@ else: def pathname2url(pathname): """OS-specific 
conversion from a file system path to a relative URL of the 'file' scheme; not recommended for general use.""" - if pathname[:2] == '//': - # Add explicitly empty authority to avoid interpreting the path - # as authority. + if pathname[:1] == '/': + # Add explicitly empty authority to absolute path. If the path + # starts with exactly one slash then this change is mostly + # cosmetic, but if it begins with two or more slashes then this + # avoids interpreting the path as a URL authority. pathname = '//' + pathname encoding = sys.getfilesystemencoding() errors = sys.getfilesystemencodeerrors() diff --git a/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst new file mode 100644 index 0000000..0b8ffdb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst @@ -0,0 +1,5 @@ +:func:`urllib.request.pathname2url` now adds an empty authority when +generating a URL for a path that begins with exactly one slash. For example, +the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``. +As a result of this change, URLs without authorities are only generated for +relative paths. |