diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-11-22 03:17:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-22 03:17:06 (GMT) |
commit | ebf564a1d3e2e81b9846535114e481d6096443d2 (patch) | |
tree | 62a0618bdaca77bde5822e9c0902851872174cb9 | |
parent | fcfdb55465636afc256bc29781b283404d88e6ca (diff) | |
download | cpython-ebf564a1d3e2e81b9846535114e481d6096443d2.zip cpython-ebf564a1d3e2e81b9846535114e481d6096443d2.tar.gz cpython-ebf564a1d3e2e81b9846535114e481d6096443d2.tar.bz2 |
GH-126766: `url2pathname()`: handle 'localhost' authority (#127129)
Discard any 'localhost' authority from the beginning of a `file:` URI. As a
result, file URIs like `//localhost/etc/hosts` are correctly decoded as
`/etc/hosts`.
-rw-r--r-- | Lib/nturl2path.py | 11 | ||||
-rw-r--r-- | Lib/test/test_urllib.py | 4 | ||||
-rw-r--r-- | Lib/urllib/request.py | 3 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst | 2 |
4 files changed, 15 insertions, 5 deletions
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index ed7880f..3308ee7 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -15,14 +15,17 @@ def url2pathname(url):
     # become
     #   C:\foo\bar\spam.foo
     import string, urllib.parse
+    if url[:3] == '///':
+        # URL has an empty authority section, so the path begins on the third
+        # character.
+        url = url[2:]
+    elif url[:12] == '//localhost/':
+        # Skip past 'localhost' authority.
+        url = url[11:]
     # Windows itself uses ":" even in URLs.
     url = url.replace(':', '|')
     if not '|' in url:
         # No drive specifier, just convert slashes
-        if url[:3] == '///':
-            # URL has an empty authority section, so the path begins on the
-            # third character.
-            url = url[2:]
         # make sure not to convert quoted slashes :-)
         return urllib.parse.unquote(url.replace('/', '\\'))
     comp = url.split('|')
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 3e5dc25..e1c1d31 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1496,6 +1496,8 @@ class Pathname_Tests(unittest.TestCase):
         # Localhost paths
         self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
         self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
+        self.assertEqual(fn('//localhost/path/to/file'), '\\path\\to\\file')
+        self.assertEqual(fn('//localhost//server/path/to/file'), '\\\\server\\path\\to\\file')
         # Percent-encoded forward slashes are preserved for backwards compatibility
         self.assertEqual(fn('C:/foo%2fbar'), 'C:\\foo/bar')
         self.assertEqual(fn('//server/share/foo%2fbar'), '\\\\server\\share\\foo/bar')
@@ -1514,7 +1516,7 @@ class Pathname_Tests(unittest.TestCase):
         self.assertEqual(fn('//foo/bar'), '//foo/bar')
         self.assertEqual(fn('///foo/bar'), '/foo/bar')
         self.assertEqual(fn('////foo/bar'), '//foo/bar')
-        self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
+        self.assertEqual(fn('//localhost/foo/bar'), '/foo/bar')
 
     @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
     def test_url2pathname_nonascii(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bcfdcc5..80be65c 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1657,6 +1657,9 @@ else:
             # URL has an empty authority section, so the path begins on the
             # third character.
             pathname = pathname[2:]
+        elif pathname[:12] == '//localhost/':
+            # Skip past 'localhost' authority.
+            pathname = pathname[11:]
         encoding = sys.getfilesystemencoding()
         errors = sys.getfilesystemencodeerrors()
         return unquote(pathname, encoding=encoding, errors=errors)
diff --git a/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst
new file mode 100644
index 0000000..998c99b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-22-02-31-55.gh-issue-126766.jfkhBH.rst
@@ -0,0 +1,2 @@
+Fix issue where :func:`urllib.request.url2pathname` failed to discard any
+'localhost' authority present in the URL.