summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-11-25 19:59:20 (GMT)
committer: GitHub <noreply@github.com> 2024-11-25 19:59:20 (GMT)
commit: 5bb059fe606983814a445e4dcf9e96fd7cb4951a (patch)
tree: 676161387dee7f7b757b947199db07f8b29da54f
parent: a2ee89968299fc4f0da4b5a4165025b941213ba5 (diff)
download: cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.zip
cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.tar.gz
cpython-5bb059fe606983814a445e4dcf9e96fd7cb4951a.tar.bz2
GH-127236: `pathname2url()`: generate RFC 1738 URL for absolute POSIX path (#127194)
When handed an absolute Windows path such as `C:\foo` or `//server/share`, the `urllib.request.pathname2url()` function returns a URL with an authority section, such as `///C:/foo` or `//server/share` (or before GH-126205, `////server/share`). Only the `file:` prefix is omitted. But when handed an absolute POSIX path such as `/etc/hosts`, or a Windows path of the same form (rooted but lacking a drive), the function returns a URL without an authority section, such as `/etc/hosts`. This patch corrects the discrepancy by adding a `//` prefix before drive-less, rooted paths when generating URLs.
-rw-r--r-- Doc/library/urllib.request.rst | 10
-rw-r--r-- Lib/nturl2path.py | 20
-rw-r--r-- Lib/test/test_urllib.py | 10
-rw-r--r-- Lib/urllib/request.py | 8
-rw-r--r-- Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst | 5
5 files changed, 33 insertions, 20 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 9055556..3c07dc4 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -159,12 +159,14 @@ The :mod:`urllib.request` module defines the following functions:
'file:///C:/Program%20Files'
.. versionchanged:: 3.14
- Windows drive letters are no longer converted to uppercase.
+ Paths beginning with a slash are converted to URLs with authority
+ sections. For example, the path ``/etc/hosts`` is converted to
+ the URL ``///etc/hosts``.
.. versionchanged:: 3.14
- On Windows, ``:`` characters not following a drive letter are quoted. In
- previous versions, :exc:`OSError` was raised if a colon character was
- found in any position other than the second character.
+ Windows drive letters are no longer converted to uppercase, and ``:``
+ characters not following a drive letter no longer cause an
+ :exc:`OSError` exception to be raised on Windows.
.. function:: url2pathname(url)
diff --git a/Lib/nturl2path.py b/Lib/nturl2path.py
index 01135d1..7e13ae3 100644
--- a/Lib/nturl2path.py
+++ b/Lib/nturl2path.py
@@ -55,13 +55,17 @@ def pathname2url(p):
p = p[4:]
if p[:4].upper() == 'UNC/':
p = '//' + p[4:]
- drive, tail = ntpath.splitdrive(p)
- if drive[1:] == ':':
- # DOS drive specified. Add three slashes to the start, producing
- # an authority section with a zero-length authority, and a path
- # section starting with a single slash.
- drive = f'///{drive}'
+ drive, root, tail = ntpath.splitroot(p)
+ if drive:
+ if drive[1:] == ':':
+ # DOS drive specified. Add three slashes to the start, producing
+ # an authority section with a zero-length authority, and a path
+ # section starting with a single slash.
+ drive = f'///{drive}'
+ drive = urllib.parse.quote(drive, safe='/:')
+ elif root:
+ # Add explicitly empty authority to path beginning with one slash.
+ root = f'//{root}'
- drive = urllib.parse.quote(drive, safe='/:')
tail = urllib.parse.quote(tail)
- return drive + tail
+ return drive + root + tail
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index fe16bad..00e4699 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1434,7 +1434,7 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('C:\\foo:bar'), '///C:/foo%3Abar')
self.assertEqual(fn('foo:bar'), 'foo%3Abar')
# No drive letter
- self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
+ self.assertEqual(fn("\\folder\\test\\"), '///folder/test/')
self.assertEqual(fn("\\\\folder\\test\\"), '//folder/test/')
self.assertEqual(fn("\\\\\\folder\\test\\"), '///folder/test/')
self.assertEqual(fn('\\\\some\\share\\'), '//some/share/')
@@ -1447,7 +1447,7 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('//?/unc/server/share/dir'), '//server/share/dir')
# Round-tripping
urls = ['///C:',
- '/folder/test/',
+ '///folder/test/',
'///C:/foo/bar/spam.foo']
for url in urls:
self.assertEqual(fn(urllib.request.url2pathname(url)), url)
@@ -1456,12 +1456,12 @@ class Pathname_Tests(unittest.TestCase):
'test specific to POSIX pathnames')
def test_pathname2url_posix(self):
fn = urllib.request.pathname2url
- self.assertEqual(fn('/'), '/')
- self.assertEqual(fn('/a/b.c'), '/a/b.c')
+ self.assertEqual(fn('/'), '///')
+ self.assertEqual(fn('/a/b.c'), '///a/b.c')
self.assertEqual(fn('//a/b.c'), '////a/b.c')
self.assertEqual(fn('///a/b.c'), '/////a/b.c')
self.assertEqual(fn('////a/b.c'), '//////a/b.c')
- self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
+ self.assertEqual(fn('/a/b%#c'), '///a/b%25%23c')
@unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
def test_pathname2url_nonascii(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 9e55543..1fcaa89 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1667,9 +1667,11 @@ else:
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
- if pathname[:2] == '//':
- # Add explicitly empty authority to avoid interpreting the path
- # as authority.
+ if pathname[:1] == '/':
+ # Add explicitly empty authority to absolute path. If the path
+ # starts with exactly one slash then this change is mostly
+ # cosmetic, but if it begins with two or more slashes then this
+ # avoids interpreting the path as a URL authority.
pathname = '//' + pathname
encoding = sys.getfilesystemencoding()
errors = sys.getfilesystemencodeerrors()
diff --git a/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst
new file mode 100644
index 0000000..0b8ffdb
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-23-12-25-06.gh-issue-125866.wEOP66.rst
@@ -0,0 +1,5 @@
+:func:`urllib.request.pathname2url` now adds an empty authority when
+generating a URL for a path that begins with exactly one slash. For example,
+the path ``/etc/hosts`` is converted to the scheme-less URL ``///etc/hosts``.
+As a result of this change, URLs without authorities are only generated for
+relative paths.