summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com> 2024-11-19 21:19:30 (GMT)
committer: GitHub <noreply@github.com> 2024-11-19 21:19:30 (GMT)
commit: c9b399fbdb01584dcfff0d7f6ad484644ff269c3 (patch)
tree: 4e7fe4fa8d106d35597ec8c2f969e7e58ba9517c
parent: 2cdfb41d0c3bfea37983fc872951bc3b2a4d90b8 (diff)
download: cpython-c9b399fbdb01584dcfff0d7f6ad484644ff269c3.zip
cpython-c9b399fbdb01584dcfff0d7f6ad484644ff269c3.tar.gz
cpython-c9b399fbdb01584dcfff0d7f6ad484644ff269c3.tar.bz2
GH-85168: Use filesystem encoding when converting to/from `file` URIs (#126852)
Adjust `urllib.request.url2pathname()` and `pathname2url()` to use the filesystem encoding when quoting and unquoting file URIs, rather than forcing use of UTF-8. No changes are needed in the `nturl2path` module because Windows always uses UTF-8, per PEP 529.
-rw-r--r-- Lib/test/test_urllib.py 20
-rw-r--r-- Lib/test/test_urllib2.py 4
-rw-r--r-- Lib/urllib/request.py 8
-rw-r--r-- Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst 4
4 files changed, 26 insertions, 10 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 71084a4..c66b1c4 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -609,10 +609,6 @@ class urlretrieve_FileTests(unittest.TestCase):
def constructLocalFileUrl(self, filePath):
filePath = os.path.abspath(filePath)
- try:
- filePath.encode("utf-8")
- except UnicodeEncodeError:
- raise unittest.SkipTest("filePath is not encodable to utf8")
return "file://%s" % urllib.request.pathname2url(filePath)
def createNewTempFile(self, data=b""):
@@ -1462,6 +1458,13 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('/a/b.c'), '/a/b.c')
self.assertEqual(fn('/a/b%#c'), '/a/b%25%23c')
+ @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+ def test_pathname2url_nonascii(self):
+ encoding = sys.getfilesystemencoding()
+ errors = sys.getfilesystemencodeerrors()
+ url = urllib.parse.quote(os_helper.FS_NONASCII, encoding=encoding, errors=errors)
+ self.assertEqual(urllib.request.pathname2url(os_helper.FS_NONASCII), url)
+
@unittest.skipUnless(sys.platform == 'win32',
'test specific to Windows pathnames.')
def test_url2pathname_win(self):
@@ -1512,6 +1515,15 @@ class Pathname_Tests(unittest.TestCase):
self.assertEqual(fn('////foo/bar'), '//foo/bar')
self.assertEqual(fn('//localhost/foo/bar'), '//localhost/foo/bar')
+ @unittest.skipUnless(os_helper.FS_NONASCII, 'need os_helper.FS_NONASCII')
+ def test_url2pathname_nonascii(self):
+ encoding = sys.getfilesystemencoding()
+ errors = sys.getfilesystemencodeerrors()
+ url = os_helper.FS_NONASCII
+ self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+ url = urllib.parse.quote(url, encoding=encoding, errors=errors)
+ self.assertEqual(urllib.request.url2pathname(url), os_helper.FS_NONASCII)
+
class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index b90ccc2..99ad11c 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -718,10 +718,6 @@ class OpenerDirectorTests(unittest.TestCase):
def sanepathname2url(path):
- try:
- path.encode("utf-8")
- except UnicodeEncodeError:
- raise unittest.SkipTest("path is not encodable to utf8")
urlpath = urllib.request.pathname2url(path)
if os.name == "nt" and urlpath.startswith("///"):
urlpath = urlpath[2:]
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 5c061a2..bcfdcc5 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -1657,12 +1657,16 @@ else:
# URL has an empty authority section, so the path begins on the
# third character.
pathname = pathname[2:]
- return unquote(pathname)
+ encoding = sys.getfilesystemencoding()
+ errors = sys.getfilesystemencodeerrors()
+ return unquote(pathname, encoding=encoding, errors=errors)
def pathname2url(pathname):
"""OS-specific conversion from a file system path to a relative URL
of the 'file' scheme; not recommended for general use."""
- return quote(pathname)
+ encoding = sys.getfilesystemencoding()
+ errors = sys.getfilesystemencodeerrors()
+ return quote(pathname, encoding=encoding, errors=errors)
# Utility functions
diff --git a/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
new file mode 100644
index 0000000..abceda8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-15-01-50-36.gh-issue-85168.bP8VIN.rst
@@ -0,0 +1,4 @@
+Fix issue where :func:`urllib.request.url2pathname` and
+:func:`~urllib.request.pathname2url` always used UTF-8 when quoting and
+unquoting file URIs. They now use the :term:`filesystem encoding and error
+handler`.