summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> 2024-05-14 09:47:05 (GMT)
committer: GitHub <noreply@github.com> 2024-05-14 09:47:05 (GMT)
commit: 872000606271c52d989e53fe4cc9904343d81855 (patch)
tree: 6997a88ef80f2162ebfbddada1e17c484e94cfe9
parent: 29a2f9cc286f6cef8a359fc7022fe9d480a2eb79 (diff)
download: cpython-872000606271c52d989e53fe4cc9904343d81855.zip
          cpython-872000606271c52d989e53fe4cc9904343d81855.tar.gz
          cpython-872000606271c52d989e53fe4cc9904343d81855.tar.bz2
[3.13] gh-67693: Fix urlunparse() and urlunsplit() for URIs with path starting with multiple slashes and no authority (GH-113563) (GH-119023)
(cherry picked from commit e237b25a4fa5626fcd1b1848aa03f725f892e40e) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
-rw-r--r-- Lib/test/test_urlparse.py | 70
-rw-r--r-- Lib/urllib/parse.py | 2
-rw-r--r-- Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst | 2
3 files changed, 70 insertions, 4 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 236b6e4..2cf03d0 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -103,7 +103,9 @@ parse_qs_test_cases = [
class UrlParseTestCase(unittest.TestCase):
- def checkRoundtrips(self, url, parsed, split):
+ def checkRoundtrips(self, url, parsed, split, url2=None):
+ if url2 is None:
+ url2 = url
result = urllib.parse.urlparse(url)
self.assertSequenceEqual(result, parsed)
t = (result.scheme, result.netloc, result.path,
@@ -111,7 +113,7 @@ class UrlParseTestCase(unittest.TestCase):
self.assertSequenceEqual(t, parsed)
# put it back together and it should be the same
result2 = urllib.parse.urlunparse(result)
- self.assertSequenceEqual(result2, url)
+ self.assertSequenceEqual(result2, url2)
self.assertSequenceEqual(result2, result.geturl())
# the result of geturl() is a fixpoint; we can always parse it
@@ -137,7 +139,7 @@ class UrlParseTestCase(unittest.TestCase):
result.query, result.fragment)
self.assertSequenceEqual(t, split)
result2 = urllib.parse.urlunsplit(result)
- self.assertSequenceEqual(result2, url)
+ self.assertSequenceEqual(result2, url2)
self.assertSequenceEqual(result2, result.geturl())
# check the fixpoint property of re-parsing the result of geturl()
@@ -175,9 +177,39 @@ class UrlParseTestCase(unittest.TestCase):
def test_roundtrips(self):
str_cases = [
+ ('path/to/file',
+ ('', '', 'path/to/file', '', '', ''),
+ ('', '', 'path/to/file', '', '')),
+ ('/path/to/file',
+ ('', '', '/path/to/file', '', '', ''),
+ ('', '', '/path/to/file', '', '')),
+ ('//path/to/file',
+ ('', 'path', '/to/file', '', '', ''),
+ ('', 'path', '/to/file', '', '')),
+ ('////path/to/file',
+ ('', '', '//path/to/file', '', '', ''),
+ ('', '', '//path/to/file', '', '')),
+ ('scheme:path/to/file',
+ ('scheme', '', 'path/to/file', '', '', ''),
+ ('scheme', '', 'path/to/file', '', '')),
+ ('scheme:/path/to/file',
+ ('scheme', '', '/path/to/file', '', '', ''),
+ ('scheme', '', '/path/to/file', '', '')),
+ ('scheme://path/to/file',
+ ('scheme', 'path', '/to/file', '', '', ''),
+ ('scheme', 'path', '/to/file', '', '')),
+ ('scheme:////path/to/file',
+ ('scheme', '', '//path/to/file', '', '', ''),
+ ('scheme', '', '//path/to/file', '', '')),
('file:///tmp/junk.txt',
('file', '', '/tmp/junk.txt', '', '', ''),
('file', '', '/tmp/junk.txt', '', '')),
+ ('file:////tmp/junk.txt',
+ ('file', '', '//tmp/junk.txt', '', '', ''),
+ ('file', '', '//tmp/junk.txt', '', '')),
+ ('file://///tmp/junk.txt',
+ ('file', '', '///tmp/junk.txt', '', '', ''),
+ ('file', '', '///tmp/junk.txt', '', '')),
('imap://mail.python.org/mbox1',
('imap', 'mail.python.org', '/mbox1', '', '', ''),
('imap', 'mail.python.org', '/mbox1', '', '')),
@@ -213,6 +245,38 @@ class UrlParseTestCase(unittest.TestCase):
for url, parsed, split in str_cases + bytes_cases:
self.checkRoundtrips(url, parsed, split)
+ def test_roundtrips_normalization(self):
+ str_cases = [
+ ('///path/to/file',
+ '/path/to/file',
+ ('', '', '/path/to/file', '', '', ''),
+ ('', '', '/path/to/file', '', '')),
+ ('scheme:///path/to/file',
+ 'scheme:/path/to/file',
+ ('scheme', '', '/path/to/file', '', '', ''),
+ ('scheme', '', '/path/to/file', '', '')),
+ ('file:/tmp/junk.txt',
+ 'file:///tmp/junk.txt',
+ ('file', '', '/tmp/junk.txt', '', '', ''),
+ ('file', '', '/tmp/junk.txt', '', '')),
+ ('http:/tmp/junk.txt',
+ 'http:///tmp/junk.txt',
+ ('http', '', '/tmp/junk.txt', '', '', ''),
+ ('http', '', '/tmp/junk.txt', '', '')),
+ ('https:/tmp/junk.txt',
+ 'https:///tmp/junk.txt',
+ ('https', '', '/tmp/junk.txt', '', '', ''),
+ ('https', '', '/tmp/junk.txt', '', '')),
+ ]
+ def _encode(t):
+ return (t[0].encode('ascii'),
+ t[1].encode('ascii'),
+ tuple(x.encode('ascii') for x in t[2]),
+ tuple(x.encode('ascii') for x in t[3]))
+ bytes_cases = [_encode(x) for x in str_cases]
+ for url, url2, parsed, split in str_cases + bytes_cases:
+ self.checkRoundtrips(url, parsed, split, url2)
+
def test_http_roundtrips(self):
# urllib.parse.urlsplit treats 'http:' as an optimized special case,
# so we test both 'http:' and 'https:' in all the following.
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index fc9e7c9..3932bb9 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -525,7 +525,7 @@ def urlunsplit(components):
empty query; the RFC states that these are equivalent)."""
scheme, netloc, url, query, fragment, _coerce_result = (
_coerce_args(*components))
- if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
+ if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
if url and url[:1] != '/': url = '/' + url
url = '//' + (netloc or '') + url
if scheme:
diff --git a/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst
new file mode 100644
index 0000000..22457df
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-08-27-01-16-50.gh-issue-67693.4NIAiy.rst
@@ -0,0 +1,2 @@
+Fix :func:`urllib.parse.urlunparse` and :func:`urllib.parse.urlunsplit` for URIs with path starting with multiple slashes and no authority.
+Based on patch by Ashwin Ramaswami.