From a66e3885fb7bafbe69d738f500a63bd6718b0076 Mon Sep 17 00:00:00 2001
From: Senthil Kumaran
Date: Mon, 22 Sep 2014 15:49:16 +0800
Subject: Issue #22278: Fix urljoin problem with relative urls, a regression observed after changes to issue22118 were submitted.

Patch contributed by Demian Brecht and reviewed by Antoine Pitrou.
---
 Lib/test/test_urlparse.py | 12 ++++++++++++
 Lib/urllib/parse.py       |  6 +++++-
 Misc/NEWS                 |  3 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 24c1856..cb323d3 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -380,6 +380,18 @@ class UrlParseTestCase(unittest.TestCase):
         # self.checkJoin(SIMPLE_BASE, '../../../g','http://a/../g')
         # self.checkJoin(SIMPLE_BASE, '/./g','http://a/./g')
 
+        # test for issue22118 duplicate slashes
+        self.checkJoin(SIMPLE_BASE + '/', 'foo', SIMPLE_BASE + '/foo')
+
+        # Non-RFC-defined tests, covering variations of base and trailing
+        # slashes
+        self.checkJoin('http://a/b/c/d/e/', '../../f/g/', 'http://a/b/c/f/g/')
+        self.checkJoin('http://a/b/c/d/e', '../../f/g/', 'http://a/b/f/g/')
+        self.checkJoin('http://a/b/c/d/e/', '/../../f/g/', 'http://a/f/g/')
+        self.checkJoin('http://a/b/c/d/e', '/../../f/g/', 'http://a/f/g/')
+        self.checkJoin('http://a/b/c/d/e/', '../../f/g', 'http://a/b/c/f/g')
+        self.checkJoin('http://a/b/', '../../f/g/', 'http://a/f/g/')
+
     def test_RFC2732(self):
         str_cases = [
             ('http://Test.python.org:5432/foo/', 'test.python.org', 5432),
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index b6ac414..8bbeab6 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -443,6 +443,10 @@ def urljoin(base, url, allow_fragments=True):
         segments = path.split('/')
     else:
         segments = base_parts + path.split('/')
+        # filter out elements that would cause redundant slashes on re-joining
+        # the resolved_path
+        segments = segments[0:1] + [
+            s for s in segments[1:-1] if len(s) > 0] + segments[-1:]
 
     resolved_path = []
 
@@ -465,7 +469,7 @@ def urljoin(base, url, allow_fragments=True):
         resolved_path.append('')
 
     return _coerce_result(urlunparse((scheme, netloc, '/'.join(
-        resolved_path), params, query, fragment)))
+        resolved_path) or '/', params, query, fragment)))
 
 
 def urldefrag(url):
diff --git a/Misc/NEWS b/Misc/NEWS
index 7240842..c26eaa6 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -137,6 +137,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #22278: Fix urljoin problem with relative urls, a regression observed
+  after changes to issue22118 were submitted.
+
 - Issue #22415: Fixed debugging output of the GROUPREF_EXISTS opcode in the re
   module. Removed trailing spaces in debugging output.
 
-- 
cgit v0.12