diff options
-rw-r--r-- | Lib/test/test_urlparse.py | 72 | ||||
-rw-r--r-- | Lib/urllib/parse.py | 4 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst | 4 |
3 files changed, 78 insertions, 2 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d49e438..297fb48 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -623,6 +623,78 @@ class UrlParseTestCase(unittest.TestCase): self.checkJoin(RFC1808_BASE, 'https:;', 'https:;') self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x') + def test_urljoins_relative_base(self): + # According to RFC 3986, Section 5.1, a base URI must conform to + # the absolute-URI syntax rule (Section 4.3). But urljoin() lacks + # a context to establish missed components of the relative base URI. + # It still has to return a sensible result for backwards compatibility. + # The following tests are figments of the imagination and artifacts + # of the current implementation that are not based on any standard. + self.checkJoin('', '', '') + self.checkJoin('', '//', '//', relroundtrip=False) + self.checkJoin('', '//v', '//v') + self.checkJoin('', '//v/w', '//v/w') + self.checkJoin('', '/w', '/w') + self.checkJoin('', '///w', '///w', relroundtrip=False) + self.checkJoin('', 'w', 'w') + + self.checkJoin('//', '', '//') + self.checkJoin('//', '//', '//') + self.checkJoin('//', '//v', '//v') + self.checkJoin('//', '//v/w', '//v/w') + self.checkJoin('//', '/w', '///w') + self.checkJoin('//', '///w', '///w') + self.checkJoin('//', 'w', '///w') + + self.checkJoin('//a', '', '//a') + self.checkJoin('//a', '//', '//a') + self.checkJoin('//a', '//v', '//v') + self.checkJoin('//a', '//v/w', '//v/w') + self.checkJoin('//a', '/w', '//a/w') + self.checkJoin('//a', '///w', '//a/w') + self.checkJoin('//a', 'w', '//a/w') + + for scheme in '', 'http:': + self.checkJoin('http:', scheme + '', 'http:') + self.checkJoin('http:', scheme + '//', 'http:') + self.checkJoin('http:', scheme + '//v', 'http://v') + self.checkJoin('http:', scheme + '//v/w', 'http://v/w') + self.checkJoin('http:', scheme + '/w', 'http:/w') + self.checkJoin('http:', scheme + '///w', 'http:/w') + self.checkJoin('http:', scheme + 'w', 'http:/w') + 
+ self.checkJoin('http://', scheme + '', 'http://') + self.checkJoin('http://', scheme + '//', 'http://') + self.checkJoin('http://', scheme + '//v', 'http://v') + self.checkJoin('http://', scheme + '//v/w', 'http://v/w') + self.checkJoin('http://', scheme + '/w', 'http:///w') + self.checkJoin('http://', scheme + '///w', 'http:///w') + self.checkJoin('http://', scheme + 'w', 'http:///w') + + self.checkJoin('http://a', scheme + '', 'http://a') + self.checkJoin('http://a', scheme + '//', 'http://a') + self.checkJoin('http://a', scheme + '//v', 'http://v') + self.checkJoin('http://a', scheme + '//v/w', 'http://v/w') + self.checkJoin('http://a', scheme + '/w', 'http://a/w') + self.checkJoin('http://a', scheme + '///w', 'http://a/w') + self.checkJoin('http://a', scheme + 'w', 'http://a/w') + + self.checkJoin('/b/c', '', '/b/c') + self.checkJoin('/b/c', '//', '/b/c') + self.checkJoin('/b/c', '//v', '//v') + self.checkJoin('/b/c', '//v/w', '//v/w') + self.checkJoin('/b/c', '/w', '/w') + self.checkJoin('/b/c', '///w', '/w') + self.checkJoin('/b/c', 'w', '/b/w') + + self.checkJoin('///b/c', '', '///b/c') + self.checkJoin('///b/c', '//', '///b/c') + self.checkJoin('///b/c', '//v', '//v') + self.checkJoin('///b/c', '//v/w', '//v/w') + self.checkJoin('///b/c', '/w', '///w') + self.checkJoin('///b/c', '///w', '///w') + self.checkJoin('///b/c', 'w', '///b/w') + def test_RFC2732(self): str_cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 5b00ab2..a721d77 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -577,9 +577,9 @@ def urljoin(base, url, allow_fragments=True): if scheme is None: scheme = bscheme - if scheme != bscheme or scheme not in uses_relative: + if scheme != bscheme or (scheme and scheme not in uses_relative): return _coerce_result(url) - if scheme in uses_netloc: + if not scheme or scheme in uses_netloc: if netloc: return _coerce_result(_urlunsplit(scheme, netloc, path, 
query, fragment)) diff --git a/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst b/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst new file mode 100644 index 0000000..7f98bcd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst @@ -0,0 +1,4 @@ +Fix :func:`urllib.parse.urljoin` for base URI with undefined authority. +Although :rfc:`3986` only specifies reference resolution for absolute base +URI, :func:`!urljoin` should continue to return a sensible result for relative +base URI. |