diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_urlparse.py | 20 | ||||
-rw-r--r-- | Lib/urlparse.py | 11 |
2 files changed, 26 insertions, 5 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 80e4d91..8244017 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -478,6 +478,26 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(urlparse.urlparse("x-newscheme://foo.com/stuff"), ('x-newscheme','foo.com','/stuff','','','')) + def test_withoutscheme(self): + # Test urlparse without scheme + # Issue 754016: urlparse goes wrong with IP:port without scheme + # RFC 1808 specifies that netloc should start with //, urlparse expects + # the same, otherwise it classifies the portion of url as path. + self.assertEqual(urlparse.urlparse("path"), + ('','','path','','','')) + self.assertEqual(urlparse.urlparse("//www.python.org:80"), + ('','www.python.org:80','','','','')) + self.assertEqual(urlparse.urlparse("http://www.python.org:80"), + ('http','www.python.org:80','','','','')) + + def test_portseparator(self): + # Issue 754016 makes changes for port separator ':' from scheme separator + self.assertEqual(urlparse.urlparse("path:80"), + ('','','path:80','','','')) + self.assertEqual(urlparse.urlparse("http:"),('http','','','','','')) + self.assertEqual(urlparse.urlparse("https:"),('https','','','','','')) + self.assertEqual(urlparse.urlparse("http://www.python.org:80"), + ('http','www.python.org:80','','','','')) def test_main(): diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 6261b24..99ebe6a 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -187,11 +187,12 @@ def urlsplit(url, scheme='', allow_fragments=True): v = SplitResult(scheme, netloc, url, query, fragment) _parse_cache[key] = v return v - for c in url[:i]: - if c not in scheme_chars: - break - else: - scheme, url = url[:i].lower(), url[i+1:] + if url.endswith(':') or not url[i+1].isdigit(): + for c in url[:i]: + if c not in scheme_chars: + break + else: + scheme, url = url[:i].lower(), url[i+1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) |