diff options
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_urlparse.py | 21 | ||||
-rw-r--r-- | Lib/urllib/parse.py | 11 |
2 files changed, 27 insertions, 5 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 69d65c1..e559142 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -461,6 +461,27 @@ class UrlParseTestCase(unittest.TestCase):
         self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
                          ('http', 'example.com', '', '', 'blahblah=/foo', ''))
 
+    def test_withoutscheme(self):
+        # Test urlparse without scheme
+        # Issue 754016: urlparse goes wrong with IP:port without scheme
+        # RFC 1808 specifies that netloc should start with //, urlparse expects
+        # the same, otherwise it classifies the portion of url as path.
+        self.assertEqual(urllib.parse.urlparse("path"),
+                         ('','','path','','',''))
+        self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
+                         ('','www.python.org:80','','','',''))
+        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
+                         ('http','www.python.org:80','','','',''))
+
+    def test_portseparator(self):
+        # Issue 754016 makes changes for port separator ':' from scheme separator
+        self.assertEqual(urllib.parse.urlparse("path:80"),
+                         ('','','path:80','','',''))
+        self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
+        self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
+        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
+                         ('http','www.python.org:80','','','',''))
+
     def test_usingsys(self):
         # Issue 3314: sys module is used in the error
         self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 133b9d9..00f0e5b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -192,11 +192,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
             v = SplitResult(scheme, netloc, url, query, fragment)
             _parse_cache[key] = v
             return v
-        for c in url[:i]:
-            if c not in scheme_chars:
-                break
-        else:
-            scheme, url = url[:i].lower(), url[i+1:]
+        if url.endswith(':') or not url[i+1].isdigit():
+            for c in url[:i]:
+                if c not in scheme_chars:
+                    break
+            else:
+                scheme, url = url[:i].lower(), url[i+1:]
     if url[:2] == '//':
         netloc, url = _splitnetloc(url, 2)
         if (('[' in netloc and ']' not in netloc) or