summary refs log tree commit diff stats
path: root/Lib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_urlparse.py 21
-rw-r--r-- Lib/urllib/parse.py 11
2 files changed, 27 insertions, 5 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 69d65c1..e559142 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -461,6 +461,27 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"),
('http', 'example.com', '', '', 'blahblah=/foo', ''))
+ def test_withoutscheme(self):
+ # Test urlparse without scheme
+ # Issue 754016: urlparse goes wrong with IP:port without scheme
+ # RFC 1808 specifies that netloc should start with //; urlparse expects
+ # the same, and otherwise classifies that portion of the URL as path.
+ self.assertEqual(urllib.parse.urlparse("path"),
+ ('','','path','','',''))
+ self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
+ ('','www.python.org:80','','','',''))
+ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
+ ('http','www.python.org:80','','','',''))
+
+ def test_portseparator(self):
+ # Issue 754016: distinguish the port separator ':' from the scheme separator
+ self.assertEqual(urllib.parse.urlparse("path:80"),
+ ('','','path:80','','',''))
+ self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
+ self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
+ self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
+ ('http','www.python.org:80','','','',''))
+
def test_usingsys(self):
# Issue 3314: sys module is used in the error
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 133b9d9..00f0e5b 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -192,11 +192,12 @@ def urlsplit(url, scheme='', allow_fragments=True):
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return v
- for c in url[:i]:
- if c not in scheme_chars:
- break
- else:
- scheme, url = url[:i].lower(), url[i+1:]
+ if url.endswith(':') or not url[i+1].isdigit():
+ for c in url[:i]:
+ if c not in scheme_chars:
+ break
+ else:
+ scheme, url = url[:i].lower(), url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or