diff options
author | Nick Coghlan <ncoghlan@gmail.com> | 2010-11-30 15:48:08 (GMT) |
---|---|---|
committer | Nick Coghlan <ncoghlan@gmail.com> | 2010-11-30 15:48:08 (GMT) |
commit | 9fc443cf590c76d4b979c46dc954d3956cee0319 (patch) | |
tree | 16384a522e3cbc37539a86745f47ee6f60f18239 /Lib/test/test_urlparse.py | |
parent | 43f0c27be78ce11d871c8a80ec1251cc82010d19 (diff) | |
download | cpython-9fc443cf590c76d4b979c46dc954d3956cee0319.zip cpython-9fc443cf590c76d4b979c46dc954d3956cee0319.tar.gz cpython-9fc443cf590c76d4b979c46dc954d3956cee0319.tar.bz2 |
Issue 9873: the URL parsing functions now accept ASCII encoded byte sequences in addition to character strings
Diffstat (limited to 'Lib/test/test_urlparse.py')
-rw-r--r-- | Lib/test/test_urlparse.py | 235 |
1 files changed, 214 insertions, 21 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index e559142..e67c2b6 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -24,6 +24,17 @@ parse_qsl_test_cases = [ ("&a=b", [('a', 'b')]), ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), ("a=1&a=2", [('a', '1'), ('a', '2')]), + (b"", []), + (b"&", []), + (b"&&", []), + (b"=", [(b'', b'')]), + (b"=a", [(b'', b'a')]), + (b"a", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"a=", [(b'a', b'')]), + (b"&a=b", [(b'a', b'b')]), + (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), + (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), ] class UrlParseTestCase(unittest.TestCase): @@ -86,7 +97,7 @@ class UrlParseTestCase(unittest.TestCase): def test_roundtrips(self): - testcases = [ + str_cases = [ ('file:///tmp/junk.txt', ('file', '', '/tmp/junk.txt', '', '', ''), ('file', '', '/tmp/junk.txt', '', '')), @@ -110,16 +121,21 @@ class UrlParseTestCase(unittest.TestCase): ('git+ssh', 'git@github.com','/user/project.git', '','',''), ('git+ssh', 'git@github.com','/user/project.git', - '', '')) + '', '')), ] - for url, parsed, split in testcases: + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + for url, parsed, split in str_cases + bytes_cases: self.checkRoundtrips(url, parsed, split) def test_http_roundtrips(self): # urllib.parse.urlsplit treats 'http:' as an optimized special case, # so we test both 'http:' and 'https:' in all the following. # Three cheers for white box knowledge! - testcases = [ + str_cases = [ ('://www.python.org', ('www.python.org', '', '', '', ''), ('www.python.org', '', '', '')), @@ -136,19 +152,34 @@ class UrlParseTestCase(unittest.TestCase): ('a', '/b/c/d', 'p', 'q', 'f'), ('a', '/b/c/d;p', 'q', 'f')), ] - for scheme in ('http', 'https'): - for url, parsed, split in testcases: - url = scheme + url - parsed = (scheme,) + parsed - split = (scheme,) + split - self.checkRoundtrips(url, parsed, split) + def _encode(t): + return (t[0].encode('ascii'), + tuple(x.encode('ascii') for x in t[1]), + tuple(x.encode('ascii') for x in t[2])) + bytes_cases = [_encode(x) for x in str_cases] + str_schemes = ('http', 'https') + bytes_schemes = (b'http', b'https') + str_tests = str_schemes, str_cases + bytes_tests = bytes_schemes, bytes_cases + for schemes, test_cases in (str_tests, bytes_tests): + for scheme in schemes: + for url, parsed, split in test_cases: + url = scheme + url + parsed = (scheme,) + parsed + split = (scheme,) + split + self.checkRoundtrips(url, parsed, split) def checkJoin(self, base, relurl, expected): - self.assertEqual(urllib.parse.urljoin(base, relurl), expected, - (base, relurl, expected)) + str_components = (base, relurl, expected) + self.assertEqual(urllib.parse.urljoin(base, relurl), expected) + bytes_components = baseb, relurlb, expectedb = [ + x.encode('ascii') for x in str_components] + self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb) def test_unparse_parse(self): - for u in ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',]: + str_cases = ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',] + bytes_cases = [x.encode('ascii') for x in str_cases] + for u in str_cases + bytes_cases: self.assertEqual(urllib.parse.urlunsplit(urllib.parse.urlsplit(u)), u) self.assertEqual(urllib.parse.urlunparse(urllib.parse.urlparse(u)), u) @@ -328,7 +359,7 @@ class UrlParseTestCase(unittest.TestCase): self.checkJoin(SIMPLE_BASE, 'http:g?y/./x','http://a/b/c/g?y/./x') def test_RFC2732(self): - for url, hostname, port in [ + str_cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), ('http://12.34.56.78:5432/foo/', '12.34.56.78', 5432), ('http://[::1]:5432/foo/', '::1', 5432), @@ -349,20 +380,26 @@ class UrlParseTestCase(unittest.TestCase): ('http://[::12.34.56.78]/foo/', '::12.34.56.78', None), ('http://[::ffff:12.34.56.78]/foo/', '::ffff:12.34.56.78', None), - ]: + ] + def _encode(t): + return t[0].encode('ascii'), t[1].encode('ascii'), t[2] + bytes_cases = [_encode(x) for x in str_cases] + for url, hostname, port in str_cases + bytes_cases: urlparsed = urllib.parse.urlparse(url) self.assertEqual((urlparsed.hostname, urlparsed.port) , (hostname, port)) - for invalid_url in [ + str_cases = [ 'http://::12.34.56.78]/', 'http://[::1/foo/', 'ftp://[::1/foo/bad]/bad', 'http://[::1/foo/bad]/bad', - 'http://[::ffff:12.34.56.78']: + 'http://[::ffff:12.34.56.78'] + bytes_cases = [x.encode('ascii') for x in str_cases] + for invalid_url in str_cases + bytes_cases: self.assertRaises(ValueError, urllib.parse.urlparse, invalid_url) def test_urldefrag(self): - for url, defrag, frag in [ + str_cases = [ ('http://python.org#frag', 'http://python.org', 'frag'), ('http://python.org', 'http://python.org', ''), ('http://python.org/#frag', 'http://python.org/', 'frag'), @@ -373,8 +410,16 @@ class UrlParseTestCase(unittest.TestCase): ('http://python.org/p?q', 'http://python.org/p?q', ''), (RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'), (RFC2396_BASE, 'http://a/b/c/d;p?q', ''), - ]: - self.assertEqual(urllib.parse.urldefrag(url), (defrag, frag)) + ] + def _encode(t): + return type(t)(x.encode('ascii') for x in t) + bytes_cases = [_encode(x) for x in str_cases] + for url, defrag, frag in str_cases + bytes_cases: + result = urllib.parse.urldefrag(url) + self.assertEqual(result.geturl(), url) + self.assertEqual(result, (defrag, frag)) + self.assertEqual(result.url, defrag) + self.assertEqual(result.fragment, frag) def test_urlsplit_attributes(self): url = "HTTP://WWW.PYTHON.ORG/doc/#frag" @@ -390,7 +435,8 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.port, None) # geturl() won't return exactly the original URL in this case # since the scheme is always case-normalized - #self.assertEqual(p.geturl(), url) + # We handle this by ignoring the first 4 characters of the URL + self.assertEqual(p.geturl()[4:], url[4:]) url = "http://User:Pass@www.python.org:080/doc/?query=yes#frag" p = urllib.parse.urlsplit(url) @@ -422,6 +468,45 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.port, 80) self.assertEqual(p.geturl(), url) + # And check them all again, only with bytes this time + url = b"HTTP://WWW.PYTHON.ORG/doc/#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"WWW.PYTHON.ORG") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, None) + self.assertEqual(p.geturl()[4:], url[4:]) + + url = b"http://User:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) + + url = b"http://User@example.com:Pass@www.python.org:080/doc/?query=yes#frag" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"User@example.com:Pass@www.python.org:080") + self.assertEqual(p.path, b"/doc/") + self.assertEqual(p.query, b"query=yes") + self.assertEqual(p.fragment, b"frag") + self.assertEqual(p.username, b"User@example.com") + self.assertEqual(p.password, b"Pass") + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, 80) + self.assertEqual(p.geturl(), url) def test_attributes_bad_port(self): """Check handling of non-integer ports.""" @@ -433,6 +518,15 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.netloc, "www.example.net:foo") self.assertRaises(ValueError, lambda: p.port) + # Once again, repeat ourselves to test bytes + p = urllib.parse.urlsplit(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + + p = urllib.parse.urlparse(b"http://www.example.net:foo") + self.assertEqual(p.netloc, b"www.example.net:foo") + self.assertRaises(ValueError, lambda: p.port) + def test_attributes_without_netloc(self): # This example is straight from RFC 3261. It looks like it # should allow the username, hostname, and port to be filled @@ -456,10 +550,30 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(p.port, None) self.assertEqual(p.geturl(), uri) + # You guessed it, repeating the test with bytes input + uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15" + p = urllib.parse.urlsplit(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + + p = urllib.parse.urlparse(uri) + self.assertEqual(p.netloc, b"") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, None) + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), uri) + def test_noslash(self): # Issue 1637: http://foo.com?query is legal self.assertEqual(urllib.parse.urlparse("http://example.com?blahblah=/foo"), ('http', 'example.com', '', '', 'blahblah=/foo', '')) + self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"), + (b'http', b'example.com', b'', b'', b'blahblah=/foo', b'')) def test_withoutscheme(self): # Test urlparse without scheme @@ -472,6 +586,13 @@ class UrlParseTestCase(unittest.TestCase): ('','www.python.org:80','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) + # Repeat for bytes input + self.assertEqual(urllib.parse.urlparse(b"path"), + (b'',b'',b'path',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"), + (b'',b'www.python.org:80',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) def test_portseparator(self): # Issue 754016 makes changes for port separator ':' from scheme separator @@ -481,6 +602,13 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','','')) self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"), ('http','www.python.org:80','','','','')) + # As usual, need to check bytes input as well + self.assertEqual(urllib.parse.urlparse(b"path:80"), + (b'',b'',b'path:80',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b'')) + self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"), + (b'http',b'www.python.org:80',b'',b'',b'',b'')) def test_usingsys(self): # Issue 3314: sys module is used in the error @@ -492,6 +620,71 @@ class UrlParseTestCase(unittest.TestCase): ('s3', 'foo.com', '/stuff', '', '', '')) self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"), ('x-newscheme', 'foo.com', '/stuff', '', '', '')) + # And for bytes... + self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"), + (b's3', b'foo.com', b'/stuff', b'', b'', b'')) + self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"), + (b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b'')) + + def test_mixed_types_rejected(self): + # Several functions that process either strings or ASCII encoded bytes + # accept multiple arguments. Check they reject mixed type input + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlparse("www.python.org", b"http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlparse(b"www.python.org", "http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlsplit("www.python.org", b"http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlsplit(b"www.python.org", "http") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunparse(( b"http", "www.python.org","","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunparse(("http", b"www.python.org","","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunsplit((b"http", "www.python.org","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urlunsplit(("http", b"www.python.org","","","")) + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urljoin("http://python.org", b"http://python.org") + with self.assertRaisesRegexp(TypeError, "Cannot mix str"): + urllib.parse.urljoin(b"http://python.org", "http://python.org") + + def _check_result_type(self, str_type): + num_args = len(str_type._fields) + bytes_type = str_type._encoded_counterpart + self.assertIs(bytes_type._decoded_counterpart, str_type) + str_args = ('',) * num_args + bytes_args = (b'',) * num_args + str_result = str_type(*str_args) + bytes_result = bytes_type(*bytes_args) + encoding = 'ascii' + errors = 'strict' + self.assertEqual(str_result, str_args) + self.assertEqual(bytes_result.decode(), str_args) + self.assertEqual(bytes_result.decode(), str_result) + self.assertEqual(bytes_result.decode(encoding), str_args) + self.assertEqual(bytes_result.decode(encoding), str_result) + self.assertEqual(bytes_result.decode(encoding, errors), str_args) + self.assertEqual(bytes_result.decode(encoding, errors), str_result) + self.assertEqual(bytes_result, bytes_args) + self.assertEqual(str_result.encode(), bytes_args) + self.assertEqual(str_result.encode(), bytes_result) + self.assertEqual(str_result.encode(encoding), bytes_args) + self.assertEqual(str_result.encode(encoding), bytes_result) + self.assertEqual(str_result.encode(encoding, errors), bytes_args) + self.assertEqual(str_result.encode(encoding, errors), bytes_result) + + def test_result_pairs(self): + # Check encoding and decoding between result pairs + result_types = [ + urllib.parse.DefragResult, + urllib.parse.SplitResult, + urllib.parse.ParseResult, + ] + for result_type in result_types: + self._check_result_type(result_type) + def test_main(): support.run_unittest(UrlParseTestCase) |