diff options
Diffstat (limited to 'Lib/test/test_urlparse.py')
-rw-r--r-- | Lib/test/test_urlparse.py | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d2ec0da..0f99130 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -612,6 +612,54 @@ class UrlParseTestCase(unittest.TestCase): with self.assertRaisesRegex(ValueError, "out of range"): p.port + def test_urlsplit_remove_unsafe_bytes(self): + # Remove ASCII tabs and newlines from input, for http common case scenario. + url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, "http") + self.assertEqual(p.netloc, "www.python.org") + self.assertEqual(p.path, "/javascript:alert('msg')/") + self.assertEqual(p.query, "query=something") + self.assertEqual(p.fragment, "fragment") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, "www.python.org") + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment") + + # Remove ASCII tabs and newlines from input as bytes, for http common case scenario. 
+ url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.scheme, b"http") + self.assertEqual(p.netloc, b"www.python.org") + self.assertEqual(p.path, b"/javascript:alert('msg')/") + self.assertEqual(p.query, b"query=something") + self.assertEqual(p.fragment, b"fragment") + self.assertEqual(p.username, None) + self.assertEqual(p.password, None) + self.assertEqual(p.hostname, b"www.python.org") + self.assertEqual(p.port, None) + self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment") + + # Remove ASCII tabs and newlines from input, any scheme. + url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment") + + # Remove ASCII tabs and newlines from input as bytes, any scheme. + url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + p = urllib.parse.urlsplit(url) + self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment") + + # Unsafe bytes are not returned from the urlparse cache. + # The scheme is stored after parsing; sending a scheme with unsafe bytes *will not* return an unsafe scheme. + url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment" + scheme = "htt\nps" + for _ in range(2): + p = urllib.parse.urlsplit(url, scheme=scheme) + self.assertEqual(p.scheme, "https") + self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment") + def test_attributes_bad_port(self): """Check handling of invalid ports.""" for bytes in (False, True): |