summary | refs | log | tree | commit | diff | stats
path: root/Lib/test/test_urlparse.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/test/test_urlparse.py')
-rw-r--r-- Lib/test/test_urlparse.py 48
1 files changed, 48 insertions, 0 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index d2ec0da..0f99130 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -612,6 +612,54 @@ class UrlParseTestCase(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "out of range"):
p.port
+ def test_urlsplit_remove_unsafe_bytes(self):
+ # Remove ASCII tabs and newlines from str input, for the common http case.
+ url = "h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, "http")
+ self.assertEqual(p.netloc, "www.python.org")
+ self.assertEqual(p.path, "/javascript:alert('msg')/")
+ self.assertEqual(p.query, "query=something")
+ self.assertEqual(p.fragment, "fragment")
+ self.assertEqual(p.username, None)
+ self.assertEqual(p.password, None)
+ self.assertEqual(p.hostname, "www.python.org")
+ self.assertEqual(p.port, None)
+ self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from bytes input, for the common http case.
+ url = b"h\nttp://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.scheme, b"http")
+ self.assertEqual(p.netloc, b"www.python.org")
+ self.assertEqual(p.path, b"/javascript:alert('msg')/")
+ self.assertEqual(p.query, b"query=something")
+ self.assertEqual(p.fragment, b"fragment")
+ self.assertEqual(p.username, None)
+ self.assertEqual(p.password, None)
+ self.assertEqual(p.hostname, b"www.python.org")
+ self.assertEqual(p.port, None)
+ self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from input, any scheme.
+ url = "x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.geturl(), "x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Remove ASCII tabs and newlines from input as bytes, any scheme.
+ url = b"x-new-scheme\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ p = urllib.parse.urlsplit(url)
+ self.assertEqual(p.geturl(), b"x-new-scheme://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+ # Unsafe bytes are not returned from the urlparse cache.
+ # The scheme is stored after parsing; passing a scheme with unsafe bytes *will not* return an unsafe scheme.
+ url = "https://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+ scheme = "htt\nps"
+ for _ in range(2):
+ p = urllib.parse.urlsplit(url, scheme=scheme)
+ self.assertEqual(p.scheme, "https")
+ self.assertEqual(p.geturl(), "https://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
for bytes in (False, True):