summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Senthil Kumaran <senthil@uthcode.com>, 2012-05-19 00:12:46 (GMT)
committer: Senthil Kumaran <senthil@uthcode.com>, 2012-05-19 00:12:46 (GMT)
commit: 15e848b0767c41e6badc45316d60aa588d7e5ac1 (patch)
tree: 7bee86a3995621b9c1e5246d4eb64fc7e87bfb23 /Lib
parent: 43ae3ceab8dd65d184aef1773714a604984de38e (diff)
parent: 1be320ebdd5b1f46f32e32c83f3c1e982e2d27e2 (diff)
download: cpython-15e848b0767c41e6badc45316d60aa588d7e5ac1.zip
cpython-15e848b0767c41e6badc45316d60aa588d7e5ac1.tar.gz
cpython-15e848b0767c41e6badc45316d60aa588d7e5ac1.tar.bz2
Issue #9374 - Generic parsing of the query and fragment portions of URLs for any scheme
Diffstat (limited to 'Lib')
-rwxr-xr-x Lib/test/test_urlparse.py | 9
-rw-r--r-- Lib/urllib/parse.py | 11
2 files changed, 11 insertions, 9 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index ada0ca8..5784381 100755
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -636,11 +636,20 @@ class UrlParseTestCase(unittest.TestCase):
('s3', 'foo.com', '/stuff', '', '', ''))
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
('x-newscheme', 'foo.com', '/stuff', '', '', ''))
+ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
+ ('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
+ self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
+ ('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
+
# And for bytes...
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
(b's3', b'foo.com', b'/stuff', b'', b'', b''))
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
+ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
+ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
+ self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
+ (b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
def test_mixed_types_rejected(self):
# Several functions that process either strings or ASCII encoded bytes
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 01067ae..47b7962 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -44,16 +44,9 @@ uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
'imap', 'wais', 'file', 'mms', 'https', 'shttp',
'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
-non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
- 'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
'mms', '', 'sftp']
-uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
- 'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
-uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
- 'nntp', 'wais', 'https', 'shttp', 'snews',
- 'file', 'prospero', '']
# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
@@ -357,9 +350,9 @@ def urlsplit(url, scheme='', allow_fragments=True):
if (('[' in netloc and ']' not in netloc) or
(']' in netloc and '[' not in netloc)):
raise ValueError("Invalid IPv6 URL")
- if allow_fragments and scheme in uses_fragment and '#' in url:
+ if allow_fragments and '#' in url:
url, fragment = url.split('#', 1)
- if scheme in uses_query and '?' in url:
+ if '?' in url:
url, query = url.split('?', 1)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v