diff options
author | Johannes Gijsbers <jlg@dds.nl> | 2005-01-09 15:29:10 (GMT) |
---|---|---|
committer | Johannes Gijsbers <jlg@dds.nl> | 2005-01-09 15:29:10 (GMT) |
commit | 41e4faa82bdf4fb601a97565bf30ee683c4bfd50 (patch) | |
tree | 64d542aff19737d2d93f8fcfe3b4a15d41e117f0 /Lib/urlparse.py | |
parent | cdd625a77067e226a5dc715d1892f9511a067391 (diff) | |
download | cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.zip cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.gz cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.bz2 |
Patch #712317: In URLs such as http://www.example.com?query=spam, treat '?' as
a delimiter. Previously, the 'network location' (<authority> in RFC 2396) would
become 'www.example.com?query=spam', while RFC 2396 does not allow a '?' in
<authority>. See bug #548176 for further discussion.
Diffstat (limited to 'Lib/urlparse.py')
-rw-r--r-- | Lib/urlparse.py | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 9c76272..8469139 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -63,6 +63,15 @@ def _splitparams(url):
         i = url.find(';')
     return url[:i], url[i+1:]
 
+def _splitnetloc(url, start=0):
+    for c in '/?#': # the order is important!
+        delim = url.find(c, start)
+        if delim >= 0:
+            break
+    else:
+        delim = len(url)
+    return url[start:delim], url[delim:]
+
 def urlsplit(url, scheme='', allow_fragments=1):
     """Parse a URL into 5 components:
     <scheme>://<netloc>/<path>?<query>#<fragment>
@@ -82,13 +91,7 @@ def urlsplit(url, scheme='', allow_fragments=1):
             scheme = url[:i].lower()
             url = url[i+1:]
             if url[:2] == '//':
-                i = url.find('/', 2)
-                if i < 0:
-                    i = url.find('#')
-                    if i < 0:
-                        i = len(url)
-                netloc = url[2:i]
-                url = url[i:]
+                netloc, url = _splitnetloc(url, 2)
             if allow_fragments and '#' in url:
                 url, fragment = url.split('#', 1)
             if '?' in url:
@@ -101,12 +104,8 @@ def urlsplit(url, scheme='', allow_fragments=1):
                 break
         else:
             scheme, url = url[:i].lower(), url[i+1:]
-    if scheme in uses_netloc:
-        if url[:2] == '//':
-            i = url.find('/', 2)
-            if i < 0:
-                i = len(url)
-            netloc, url = url[2:i], url[i:]
+    if scheme in uses_netloc and url[:2] == '//':
+        netloc, url = _splitnetloc(url, 2)
     if allow_fragments and scheme in uses_fragment and '#' in url:
         url, fragment = url.split('#', 1)
     if scheme in uses_query and '?' in url: |