Patch #712317: In URLs such as http://www.example.com?query=spam, treat '?' as a delimiter. Previously, the 'network location' (<authority> in RFC 2396) would become 'www.example.com?query=spam', while RFC 2396 does not allow a '?' in <authority>. See bug #548176 for further discussion.
author: Johannes Gijsbers <jlg@dds.nl> 2005-01-09 15:29:10 (GMT)
committer: Johannes Gijsbers <jlg@dds.nl> 2005-01-09 15:29:10 (GMT)
commit: 41e4faa82bdf4fb601a97565bf30ee683c4bfd50 (patch)
tree: 64d542aff19737d2d93f8fcfe3b4a15d41e117f0 /Lib/urlparse.py
parent: cdd625a77067e226a5dc715d1892f9511a067391 (diff)
download: cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.zip
cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.gz
cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.bz2
1 files changed, 12 insertions, 13 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 9c76272..8469139 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -63,6 +63,15 @@ def _splitparams(url):
         i = url.find(';')
     return url[:i], url[i+1:]
 
+def _splitnetloc(url, start=0):
+    for c in '/?#': # the order is important!
+        delim = url.find(c, start)
+        if delim >= 0:
+            break
+    else:
+        delim = len(url)
+    return url[start:delim], url[delim:]
+
 def urlsplit(url, scheme='', allow_fragments=1):
     """Parse a URL into 5 components:
     <scheme>://<netloc>/<path>?<query>#<fragment>
@@ -82,13 +91,7 @@ def urlsplit(url, scheme='', allow_fragments=1):
             scheme = url[:i].lower()
             url = url[i+1:]
             if url[:2] == '//':
-                i = url.find('/', 2)
-                if i < 0:
-                    i = url.find('#')
-                    if i < 0:
-                        i = len(url)
-                netloc = url[2:i]
-                url = url[i:]
+                netloc, url = _splitnetloc(url, 2)
             if allow_fragments and '#' in url:
                 url, fragment = url.split('#', 1)
             if '?' in url:
@@ -101,12 +104,8 @@ def urlsplit(url, scheme='', allow_fragments=1):
                 break
         else:
             scheme, url = url[:i].lower(), url[i+1:]
-    if scheme in uses_netloc:
-        if url[:2] == '//':
-            i = url.find('/', 2)
-            if i < 0:
-                i = len(url)
-            netloc, url = url[2:i], url[i:]
+    if scheme in uses_netloc and url[:2] == '//':
+        netloc, url = _splitnetloc(url, 2)
     if allow_fragments and scheme in uses_fragment and '#' in url:
         url, fragment = url.split('#', 1)
     if scheme in uses_query and '?' in url:
author	Johannes Gijsbers <jlg@dds.nl>	2005-01-09 15:29:10 (GMT)
committer	Johannes Gijsbers <jlg@dds.nl>	2005-01-09 15:29:10 (GMT)
commit	41e4faa82bdf4fb601a97565bf30ee683c4bfd50 (patch)
tree	64d542aff19737d2d93f8fcfe3b4a15d41e117f0 /Lib/urlparse.py
parent	cdd625a77067e226a5dc715d1892f9511a067391 (diff)
download	cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.zip cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.gz cpython-41e4faa82bdf4fb601a97565bf30ee683c4bfd50.tar.bz2