Fix parsing of parameters from a URL; urlparse() did not check that it only split parameters from the last path segment.

Introduces two new functions, urlsplit() and urlunsplit(), that do the simpler job of splitting the URL without monkeying around with the parameters field, since that was not being handled properly. This closes bug #478038.
author: Fred Drake <fdrake@acm.org> 2001-11-16 02:52:57 (GMT)
committer: Fred Drake <fdrake@acm.org> 2001-11-16 02:52:57 (GMT)
commit: 5751a22ede6a1c40f3926439e6c8368889f9b8d2 (patch)
tree: dc84cdb3e0f8ada74f992a78bb98294c30ca350d /Lib/urlparse.py
parent: c66ff203bb02a56f813399403445730a14d8c045 (diff)
download: cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.zip
cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.tar.gz
cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.tar.bz2
1 files changed, 46 insertions, 34 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 49c7bc8..cd6ad26 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -43,19 +43,42 @@ def clear_cache():
     _parse_cache = {}
 
 
-def urlparse(url, scheme = '', allow_fragments = 1):
+def urlparse(url, scheme='', allow_fragments=1):
     """Parse a URL into 6 components:
     <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
     Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
     Note that we don't break the components up in smaller bits
     (e.g. netloc is a single string) and we don't expand % escapes."""
+    tuple = urlsplit(url, scheme, allow_fragments)
+    scheme, netloc, url, query, fragment = tuple
+    if scheme in uses_params and ';' in url:
+        url, params = _splitparams(url)
+    else:
+        params = ''
+    return scheme, netloc, url, params, query, fragment
+
+def _splitparams(url):
+    if '/'  in url:
+        i = url.find(';', url.rfind('/'))
+        if i < 0:
+            return url, ''
+    else:
+        i = url.find(';')
+    return url[:i], url[i+1:]
+
+def urlsplit(url, scheme='', allow_fragments=1):
+    """Parse a URL into 5 components:
+    <scheme>://<netloc>/<path>?<query>#<fragment>
+    Return a 5-tuple: (scheme, netloc, path, query, fragment).
+    Note that we don't break the components up in smaller bits
+    (e.g. netloc is a single string) and we don't expand % escapes."""
     key = url, scheme, allow_fragments
     cached = _parse_cache.get(key, None)
     if cached:
         return cached
     if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
         clear_cache()
-    netloc = params = query = fragment = ''
+    netloc = query = fragment = ''
     i = url.find(':')
     if i > 0:
         if url[:i] == 'http': # optimize the common case
@@ -67,20 +90,11 @@ def urlparse(url, scheme = '', allow_fragments = 1):
                     i = len(url)
                 netloc = url[2:i]
                 url = url[i:]
-            if allow_fragments:
-                i = url.rfind('#')
-                if i >= 0:
-                    fragment = url[i+1:]
-                    url = url[:i]
-            i = url.find('?')
-            if i >= 0:
-                query = url[i+1:]
-                url = url[:i]
-            i = url.find(';')
-            if i >= 0:
-                params = url[i+1:]
-                url = url[:i]
-            tuple = scheme, netloc, url, params, query, fragment
+            if allow_fragments and '#' in url:
+                url, fragment = url.split('#', 1)
+            if '?' in url:
+                url, query = url.split('?', 1)
+            tuple = scheme, netloc, url, query, fragment
             _parse_cache[key] = tuple
             return tuple
         for c in url[:i]:
@@ -94,19 +108,11 @@ def urlparse(url, scheme = '', allow_fragments = 1):
             if i < 0:
                 i = len(url)
             netloc, url = url[2:i], url[i:]
-    if allow_fragments and scheme in uses_fragment:
-        i = url.rfind('#')
-        if i >= 0:
-            url, fragment = url[:i], url[i+1:]
-    if scheme in uses_query:
-        i = url.find('?')
-        if i >= 0:
-            url, query = url[:i], url[i+1:]
-    if scheme in uses_params:
-        i = url.find(';')
-        if i >= 0:
-            url, params = url[:i], url[i+1:]
-    tuple = scheme, netloc, url, params, query, fragment
+    if allow_fragments and scheme in uses_fragment and '#' in url:
+        url, fragment = url.split('#', 1)
+    if scheme in uses_query and '?' in url:
+        url, query = url.split('?', 1)
+    tuple = scheme, netloc, url, query, fragment
     _parse_cache[key] = tuple
     return tuple
 
@@ -115,13 +121,16 @@ def urlunparse((scheme, netloc, url, params, query, fragment)):
     slightly different, but equivalent URL, if the URL that was parsed
     originally had redundant delimiters, e.g. a ? with an empty query
     (the draft states that these are equivalent)."""
+    if params:
+        url = "%s;%s" % (url, params)
+    return urlunsplit((scheme, netloc, url, query, fragment))
+
+def urlunsplit((scheme, netloc, url, query, fragment)):
     if netloc or (scheme in uses_netloc and url[:2] == '//'):
         if url and url[:1] != '/': url = '/' + url
         url = '//' + (netloc or '') + url
     if scheme:
         url = scheme + ':' + url
-    if params:
-        url = url + ';' + params
     if query:
         url = url + '?' + query
     if fragment:
@@ -187,9 +196,12 @@ def urldefrag(url):
     the URL contained no fragments, the second element is the
     empty string.
     """
-    s, n, p, a, q, frag = urlparse(url)
-    defrag = urlunparse((s, n, p, a, q, ''))
-    return defrag, frag
+    if '#' in url:
+        s, n, p, a, q, frag = urlparse(url)
+        defrag = urlunparse((s, n, p, a, q, ''))
+        return defrag, frag
+    else:
+        return url, ''
 
 
 test_input = """
author	Fred Drake <fdrake@acm.org>	2001-11-16 02:52:57 (GMT)
committer	Fred Drake <fdrake@acm.org>	2001-11-16 02:52:57 (GMT)
commit	5751a22ede6a1c40f3926439e6c8368889f9b8d2 (patch)
tree	dc84cdb3e0f8ada74f992a78bb98294c30ca350d /Lib/urlparse.py
parent	c66ff203bb02a56f813399403445730a14d8c045 (diff)
download	cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.zip cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.tar.gz cpython-5751a22ede6a1c40f3926439e6c8368889f9b8d2.tar.bz2