summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jeremy Hylton <jeremy@alum.mit.edu>, 1998-09-02 21:53:16 (GMT)
committer: Jeremy Hylton <jeremy@alum.mit.edu>, 1998-09-02 21:53:16 (GMT)
commit: b85c8479eb0dd25968eceabf599c0c4616a3c59a (patch)
tree: b3786c069d4c17a82728ae034d7bea07caa17393
parent: f6ae743cb53a2953c7fb063963ec48029206c8b0 (diff)
download: cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.zip
          cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.gz
          cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.bz2
Easy optimizations of urlparse for the common case of parsing an http URL.
1. Use dict.get instead of try/except KeyError.
2. If the URL scheme is 'http', avoid the series of 'if var in [someseq]:' tests; instead, inline all of the code.
3. Bind find = string.find as a local name.
-rw-r--r-- Lib/urlparse.py 41
1 file changed, 33 insertions, 8 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 5348c30..c3c39a3 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -45,15 +45,40 @@ def clear_cache():
# (e.g. netloc is a single string) and we don't expand % escapes.
def urlparse(url, scheme = '', allow_fragments = 1):
key = url, scheme, allow_fragments
- try:
- return _parse_cache[key]
- except KeyError:
- pass
+ cached = _parse_cache.get(key, None)
+ if cached:
+ return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
+ find = string.find
netloc = path = params = query = fragment = ''
- i = string.find(url, ':')
+ i = find(url, ':')
if i > 0:
+ if url[:i] == 'http': # optimizie the common case
+ scheme = string.lower(url[:i])
+ url = url[i+1:]
+ if url[:2] == '//':
+ i = find(url, '/', 2)
+ if i < 0:
+ i = len(url)
+ netloc = url[2:i]
+ url = url[i:]
+ if allow_fragments:
+ i = string.rfind(url, '#')
+ if i >= 0:
+ url = url[:i]
+ fragment = url[i+1:]
+ i = find(url, '?')
+ if i >= 0:
+ url = url[:i]
+ query = url[i+1:]
+ i = find(url, ';')
+ if i >= 0:
+ url = url[:i]
+ params = url[i+1:]
+ tuple = scheme, netloc, url, params, query, fragment
+ _parse_cache[key] = tuple
+ return tuple
for c in url[:i]:
if c not in scheme_chars:
break
@@ -61,7 +86,7 @@ def urlparse(url, scheme = '', allow_fragments = 1):
scheme, url = string.lower(url[:i]), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
- i = string.find(url, '/', 2)
+ i = find(url, '/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
@@ -70,11 +95,11 @@ def urlparse(url, scheme = '', allow_fragments = 1):
if i >= 0:
url, fragment = url[:i], url[i+1:]
if scheme in uses_query:
- i = string.find(url, '?')
+ i = find(url, '?')
if i >= 0:
url, query = url[:i], url[i+1:]
if scheme in uses_params:
- i = string.find(url, ';')
+ i = find(url, ';')
if i >= 0:
url, params = url[:i], url[i+1:]
tuple = scheme, netloc, url, params, query, fragment