diff options
author | Jeremy Hylton <jeremy@alum.mit.edu> | 1998-09-02 21:53:16 (GMT) |
---|---|---|
committer | Jeremy Hylton <jeremy@alum.mit.edu> | 1998-09-02 21:53:16 (GMT) |
commit | b85c8479eb0dd25968eceabf599c0c4616a3c59a (patch) | |
tree | b3786c069d4c17a82728ae034d7bea07caa17393 | |
parent | f6ae743cb53a2953c7fb063963ec48029206c8b0 (diff) | |
download | cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.zip cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.gz cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.bz2 |
Easy optimizations of urlparse for the common case of parsing an http URL.
1. use dict.get instead of try/except KeyError
2. if the url scheme is 'http' then avoid the series of
'if var in [someseq]:' tests; instead, inline all of the code.
3. bind string.find to a local name (find = string.find)
-rw-r--r-- | Lib/urlparse.py | 41 |
1 files changed, 33 insertions, 8 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 5348c30..c3c39a3 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -45,15 +45,40 @@ def clear_cache(): # (e.g. netloc is a single string) and we don't expand % escapes. def urlparse(url, scheme = '', allow_fragments = 1): key = url, scheme, allow_fragments - try: - return _parse_cache[key] - except KeyError: - pass + cached = _parse_cache.get(key, None) + if cached: + return cached if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() + find = string.find netloc = path = params = query = fragment = '' - i = string.find(url, ':') + i = find(url, ':') if i > 0: + if url[:i] == 'http': # optimizie the common case + scheme = string.lower(url[:i]) + url = url[i+1:] + if url[:2] == '//': + i = find(url, '/', 2) + if i < 0: + i = len(url) + netloc = url[2:i] + url = url[i:] + if allow_fragments: + i = string.rfind(url, '#') + if i >= 0: + url = url[:i] + fragment = url[i+1:] + i = find(url, '?') + if i >= 0: + url = url[:i] + query = url[i+1:] + i = find(url, ';') + if i >= 0: + url = url[:i] + params = url[i+1:] + tuple = scheme, netloc, url, params, query, fragment + _parse_cache[key] = tuple + return tuple for c in url[:i]: if c not in scheme_chars: break @@ -61,7 +86,7 @@ def urlparse(url, scheme = '', allow_fragments = 1): scheme, url = string.lower(url[:i]), url[i+1:] if scheme in uses_netloc: if url[:2] == '//': - i = string.find(url, '/', 2) + i = find(url, '/', 2) if i < 0: i = len(url) netloc, url = url[2:i], url[i:] @@ -70,11 +95,11 @@ def urlparse(url, scheme = '', allow_fragments = 1): if i >= 0: url, fragment = url[:i], url[i+1:] if scheme in uses_query: - i = string.find(url, '?') + i = find(url, '?') if i >= 0: url, query = url[:i], url[i+1:] if scheme in uses_params: - i = string.find(url, ';') + i = find(url, ';') if i >= 0: url, params = url[:i], url[i+1:] tuple = scheme, netloc, url, params, query, fragment |