summary refs log tree commit diff stats
path: root/Lib/urlparse.py
diff options
context:
space:
mode:
author: Jeremy Hylton <jeremy@alum.mit.edu> 1998-09-02 21:53:16 (GMT)
committer: Jeremy Hylton <jeremy@alum.mit.edu> 1998-09-02 21:53:16 (GMT)
commit: b85c8479eb0dd25968eceabf599c0c4616a3c59a (patch)
tree: b3786c069d4c17a82728ae034d7bea07caa17393 /Lib/urlparse.py
parent: f6ae743cb53a2953c7fb063963ec48029206c8b0 (diff)
downloadcpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.zip
cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.gz
cpython-b85c8479eb0dd25968eceabf599c0c4616a3c59a.tar.bz2
Easy optimizations of urlparse for the common case of parsing an http URL.
1. Use dict.get instead of try/except KeyError.
2. If the URL scheme is 'http', then avoid the series of 'if var in [someseq]:' tests; instead, inline all of the code.
3. Bind find = string.find as a local for faster lookup.
Diffstat (limited to 'Lib/urlparse.py')
-rw-r--r-- Lib/urlparse.py | 41
1 file changed, 33 insertions, 8 deletions
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 5348c30..c3c39a3 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -45,15 +45,40 @@ def clear_cache():
# (e.g. netloc is a single string) and we don't expand % escapes.
def urlparse(url, scheme = '', allow_fragments = 1):
key = url, scheme, allow_fragments
- try:
- return _parse_cache[key]
- except KeyError:
- pass
+ cached = _parse_cache.get(key, None)
+ if cached:
+ return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
+ find = string.find
netloc = path = params = query = fragment = ''
- i = string.find(url, ':')
+ i = find(url, ':')
if i > 0:
+ if url[:i] == 'http': # optimizie the common case
+ scheme = string.lower(url[:i])
+ url = url[i+1:]
+ if url[:2] == '//':
+ i = find(url, '/', 2)
+ if i < 0:
+ i = len(url)
+ netloc = url[2:i]
+ url = url[i:]
+ if allow_fragments:
+ i = string.rfind(url, '#')
+ if i >= 0:
+ url = url[:i]
+ fragment = url[i+1:]
+ i = find(url, '?')
+ if i >= 0:
+ url = url[:i]
+ query = url[i+1:]
+ i = find(url, ';')
+ if i >= 0:
+ url = url[:i]
+ params = url[i+1:]
+ tuple = scheme, netloc, url, params, query, fragment
+ _parse_cache[key] = tuple
+ return tuple
for c in url[:i]:
if c not in scheme_chars:
break
@@ -61,7 +86,7 @@ def urlparse(url, scheme = '', allow_fragments = 1):
scheme, url = string.lower(url[:i]), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
- i = string.find(url, '/', 2)
+ i = find(url, '/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
@@ -70,11 +95,11 @@ def urlparse(url, scheme = '', allow_fragments = 1):
if i >= 0:
url, fragment = url[:i], url[i+1:]
if scheme in uses_query:
- i = string.find(url, '?')
+ i = find(url, '?')
if i >= 0:
url, query = url[:i], url[i+1:]
if scheme in uses_params:
- i = string.find(url, ';')
+ i = find(url, ';')
if i >= 0:
url, params = url[:i], url[i+1:]
tuple = scheme, netloc, url, params, query, fragment