diff options
author | Guido van Rossum <guido@python.org> | 2000-12-19 16:48:13 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2000-12-19 16:48:13 (GMT) |
commit | fad81f08380080bb370c71c6cd0562e5329a4660 (patch) | |
tree | 2e9311483f520fb77d84768b93fed81dd0f6e100 | |
parent | 68abe832b9761b1b506d6eaf5eb941c07d0735bc (diff) | |
download | cpython-fad81f08380080bb370c71c6cd0562e5329a4660.zip cpython-fad81f08380080bb370c71c6cd0562e5329a4660.tar.gz cpython-fad81f08380080bb370c71c6cd0562e5329a4660.tar.bz2 |
Be explicit about scheme_chars -- string.letters is locale-dependent,
so we can't use it.
While I'm at it, got rid of string module use. (Found several new
hard special cases for a hypothetical conversion tool: from string
import join, find, rfind; and a local assignment "find=string.find".)
-rw-r--r-- | Lib/urlparse.py | 38 |
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 929315e..d927b7e 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June 1995. """ -# Standard/builtin Python modules -import string -from string import join, split, rfind - # A classification of schemes ('' means apply by default) uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file', 'https', 'shttp', @@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'file', 'prospero', ''] # Characters valid in scheme names -scheme_chars = string.letters + string.digits + '+-.' +scheme_chars = ('abcdefghijklmnopqrstuvwxyz' + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + '0123456789' + '+-.') MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1): return cached if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth clear_cache() - find = string.find netloc = path = params = query = fragment = '' - i = find(url, ':') + i = url.find(':') if i > 0: if url[:i] == 'http': # optimize the common case - scheme = string.lower(url[:i]) + scheme = url[:i].lower() url = url[i+1:] if url[:2] == '//': - i = find(url, '/', 2) + i = url.find('/', 2) if i < 0: i = len(url) netloc = url[2:i] url = url[i:] if allow_fragments: - i = string.rfind(url, '#') + i = url.rfind('#') if i >= 0: fragment = url[i+1:] url = url[:i] - i = find(url, '?') + i = url.find('?') if i >= 0: query = url[i+1:] url = url[:i] - i = find(url, ';') + i = url.find(';') if i >= 0: params = url[i+1:] url = url[:i] @@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1): if c not in scheme_chars: break else: - scheme, url = string.lower(url[:i]), url[i+1:] + scheme, url = url[:i].lower(), url[i+1:] if scheme in uses_netloc: if url[:2] == '//': - i = find(url, '/', 2) + i = url.find('/', 2) if i < 0: i = len(url) netloc, url = url[2:i], url[i:] if allow_fragments and 
scheme in uses_fragment: - i = string.rfind(url, '#') + i = url.rfind('#') if i >= 0: url, fragment = url[:i], url[i+1:] if scheme in uses_query: - i = find(url, '?') + i = url.find('?') if i >= 0: url, query = url[:i], url[i+1:] if scheme in uses_params: - i = find(url, ';') + i = url.find(';') if i >= 0: url, params = url[:i], url[i+1:] tuple = scheme, netloc, url, params, query, fragment @@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1): if not path: return urlunparse((scheme, netloc, bpath, params, query or bquery, fragment)) - segments = split(bpath, '/')[:-1] + split(path, '/') + segments = bpath.split('/')[:-1] + path.split('/') # XXX The stuff below is bogus in various ways... if segments[-1] == '.': segments[-1] = '' @@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1): segments[-1] = '' elif len(segments) >= 2 and segments[-1] == '..': segments[-2:] = [''] - return urlunparse((scheme, netloc, join(segments, '/'), + return urlunparse((scheme, netloc, '/'.join(segments), params, query, fragment)) def urldefrag(url): @@ -236,7 +234,7 @@ def test(): while 1: line = fp.readline() if not line: break - words = string.split(line) + words = line.split() if not words: continue url = words[0] |