author    Guido van Rossum <guido@python.org>  2000-12-19 16:48:13 (GMT)
committer Guido van Rossum <guido@python.org>  2000-12-19 16:48:13 (GMT)
commit    fad81f08380080bb370c71c6cd0562e5329a4660
tree      2e9311483f520fb77d84768b93fed81dd0f6e100
parent    68abe832b9761b1b506d6eaf5eb941c07d0735bc
Be explicit about scheme_chars -- string.letters is locale-dependent,
so we can't use it. While I'm at it, got rid of string module use. (Found several new hard special cases for a hypothetical conversion tool: from string import join, split, rfind; and a local assignment "find = string.find".)
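
The locale problem is easy to demonstrate. A minimal Python 2 sketch (the locale name below is an assumption; substitute one installed on your system) shows string.letters picking up non-ASCII characters after a locale switch, which would quietly widen what urlparse accepts as a scheme name:

    # string.letters follows the active locale in Python 2, so any
    # scheme_chars built from it can change out from under us.
    import string
    import locale

    print repr(string.letters)    # plain ASCII letters under the C locale
    locale.setlocale(locale.LC_ALL, 'de_DE.ISO8859-1')  # assumed available
    print repr(string.letters)    # now includes ISO-8859-1 letters too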
 Lib/urlparse.py | 38 ++++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)
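
For readers following the diff: every removed string-module call has a string-method equivalent, and only join is tricky because the separator becomes the receiver. A quick reference sketch (not part of the patch):

    # How each removed string-module call reads as a string method.
    import string

    s = 'http://host/path'
    assert string.find(s, ':')  == s.find(':')
    assert string.rfind(s, '/') == s.rfind('/')
    assert string.lower(s)      == s.lower()
    assert string.split('a b')  == 'a b'.split()
    # join swaps the argument roles: the separator does the joining.
    assert string.join(['a', 'b'], '/') == '/'.join(['a', 'b'])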
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 929315e..d927b7e 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -4,10 +4,6 @@ See RFC 1808: "Relative Uniform Resource Locators", by R. Fielding,
UC Irvine, June 1995.
"""
-# Standard/builtin Python modules
-import string
-from string import join, split, rfind
-
# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'wais', 'file',
'https', 'shttp',
@@ -31,7 +27,10 @@ uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais',
'file', 'prospero', '']
# Characters valid in scheme names
-scheme_chars = string.letters + string.digits + '+-.'
+scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+ '0123456789'
+ '+-.')
MAX_CACHE_SIZE = 20
_parse_cache = {}
@@ -54,29 +53,28 @@ def urlparse(url, scheme = '', allow_fragments = 1):
return cached
if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
clear_cache()
- find = string.find
netloc = path = params = query = fragment = ''
- i = find(url, ':')
+ i = url.find(':')
if i > 0:
if url[:i] == 'http': # optimize the common case
- scheme = string.lower(url[:i])
+ scheme = url[:i].lower()
url = url[i+1:]
if url[:2] == '//':
- i = find(url, '/', 2)
+ i = url.find('/', 2)
if i < 0:
i = len(url)
netloc = url[2:i]
url = url[i:]
if allow_fragments:
- i = string.rfind(url, '#')
+ i = url.rfind('#')
if i >= 0:
fragment = url[i+1:]
url = url[:i]
- i = find(url, '?')
+ i = url.find('?')
if i >= 0:
query = url[i+1:]
url = url[:i]
- i = find(url, ';')
+ i = url.find(';')
if i >= 0:
params = url[i+1:]
url = url[:i]
@@ -87,23 +85,23 @@ def urlparse(url, scheme = '', allow_fragments = 1):
if c not in scheme_chars:
break
else:
- scheme, url = string.lower(url[:i]), url[i+1:]
+ scheme, url = url[:i].lower(), url[i+1:]
if scheme in uses_netloc:
if url[:2] == '//':
- i = find(url, '/', 2)
+ i = url.find('/', 2)
if i < 0:
i = len(url)
netloc, url = url[2:i], url[i:]
if allow_fragments and scheme in uses_fragment:
- i = string.rfind(url, '#')
+ i = url.rfind('#')
if i >= 0:
url, fragment = url[:i], url[i+1:]
if scheme in uses_query:
- i = find(url, '?')
+ i = url.find('?')
if i >= 0:
url, query = url[:i], url[i+1:]
if scheme in uses_params:
- i = find(url, ';')
+ i = url.find(';')
if i >= 0:
url, params = url[:i], url[i+1:]
tuple = scheme, netloc, url, params, query, fragment
@@ -151,7 +149,7 @@ def urljoin(base, url, allow_fragments = 1):
if not path:
return urlunparse((scheme, netloc, bpath,
params, query or bquery, fragment))
- segments = split(bpath, '/')[:-1] + split(path, '/')
+ segments = bpath.split('/')[:-1] + path.split('/')
# XXX The stuff below is bogus in various ways...
if segments[-1] == '.':
segments[-1] = ''
@@ -171,7 +169,7 @@ def urljoin(base, url, allow_fragments = 1):
segments[-1] = ''
elif len(segments) >= 2 and segments[-1] == '..':
segments[-2:] = ['']
- return urlunparse((scheme, netloc, join(segments, '/'),
+ return urlunparse((scheme, netloc, '/'.join(segments),
params, query, fragment))
def urldefrag(url):
@@ -236,7 +234,7 @@ def test():
while 1:
line = fp.readline()
if not line: break
- words = string.split(line)
+ words = line.split()
if not words:
continue
url = words[0]
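
As a quick behavioral check (unchanged by this patch; a sketch, not part of the test suite), urlparse splits a URL into its six components and urljoin resolves a relative reference against a base per RFC 1808:

    from urlparse import urlparse, urljoin

    print urlparse('http://host/a/b;p?q=1#frag')
    # -> ('http', 'host', '/a/b', 'p', 'q=1', 'frag')
    print urljoin('http://host/a/b/c', '../d')
    # -> 'http://host/a/d'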