diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2000-12-03 18:30:10 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2000-12-03 18:30:10 (GMT) |
commit | 1d99433a58c8c69caa734acb884f274663885a17 (patch) | |
tree | 32a4d9d9b7845dbdbc07debb94e8d275d93ec40b | |
parent | 57657bce9492ac8f4ec273e19a7e0357f028b036 (diff) | |
download | cpython-1d99433a58c8c69caa734acb884f274663885a17.zip cpython-1d99433a58c8c69caa734acb884f274663885a17.tar.gz cpython-1d99433a58c8c69caa734acb884f274663885a17.tar.bz2 |
Convert Unicode strings to byte strings before passing them into specific
protocols. Closes bug #119822.
-rw-r--r-- | Lib/urllib.py | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/Lib/urllib.py b/Lib/urllib.py
index 8b3c924..badfa0e 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -26,9 +26,9 @@ import string
 import socket
 import os
 import sys
+import types
 
-
-__version__ = '1.13' # XXX This version is not always updated :-(
+__version__ = '1.14' # XXX This version is not always updated :-(
 
 MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
 
@@ -136,23 +136,23 @@ class URLopener:
     # External interface
     def open(self, fullurl, data=None):
         """Use URLopener().open(file) instead of open(file, 'r')."""
-        fullurl = unwrap(fullurl)
+        fullurl = unwrap(toBytes(fullurl))
         if self.tempcache and self.tempcache.has_key(fullurl):
             filename, headers = self.tempcache[fullurl]
             fp = open(filename, 'rb')
             return addinfourl(fp, headers, fullurl)
-        type, url = splittype(fullurl)
-        if not type:
-            type = 'file'
-        if self.proxies.has_key(type):
-            proxy = self.proxies[type]
-            type, proxyhost = splittype(proxy)
+        urltype, url = splittype(fullurl)
+        if not urltype:
+            urltype = 'file'
+        if self.proxies.has_key(urltype):
+            proxy = self.proxies[urltype]
+            urltype, proxyhost = splittype(proxy)
             host, selector = splithost(proxyhost)
             url = (host, fullurl) # Signal special case to open_*()
         else:
             proxy = None
-        name = 'open_' + type
-        self.type = type
+        name = 'open_' + urltype
+        self.type = urltype
         if '-' in name:
             # replace - with _
             name = string.join(string.split(name, '-'), '_')
@@ -183,7 +183,7 @@ class URLopener:
     def retrieve(self, url, filename=None, reporthook=None, data=None):
         """retrieve(url) returns (filename, None) for a local object
         or (tempfilename, headers) for a remote object."""
-        url = unwrap(url)
+        url = unwrap(toBytes(url))
         if self.tempcache and self.tempcache.has_key(url):
             return self.tempcache[url]
         type, url1 = splittype(url)
@@ -238,7 +238,7 @@ class URLopener:
         """Use HTTP protocol."""
         import httplib
         user_passwd = None
-        if type(url) is type(""):
+        if type(url) is types.StringType:
             host, selector = splithost(url)
             if host:
                 user_passwd, host = splituser(host)
@@ -313,7 +313,7 @@ class URLopener:
             """Use HTTPS protocol."""
             import httplib
             user_passwd = None
-            if type(url) is type(""):
+            if type(url) in types.StringTypes:
                 host, selector = splithost(url)
                 if host:
                     user_passwd, host = splituser(host)
@@ -852,6 +852,17 @@ def basejoin(base, url):
 # unquote('abc%20def') -> 'abc def'
 # quote('abc def') -> 'abc%20def')
 
+def toBytes(url):
+    """toBytes(u"URL") --> 'URL'."""
+    # Most URL schemes require ASCII. If that changes, the conversion
+    # can be relaxed
+    if type(url) is types.UnicodeType:
+        try:
+            url = url.encode("ASCII")
+        except UnicodeError:
+            raise UnicodeError("URL "+repr(url)+" contains non-ASCII characters")
+    return url
+
 def unwrap(url):
     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
     url = string.strip(url)