From 1d99433a58c8c69caa734acb884f274663885a17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?=
Date: Sun, 3 Dec 2000 18:30:10 +0000
Subject: Convert Unicode strings to byte strings before passing them into specific protocols. Closes bug #119822.

---
Reviewer notes (placed below the "---" marker, so "git am" does not record
them in the commit message):
* Line breaks and indentation were rebuilt from a copy of this patch whose
  whitespace had been collapsed.  Spacing inside lines (for example before
  trailing "#" comments) is a best guess; if a context line fails to match,
  apply with "git apply --ignore-whitespace" or "patch -l".
* open_https now uses the same "type(url) is types.StringType" test that
  this patch gives open_http.  The collapsed copy had
  "type(url) in types.StringTypes" there.
* The unwrap() docstring context line had lost its '<URL:...>' argument;
  it is restored so the context matches the docstring's meaning.

 Lib/urllib.py | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/Lib/urllib.py b/Lib/urllib.py
index 8b3c924..badfa0e 100644
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -26,9 +26,9 @@ import string
 import socket
 import os
 import sys
+import types
 
-
-__version__ = '1.13'    # XXX This version is not always updated :-(
+__version__ = '1.14'    # XXX This version is not always updated :-(
 
 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
 
@@ -136,23 +136,23 @@ class URLopener:
     # External interface
     def open(self, fullurl, data=None):
         """Use URLopener().open(file) instead of open(file, 'r')."""
-        fullurl = unwrap(fullurl)
+        fullurl = unwrap(toBytes(fullurl))
         if self.tempcache and self.tempcache.has_key(fullurl):
             filename, headers = self.tempcache[fullurl]
             fp = open(filename, 'rb')
             return addinfourl(fp, headers, fullurl)
-        type, url = splittype(fullurl)
-        if not type:
-            type = 'file'
-        if self.proxies.has_key(type):
-            proxy = self.proxies[type]
-            type, proxyhost = splittype(proxy)
+        urltype, url = splittype(fullurl)
+        if not urltype:
+            urltype = 'file'
+        if self.proxies.has_key(urltype):
+            proxy = self.proxies[urltype]
+            urltype, proxyhost = splittype(proxy)
             host, selector = splithost(proxyhost)
             url = (host, fullurl) # Signal special case to open_*()
         else:
             proxy = None
-        name = 'open_' + type
-        self.type = type
+        name = 'open_' + urltype
+        self.type = urltype
         if '-' in name:
             # replace - with _
             name = string.join(string.split(name, '-'), '_')
@@ -183,7 +183,7 @@ class URLopener:
     def retrieve(self, url, filename=None, reporthook=None, data=None):
         """retrieve(url) returns (filename, None) for a local object
         or (tempfilename, headers) for a remote object."""
-        url = unwrap(url)
+        url = unwrap(toBytes(url))
         if self.tempcache and self.tempcache.has_key(url):
             return self.tempcache[url]
         type, url1 = splittype(url)
@@ -238,7 +238,7 @@ class URLopener:
         """Use HTTP protocol."""
         import httplib
         user_passwd = None
-        if type(url) is type(""):
+        if type(url) is types.StringType:
             host, selector = splithost(url)
             if host:
                 user_passwd, host = splituser(host)
@@ -313,7 +313,7 @@ class URLopener:
             """Use HTTPS protocol."""
             import httplib
             user_passwd = None
-            if type(url) is type(""):
+            if type(url) is types.StringType:
                 host, selector = splithost(url)
                 if host:
                     user_passwd, host = splituser(host)
@@ -852,6 +852,17 @@ def basejoin(base, url):
 # unquote('abc%20def') -> 'abc def'
 # quote('abc def') -> 'abc%20def')
 
+def toBytes(url):
+    """toBytes(u"URL") --> 'URL'."""
+    # Most URL schemes require ASCII. If that changes, the conversion
+    # can be relaxed
+    if type(url) is types.UnicodeType:
+        try:
+            url = url.encode("ASCII")
+        except UnicodeError:
+            raise UnicodeError("URL "+repr(url)+" contains non-ASCII characters")
+    return url
+
 def unwrap(url):
     """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
     url = string.strip(url)
-- 
cgit v0.12