diff options
author | Fred Drake <fdrake@acm.org> | 2000-02-10 17:17:14 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 2000-02-10 17:17:14 (GMT) |
commit | 13a2c279c504ae58c20baba5f0b3d1d6c0a85ed3 (patch) | |
tree | 6942aa091af39fe239eff68b0bf1292eea26da29 /Lib/urllib2.py | |
parent | 857c4c36b962c6e74559e045c7fb43177dd5bcea (diff) | |
download | cpython-13a2c279c504ae58c20baba5f0b3d1d6c0a85ed3.zip cpython-13a2c279c504ae58c20baba5f0b3d1d6c0a85ed3.tar.gz cpython-13a2c279c504ae58c20baba5f0b3d1d6c0a85ed3.tar.bz2 |
Untabify to pass the -tt test.
Diffstat (limited to 'Lib/urllib2.py')
-rw-r--r-- | Lib/urllib2.py | 766 |
1 files changed, 383 insertions, 383 deletions
diff --git a/Lib/urllib2.py b/Lib/urllib2.py index f1b4113..c147560 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -148,57 +148,57 @@ class URLError(IOError): # URLError is a sub-type of IOError, but it doesn't share any of # the implementation. need to override __init__ and __str__ def __init__(self, reason): - self.reason = reason + self.reason = reason def __str__(self): - return '<urlopen error %s>' % self.reason + return '<urlopen error %s>' % self.reason class HTTPError(URLError, addinfourl): """Raised when HTTP error occurs, but also acts like non-error return""" def __init__(self, url, code, msg, hdrs, fp): - addinfourl.__init__(self, fp, hdrs, url) - self.code = code - self.msg = msg - self.hdrs = hdrs - self.fp = fp - # XXX - self.filename = url - + addinfourl.__init__(self, fp, hdrs, url) + self.code = code + self.msg = msg + self.hdrs = hdrs + self.fp = fp + # XXX + self.filename = url + def __str__(self): - return 'HTTP Error %s: %s' % (self.code, self.msg) + return 'HTTP Error %s: %s' % (self.code, self.msg) def __del__(self): - # XXX is this safe? what if user catches exception, then - # extracts fp and discards exception? - self.fp.close() + # XXX is this safe? what if user catches exception, then + # extracts fp and discards exception? + self.fp.close() class GopherError(URLError): pass class Request: def __init__(self, url, data=None, headers={}): - # unwrap('<URL:type://host/path>') --> 'type://host/path' - self.__original = unwrap(url) - self.type = None - # self.__r_type is what's left after doing the splittype - self.host = None - self.port = None + # unwrap('<URL:type://host/path>') --> 'type://host/path' + self.__original = unwrap(url) + self.type = None + # self.__r_type is what's left after doing the splittype + self.host = None + self.port = None self.data = data - self.headers = {} + self.headers = {} self.headers.update(headers) def __getattr__(self, attr): - # XXX this is a fallback mechanism to guard against these - # methods getting called in a non-standard order. this may be - # too complicated and/or unnecessary. - # XXX should the __r_XXX attributes be public? - if attr[:12] == '_Request__r_': - name = attr[12:] - if hasattr(Request, 'get_' + name): - getattr(self, 'get_' + name)() - return getattr(self, attr) - raise AttributeError, attr + # XXX this is a fallback mechanism to guard against these + # methods getting called in a non-standard order. this may be + # too complicated and/or unnecessary. + # XXX should the __r_XXX attributes be public? + if attr[:12] == '_Request__r_': + name = attr[12:] + if hasattr(Request, 'get_' + name): + getattr(self, 'get_' + name)() + return getattr(self, attr) + raise AttributeError, attr def add_data(self, data): self.data = data @@ -213,34 +213,34 @@ class Request: return self.__original def get_type(self): - if self.type is None: - self.type, self.__r_type = splittype(self.__original) - return self.type + if self.type is None: + self.type, self.__r_type = splittype(self.__original) + return self.type def get_host(self): - if self.host is None: - self.host, self.__r_host = splithost(self.__r_type) - if self.host: - self.host = unquote(self.host) - return self.host + if self.host is None: + self.host, self.__r_host = splithost(self.__r_type) + if self.host: + self.host = unquote(self.host) + return self.host def get_selector(self): - return self.__r_host + return self.__r_host def set_proxy(self, proxy): - self.__proxy = proxy - # XXX this code is based on urllib, but it doesn't seem - # correct. specifically, if the proxy has a port number then - # splittype will return the hostname as the type and the port - # will be include with everything else - self.type, self.__r_type = splittype(self.__proxy) - self.host, XXX = splithost(self.__r_type) - self.host = unquote(self.host) - self.__r_host = self.__original + self.__proxy = proxy + # XXX this code is based on urllib, but it doesn't seem + # correct. specifically, if the proxy has a port number then + # splittype will return the hostname as the type and the port + # will be include with everything else + self.type, self.__r_type = splittype(self.__proxy) + self.host, XXX = splithost(self.__r_type) + self.host = unquote(self.host) + self.__r_host = self.__original def add_header(self, key, val): - # useful for something like authentication - self.headers[key] = val + # useful for something like authentication + self.headers[key] = val class OpenerDirector: def __init__(self): @@ -302,21 +302,21 @@ class OpenerDirector: return result def open(self, fullurl, data=None): - # accept a URL or a Request object - if type(fullurl) == types.StringType: - req = Request(fullurl, data) + # accept a URL or a Request object + if type(fullurl) == types.StringType: + req = Request(fullurl, data) else: req = fullurl if data is not None: req.add_data(data) - assert isinstance(req, Request) # really only care about interface + assert isinstance(req, Request) # really only care about interface result = self._call_chain(self.handle_open, 'default', 'default_open', req) if result: return result - type_ = req.get_type() + type_ = req.get_type() result = self._call_chain(self.handle_open, type_, type_ + \ '_open', req) if result: @@ -350,11 +350,11 @@ def is_callable(obj): # not quite like builtin callable (which I didn't know existed), # not entirely sure it needs to be different if type(obj) in (types.BuiltinFunctionType, - types.BuiltinMethodType, types.LambdaType, - types.MethodType): - return 1 + types.BuiltinMethodType, types.LambdaType, + types.MethodType): + return 1 if type(obj) == types.InstanceType: - return hasattr(obj, '__call__') + return hasattr(obj, '__call__') return 0 def get_methods(inst): @@ -370,8 +370,8 @@ def get_methods(inst): if type(attr) == types.UnboundMethodType: methods[name] = 1 for name in dir(inst): - if is_callable(getattr(inst, name)): - methods[name] = 1 + if is_callable(getattr(inst, name)): + methods[name] = 1 return methods.keys() # XXX probably also want an abstract factory that knows things like @@ -423,7 +423,7 @@ class BaseHandler: class HTTPDefaultErrorHandler(BaseHandler): def http_error_default(self, req, fp, code, msg, hdrs): - raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) + raise HTTPError(req.get_full_url(), code, msg, hdrs, fp) class HTTPRedirectHandler(BaseHandler): # Implementation note: To avoid the server sending us into an @@ -461,114 +461,114 @@ class HTTPRedirectHandler(BaseHandler): class ProxyHandler(BaseHandler): def __init__(self, proxies=None): - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'has_key'), "proxies must be a mapping" - self.proxies = proxies - for type, url in proxies.items(): - setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: \ - meth(r, proxy, type)) + if proxies is None: + proxies = getproxies() + assert hasattr(proxies, 'has_key'), "proxies must be a mapping" + self.proxies = proxies + for type, url in proxies.items(): + setattr(self, '%s_open' % type, + lambda r, proxy=url, type=type, meth=self.proxy_open: \ + meth(r, proxy, type)) def proxy_open(self, req, proxy, type): - orig_type = req.get_type() - req.set_proxy(proxy) - if orig_type == type: - # let other handlers take care of it - # XXX this only makes sense if the proxy is before the - # other handlers - return None - else: - # need to start over, because the other handlers don't - # grok the proxy's URL type - return self.parent.open(req) + orig_type = req.get_type() + req.set_proxy(proxy) + if orig_type == type: + # let other handlers take care of it + # XXX this only makes sense if the proxy is before the + # other handlers + return None + else: + # need to start over, because the other handlers don't + # grok the proxy's URL type + return self.parent.open(req) # feature suggested by Duncan Booth # XXX custom is not a good name class CustomProxy: # either pass a function to the constructor or override handle def __init__(self, proto, func=None, proxy_addr=None): - self.proto = proto - self.func = func - self.addr = proxy_addr + self.proto = proto + self.func = func + self.addr = proxy_addr def handle(self, req): - if self.func and self.func(req): - return 1 + if self.func and self.func(req): + return 1 def get_proxy(self): - return self.addr + return self.addr class CustomProxyHandler(BaseHandler): def __init__(self, *proxies): - self.proxies = {} + self.proxies = {} def proxy_open(self, req): - proto = req.get_type() - try: - proxies = self.proxies[proto] - except KeyError: - return None - for p in proxies: - if p.handle(req): - req.set_proxy(p.get_proxy()) - return self.parent.open(req) - return None + proto = req.get_type() + try: + proxies = self.proxies[proto] + except KeyError: + return None + for p in proxies: + if p.handle(req): + req.set_proxy(p.get_proxy()) + return self.parent.open(req) + return None def do_proxy(self, p, req): - p - return self.parent.open(req) + p + return self.parent.open(req) def add_proxy(self, cpo): - if self.proxies.has_key(cpo.proto): - self.proxies[cpo.proto].append(cpo) - else: - self.proxies[cpo.proto] = [cpo] + if self.proxies.has_key(cpo.proto): + self.proxies[cpo.proto].append(cpo) + else: + self.proxies[cpo.proto] = [cpo] class HTTPPasswordMgr: def __init__(self): - self.passwd = {} + self.passwd = {} def add_password(self, realm, uri, user, passwd): - # uri could be a single URI or a sequence - if type(uri) == types.StringType: - uri = [uri] - uri = tuple(map(self.reduce_uri, uri)) - if not self.passwd.has_key(realm): - self.passwd[realm] = {} - self.passwd[realm][uri] = (user, passwd) + # uri could be a single URI or a sequence + if type(uri) == types.StringType: + uri = [uri] + uri = tuple(map(self.reduce_uri, uri)) + if not self.passwd.has_key(realm): + self.passwd[realm] = {} + self.passwd[realm][uri] = (user, passwd) def find_user_password(self, realm, authuri): - domains = self.passwd.get(realm, {}) - authuri = self.reduce_uri(authuri) - for uris, authinfo in domains.items(): - for uri in uris: - if self.is_suburi(uri, authuri): - return authinfo - return None, None + domains = self.passwd.get(realm, {}) + authuri = self.reduce_uri(authuri) + for uris, authinfo in domains.items(): + for uri in uris: + if self.is_suburi(uri, authuri): + return authinfo + return None, None def reduce_uri(self, uri): - """Accept netloc or URI and extract only the netloc and path""" - parts = urlparse.urlparse(uri) - if parts[1]: - return parts[1], parts[2] or '/' - else: - return parts[2], '/' + """Accept netloc or URI and extract only the netloc and path""" + parts = urlparse.urlparse(uri) + if parts[1]: + return parts[1], parts[2] or '/' + else: + return parts[2], '/' def is_suburi(self, base, test): - """Check if test is below base in a URI tree - - Both args must be URIs in reduced form. - """ - if base == test: - return 1 - if base[0] != test[0]: - return 0 - common = os.path.commonprefix((base[1], test[1])) - if len(common) == len(base[1]): - return 1 - return 0 - + """Check if test is below base in a URI tree + + Both args must be URIs in reduced form. + """ + if base == test: + return 1 + if base[0] != test[0]: + return 0 + common = os.path.commonprefix((base[1], test[1])) + if len(common) == len(base[1]): + return 1 + return 0 + class HTTPBasicAuthHandler(BaseHandler): rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"') @@ -579,15 +579,15 @@ class HTTPBasicAuthHandler(BaseHandler): def __init__(self): self.passwd = HTTPPasswordMgr() - self.add_password = self.passwd.add_password - self.__current_realm = None - # if __current_realm is not None, then the server must have - # refused our name/password and is asking for authorization - # again. must be careful to set it to None on successful - # return. + self.add_password = self.passwd.add_password + self.__current_realm = None + # if __current_realm is not None, then the server must have + # refused our name/password and is asking for authorization + # again. must be careful to set it to None on successful + # return. def http_error_401(self, req, fp, code, msg, headers): - # XXX could be mult. headers + # XXX could be mult. headers authreq = headers.get('www-authenticate', None) if authreq: mo = HTTPBasicAuthHandler.rx.match(authreq) @@ -597,23 +597,23 @@ class HTTPBasicAuthHandler(BaseHandler): return self.retry_http_basic_auth(req, realm) def retry_http_basic_auth(self, req, realm): - if self.__current_realm is None: - self.__current_realm = realm - else: - self.__current_realm = realm - return None - # XXX host isn't really the correct URI? + if self.__current_realm is None: + self.__current_realm = realm + else: + self.__current_realm = realm + return None + # XXX host isn't really the correct URI? host = req.get_host() user,pw = self.passwd.find_user_password(realm, host) if pw: - raw = "%s:%s" % (user, pw) - auth = string.strip(base64.encodestring(raw)) + raw = "%s:%s" % (user, pw) + auth = string.strip(base64.encodestring(raw)) req.add_header('Authorization', 'Basic %s' % auth) resp = self.parent.open(req) - self.__current_realm = None - return resp + self.__current_realm = None + return resp else: - self.__current_realm = None + self.__current_realm = None return None class HTTPDigestAuthHandler(BaseHandler): @@ -624,111 +624,111 @@ class HTTPDigestAuthHandler(BaseHandler): """ def __init__(self): - self.passwd = HTTPPasswordMgr() - self.add_password = self.passwd.add_password - self.__current_realm = None + self.passwd = HTTPPasswordMgr() + self.add_password = self.passwd.add_password + self.__current_realm = None def http_error_401(self, req, fp, code, msg, headers): - # XXX could be mult. headers - authreq = headers.get('www-authenticate', None) - if authreq: - kind = string.split(authreq)[0] - if kind == 'Digest': - return self.retry_http_digest_auth(req, authreq) + # XXX could be mult. headers + authreq = headers.get('www-authenticate', None) + if authreq: + kind = string.split(authreq)[0] + if kind == 'Digest': + return self.retry_http_digest_auth(req, authreq) def retry_http_digest_auth(self, req, auth): - token, challenge = string.split(auth, ' ', 1) - chal = parse_keqv_list(parse_http_list(challenge)) - auth = self.get_authorization(req, chal) - if auth: - req.add_header('Authorization', 'Digest %s' % auth) - resp = self.parent.open(req) - self.__current_realm = None - return resp + token, challenge = string.split(auth, ' ', 1) + chal = parse_keqv_list(parse_http_list(challenge)) + auth = self.get_authorization(req, chal) + if auth: + req.add_header('Authorization', 'Digest %s' % auth) + resp = self.parent.open(req) + self.__current_realm = None + return resp def get_authorization(self, req, chal): - try: - realm = chal['realm'] - nonce = chal['nonce'] - algorithm = chal.get('algorithm', 'MD5') - # mod_digest doesn't send an opaque, even though it isn't - # supposed to be optional - opaque = chal.get('opaque', None) - except KeyError: - return None - - if self.__current_realm is None: - self.__current_realm = realm - else: - self.__current_realm = realm - return None - - H, KD = self.get_algorithm_impls(algorithm) - if H is None: - return None - - user, pw = self.passwd.find_user_password(realm, - req.get_full_url()) - if user is None: - return None - - # XXX not implemented yet - if req.has_data(): - entdig = self.get_entity_digest(req.get_data(), chal) - else: - entdig = None - - A1 = "%s:%s:%s" % (user, realm, pw) - A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET', - # XXX selector: what about proxies and full urls - req.get_selector()) - respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) - # XXX should the partial digests be encoded too? - - base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ - 'response="%s"' % (user, realm, nonce, req.get_selector(), - respdig) - if opaque: - base = base + ', opaque="%s"' % opaque - if entdig: - base = base + ', digest="%s"' % entdig - if algorithm != 'MD5': - base = base + ', algorithm="%s"' % algorithm - return base + try: + realm = chal['realm'] + nonce = chal['nonce'] + algorithm = chal.get('algorithm', 'MD5') + # mod_digest doesn't send an opaque, even though it isn't + # supposed to be optional + opaque = chal.get('opaque', None) + except KeyError: + return None + + if self.__current_realm is None: + self.__current_realm = realm + else: + self.__current_realm = realm + return None + + H, KD = self.get_algorithm_impls(algorithm) + if H is None: + return None + + user, pw = self.passwd.find_user_password(realm, + req.get_full_url()) + if user is None: + return None + + # XXX not implemented yet + if req.has_data(): + entdig = self.get_entity_digest(req.get_data(), chal) + else: + entdig = None + + A1 = "%s:%s:%s" % (user, realm, pw) + A2 = "%s:%s" % (req.has_data() and 'POST' or 'GET', + # XXX selector: what about proxies and full urls + req.get_selector()) + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + # XXX should the partial digests be encoded too? + + base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \ + 'response="%s"' % (user, realm, nonce, req.get_selector(), + respdig) + if opaque: + base = base + ', opaque="%s"' % opaque + if entdig: + base = base + ', digest="%s"' % entdig + if algorithm != 'MD5': + base = base + ', algorithm="%s"' % algorithm + return base def get_algorithm_impls(self, algorithm): - # lambdas assume digest modules are imported at the top level - if algorithm == 'MD5': - H = lambda x, e=encode_digest:e(md5.new(x).digest()) - elif algorithm == 'SHA': - H = lambda x, e=encode_digest:e(sha.new(x).digest()) - # XXX MD5-sess - KD = lambda s, d, H=H: H("%s:%s" % (s, d)) - return H, KD + # lambdas assume digest modules are imported at the top level + if algorithm == 'MD5': + H = lambda x, e=encode_digest:e(md5.new(x).digest()) + elif algorithm == 'SHA': + H = lambda x, e=encode_digest:e(sha.new(x).digest()) + # XXX MD5-sess + KD = lambda s, d, H=H: H("%s:%s" % (s, d)) + return H, KD def get_entity_digest(self, data, chal): - # XXX not implemented yet - return None + # XXX not implemented yet + return None def encode_digest(digest): hexrep = [] for c in digest: - n = (ord(c) >> 4) & 0xf - hexrep.append(hex(n)[-1]) - n = ord(c) & 0xf - hexrep.append(hex(n)[-1]) + n = (ord(c) >> 4) & 0xf + hexrep.append(hex(n)[-1]) + n = ord(c) & 0xf + hexrep.append(hex(n)[-1]) return string.join(hexrep, '') - + class HTTPHandler(BaseHandler): def http_open(self, req): # XXX devise a new mechanism to specify user/password - host = req.get_host() + host = req.get_host() if not host: raise URLError('no host given') h = httplib.HTTP(host) # will parse host:port -## h.set_debuglevel(1) +## h.set_debuglevel(1) if req.has_data(): data = req.get_data() h.putrequest('POST', req.get_selector()) @@ -740,8 +740,8 @@ class HTTPHandler(BaseHandler): h.putheader('Host', host) for args in self.parent.addheaders: apply(h.putheader, args) - for k, v in req.headers.items(): - h.putheader(k, v) + for k, v in req.headers.items(): + h.putheader(k, v) h.endheaders() if req.has_data(): h.send(data + '\r\n') @@ -761,17 +761,17 @@ class HTTPHandler(BaseHandler): class UnknownHandler(BaseHandler): def unknown_open(self, req): - type = req.get_type() + type = req.get_type() raise URLError('unknown url type: %s' % type) def parse_keqv_list(l): """Parse list of key=value strings where keys are not duplicated.""" parsed = {} for elt in l: - k, v = string.split(elt, '=', 1) - if v[0] == '"' and v[-1] == '"': - v = v[1:-1] - parsed[k] = v + k, v = string.split(elt, '=', 1) + if v[0] == '"' and v[-1] == '"': + v = v[1:-1] + parsed[k] = v return parsed def parse_http_list(s): @@ -789,104 +789,104 @@ def parse_http_list(s): inquote = 0 start = 0 while i < end: - cur = s[i:] - c = string.find(cur, ',') - q = string.find(cur, '"') - if c == -1: - list.append(s[start:]) - break - if q == -1: - if inquote: - raise ValueError, "unbalanced quotes" - else: - list.append(s[start:i+c]) - i = i + c + 1 - continue - if inquote: - if q < c: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - inquote = 0 - else: - i = i + q - else: - if c < q: - list.append(s[start:i+c]) - i = i + c + 1 - start = i - else: - inquote = 1 - i = i + q + 1 + cur = s[i:] + c = string.find(cur, ',') + q = string.find(cur, '"') + if c == -1: + list.append(s[start:]) + break + if q == -1: + if inquote: + raise ValueError, "unbalanced quotes" + else: + list.append(s[start:i+c]) + i = i + c + 1 + continue + if inquote: + if q < c: + list.append(s[start:i+c]) + i = i + c + 1 + start = i + inquote = 0 + else: + i = i + q + else: + if c < q: + list.append(s[start:i+c]) + i = i + c + 1 + start = i + else: + inquote = 1 + i = i + q + 1 return map(string.strip, list) class FileHandler(BaseHandler): # Use local file or FTP depending on form of URL def file_open(self, req): - url = req.get_selector() - if url[:2] == '//' and url[2:3] != '/': - req.type = 'ftp' - return self.parent.open(req) - else: - return self.open_local_file(req) + url = req.get_selector() + if url[:2] == '//' and url[2:3] != '/': + req.type = 'ftp' + return self.parent.open(req) + else: + return self.open_local_file(req) # names for the localhost names = None def get_names(self): - if FileHandler.names is None: - FileHandler.names = (socket.gethostbyname('localhost'), - socket.gethostbyname(socket.gethostname())) - return FileHandler.names + if FileHandler.names is None: + FileHandler.names = (socket.gethostbyname('localhost'), + socket.gethostbyname(socket.gethostname())) + return FileHandler.names # not entirely sure what the rules are here def open_local_file(self, req): - mtype = mimetypes.guess_type(req.get_selector())[0] - headers = mimetools.Message(StringIO('Content-Type: %s\n' \ - % (mtype or 'text/plain'))) - host = req.get_host() - file = req.get_selector() - if host: - host, port = splitport(host) - if not host or \ - (not port and socket.gethostbyname(host) in self.get_names()): - return addinfourl(open(url2pathname(file), 'rb'), - headers, 'file:'+file) - raise URLError('file not on local host') + mtype = mimetypes.guess_type(req.get_selector())[0] + headers = mimetools.Message(StringIO('Content-Type: %s\n' \ + % (mtype or 'text/plain'))) + host = req.get_host() + file = req.get_selector() + if host: + host, port = splitport(host) + if not host or \ + (not port and socket.gethostbyname(host) in self.get_names()): + return addinfourl(open(url2pathname(file), 'rb'), + headers, 'file:'+file) + raise URLError('file not on local host') class FTPHandler(BaseHandler): def ftp_open(self, req): - host = req.get_host() - if not host: - raise IOError, ('ftp error', 'no host given') - # XXX handle custom username & password - host = socket.gethostbyname(host) - host, port = splitport(host) - if port is None: - port = ftplib.FTP_PORT - path, attrs = splitattr(req.get_selector()) - path = unquote(path) - dirs = string.splitfields(path, '/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: - dirs = dirs[1:] - user = passwd = '' # XXX - try: - fw = self.connect_ftp(user, passwd, host, port, dirs) - type = file and 'I' or 'D' - for attr in attrs: - attr, value = splitattr(attr) - if string.lower(attr) == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = string.upper(value) - fp, retrlen = fw.retrfile(file, type) - if retrlen is not None and retrlen >= 0: - sf = StringIO('Content-Length: %d\n' % retrlen) - headers = mimetools.Message(sf) - else: - headers = noheaders() - return addinfourl(fp, headers, req.get_full_url()) - except ftplib.all_errors, msg: - raise IOError, ('ftp error', msg), sys.exc_info()[2] + host = req.get_host() + if not host: + raise IOError, ('ftp error', 'no host given') + # XXX handle custom username & password + host = socket.gethostbyname(host) + host, port = splitport(host) + if port is None: + port = ftplib.FTP_PORT + path, attrs = splitattr(req.get_selector()) + path = unquote(path) + dirs = string.splitfields(path, '/') + dirs, file = dirs[:-1], dirs[-1] + if dirs and not dirs[0]: + dirs = dirs[1:] + user = passwd = '' # XXX + try: + fw = self.connect_ftp(user, passwd, host, port, dirs) + type = file and 'I' or 'D' + for attr in attrs: + attr, value = splitattr(attr) + if string.lower(attr) == 'type' and \ + value in ('a', 'A', 'i', 'I', 'd', 'D'): + type = string.upper(value) + fp, retrlen = fw.retrfile(file, type) + if retrlen is not None and retrlen >= 0: + sf = StringIO('Content-Length: %d\n' % retrlen) + headers = mimetools.Message(sf) + else: + headers = noheaders() + return addinfourl(fp, headers, req.get_full_url()) + except ftplib.all_errors, msg: + raise IOError, ('ftp error', msg), sys.exc_info()[2] def connect_ftp(self, user, passwd, host, port, dirs): fw = ftpwrapper(user, passwd, host, port, dirs) @@ -901,13 +901,13 @@ class CacheFTPHandler(FTPHandler): self.timeout = {} self.soonest = 0 self.delay = 60 - self.max_conns = 16 + self.max_conns = 16 def setTimeout(self, t): self.delay = t def setMaxConns(self, m): - self.max_conns = m + self.max_conns = m def connect_ftp(self, user, passwd, host, port, dirs): key = user, passwd, host, port @@ -916,11 +916,11 @@ class CacheFTPHandler(FTPHandler): else: self.cache[key] = ftpwrapper(user, passwd, host, port, dirs) self.timeout[key] = time.time() + self.delay - self.check_cache() + self.check_cache() return self.cache[key] def check_cache(self): - # first check for old ones + # first check for old ones t = time.time() if self.soonest <= t: for k, v in self.timeout.items(): @@ -931,56 +931,56 @@ class CacheFTPHandler(FTPHandler): self.soonest = min(self.timeout.values()) # then check the size - if len(self.cache) == self.max_conns: - for k, v in self.timeout.items(): - if v == self.soonest: - del self.cache[k] - del self.timeout[k] - break - self.soonest = min(self.timeout.values()) + if len(self.cache) == self.max_conns: + for k, v in self.timeout.items(): + if v == self.soonest: + del self.cache[k] + del self.timeout[k] + break + self.soonest = min(self.timeout.values()) class GopherHandler(BaseHandler): def gopher_open(self, req): - host = req.get_host() - if not host: - raise GopherError('no host given') - host = unquote(host) - selector = req.get_selector() - type, selector = splitgophertype(selector) - selector, query = splitquery(selector) - selector = unquote(selector) - if query: - query = unquote(query) - fp = gopherlib.send_query(selector, query, host) - else: - fp = gopherlib.send_selector(selector, host) - return addinfourl(fp, noheaders(), req.get_full_url()) + host = req.get_host() + if not host: + raise GopherError('no host given') + host = unquote(host) + selector = req.get_selector() + type, selector = splitgophertype(selector) + selector, query = splitquery(selector) + selector = unquote(selector) + if query: + query = unquote(query) + fp = gopherlib.send_query(selector, query, host) + else: + fp = gopherlib.send_selector(selector, host) + return addinfourl(fp, noheaders(), req.get_full_url()) #bleck! don't use this yet class OpenerFactory: default_handlers = [UnknownHandler, HTTPHandler, - HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler] + HTTPDefaultErrorHandler, HTTPRedirectHandler, + FTPHandler, FileHandler] proxy_handlers = [ProxyHandler] handlers = [] replacement_handlers = [] def add_proxy_handler(self, ph): - self.proxy_handlers = self.proxy_handlers + [ph] + self.proxy_handlers = self.proxy_handlers + [ph] def add_handler(self, h): - self.handlers = self.handlers + [h] + self.handlers = self.handlers + [h] def replace_handler(self, h): - pass + pass def build_opener(self): - opener = OpenerDirectory() - for ph in self.proxy_handlers: - if type(ph) == types.ClassType: - ph = ph() - opener.add_handler(ph) + opener = OpenerDirectory() + for ph in self.proxy_handlers: + if type(ph) == types.ClassType: + ph = ph() + opener.add_handler(ph) if __name__ == "__main__": # XXX some of the test code depends on machine configurations that @@ -993,24 +993,24 @@ if __name__ == "__main__": else: localhost = None urls = [ - # Thanks to Fred for finding these! - 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex', - 'gopher://gopher.vt.edu:10010/10/33', + # Thanks to Fred for finding these! + 'gopher://gopher.lib.ncsu.edu/11/library/stacks/Alex', + 'gopher://gopher.vt.edu:10010/10/33', - 'file:/etc/passwd', - 'file://nonsensename/etc/passwd', - 'ftp://www.python.org/pub/tmp/httplib.py', + 'file:/etc/passwd', + 'file://nonsensename/etc/passwd', + 'ftp://www.python.org/pub/tmp/httplib.py', 'ftp://www.python.org/pub/tmp/imageop.c', 'ftp://www.python.org/pub/tmp/blat', - 'http://www.espn.com/', # redirect - 'http://www.python.org/Spanish/Inquistion/', - ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py', - 'query=pythonistas&querytype=simple&casefold=yes&req=search'), - 'http://www.python.org/', + 'http://www.espn.com/', # redirect + 'http://www.python.org/Spanish/Inquistion/', + ('http://grail.cnri.reston.va.us/cgi-bin/faqw.py', + 'query=pythonistas&querytype=simple&casefold=yes&req=search'), + 'http://www.python.org/', 'ftp://prep.ai.mit.edu/welcome.msg', 'ftp://www.python.org/pub/tmp/figure.prn', 'ftp://www.python.org/pub/tmp/interp.pl', - 'http://checkproxy.cnri.reston.va.us/test/test.html', + 'http://checkproxy.cnri.reston.va.us/test/test.html', ] if localhost is not None: @@ -1034,10 +1034,10 @@ if __name__ == "__main__": # XXX try out some custom proxy objects too! def at_cnri(req): - host = req.get_host() - print host - if host[-18:] == '.cnri.reston.va.us': - return 1 + host = req.get_host() + print host + if host[-18:] == '.cnri.reston.va.us': + return 1 p = CustomProxy('http', at_cnri, 'proxy.cnri.reston.va.us') ph = CustomProxyHandler(p) @@ -1052,9 +1052,9 @@ if __name__ == "__main__": try: f = urlopen(url, req) except IOError, err: - print "IOError:", err - except socket.error, err: - print "socket.error:", err + print "IOError:", err + except socket.error, err: + print "socket.error:", err else: buf = f.read() f.close() |