diff options
author | Moshe Zadka <moshez@math.huji.ac.il> | 2001-03-01 08:40:42 (GMT) |
---|---|---|
committer | Moshe Zadka <moshez@math.huji.ac.il> | 2001-03-01 08:40:42 (GMT) |
commit | 8a18e99008c28156a7ba701ca8d6824a50fb0a9e (patch) | |
tree | 301cbec622d1abeaa992063babaaee5a6cfb540a | |
parent | 251083142fe4c114753cef12c37589bd55761912 (diff) | |
download | cpython-8a18e99008c28156a7ba701ca8d6824a50fb0a9e.zip cpython-8a18e99008c28156a7ba701ca8d6824a50fb0a9e.tar.gz cpython-8a18e99008c28156a7ba701ca8d6824a50fb0a9e.tar.bz2 |
Checking in patch 404826 -- urllib2 enhancements and documentations.
(please note that the library reference does *not* include the
urllib2 documentation -- that will wait for Fred)
-rw-r--r-- | Doc/lib/liburllib2.tex | 501 | ||||
-rw-r--r-- | Lib/urllib2.py | 163 |
2 files changed, 616 insertions, 48 deletions
diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex new file mode 100644 index 0000000..90243a5 --- /dev/null +++ b/Doc/lib/liburllib2.tex @@ -0,0 +1,501 @@ +\section{\module{urllib2} --- + extensible library for opening URLs} + +\declaremodule{standard}{urllib2} + +\moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net} +\sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net} + +\modulesynopsis{An extensible library for opening URLs using a variety of + protocols} + +The \module{urllib2} module defines functions and classes which help +in opening URLs (mostly HTTP) in a complex world -- basic and digest +authentication, redirections and more. + +The \module{urllib2} module defines the following functions: + +\begin{funcdesc}{urlopen}{url\optional{, data}} +Open the URL \var{url}, which can be either a string or a \class{Request} +object (currently the code checks that it really is a \class{Request} +instance, or an instance of a subclass of \class{Request}). + +\var{data} should be a string, which specifies additional data to +send to the server. In HTTP requests, which are the only ones that +support \var{data}, it should be a buffer in the format of +\code{application/x-www-form-urlencoded}, for example one returned +from \function{urllib.urlencode}. + +This function returns a file-like object with two additional methods: + +\begin{itemize} + + \item \code{geturl()} --- return the URL of the resource retrieved + \item \code{info()} --- return the meta-information of the page, as + a dictionary-like object +\end{itemize} + +Raises \exception{URLError} on errors. +\end{funcdesc} + +\begin{funcdesc}{install_opener}{opener} +Install an \class{OpenerDirector} instance as the default opener. +The code does not check for a real \class{OpenerDirector}, and any +class with the appropriate interface will work. 
+\end{funcdesc} + +\begin{funcdesc}{build_opener}{\optional{handler\optional{, + handler\optional{, ...}}}} +Return an \class{OpenerDirector} instance, which chains the +handlers in the order given. \var{handler}s can be either instances +of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in +which case it must be possible to call the constructor without +any parameters). Instances of the following classes will be in +the front of the \var{handler}s, unless the \var{handler}s contain +them, instances of them or subclasses of them: + +\code{ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, + HTTPRedirectHandler, FTPHandler, FileHandler} + +If the Python installation has SSL support (\code{httplib.HTTPS} exists), +\class{HTTPSHandler} will also be added. +\end{funcdesc} + +\begin{excdesc}{URLError} +The handlers raise this exception when they run into a problem. It is a subclass +of \exception{IOError}. +\end{excdesc} + +\begin{excdesc}{HTTPError} +A subclass of \exception{URLError}, it can also function as a +non-exceptional file-like return value (the same thing that \function{urlopen} +returns). This is useful when handling exotic HTTP errors, such as +requests for authentication. +\end{excdesc} + +\begin{excdesc}{GopherError} +A subclass of \exception{URLError}, this is the error raised by the +Gopher handler. +\end{excdesc} + +\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}} +This class is an abstraction of a URL request. + +\var{url} should be a string which is a valid URL. For a description +of \var{data} see the \method{add_data} description. +\var{headers} should be a dictionary, and will be treated as if +\method{add_header} was called with each key and value as arguments. +\end{classdesc} + +The following methods describe all of \class{Request}'s public interface, +and so all must be overridden in subclasses. 
+ +\begin{methoddesc}[Request]{add_data}{data} +Set the \class{Request} data to \var{data}. This is ignored +by all handlers except HTTP handlers --- and there it should be an +\code{application/x-www-form-urlencoded} buffer, and will change the +request to be \code{POST} rather than \code{GET}. +\end{methoddesc} + +\begin{methoddesc}[Request]{has_data}{} +Return whether the instance has a non-\code{None} data. +\end{methoddesc} + +\begin{methoddesc}[Request]{get_data}{} +Return the instance's data. +\end{methoddesc} + +\begin{methoddesc}[Request]{add_header}{key, val} +Add another header to the request. Headers +are currently ignored by all handlers except HTTP handlers, where they +are added to the list of headers sent to the server. Note that there +cannot be more than one header with the same name, and later calls +will overwrite previous calls in case the \var{key} collides. Currently, +this is no loss of HTTP functionality, since all headers which have meaning +when used more than once have a (header-specific) way of gaining the +same functionality using only one header. +\end{methoddesc} + +\begin{methoddesc}[Request]{get_full_url}{} +Return the URL given in the constructor. +\end{methoddesc} + +\begin{methoddesc}[Request]{get_type}{} +Return the type of the URL --- also known as the scheme. +\end{methoddesc} + +\begin{methoddesc}[Request]{get_host}{} +Return the host to which a connection will be made. +\end{methoddesc} + +\begin{methoddesc}[Request]{get_selector}{} +Return the selector --- the part of the URL that is sent to +the server. +\end{methoddesc} + +\begin{methoddesc}[Request]{set_proxy}{host, type} +Make the request by connecting to a proxy server. The \var{host} and \var{type} +will replace those of the instance, and the instance's selector will be +the original URL given in the constructor. +\end{methoddesc} + +\begin{classdesc}{OpenerDirector}{} +The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s chained +together. 
It manages the chaining of handlers, and recovery from errors. +\end{classdesc} + +\begin{methoddesc}[OpenerDirector]{add_handler}{handler} +\var{handler} should be an instance of \class{BaseHandler}. The following +methods are searched, and added to the possible chains. + +\begin{itemize} + \item \code{{\em protocol}_open} --- signal that the handler knows how + to open {\em protocol} URLs. + \item \code{{\em protocol}_error_{\em type}} --- signal that the handler + knows how to handle {\em type} + errors from {\em protocol}. +\end{itemize} + +\end{methoddesc} + +\begin{methoddesc}[OpenerDirector]{close}{} +Explicitly break cycles, and delete all the handlers. +Because the \class{OpenerDirector} needs to know the registered handlers, +and a handler needs to know which \class{OpenerDirector} called +it, there is a reference cycle. Even though recent versions of Python +have cycle-collection, it is sometimes preferable to explicitly break +the cycles. +\end{methoddesc} + +\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}} +Open the given \var{url} (which can be a request object or a string), +optionally passing the given \var{data}. +Arguments, return values and exceptions raised are the same as those +of \function{urlopen} (which simply calls the \method{open()} method +on the default installed \class{OpenerDirector}). +\end{methoddesc} + +\begin{methoddesc}[OpenerDirector]{error}{proto\optional{, arg\optional{, ...}}} +Handle an error in a given protocol. The HTTP protocol is special-cased to +use the HTTP error code as the error type. This will call the registered error handlers +for the given protocol with the given arguments (which are protocol specific). + +Return values and exceptions raised are the same as those +of \function{urlopen}. +\end{methoddesc} + +\begin{classdesc}{BaseHandler}{} +This is the base class for all registered handlers --- and handles only +the simple mechanics of registration. 
+\end{classdesc} + +\begin{methoddesc}[BaseHandler]{add_parent}{director} +Add a director as parent. +\end{methoddesc} + +\begin{methoddesc}[BaseHandler]{close}{} +Remove any parents. +\end{methoddesc} + +The following members and methods should be used only by classes derived +from \class{BaseHandler}: + +\begin{memberdesc}[BaseHandler]{parent} +A valid \class{OpenerDirector}, which can be used to open using a different +protocol, or handle errors. +\end{memberdesc} + +\begin{methoddesc}[BaseHandler]{default_open}{req} +This method is {\em not} defined in \class{BaseHandler}, but subclasses +should define it if they want to catch all URLs. + +This method, if it exists, will be called by the \member{parent} +\class{OpenerDirector}. It should return a file-like object as described +in the return value of the \method{open} of \class{OpenerDirector}, or +\code{None}. It should raise \exception{URLError}, unless a truly exceptional +thing happens (for example, \exception{MemoryError} should not be mapped +to \exception{URLError}). + +This method will be called before any protocol-specific open method. +\end{methoddesc} + +\begin{methoddesc}[BaseHandler]{{\em protocol}_open}{req} +This method is {\em not} defined in \class{BaseHandler}, but subclasses +should define it if they want to handle URLs with the given protocol. + +This method, if it exists, will be called by the \member{parent} +\class{OpenerDirector}. Return values should be the same as for +\method{default_open}. +\end{methoddesc} + +\begin{methoddesc}[BaseHandler]{unknown_open}{req} +This method is {\em not} defined in \class{BaseHandler}, but subclasses +should define it if they want to catch all URLs with no specific +registered handler to open them. + +This method, if it exists, will be called by the \member{parent} +\class{OpenerDirector}. Return values should be the same as for +\method{default_open}. 
+\end{methoddesc} + +\begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs} +This method is {\em not} defined in \class{BaseHandler}, but subclasses +should override it if they intend to provide a catch-all for otherwise +unhandled HTTP errors. It will be called automatically by the +\class{OpenerDirector} getting the error, and should not normally be called +in other circumstances. + +\var{req} will be a \class{Request} object, \var{fp} will be a file-like +object with the HTTP error body, \var{code} will be the three-digit code +of the error, \var{msg} will be the user-visible explanation of the +code and \var{hdrs} will be a dictionary-like object with the headers of +the error. + +Return values and exceptions raised should be the same as those +of \function{urlopen}. +\end{methoddesc} + +\begin{methoddesc}[BaseHandler]{http_error_{\em nnn}}{req, fp, code, msg, hdrs} +\code{nnn} should be a three-digit HTTP error code. This method is also +not defined in \class{BaseHandler}, but will be called, if it exists, on +an instance of a subclass, when an HTTP error with code \code{nnn} occurs. + +Subclasses should override this method to handle specific HTTP errors. + +Arguments, return values and exceptions raised should be the same as for +\method{http_error_default}. +\end{methoddesc} + + +\begin{classdesc}{HTTPDefaultErrorHandler}{} +A class which catches all HTTP errors. +\end{classdesc} + +\begin{methoddesc}[HTTPDefaultErrorHandler]{http_error_default}{req, fp, code, + msg, hdrs} +Raise an \exception{HTTPError}. +\end{methoddesc} + +\begin{classdesc}{HTTPRedirectHandler}{} +A class to handle redirections. +\end{classdesc} + +\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req, fp, code, + msg, hdrs} +Redirect to the \code{Location:} URL. This method gets called by +the parent \class{OpenerDirector} when getting an HTTP permanent-redirect +error. 
+\end{methoddesc} + +\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req, fp, code, + msg, hdrs} +The same as \method{http_error_301}. +\end{methoddesc} + +\strong{Note:} 303 redirection is not supported by this version of +\module{urllib2}. + +\begin{classdesc}{ProxyHandler}{\optional{proxies}} +Cause requests to go through a proxy. +If \var{proxies} is given, it must be a dictionary mapping +protocol names to URLs of proxies. +The default is to read the list of proxies from the environment +variables \code{{\em protocol}_proxy}. +\end{classdesc} + +\begin{methoddesc}[ProxyHandler]{{\em protocol}_open}{request} +The \class{ProxyHandler} will have a method \code{{\em protocol}_open} for +every {\em protocol} which has a proxy in the \var{proxies} dictionary +given in the constructor. The method will modify requests to go +through the proxy, by calling \code{request.set_proxy()}, and call the next +handler in the chain to actually execute the protocol. +\end{methoddesc} + +\begin{classdesc}{HTTPPasswordMgr}{} +Keep a database of +\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings. +\end{classdesc} + +\begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd} +\var{uri} can be either a single URI, or a sequence of URIs. \var{realm}, +\var{user} and \var{passwd} must be strings. This causes + \code{(\var{user}, \var{passwd})} to be used as authentication tokens +when authentication for \var{realm} and a super-URI of any of the +given URIs is requested. +\end{methoddesc} + +\begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri} +Get user/password for given realm and URI, if any. This method will +return \code{(None, None)} if no matching user/password is known. +\end{methoddesc} + +\begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{} +Keep a database of +\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings. 
+A realm of \code{None} is considered a catch-all realm, which is searched +if no other realm fits. +\end{classdesc} + +\begin{methoddesc}[HTTPPasswordMgrWithDefaultRealm]{add_password} + {realm, uri, user, passwd} +\var{uri} can be either a single URI, or a sequence of URIs. \var{realm}, +\var{user} and \var{passwd} must be strings. This causes + \code{(\var{user}, \var{passwd})} to be used as authentication tokens +when authentication for \var{realm} and a super-URI of any of the +given URIs is requested. +\end{methoddesc} + +\begin{methoddesc}[HTTPPasswordMgrWithDefaultRealm]{find_user_password}{realm, authuri} +Get user/password for given realm and URI, if any. This method will +return \code{(None, None)} if no matching user/password is known. +If the given \var{realm} has no user/password, the realm \code{None} +will be searched. +\end{methoddesc} + +\begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}} +This is a mixin class that helps with HTTP authentication, both +to the remote host and to a proxy. + +\var{password_mgr} should be something that is compatible with +\class{HTTPPasswordMgr} --- that is, it supplies the interface documented above. +\end{classdesc} + +\begin{methoddesc}[AbstractBasicAuthHandler]{http_error_auth_reqed} + {authreq, host, req, headers} +Handle an authentication request by getting a user/password pair, and retrying. +\var{authreq} should be the name of the header that carries the information about +the realm, \var{host} is the host to authenticate to, \var{req} should be the +(failed) \class{Request} object, and \var{headers} should be the error headers. +\end{methoddesc} + +\begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}} +Handle authentication with the remote host. +Valid \var{password_mgr}, if given, are the same as for +\class{AbstractBasicAuthHandler}. +\end{classdesc} + +\begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code, + msg, hdrs} +Retry the request with authentication info, if available. 
+\end{methoddesc} + +\begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}} +Handle authentication with the proxy. +Valid \var{password_mgr}, if given, are the same as for +\class{AbstractBasicAuthHandler}. +\end{classdesc} + +\begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code, + msg, hdrs} +Retry the request with authentication info, if available. +\end{methoddesc} + +\begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}} +This is a mixin class that helps with HTTP authentication, both +to the remote host and to a proxy. + +\var{password_mgr} should be something that is compatible with +\class{HTTPPasswordMgr} --- that is, it supplies the interface documented above. +\end{classdesc} + +\begin{methoddesc}[AbstractDigestAuthHandler]{http_error_auth_reqed} + {authreq, host, req, headers} +\var{authreq} should be the name of the header that carries the information about +the realm, \var{host} should be the host to authenticate to, \var{req} +should be the (failed) \class{Request} object, and \var{headers} should be the +error headers. +\end{methoddesc} + +\begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}} +Handle authentication with the remote host. +Valid \var{password_mgr}, if given, are the same as for +\class{AbstractDigestAuthHandler}. +\end{classdesc} + +\begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code, + msg, hdrs} +Retry the request with authentication info, if available. +\end{methoddesc} + +\begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}} +Handle authentication with the proxy. +\var{password_mgr}, if given, should be the same as for +the constructor of \class{AbstractDigestAuthHandler}. +\end{classdesc} + +\begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code, + msg, hdrs} +Retry the request with authentication info, if available. 
+\end{methoddesc} + +\begin{classdesc}{HTTPHandler}{} +A class to handle opening of HTTP URLs. +\end{classdesc} + +\begin{methoddesc}[HTTPHandler]{http_open}{req} +Send an HTTP request (either GET or POST, depending on whether +\code{req.has_data()} is true). +\end{methoddesc} + +\begin{classdesc}{HTTPSHandler}{} +A class to handle opening of HTTPS URLs. +\end{classdesc} + +\begin{methoddesc}[HTTPSHandler]{https_open}{req} +Send an HTTPS request (either GET or POST, depending on whether +\code{req.has_data()} is true). +\end{methoddesc} + +\begin{classdesc}{UnknownHandler}{} +A catch-all class to handle unknown URLs. +\end{classdesc} + +\begin{methoddesc}[UnknownHandler]{unknown_open}{req} +Raise a \exception{URLError} exception. +\end{methoddesc} + +\begin{classdesc}{FileHandler}{} +Open local files. +\end{classdesc} + +\begin{methoddesc}[FileHandler]{file_open}{req} +Open the file locally, if there is no host name, or +the host name is \code{"localhost"}. Change the +protocol to \code{ftp} otherwise, and retry opening +it using \member{parent}. +\end{methoddesc} + +\begin{classdesc}{FTPHandler}{} +Open FTP URLs. +\end{classdesc} + +\begin{methoddesc}[FTPHandler]{ftp_open}{req} +Open the FTP file indicated by \var{req}. +The login is always done with empty username and password. +\end{methoddesc} + +\begin{classdesc}{CacheFTPHandler}{} +Open FTP URLs, keeping a cache of open FTP connections to minimize +delays. +\end{classdesc} + +\begin{methoddesc}[CacheFTPHandler]{ftp_open}{req} +Open the FTP file indicated by \var{req}. +The login is always done with empty username and password. +\end{methoddesc} + +\begin{methoddesc}[CacheFTPHandler]{setTimeout}{t} +Set timeout of connections to \var{t} seconds. +\end{methoddesc} + +\begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m} +Set maximum number of cached connections to \var{m}. +\end{methoddesc} + +\begin{classdesc}{GopherHandler}{} +Open gopher URLs. 
+\end{classdesc} + +\begin{methoddesc}[GopherHandler]{gopher_open}{req} +Open the gopher resource indicated by \var{req}. +\end{methoddesc} diff --git a/Lib/urllib2.py b/Lib/urllib2.py index a3ff482..a1e157b 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -57,8 +57,10 @@ import urllib2 authinfo = urllib2.HTTPBasicAuthHandler() authinfo.add_password('realm', 'host', 'username', 'password') +proxy_support = urllib2.ProxyHandler({"http" : "http://ahad-haam:3128"}) + # build a new opener that adds authentication and caching FTP handlers -opener = urllib2.build_opener(authinfo, urllib2.CacheFTPHandler) +opener = urllib2.build_opener(proxy_support, authinfo, urllib2.CacheFTPHandler) # install it urllib2.install_opener(opener) @@ -92,7 +94,6 @@ import re import base64 import types import urlparse -import os import md5 import mimetypes import mimetools @@ -100,6 +101,7 @@ import ftplib import sys import time import gopherlib +import posixpath try: from cStringIO import StringIO @@ -121,10 +123,7 @@ from urllib import unwrap, unquote, splittype, splithost, \ from urllib import getproxies # support for FileHandler -from urllib import localhost, thishost, url2pathname, pathname2url - -# support for GopherHandler -from urllib import splitgophertype, splitquery +from urllib import localhost, url2pathname __version__ = "2.0a1" @@ -177,7 +176,9 @@ class HTTPError(URLError, addinfourl): class GopherError(URLError): pass + class Request: + def __init__(self, url, data=None, headers={}): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.__original = unwrap(url) @@ -229,15 +230,8 @@ class Request: def get_selector(self): return self.__r_host - def set_proxy(self, proxy): - self.__proxy = proxy - # XXX this code is based on urllib, but it doesn't seem - # correct. 
specifically, if the proxy has a port number then - # splittype will return the hostname as the type and the port - # will be include with everything else - self.type, self.__r_type = splittype(self.__proxy) - self.host, XXX = splithost(self.__r_type) - self.host = unquote(self.host) + def set_proxy(self, host, type): + self.host, self.type = host, type self.__r_host = self.__original def add_header(self, key, val): @@ -329,9 +323,9 @@ class OpenerDirector: 'unknown_open', req) def error(self, proto, *args): - if proto == 'http': - # XXX http protocol is special cased - dict = self.handle_error[proto] + if proto in ['http', 'https']: + # XXX http[s] protocols are special cased + dict = self.handle_error['http'] # https is not different then http proto = args[2] # YUCK! meth_name = 'http_error_%d' % proto http_err = 1 @@ -397,6 +391,8 @@ def build_opener(*handlers): default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, FTPHandler, FileHandler] + if hasattr(httplib, 'HTTPS'): + default_classes.append(HTTPSHandler) skip = [] for klass in default_classes: for check in handlers: @@ -451,7 +447,7 @@ class HTTPRedirectHandler(BaseHandler): new = Request(newurl, req.get_data()) new.error_302_dict = {} if hasattr(req, 'error_302_dict'): - if req.error_302_dict.has_key(newurl): + if len(error_302_dict)>10 or req.error_302_dict.has_key(newurl): raise HTTPError(req.get_full_url(), code, self.inf_msg + msg, headers) new.error_302_dict.update(req.error_302_dict) @@ -477,7 +473,14 @@ class ProxyHandler(BaseHandler): def proxy_open(self, req, proxy, type): orig_type = req.get_type() - req.set_proxy(proxy) + type, r_type = splittype(proxy) + host, XXX = splithost(r_type) + if '@' in host: + user_pass, host = host.split('@', 1) + user_pass = base64.encode_string(unquote(user_passw)).strip() + req.addheader('Proxy-Authorization', user_pass) + host = unquote(host) + req.set_proxy(host, type) if orig_type == type: # let other 
handlers take care of it # XXX this only makes sense if the proxy is before the @@ -569,21 +572,33 @@ class HTTPPasswordMgr: return 1 if base[0] != test[0]: return 0 - common = os.path.commonprefix((base[1], test[1])) + common = posixpath.commonprefix((base[1], test[1])) if len(common) == len(base[1]): return 1 return 0 -class HTTPBasicAuthHandler(BaseHandler): +class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr): + + def find_user_password(self, realm, authuri): + user, password = HTTPPasswordMgr.find_user_password(self,realm,authuri) + if user is not None: + return user, password + return HTTPPasswordMgr.find_user_password(self, None, authuri) + + +class AbstractBasicAuthHandler: + rx = re.compile('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"') # XXX there can actually be multiple auth-schemes in a # www-authenticate header. should probably be a lot more careful # in parsing them to extract multiple alternatives - def __init__(self): - self.passwd = HTTPPasswordMgr() + def __init__(self, password_mgr=None): + if password_mgr is None: + password_mgr = HTTPPasswordMgr() + self.passwd = password_mgr self.add_password = self.passwd.add_password self.__current_realm = None # if __current_realm is not None, then the server must have @@ -591,29 +606,27 @@ class HTTPBasicAuthHandler(BaseHandler): # again. must be careful to set it to None on successful # return. - def http_error_401(self, req, fp, code, msg, headers): - # XXX could be mult. 
headers - authreq = headers.get('www-authenticate', None) + def http_error_auth_reqed(self, authreq, host, req, headers): + # XXX could be multiple headers + authreq = headers.get(authreq, None) if authreq: - mo = HTTPBasicAuthHandler.rx.match(authreq) + mo = AbstractBasicAuthHandler.rx.match(authreq) if mo: scheme, realm = mo.groups() if scheme.lower() == 'basic': - return self.retry_http_basic_auth(req, realm) + return self.retry_http_basic_auth(host, req, realm) - def retry_http_basic_auth(self, req, realm): + def retry_http_basic_auth(self, host, req, realm): if self.__current_realm is None: self.__current_realm = realm else: self.__current_realm = realm return None - # XXX host isn't really the correct URI? - host = req.get_host() user,pw = self.passwd.find_user_password(realm, host) if pw: raw = "%s:%s" % (user, pw) auth = base64.encodestring(raw).strip() - req.add_header('Authorization', 'Basic %s' % auth) + req.add_header(self.header, 'Basic %s' % auth) resp = self.parent.open(req) self.__current_realm = None return resp @@ -621,21 +634,37 @@ class HTTPBasicAuthHandler(BaseHandler): self.__current_realm = None return None -class HTTPDigestAuthHandler(BaseHandler): - """An authentication protocol defined by RFC 2069 +class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): - Digest authentication improves on basic authentication because it - does not transmit passwords in the clear. 
- """ + header = 'Authorization' - def __init__(self): - self.passwd = HTTPPasswordMgr() + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse.urlparse(req.get_full_url())[1] + return self.http_error_auth_reqed('www-authenticate', + host, req, headers) + + +class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): + + header = 'Proxy-Authorization' + + def http_error_407(self, req, fp, code, msg, headers): + host = req.get_host() + return self.http_error_auth_reqed('proxy-authenticate', + host, req, headers) + + +class AbstractDigestAuthHandler: + + def __init__(self, passwd=None): + if passwd is None: + passwd = HTTPPassowrdMgr() + self.passwd = passwd self.add_password = self.passwd.add_password self.__current_realm = None - def http_error_401(self, req, fp, code, msg, headers): - # XXX could be mult. headers - authreq = headers.get('www-authenticate', None) + def http_error_auth_reqed(self, authreq, host, req, headers): + authreq = headers.get(self.header, None) if authreq: kind = authreq.split()[0] if kind == 'Digest': @@ -646,7 +675,7 @@ class HTTPDigestAuthHandler(BaseHandler): chal = parse_keqv_list(parse_http_list(challenge)) auth = self.get_authorization(req, chal) if auth: - req.add_header('Authorization', 'Digest %s' % auth) + req.add_header(self.header, 'Digest %s' % auth) resp = self.parent.open(req) self.__current_realm = None return resp @@ -715,6 +744,30 @@ class HTTPDigestAuthHandler(BaseHandler): # XXX not implemented yet return None + +class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + """An authentication protocol defined by RFC 2069 + + Digest authentication improves on basic authentication because it + does not transmit passwords in the clear. 
+ """ + + header = 'Authorization' + + def http_error_401(self, req, fp, code, msg, headers): + host = urlparse.urlparse(req.get_full_url())[1] + self.http_error_auth_reqed('www-authenticate', host, req, headers) + + +class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler): + + header = 'Proxy-Authorization' + + def http_error_407(self, req, fp, code, msg, headers): + host = req.get_host() + self.http_error_auth_reqed('proxy-authenticate', host, req, headers) + + def encode_digest(digest): hexrep = [] for c in digest: @@ -725,15 +778,15 @@ def encode_digest(digest): return ''.join(hexrep) -class HTTPHandler(BaseHandler): - def http_open(self, req): - # XXX devise a new mechanism to specify user/password +class AbstractHTTPHandler(BaseHandler): + + def do_open(self, http_class, req): host = req.get_host() if not host: raise URLError('no host given') try: - h = httplib.HTTP(host) # will parse host:port + h = http_class(host) # will parse host:port if req.has_data(): data = req.get_data() h.putrequest('POST', req.get_selector()) @@ -762,6 +815,20 @@ class HTTPHandler(BaseHandler): else: return self.parent.error('http', req, fp, code, msg, hdrs) + +class HTTPHandler(AbstractHTTPHandler): + + def http_open(self, req): + return self.do_open(httplib.HTTP, req) + + +if hasattr(httplib, 'HTTPS'): + class HTTPSHandler(AbstractHTTPHandler): + + def https_open(self, req): + return self.do_open(httplib.HTTPS, req) + + class UnknownHandler(BaseHandler): def unknown_open(self, req): type = req.get_type() |