summaryrefslogtreecommitdiffstats
path: root/Doc
diff options
context:
space:
mode:
Diffstat (limited to 'Doc')
-rw-r--r--Doc/lib/lib.tex1
-rw-r--r--Doc/lib/libcookie.tex4
-rw-r--r--Doc/lib/libcookielib.tex679
-rw-r--r--Doc/lib/liburllib2.tex161
-rw-r--r--Doc/whatsnew/whatsnew24.tex19
5 files changed, 835 insertions, 29 deletions
diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex
index 56ae04f..e6879fd 100644
--- a/Doc/lib/lib.tex
+++ b/Doc/lib/lib.tex
@@ -233,6 +233,7 @@ and how to embed it in other applications.
\input{libbasehttp}
\input{libsimplehttp}
\input{libcgihttp}
+\input{libcookielib}
\input{libcookie}
\input{libxmlrpclib}
\input{libsimplexmlrpc}
diff --git a/Doc/lib/libcookie.tex b/Doc/lib/libcookie.tex
index f872ed2..bba9c79 100644
--- a/Doc/lib/libcookie.tex
+++ b/Doc/lib/libcookie.tex
@@ -68,6 +68,10 @@ you should not use the \class{SerialCookie} class.
\begin{seealso}
+ \seemodule{cookielib}{HTTP cookie handling for for web
+ \emph{clients}. The \module{cookielib} and \module{Cookie}
+ modules do not depend on each other.}
+
\seerfc{2109}{HTTP State Management Mechanism}{This is the state
management specification implemented by this module.}
\end{seealso}
diff --git a/Doc/lib/libcookielib.tex b/Doc/lib/libcookielib.tex
new file mode 100644
index 0000000..ee42594
--- /dev/null
+++ b/Doc/lib/libcookielib.tex
@@ -0,0 +1,679 @@
+\section{\module{cookielib} ---
+ Cookie handling for HTTP clients}
+
+\declaremodule{standard}{cookielib}
+\moduleauthor{John J. Lee}{jjl@pobox.com}
+\sectionauthor{John J. Lee}{jjl@pobox.com}
+
+\modulesynopsis{Cookie handling for HTTP clients}
+
+The \module{cookielib} module defines classes for automatic handling
+of HTTP cookies. It is useful for accessing web sites that require
+small pieces of data -- \dfn{cookies} -- to be set on the client
+machine by an HTTP response from a web server, and then returned to
+the server in later HTTP requests.
+
+Both the regular Netscape cookie protocol and the protocol defined by
+\rfc{2965} are handled. RFC 2965 handling is switched off by default.
+\rfc{2109} cookies are parsed as Netscape cookies and subsequently
+treated as RFC 2965 cookies. Note that the great majority of cookies
+on the Internet are Netscape cookies. \module{cookielib} attempts to
+follow the de-facto Netscape cookie protocol (which differs
+substantially from that set out in the original Netscape
+specification), including taking note of the \code{max-age} and
+\code{port} cookie-attributes introduced with RFC 2109. \note{The
+various named parameters found in \mailheader{Set-Cookie} and
+\mailheader{Set-Cookie2} headers (eg. \code{domain} and
+\code{expires}) are conventionally referred to as \dfn{attributes}.
+To distinguish them from Python attributes, the documentation for this
+module uses the term \dfn{cookie-attribute} instead}.
+
+
+The module defines the following exception:
+
+\begin{excdesc}{LoadError}
+Instances of \class{FileCookieJar} raise this exception on failure to
+load cookies from a file.
+\end{excdesc}
+
+
+The following classes are provided:
+
+\begin{classdesc}{CookieJar}{policy=\constant{None}}
+\var{policy} is an object implementing the \class{CookiePolicy}
+interface.
+
+The \class{CookieJar} class stores HTTP cookies. It extracts cookies
+from HTTP requests, and returns them in HTTP responses.
+\class{CookieJar} instances automatically expire contained cookies
+when necessary. Subclasses are also responsible for storing and
+retrieving cookies from a file or database.
+\end{classdesc}
+
+\begin{classdesc}{FileCookieJar}{filename, delayload=\constant{None},
+ policy=\constant{None}}
+\var{policy} is an object implementing the \class{CookiePolicy}
+interface. For the other arguments, see the documentation for the
+corresponding attributes.
+
+A \class{CookieJar} which can load cookies from, and perhaps save
+cookies to, a file on disk. Cookies are \strong{NOT} loaded from the
+named file until either the \method{load()} or \method{revert()}
+method is called. Subclasses of this class are documented in section
+\ref{file-cookie-jar-classes}.
+\end{classdesc}
+
+\begin{classdesc}{CookiePolicy}{}
+This class is responsible for deciding whether each cookie should be
+accepted from / returned to the server.
+\end{classdesc}
+
+\begin{classdesc}{DefaultCookiePolicy}{
+ blocked_domains=\constant{None},
+ allowed_domains=\constant{None},
+ netscape=\constant{True}, rfc2965=\constant{False},
+ hide_cookie2=\constant{False},
+ strict_domain=\constant{False},
+ strict_rfc2965_unverifiable=\constant{True},
+ strict_ns_unverifiable=\constant{False},
+ strict_ns_domain=\constant{DefaultCookiePolicy.DomainLiberal},
+ strict_ns_set_initial_dollar=\constant{False},
+ strict_ns_set_path=\constant{False}
+ }
+
+Constructor arguments should be passed as keyword arguments only.
+\var{blocked_domains} is a sequence of domain names that we never
+accept cookies from, nor return cookies to. \var{allowed_domains} if
+not \constant{None}, this is a sequence of the only domains for which
+we accept and return cookies. For all other arguments, see the
+documentation for \class{CookiePolicy} and \class{DefaultCookiePolicy}
+objects.
+
+\class{DefaultCookiePolicy} implements the standard accept / reject
+rules for Netscape and RFC 2965 cookies. RFC 2109 cookies
+(ie. cookies received in a \mailheader{Set-Cookie} header with a
+version cookie-attribute of 1) are treated according to the RFC 2965
+rules. \class{DefaultCookiePolicy} also provides some parameters to
+allow some fine-tuning of policy.
+\end{classdesc}
+
+\begin{classdesc}{Cookie}{}
+This class represents Netscape, RFC 2109 and RFC 2965 cookies. It is
+not expected that users of \module{cookielib} construct their own
+\class{Cookie} instances. Instead, if necessary, call
+\method{make_cookies()} on a \class{CookieJar} instance.
+\end{classdesc}
+
+\begin{seealso}
+
+\seemodule{urllib2}{URL opening with automatic cookie handling.}
+
+\seemodule{Cookie}{HTTP cookie classes, principally useful for
+server-side code. The \module{cookielib} and \module{Cookie} modules
+do not depend on each other.}
+
+\seeurl{http://wwwsearch.sf.net/ClientCookie/}{Extensions to this
+module, including a class for reading Microsoft Internet Explorer
+cookies on Windows.}
+
+\seeurl{http://www.netscape.com/newsref/std/cookie_spec.html}{The
+specification of the original Netscape cookie protocol. Though this
+is still the dominant protocol, the 'Netscape cookie protocol'
+implemented by all the major browsers (and \module{cookielib}) only
+bears a passing resemblance to the one sketched out in
+\code{cookie_spec.html}.}
+
+\seerfc{2109}{HTTP State Management Mechanism}{Obsoleted by RFC 2965.
+Uses \mailheader{Set-Cookie} with version=1.}
+
+\seerfc{2965}{HTTP State Management Mechanism}{The Netscape protocol
+with the bugs fixed. Uses \mailheader{Set-Cookie2} in place of
+\mailheader{Set-Cookie}. Not widely used.}
+
+\seeurl{http://kristol.org/cookie/errata.html}{Unfinished errata to
+RFC 2965.}
+
+\seerfc{2964}{Use of HTTP State Management}{}
+
+\end{seealso}
+
+
+\subsection{CookieJar and FileCookieJar Objects \label{cookie-jar-objects}}
+
+\class{CookieJar} objects support the iterator protocol.
+
+\class{CookieJar} has the following methods:
+
+\begin{methoddesc}[CookieJar]{add_cookie_header}{request}
+Add correct \mailheader{Cookie} header to \var{request}.
+
+If the CookiePolicy allows (ie. the \class{CookiePolicy} instance's
+\member{rfc2965} and \member{hide_cookie2} attributes are true and
+false respectively), the \mailheader{Cookie2} header is also added
+when appropriate.
+
+The \var{request} object (usually a \class{urllib2.Request} instance)
+must support the methods \method{get_full_url()}, \method{get_host()},
+\method{get_type()}, \method{unverifiable()},
+\method{get_origin_req_host()}, \method{has_header()},
+\method{get_header()}, \method{header_items()}, and
+\method{add_unredirected_header()},as documented by \module{urllib2}.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{extract_cookies}{response, request}
+Extract cookies from HTTP \var{response} and store them in the
+\class{CookieJar}, where allowed by policy.
+
+The \class{CookieJar} will look for allowable \mailheader{Set-Cookie}
+and \mailheader{Set-Cookie2} headers in the \var{response} argument,
+and store cookies as appropriate (subject to the
+\method{CookiePolicy.set_ok()} method's approval).
+
+The \var{response} object (usually the result of a call to
+\method{urllib2.urlopen()}, or similar) should support an
+\method{info()} method, which returns an object with a
+\method{getallmatchingheaders()} method (usually a
+\class{mimetools.Message} instance).
+
+The \var{request} object (usually a \class{urllib2.Request} instance)
+must support the methods \method{get_full_url()}, \method{get_host()},
+\method{unverifiable()}, and \method{get_origin_req_host()}, as
+documented by \module{urllib2}. The request is used to set default
+values for cookie-attributes as well as for checking that the cookie
+is allowed to be set.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{set_policy}{policy}
+Set the \class{CookiePolicy} instance to be used.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{make_cookies}{response, request}
+Return sequence of \class{Cookie} objects extracted from
+\var{response} object.
+
+See the documentation for \method{extract_cookies} for the interfaces
+required of the \var{response} and \var{request} arguments.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{set_cookie_if_ok}{cookie, request}
+Set a \class{Cookie} if policy says it's OK to do so.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{set_cookie}{cookie}
+Set a \class{Cookie}, without checking with policy to see whether or
+not it should be set.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{clear}{\optional{domain\optional{,
+ path\optional{, name}}}}
+Clear some cookies.
+
+If invoked without arguments, clear all cookies. If given a single
+argument, only cookies belonging to that \var{domain} will be removed.
+If given two arguments, cookies belonging to the specified
+\var{domain} and URL \var{path} are removed. If given three
+arguments, then the cookie with the specified \var{domain}, \var{path}
+and \var{name} is removed.
+
+Raises \exception{KeyError} if no matching cookie exists.
+\end{methoddesc}
+
+\begin{methoddesc}[CookieJar]{clear_session_cookies}{}
+Discard all session cookies.
+
+Discards all contained cookies that have a true \member{discard}
+attribute (usually because they had either no \code{max-age} or
+\code{expires} cookie-attribute, or an explicit \code{discard}
+cookie-attribute). For interactive browsers, the end of a session
+usually corresponds to closing the browser window.
+
+Note that the \method{save()} method won't save session cookies
+anyway, unless you ask otherwise by passing a true
+\var{ignore_discard} argument.
+\end{methoddesc}
+
+\class{FileCookieJar} implements the following additional methods:
+
+\begin{methoddesc}[FileCookieJar]{save}{filename=\constant{None},
+ ignore_discard=\constant{False}, ignore_expires=\constant{False}}
+Save cookies to a file.
+
+This base class raises \class{NotImplementedError}. Subclasses may
+leave this method unimplemented.
+
+\var{filename} is the name of file in which to save cookies. If
+\var{filename} is not specified, \member{self.filename} is used (whose
+default is the value passed to the constructor, if any); if
+\member{self.filename} is \constant{None}, \exception{ValueError} is
+raised.
+
+\var{ignore_discard}: save even cookies set to be discarded.
+\var{ignore_expires}: save even cookies that have expired
+
+The file is overwritten if it already exists, thus wiping all the
+cookies it contains. Saved cookies can be restored later using the
+\method{load()} or \method{revert()} methods.
+\end{methoddesc}
+
+\begin{methoddesc}[FileCookieJar]{load}{filename=\constant{None},
+ ignore_discard=\constant{False}, ignore_expires=\constant{False}}
+Load cookies from a file.
+
+Old cookies are kept unless overwritten by newly loaded ones.
+
+Arguments are as for \method{save()}.
+
+The named file must be in the format understood by the class, or
+\exception{LoadError} will be raised.
+\end{methoddesc}
+
+\begin{methoddesc}[FileCookieJar]{revert}{filename=\constant{None},
+ ignore_discard=\constant{False}, ignore_expires=\constant{False}}
+Clear all cookies and reload cookies from a saved file.
+
+Raises \exception{cookielib.LoadError} or \exception{IOError} if
+reversion is not successful; the object's state will not be altered if
+this happens.
+\end{methoddesc}
+
+\class{FileCookieJar} instances have the following public attributes:
+
+\begin{memberdesc}{filename}
+Filename of default file in which to keep cookies.
+\end{memberdesc}
+
+\begin{memberdesc}{delayload}
+If true, load cookies lazily from disk. This is only a hint, since
+this only affects performance, not behaviour (unless the cookies on
+disk are changing). A \class{CookieJar} object may ignore it. None
+of the \class{FileCookieJar} classes included in the standard library
+lazily loads cookies.
+\end{memberdesc}
+
+
+\subsection{FileCookieJar subclasses and co-operation with web browsers
+ \label{file-cookie-jar-classes}}
+
+The following \class{CookieJar} subclasses are provided for reading
+and writing . Further \class{CookieJar} subclasses, including one
+that reads Microsoft Internet Explorer cookies, are available at
+\url{http://wwwsearch.sf.net/ClientCookie/}.
+
+\begin{classdesc}{MozillaCookieJar}{filename, delayload=\constant{None},
+ policy=\constant{None}}
+A \class{FileCookieJar} that can load from and save cookies to disk in
+the Mozilla \code{cookies.txt} file format (which is also used by the
+lynx and Netscape browsers). \note{This loses information about RFC
+2965 cookies, and also about newer or non-standard cookie-attributes
+such as \code{port}.}
+
+\warning{Back up your cookies before saving if you have cookies whose
+loss / corruption would be inconvenient (there are some subtleties
+which may lead to slight changes in the file over a load / save
+round-trip).}
+
+Also note that cookies saved while Mozilla is running will get
+clobbered by Mozilla.
+\end{classdesc}
+
+\begin{classdesc}{LWPCookieJar}{filename, delayload=\constant{None},
+ policy=\constant{None}}
+A \class{FileCookieJar} that can load from and save cookies to disk in
+format compatible with the libwww-perl library's \code{Set-Cookie3}
+file format. This is convenient if you want to store cookies in a
+human-readable file.
+\end{classdesc}
+
+
+\subsection{CookiePolicy Objects \label{cookie-policy-objects}}
+
+Objects implementing the \class{CookiePolicy} interface have the
+following methods:
+
+\begin{methoddesc}[CookiePolicy]{set_ok}{cookie, request}
+Return boolean value indicating whether cookie should be accepted from server.
+
+\var{cookie} is a \class{cookielib.Cookie} instance. \var{request} is
+an object implementing the interface defined by the documentation for
+\method{CookieJar.extract_cookies()}.
+\end{methoddesc}
+
+\begin{methoddesc}[CookiePolicy]{return_ok}{cookie, request}
+Return boolean value indicating whether cookie should be returned to server.
+
+\var{cookie} is a \class{cookielib.Cookie} instance. \var{request} is
+an object implementing the interface defined by the documentation for
+\method{CookieJar.add_cookie_header()}.
+\end{methoddesc}
+
+\begin{methoddesc}[CookiePolicy]{domain_return_ok}{domain, request}
+Return false if cookies should not be returned, given cookie domain.
+
+This method is an optimization. It removes the need for checking
+every cookie with a particular domain (which might involve reading
+many files). The default implementations of
+\method{domain_return_ok()} and \method{path_return_ok()}
+(\samp{return True}) leave all the work to \method{return_ok()}.
+
+If \method{domain_return_ok()} returns true for the cookie domain,
+\method{path_return_ok()} is called for the cookie path. Otherwise,
+\method{path_return_ok()} and \method{return_ok()} are never called
+for that cookie domain. If \method{path_return_ok()} returns true,
+\method{return_ok()} is called with the \class{Cookie} object itself
+for a full check. Otherwise, \method{return_ok()} is never called for
+that cookie path.
+
+Note that \method{domain_return_ok()} is called for every
+\emph{cookie} domain, not just for the \emph{request} domain. For
+example, the function might be called with both \code{".example.com"}
+and \code{"www.example.com"} if the request domain is
+\code{"www.example.com"}. The same goes for
+\method{path_return_ok()}.
+
+The \var{request} argument is as documented for \method{return_ok()}.
+\end{methoddesc}
+
+\begin{methoddesc}[CookiePolicy]{path_return_ok}{path, request}
+Return false if cookies should not be returned, given cookie path.
+
+See the documentation for \method{domain_return_ok()}.
+\end{methoddesc}
+
+
+In addition to implementing the methods above, implementations of the
+\class{CookiePolicy} interface must also supply the following
+attributes, indicating which protocols should be used, and how. All
+of these attributes may be assigned to.
+
+\begin{memberdesc}{netscape}
+Implement netscape protocol.
+\end{memberdesc}
+\begin{memberdesc}{rfc2965}
+Implement RFC 2965 protocol.
+\end{memberdesc}
+\begin{memberdesc}{hide_cookie2}
+Don't add Cookie2 header to requests (the presence of this header
+indicates to the server that we understand RFC 2965 cookies).
+\end{memberdesc}
+
+The most useful way to define a \class{CookiePolicy} class is by
+subclassing from \class{DefaultCookiePolicy} and overriding some or
+all of the methods above. \class{CookiePolicy} itself may be used as
+a 'null policy' to allow setting and receiving any and all cookies.
+
+
+\subsection{DefaultCookiePolicy Objects \label{default-cookie-policy-objects}}
+
+Implements the standard rules for accepting and returning cookies.
+
+Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is
+switched off by default.
+
+The easiest way to provide your own policy is to override this class
+and call its methods in your overriden implementations before adding
+your own additional checks:
+
+\begin{verbatim}
+import cookielib
+class MyCookiePolicy(cookielib.DefaultCookiePolicy):
+ def set_ok(self, cookie, request):
+ if not cookielib.DefaultCookiePolicy.set_ok(self, cookie, request):
+ return False
+ if i_dont_want_to_store_this_cookie(cookie):
+ return False
+ return True
+\end{verbatim}
+
+In addition to the features required to implement the
+\class{CookiePolicy} interface, this class allows you to block and
+allow domains from setting and receiving cookies. There are also some
+strictness switches that allow you to tighten up the rather loose
+Netscape protocol rules a little bit (at the cost of blocking some
+benign cookies).
+
+A domain blacklist and whitelist is provided (both off by default).
+Only domains not in the blacklist and present in the whitelist (if the
+whitelist is active) participate in cookie setting and returning. Use
+the \var{blocked_domains} constructor argument, and
+\method{blocked_domains()} and \method{set_blocked_domains()} methods
+(and the corresponding argument and methods for
+\var{allowed_domains}). If you set a whitelist, you can turn it off
+again by setting it to \constant{None}.
+
+Domains in block or allow lists that do not start with a dot must be
+equal. For example, \code{"example.com"} matches a blacklist entry of
+\code{"example.com"}, but \code{"www.example.com"} does not. Domains
+that do start with a dot are matched by more specific domains too.
+For example, both \code{"www.example.com"} and
+\code{"www.coyote.example.com"} match \code{".example.com"} (but
+\code{"example.com"} itself does not). IP addresses are an exception,
+and must match exactly. For example, if blocked_domains contains
+\code{"192.168.1.2"} and \code{".168.1.2"}, 192.168.1.2 is blocked,
+but 193.168.1.2 is not.
+
+\class{DefaultCookiePolicy} implements the following additional
+methods:
+
+\begin{methoddesc}[DefaultCookiePolicy]{blocked_domains}{}
+Return the sequence of blocked domains (as a tuple).
+\end{methoddesc}
+
+\begin{methoddesc}[DefaultCookiePolicy]{set_blocked_domains}
+ {blocked_domains}
+Set the sequence of blocked domains.
+\end{methoddesc}
+
+\begin{methoddesc}[DefaultCookiePolicy]{is_blocked}{domain}
+Return whether \var{domain} is on the blacklist for setting or
+receiving cookies.
+\end{methoddesc}
+
+\begin{methoddesc}[DefaultCookiePolicy]{allowed_domains}{}
+Return \constant{None}, or the sequence of allowed domains (as a tuple).
+\end{methoddesc}
+
+\begin{methoddesc}[DefaultCookiePolicy]{set_allowed_domains}
+ {allowed_domains}
+Set the sequence of allowed domains, or \constant{None}.
+\end{methoddesc}
+
+\begin{methoddesc}[DefaultCookiePolicy]{is_not_allowed}{domain}
+Return whether \var{domain} is not on the whitelist for setting or
+receiving cookies.
+\end{methoddesc}
+
+\class{DefaultCookiePolicy} instances have the following attributes,
+which are all initialised from the constructor arguments of the same
+name, and which may all be assigned to.
+
+General strictness switches:
+
+\begin{memberdesc}{strict_domain}
+Don't allow sites to set two-component domains with country-code
+top-level domains like \code{.co.uk}, \code{.gov.uk},
+\code{.co.nz}.etc. This is far from perfect and isn't guaranteed to
+work!
+\end{memberdesc}
+
+RFC 2965 protocol strictness switches:
+
+\begin{memberdesc}{strict_rfc2965_unverifiable}
+Follow RFC 2965 rules on unverifiable transactions (usually, an
+unverifiable transaction is one resulting from a redirect or a request
+for an image hosted on another site). If this is false, cookies are
+\emph{never} blocked on the basis of verifiability
+\end{memberdesc}
+
+Netscape protocol strictness switches:
+
+\begin{memberdesc}{strict_ns_unverifiable}
+apply RFC 2965 rules on unverifiable transactions even to Netscape
+cookies
+\end{memberdesc}
+\begin{memberdesc}{strict_ns_domain}
+Flags indicating how strict to be with domain-matching rules for
+Netscape cookies. See below for acceptable values.
+\end{memberdesc}
+\begin{memberdesc}{strict_ns_set_initial_dollar}
+Ignore cookies in Set-Cookie: headers that have names starting with
+\code{'\$'}.
+\end{memberdesc}
+\begin{memberdesc}{strict_ns_set_path}
+Don't allow setting cookies whose path doesn't path-match request URI.
+\end{memberdesc}
+
+\member{strict_ns_domain} is a collection of flags. Its value is
+constructed by or-ing together (for example,
+\code{DomainStrictNoDots|DomainStrictNonDomain} means both flags are
+set).
+
+\begin{memberdesc}{DomainStrictNoDots}
+When setting cookies, the 'host prefix' must not contain a dot
+(eg. \code{www.foo.bar.com} can't set a cookie for \code{.bar.com},
+because \code{www.foo} contains a dot).
+\end{memberdesc}
+\begin{memberdesc}{DomainStrictNonDomain}
+Cookies that did not explicitly specify a \code{domain}
+cookie-attribute can only be returned to a domain that string-compares
+equal to the domain that set the cookie (eg. \code{spam.example.com}
+won't be returned cookies from \code{example.com} that had no
+\code{domain} cookie-attribute).
+\end{memberdesc}
+\begin{memberdesc}{DomainRFC2965Match}
+When setting cookies, require a full RFC 2965 domain-match.
+\end{memberdesc}
+
+The following attributes are provided for convenience, and are the
+most useful combinations of the above flags:
+
+\begin{memberdesc}{DomainLiberal}
+Equivalent to 0 (ie. all of the above Netscape domain strictness flags
+switched off).
+\end{memberdesc}
+\begin{memberdesc}{DomainStrict}
+Equivalent to \code{DomainStrictNoDots|DomainStrictNonDomain}.
+\end{memberdesc}
+
+
+\subsection{Cookie Objects \label{cookie-jar-objects}}
+
+\class{Cookie} instances have Python attributes roughly corresponding
+to the standard cookie-attributes specified in the various cookie
+standards. The correspondence is not one-to-one, because there are
+complicated rules for assigning default values, and because the
+\code{max-age} and \code{expires} cookie-attributes contain equivalent
+information.
+
+Assignment to these attributes should not be necessary other than in
+rare circumstances in a \class{CookiePolicy} method. The class does
+not enforce internal consistency, so you should know what you're
+doing if you do that.
+
+\begin{memberdesc}[Cookie]{version}
+Integer or \constant{None}. Netscape cookies have version 0. RFC
+2965 and RFC 2109 cookies have version 1.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{name}
+Cookie name (a string), or \constant{None}.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{value}
+Cookie value (a string).
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{port}
+String representing a port or a set of ports (eg. '80', or '80,8080'),
+or \constant{None}.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{path}
+Cookie path (a string, eg. '/acme/rocket_launchers').
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{secure}
+True if cookie should only be returned over a secure connection.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{expires}
+Integer expiry date in seconds since epoch, or \constant{None}. See
+also the \method{is_expired()} method.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{discard}
+True if this is a session cookie.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{comment}
+String comment from the server explaining the function of this cookie,
+or \constant{None}.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{comment_url}
+URL linking to a comment from the server explaining the function of
+this cookie, or \constant{None}.
+\end{memberdesc}
+
+\begin{memberdesc}[Cookie]{port_specified}
+True if a port or set of ports was explicitly specified by the server
+(in the \mailheader{Set-Cookie} / \mailheader{Set-Cookie2} header).
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{domain_specified}
+True if a domain was explicitly specified by the server.
+\end{memberdesc}
+\begin{memberdesc}[Cookie]{domain_initial_dot}
+True if the domain explicitly specified by the server began with a
+dot ('.').
+\end{memberdesc}
+
+Cookies may have additional non-standard cookie-attributes. These may
+be accessed using the following methods:
+
+\begin{methoddesc}[Cookie]{has_nonstandard_attr}{name}
+Return true if cookie has the named cookie-attribute.
+\end{methoddesc}
+\begin{methoddesc}[Cookie]{get_nonstandard_attr}{name, default=\constant{None}}
+If cookie has the named cookie-attribute, return its value.
+Otherwise, return \var{default}.
+\end{methoddesc}
+\begin{methoddesc}[Cookie]{set_nonstandard_attr}{name, value}
+Set the value of the named cookie-attribute.
+\end{methoddesc}
+
+The \class{Cookie} class also defines the following method:
+
+\begin{methoddesc}[Cookie]{is_expired}{\optional{now=\constant{None}}}
+True if cookie has passed the time at which the server requested it
+should expire. If \var{now} is given (in seconds since the epoch),
+return whether the cookie has expired at the specified time.
+\end{methoddesc}
+
+
+\subsection{Examples \label{cookielib-examples}}
+
+The first example shows the most common usage of \module{cookielib}:
+
+\begin{verbatim}
+import cookielib, urllib2
+cj = cookielib.CookieJar()
+opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
+r = opener.open("http://example.com/")
+\end{verbatim}
+
+This example illustrates how to open a URL using your Netscape,
+Mozilla, or lynx cookies (assumes \UNIX{} convention for location of
+the cookies file):
+
+\begin{verbatim}
+import os, cookielib, urllib2
+cj = cookielib.MozillaCookieJar()
+cj.load(os.path.join(os.environ["HOME"], "/.netscape/cookies.txt"))
+opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
+r = opener.open("http://example.com/")
+\end{verbatim}
+
+The next example illustrates the use of \class{DefaultCookiePolicy}.
+Turn on RFC 2965 cookies, be more strict about domains when setting
+and returning Netscape cookies, and block some domains from setting
+cookies or having them returned:
+
+\begin{verbatim}
+import urllib2
+from cookielib import CookieJar, DefaultCookiePolicy as Policy
+policy = Policy(rfc2965=True, strict_ns_domain=Policy.DomainStrict,
+ blocked_domains=["ads.net", ".ads.net"])
+cj = CookieJar(policy)
+opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
+r = opener.open("http://example.com/")
+\end{verbatim}
diff --git a/Doc/lib/liburllib2.tex b/Doc/lib/liburllib2.tex
index 2399645..3ce95fb 100644
--- a/Doc/lib/liburllib2.tex
+++ b/Doc/lib/liburllib2.tex
@@ -10,14 +10,13 @@
The \module{urllib2} module defines functions and classes which help
in opening URLs (mostly HTTP) in a complex world --- basic and digest
-authentication, redirections and more.
+authentication, redirections, cookies and more.
The \module{urllib2} module defines the following functions:
\begin{funcdesc}{urlopen}{url\optional{, data}}
Open the URL \var{url}, which can be either a string or a \class{Request}
-object (currently the code checks that it really is a \class{Request}
-instance, or an instance of a subclass of \class{Request}).
+object.
\var{data} should be a string, which specifies additional data to
send to the server. In HTTP requests, which are the only ones that
@@ -87,13 +86,32 @@ Gopher handler.
The following classes are provided:
-\begin{classdesc}{Request}{url\optional{, data\optional{, headers}}}
+\begin{classdesc}{Request}{url\optional{, data}\optional{, headers}
+ \optional{, origin_req_host}\optional{, unverifiable}}
This class is an abstraction of a URL request.
\var{url} should be a string which is a valid URL. For a description
of \var{data} see the \method{add_data()} description.
\var{headers} should be a dictionary, and will be treated as if
\method{add_header()} was called with each key and value as arguments.
+
+The final two arguments are only of interest for correct handling of
+third-party HTTP cookies:
+
+\var{origin_req_host} should be the request-host of the origin
+transaction, as defined by \rfc{2965}. It defaults to
+\code{cookielib.request_host(self)}. This is the host name or IP
+address of the original request that was initiated by the user. For
+example, if the request is for an image in an HTML document, this
+should be the request-host of the request for the page containing the
+image.
+
+\var{unverifiable} should indicate whether the request is
+unverifiable, as defined by RFC 2965. It defaults to False. An
+unverifiable request is one whose URL the user did not have the option
+to approve. For example, if the request is for an image in an HTML
+document, and the user had no option to approve the automatic fetching
+of the image, this should be true.
\end{classdesc}
\begin{classdesc}{OpenerDirector}{}
@@ -116,6 +134,10 @@ responses are turned into \exception{HTTPError} exceptions.
A class to handle redirections.
\end{classdesc}
+\begin{classdesc}{HTTPCookieProcessor}{\optional{cookiejar}}
+A class to handle HTTP Cookies.
+\end{classdesc}
+
\begin{classdesc}{ProxyHandler}{\optional{proxies}}
Cause requests to go through a proxy.
If \var{proxies} is given, it must be a dictionary mapping
@@ -217,10 +239,10 @@ The following methods describe all of \class{Request}'s public interface,
and so all must be overridden in subclasses.
\begin{methoddesc}[Request]{add_data}{data}
-Set the \class{Request} data to \var{data}. This is ignored
-by all handlers except HTTP handlers --- and there it should be an
-\mimetype{application/x-www-form-encoded} buffer, and will change the
-request to be \code{POST} rather than \code{GET}.
+Set the \class{Request} data to \var{data}. This is ignored by all
+handlers except HTTP handlers --- and there it should be a byte
+string, and will change the request to be \code{POST} rather than
+\code{GET}.
\end{methoddesc}
\begin{methoddesc}[Request]{get_method}{}
@@ -282,6 +304,17 @@ and \var{type} will replace those of the instance, and the instance's
selector will be the original URL given in the constructor.
\end{methoddesc}
+\begin{methoddesc}[Request]{get_origin_req_host}{}
+Return the request-host of the origin transaction, as defined by
+\rfc{2965}. See the documentation for the \class{Request}
+constructor.
+\end{methoddesc}
+
+\begin{methoddesc}[Request]{is_unverifiable}{}
+Return whether the request is unverifiable, as defined by RFC 2965.
+See the documentation for the \class{Request} constructor.
+\end{methoddesc}
+
\subsection{OpenerDirector Objects \label{opener-director-objects}}
@@ -289,14 +322,18 @@ selector will be the original URL given in the constructor.
\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
\var{handler} should be an instance of \class{BaseHandler}. The
-following methods are searched, and added to the possible chains.
+following methods are searched, and added to the possible chains (note
+that HTTP errors are a special case).
\begin{itemize}
\item \method{\var{protocol}_open()} ---
signal that the handler knows how to open \var{protocol} URLs.
- \item \method{\var{protocol}_error_\var{type}()} ---
- signal that the handler knows how to handle \var{type} errors from
- \var{protocol}.
+ \item \method{http_error_\var{type}()} ---
+ signal that the handler knows how to handle HTTP errors with HTTP
+ error code \var{type}.
+ \item \method{\var{protocol}_error()} ---
+ signal that the handler knows how to handle errors from
+ (non-\code{http}) \var{protocol}.
\item \method{\var{protocol}_request()} ---
signal that the handler knows how to pre-process \var{protocol}
requests.
@@ -306,26 +343,17 @@ following methods are searched, and added to the possible chains.
\end{itemize}
\end{methoddesc}
-\begin{methoddesc}[OpenerDirector]{close}{}
-Explicitly break cycles, and delete all the handlers.
-Because the \class{OpenerDirector} needs to know the registered handlers,
-and a handler needs to know who the \class{OpenerDirector} who called
-it is, there is a reference cycle. Even though recent versions of Python
-have cycle-collection, it is sometimes preferable to explicitly break
-the cycles.
-\end{methoddesc}
-
\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
Open the given \var{url} (which can be a request object or a string),
optionally passing the given \var{data}.
Arguments, return values and exceptions raised are the same as those
of \function{urlopen()} (which simply calls the \method{open()} method
-on the default installed \class{OpenerDirector}).
+on the currently installed global \class{OpenerDirector}).
\end{methoddesc}
\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
arg\optional{, \moreargs}}}
-Handle an error in a given protocol. This will call the registered
+Handle an error of the given protocol. This will call the registered
error handlers for the given protocol with the given arguments (which
are protocol specific). The HTTP protocol is a special case which
uses the HTTP response code to determine the specific error handler;
@@ -335,6 +363,45 @@ Return values and exceptions raised are the same as those
of \function{urlopen()}.
\end{methoddesc}
+OpenerDirector objects open URLs in three stages:
+
+\begin{enumerate}
+ \item Every handler with a method named like
+ \method{\var{protocol}_request()} has that method called to
+ pre-process the request.
+
+ The order in which these methods are called is determined by
+ sorting the handler instances by the \member{.processor_order}
+ attribute.
+
+ \item Handlers with a method named like
+ \method{\var{protocol}_open()} are called to handle the request.
+ This stage ends when a handler either returns a
+ non-\constant{None} value (ie. a response), or raises an exception
+ (usually URLError). Exceptions are allowed to propagate.
+
+ In fact, the above algorithm is first tried for methods named
+ \method{default_open}. If all such methods return
+ \constant{None}, the algorithm is repeated for methods named like
+ \method{\var{protocol}_open()}. If all such methods return
+ \constant{None}, the algorithm is repeated for methods named
+ \method{unknown_open()}.
+
+ Note that the implementation of these methods may involve calls of
+ the parent \class{OpenerDirector} instance's \method{.open()} and
+ \method{.error()} methods.
+
+ The order in which these methods are called is determined by
+ sorting the handler instances.
+
+ \item Every handler with a method named like
+ \method{\var{protocol}_response()} has that method called to
+ post-process the response.
+
+ The order in which these methods are called is determined by
+ sorting the handler instances by the \member{.processor_order}
+ attribute.
+\end{enumerate}
\subsection{BaseHandler Objects \label{base-handler-objects}}
@@ -351,7 +418,11 @@ Remove any parents.
\end{methoddesc}
The following members and methods should only be used by classes
-derived from \class{BaseHandler}:
+derived from \class{BaseHandler}. \note{The convention has been
+adopted that subclasses defining \method{\var{protocol}_request()} or
+\method{\var{protocol}_response()} methods are named
+\class{*Processor}; all others are named \class{*Handler}.}
+
\begin{memberdesc}[BaseHandler]{parent}
A valid \class{OpenerDirector}, which can be used to open using a
@@ -423,6 +494,29 @@ Arguments, return values and exceptions raised should be the same as
for \method{http_error_default()}.
\end{methoddesc}
+\begin{methoddescni}[BaseHandler]{\var{protocol}_request}{req}
+This method is \emph{not} defined in \class{BaseHandler}, but
+subclasses should define it if they want to pre-process requests of
+the given protocol.
+
+This method, if defined, will be called by the parent
+\class{OpenerDirector}. \var{req} will be a \class{Request} object.
+The return value should be a \class{Request} object.
+\end{methoddescni}
+
+\begin{methoddescni}[BaseHandler]{\var{protocol}_response}{req, response}
+This method is \emph{not} defined in \class{BaseHandler}, but
+subclasses should define it if they want to post-process responses of
+the given protocol.
+
+This method, if defined, will be called by the parent
+\class{OpenerDirector}. \var{req} will be a \class{Request} object.
+\var{response} will be an object implementing the same interface as
+the return value of \function{urlopen()}. The return value should
+implement the same interface as the return value of
+\function{urlopen()}.
+\end{methoddescni}
+
\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
\note{Some HTTP redirections require action from this module's client
@@ -434,12 +528,12 @@ for \method{http_error_default()}.
fp, code, msg, hdrs}
Return a \class{Request} or \code{None} in response to a redirect.
This is called by the default implementations of the
-\method{http_error_30*()} methods when a redirection is received
-from the server. If a redirection should take place, return a new
+\method{http_error_30*()} methods when a redirection is received from
+the server. If a redirection should take place, return a new
\class{Request} to allow \method{http_error_30*()} to perform the
-redirect. Otherwise, raise \exception{HTTPError} if no other
-\class{Handler} should try to handle this URL, or return \code{None}
-if you can't but another \class{Handler} might.
+redirect. Otherwise, raise \exception{HTTPError} if no other handler
+should try to handle this URL, or return \code{None} if you can't but
+another handler might.
\begin{notice}
The default implementation of this method does not strictly
@@ -478,6 +572,15 @@ The same as \method{http_error_301()}, but called for the
\end{methoddesc}
+\subsection{HTTPCookieProcessor Objects \label{http-cookie-processor}}
+
+\class{HTTPCookieProcessor} instances have one attribute:
+
+\begin{memberdesc}{cookiejar}
+The \class{cookielib.CookieJar} in which cookies are stored.
+\end{memberdesc}
+
+
\subsection{ProxyHandler Objects \label{proxy-handler}}
\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
diff --git a/Doc/whatsnew/whatsnew24.tex b/Doc/whatsnew/whatsnew24.tex
index bcb9134..a394971 100644
--- a/Doc/whatsnew/whatsnew24.tex
+++ b/Doc/whatsnew/whatsnew24.tex
@@ -569,6 +569,25 @@ For example:
%======================================================================
% whole new modules get described in \subsections here
+\subsection{cookielib}
+
+The \module{cookielib} library supports client-side handling for HTTP
+cookies, just as the \module{Cookie} provides server-side cookie
+support in CGI scripts. This library manages cookies in a way similar
+to web browsers. Cookies are stored in cookie jars; the library
+transparently stores cookies offered by the web server in the cookie
+jar, and fetches the cookie from the jar when connecting to the
+server. Similar to web browsers, policy objects control whether
+cookies are accepted or not.
+
+In order to store cookies across sessions, two implementations of
+cookie jars are provided: one that stores cookies in the Netscape
+format, so applications can use the Mozilla or Lynx cookie jars, and
+one that stores cookies in the same format as the Perl libwww libary.
+
+\module{urllib2} has been changed to interact with \module{cookielib}:
+\class{HTTPCookieProcessor} manages a cookie jar that is used when
+accessing URLs.
% ======================================================================
\section{Build and C API Changes}