From 2442015af26321083d4a2d75c096c8b732f049b2 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 26 May 2008 16:32:26 +0000 Subject: Create http package. #2883. --- Doc/howto/urllib2.rst | 10 +- Doc/library/basehttpserver.rst | 265 ------ Doc/library/cgihttpserver.rst | 73 -- Doc/library/codecs.rst | 4 +- Doc/library/cookie.rst | 281 ------ Doc/library/cookielib.rst | 750 ---------------- Doc/library/http.client.rst | 498 +++++++++++ Doc/library/http.cookiejar.rst | 746 ++++++++++++++++ Doc/library/http.cookies.rst | 280 ++++++ Doc/library/http.server.rst | 322 +++++++ Doc/library/httplib.rst | 501 ----------- Doc/library/internet.rst | 10 +- Doc/library/simplehttpserver.rst | 86 -- Doc/library/ssl.rst | 2 +- Doc/library/urllib2.rst | 15 +- Doc/library/wsgiref.rst | 6 +- Doc/library/xmlrpc.client.rst | 4 +- Doc/license.rst | 2 +- Lib/BaseHTTPServer.py | 601 ------------- Lib/CGIHTTPServer.py | 367 -------- Lib/Cookie.py | 733 --------------- Lib/SimpleHTTPServer.py | 216 ----- Lib/_LWPCookieJar.py | 8 +- Lib/_MozillaCookieJar.py | 8 +- Lib/cookielib.py | 1785 ------------------------------------- Lib/distutils/command/upload.py | 6 +- Lib/http/__init__.py | 1 + Lib/http/client.py | 1132 +++++++++++++++++++++++ Lib/http/cookiejar.py | 1785 +++++++++++++++++++++++++++++++++++++ Lib/http/cookies.py | 733 +++++++++++++++ Lib/http/server.py | 1141 ++++++++++++++++++++++++ Lib/httplib.py | 1132 ----------------------- Lib/logging/handlers.py | 4 +- Lib/pydoc.py | 8 +- Lib/test/test_SimpleHTTPServer.py | 4 +- Lib/test/test___all__.py | 10 +- Lib/test/test_cookie.py | 82 -- Lib/test/test_cookielib.py | 1734 ----------------------------------- Lib/test/test_docxmlrpc.py | 4 +- Lib/test/test_http_cookiejar.py | 1637 ++++++++++++++++++++++++++++++++++ Lib/test/test_http_cookies.py | 82 ++ Lib/test/test_httplib.py | 4 +- Lib/test/test_httpservers.py | 13 +- Lib/test/test_pyclbr.py | 1 - Lib/test/test_shelve.py | 2 +- Lib/test/test_socket.py | 2 +- Lib/test/test_ssl.py | 3 
+- Lib/test/test_sundry.py | 1 - Lib/test/test_urllib.py | 10 +- Lib/test/test_urllib2.py | 11 +- Lib/test/test_urllib2_localnet.py | 16 +- Lib/test/test_urllib2net.py | 8 +- Lib/test/test_xmlrpc.py | 6 +- Lib/test/test_xmlrpc_net.py | 2 +- Lib/urllib.py | 12 +- Lib/urllib2.py | 18 +- Lib/wsgiref/simple_server.py | 2 +- Lib/xmlrpc/client.py | 8 +- Lib/xmlrpc/server.py | 12 +- Misc/cheatsheet | 38 +- 60 files changed, 8482 insertions(+), 8755 deletions(-) delete mode 100644 Doc/library/basehttpserver.rst delete mode 100644 Doc/library/cgihttpserver.rst delete mode 100644 Doc/library/cookie.rst delete mode 100644 Doc/library/cookielib.rst create mode 100644 Doc/library/http.client.rst create mode 100644 Doc/library/http.cookiejar.rst create mode 100644 Doc/library/http.cookies.rst create mode 100644 Doc/library/http.server.rst delete mode 100644 Doc/library/httplib.rst delete mode 100644 Doc/library/simplehttpserver.rst delete mode 100644 Lib/BaseHTTPServer.py delete mode 100644 Lib/CGIHTTPServer.py delete mode 100644 Lib/Cookie.py delete mode 100644 Lib/SimpleHTTPServer.py delete mode 100644 Lib/cookielib.py create mode 100644 Lib/http/__init__.py create mode 100644 Lib/http/client.py create mode 100644 Lib/http/cookiejar.py create mode 100644 Lib/http/cookies.py create mode 100644 Lib/http/server.py delete mode 100644 Lib/httplib.py delete mode 100644 Lib/test/test_cookie.py delete mode 100644 Lib/test/test_cookielib.py create mode 100644 Lib/test/test_http_cookiejar.py create mode 100644 Lib/test/test_http_cookies.py diff --git a/Doc/howto/urllib2.rst b/Doc/howto/urllib2.rst index 4ba3932..0940d82 100644 --- a/Doc/howto/urllib2.rst +++ b/Doc/howto/urllib2.rst @@ -230,7 +230,7 @@ Because the default handlers handle redirects (codes in the 300 range), and codes in the 100-299 range indicate success, you will usually only see error codes in the 400-599 range. 
-``BaseHTTPServer.BaseHTTPRequestHandler.responses`` is a useful dictionary of +:attr:`http.server.BaseHTTPRequestHandler.responses` is a useful dictionary of response codes in that shows all the response codes used by RFC 2616. The dictionary is reproduced here for convenience :: @@ -385,7 +385,7 @@ redirect. The URL of the page fetched may not be the same as the URL requested. **info** - this returns a dictionary-like object that describes the page fetched, particularly the headers sent by the server. It is currently an -``httplib.HTTPMessage`` instance. +``http.client.HTTPMessage`` instance. Typical headers include 'Content-length', 'Content-type', and so on. See the `Quick Reference to HTTP Headers `_ @@ -526,13 +526,13 @@ Sockets and Layers ================== The Python support for fetching resources from the web is layered. urllib2 uses -the httplib library, which in turn uses the socket library. +the http.client library, which in turn uses the socket library. As of Python 2.3 you can specify how long a socket should wait for a response before timing out. This can be useful in applications which have to fetch web pages. By default the socket module has *no timeout* and can hang. Currently, -the socket timeout is not exposed at the httplib or urllib2 levels. However, -you can set the default timeout globally for all sockets using :: +the socket timeout is not exposed at the http.client or urllib2 levels. +However, you can set the default timeout globally for all sockets using :: import socket import urllib2 diff --git a/Doc/library/basehttpserver.rst b/Doc/library/basehttpserver.rst deleted file mode 100644 index 52a3866..0000000 --- a/Doc/library/basehttpserver.rst +++ /dev/null @@ -1,265 +0,0 @@ - -:mod:`BaseHTTPServer` --- Basic HTTP server -=========================================== - -.. module:: BaseHTTPServer - :synopsis: Basic HTTP server (base class for SimpleHTTPServer and CGIHTTPServer). - - -.. 
index:: - pair: WWW; server - pair: HTTP; protocol - single: URL - single: httpd - -.. index:: - module: SimpleHTTPServer - module: CGIHTTPServer - -This module defines two classes for implementing HTTP servers (Web servers). -Usually, this module isn't used directly, but is used as a basis for building -functioning Web servers. See the :mod:`SimpleHTTPServer` and -:mod:`CGIHTTPServer` modules. - -The first class, :class:`HTTPServer`, is a :class:`socketserver.TCPServer` -subclass. It creates and listens at the HTTP socket, dispatching the requests -to a handler. Code to create and run the server looks like this:: - - def run(server_class=BaseHTTPServer.HTTPServer, - handler_class=BaseHTTPServer.BaseHTTPRequestHandler): - server_address = ('', 8000) - httpd = server_class(server_address, handler_class) - httpd.serve_forever() - - -.. class:: HTTPServer(server_address, RequestHandlerClass) - - This class builds on the :class:`TCPServer` class by storing the server - address as instance variables named :attr:`server_name` and - :attr:`server_port`. The server is accessible by the handler, typically - through the handler's :attr:`server` instance variable. - - -.. class:: BaseHTTPRequestHandler(request, client_address, server) - - This class is used to handle the HTTP requests that arrive at the server. By - itself, it cannot respond to any actual HTTP requests; it must be subclassed - to handle each request method (e.g. GET or - POST). :class:`BaseHTTPRequestHandler` provides a number of class and - instance variables, and methods for use by subclasses. - - The handler will parse the request and the headers, then call a method - specific to the request type. The method name is constructed from the - request. For example, for the request method ``SPAM``, the :meth:`do_SPAM` - method will be called with no arguments. All of the relevant information is - stored in instance variables of the handler. 
Subclasses should not need to - override or extend the :meth:`__init__` method. - - :class:`BaseHTTPRequestHandler` has the following instance variables: - - - .. attribute:: client_address - - Contains a tuple of the form ``(host, port)`` referring to the client's - address. - - - .. attribute:: command - - Contains the command (request type). For example, ``'GET'``. - - - .. attribute:: path - - Contains the request path. - - - .. attribute:: request_version - - Contains the version string from the request. For example, ``'HTTP/1.0'``. - - - .. attribute:: headers - - Holds an instance of the class specified by the :attr:`MessageClass` class - variable. This instance parses and manages the headers in the HTTP - request. - - - .. attribute:: rfile - - Contains an input stream, positioned at the start of the optional input - data. - - - .. attribute:: wfile - - Contains the output stream for writing a response back to the - client. Proper adherence to the HTTP protocol must be used when writing to - this stream. - - - :class:`BaseHTTPRequestHandler` has the following class variables: - - - .. attribute:: server_version - - Specifies the server software version. You may want to override this. The - format is multiple whitespace-separated strings, where each string is of - the form name[/version]. For example, ``'BaseHTTP/0.2'``. - - - .. attribute:: sys_version - - Contains the Python system version, in a form usable by the - :attr:`version_string` method and the :attr:`server_version` class - variable. For example, ``'Python/1.4'``. - - - .. attribute:: error_message_format - - Specifies a format string for building an error response to the client. It - uses parenthesized, keyed format specifiers, so the format operand must be - a dictionary. The *code* key should be an integer, specifying the numeric - HTTP error code value. 
*message* should be a string containing a - (detailed) error message of what occurred, and *explain* should be an - explanation of the error code number. Default *message* and *explain* - values can found in the *responses* class variable. - - - .. attribute:: error_content_type - - Specifies the Content-Type HTTP header of error responses sent to the - client. The default value is ``'text/html'``. - - - .. attribute:: protocol_version - - This specifies the HTTP protocol version used in responses. If set to - ``'HTTP/1.1'``, the server will permit HTTP persistent connections; - however, your server *must* then include an accurate ``Content-Length`` - header (using :meth:`send_header`) in all of its responses to clients. - For backwards compatibility, the setting defaults to ``'HTTP/1.0'``. - - - .. attribute:: MessageClass - - .. index:: single: Message (in module mimetools) - - Specifies a :class:`rfc822.Message`\ -like class to parse HTTP headers. - Typically, this is not overridden, and it defaults to - :class:`mimetools.Message`. - - - .. attribute:: responses - - This variable contains a mapping of error code integers to two-element tuples - containing a short and long message. For example, ``{code: (shortmessage, - longmessage)}``. The *shortmessage* is usually used as the *message* key in an - error response, and *longmessage* as the *explain* key (see the - :attr:`error_message_format` class variable). - - - A :class:`BaseHTTPRequestHandler` instance has the following methods: - - - .. method:: handle() - - Calls :meth:`handle_one_request` once (or, if persistent connections are - enabled, multiple times) to handle incoming HTTP requests. You should - never need to override it; instead, implement appropriate :meth:`do_\*` - methods. - - - .. method:: handle_one_request() - - This method will parse and dispatch the request to the appropriate - :meth:`do_\*` method. You should never need to override it. - - - .. 
method:: send_error(code[, message]) - - Sends and logs a complete error reply to the client. The numeric *code* - specifies the HTTP error code, with *message* as optional, more specific text. A - complete set of headers is sent, followed by text composed using the - :attr:`error_message_format` class variable. - - - .. method:: send_response(code[, message]) - - Sends a response header and logs the accepted request. The HTTP response - line is sent, followed by *Server* and *Date* headers. The values for - these two headers are picked up from the :meth:`version_string` and - :meth:`date_time_string` methods, respectively. - - - .. method:: send_header(keyword, value) - - Writes a specific HTTP header to the output stream. *keyword* should - specify the header keyword, with *value* specifying its value. - - - .. method:: end_headers() - - Sends a blank line, indicating the end of the HTTP headers in the - response. - - - .. method:: log_request([code[, size]]) - - Logs an accepted (successful) request. *code* should specify the numeric - HTTP code associated with the response. If a size of the response is - available, then it should be passed as the *size* parameter. - - - .. method:: log_error(...) - - Logs an error when a request cannot be fulfilled. By default, it passes - the message to :meth:`log_message`, so it takes the same arguments - (*format* and additional values). - - - .. method:: log_message(format, ...) - - Logs an arbitrary message to ``sys.stderr``. This is typically overridden - to create custom error logging mechanisms. The *format* argument is a - standard printf-style format string, where the additional arguments to - :meth:`log_message` are applied as inputs to the formatting. The client - address and current date and time are prefixed to every message logged. - - - .. method:: version_string() - - Returns the server software's version string. This is a combination of the - :attr:`server_version` and :attr:`sys_version` class variables. 
- - - .. method:: date_time_string([timestamp]) - - Returns the date and time given by *timestamp* (which must be in the - format returned by :func:`time.time`), formatted for a message header. If - *timestamp* is omitted, it uses the current date and time. - - The result looks like ``'Sun, 06 Nov 1994 08:49:37 GMT'``. - - - .. method:: log_date_time_string() - - Returns the current date and time, formatted for logging. - - - .. method:: address_string() - - Returns the client address, formatted for logging. A name lookup is - performed on the client's IP address. - - -.. seealso:: - - Module :mod:`CGIHTTPServer` - Extended request handler that supports CGI scripts. - - Module :mod:`SimpleHTTPServer` - Basic request handler that limits response to files actually under the document - root. - diff --git a/Doc/library/cgihttpserver.rst b/Doc/library/cgihttpserver.rst deleted file mode 100644 index 6275c1a..0000000 --- a/Doc/library/cgihttpserver.rst +++ /dev/null @@ -1,73 +0,0 @@ - -:mod:`CGIHTTPServer` --- CGI-capable HTTP request handler -========================================================= - -.. module:: CGIHTTPServer - :synopsis: This module provides a request handler for HTTP servers which can run CGI - scripts. -.. sectionauthor:: Moshe Zadka - - -The :mod:`CGIHTTPServer` module defines a request-handler class, interface -compatible with :class:`BaseHTTPServer.BaseHTTPRequestHandler` and inherits -behavior from :class:`SimpleHTTPServer.SimpleHTTPRequestHandler` but can also -run CGI scripts. - -.. note:: - - This module can run CGI scripts on Unix and Windows systems; on Mac OS it will - only be able to run Python scripts within the same process as itself. - -.. note:: - - CGI scripts run by the :class:`CGIHTTPRequestHandler` class cannot execute - redirects (HTTP code 302), because code 200 (script output follows) is sent - prior to execution of the CGI script. This pre-empts the status code. 
- -The :mod:`CGIHTTPServer` module defines the following class: - - -.. class:: CGIHTTPRequestHandler(request, client_address, server) - - This class is used to serve either files or output of CGI scripts from the - current directory and below. Note that mapping HTTP hierarchic structure to - local directory structure is exactly as in - :class:`SimpleHTTPServer.SimpleHTTPRequestHandler`. - - The class will however, run the CGI script, instead of serving it as a file, if - it guesses it to be a CGI script. Only directory-based CGI are used --- the - other common server configuration is to treat special extensions as denoting CGI - scripts. - - The :func:`do_GET` and :func:`do_HEAD` functions are modified to run CGI scripts - and serve the output, instead of serving files, if the request leads to - somewhere below the ``cgi_directories`` path. - - The :class:`CGIHTTPRequestHandler` defines the following data member: - - - .. attribute:: cgi_directories - - This defaults to ``['/cgi-bin', '/htbin']`` and describes directories to - treat as containing CGI scripts. - - The :class:`CGIHTTPRequestHandler` defines the following methods: - - - .. method:: do_POST() - - This method serves the ``'POST'`` request type, only allowed for CGI - scripts. Error 501, "Can only POST to CGI scripts", is output when trying - to POST to a non-CGI url. - -Note that CGI scripts will be run with UID of user nobody, for security reasons. -Problems with the CGI script will be translated to error 403. - -For example usage, see the implementation of the :func:`test` function. - - -.. seealso:: - - Module :mod:`BaseHTTPServer` - Base class implementation for Web server and request handler. - diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst index 0745c66..42273051 100644 --- a/Doc/library/codecs.rst +++ b/Doc/library/codecs.rst @@ -1159,8 +1159,8 @@ convert between Unicode and the ACE. 
Furthermore, the :mod:`socket` module transparently converts Unicode host names to ACE, so that applications need not be concerned about converting host names themselves when they pass them to the socket module. On top of that, modules that have host names as function -parameters, such as :mod:`httplib` and :mod:`ftplib`, accept Unicode host names -(:mod:`httplib` then also transparently sends an IDNA hostname in the +parameters, such as :mod:`http.client` and :mod:`ftplib`, accept Unicode host +names (:mod:`http.client` then also transparently sends an IDNA hostname in the :mailheader:`Host` field if it sends that field at all). When receiving host names from the wire (such as in reverse name lookup), no diff --git a/Doc/library/cookie.rst b/Doc/library/cookie.rst deleted file mode 100644 index ccc2faa..0000000 --- a/Doc/library/cookie.rst +++ /dev/null @@ -1,281 +0,0 @@ - -:mod:`Cookie` --- HTTP state management -======================================= - -.. module:: Cookie - :synopsis: Support for HTTP state management (cookies). -.. moduleauthor:: Timothy O'Malley -.. sectionauthor:: Moshe Zadka - - -The :mod:`Cookie` module defines classes for abstracting the concept of -cookies, an HTTP state management mechanism. It supports both simple string-only -cookies, and provides an abstraction for having any serializable data-type as -cookie value. - -The module formerly strictly applied the parsing rules described in the -:rfc:`2109` and :rfc:`2068` specifications. It has since been discovered that -MSIE 3.0x doesn't follow the character rules outlined in those specs. As a -result, the parsing rules used are a bit less strict. - - -.. exception:: CookieError - - Exception failing because of :rfc:`2109` invalidity: incorrect attributes, - incorrect :mailheader:`Set-Cookie` header, etc. - - -.. class:: BaseCookie([input]) - - This class is a dictionary-like object whose keys are strings and whose values - are :class:`Morsel` instances. 
Note that upon setting a key to a value, the - value is first converted to a :class:`Morsel` containing the key and the value. - - If *input* is given, it is passed to the :meth:`load` method. - - -.. class:: SimpleCookie([input]) - - This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` - and :meth:`value_encode` to be the identity and :func:`str` respectively. - - -.. class:: SerialCookie([input]) - - This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` - and :meth:`value_encode` to be the :func:`pickle.loads` and - :func:`pickle.dumps`. - - .. deprecated:: 2.3 - Reading pickled values from untrusted cookie data is a huge security hole, as - pickle strings can be crafted to cause arbitrary code to execute on your server. - It is supported for backwards compatibility only, and may eventually go away. - - -.. class:: SmartCookie([input]) - - This class derives from :class:`BaseCookie`. It overrides :meth:`value_decode` - to be :func:`pickle.loads` if it is a valid pickle, and otherwise the value - itself. It overrides :meth:`value_encode` to be :func:`pickle.dumps` unless it - is a string, in which case it returns the value itself. - - .. deprecated:: 2.3 - The same security warning from :class:`SerialCookie` applies here. - -A further security note is warranted. For backwards compatibility, the -:mod:`Cookie` module exports a class named :class:`Cookie` which is just an -alias for :class:`SmartCookie`. This is probably a mistake and will likely be -removed in a future version. You should not use the :class:`Cookie` class in -your applications, for the same reason why you should not use the -:class:`SerialCookie` class. - - -.. seealso:: - - Module :mod:`cookielib` - HTTP cookie handling for web *clients*. The :mod:`cookielib` and :mod:`Cookie` - modules do not depend on each other. - - :rfc:`2109` - HTTP State Management Mechanism - This is the state management specification implemented by this module. - - -.. 
_cookie-objects: - -Cookie Objects --------------- - - -.. method:: BaseCookie.value_decode(val) - - Return a decoded value from a string representation. Return value can be any - type. This method does nothing in :class:`BaseCookie` --- it exists so it can be - overridden. - - -.. method:: BaseCookie.value_encode(val) - - Return an encoded value. *val* can be any type, but return value must be a - string. This method does nothing in :class:`BaseCookie` --- it exists so it can - be overridden - - In general, it should be the case that :meth:`value_encode` and - :meth:`value_decode` are inverses on the range of *value_decode*. - - -.. method:: BaseCookie.output([attrs[, header[, sep]]]) - - Return a string representation suitable to be sent as HTTP headers. *attrs* and - *header* are sent to each :class:`Morsel`'s :meth:`output` method. *sep* is used - to join the headers together, and is by default the combination ``'\r\n'`` - (CRLF). - - -.. method:: BaseCookie.js_output([attrs]) - - Return an embeddable JavaScript snippet, which, if run on a browser which - supports JavaScript, will act the same as if the HTTP headers was sent. - - The meaning for *attrs* is the same as in :meth:`output`. - - -.. method:: BaseCookie.load(rawdata) - - If *rawdata* is a string, parse it as an ``HTTP_COOKIE`` and add the values - found there as :class:`Morsel`\ s. If it is a dictionary, it is equivalent to:: - - for k, v in rawdata.items(): - cookie[k] = v - - -.. _morsel-objects: - -Morsel Objects --------------- - - -.. class:: Morsel() - - Abstract a key/value pair, which has some :rfc:`2109` attributes. - - Morsels are dictionary-like objects, whose set of keys is constant --- the valid - :rfc:`2109` attributes, which are - - * ``expires`` - * ``path`` - * ``comment`` - * ``domain`` - * ``max-age`` - * ``secure`` - * ``version`` - - The keys are case-insensitive. - - -.. attribute:: Morsel.value - - The value of the cookie. - - -.. 
attribute:: Morsel.coded_value - - The encoded value of the cookie --- this is what should be sent. - - -.. attribute:: Morsel.key - - The name of the cookie. - - -.. method:: Morsel.set(key, value, coded_value) - - Set the *key*, *value* and *coded_value* members. - - -.. method:: Morsel.isReservedKey(K) - - Whether *K* is a member of the set of keys of a :class:`Morsel`. - - -.. method:: Morsel.output([attrs[, header]]) - - Return a string representation of the Morsel, suitable to be sent as an HTTP - header. By default, all the attributes are included, unless *attrs* is given, in - which case it should be a list of attributes to use. *header* is by default - ``"Set-Cookie:"``. - - -.. method:: Morsel.js_output([attrs]) - - Return an embeddable JavaScript snippet, which, if run on a browser which - supports JavaScript, will act the same as if the HTTP header was sent. - - The meaning for *attrs* is the same as in :meth:`output`. - - -.. method:: Morsel.OutputString([attrs]) - - Return a string representing the Morsel, without any surrounding HTTP or - JavaScript. - - The meaning for *attrs* is the same as in :meth:`output`. - - -.. _cookie-example: - -Example -------- - -The following example demonstrates how to use the :mod:`Cookie` module. - -.. 
doctest:: - :options: +NORMALIZE_WHITESPACE - - >>> import Cookie - >>> C = Cookie.SimpleCookie() - >>> C = Cookie.SerialCookie() - >>> C = Cookie.SmartCookie() - >>> C["fig"] = "newton" - >>> C["sugar"] = "wafer" - >>> print(C) # generate HTTP headers - Set-Cookie: fig=newton - Set-Cookie: sugar=wafer - >>> print(C.output()) # same thing - Set-Cookie: fig=newton - Set-Cookie: sugar=wafer - >>> C = Cookie.SmartCookie() - >>> C["rocky"] = "road" - >>> C["rocky"]["path"] = "/cookie" - >>> print(C.output(header="Cookie:")) - Cookie: rocky=road; Path=/cookie - >>> print(C.output(attrs=[], header="Cookie:")) - Cookie: rocky=road - >>> C = Cookie.SmartCookie() - >>> C.load("chips=ahoy; vienna=finger") # load from a string (HTTP header) - >>> print(C) - Set-Cookie: chips=ahoy - Set-Cookie: vienna=finger - >>> C = Cookie.SmartCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') - >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" - >>> C = Cookie.SmartCookie() - >>> C["oreo"] = "doublestuff" - >>> C["oreo"]["path"] = "/" - >>> print(C) - Set-Cookie: oreo=doublestuff; Path=/ - >>> C = Cookie.SmartCookie() - >>> C["twix"] = "none for you" - >>> C["twix"].value - 'none for you' - >>> C = Cookie.SimpleCookie() - >>> C["number"] = 7 # equivalent to C["number"] = str(7) - >>> C["string"] = "seven" - >>> C["number"].value - '7' - >>> C["string"].value - 'seven' - >>> print(C) - Set-Cookie: number=7 - Set-Cookie: string=seven - >>> C = Cookie.SerialCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - 7 - >>> C["string"].value - 'seven' - >>> print(C) - Set-Cookie: number="I7\012." - Set-Cookie: string="S'seven'\012p1\012." - >>> C = Cookie.SmartCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - 7 - >>> C["string"].value - 'seven' - >>> print(C) - Set-Cookie: number="I7\012." 
- Set-Cookie: string=seven - diff --git a/Doc/library/cookielib.rst b/Doc/library/cookielib.rst deleted file mode 100644 index c8e7298..0000000 --- a/Doc/library/cookielib.rst +++ /dev/null @@ -1,750 +0,0 @@ - -:mod:`cookielib` --- Cookie handling for HTTP clients -===================================================== - -.. module:: cookielib - :synopsis: Classes for automatic handling of HTTP cookies. -.. moduleauthor:: John J. Lee -.. sectionauthor:: John J. Lee - - -The :mod:`cookielib` module defines classes for automatic handling of HTTP -cookies. It is useful for accessing web sites that require small pieces of data --- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a -web server, and then returned to the server in later HTTP requests. - -Both the regular Netscape cookie protocol and the protocol defined by -:rfc:`2965` are handled. RFC 2965 handling is switched off by default. -:rfc:`2109` cookies are parsed as Netscape cookies and subsequently treated -either as Netscape or RFC 2965 cookies according to the 'policy' in effect. -Note that the great majority of cookies on the Internet are Netscape cookies. -:mod:`cookielib` attempts to follow the de-facto Netscape cookie protocol (which -differs substantially from that set out in the original Netscape specification), -including taking note of the ``max-age`` and ``port`` cookie-attributes -introduced with RFC 2965. - -.. note:: - - The various named parameters found in :mailheader:`Set-Cookie` and - :mailheader:`Set-Cookie2` headers (eg. ``domain`` and ``expires``) are - conventionally referred to as :dfn:`attributes`. To distinguish them from - Python attributes, the documentation for this module uses the term - :dfn:`cookie-attribute` instead. - - -The module defines the following exception: - - -.. exception:: LoadError - - Instances of :class:`FileCookieJar` raise this exception on failure to load - cookies from a file. :exc:`LoadError` is a subclass of :exc:`IOError`. 
- - -The following classes are provided: - - -.. class:: CookieJar(policy=None) - - *policy* is an object implementing the :class:`CookiePolicy` interface. - - The :class:`CookieJar` class stores HTTP cookies. It extracts cookies from HTTP - requests, and returns them in HTTP responses. :class:`CookieJar` instances - automatically expire contained cookies when necessary. Subclasses are also - responsible for storing and retrieving cookies from a file or database. - - -.. class:: FileCookieJar(filename, delayload=None, policy=None) - - *policy* is an object implementing the :class:`CookiePolicy` interface. For the - other arguments, see the documentation for the corresponding attributes. - - A :class:`CookieJar` which can load cookies from, and perhaps save cookies to, a - file on disk. Cookies are **NOT** loaded from the named file until either the - :meth:`load` or :meth:`revert` method is called. Subclasses of this class are - documented in section :ref:`file-cookie-jar-classes`. - - -.. class:: CookiePolicy() - - This class is responsible for deciding whether each cookie should be accepted - from / returned to the server. - - -.. class:: DefaultCookiePolicy( blocked_domains=None, allowed_domains=None, netscape=True, rfc2965=False, rfc2109_as_netscape=None, hide_cookie2=False, strict_domain=False, strict_rfc2965_unverifiable=True, strict_ns_unverifiable=False, strict_ns_domain=DefaultCookiePolicy.DomainLiberal, strict_ns_set_initial_dollar=False, strict_ns_set_path=False ) - - Constructor arguments should be passed as keyword arguments only. - *blocked_domains* is a sequence of domain names that we never accept cookies - from, nor return cookies to. *allowed_domains* if not :const:`None`, this is a - sequence of the only domains for which we accept and return cookies. For all - other arguments, see the documentation for :class:`CookiePolicy` and - :class:`DefaultCookiePolicy` objects. 
- - :class:`DefaultCookiePolicy` implements the standard accept / reject rules for - Netscape and RFC 2965 cookies. By default, RFC 2109 cookies (ie. cookies - received in a :mailheader:`Set-Cookie` header with a version cookie-attribute of - 1) are treated according to the RFC 2965 rules. However, if RFC 2965 handling - is turned off or :attr:`rfc2109_as_netscape` is True, RFC 2109 cookies are - 'downgraded' by the :class:`CookieJar` instance to Netscape cookies, by - setting the :attr:`version` attribute of the :class:`Cookie` instance to 0. - :class:`DefaultCookiePolicy` also provides some parameters to allow some - fine-tuning of policy. - - -.. class:: Cookie() - - This class represents Netscape, RFC 2109 and RFC 2965 cookies. It is not - expected that users of :mod:`cookielib` construct their own :class:`Cookie` - instances. Instead, if necessary, call :meth:`make_cookies` on a - :class:`CookieJar` instance. - - -.. seealso:: - - Module :mod:`urllib2` - URL opening with automatic cookie handling. - - Module :mod:`Cookie` - HTTP cookie classes, principally useful for server-side code. The - :mod:`cookielib` and :mod:`Cookie` modules do not depend on each other. - - http://wwwsearch.sf.net/ClientCookie/ - Extensions to this module, including a class for reading Microsoft Internet - Explorer cookies on Windows. - - http://wp.netscape.com/newsref/std/cookie_spec.html - The specification of the original Netscape cookie protocol. Though this is - still the dominant protocol, the 'Netscape cookie protocol' implemented by all - the major browsers (and :mod:`cookielib`) only bears a passing resemblance to - the one sketched out in ``cookie_spec.html``. - - :rfc:`2109` - HTTP State Management Mechanism - Obsoleted by RFC 2965. Uses :mailheader:`Set-Cookie` with version=1. - - :rfc:`2965` - HTTP State Management Mechanism - The Netscape protocol with the bugs fixed. Uses :mailheader:`Set-Cookie2` in - place of :mailheader:`Set-Cookie`. Not widely used. 
- - http://kristol.org/cookie/errata.html - Unfinished errata to RFC 2965. - - :rfc:`2964` - Use of HTTP State Management - -.. _cookie-jar-objects: - -CookieJar and FileCookieJar Objects ------------------------------------ - -:class:`CookieJar` objects support the :term:`iterator` protocol for iterating over -contained :class:`Cookie` objects. - -:class:`CookieJar` has the following methods: - - -.. method:: CookieJar.add_cookie_header(request) - - Add correct :mailheader:`Cookie` header to *request*. - - If policy allows (ie. the :attr:`rfc2965` and :attr:`hide_cookie2` attributes of - the :class:`CookieJar`'s :class:`CookiePolicy` instance are true and false - respectively), the :mailheader:`Cookie2` header is also added when appropriate. - - The *request* object (usually a :class:`urllib2.Request` instance) must support - the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`get_type`, - :meth:`unverifiable`, :meth:`get_origin_req_host`, :meth:`has_header`, - :meth:`get_header`, :meth:`header_items`, and :meth:`add_unredirected_header`,as - documented by :mod:`urllib2`. - - -.. method:: CookieJar.extract_cookies(response, request) - - Extract cookies from HTTP *response* and store them in the :class:`CookieJar`, - where allowed by policy. - - The :class:`CookieJar` will look for allowable :mailheader:`Set-Cookie` and - :mailheader:`Set-Cookie2` headers in the *response* argument, and store cookies - as appropriate (subject to the :meth:`CookiePolicy.set_ok` method's approval). - - The *response* object (usually the result of a call to :meth:`urllib2.urlopen`, - or similar) should support an :meth:`info` method, which returns an object with - a :meth:`getallmatchingheaders` method (usually a :class:`mimetools.Message` - instance). - - The *request* object (usually a :class:`urllib2.Request` instance) must support - the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`unverifiable`, and - :meth:`get_origin_req_host`, as documented by :mod:`urllib2`. 
The request is - used to set default values for cookie-attributes as well as for checking that - the cookie is allowed to be set. - - -.. method:: CookieJar.set_policy(policy) - - Set the :class:`CookiePolicy` instance to be used. - - -.. method:: CookieJar.make_cookies(response, request) - - Return sequence of :class:`Cookie` objects extracted from *response* object. - - See the documentation for :meth:`extract_cookies` for the interfaces required of - the *response* and *request* arguments. - - -.. method:: CookieJar.set_cookie_if_ok(cookie, request) - - Set a :class:`Cookie` if policy says it's OK to do so. - - -.. method:: CookieJar.set_cookie(cookie) - - Set a :class:`Cookie`, without checking with policy to see whether or not it - should be set. - - -.. method:: CookieJar.clear([domain[, path[, name]]]) - - Clear some cookies. - - If invoked without arguments, clear all cookies. If given a single argument, - only cookies belonging to that *domain* will be removed. If given two arguments, - cookies belonging to the specified *domain* and URL *path* are removed. If - given three arguments, then the cookie with the specified *domain*, *path* and - *name* is removed. - - Raises :exc:`KeyError` if no matching cookie exists. - - -.. method:: CookieJar.clear_session_cookies() - - Discard all session cookies. - - Discards all contained cookies that have a true :attr:`discard` attribute - (usually because they had either no ``max-age`` or ``expires`` cookie-attribute, - or an explicit ``discard`` cookie-attribute). For interactive browsers, the end - of a session usually corresponds to closing the browser window. - - Note that the :meth:`save` method won't save session cookies anyway, unless you - ask otherwise by passing a true *ignore_discard* argument. - -:class:`FileCookieJar` implements the following additional methods: - - -.. method:: FileCookieJar.save(filename=None, ignore_discard=False, ignore_expires=False) - - Save cookies to a file. 
- - This base class raises :exc:`NotImplementedError`. Subclasses may leave this - method unimplemented. - - *filename* is the name of file in which to save cookies. If *filename* is not - specified, :attr:`self.filename` is used (whose default is the value passed to - the constructor, if any); if :attr:`self.filename` is :const:`None`, - :exc:`ValueError` is raised. - - *ignore_discard*: save even cookies set to be discarded. *ignore_expires*: save - even cookies that have expired - - The file is overwritten if it already exists, thus wiping all the cookies it - contains. Saved cookies can be restored later using the :meth:`load` or - :meth:`revert` methods. - - -.. method:: FileCookieJar.load(filename=None, ignore_discard=False, ignore_expires=False) - - Load cookies from a file. - - Old cookies are kept unless overwritten by newly loaded ones. - - Arguments are as for :meth:`save`. - - The named file must be in the format understood by the class, or - :exc:`LoadError` will be raised. Also, :exc:`IOError` may be raised, for - example if the file does not exist. - - -.. method:: FileCookieJar.revert(filename=None, ignore_discard=False, ignore_expires=False) - - Clear all cookies and reload cookies from a saved file. - - :meth:`revert` can raise the same exceptions as :meth:`load`. If there is a - failure, the object's state will not be altered. - -:class:`FileCookieJar` instances have the following public attributes: - - -.. attribute:: FileCookieJar.filename - - Filename of default file in which to keep cookies. This attribute may be - assigned to. - - -.. attribute:: FileCookieJar.delayload - - If true, load cookies lazily from disk. This attribute should not be assigned - to. This is only a hint, since this only affects performance, not behaviour - (unless the cookies on disk are changing). A :class:`CookieJar` object may - ignore it. None of the :class:`FileCookieJar` classes included in the standard - library lazily loads cookies. - - -.. 
_file-cookie-jar-classes: - -FileCookieJar subclasses and co-operation with web browsers ------------------------------------------------------------ - -The following :class:`CookieJar` subclasses are provided for reading and writing -. Further :class:`CookieJar` subclasses, including one that reads Microsoft -Internet Explorer cookies, are available at -http://wwwsearch.sf.net/ClientCookie/. - - -.. class:: MozillaCookieJar(filename, delayload=None, policy=None) - - A :class:`FileCookieJar` that can load from and save cookies to disk in the - Mozilla ``cookies.txt`` file format (which is also used by the Lynx and Netscape - browsers). - - .. note:: - - This loses information about RFC 2965 cookies, and also about newer or - non-standard cookie-attributes such as ``port``. - - .. warning:: - - Back up your cookies before saving if you have cookies whose loss / corruption - would be inconvenient (there are some subtleties which may lead to slight - changes in the file over a load / save round-trip). - - Also note that cookies saved while Mozilla is running will get clobbered by - Mozilla. - - -.. class:: LWPCookieJar(filename, delayload=None, policy=None) - - A :class:`FileCookieJar` that can load from and save cookies to disk in format - compatible with the libwww-perl library's ``Set-Cookie3`` file format. This is - convenient if you want to store cookies in a human-readable file. - - -.. _cookie-policy-objects: - -CookiePolicy Objects --------------------- - -Objects implementing the :class:`CookiePolicy` interface have the following -methods: - - -.. method:: CookiePolicy.set_ok(cookie, request) - - Return boolean value indicating whether cookie should be accepted from server. - - *cookie* is a :class:`cookielib.Cookie` instance. *request* is an object - implementing the interface defined by the documentation for - :meth:`CookieJar.extract_cookies`. - - -.. 
method:: CookiePolicy.return_ok(cookie, request) - - Return boolean value indicating whether cookie should be returned to server. - - *cookie* is a :class:`cookielib.Cookie` instance. *request* is an object - implementing the interface defined by the documentation for - :meth:`CookieJar.add_cookie_header`. - - -.. method:: CookiePolicy.domain_return_ok(domain, request) - - Return false if cookies should not be returned, given cookie domain. - - This method is an optimization. It removes the need for checking every cookie - with a particular domain (which might involve reading many files). Returning - true from :meth:`domain_return_ok` and :meth:`path_return_ok` leaves all the - work to :meth:`return_ok`. - - If :meth:`domain_return_ok` returns true for the cookie domain, - :meth:`path_return_ok` is called for the cookie path. Otherwise, - :meth:`path_return_ok` and :meth:`return_ok` are never called for that cookie - domain. If :meth:`path_return_ok` returns true, :meth:`return_ok` is called - with the :class:`Cookie` object itself for a full check. Otherwise, - :meth:`return_ok` is never called for that cookie path. - - Note that :meth:`domain_return_ok` is called for every *cookie* domain, not just - for the *request* domain. For example, the function might be called with both - ``".example.com"`` and ``"www.example.com"`` if the request domain is - ``"www.example.com"``. The same goes for :meth:`path_return_ok`. - - The *request* argument is as documented for :meth:`return_ok`. - - -.. method:: CookiePolicy.path_return_ok(path, request) - - Return false if cookies should not be returned, given cookie path. - - See the documentation for :meth:`domain_return_ok`. - -In addition to implementing the methods above, implementations of the -:class:`CookiePolicy` interface must also supply the following attributes, -indicating which protocols should be used, and how. All of these attributes may -be assigned to. - - -.. 
attribute:: CookiePolicy.netscape - - Implement Netscape protocol. - - -.. attribute:: CookiePolicy.rfc2965 - - Implement RFC 2965 protocol. - - -.. attribute:: CookiePolicy.hide_cookie2 - - Don't add :mailheader:`Cookie2` header to requests (the presence of this header - indicates to the server that we understand RFC 2965 cookies). - -The most useful way to define a :class:`CookiePolicy` class is by subclassing -from :class:`DefaultCookiePolicy` and overriding some or all of the methods -above. :class:`CookiePolicy` itself may be used as a 'null policy' to allow -setting and receiving any and all cookies (this is unlikely to be useful). - - -.. _default-cookie-policy-objects: - -DefaultCookiePolicy Objects ---------------------------- - -Implements the standard rules for accepting and returning cookies. - -Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is switched -off by default. - -The easiest way to provide your own policy is to override this class and call -its methods in your overridden implementations before adding your own additional -checks:: - - import cookielib - class MyCookiePolicy(cookielib.DefaultCookiePolicy): - def set_ok(self, cookie, request): - if not cookielib.DefaultCookiePolicy.set_ok(self, cookie, request): - return False - if i_dont_want_to_store_this_cookie(cookie): - return False - return True - -In addition to the features required to implement the :class:`CookiePolicy` -interface, this class allows you to block and allow domains from setting and -receiving cookies. There are also some strictness switches that allow you to -tighten up the rather loose Netscape protocol rules a little bit (at the cost of -blocking some benign cookies). - -A domain blacklist and whitelist is provided (both off by default). Only domains -not in the blacklist and present in the whitelist (if the whitelist is active) -participate in cookie setting and returning. 
Use the *blocked_domains* -constructor argument, and :meth:`blocked_domains` and -:meth:`set_blocked_domains` methods (and the corresponding argument and methods -for *allowed_domains*). If you set a whitelist, you can turn it off again by -setting it to :const:`None`. - -Domains in block or allow lists that do not start with a dot must equal the -cookie domain to be matched. For example, ``"example.com"`` matches a blacklist -entry of ``"example.com"``, but ``"www.example.com"`` does not. Domains that do -start with a dot are matched by more specific domains too. For example, both -``"www.example.com"`` and ``"www.coyote.example.com"`` match ``".example.com"`` -(but ``"example.com"`` itself does not). IP addresses are an exception, and -must match exactly. For example, if blocked_domains contains ``"192.168.1.2"`` -and ``".168.1.2"``, 192.168.1.2 is blocked, but 193.168.1.2 is not. - -:class:`DefaultCookiePolicy` implements the following additional methods: - - -.. method:: DefaultCookiePolicy.blocked_domains() - - Return the sequence of blocked domains (as a tuple). - - -.. method:: DefaultCookiePolicy.set_blocked_domains(blocked_domains) - - Set the sequence of blocked domains. - - -.. method:: DefaultCookiePolicy.is_blocked(domain) - - Return whether *domain* is on the blacklist for setting or receiving cookies. - - -.. method:: DefaultCookiePolicy.allowed_domains() - - Return :const:`None`, or the sequence of allowed domains (as a tuple). - - -.. method:: DefaultCookiePolicy.set_allowed_domains(allowed_domains) - - Set the sequence of allowed domains, or :const:`None`. - - -.. method:: DefaultCookiePolicy.is_not_allowed(domain) - - Return whether *domain* is not on the whitelist for setting or receiving - cookies. - -:class:`DefaultCookiePolicy` instances have the following attributes, which are -all initialised from the constructor arguments of the same name, and which may -all be assigned to. - - -.. 
attribute:: DefaultCookiePolicy.rfc2109_as_netscape - - If true, request that the :class:`CookieJar` instance downgrade RFC 2109 cookies - (ie. cookies received in a :mailheader:`Set-Cookie` header with a version - cookie-attribute of 1) to Netscape cookies by setting the version attribute of - the :class:`Cookie` instance to 0. The default value is :const:`None`, in which - case RFC 2109 cookies are downgraded if and only if RFC 2965 handling is turned - off. Therefore, RFC 2109 cookies are downgraded by default. - - -General strictness switches: - -.. attribute:: DefaultCookiePolicy.strict_domain - - Don't allow sites to set two-component domains with country-code top-level - domains like ``.co.uk``, ``.gov.uk``, ``.co.nz``.etc. This is far from perfect - and isn't guaranteed to work! - - -RFC 2965 protocol strictness switches: - -.. attribute:: DefaultCookiePolicy.strict_rfc2965_unverifiable - - Follow RFC 2965 rules on unverifiable transactions (usually, an unverifiable - transaction is one resulting from a redirect or a request for an image hosted on - another site). If this is false, cookies are *never* blocked on the basis of - verifiability - - -Netscape protocol strictness switches: - -.. attribute:: DefaultCookiePolicy.strict_ns_unverifiable - - apply RFC 2965 rules on unverifiable transactions even to Netscape cookies - - -.. attribute:: DefaultCookiePolicy.strict_ns_domain - - Flags indicating how strict to be with domain-matching rules for Netscape - cookies. See below for acceptable values. - - -.. attribute:: DefaultCookiePolicy.strict_ns_set_initial_dollar - - Ignore cookies in Set-Cookie: headers that have names starting with ``'$'``. - - -.. attribute:: DefaultCookiePolicy.strict_ns_set_path - - Don't allow setting cookies whose path doesn't path-match request URI. - -:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by -or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means -both flags are set). 
- - -.. attribute:: DefaultCookiePolicy.DomainStrictNoDots - - When setting cookies, the 'host prefix' must not contain a dot (eg. - ``www.foo.bar.com`` can't set a cookie for ``.bar.com``, because ``www.foo`` - contains a dot). - - -.. attribute:: DefaultCookiePolicy.DomainStrictNonDomain - - Cookies that did not explicitly specify a ``domain`` cookie-attribute can only - be returned to a domain equal to the domain that set the cookie (eg. - ``spam.example.com`` won't be returned cookies from ``example.com`` that had no - ``domain`` cookie-attribute). - - -.. attribute:: DefaultCookiePolicy.DomainRFC2965Match - - When setting cookies, require a full RFC 2965 domain-match. - -The following attributes are provided for convenience, and are the most useful -combinations of the above flags: - - -.. attribute:: DefaultCookiePolicy.DomainLiberal - - Equivalent to 0 (ie. all of the above Netscape domain strictness flags switched - off). - - -.. attribute:: DefaultCookiePolicy.DomainStrict - - Equivalent to ``DomainStrictNoDots|DomainStrictNonDomain``. - - -.. _cookielib-cookie-objects: - -Cookie Objects --------------- - -:class:`Cookie` instances have Python attributes roughly corresponding to the -standard cookie-attributes specified in the various cookie standards. The -correspondence is not one-to-one, because there are complicated rules for -assigning default values, because the ``max-age`` and ``expires`` -cookie-attributes contain equivalent information, and because RFC 2109 cookies -may be 'downgraded' by :mod:`cookielib` from version 1 to version 0 (Netscape) -cookies. - -Assignment to these attributes should not be necessary other than in rare -circumstances in a :class:`CookiePolicy` method. The class does not enforce -internal consistency, so you should know what you're doing if you do that. - - -.. attribute:: Cookie.version - - Integer or :const:`None`. Netscape cookies have :attr:`version` 0. 
RFC 2965 and - RFC 2109 cookies have a ``version`` cookie-attribute of 1. However, note that - :mod:`cookielib` may 'downgrade' RFC 2109 cookies to Netscape cookies, in which - case :attr:`version` is 0. - - -.. attribute:: Cookie.name - - Cookie name (a string). - - -.. attribute:: Cookie.value - - Cookie value (a string), or :const:`None`. - - -.. attribute:: Cookie.port - - String representing a port or a set of ports (eg. '80', or '80,8080'), or - :const:`None`. - - -.. attribute:: Cookie.path - - Cookie path (a string, eg. ``'/acme/rocket_launchers'``). - - -.. attribute:: Cookie.secure - - True if cookie should only be returned over a secure connection. - - -.. attribute:: Cookie.expires - - Integer expiry date in seconds since epoch, or :const:`None`. See also the - :meth:`is_expired` method. - - -.. attribute:: Cookie.discard - - True if this is a session cookie. - - -.. attribute:: Cookie.comment - - String comment from the server explaining the function of this cookie, or - :const:`None`. - - -.. attribute:: Cookie.comment_url - - URL linking to a comment from the server explaining the function of this cookie, - or :const:`None`. - - -.. attribute:: Cookie.rfc2109 - - True if this cookie was received as an RFC 2109 cookie (ie. the cookie - arrived in a :mailheader:`Set-Cookie` header, and the value of the Version - cookie-attribute in that header was 1). This attribute is provided because - :mod:`cookielib` may 'downgrade' RFC 2109 cookies to Netscape cookies, in - which case :attr:`version` is 0. - - -.. attribute:: Cookie.port_specified - - True if a port or set of ports was explicitly specified by the server (in the - :mailheader:`Set-Cookie` / :mailheader:`Set-Cookie2` header). - - -.. attribute:: Cookie.domain_specified - - True if a domain was explicitly specified by the server. - - -.. attribute:: Cookie.domain_initial_dot - - True if the domain explicitly specified by the server began with a dot - (``'.'``). 
- -Cookies may have additional non-standard cookie-attributes. These may be -accessed using the following methods: - - -.. method:: Cookie.has_nonstandard_attr(name) - - Return true if cookie has the named cookie-attribute. - - -.. method:: Cookie.get_nonstandard_attr(name, default=None) - - If cookie has the named cookie-attribute, return its value. Otherwise, return - *default*. - - -.. method:: Cookie.set_nonstandard_attr(name, value) - - Set the value of the named cookie-attribute. - -The :class:`Cookie` class also defines the following method: - - -.. method:: Cookie.is_expired([now=:const:`None`]) - - True if cookie has passed the time at which the server requested it should - expire. If *now* is given (in seconds since the epoch), return whether the - cookie has expired at the specified time. - - -.. _cookielib-examples: - -Examples --------- - -The first example shows the most common usage of :mod:`cookielib`:: - - import cookielib, urllib2 - cj = cookielib.CookieJar() - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) - r = opener.open("http://example.com/") - -This example illustrates how to open a URL using your Netscape, Mozilla, or Lynx -cookies (assumes Unix/Netscape convention for location of the cookies file):: - - import os, cookielib, urllib2 - cj = cookielib.MozillaCookieJar() - cj.load(os.path.join(os.environ["HOME"], ".netscape/cookies.txt")) - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) - r = opener.open("http://example.com/") - -The next example illustrates the use of :class:`DefaultCookiePolicy`. 
Turn on -RFC 2965 cookies, be more strict about domains when setting and returning -Netscape cookies, and block some domains from setting cookies or having them -returned:: - - import urllib2 - from cookielib import CookieJar, DefaultCookiePolicy - policy = DefaultCookiePolicy( - rfc2965=True, strict_ns_domain=Policy.DomainStrict, - blocked_domains=["ads.net", ".ads.net"]) - cj = CookieJar(policy) - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)) - r = opener.open("http://example.com/") - diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst new file mode 100644 index 0000000..6903b6a --- /dev/null +++ b/Doc/library/http.client.rst @@ -0,0 +1,498 @@ +:mod:`http.client` --- HTTP protocol client +=========================================== + +.. module:: http.client + :synopsis: HTTP and HTTPS protocol client (requires sockets). + + +.. index:: + pair: HTTP; protocol + single: HTTP; http.client (standard module) + +.. index:: module: urllib + +This module defines classes which implement the client side of the HTTP and +HTTPS protocols. It is normally not used directly --- the module :mod:`urllib` +uses it to handle URLs that use HTTP and HTTPS. + +.. note:: + + HTTPS support is only available if the :mod:`socket` module was compiled with + SSL support. + +The module provides the following classes: + + +.. class:: HTTPConnection(host[, port[, strict[, timeout]]]) + + An :class:`HTTPConnection` instance represents one transaction with an HTTP + server. It should be instantiated passing it a host and optional port + number. If no port number is passed, the port is extracted from the host + string if it has the form ``host:port``, else the default HTTP port (80) is + used. When True, the optional parameter *strict* causes ``BadStatusLine`` to + be raised if the status line can't be parsed as a valid HTTP/1.0 or 1.1 + status line. 
If the optional *timeout* parameter is given, blocking + operations (like connection attempts) will timeout after that many seconds + (if it is not given or ``None``, the global default timeout setting is used). + + For example, the following calls all create instances that connect to the server + at the same host and port:: + + >>> h1 = http.client.HTTPConnection('www.cwi.nl') + >>> h2 = http.client.HTTPConnection('www.cwi.nl:80') + >>> h3 = http.client.HTTPConnection('www.cwi.nl', 80) + >>> h3 = http.client.HTTPConnection('www.cwi.nl', 80, timeout=10) + + +.. class:: HTTPSConnection(host[, port[, key_file[, cert_file[, strict[, timeout]]]]]) + + A subclass of :class:`HTTPConnection` that uses SSL for communication with + secure servers. Default port is ``443``. *key_file* is the name of a PEM + formatted file that contains your private key. *cert_file* is a PEM formatted + certificate chain file. + + .. warning:: + + This does not do any certificate verification! + + +.. class:: HTTPResponse(sock[, debuglevel=0][, strict=0]) + + Class whose instances are returned upon successful connection. Not instantiated + directly by user. + + +The following exceptions are raised as appropriate: + + +.. exception:: HTTPException + + The base class of the other exceptions in this module. It is a subclass of + :exc:`Exception`. + + +.. exception:: NotConnected + + A subclass of :exc:`HTTPException`. + + +.. exception:: InvalidURL + + A subclass of :exc:`HTTPException`, raised if a port is given and is either + non-numeric or empty. + + +.. exception:: UnknownProtocol + + A subclass of :exc:`HTTPException`. + + +.. exception:: UnknownTransferEncoding + + A subclass of :exc:`HTTPException`. + + +.. exception:: UnimplementedFileMode + + A subclass of :exc:`HTTPException`. + + +.. exception:: IncompleteRead + + A subclass of :exc:`HTTPException`. + + +.. exception:: ImproperConnectionState + + A subclass of :exc:`HTTPException`. + + +.. 
exception:: CannotSendRequest + + A subclass of :exc:`ImproperConnectionState`. + + +.. exception:: CannotSendHeader + + A subclass of :exc:`ImproperConnectionState`. + + +.. exception:: ResponseNotReady + + A subclass of :exc:`ImproperConnectionState`. + + +.. exception:: BadStatusLine + + A subclass of :exc:`HTTPException`. Raised if a server responds with a HTTP + status code that we don't understand. + +The constants defined in this module are: + + +.. data:: HTTP_PORT + + The default port for the HTTP protocol (always ``80``). + + +.. data:: HTTPS_PORT + + The default port for the HTTPS protocol (always ``443``). + +and also the following constants for integer status codes: + ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| Constant | Value | Definition | ++==========================================+=========+=======================================================================+ +| :const:`CONTINUE` | ``100`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.1.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`SWITCHING_PROTOCOLS` | ``101`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.1.2 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`PROCESSING` | ``102`` | WEBDAV, `RFC 2518, Section 10.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`OK` | ``200`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`CREATED` | ``201`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.2 | +| | | `_ | 
++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`ACCEPTED` | ``202`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.3 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NON_AUTHORITATIVE_INFORMATION` | ``203`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.4 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NO_CONTENT` | ``204`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.5 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`RESET_CONTENT` | ``205`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.6 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`PARTIAL_CONTENT` | ``206`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.2.7 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`MULTI_STATUS` | ``207`` | WEBDAV `RFC 2518, Section 10.2 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`IM_USED` | ``226`` | Delta encoding in HTTP, | +| | | :rfc:`3229`, Section 10.4.1 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`MULTIPLE_CHOICES` | ``300`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`MOVED_PERMANENTLY` | ``301`` | HTTP/1.1, `RFC 2616, Section | +| 
| | 10.3.2 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`FOUND` | ``302`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.3 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`SEE_OTHER` | ``303`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.4 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NOT_MODIFIED` | ``304`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.5 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`USE_PROXY` | ``305`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.6 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`TEMPORARY_REDIRECT` | ``307`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.3.8 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`BAD_REQUEST` | ``400`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`UNAUTHORIZED` | ``401`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.2 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`PAYMENT_REQUIRED` | ``402`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.3 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`FORBIDDEN` | ``403`` | HTTP/1.1, `RFC 2616, 
Section | +| | | 10.4.4 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NOT_FOUND` | ``404`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.5 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`METHOD_NOT_ALLOWED` | ``405`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.6 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NOT_ACCEPTABLE` | ``406`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.7 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`PROXY_AUTHENTICATION_REQUIRED` | ``407`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.8 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`REQUEST_TIMEOUT` | ``408`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.9 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`CONFLICT` | ``409`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.10 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`GONE` | ``410`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.11 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`LENGTH_REQUIRED` | ``411`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.12 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| 
:const:`PRECONDITION_FAILED` | ``412`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.13 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`REQUEST_ENTITY_TOO_LARGE` | ``413`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.14 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`REQUEST_URI_TOO_LONG` | ``414`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.15 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`UNSUPPORTED_MEDIA_TYPE` | ``415`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.16 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`REQUESTED_RANGE_NOT_SATISFIABLE` | ``416`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.17 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`EXPECTATION_FAILED` | ``417`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.4.18 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`UNPROCESSABLE_ENTITY` | ``422`` | WEBDAV, `RFC 2518, Section 10.3 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`LOCKED` | ``423`` | WEBDAV `RFC 2518, Section 10.4 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`FAILED_DEPENDENCY` | ``424`` | WEBDAV, `RFC 2518, Section 10.5 | +| | | `_ | 
++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, | +| | | :rfc:`2817`, Section 6 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.5.1 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NOT_IMPLEMENTED` | ``501`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.5.2 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`BAD_GATEWAY` | ``502`` | HTTP/1.1 `RFC 2616, Section | +| | | 10.5.3 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`SERVICE_UNAVAILABLE` | ``503`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.5.4 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`GATEWAY_TIMEOUT` | ``504`` | HTTP/1.1 `RFC 2616, Section | +| | | 10.5.5 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`HTTP_VERSION_NOT_SUPPORTED` | ``505`` | HTTP/1.1, `RFC 2616, Section | +| | | 10.5.6 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`INSUFFICIENT_STORAGE` | ``507`` | WEBDAV, `RFC 2518, Section 10.6 | +| | | `_ | ++------------------------------------------+---------+-----------------------------------------------------------------------+ +| :const:`NOT_EXTENDED` | ``510`` | An HTTP 
Extension Framework, | +| | | :rfc:`2774`, Section 7 | ++------------------------------------------+---------+-----------------------------------------------------------------------+ + + +.. data:: responses + + This dictionary maps the HTTP 1.1 status codes to the W3C names. + + Example: ``http.client.responses[http.client.NOT_FOUND]`` is ``'Not Found'``. + + +.. _httpconnection-objects: + +HTTPConnection Objects +---------------------- + +:class:`HTTPConnection` instances have the following methods: + + +.. method:: HTTPConnection.request(method, url[, body[, headers]]) + + This will send a request to the server using the HTTP request method *method* + and the selector *url*. If the *body* argument is present, it should be a + string of data to send after the headers are finished. Alternatively, it may + be an open file object, in which case the contents of the file is sent; this + file object should support ``fileno()`` and ``read()`` methods. The header + Content-Length is automatically set to the correct value. The *headers* + argument should be a mapping of extra HTTP headers to send with the request. + + +.. method:: HTTPConnection.getresponse() + + Should be called after a request is sent to get the response from the server. + Returns an :class:`HTTPResponse` instance. + + .. note:: + + Note that you must have read the whole response before you can send a new + request to the server. + + +.. method:: HTTPConnection.set_debuglevel(level) + + Set the debugging level (the amount of debugging output printed). The default + debug level is ``0``, meaning no debugging output is printed. + + +.. method:: HTTPConnection.connect() + + Connect to the server specified when the object was created. + + +.. method:: HTTPConnection.close() + + Close the connection to the server. + +As an alternative to using the :meth:`request` method described above, you can +also send your request step by step, by using the four functions below. + + +.. 
method:: HTTPConnection.putrequest(request, selector[, skip_host[, skip_accept_encoding]]) + + This should be the first call after the connection to the server has been made. + It sends a line to the server consisting of the *request* string, the *selector* + string, and the HTTP version (``HTTP/1.1``). To disable automatic sending of + ``Host:`` or ``Accept-Encoding:`` headers (for example to accept additional + content encodings), specify *skip_host* or *skip_accept_encoding* with non-False + values. + + +.. method:: HTTPConnection.putheader(header, argument[, ...]) + + Send an :rfc:`822`\ -style header to the server. It sends a line to the server + consisting of the header, a colon and a space, and the first argument. If more + arguments are given, continuation lines are sent, each consisting of a tab and + an argument. + + +.. method:: HTTPConnection.endheaders() + + Send a blank line to the server, signalling the end of the headers. + + +.. method:: HTTPConnection.send(data) + + Send data to the server. This should be used directly only after the + :meth:`endheaders` method has been called and before :meth:`getresponse` is + called. + + +.. _httpresponse-objects: + +HTTPResponse Objects +-------------------- + +:class:`HTTPResponse` instances have the following methods and attributes: + + +.. method:: HTTPResponse.read([amt]) + + Reads and returns the response body, or up to the next *amt* bytes. + + +.. method:: HTTPResponse.getheader(name[, default]) + + Get the contents of the header *name*, or *default* if there is no matching + header. + + +.. method:: HTTPResponse.getheaders() + + Return a list of (header, value) tuples. + + +.. attribute:: HTTPResponse.msg + + A :class:`mimetools.Message` instance containing the response headers. + + +.. attribute:: HTTPResponse.version + + HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. + + +.. attribute:: HTTPResponse.status + + Status code returned by server. + + +.. 
attribute:: HTTPResponse.reason + + Reason phrase returned by server. + + +Examples +-------- + +Here is an example session that uses the ``GET`` method:: + + >>> import http.client + >>> conn = http.client.HTTPConnection("www.python.org") + >>> conn.request("GET", "/index.html") + >>> r1 = conn.getresponse() + >>> print(r1.status, r1.reason) + 200 OK + >>> data1 = r1.read() + >>> conn.request("GET", "/parrot.spam") + >>> r2 = conn.getresponse() + >>> print(r2.status, r2.reason) + 404 Not Found + >>> data2 = r2.read() + >>> conn.close() + +Here is an example session that shows how to ``POST`` requests:: + + >>> import http.client, urllib + >>> params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) + >>> headers = {"Content-type": "application/x-www-form-urlencoded", + ... "Accept": "text/plain"} + >>> conn = http.client.HTTPConnection("musi-cal.mojam.com:80") + >>> conn.request("POST", "/cgi-bin/query", params, headers) + >>> response = conn.getresponse() + >>> print(response.status, response.reason) + 200 OK + >>> data = response.read() + >>> conn.close() + diff --git a/Doc/library/http.cookiejar.rst b/Doc/library/http.cookiejar.rst new file mode 100644 index 0000000..250abde --- /dev/null +++ b/Doc/library/http.cookiejar.rst @@ -0,0 +1,746 @@ +:mod:`http.cookiejar` --- Cookie handling for HTTP clients +========================================================== + +.. module:: http.cookiejar + :synopsis: Classes for automatic handling of HTTP cookies. +.. moduleauthor:: John J. Lee +.. sectionauthor:: John J. Lee + + +The :mod:`http.cookiejar` module defines classes for automatic handling of HTTP +cookies. It is useful for accessing web sites that require small pieces of data +-- :dfn:`cookies` -- to be set on the client machine by an HTTP response from a +web server, and then returned to the server in later HTTP requests. + +Both the regular Netscape cookie protocol and the protocol defined by +:rfc:`2965` are handled. 
RFC 2965 handling is switched off by default. +:rfc:`2109` cookies are parsed as Netscape cookies and subsequently treated +either as Netscape or RFC 2965 cookies according to the 'policy' in effect. +Note that the great majority of cookies on the Internet are Netscape cookies. +:mod:`http.cookiejar` attempts to follow the de-facto Netscape cookie protocol (which +differs substantially from that set out in the original Netscape specification), +including taking note of the ``max-age`` and ``port`` cookie-attributes +introduced with RFC 2965. + +.. note:: + + The various named parameters found in :mailheader:`Set-Cookie` and + :mailheader:`Set-Cookie2` headers (eg. ``domain`` and ``expires``) are + conventionally referred to as :dfn:`attributes`. To distinguish them from + Python attributes, the documentation for this module uses the term + :dfn:`cookie-attribute` instead. + + +The module defines the following exception: + + +.. exception:: LoadError + + Instances of :class:`FileCookieJar` raise this exception on failure to load + cookies from a file. :exc:`LoadError` is a subclass of :exc:`IOError`. + + +The following classes are provided: + + +.. class:: CookieJar(policy=None) + + *policy* is an object implementing the :class:`CookiePolicy` interface. + + The :class:`CookieJar` class stores HTTP cookies. It extracts cookies from HTTP + requests, and returns them in HTTP responses. :class:`CookieJar` instances + automatically expire contained cookies when necessary. Subclasses are also + responsible for storing and retrieving cookies from a file or database. + + +.. class:: FileCookieJar(filename, delayload=None, policy=None) + + *policy* is an object implementing the :class:`CookiePolicy` interface. For the + other arguments, see the documentation for the corresponding attributes. + + A :class:`CookieJar` which can load cookies from, and perhaps save cookies to, a + file on disk. 
Cookies are **NOT** loaded from the named file until either the + :meth:`load` or :meth:`revert` method is called. Subclasses of this class are + documented in section :ref:`file-cookie-jar-classes`. + + +.. class:: CookiePolicy() + + This class is responsible for deciding whether each cookie should be accepted + from / returned to the server. + + +.. class:: DefaultCookiePolicy( blocked_domains=None, allowed_domains=None, netscape=True, rfc2965=False, rfc2109_as_netscape=None, hide_cookie2=False, strict_domain=False, strict_rfc2965_unverifiable=True, strict_ns_unverifiable=False, strict_ns_domain=DefaultCookiePolicy.DomainLiberal, strict_ns_set_initial_dollar=False, strict_ns_set_path=False ) + + Constructor arguments should be passed as keyword arguments only. + *blocked_domains* is a sequence of domain names that we never accept cookies + from, nor return cookies to. *allowed_domains* if not :const:`None`, this is a + sequence of the only domains for which we accept and return cookies. For all + other arguments, see the documentation for :class:`CookiePolicy` and + :class:`DefaultCookiePolicy` objects. + + :class:`DefaultCookiePolicy` implements the standard accept / reject rules for + Netscape and RFC 2965 cookies. By default, RFC 2109 cookies (ie. cookies + received in a :mailheader:`Set-Cookie` header with a version cookie-attribute of + 1) are treated according to the RFC 2965 rules. However, if RFC 2965 handling + is turned off or :attr:`rfc2109_as_netscape` is True, RFC 2109 cookies are + 'downgraded' by the :class:`CookieJar` instance to Netscape cookies, by + setting the :attr:`version` attribute of the :class:`Cookie` instance to 0. + :class:`DefaultCookiePolicy` also provides some parameters to allow some + fine-tuning of policy. + + +.. class:: Cookie() + + This class represents Netscape, RFC 2109 and RFC 2965 cookies. It is not + expected that users of :mod:`http.cookiejar` construct their own :class:`Cookie` + instances. 
Instead, if necessary, call :meth:`make_cookies` on a + :class:`CookieJar` instance. + + +.. seealso:: + + Module :mod:`urllib2` + URL opening with automatic cookie handling. + + Module :mod:`http.cookies` + HTTP cookie classes, principally useful for server-side code. The + :mod:`http.cookiejar` and :mod:`http.cookies` modules do not depend on each + other. + + http://wwwsearch.sf.net/ClientCookie/ + Extensions to this module, including a class for reading Microsoft Internet + Explorer cookies on Windows. + + http://wp.netscape.com/newsref/std/cookie_spec.html + The specification of the original Netscape cookie protocol. Though this is + still the dominant protocol, the 'Netscape cookie protocol' implemented by all + the major browsers (and :mod:`http.cookiejar`) only bears a passing resemblance to + the one sketched out in ``cookie_spec.html``. + + :rfc:`2109` - HTTP State Management Mechanism + Obsoleted by RFC 2965. Uses :mailheader:`Set-Cookie` with version=1. + + :rfc:`2965` - HTTP State Management Mechanism + The Netscape protocol with the bugs fixed. Uses :mailheader:`Set-Cookie2` in + place of :mailheader:`Set-Cookie`. Not widely used. + + http://kristol.org/cookie/errata.html + Unfinished errata to RFC 2965. + + :rfc:`2964` - Use of HTTP State Management + +.. _cookie-jar-objects: + +CookieJar and FileCookieJar Objects +----------------------------------- + +:class:`CookieJar` objects support the :term:`iterator` protocol for iterating over +contained :class:`Cookie` objects. + +:class:`CookieJar` has the following methods: + + +.. method:: CookieJar.add_cookie_header(request) + + Add correct :mailheader:`Cookie` header to *request*. + + If policy allows (ie. the :attr:`rfc2965` and :attr:`hide_cookie2` attributes of + the :class:`CookieJar`'s :class:`CookiePolicy` instance are true and false + respectively), the :mailheader:`Cookie2` header is also added when appropriate. 
+ + The *request* object (usually a :class:`urllib2.Request` instance) must support + the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`get_type`, + :meth:`unverifiable`, :meth:`get_origin_req_host`, :meth:`has_header`, + :meth:`get_header`, :meth:`header_items`, and :meth:`add_unredirected_header`, as + documented by :mod:`urllib2`. + + +.. method:: CookieJar.extract_cookies(response, request) + + Extract cookies from HTTP *response* and store them in the :class:`CookieJar`, + where allowed by policy. + + The :class:`CookieJar` will look for allowable :mailheader:`Set-Cookie` and + :mailheader:`Set-Cookie2` headers in the *response* argument, and store cookies + as appropriate (subject to the :meth:`CookiePolicy.set_ok` method's approval). + + The *response* object (usually the result of a call to :meth:`urllib2.urlopen`, + or similar) should support an :meth:`info` method, which returns an object with + a :meth:`getallmatchingheaders` method (usually a :class:`mimetools.Message` + instance). + + The *request* object (usually a :class:`urllib2.Request` instance) must support + the methods :meth:`get_full_url`, :meth:`get_host`, :meth:`unverifiable`, and + :meth:`get_origin_req_host`, as documented by :mod:`urllib2`. The request is + used to set default values for cookie-attributes as well as for checking that + the cookie is allowed to be set. + + +.. method:: CookieJar.set_policy(policy) + + Set the :class:`CookiePolicy` instance to be used. + + +.. method:: CookieJar.make_cookies(response, request) + + Return sequence of :class:`Cookie` objects extracted from *response* object. + + See the documentation for :meth:`extract_cookies` for the interfaces required of + the *response* and *request* arguments. + + +.. method:: CookieJar.set_cookie_if_ok(cookie, request) + + Set a :class:`Cookie` if policy says it's OK to do so. + + +.. 
method:: CookieJar.set_cookie(cookie) + + Set a :class:`Cookie`, without checking with policy to see whether or not it + should be set. + + +.. method:: CookieJar.clear([domain[, path[, name]]]) + + Clear some cookies. + + If invoked without arguments, clear all cookies. If given a single argument, + only cookies belonging to that *domain* will be removed. If given two arguments, + cookies belonging to the specified *domain* and URL *path* are removed. If + given three arguments, then the cookie with the specified *domain*, *path* and + *name* is removed. + + Raises :exc:`KeyError` if no matching cookie exists. + + +.. method:: CookieJar.clear_session_cookies() + + Discard all session cookies. + + Discards all contained cookies that have a true :attr:`discard` attribute + (usually because they had either no ``max-age`` or ``expires`` cookie-attribute, + or an explicit ``discard`` cookie-attribute). For interactive browsers, the end + of a session usually corresponds to closing the browser window. + + Note that the :meth:`save` method won't save session cookies anyway, unless you + ask otherwise by passing a true *ignore_discard* argument. + +:class:`FileCookieJar` implements the following additional methods: + + +.. method:: FileCookieJar.save(filename=None, ignore_discard=False, ignore_expires=False) + + Save cookies to a file. + + This base class raises :exc:`NotImplementedError`. Subclasses may leave this + method unimplemented. + + *filename* is the name of file in which to save cookies. If *filename* is not + specified, :attr:`self.filename` is used (whose default is the value passed to + the constructor, if any); if :attr:`self.filename` is :const:`None`, + :exc:`ValueError` is raised. + + *ignore_discard*: save even cookies set to be discarded. *ignore_expires*: save + even cookies that have expired + + The file is overwritten if it already exists, thus wiping all the cookies it + contains. 
Saved cookies can be restored later using the :meth:`load` or + :meth:`revert` methods. + + +.. method:: FileCookieJar.load(filename=None, ignore_discard=False, ignore_expires=False) + + Load cookies from a file. + + Old cookies are kept unless overwritten by newly loaded ones. + + Arguments are as for :meth:`save`. + + The named file must be in the format understood by the class, or + :exc:`LoadError` will be raised. Also, :exc:`IOError` may be raised, for + example if the file does not exist. + + +.. method:: FileCookieJar.revert(filename=None, ignore_discard=False, ignore_expires=False) + + Clear all cookies and reload cookies from a saved file. + + :meth:`revert` can raise the same exceptions as :meth:`load`. If there is a + failure, the object's state will not be altered. + +:class:`FileCookieJar` instances have the following public attributes: + + +.. attribute:: FileCookieJar.filename + + Filename of default file in which to keep cookies. This attribute may be + assigned to. + + +.. attribute:: FileCookieJar.delayload + + If true, load cookies lazily from disk. This attribute should not be assigned + to. This is only a hint, since this only affects performance, not behaviour + (unless the cookies on disk are changing). A :class:`CookieJar` object may + ignore it. None of the :class:`FileCookieJar` classes included in the standard + library lazily loads cookies. + + +.. _file-cookie-jar-classes: + +FileCookieJar subclasses and co-operation with web browsers +----------------------------------------------------------- + +The following :class:`CookieJar` subclasses are provided for reading and writing +. Further :class:`CookieJar` subclasses, including one that reads Microsoft +Internet Explorer cookies, are available at +http://wwwsearch.sf.net/ClientCookie/. + + +.. 
class:: MozillaCookieJar(filename, delayload=None, policy=None) + + A :class:`FileCookieJar` that can load from and save cookies to disk in the + Mozilla ``cookies.txt`` file format (which is also used by the Lynx and Netscape + browsers). + + .. note:: + + This loses information about RFC 2965 cookies, and also about newer or + non-standard cookie-attributes such as ``port``. + + .. warning:: + + Back up your cookies before saving if you have cookies whose loss / corruption + would be inconvenient (there are some subtleties which may lead to slight + changes in the file over a load / save round-trip). + + Also note that cookies saved while Mozilla is running will get clobbered by + Mozilla. + + +.. class:: LWPCookieJar(filename, delayload=None, policy=None) + + A :class:`FileCookieJar` that can load from and save cookies to disk in format + compatible with the libwww-perl library's ``Set-Cookie3`` file format. This is + convenient if you want to store cookies in a human-readable file. + + +.. _cookie-policy-objects: + +CookiePolicy Objects +-------------------- + +Objects implementing the :class:`CookiePolicy` interface have the following +methods: + + +.. method:: CookiePolicy.set_ok(cookie, request) + + Return boolean value indicating whether cookie should be accepted from server. + + *cookie* is a :class:`Cookie` instance. *request* is an object + implementing the interface defined by the documentation for + :meth:`CookieJar.extract_cookies`. + + +.. method:: CookiePolicy.return_ok(cookie, request) + + Return boolean value indicating whether cookie should be returned to server. + + *cookie* is a :class:`Cookie` instance. *request* is an object + implementing the interface defined by the documentation for + :meth:`CookieJar.add_cookie_header`. + + +.. method:: CookiePolicy.domain_return_ok(domain, request) + + Return false if cookies should not be returned, given cookie domain. + + This method is an optimization. 
It removes the need for checking every cookie + with a particular domain (which might involve reading many files). Returning + true from :meth:`domain_return_ok` and :meth:`path_return_ok` leaves all the + work to :meth:`return_ok`. + + If :meth:`domain_return_ok` returns true for the cookie domain, + :meth:`path_return_ok` is called for the cookie path. Otherwise, + :meth:`path_return_ok` and :meth:`return_ok` are never called for that cookie + domain. If :meth:`path_return_ok` returns true, :meth:`return_ok` is called + with the :class:`Cookie` object itself for a full check. Otherwise, + :meth:`return_ok` is never called for that cookie path. + + Note that :meth:`domain_return_ok` is called for every *cookie* domain, not just + for the *request* domain. For example, the function might be called with both + ``".example.com"`` and ``"www.example.com"`` if the request domain is + ``"www.example.com"``. The same goes for :meth:`path_return_ok`. + + The *request* argument is as documented for :meth:`return_ok`. + + +.. method:: CookiePolicy.path_return_ok(path, request) + + Return false if cookies should not be returned, given cookie path. + + See the documentation for :meth:`domain_return_ok`. + +In addition to implementing the methods above, implementations of the +:class:`CookiePolicy` interface must also supply the following attributes, +indicating which protocols should be used, and how. All of these attributes may +be assigned to. + + +.. attribute:: CookiePolicy.netscape + + Implement Netscape protocol. + + +.. attribute:: CookiePolicy.rfc2965 + + Implement RFC 2965 protocol. + + +.. attribute:: CookiePolicy.hide_cookie2 + + Don't add :mailheader:`Cookie2` header to requests (the presence of this header + indicates to the server that we understand RFC 2965 cookies). + +The most useful way to define a :class:`CookiePolicy` class is by subclassing +from :class:`DefaultCookiePolicy` and overriding some or all of the methods +above. 
:class:`CookiePolicy` itself may be used as a 'null policy' to allow +setting and receiving any and all cookies (this is unlikely to be useful). + + +.. _default-cookie-policy-objects: + +DefaultCookiePolicy Objects +--------------------------- + +Implements the standard rules for accepting and returning cookies. + +Both RFC 2965 and Netscape cookies are covered. RFC 2965 handling is switched +off by default. + +The easiest way to provide your own policy is to override this class and call +its methods in your overridden implementations before adding your own additional +checks:: + + import http.cookiejar + class MyCookiePolicy(http.cookiejar.DefaultCookiePolicy): + def set_ok(self, cookie, request): + if not http.cookiejar.DefaultCookiePolicy.set_ok(self, cookie, request): + return False + if i_dont_want_to_store_this_cookie(cookie): + return False + return True + +In addition to the features required to implement the :class:`CookiePolicy` +interface, this class allows you to block and allow domains from setting and +receiving cookies. There are also some strictness switches that allow you to +tighten up the rather loose Netscape protocol rules a little bit (at the cost of +blocking some benign cookies). + +A domain blacklist and whitelist is provided (both off by default). Only domains +not in the blacklist and present in the whitelist (if the whitelist is active) +participate in cookie setting and returning. Use the *blocked_domains* +constructor argument, and :meth:`blocked_domains` and +:meth:`set_blocked_domains` methods (and the corresponding argument and methods +for *allowed_domains*). If you set a whitelist, you can turn it off again by +setting it to :const:`None`. + +Domains in block or allow lists that do not start with a dot must equal the +cookie domain to be matched. For example, ``"example.com"`` matches a blacklist +entry of ``"example.com"``, but ``"www.example.com"`` does not. 
Domains that do +start with a dot are matched by more specific domains too. For example, both +``"www.example.com"`` and ``"www.coyote.example.com"`` match ``".example.com"`` +(but ``"example.com"`` itself does not). IP addresses are an exception, and +must match exactly. For example, if blocked_domains contains ``"192.168.1.2"`` +and ``".168.1.2"``, 192.168.1.2 is blocked, but 193.168.1.2 is not. + +:class:`DefaultCookiePolicy` implements the following additional methods: + + +.. method:: DefaultCookiePolicy.blocked_domains() + + Return the sequence of blocked domains (as a tuple). + + +.. method:: DefaultCookiePolicy.set_blocked_domains(blocked_domains) + + Set the sequence of blocked domains. + + +.. method:: DefaultCookiePolicy.is_blocked(domain) + + Return whether *domain* is on the blacklist for setting or receiving cookies. + + +.. method:: DefaultCookiePolicy.allowed_domains() + + Return :const:`None`, or the sequence of allowed domains (as a tuple). + + +.. method:: DefaultCookiePolicy.set_allowed_domains(allowed_domains) + + Set the sequence of allowed domains, or :const:`None`. + + +.. method:: DefaultCookiePolicy.is_not_allowed(domain) + + Return whether *domain* is not on the whitelist for setting or receiving + cookies. + +:class:`DefaultCookiePolicy` instances have the following attributes, which are +all initialised from the constructor arguments of the same name, and which may +all be assigned to. + + +.. attribute:: DefaultCookiePolicy.rfc2109_as_netscape + + If true, request that the :class:`CookieJar` instance downgrade RFC 2109 cookies + (ie. cookies received in a :mailheader:`Set-Cookie` header with a version + cookie-attribute of 1) to Netscape cookies by setting the version attribute of + the :class:`Cookie` instance to 0. The default value is :const:`None`, in which + case RFC 2109 cookies are downgraded if and only if RFC 2965 handling is turned + off. Therefore, RFC 2109 cookies are downgraded by default. 
+ + +General strictness switches: + +.. attribute:: DefaultCookiePolicy.strict_domain + + Don't allow sites to set two-component domains with country-code top-level + domains like ``.co.uk``, ``.gov.uk``, ``.co.nz``, etc. This is far from perfect + and isn't guaranteed to work! + + +RFC 2965 protocol strictness switches: + +.. attribute:: DefaultCookiePolicy.strict_rfc2965_unverifiable + + Follow RFC 2965 rules on unverifiable transactions (usually, an unverifiable + transaction is one resulting from a redirect or a request for an image hosted on + another site). If this is false, cookies are *never* blocked on the basis of + verifiability. + + +Netscape protocol strictness switches: + +.. attribute:: DefaultCookiePolicy.strict_ns_unverifiable + + Apply RFC 2965 rules on unverifiable transactions even to Netscape cookies. + + +.. attribute:: DefaultCookiePolicy.strict_ns_domain + + Flags indicating how strict to be with domain-matching rules for Netscape + cookies. See below for acceptable values. + + +.. attribute:: DefaultCookiePolicy.strict_ns_set_initial_dollar + + Ignore cookies in Set-Cookie: headers that have names starting with ``'$'``. + + +.. attribute:: DefaultCookiePolicy.strict_ns_set_path + + Don't allow setting cookies whose path doesn't path-match request URI. + +:attr:`strict_ns_domain` is a collection of flags. Its value is constructed by +or-ing together (for example, ``DomainStrictNoDots|DomainStrictNonDomain`` means +both flags are set). + + +.. attribute:: DefaultCookiePolicy.DomainStrictNoDots + + When setting cookies, the 'host prefix' must not contain a dot (eg. + ``www.foo.bar.com`` can't set a cookie for ``.bar.com``, because ``www.foo`` + contains a dot). + + +.. attribute:: DefaultCookiePolicy.DomainStrictNonDomain + + Cookies that did not explicitly specify a ``domain`` cookie-attribute can only + be returned to a domain equal to the domain that set the cookie (eg. 
+ ``spam.example.com`` won't be returned cookies from ``example.com`` that had no + ``domain`` cookie-attribute). + + +.. attribute:: DefaultCookiePolicy.DomainRFC2965Match + + When setting cookies, require a full RFC 2965 domain-match. + +The following attributes are provided for convenience, and are the most useful +combinations of the above flags: + + +.. attribute:: DefaultCookiePolicy.DomainLiberal + + Equivalent to 0 (ie. all of the above Netscape domain strictness flags switched + off). + + +.. attribute:: DefaultCookiePolicy.DomainStrict + + Equivalent to ``DomainStrictNoDots|DomainStrictNonDomain``. + + +Cookie Objects +-------------- + +:class:`Cookie` instances have Python attributes roughly corresponding to the +standard cookie-attributes specified in the various cookie standards. The +correspondence is not one-to-one, because there are complicated rules for +assigning default values, because the ``max-age`` and ``expires`` +cookie-attributes contain equivalent information, and because RFC 2109 cookies +may be 'downgraded' by :mod:`http.cookiejar` from version 1 to version 0 (Netscape) +cookies. + +Assignment to these attributes should not be necessary other than in rare +circumstances in a :class:`CookiePolicy` method. The class does not enforce +internal consistency, so you should know what you're doing if you do that. + + +.. attribute:: Cookie.version + + Integer or :const:`None`. Netscape cookies have :attr:`version` 0. RFC 2965 and + RFC 2109 cookies have a ``version`` cookie-attribute of 1. However, note that + :mod:`http.cookiejar` may 'downgrade' RFC 2109 cookies to Netscape cookies, in which + case :attr:`version` is 0. + + +.. attribute:: Cookie.name + + Cookie name (a string). + + +.. attribute:: Cookie.value + + Cookie value (a string), or :const:`None`. + + +.. attribute:: Cookie.port + + String representing a port or a set of ports (eg. '80', or '80,8080'), or + :const:`None`. + + +.. attribute:: Cookie.path + + Cookie path (a string, eg. 
``'/acme/rocket_launchers'``). + + +.. attribute:: Cookie.secure + + True if cookie should only be returned over a secure connection. + + +.. attribute:: Cookie.expires + + Integer expiry date in seconds since epoch, or :const:`None`. See also the + :meth:`is_expired` method. + + +.. attribute:: Cookie.discard + + True if this is a session cookie. + + +.. attribute:: Cookie.comment + + String comment from the server explaining the function of this cookie, or + :const:`None`. + + +.. attribute:: Cookie.comment_url + + URL linking to a comment from the server explaining the function of this cookie, + or :const:`None`. + + +.. attribute:: Cookie.rfc2109 + + True if this cookie was received as an RFC 2109 cookie (ie. the cookie + arrived in a :mailheader:`Set-Cookie` header, and the value of the Version + cookie-attribute in that header was 1). This attribute is provided because + :mod:`http.cookiejar` may 'downgrade' RFC 2109 cookies to Netscape cookies, in + which case :attr:`version` is 0. + + +.. attribute:: Cookie.port_specified + + True if a port or set of ports was explicitly specified by the server (in the + :mailheader:`Set-Cookie` / :mailheader:`Set-Cookie2` header). + + +.. attribute:: Cookie.domain_specified + + True if a domain was explicitly specified by the server. + + +.. attribute:: Cookie.domain_initial_dot + + True if the domain explicitly specified by the server began with a dot + (``'.'``). + +Cookies may have additional non-standard cookie-attributes. These may be +accessed using the following methods: + + +.. method:: Cookie.has_nonstandard_attr(name) + + Return true if cookie has the named cookie-attribute. + + +.. method:: Cookie.get_nonstandard_attr(name, default=None) + + If cookie has the named cookie-attribute, return its value. Otherwise, return + *default*. + + +.. method:: Cookie.set_nonstandard_attr(name, value) + + Set the value of the named cookie-attribute. + +The :class:`Cookie` class also defines the following method: + + +.. 
method:: Cookie.is_expired([now=None])
sectionauthor:: Moshe Zadka + + +The :mod:`http.cookies` module defines classes for abstracting the concept of +cookies, an HTTP state management mechanism. It supports both simple string-only +cookies, and provides an abstraction for having any serializable data-type as +cookie value. + +The module formerly strictly applied the parsing rules described in the +:rfc:`2109` and :rfc:`2068` specifications. It has since been discovered that +MSIE 3.0x doesn't follow the character rules outlined in those specs. As a +result, the parsing rules used are a bit less strict. + + +.. exception:: CookieError + + Exception failing because of :rfc:`2109` invalidity: incorrect attributes, + incorrect :mailheader:`Set-Cookie` header, etc. + + +.. class:: BaseCookie([input]) + + This class is a dictionary-like object whose keys are strings and whose values + are :class:`Morsel` instances. Note that upon setting a key to a value, the + value is first converted to a :class:`Morsel` containing the key and the value. + + If *input* is given, it is passed to the :meth:`load` method. + + +.. class:: SimpleCookie([input]) + + This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` + and :meth:`value_encode` to be the identity and :func:`str` respectively. + + +.. class:: SerialCookie([input]) + + This class derives from :class:`BaseCookie` and overrides :meth:`value_decode` + and :meth:`value_encode` to be the :func:`pickle.loads` and + :func:`pickle.dumps`. + + .. deprecated:: 2.3 + Reading pickled values from untrusted cookie data is a huge security hole, as + pickle strings can be crafted to cause arbitrary code to execute on your server. + It is supported for backwards compatibility only, and may eventually go away. + + +.. class:: SmartCookie([input]) + + This class derives from :class:`BaseCookie`. It overrides :meth:`value_decode` + to be :func:`pickle.loads` if it is a valid pickle, and otherwise the value + itself. 
It overrides :meth:`value_encode` to be :func:`pickle.dumps` unless it + is a string, in which case it returns the value itself. + + .. deprecated:: 2.3 + The same security warning from :class:`SerialCookie` applies here. + +A further security note is warranted. For backwards compatibility, the +:mod:`http.cookies` module exports a class named :class:`Cookie` which is just an +alias for :class:`SmartCookie`. This is probably a mistake and will likely be +removed in a future version. You should not use the :class:`Cookie` class in +your applications, for the same reason why you should not use the +:class:`SerialCookie` class. + + +.. seealso:: + + Module :mod:`http.cookiejar` + HTTP cookie handling for web *clients*. The :mod:`http.cookiejar` and + :mod:`http.cookies` modules do not depend on each other. + + :rfc:`2109` - HTTP State Management Mechanism + This is the state management specification implemented by this module. + + +.. _cookie-objects: + +Cookie Objects +-------------- + + +.. method:: BaseCookie.value_decode(val) + + Return a decoded value from a string representation. Return value can be any + type. This method does nothing in :class:`BaseCookie` --- it exists so it can be + overridden. + + +.. method:: BaseCookie.value_encode(val) + + Return an encoded value. *val* can be any type, but return value must be a + string. This method does nothing in :class:`BaseCookie` --- it exists so it can + be overridden + + In general, it should be the case that :meth:`value_encode` and + :meth:`value_decode` are inverses on the range of *value_decode*. + + +.. method:: BaseCookie.output([attrs[, header[, sep]]]) + + Return a string representation suitable to be sent as HTTP headers. *attrs* and + *header* are sent to each :class:`Morsel`'s :meth:`output` method. *sep* is used + to join the headers together, and is by default the combination ``'\r\n'`` + (CRLF). + + +.. 
method:: BaseCookie.js_output([attrs]) + + Return an embeddable JavaScript snippet, which, if run on a browser which + supports JavaScript, will act the same as if the HTTP headers was sent. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. method:: BaseCookie.load(rawdata) + + If *rawdata* is a string, parse it as an ``HTTP_COOKIE`` and add the values + found there as :class:`Morsel`\ s. If it is a dictionary, it is equivalent to:: + + for k, v in rawdata.items(): + cookie[k] = v + + +.. _morsel-objects: + +Morsel Objects +-------------- + + +.. class:: Morsel() + + Abstract a key/value pair, which has some :rfc:`2109` attributes. + + Morsels are dictionary-like objects, whose set of keys is constant --- the valid + :rfc:`2109` attributes, which are + + * ``expires`` + * ``path`` + * ``comment`` + * ``domain`` + * ``max-age`` + * ``secure`` + * ``version`` + + The keys are case-insensitive. + + +.. attribute:: Morsel.value + + The value of the cookie. + + +.. attribute:: Morsel.coded_value + + The encoded value of the cookie --- this is what should be sent. + + +.. attribute:: Morsel.key + + The name of the cookie. + + +.. method:: Morsel.set(key, value, coded_value) + + Set the *key*, *value* and *coded_value* members. + + +.. method:: Morsel.isReservedKey(K) + + Whether *K* is a member of the set of keys of a :class:`Morsel`. + + +.. method:: Morsel.output([attrs[, header]]) + + Return a string representation of the Morsel, suitable to be sent as an HTTP + header. By default, all the attributes are included, unless *attrs* is given, in + which case it should be a list of attributes to use. *header* is by default + ``"Set-Cookie:"``. + + +.. method:: Morsel.js_output([attrs]) + + Return an embeddable JavaScript snippet, which, if run on a browser which + supports JavaScript, will act the same as if the HTTP header was sent. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. 
method:: Morsel.OutputString([attrs]) + + Return a string representing the Morsel, without any surrounding HTTP or + JavaScript. + + The meaning for *attrs* is the same as in :meth:`output`. + + +.. _cookie-example: + +Example +------- + +The following example demonstrates how to use the :mod:`http.cookies` module. + +.. doctest:: + :options: +NORMALIZE_WHITESPACE + + >>> from http import cookies + >>> C = cookies.SimpleCookie() + >>> C = cookies.SerialCookie() + >>> C = cookies.SmartCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> print(C) # generate HTTP headers + Set-Cookie: fig=newton + Set-Cookie: sugar=wafer + >>> print(C.output()) # same thing + Set-Cookie: fig=newton + Set-Cookie: sugar=wafer + >>> C = cookies.SmartCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + >>> C = cookies.SmartCookie() + >>> C.load("chips=ahoy; vienna=finger") # load from a string (HTTP header) + >>> print(C) + Set-Cookie: chips=ahoy + Set-Cookie: vienna=finger + >>> C = cookies.SmartCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + >>> C = cookies.SmartCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + >>> C = cookies.SmartCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 # equivalent to C["number"] = str(7) + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> print(C) + Set-Cookie: number=7 + Set-Cookie: string=seven + >>> C = cookies.SerialCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> print(C) + Set-Cookie: 
number="I7\012." + Set-Cookie: string="S'seven'\012p1\012." + >>> C = cookies.SmartCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> print(C) + Set-Cookie: number="I7\012." + Set-Cookie: string=seven + diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst new file mode 100644 index 0000000..941f043 --- /dev/null +++ b/Doc/library/http.server.rst @@ -0,0 +1,322 @@ +:mod:`http.server` --- HTTP servers +=================================== + +.. module:: http.server + :synopsis: HTTP server and request handlers. + + +.. index:: + pair: WWW; server + pair: HTTP; protocol + single: URL + single: httpd + +This module defines classes for implementing HTTP servers (Web servers). + +One class, :class:`HTTPServer`, is a :class:`socketserver.TCPServer` subclass. +It creates and listens at the HTTP socket, dispatching the requests to a +handler. Code to create and run the server looks like this:: + + def run(server_class=HTTPServer, handler_class=BaseHTTPRequestHandler): + server_address = ('', 8000) + httpd = server_class(server_address, handler_class) + httpd.serve_forever() + + +.. class:: HTTPServer(server_address, RequestHandlerClass) + + This class builds on the :class:`TCPServer` class by storing the server + address as instance variables named :attr:`server_name` and + :attr:`server_port`. The server is accessible by the handler, typically + through the handler's :attr:`server` instance variable. + + +The :class:`HTTPServer` must be given a *RequestHandlerClass* on instantiation, +of which this module provides three different variants: + +.. class:: BaseHTTPRequestHandler(request, client_address, server) + + This class is used to handle the HTTP requests that arrive at the server. By + itself, it cannot respond to any actual HTTP requests; it must be subclassed + to handle each request method (e.g. GET or POST). 
+ :class:`BaseHTTPRequestHandler` provides a number of class and instance + variables, and methods for use by subclasses. + + The handler will parse the request and the headers, then call a method + specific to the request type. The method name is constructed from the + request. For example, for the request method ``SPAM``, the :meth:`do_SPAM` + method will be called with no arguments. All of the relevant information is + stored in instance variables of the handler. Subclasses should not need to + override or extend the :meth:`__init__` method. + + :class:`BaseHTTPRequestHandler` has the following instance variables: + + .. attribute:: client_address + + Contains a tuple of the form ``(host, port)`` referring to the client's + address. + + .. attribute:: command + + Contains the command (request type). For example, ``'GET'``. + + .. attribute:: path + + Contains the request path. + + .. attribute:: request_version + + Contains the version string from the request. For example, ``'HTTP/1.0'``. + + .. attribute:: headers + + Holds an instance of the class specified by the :attr:`MessageClass` class + variable. This instance parses and manages the headers in the HTTP + request. + + .. attribute:: rfile + + Contains an input stream, positioned at the start of the optional input + data. + + .. attribute:: wfile + + Contains the output stream for writing a response back to the + client. Proper adherence to the HTTP protocol must be used when writing to + this stream. + + :class:`BaseHTTPRequestHandler` has the following class variables: + + .. attribute:: server_version + + Specifies the server software version. You may want to override this. The + format is multiple whitespace-separated strings, where each string is of + the form name[/version]. For example, ``'BaseHTTP/0.2'``. + + .. attribute:: sys_version + + Contains the Python system version, in a form usable by the + :attr:`version_string` method and the :attr:`server_version` class + variable. 
For example, ``'Python/1.4'``. + + .. attribute:: error_message_format + + Specifies a format string for building an error response to the client. It + uses parenthesized, keyed format specifiers, so the format operand must be + a dictionary. The *code* key should be an integer, specifying the numeric + HTTP error code value. *message* should be a string containing a + (detailed) error message of what occurred, and *explain* should be an + explanation of the error code number. Default *message* and *explain* + values can found in the *responses* class variable. + + .. attribute:: error_content_type + + Specifies the Content-Type HTTP header of error responses sent to the + client. The default value is ``'text/html'``. + + .. attribute:: protocol_version + + This specifies the HTTP protocol version used in responses. If set to + ``'HTTP/1.1'``, the server will permit HTTP persistent connections; + however, your server *must* then include an accurate ``Content-Length`` + header (using :meth:`send_header`) in all of its responses to clients. + For backwards compatibility, the setting defaults to ``'HTTP/1.0'``. + + .. attribute:: MessageClass + + .. index:: single: Message (in module mimetools) + + Specifies a :class:`rfc822.Message`\ -like class to parse HTTP headers. + Typically, this is not overridden, and it defaults to + :class:`mimetools.Message`. + + .. attribute:: responses + + This variable contains a mapping of error code integers to two-element tuples + containing a short and long message. For example, ``{code: (shortmessage, + longmessage)}``. The *shortmessage* is usually used as the *message* key in an + error response, and *longmessage* as the *explain* key (see the + :attr:`error_message_format` class variable). + + A :class:`BaseHTTPRequestHandler` instance has the following methods: + + .. method:: handle() + + Calls :meth:`handle_one_request` once (or, if persistent connections are + enabled, multiple times) to handle incoming HTTP requests. 
You should + never need to override it; instead, implement appropriate :meth:`do_\*` + methods. + + .. method:: handle_one_request() + + This method will parse and dispatch the request to the appropriate + :meth:`do_\*` method. You should never need to override it. + + .. method:: send_error(code[, message]) + + Sends and logs a complete error reply to the client. The numeric *code* + specifies the HTTP error code, with *message* as optional, more specific text. A + complete set of headers is sent, followed by text composed using the + :attr:`error_message_format` class variable. + + .. method:: send_response(code[, message]) + + Sends a response header and logs the accepted request. The HTTP response + line is sent, followed by *Server* and *Date* headers. The values for + these two headers are picked up from the :meth:`version_string` and + :meth:`date_time_string` methods, respectively. + + .. method:: send_header(keyword, value) + + Writes a specific HTTP header to the output stream. *keyword* should + specify the header keyword, with *value* specifying its value. + + .. method:: end_headers() + + Sends a blank line, indicating the end of the HTTP headers in the + response. + + .. method:: log_request([code[, size]]) + + Logs an accepted (successful) request. *code* should specify the numeric + HTTP code associated with the response. If a size of the response is + available, then it should be passed as the *size* parameter. + + .. method:: log_error(...) + + Logs an error when a request cannot be fulfilled. By default, it passes + the message to :meth:`log_message`, so it takes the same arguments + (*format* and additional values). + + + .. method:: log_message(format, ...) + + Logs an arbitrary message to ``sys.stderr``. This is typically overridden + to create custom error logging mechanisms. The *format* argument is a + standard printf-style format string, where the additional arguments to + :meth:`log_message` are applied as inputs to the formatting. 
The client + address and current date and time are prefixed to every message logged. + + .. method:: version_string() + + Returns the server software's version string. This is a combination of the + :attr:`server_version` and :attr:`sys_version` class variables. + + .. method:: date_time_string([timestamp]) + + Returns the date and time given by *timestamp* (which must be in the + format returned by :func:`time.time`), formatted for a message header. If + *timestamp* is omitted, it uses the current date and time. + + The result looks like ``'Sun, 06 Nov 1994 08:49:37 GMT'``. + + .. method:: log_date_time_string() + + Returns the current date and time, formatted for logging. + + .. method:: address_string() + + Returns the client address, formatted for logging. A name lookup is + performed on the client's IP address. + + +.. class:: SimpleHTTPRequestHandler(request, client_address, server) + + This class serves files from the current directory and below, directly + mapping the directory structure to HTTP requests. + + A lot of the work, such as parsing the request, is done by the base class + :class:`BaseHTTPRequestHandler`. This class implements the :func:`do_GET` + and :func:`do_HEAD` functions. + + The following are defined as class-level attributes of + :class:`SimpleHTTPRequestHandler`: + + .. attribute:: server_version + + This will be ``"SimpleHTTP/" + __version__``, where ``__version__`` is + defined at the module level. + + .. attribute:: extensions_map + + A dictionary mapping suffixes into MIME types. The default is + signified by an empty string, and is considered to be + ``application/octet-stream``. The mapping is used case-insensitively, + and so should contain only lower-cased keys. + + The :class:`SimpleHTTPRequestHandler` class defines the following methods: + + .. method:: do_HEAD() + + This method serves the ``'HEAD'`` request type: it sends the headers it + would send for the equivalent ``GET`` request. 
See the :meth:`do_GET` + method for a more complete explanation of the possible headers. + + .. method:: do_GET() + + The request is mapped to a local file by interpreting the request as a + path relative to the current working directory. + + If the request was mapped to a directory, the directory is checked for a + file named ``index.html`` or ``index.htm`` (in that order). If found, the + file's contents are returned; otherwise a directory listing is generated + by calling the :meth:`list_directory` method. This method uses + :func:`os.listdir` to scan the directory, and returns a ``404`` error + response if the :func:`listdir` fails. + + If the request was mapped to a file, it is opened and the contents are + returned. Any :exc:`IOError` exception in opening the requested file is + mapped to a ``404``, ``'File not found'`` error. Otherwise, the content + type is guessed by calling the :meth:`guess_type` method, which in turn + uses the *extensions_map* variable. + + A ``'Content-type:'`` header with the guessed content type is output, + followed by a ``'Content-Length:'`` header with the file's size and a + ``'Last-Modified:'`` header with the file's modification time. + + Then follows a blank line signifying the end of the headers, and then the + contents of the file are output. If the file's MIME type starts with + ``text/`` the file is opened in text mode; otherwise binary mode is used. + + For example usage, see the implementation of the :func:`test` function. + + +.. class:: CGIHTTPRequestHandler(request, client_address, server) + + This class is used to serve either files or output of CGI scripts from the + current directory and below. Note that mapping HTTP hierarchic structure to + local directory structure is exactly as in :class:`SimpleHTTPRequestHandler`. + + .. 
note:: + + CGI scripts run by the :class:`CGIHTTPRequestHandler` class cannot execute + redirects (HTTP code 302), because code 200 (script output follows) is + sent prior to execution of the CGI script. This pre-empts the status + code. + + The class will however, run the CGI script, instead of serving it as a file, + if it guesses it to be a CGI script. Only directory-based CGI are used --- + the other common server configuration is to treat special extensions as + denoting CGI scripts. + + The :func:`do_GET` and :func:`do_HEAD` functions are modified to run CGI scripts + and serve the output, instead of serving files, if the request leads to + somewhere below the ``cgi_directories`` path. + + The :class:`CGIHTTPRequestHandler` defines the following data member: + + .. attribute:: cgi_directories + + This defaults to ``['/cgi-bin', '/htbin']`` and describes directories to + treat as containing CGI scripts. + + The :class:`CGIHTTPRequestHandler` defines the following method: + + .. method:: do_POST() + + This method serves the ``'POST'`` request type, only allowed for CGI + scripts. Error 501, "Can only POST to CGI scripts", is output when trying + to POST to a non-CGI url. + + Note that CGI scripts will be run with UID of user nobody, for security + reasons. Problems with the CGI script will be translated to error 403. diff --git a/Doc/library/httplib.rst b/Doc/library/httplib.rst deleted file mode 100644 index 16b74ee..0000000 --- a/Doc/library/httplib.rst +++ /dev/null @@ -1,501 +0,0 @@ - -:mod:`httplib` --- HTTP protocol client -======================================= - -.. module:: httplib - :synopsis: HTTP and HTTPS protocol client (requires sockets). - - -.. index:: - pair: HTTP; protocol - single: HTTP; httplib (standard module) - -.. index:: module: urllib - -This module defines classes which implement the client side of the HTTP and -HTTPS protocols. 
It is normally not used directly --- the module :mod:`urllib` -uses it to handle URLs that use HTTP and HTTPS. - -.. note:: - - HTTPS support is only available if the :mod:`socket` module was compiled with - SSL support. - -The module provides the following classes: - - -.. class:: HTTPConnection(host[, port[, strict[, timeout]]]) - - An :class:`HTTPConnection` instance represents one transaction with an HTTP - server. It should be instantiated passing it a host and optional port - number. If no port number is passed, the port is extracted from the host - string if it has the form ``host:port``, else the default HTTP port (80) is - used. When True, the optional parameter *strict* causes ``BadStatusLine`` to - be raised if the status line can't be parsed as a valid HTTP/1.0 or 1.1 - status line. If the optional *timeout* parameter is given, blocking - operations (like connection attempts) will timeout after that many seconds - (if it is not given or ``None``, the global default timeout setting is used). - - For example, the following calls all create instances that connect to the server - at the same host and port:: - - >>> h1 = httplib.HTTPConnection('www.cwi.nl') - >>> h2 = httplib.HTTPConnection('www.cwi.nl:80') - >>> h3 = httplib.HTTPConnection('www.cwi.nl', 80) - >>> h3 = httplib.HTTPConnection('www.cwi.nl', 80, timeout=10) - - -.. class:: HTTPSConnection(host[, port[, key_file[, cert_file[, strict[, timeout]]]]]) - - A subclass of :class:`HTTPConnection` that uses SSL for communication with - secure servers. Default port is ``443``. *key_file* is the name of a PEM - formatted file that contains your private key. *cert_file* is a PEM formatted - certificate chain file. - - .. warning:: - - This does not do any certificate verification! - - -.. class:: HTTPResponse(sock[, debuglevel=0][, strict=0]) - - Class whose instances are returned upon successful connection. Not instantiated - directly by user. 
- - -The following exceptions are raised as appropriate: - - -.. exception:: HTTPException - - The base class of the other exceptions in this module. It is a subclass of - :exc:`Exception`. - - -.. exception:: NotConnected - - A subclass of :exc:`HTTPException`. - - -.. exception:: InvalidURL - - A subclass of :exc:`HTTPException`, raised if a port is given and is either - non-numeric or empty. - - -.. exception:: UnknownProtocol - - A subclass of :exc:`HTTPException`. - - -.. exception:: UnknownTransferEncoding - - A subclass of :exc:`HTTPException`. - - -.. exception:: UnimplementedFileMode - - A subclass of :exc:`HTTPException`. - - -.. exception:: IncompleteRead - - A subclass of :exc:`HTTPException`. - - -.. exception:: ImproperConnectionState - - A subclass of :exc:`HTTPException`. - - -.. exception:: CannotSendRequest - - A subclass of :exc:`ImproperConnectionState`. - - -.. exception:: CannotSendHeader - - A subclass of :exc:`ImproperConnectionState`. - - -.. exception:: ResponseNotReady - - A subclass of :exc:`ImproperConnectionState`. - - -.. exception:: BadStatusLine - - A subclass of :exc:`HTTPException`. Raised if a server responds with a HTTP - status code that we don't understand. - -The constants defined in this module are: - - -.. data:: HTTP_PORT - - The default port for the HTTP protocol (always ``80``). - - -.. data:: HTTPS_PORT - - The default port for the HTTPS protocol (always ``443``). 
- -and also the following constants for integer status codes: - -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| Constant | Value | Definition | -+==========================================+=========+=======================================================================+ -| :const:`CONTINUE` | ``100`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.1.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`SWITCHING_PROTOCOLS` | ``101`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.1.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`PROCESSING` | ``102`` | WEBDAV, `RFC 2518, Section 10.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`OK` | ``200`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`CREATED` | ``201`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`ACCEPTED` | ``202`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.3 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NON_AUTHORITATIVE_INFORMATION` | ``203`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.4 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NO_CONTENT` | ``204`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.5 | -| | | 
`_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`RESET_CONTENT` | ``205`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.6 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`PARTIAL_CONTENT` | ``206`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.2.7 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`MULTI_STATUS` | ``207`` | WEBDAV `RFC 2518, Section 10.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`IM_USED` | ``226`` | Delta encoding in HTTP, | -| | | :rfc:`3229`, Section 10.4.1 | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`MULTIPLE_CHOICES` | ``300`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`MOVED_PERMANENTLY` | ``301`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`FOUND` | ``302`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.3 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`SEE_OTHER` | ``303`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.4 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NOT_MODIFIED` | ``304`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.5 | -| 
| | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`USE_PROXY` | ``305`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.6 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`TEMPORARY_REDIRECT` | ``307`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.3.8 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`BAD_REQUEST` | ``400`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`UNAUTHORIZED` | ``401`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`PAYMENT_REQUIRED` | ``402`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.3 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`FORBIDDEN` | ``403`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.4 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NOT_FOUND` | ``404`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.5 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`METHOD_NOT_ALLOWED` | ``405`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.6 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NOT_ACCEPTABLE` | ``406`` | HTTP/1.1, `RFC 2616, 
Section | -| | | 10.4.7 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`PROXY_AUTHENTICATION_REQUIRED` | ``407`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.8 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`REQUEST_TIMEOUT` | ``408`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.9 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`CONFLICT` | ``409`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.10 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`GONE` | ``410`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.11 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`LENGTH_REQUIRED` | ``411`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.12 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`PRECONDITION_FAILED` | ``412`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.13 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`REQUEST_ENTITY_TOO_LARGE` | ``413`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.14 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`REQUEST_URI_TOO_LONG` | ``414`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.15 | -| | | `_ | 
-+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`UNSUPPORTED_MEDIA_TYPE` | ``415`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.16 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`REQUESTED_RANGE_NOT_SATISFIABLE` | ``416`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.17 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`EXPECTATION_FAILED` | ``417`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.4.18 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`UNPROCESSABLE_ENTITY` | ``422`` | WEBDAV, `RFC 2518, Section 10.3 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`LOCKED` | ``423`` | WEBDAV `RFC 2518, Section 10.4 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`FAILED_DEPENDENCY` | ``424`` | WEBDAV, `RFC 2518, Section 10.5 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, | -| | | :rfc:`2817`, Section 6 | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.5.1 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NOT_IMPLEMENTED` | ``501`` | HTTP/1.1, `RFC 2616, 
Section | -| | | 10.5.2 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`BAD_GATEWAY` | ``502`` | HTTP/1.1 `RFC 2616, Section | -| | | 10.5.3 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`SERVICE_UNAVAILABLE` | ``503`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.5.4 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`GATEWAY_TIMEOUT` | ``504`` | HTTP/1.1 `RFC 2616, Section | -| | | 10.5.5 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`HTTP_VERSION_NOT_SUPPORTED` | ``505`` | HTTP/1.1, `RFC 2616, Section | -| | | 10.5.6 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`INSUFFICIENT_STORAGE` | ``507`` | WEBDAV, `RFC 2518, Section 10.6 | -| | | `_ | -+------------------------------------------+---------+-----------------------------------------------------------------------+ -| :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, | -| | | :rfc:`2774`, Section 7 | -+------------------------------------------+---------+-----------------------------------------------------------------------+ - - -.. data:: responses - - This dictionary maps the HTTP 1.1 status codes to the W3C names. - - Example: ``httplib.responses[httplib.NOT_FOUND]`` is ``'Not Found'``. - - -.. _httpconnection-objects: - -HTTPConnection Objects ----------------------- - -:class:`HTTPConnection` instances have the following methods: - - -.. 
method:: HTTPConnection.request(method, url[, body[, headers]]) - - This will send a request to the server using the HTTP request method *method* - and the selector *url*. If the *body* argument is present, it should be a - string of data to send after the headers are finished. Alternatively, it may - be an open file object, in which case the contents of the file is sent; this - file object should support ``fileno()`` and ``read()`` methods. The header - Content-Length is automatically set to the correct value. The *headers* - argument should be a mapping of extra HTTP headers to send with the request. - - -.. method:: HTTPConnection.getresponse() - - Should be called after a request is sent to get the response from the server. - Returns an :class:`HTTPResponse` instance. - - .. note:: - - Note that you must have read the whole response before you can send a new - request to the server. - - -.. method:: HTTPConnection.set_debuglevel(level) - - Set the debugging level (the amount of debugging output printed). The default - debug level is ``0``, meaning no debugging output is printed. - - -.. method:: HTTPConnection.connect() - - Connect to the server specified when the object was created. - - -.. method:: HTTPConnection.close() - - Close the connection to the server. - -As an alternative to using the :meth:`request` method described above, you can -also send your request step by step, by using the four functions below. - - -.. method:: HTTPConnection.putrequest(request, selector[, skip_host[, skip_accept_encoding]]) - - This should be the first call after the connection to the server has been made. - It sends a line to the server consisting of the *request* string, the *selector* - string, and the HTTP version (``HTTP/1.1``). To disable automatic sending of - ``Host:`` or ``Accept-Encoding:`` headers (for example to accept additional - content encodings), specify *skip_host* or *skip_accept_encoding* with non-False - values. - - -.. 
method:: HTTPConnection.putheader(header, argument[, ...]) - - Send an :rfc:`822`\ -style header to the server. It sends a line to the server - consisting of the header, a colon and a space, and the first argument. If more - arguments are given, continuation lines are sent, each consisting of a tab and - an argument. - - -.. method:: HTTPConnection.endheaders() - - Send a blank line to the server, signalling the end of the headers. - - -.. method:: HTTPConnection.send(data) - - Send data to the server. This should be used directly only after the - :meth:`endheaders` method has been called and before :meth:`getresponse` is - called. - - -.. _httpresponse-objects: - -HTTPResponse Objects --------------------- - -:class:`HTTPResponse` instances have the following methods and attributes: - - -.. method:: HTTPResponse.read([amt]) - - Reads and returns the response body, or up to the next *amt* bytes. - - -.. method:: HTTPResponse.getheader(name[, default]) - - Get the contents of the header *name*, or *default* if there is no matching - header. - - -.. method:: HTTPResponse.getheaders() - - Return a list of (header, value) tuples. - - -.. attribute:: HTTPResponse.msg - - A :class:`mimetools.Message` instance containing the response headers. - - -.. attribute:: HTTPResponse.version - - HTTP protocol version used by server. 10 for HTTP/1.0, 11 for HTTP/1.1. - - -.. attribute:: HTTPResponse.status - - Status code returned by server. - - -.. attribute:: HTTPResponse.reason - - Reason phrase returned by server. - - -.. 
_httplib-examples: - -Examples --------- - -Here is an example session that uses the ``GET`` method:: - - >>> import httplib - >>> conn = httplib.HTTPConnection("www.python.org") - >>> conn.request("GET", "/index.html") - >>> r1 = conn.getresponse() - >>> print(r1.status, r1.reason) - 200 OK - >>> data1 = r1.read() - >>> conn.request("GET", "/parrot.spam") - >>> r2 = conn.getresponse() - >>> print(r2.status, r2.reason) - 404 Not Found - >>> data2 = r2.read() - >>> conn.close() - -Here is an example session that shows how to ``POST`` requests:: - - >>> import httplib, urllib - >>> params = urllib.urlencode({'spam': 1, 'eggs': 2, 'bacon': 0}) - >>> headers = {"Content-type": "application/x-www-form-urlencoded", - ... "Accept": "text/plain"} - >>> conn = httplib.HTTPConnection("musi-cal.mojam.com:80") - >>> conn.request("POST", "/cgi-bin/query", params, headers) - >>> response = conn.getresponse() - >>> print(response.status, response.reason) - 200 OK - >>> data = response.read() - >>> conn.close() - diff --git a/Doc/library/internet.rst b/Doc/library/internet.rst index a5f6d22..948a0b2 100644 --- a/Doc/library/internet.rst +++ b/Doc/library/internet.rst @@ -26,7 +26,7 @@ is currently supported on most popular platforms. Here is an overview: wsgiref.rst urllib.rst urllib2.rst - httplib.rst + http.client.rst ftplib.rst poplib.rst imaplib.rst @@ -37,10 +37,8 @@ is currently supported on most popular platforms. Here is an overview: uuid.rst urlparse.rst socketserver.rst - basehttpserver.rst - simplehttpserver.rst - cgihttpserver.rst - cookielib.rst - cookie.rst + http.server.rst + http.cookies.rst + http.cookiejar.rst xmlrpc.client.rst xmlrpc.server.rst diff --git a/Doc/library/simplehttpserver.rst b/Doc/library/simplehttpserver.rst deleted file mode 100644 index 7d99681..0000000 --- a/Doc/library/simplehttpserver.rst +++ /dev/null @@ -1,86 +0,0 @@ - -:mod:`SimpleHTTPServer` --- Simple HTTP request handler -======================================================= - -.. 
module:: SimpleHTTPServer - :synopsis: This module provides a basic request handler for HTTP servers. -.. sectionauthor:: Moshe Zadka - - -The :mod:`SimpleHTTPServer` module defines a single class, -:class:`SimpleHTTPRequestHandler`, which is interface-compatible with -:class:`BaseHTTPServer.BaseHTTPRequestHandler`. - -The :mod:`SimpleHTTPServer` module defines the following class: - - -.. class:: SimpleHTTPRequestHandler(request, client_address, server) - - This class serves files from the current directory and below, directly - mapping the directory structure to HTTP requests. - - A lot of the work, such as parsing the request, is done by the base class - :class:`BaseHTTPServer.BaseHTTPRequestHandler`. This class implements the - :func:`do_GET` and :func:`do_HEAD` functions. - - The following are defined as class-level attributes of - :class:`SimpleHTTPRequestHandler`: - - - .. attribute:: server_version - - This will be ``"SimpleHTTP/" + __version__``, where ``__version__`` is - defined at the module level. - - - .. attribute:: extensions_map - - A dictionary mapping suffixes into MIME types. The default is - signified by an empty string, and is considered to be - ``application/octet-stream``. The mapping is used case-insensitively, - and so should contain only lower-cased keys. - - The :class:`SimpleHTTPRequestHandler` class defines the following methods: - - - .. method:: do_HEAD() - - This method serves the ``'HEAD'`` request type: it sends the headers it - would send for the equivalent ``GET`` request. See the :meth:`do_GET` - method for a more complete explanation of the possible headers. - - - .. method:: do_GET() - - The request is mapped to a local file by interpreting the request as a - path relative to the current working directory. - - If the request was mapped to a directory, the directory is checked for a - file named ``index.html`` or ``index.htm`` (in that order). 
If found, the - file's contents are returned; otherwise a directory listing is generated - by calling the :meth:`list_directory` method. This method uses - :func:`os.listdir` to scan the directory, and returns a ``404`` error - response if the :func:`listdir` fails. - - If the request was mapped to a file, it is opened and the contents are - returned. Any :exc:`IOError` exception in opening the requested file is - mapped to a ``404``, ``'File not found'`` error. Otherwise, the content - type is guessed by calling the :meth:`guess_type` method, which in turn - uses the *extensions_map* variable. - - A ``'Content-type:'`` header with the guessed content type is output, - followed by a ``'Content-Length:'`` header with the file's size and a - ``'Last-Modified:'`` header with the file's modification time. - - Then follows a blank line signifying the end of the headers, and then the - contents of the file are output. If the file's MIME type starts with - ``text/`` the file is opened in text mode; otherwise binary mode is used. - - For example usage, see the implementation of the :func:`test` function. - - -.. seealso:: - - Module :mod:`BaseHTTPServer` - Base class implementation for Web server and request handler. - diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index f60620c..33904b5 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -489,7 +489,7 @@ sends some bytes, and reads part of the response:: pprint.pprint(ssl_sock.getpeercert()) print(pprint.pformat(ssl_sock.getpeercert())) - # Set a simple HTTP request -- use httplib in actual code. + # Set a simple HTTP request -- use http.client in actual code. 
ssl_sock.write("""GET / HTTP/1.0\r Host: www.verisign.com\r\n\r\n""") diff --git a/Doc/library/urllib2.rst b/Doc/library/urllib2.rst index 143fe50..0462222 100644 --- a/Doc/library/urllib2.rst +++ b/Doc/library/urllib2.rst @@ -37,7 +37,7 @@ The :mod:`urllib2` module defines the following functions: determine if a redirect was followed * :meth:`info` --- return the meta-information of the page, such as headers, in - the form of an ``httplib.HTTPMessage`` instance + the form of an ``http.client.HTTPMessage`` instance (see `Quick Reference to HTTP Headers `_) Raises :exc:`URLError` on errors. @@ -100,7 +100,7 @@ The following exceptions are raised as appropriate: An HTTP status code as defined in `RFC 2616 `_. This numeric value corresponds to a value found in the dictionary of - codes as found in :attr:`BaseHTTPServer.BaseHTTPRequestHandler.responses`. + codes as found in :attr:`http.server.BaseHTTPRequestHandler.responses`. @@ -133,10 +133,11 @@ The following classes are provided: HTTP cookies: *origin_req_host* should be the request-host of the origin transaction, as - defined by :rfc:`2965`. It defaults to ``cookielib.request_host(self)``. This - is the host name or IP address of the original request that was initiated by the - user. For example, if the request is for an image in an HTML document, this - should be the request-host of the request for the page containing the image. + defined by :rfc:`2965`. It defaults to ``http.cookiejar.request_host(self)``. + This is the host name or IP address of the original request that was + initiated by the user. For example, if the request is for an image in an + HTML document, this should be the request-host of the request for the page + containing the image. *unverifiable* should indicate whether the request is unverifiable, as defined by RFC 2965. It defaults to False. An unverifiable request is one whose URL @@ -627,7 +628,7 @@ HTTPCookieProcessor Objects .. 
attribute:: HTTPCookieProcessor.cookiejar - The :class:`cookielib.CookieJar` in which cookies are stored. + The :class:`http.cookiejar.CookieJar` in which cookies are stored. .. _proxy-handler: diff --git a/Doc/library/wsgiref.rst b/Doc/library/wsgiref.rst index 2437bcd..70064fc 100644 --- a/Doc/library/wsgiref.rst +++ b/Doc/library/wsgiref.rst @@ -260,7 +260,7 @@ manipulation of WSGI response headers using a mapping-like interface. :synopsis: A simple WSGI HTTP server. -This module implements a simple HTTP server (based on :mod:`BaseHTTPServer`) +This module implements a simple HTTP server (based on :mod:`http.server`) that serves WSGI applications. Each server instance serves a single WSGI application on a given host and port. If you want to serve multiple applications on a single host and port, you should create a WSGI application @@ -303,13 +303,13 @@ request. (E.g., using the :func:`shift_path_info` function from Create a :class:`WSGIServer` instance. *server_address* should be a ``(host,port)`` tuple, and *RequestHandlerClass* should be the subclass of - :class:`BaseHTTPServer.BaseHTTPRequestHandler` that will be used to process + :class:`http.server.BaseHTTPRequestHandler` that will be used to process requests. You do not normally need to call this constructor, as the :func:`make_server` function can handle all the details for you. - :class:`WSGIServer` is a subclass of :class:`BaseHTTPServer.HTTPServer`, so all + :class:`WSGIServer` is a subclass of :class:`http.server.HTTPServer`, so all of its methods (such as :meth:`serve_forever` and :meth:`handle_request`) are available. :class:`WSGIServer` also provides these WSGI-specific methods: diff --git a/Doc/library/xmlrpc.client.rst b/Doc/library/xmlrpc.client.rst index 7d59750..e249b12 100644 --- a/Doc/library/xmlrpc.client.rst +++ b/Doc/library/xmlrpc.client.rst @@ -506,14 +506,14 @@ transport. 
The following example shows how: :: - import xmlrpc.client, httplib + import xmlrpc.client, http.client class ProxiedTransport(xmlrpc.client.Transport): def set_proxy(self, proxy): self.proxy = proxy def make_connection(self, host): self.realhost = host - h = httplib.HTTP(self.proxy) + h = http.client.HTTP(self.proxy) return h def send_request(self, connection, handler, request_body): connection.putrequest("POST", 'http://%s%s' % (self.realhost, handler)) diff --git a/Doc/license.rst b/Doc/license.rst index ed399dc..892b5ea 100644 --- a/Doc/license.rst +++ b/Doc/license.rst @@ -449,7 +449,7 @@ The :mod:`asynchat` and :mod:`asyncore` modules contain the following notice:: Cookie management ----------------- -The :mod:`Cookie` module contains the following notice:: +The :mod:`http.cookies` module contains the following notice:: Copyright 2000 by Timothy O'Malley diff --git a/Lib/BaseHTTPServer.py b/Lib/BaseHTTPServer.py deleted file mode 100644 index 7fbbcf2..0000000 --- a/Lib/BaseHTTPServer.py +++ /dev/null @@ -1,601 +0,0 @@ -"""HTTP server base class. - -Note: the class in this module doesn't implement any HTTP request; see -SimpleHTTPServer for simple implementations of GET, HEAD and POST -(including CGI scripts). It does, however, optionally implement HTTP/1.1 -persistent connections, as of version 0.3. - -Contents: - -- BaseHTTPRequestHandler: HTTP request handler base class -- test: test function - -XXX To do: - -- log requests even later (to capture byte count) -- log user-agent header and other interesting goodies -- send error log to separate file -""" - - -# See also: -# -# HTTP Working Group T. Berners-Lee -# INTERNET-DRAFT R. T. Fielding -# H. Frystyk Nielsen -# Expires September 8, 1995 March 8, 1995 -# -# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt -# -# and -# -# Network Working Group R. 
Fielding -# Request for Comments: 2616 et al -# Obsoletes: 2068 June 1999 -# Category: Standards Track -# -# URL: http://www.faqs.org/rfcs/rfc2616.html - -# Log files -# --------- -# -# Here's a quote from the NCSA httpd docs about log file format. -# -# | The logfile format is as follows. Each line consists of: -# | -# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb -# | -# | host: Either the DNS name or the IP number of the remote client -# | rfc931: Any information returned by identd for this person, -# | - otherwise. -# | authuser: If user sent a userid for authentication, the user name, -# | - otherwise. -# | DD: Day -# | Mon: Month (calendar name) -# | YYYY: Year -# | hh: hour (24-hour format, the machine's timezone) -# | mm: minutes -# | ss: seconds -# | request: The first line of the HTTP request as sent by the client. -# | ddd: the status code returned by the server, - if not available. -# | bbbb: the total number of bytes sent, -# | *not including the HTTP/1.0 header*, - if not available -# | -# | You can determine the name of the file accessed through request. -# -# (Actually, the latter is only true if you know the server configuration -# at the time the request was made!) - -__version__ = "0.3" - -__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] - -import io -import sys -import time -import socket # For gethostbyaddr() -import mimetools -import socketserver - -# Default error message template -DEFAULT_ERROR_MESSAGE = """\ - -Error response - - -

Error response

-

Error code %(code)d. -

Message: %(message)s. -

Error code explanation: %(code)s = %(explain)s. - -""" - -DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" - -def _quote_html(html): - return html.replace("&", "&").replace("<", "<").replace(">", ">") - -class HTTPServer(socketserver.TCPServer): - - allow_reuse_address = 1 # Seems to make sense in testing environment - - def server_bind(self): - """Override server_bind to store the server name.""" - socketserver.TCPServer.server_bind(self) - host, port = self.socket.getsockname()[:2] - self.server_name = socket.getfqdn(host) - self.server_port = port - - -class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): - - """HTTP request handler base class. - - The following explanation of HTTP serves to guide you through the - code as well as to expose any misunderstandings I may have about - HTTP (so you don't need to read the code to figure out I'm wrong - :-). - - HTTP (HyperText Transfer Protocol) is an extensible protocol on - top of a reliable stream transport (e.g. TCP/IP). The protocol - recognizes three parts to a request: - - 1. One line identifying the request type and path - 2. An optional set of RFC-822-style headers - 3. An optional data part - - The headers and data are separated by a blank line. - - The first line of the request has the form - - - - where is a (case-sensitive) keyword such as GET or POST, - is a string containing path information for the request, - and should be the string "HTTP/1.0" or "HTTP/1.1". - is encoded using the URL encoding scheme (using %xx to signify - the ASCII character with hex code xx). - - The specification specifies that lines are separated by CRLF but - for compatibility with the widest range of clients recommends - servers also handle LF. Similarly, whitespace in the request line - is treated sensibly (allowing multiple spaces between components - and allowing trailing whitespace). - - Similarly, for output, lines ought to be separated by CRLF pairs - but most clients grok LF characters just fine. 
- - If the first line of the request has the form - - - - (i.e. is left out) then this is assumed to be an HTTP - 0.9 request; this form has no optional headers and data part and - the reply consists of just the data. - - The reply form of the HTTP 1.x protocol again has three parts: - - 1. One line giving the response code - 2. An optional set of RFC-822-style headers - 3. The data - - Again, the headers and data are separated by a blank line. - - The response code line has the form - - - - where is the protocol version ("HTTP/1.0" or "HTTP/1.1"), - is a 3-digit response code indicating success or - failure of the request, and is an optional - human-readable string explaining what the response code means. - - This server parses the request and the headers, and then calls a - function specific to the request type (). Specifically, - a request SPAM will be handled by a method do_SPAM(). If no - such method exists the server sends an error response to the - client. If it exists, it is called with no arguments: - - do_SPAM() - - Note that the request name is case sensitive (i.e. SPAM and spam - are different requests). - - The various request details are stored in instance variables: - - - client_address is the client IP address in the form (host, - port); - - - command, path and version are the broken-down request line; - - - headers is an instance of mimetools.Message (or a derived - class) containing the header information; - - - rfile is a file object open for reading positioned at the - start of the optional input data part; - - - wfile is a file object open for writing. - - IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! - - The first thing to be written must be the response line. Then - follow 0 or more header lines, then a blank line, and then the - actual data (if any). 
The meaning of the header lines depends on - the command executed by the server; in most cases, when data is - returned, there should be at least one header line of the form - - Content-type: / - - where and should be registered MIME types, - e.g. "text/html" or "text/plain". - - """ - - # The Python system version, truncated to its first component. - sys_version = "Python/" + sys.version.split()[0] - - # The server software version. You may want to override this. - # The format is multiple whitespace-separated strings, - # where each string is of the form name[/version]. - server_version = "BaseHTTP/" + __version__ - - error_message_format = DEFAULT_ERROR_MESSAGE - error_content_type = DEFAULT_ERROR_CONTENT_TYPE - - # The default request version. This only affects responses up until - # the point where the request line is parsed, so it mainly decides what - # the client gets back when sending a malformed request line. - # Most web servers default to HTTP 0.9, i.e. don't send a status line. - default_request_version = "HTTP/0.9" - - def parse_request(self): - """Parse a request (internal). - - The request should be stored in self.raw_requestline; the results - are in self.command, self.path, self.request_version and - self.headers. - - Return True for success, False for failure; on failure, an - error is sent back. 
- - """ - self.command = None # set in case of error on the first line - self.request_version = version = self.default_request_version - self.close_connection = 1 - requestline = str(self.raw_requestline, 'iso-8859-1') - if requestline[-2:] == '\r\n': - requestline = requestline[:-2] - elif requestline[-1:] == '\n': - requestline = requestline[:-1] - self.requestline = requestline - words = requestline.split() - if len(words) == 3: - [command, path, version] = words - if version[:5] != 'HTTP/': - self.send_error(400, "Bad request version (%r)" % version) - return False - try: - base_version_number = version.split('/', 1)[1] - version_number = base_version_number.split(".") - # RFC 2145 section 3.1 says there can be only one "." and - # - major and minor numbers MUST be treated as - # separate integers; - # - HTTP/2.4 is a lower version than HTTP/2.13, which in - # turn is lower than HTTP/12.3; - # - Leading zeros MUST be ignored by recipients. - if len(version_number) != 2: - raise ValueError - version_number = int(version_number[0]), int(version_number[1]) - except (ValueError, IndexError): - self.send_error(400, "Bad request version (%r)" % version) - return False - if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": - self.close_connection = 0 - if version_number >= (2, 0): - self.send_error(505, - "Invalid HTTP Version (%s)" % base_version_number) - return False - elif len(words) == 2: - [command, path] = words - self.close_connection = 1 - if command != 'GET': - self.send_error(400, - "Bad HTTP/0.9 request type (%r)" % command) - return False - elif not words: - return False - else: - self.send_error(400, "Bad request syntax (%r)" % requestline) - return False - self.command, self.path, self.request_version = command, path, version - - # Examine the headers and look for a Connection directive. - - # MessageClass (rfc822) wants to see strings rather than bytes. 
- # But a TextIOWrapper around self.rfile would buffer too many bytes - # from the stream, bytes which we later need to read as bytes. - # So we read the correct bytes here, as bytes, then use StringIO - # to make them look like strings for MessageClass to parse. - headers = [] - while True: - line = self.rfile.readline() - headers.append(line) - if line in (b'\r\n', b'\n', b''): - break - hfile = io.StringIO(b''.join(headers).decode('iso-8859-1')) - self.headers = self.MessageClass(hfile) - - conntype = self.headers.get('Connection', "") - if conntype.lower() == 'close': - self.close_connection = 1 - elif (conntype.lower() == 'keep-alive' and - self.protocol_version >= "HTTP/1.1"): - self.close_connection = 0 - return True - - def handle_one_request(self): - """Handle a single HTTP request. - - You normally don't need to override this method; see the class - __doc__ string for information on how to handle specific HTTP - commands such as GET and POST. - - """ - self.raw_requestline = self.rfile.readline() - if not self.raw_requestline: - self.close_connection = 1 - return - if not self.parse_request(): # An error code has been sent, just exit - return - mname = 'do_' + self.command - if not hasattr(self, mname): - self.send_error(501, "Unsupported method (%r)" % self.command) - return - method = getattr(self, mname) - method() - - def handle(self): - """Handle multiple requests if necessary.""" - self.close_connection = 1 - - self.handle_one_request() - while not self.close_connection: - self.handle_one_request() - - def send_error(self, code, message=None): - """Send and log an error reply. - - Arguments are the error code, and a detailed message. - The detailed message defaults to the short entry matching the - response code. - - This sends an error response (so it must be called before any - output has been generated), logs the error, and finally sends - a piece of HTML explaining the error to the user. 
- - """ - - try: - shortmsg, longmsg = self.responses[code] - except KeyError: - shortmsg, longmsg = '???', '???' - if message is None: - message = shortmsg - explain = longmsg - self.log_error("code %d, message %s", code, message) - # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) - content = (self.error_message_format % - {'code': code, 'message': _quote_html(message), 'explain': explain}) - self.send_response(code, message) - self.send_header("Content-Type", self.error_content_type) - self.send_header('Connection', 'close') - self.end_headers() - if self.command != 'HEAD' and code >= 200 and code not in (204, 304): - self.wfile.write(content.encode('UTF-8', 'replace')) - - def send_response(self, code, message=None): - """Send the response header and log the response code. - - Also send two standard headers with the server software - version and the current date. - - """ - self.log_request(code) - if message is None: - if code in self.responses: - message = self.responses[code][0] - else: - message = '' - if self.request_version != 'HTTP/0.9': - self.wfile.write(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode('ASCII', 'strict')) - # print (self.protocol_version, code, message) - self.send_header('Server', self.version_string()) - self.send_header('Date', self.date_time_string()) - - def send_header(self, keyword, value): - """Send a MIME header.""" - if self.request_version != 'HTTP/0.9': - self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict')) - - if keyword.lower() == 'connection': - if value.lower() == 'close': - self.close_connection = 1 - elif value.lower() == 'keep-alive': - self.close_connection = 0 - - def end_headers(self): - """Send the blank line ending the MIME headers.""" - if self.request_version != 'HTTP/0.9': - self.wfile.write(b"\r\n") - - def log_request(self, code='-', size='-'): - """Log an accepted request. - - This is called by send_response(). 
- - """ - - self.log_message('"%s" %s %s', - self.requestline, str(code), str(size)) - - def log_error(self, format, *args): - """Log an error. - - This is called when a request cannot be fulfilled. By - default it passes the message on to log_message(). - - Arguments are the same as for log_message(). - - XXX This should go to the separate error log. - - """ - - self.log_message(format, *args) - - def log_message(self, format, *args): - """Log an arbitrary message. - - This is used by all other logging functions. Override - it if you have specific logging wishes. - - The first argument, FORMAT, is a format string for the - message to be logged. If the format string contains - any % escapes requiring parameters, they should be - specified as subsequent arguments (it's just like - printf!). - - The client host and current date/time are prefixed to - every message. - - """ - - sys.stderr.write("%s - - [%s] %s\n" % - (self.address_string(), - self.log_date_time_string(), - format%args)) - - def version_string(self): - """Return the server software version string.""" - return self.server_version + ' ' + self.sys_version - - def date_time_string(self, timestamp=None): - """Return the current date and time formatted for a message header.""" - if timestamp is None: - timestamp = time.time() - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) - s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - self.weekdayname[wd], - day, self.monthname[month], year, - hh, mm, ss) - return s - - def log_date_time_string(self): - """Return the current time formatted for logging.""" - now = time.time() - year, month, day, hh, mm, ss, x, y, z = time.localtime(now) - s = "%02d/%3s/%04d %02d:%02d:%02d" % ( - day, self.monthname[month], year, hh, mm, ss) - return s - - weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - - monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - - def address_string(self): - """Return 
the client address formatted for logging. - - This version looks up the full hostname using gethostbyaddr(), - and tries to find a name that contains at least one dot. - - """ - - host, port = self.client_address[:2] - return socket.getfqdn(host) - - # Essentially static class variables - - # The version of the HTTP protocol we support. - # Set this to HTTP/1.1 to enable automatic keepalive - protocol_version = "HTTP/1.0" - - # The Message-like class used to parse headers - MessageClass = mimetools.Message - - # Table mapping response codes to messages; entries have the - # form {code: (shortmessage, longmessage)}. - # See RFC 2616. - responses = { - 100: ('Continue', 'Request received, please continue'), - 101: ('Switching Protocols', - 'Switching to new protocol; obey Upgrade header'), - - 200: ('OK', 'Request fulfilled, document follows'), - 201: ('Created', 'Document created, URL follows'), - 202: ('Accepted', - 'Request accepted, processing continues off-line'), - 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), - 204: ('No Content', 'Request fulfilled, nothing follows'), - 205: ('Reset Content', 'Clear input form for further input.'), - 206: ('Partial Content', 'Partial content follows.'), - - 300: ('Multiple Choices', - 'Object has several resources -- see URI list'), - 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), - 302: ('Found', 'Object moved temporarily -- see URI list'), - 303: ('See Other', 'Object moved -- see Method and URL list'), - 304: ('Not Modified', - 'Document has not changed since given time'), - 305: ('Use Proxy', - 'You must use proxy specified in Location to access this ' - 'resource.'), - 307: ('Temporary Redirect', - 'Object moved temporarily -- see URI list'), - - 400: ('Bad Request', - 'Bad request syntax or unsupported method'), - 401: ('Unauthorized', - 'No permission -- see authorization schemes'), - 402: ('Payment Required', - 'No payment -- see charging schemes'), - 403: 
('Forbidden', - 'Request forbidden -- authorization will not help'), - 404: ('Not Found', 'Nothing matches the given URI'), - 405: ('Method Not Allowed', - 'Specified method is invalid for this server.'), - 406: ('Not Acceptable', 'URI not available in preferred format.'), - 407: ('Proxy Authentication Required', 'You must authenticate with ' - 'this proxy before proceeding.'), - 408: ('Request Timeout', 'Request timed out; try again later.'), - 409: ('Conflict', 'Request conflict.'), - 410: ('Gone', - 'URI no longer exists and has been permanently removed.'), - 411: ('Length Required', 'Client must specify Content-Length.'), - 412: ('Precondition Failed', 'Precondition in headers is false.'), - 413: ('Request Entity Too Large', 'Entity is too large.'), - 414: ('Request-URI Too Long', 'URI is too long.'), - 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), - 416: ('Requested Range Not Satisfiable', - 'Cannot satisfy request range.'), - 417: ('Expectation Failed', - 'Expect condition could not be satisfied.'), - - 500: ('Internal Server Error', 'Server got itself in trouble'), - 501: ('Not Implemented', - 'Server does not support this operation'), - 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), - 503: ('Service Unavailable', - 'The server cannot process the request due to a high load'), - 504: ('Gateway Timeout', - 'The gateway server did not receive a timely response'), - 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), - } - - -def test(HandlerClass = BaseHTTPRequestHandler, - ServerClass = HTTPServer, protocol="HTTP/1.0"): - """Test the HTTP request handler class. - - This runs an HTTP server on port 8000 (or the first command line - argument). 
- - """ - - if sys.argv[1:]: - port = int(sys.argv[1]) - else: - port = 8000 - server_address = ('', port) - - HandlerClass.protocol_version = protocol - httpd = ServerClass(server_address, HandlerClass) - - sa = httpd.socket.getsockname() - print("Serving HTTP on", sa[0], "port", sa[1], "...") - httpd.serve_forever() - - -if __name__ == '__main__': - test() diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py deleted file mode 100644 index 29d701a..0000000 --- a/Lib/CGIHTTPServer.py +++ /dev/null @@ -1,367 +0,0 @@ -"""CGI-savvy HTTP Server. - -This module builds on SimpleHTTPServer by implementing GET and POST -requests to cgi-bin scripts. - -If the os.fork() function is not present (e.g. on Windows), -os.popen2() is used as a fallback, with slightly altered semantics; if -that function is not present either (e.g. on Macintosh), only Python -scripts are supported, and they are executed by the current process. - -In all cases, the implementation is intentionally naive -- all -requests are executed sychronously. - -SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL --- it may execute arbitrary Python code or external programs. - -Note that status code 200 is sent prior to execution of a CGI script, so -scripts cannot send other status codes such as 302 (redirect). -""" - - -__version__ = "0.4" - -__all__ = ["CGIHTTPRequestHandler"] - -import os -import sys -import urllib -import BaseHTTPServer -import SimpleHTTPServer -import select - - -class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): - - """Complete HTTP server with GET, HEAD and POST commands. - - GET and HEAD also support running CGI scripts. - - The POST command is *only* implemented for CGI scripts. 
- - """ - - # Determine platform specifics - have_fork = hasattr(os, 'fork') - have_popen2 = hasattr(os, 'popen2') - have_popen3 = hasattr(os, 'popen3') - - # Make rfile unbuffered -- we need to read one line and then pass - # the rest to a subprocess, so we can't use buffered input. - rbufsize = 0 - - def do_POST(self): - """Serve a POST request. - - This is only implemented for CGI scripts. - - """ - - if self.is_cgi(): - self.run_cgi() - else: - self.send_error(501, "Can only POST to CGI scripts") - - def send_head(self): - """Version of send_head that support CGI scripts""" - if self.is_cgi(): - return self.run_cgi() - else: - return SimpleHTTPServer.SimpleHTTPRequestHandler.send_head(self) - - def is_cgi(self): - """Test whether self.path corresponds to a CGI script. - - Return a tuple (dir, rest) if self.path requires running a - CGI script, None if not. Note that rest begins with a - slash if it is not empty. - - The default implementation tests whether the path - begins with one of the strings in the list - self.cgi_directories (and the next character is a '/' - or the end of the string). 
- - """ - - path = self.path - - for x in self.cgi_directories: - i = len(x) - if path[:i] == x and (not path[i:] or path[i] == '/'): - self.cgi_info = path[:i], path[i+1:] - return True - return False - - cgi_directories = ['/cgi-bin', '/htbin'] - - def is_executable(self, path): - """Test whether argument path is an executable file.""" - return executable(path) - - def is_python(self, path): - """Test whether argument path is a Python script.""" - head, tail = os.path.splitext(path) - return tail.lower() in (".py", ".pyw") - - def run_cgi(self): - """Execute a CGI script.""" - path = self.path - dir, rest = self.cgi_info - - i = path.find('/', len(dir) + 1) - while i >= 0: - nextdir = path[:i] - nextrest = path[i+1:] - - scriptdir = self.translate_path(nextdir) - if os.path.isdir(scriptdir): - dir, rest = nextdir, nextrest - i = path.find('/', len(dir) + 1) - else: - break - - # find an explicit query string, if present. - i = rest.rfind('?') - if i >= 0: - rest, query = rest[:i], rest[i+1:] - else: - query = '' - - # dissect the part after the directory name into a script name & - # a possible additional path, to be stored in PATH_INFO. 
- i = rest.find('/') - if i >= 0: - script, rest = rest[:i], rest[i:] - else: - script, rest = rest, '' - - scriptname = dir + '/' + script - scriptfile = self.translate_path(scriptname) - if not os.path.exists(scriptfile): - self.send_error(404, "No such CGI script (%r)" % scriptname) - return - if not os.path.isfile(scriptfile): - self.send_error(403, "CGI script is not a plain file (%r)" % - scriptname) - return - ispy = self.is_python(scriptname) - if not ispy: - if not (self.have_fork or self.have_popen2 or self.have_popen3): - self.send_error(403, "CGI script is not a Python script (%r)" % - scriptname) - return - if not self.is_executable(scriptfile): - self.send_error(403, "CGI script is not executable (%r)" % - scriptname) - return - - # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html - # XXX Much of the following could be prepared ahead of time! - env = {} - env['SERVER_SOFTWARE'] = self.version_string() - env['SERVER_NAME'] = self.server.server_name - env['GATEWAY_INTERFACE'] = 'CGI/1.1' - env['SERVER_PROTOCOL'] = self.protocol_version - env['SERVER_PORT'] = str(self.server.server_port) - env['REQUEST_METHOD'] = self.command - uqrest = urllib.unquote(rest) - env['PATH_INFO'] = uqrest - env['PATH_TRANSLATED'] = self.translate_path(uqrest) - env['SCRIPT_NAME'] = scriptname - if query: - env['QUERY_STRING'] = query - host = self.address_string() - if host != self.client_address[0]: - env['REMOTE_HOST'] = host - env['REMOTE_ADDR'] = self.client_address[0] - authorization = self.headers.getheader("authorization") - if authorization: - authorization = authorization.split() - if len(authorization) == 2: - import base64, binascii - env['AUTH_TYPE'] = authorization[0] - if authorization[0].lower() == "basic": - try: - authorization = authorization[1].encode('ascii') - authorization = base64.decodestring(authorization).\ - decode('ascii') - except (binascii.Error, UnicodeError): - pass - else: - authorization = authorization.split(':') - if len(authorization) 
== 2: - env['REMOTE_USER'] = authorization[0] - # XXX REMOTE_IDENT - if self.headers.typeheader is None: - env['CONTENT_TYPE'] = self.headers.type - else: - env['CONTENT_TYPE'] = self.headers.typeheader - length = self.headers.getheader('content-length') - if length: - env['CONTENT_LENGTH'] = length - referer = self.headers.getheader('referer') - if referer: - env['HTTP_REFERER'] = referer - accept = [] - for line in self.headers.getallmatchingheaders('accept'): - if line[:1] in "\t\n\r ": - accept.append(line.strip()) - else: - accept = accept + line[7:].split(',') - env['HTTP_ACCEPT'] = ','.join(accept) - ua = self.headers.getheader('user-agent') - if ua: - env['HTTP_USER_AGENT'] = ua - co = filter(None, self.headers.getheaders('cookie')) - if co: - env['HTTP_COOKIE'] = ', '.join(co) - # XXX Other HTTP_* headers - # Since we're setting the env in the parent, provide empty - # values to override previously set values - for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', - 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): - env.setdefault(k, "") - os.environ.update(env) - - self.send_response(200, "Script output follows") - - decoded_query = query.replace('+', ' ') - - if self.have_fork: - # Unix -- fork as we should - args = [script] - if '=' not in decoded_query: - args.append(decoded_query) - nobody = nobody_uid() - self.wfile.flush() # Always flush before forking - pid = os.fork() - if pid != 0: - # Parent - pid, sts = os.waitpid(pid, 0) - # throw away additional data [see bug #427345] - while select.select([self.rfile], [], [], 0)[0]: - if not self.rfile.read(1): - break - if sts: - self.log_error("CGI script exit status %#x", sts) - return - # Child - try: - try: - os.setuid(nobody) - except os.error: - pass - os.dup2(self.rfile.fileno(), 0) - os.dup2(self.wfile.fileno(), 1) - os.execve(scriptfile, args, os.environ) - except: - self.server.handle_error(self.request, self.client_address) - os._exit(127) - - elif self.have_popen2 or self.have_popen3: - 
# Windows -- use popen2 or popen3 to create a subprocess - import shutil - if self.have_popen3: - popenx = os.popen3 - else: - popenx = os.popen2 - cmdline = scriptfile - if self.is_python(scriptfile): - interp = sys.executable - if interp.lower().endswith("w.exe"): - # On Windows, use python.exe, not pythonw.exe - interp = interp[:-5] + interp[-4:] - cmdline = "%s -u %s" % (interp, cmdline) - if '=' not in query and '"' not in query: - cmdline = '%s "%s"' % (cmdline, query) - self.log_message("command: %s", cmdline) - try: - nbytes = int(length) - except (TypeError, ValueError): - nbytes = 0 - files = popenx(cmdline, 'b') - fi = files[0] - fo = files[1] - if self.have_popen3: - fe = files[2] - if self.command.lower() == "post" and nbytes > 0: - data = self.rfile.read(nbytes) - fi.write(data) - # throw away additional data [see bug #427345] - while select.select([self.rfile._sock], [], [], 0)[0]: - if not self.rfile._sock.recv(1): - break - fi.close() - shutil.copyfileobj(fo, self.wfile) - if self.have_popen3: - errors = fe.read() - fe.close() - if errors: - self.log_error('%s', errors) - sts = fo.close() - if sts: - self.log_error("CGI script exit status %#x", sts) - else: - self.log_message("CGI script exited OK") - - else: - # Other O.S. 
-- execute script in this process - save_argv = sys.argv - save_stdin = sys.stdin - save_stdout = sys.stdout - save_stderr = sys.stderr - try: - save_cwd = os.getcwd() - try: - sys.argv = [scriptfile] - if '=' not in decoded_query: - sys.argv.append(decoded_query) - sys.stdout = self.wfile - sys.stdin = self.rfile - exec(open(scriptfile).read(), {"__name__": "__main__"}) - finally: - sys.argv = save_argv - sys.stdin = save_stdin - sys.stdout = save_stdout - sys.stderr = save_stderr - os.chdir(save_cwd) - except SystemExit as sts: - self.log_error("CGI script exit status %s", str(sts)) - else: - self.log_message("CGI script exited OK") - - -nobody = None - -def nobody_uid(): - """Internal routine to get nobody's uid""" - global nobody - if nobody: - return nobody - try: - import pwd - except ImportError: - return -1 - try: - nobody = pwd.getpwnam('nobody')[2] - except KeyError: - nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) - return nobody - - -def executable(path): - """Test for executable file.""" - try: - st = os.stat(path) - except os.error: - return False - return st.st_mode & 0o111 != 0 - - -def test(HandlerClass = CGIHTTPRequestHandler, - ServerClass = BaseHTTPServer.HTTPServer): - SimpleHTTPServer.test(HandlerClass, ServerClass) - - -if __name__ == '__main__': - test() diff --git a/Lib/Cookie.py b/Lib/Cookie.py deleted file mode 100644 index 83f30d5..0000000 --- a/Lib/Cookie.py +++ /dev/null @@ -1,733 +0,0 @@ -#!/usr/bin/env python -# - -#### -# Copyright 2000 by Timothy O'Malley -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software -# and its documentation for any purpose and without fee is hereby -# granted, provided that the above copyright notice appear in all -# copies and that both that copyright notice and this permission -# notice appear in supporting documentation, and that the name of -# Timothy O'Malley not be used in advertising or publicity -# pertaining to distribution of the software without 
specific, written -# prior permission. -# -# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS -# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR -# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS -# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -# PERFORMANCE OF THIS SOFTWARE. -# -#### -# -# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp -# by Timothy O'Malley -# -# Cookie.py is a Python module for the handling of HTTP -# cookies as a Python dictionary. See RFC 2109 for more -# information on cookies. -# -# The original idea to treat Cookies as a dictionary came from -# Dave Mitchell (davem@magnet.com) in 1995, when he released the -# first version of nscookie.py. -# -#### - -r""" -Here's a sample session to show how to use this module. -At the moment, this is the only documentation. - -The Basics ----------- - -Importing is easy.. - - >>> import Cookie - -Most of the time you start by creating a cookie. Cookies come in -three flavors, each with slightly different encoding semantics, but -more on that later. - - >>> C = Cookie.SimpleCookie() - >>> C = Cookie.SerialCookie() - >>> C = Cookie.SmartCookie() - -[Note: Long-time users of Cookie.py will remember using -Cookie.Cookie() to create an Cookie object. Although deprecated, it -is still supported by the code. See the Backward Compatibility notes -for more information.] - -Once you've created your Cookie, you can add values just as if it were -a dictionary. - - >>> C = Cookie.SmartCookie() - >>> C["fig"] = "newton" - >>> C["sugar"] = "wafer" - >>> C.output() - 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' - -Notice that the printable representation of a Cookie is the -appropriate format for a Set-Cookie: header. This is the -default behavior. 
You can change the header and printed -attributes by using the .output() function - - >>> C = Cookie.SmartCookie() - >>> C["rocky"] = "road" - >>> C["rocky"]["path"] = "/cookie" - >>> print(C.output(header="Cookie:")) - Cookie: rocky=road; Path=/cookie - >>> print(C.output(attrs=[], header="Cookie:")) - Cookie: rocky=road - -The load() method of a Cookie extracts cookies from a string. In a -CGI script, you would use this method to extract the cookies from the -HTTP_COOKIE environment variable. - - >>> C = Cookie.SmartCookie() - >>> C.load("chips=ahoy; vienna=finger") - >>> C.output() - 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' - -The load() method is darn-tootin smart about identifying cookies -within a string. Escaped quotation marks, nested semicolons, and other -such trickeries do not confuse it. - - >>> C = Cookie.SmartCookie() - >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') - >>> print(C) - Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" - -Each element of the Cookie also supports all of the RFC 2109 -Cookie attributes. Here's an example which sets the Path -attribute. - - >>> C = Cookie.SmartCookie() - >>> C["oreo"] = "doublestuff" - >>> C["oreo"]["path"] = "/" - >>> print(C) - Set-Cookie: oreo=doublestuff; Path=/ - -Each dictionary element has a 'value' attribute, which gives you -back the value associated with the key. - - >>> C = Cookie.SmartCookie() - >>> C["twix"] = "none for you" - >>> C["twix"].value - 'none for you' - - -A Bit More Advanced -------------------- - -As mentioned before, there are three different flavors of Cookie -objects, each with different encoding/decoding semantics. This -section briefly discusses the differences. - -SimpleCookie - -The SimpleCookie expects that all values should be standard strings. -Just to be sure, SimpleCookie invokes the str() builtin to convert -the value to a string, when the values are set dictionary-style. 
- - >>> C = Cookie.SimpleCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - '7' - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' - - -SerialCookie - -The SerialCookie expects that all values should be serialized using -pickle. As a result of serializing, SerialCookie can save almost any -Python object to a value, and recover the exact same object when the -cookie has been returned. (SerialCookie can yield some -strange-looking cookie values, however.) - - >>> C = Cookie.SerialCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - 7 - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number="L7\\012."\r\nSet-Cookie: string="Vseven\\012p0\\012."' - -Be warned, however, if SerialCookie cannot de-serialize a value (because -it isn't a valid pickle'd object), IT WILL RAISE AN EXCEPTION. - - -SmartCookie - -The SmartCookie combines aspects of each of the other two flavors. -When setting a value in a dictionary-fashion, the SmartCookie will -serialize (ala pickle) the value *if and only if* it isn't a -Python string. String objects are *not* serialized. Similarly, -when the load() method parses out values, it attempts to de-serialize -the value. If it fails, then it fallsback to treating the value -as a string. - - >>> C = Cookie.SmartCookie() - >>> C["number"] = 7 - >>> C["string"] = "seven" - >>> C["number"].value - 7 - >>> C["string"].value - 'seven' - >>> C.output() - 'Set-Cookie: number="L7\\012."\r\nSet-Cookie: string=seven' - - -Backwards Compatibility ------------------------ - -In order to keep compatibilty with earlier versions of Cookie.py, -it is still possible to use Cookie.Cookie() to create a Cookie. In -fact, this simply returns a SmartCookie. - - >>> C = Cookie.Cookie() - >>> print(C.__class__.__name__) - SmartCookie - - -Finis. 
-""" #" -# ^ -# |----helps out font-lock - -# -# Import our required modules -# -import string - -from pickle import dumps, loads - -import re, warnings - -__all__ = ["CookieError","BaseCookie","SimpleCookie","SerialCookie", - "SmartCookie","Cookie"] - -_nulljoin = ''.join -_semispacejoin = '; '.join -_spacejoin = ' '.join - -# -# Define an exception visible to External modules -# -class CookieError(Exception): - pass - - -# These quoting routines conform to the RFC2109 specification, which in -# turn references the character definitions from RFC2068. They provide -# a two-way quoting algorithm. Any non-text character is translated -# into a 4 character sequence: a forward-slash followed by the -# three-digit octal equivalent of the character. Any '\' or '"' is -# quoted with a preceeding '\' slash. -# -# These are taken from RFC2068 and RFC2109. -# _LegalChars is the list of chars which don't require "'s -# _Translator hash-table for fast quoting -# -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~" -_Translator = { - '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', - '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', - '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', - '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', - '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', - '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', - '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', - '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', - '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', - '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', - '\036' : '\\036', '\037' : '\\037', - - '"' : '\\"', '\\' : '\\\\', - - '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', - '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', - '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', - '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', - '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', - '\216' : '\\216', '\217' : 
'\\217', '\220' : '\\220', - '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', - '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', - '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', - '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', - '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', - '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', - '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', - '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', - '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', - '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', - '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', - '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', - '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', - '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', - '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', - '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', - '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', - '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', - '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', - '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', - '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', - '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', - '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', - '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', - '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', - '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', - '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', - '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', - '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', - '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', - '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', - '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', - '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', - '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', - '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', - '\372' : 
'\\372', '\373' : '\\373', '\374' : '\\374', - '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' - } - -def _quote(str, LegalChars=_LegalChars): - # - # If the string does not need to be double-quoted, - # then just return the string. Otherwise, surround - # the string in doublequotes and precede quote (with a \) - # special characters. - # - if all(c in LegalChars for c in str): - return str - else: - return '"' + _nulljoin( map(_Translator.get, str, str) ) + '"' -# end _quote - - -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") - -def _unquote(str): - # If there aren't any doublequotes, - # then there can't be any special characters. See RFC 2109. - if len(str) < 2: - return str - if str[0] != '"' or str[-1] != '"': - return str - - # We have to assume that we must decode this string. - # Down to work. - - # Remove the "s - str = str[1:-1] - - # Check for special sequences. Examples: - # \012 --> \n - # \" --> " - # - i = 0 - n = len(str) - res = [] - while 0 <= i < n: - Omatch = _OctalPatt.search(str, i) - Qmatch = _QuotePatt.search(str, i) - if not Omatch and not Qmatch: # Neither matched - res.append(str[i:]) - break - # else: - j = k = -1 - if Omatch: j = Omatch.start(0) - if Qmatch: k = Qmatch.start(0) - if Qmatch and ( not Omatch or k < j ): # QuotePatt matched - res.append(str[i:k]) - res.append(str[k+1]) - i = k+2 - else: # OctalPatt matched - res.append(str[i:j]) - res.append( chr( int(str[j+1:j+4], 8) ) ) - i = j+4 - return _nulljoin(res) -# end _unquote - -# The _getdate() routine is used to set the expiration time in -# the cookie's HTTP header. By default, _getdate() returns the -# current time in the appropriate "expires" format for a -# Set-Cookie header. The one optional argument is an offset from -# now, in seconds. For example, an offset of -3600 means "one hour ago". -# The offset may be a floating point number. 
-# - -_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] - -_monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] - -def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): - from time import gmtime, time - now = time() - year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) - return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % \ - (weekdayname[wd], day, monthname[month], year, hh, mm, ss) - - -# -# A class to hold ONE key,value pair. -# In a cookie, each such pair may have several attributes. -# so this class is used to keep the attributes associated -# with the appropriate key,value pair. -# This class also includes a coded_value attribute, which -# is used to hold the network representation of the -# value. This is most useful when Python objects are -# pickled for network transit. -# - -class Morsel(dict): - # RFC 2109 lists these attributes as reserved: - # path comment domain - # max-age secure version - # - # For historical reasons, these attributes are also reserved: - # expires - # - # This dictionary provides a mapping from the lowercase - # variant on the left to the appropriate traditional - # formatting on the right. 
- _reserved = { "expires" : "expires", - "path" : "Path", - "comment" : "Comment", - "domain" : "Domain", - "max-age" : "Max-Age", - "secure" : "secure", - "version" : "Version", - } - - def __init__(self): - # Set defaults - self.key = self.value = self.coded_value = None - - # Set default attributes - for K in self._reserved: - dict.__setitem__(self, K, "") - # end __init__ - - def __setitem__(self, K, V): - K = K.lower() - if not K in self._reserved: - raise CookieError("Invalid Attribute %s" % K) - dict.__setitem__(self, K, V) - # end __setitem__ - - def isReservedKey(self, K): - return K.lower() in self._reserved - # end isReservedKey - - def set(self, key, val, coded_val, LegalChars=_LegalChars): - # First we verify that the key isn't a reserved word - # Second we make sure it only contains legal characters - if key.lower() in self._reserved: - raise CookieError("Attempt to set a reserved key: %s" % key) - if any(c not in LegalChars for c in key): - raise CookieError("Illegal key value: %s" % key) - - # It's a good key, so save it. 
- self.key = key - self.value = val - self.coded_value = coded_val - # end set - - def output(self, attrs=None, header = "Set-Cookie:"): - return "%s %s" % ( header, self.OutputString(attrs) ) - - __str__ = output - - def __repr__(self): - return '<%s: %s=%s>' % (self.__class__.__name__, - self.key, repr(self.value) ) - - def js_output(self, attrs=None): - # Print javascript - return """ - - """ % ( self.OutputString(attrs), ) - # end js_output() - - def OutputString(self, attrs=None): - # Build up our result - # - result = [] - RA = result.append - - # First, the key=value pair - RA("%s=%s" % (self.key, self.coded_value)) - - # Now add any defined attributes - if attrs is None: - attrs = self._reserved - items = sorted(self.items()) - for K,V in items: - if V == "": continue - if K not in attrs: continue - if K == "expires" and type(V) == type(1): - RA("%s=%s" % (self._reserved[K], _getdate(V))) - elif K == "max-age" and type(V) == type(1): - RA("%s=%d" % (self._reserved[K], V)) - elif K == "secure": - RA(str(self._reserved[K])) - else: - RA("%s=%s" % (self._reserved[K], V)) - - # Return the result - return _semispacejoin(result) - # end OutputString -# end Morsel class - - - -# -# Pattern for finding cookie -# -# This used to be strict parsing based on the RFC2109 and RFC2068 -# specifications. I have since discovered that MSIE 3.0x doesn't -# follow the character rules outlined in those specs. As a -# result, the parsing rules here are less strict. -# - -_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" -_CookiePattern = re.compile( - r"(?x)" # This is a Verbose pattern - r"(?P" # Start of group 'key' - ""+ _LegalCharsPatt +"+?" # Any word of at least one letter, nongreedy - r")" # End of group 'key' - r"\s*=\s*" # Equal Sign - r"(?P" # Start of group 'val' - r'"(?:[^\\"]|\\.)*"' # Any doublequoted string - r"|" # or - ""+ _LegalCharsPatt +"*" # Any word or empty string - r")" # End of group 'val' - r"\s*;?" 
# Probably ending in a semi-colon - ) - - -# At long last, here is the cookie class. -# Using this class is almost just like using a dictionary. -# See this module's docstring for example usage. -# -class BaseCookie(dict): - # A container class for a set of Morsels - # - - def value_decode(self, val): - """real_value, coded_value = value_decode(STRING) - Called prior to setting a cookie's value from the network - representation. The VALUE is the value read from HTTP - header. - Override this function to modify the behavior of cookies. - """ - return val, val - # end value_encode - - def value_encode(self, val): - """real_value, coded_value = value_encode(VALUE) - Called prior to setting a cookie's value from the dictionary - representation. The VALUE is the value being assigned. - Override this function to modify the behavior of cookies. - """ - strval = str(val) - return strval, strval - # end value_encode - - def __init__(self, input=None): - if input: self.load(input) - # end __init__ - - def __set(self, key, real_value, coded_value): - """Private method for setting a cookie's value""" - M = self.get(key, Morsel()) - M.set(key, real_value, coded_value) - dict.__setitem__(self, key, M) - # end __set - - def __setitem__(self, key, value): - """Dictionary style assignment.""" - rval, cval = self.value_encode(value) - self.__set(key, rval, cval) - # end __setitem__ - - def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): - """Return a string suitable for HTTP.""" - result = [] - items = sorted(self.items()) - for K,V in items: - result.append( V.output(attrs, header) ) - return sep.join(result) - # end output - - __str__ = output - - def __repr__(self): - L = [] - items = sorted(self.items()) - for K,V in items: - L.append( '%s=%s' % (K,repr(V.value) ) ) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L)) - - def js_output(self, attrs=None): - """Return a string suitable for JavaScript.""" - result = [] - items = sorted(self.items()) - for 
K,V in items: - result.append( V.js_output(attrs) ) - return _nulljoin(result) - # end js_output - - def load(self, rawdata): - """Load cookies from a string (presumably HTTP_COOKIE) or - from a dictionary. Loading cookies from a dictionary 'd' - is equivalent to calling: - map(Cookie.__setitem__, d.keys(), d.values()) - """ - if type(rawdata) == type(""): - self.__ParseString(rawdata) - else: - self.update(rawdata) - return - # end load() - - def __ParseString(self, str, patt=_CookiePattern): - i = 0 # Our starting point - n = len(str) # Length of string - M = None # current morsel - - while 0 <= i < n: - # Start looking for a cookie - match = patt.search(str, i) - if not match: break # No more cookies - - K,V = match.group("key"), match.group("val") - i = match.end(0) - - # Parse the key, value in case it's metainfo - if K[0] == "$": - # We ignore attributes which pertain to the cookie - # mechanism as a whole. See RFC 2109. - # (Does anyone care?) - if M: - M[ K[1:] ] = V - elif K.lower() in Morsel._reserved: - if M: - M[ K ] = _unquote(V) - else: - rval, cval = self.value_decode(V) - self.__set(K, rval, cval) - M = self[K] - # end __ParseString -# end BaseCookie class - -class SimpleCookie(BaseCookie): - """SimpleCookie - SimpleCookie supports strings as cookie values. When setting - the value using the dictionary assignment notation, SimpleCookie - calls the builtin str() to convert the value to a string. Values - received from HTTP are kept as strings. - """ - def value_decode(self, val): - return _unquote( val ), val - def value_encode(self, val): - strval = str(val) - return strval, _quote( strval ) -# end SimpleCookie - -class SerialCookie(BaseCookie): - """SerialCookie - SerialCookie supports arbitrary objects as cookie values. All - values are serialized (using pickle) before being sent to the - client. All incoming values are assumed to be valid Pickle - representations. 
IF AN INCOMING VALUE IS NOT IN A VALID PICKLE - FORMAT, THEN AN EXCEPTION WILL BE RAISED. - - Note: Large cookie values add overhead because they must be - retransmitted on every HTTP transaction. - - Note: HTTP has a 2k limit on the size of a cookie. This class - does not check for this limit, so be careful!!! - """ - def __init__(self, input=None): - warnings.warn("SerialCookie class is insecure; do not use it", - DeprecationWarning) - BaseCookie.__init__(self, input) - # end __init__ - def value_decode(self, val): - # This could raise an exception! - return loads( _unquote(val).encode('latin-1') ), val - def value_encode(self, val): - return val, _quote( dumps(val, 0).decode('latin-1') ) -# end SerialCookie - -class SmartCookie(BaseCookie): - """SmartCookie - SmartCookie supports arbitrary objects as cookie values. If the - object is a string, then it is quoted. If the object is not a - string, however, then SmartCookie will use pickle to serialize - the object into a string representation. - - Note: Large cookie values add overhead because they must be - retransmitted on every HTTP transaction. - - Note: HTTP has a 2k limit on the size of a cookie. This class - does not check for this limit, so be careful!!! - """ - def __init__(self, input=None): - warnings.warn("Cookie/SmartCookie class is insecure; do not use it", - DeprecationWarning) - BaseCookie.__init__(self, input) - # end __init__ - def value_decode(self, val): - strval = _unquote(val) - try: - return loads(strval.encode('latin-1')), val - except: - return strval, val - def value_encode(self, val): - if isinstance(val, str): - return val, _quote(val) - else: - return val, _quote( dumps(val, 0).decode('latin-1') ) -# end SmartCookie - - -########################################################### -# Backwards Compatibility: Don't break any existing code! 
- -# We provide Cookie() as an alias for SmartCookie() -Cookie = SmartCookie - -# -########################################################### - -def _test(): - import doctest, Cookie - return doctest.testmod(Cookie) - -if __name__ == "__main__": - _test() - - -#Local Variables: -#tab-width: 4 -#end: diff --git a/Lib/SimpleHTTPServer.py b/Lib/SimpleHTTPServer.py deleted file mode 100644 index 36a249c..0000000 --- a/Lib/SimpleHTTPServer.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Simple HTTP Server. - -This module builds on BaseHTTPServer by implementing the standard GET -and HEAD requests in a fairly straightforward manner. - -""" - - -__version__ = "0.6" - -__all__ = ["SimpleHTTPRequestHandler"] - -import os -import sys -import posixpath -import BaseHTTPServer -import urllib -import cgi -import shutil -import mimetypes -from io import BytesIO - - -class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): - - """Simple HTTP request handler with GET and HEAD commands. - - This serves files from the current directory and any of its - subdirectories. The MIME type for files is determined by - calling the .guess_type() method. - - The GET and HEAD requests are identical except that the HEAD - request omits the actual contents of the file. - - """ - - server_version = "SimpleHTTP/" + __version__ - - def do_GET(self): - """Serve a GET request.""" - f = self.send_head() - if f: - self.copyfile(f, self.wfile) - f.close() - - def do_HEAD(self): - """Serve a HEAD request.""" - f = self.send_head() - if f: - f.close() - - def send_head(self): - """Common code for GET and HEAD commands. - - This sends the response code and MIME headers. - - Return value is either a file object (which has to be copied - to the outputfile by the caller unless the command was HEAD, - and must be closed by the caller under all circumstances), or - None, in which case the caller has nothing further to do. 
- - """ - path = self.translate_path(self.path) - f = None - if os.path.isdir(path): - if not self.path.endswith('/'): - # redirect browser - doing basically what apache does - self.send_response(301) - self.send_header("Location", self.path + "/") - self.end_headers() - return None - for index in "index.html", "index.htm": - index = os.path.join(path, index) - if os.path.exists(index): - path = index - break - else: - return self.list_directory(path) - ctype = self.guess_type(path) - try: - f = open(path, 'rb') - except IOError: - self.send_error(404, "File not found") - return None - self.send_response(200) - self.send_header("Content-type", ctype) - fs = os.fstat(f.fileno()) - self.send_header("Content-Length", str(fs[6])) - self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) - self.end_headers() - return f - - def list_directory(self, path): - """Helper to produce a directory listing (absent index.html). - - Return value is either a file object, or None (indicating an - error). In either case, the headers are sent, making the - interface the same as for send_head(). - - """ - try: - list = os.listdir(path) - except os.error: - self.send_error(404, "No permission to list directory") - return None - list.sort(key=lambda a: a.lower()) - r = [] - displaypath = cgi.escape(urllib.unquote(self.path)) - r.append('') - r.append("\nDirectory listing for %s\n" % displaypath) - r.append("\n

Directory listing for %s

\n" % displaypath) - r.append("
\n
    \n") - for name in list: - fullname = os.path.join(path, name) - displayname = linkname = name - # Append / for directories or @ for symbolic links - if os.path.isdir(fullname): - displayname = name + "/" - linkname = name + "/" - if os.path.islink(fullname): - displayname = name + "@" - # Note: a link to a directory displays with @ and links with / - r.append('
  • %s\n' - % (urllib.quote(linkname), cgi.escape(displayname))) - r.append("
\n
\n\n\n") - enc = sys.getfilesystemencoding() - encoded = ''.join(r).encode(enc) - f = BytesIO() - f.write(encoded) - f.seek(0) - self.send_response(200) - self.send_header("Content-type", "text/html; charset=%s" % enc) - self.send_header("Content-Length", str(len(encoded))) - self.end_headers() - return f - - def translate_path(self, path): - """Translate a /-separated PATH to the local filename syntax. - - Components that mean special things to the local file system - (e.g. drive or directory names) are ignored. (XXX They should - probably be diagnosed.) - - """ - # abandon query parameters - path = path.split('?',1)[0] - path = path.split('#',1)[0] - path = posixpath.normpath(urllib.unquote(path)) - words = path.split('/') - words = filter(None, words) - path = os.getcwd() - for word in words: - drive, word = os.path.splitdrive(word) - head, word = os.path.split(word) - if word in (os.curdir, os.pardir): continue - path = os.path.join(path, word) - return path - - def copyfile(self, source, outputfile): - """Copy all data between two file objects. - - The SOURCE argument is a file object open for reading - (or anything with a read() method) and the DESTINATION - argument is a file object open for writing (or - anything with a write() method). - - The only reason for overriding this would be to change - the block size or perhaps to replace newlines by CRLF - -- note however that this the default server uses this - to copy binary data as well. - - """ - shutil.copyfileobj(source, outputfile) - - def guess_type(self, path): - """Guess the type of a file. - - Argument is a PATH (a filename). - - Return value is a string of the form type/subtype, - usable for a MIME Content-type header. - - The default implementation looks the file's extension - up in the table self.extensions_map, using application/octet-stream - as a default; however it would be permissible (if - slow) to look inside the data to make a better guess. 
- - """ - - base, ext = posixpath.splitext(path) - if ext in self.extensions_map: - return self.extensions_map[ext] - ext = ext.lower() - if ext in self.extensions_map: - return self.extensions_map[ext] - else: - return self.extensions_map[''] - - if not mimetypes.inited: - mimetypes.init() # try to read system mime.types - extensions_map = mimetypes.types_map.copy() - extensions_map.update({ - '': 'application/octet-stream', # Default - '.py': 'text/plain', - '.c': 'text/plain', - '.h': 'text/plain', - }) - - -def test(HandlerClass = SimpleHTTPRequestHandler, - ServerClass = BaseHTTPServer.HTTPServer): - BaseHTTPServer.test(HandlerClass, ServerClass) - - -if __name__ == '__main__': - test() diff --git a/Lib/_LWPCookieJar.py b/Lib/_LWPCookieJar.py index 6720958..641744b 100644 --- a/Lib/_LWPCookieJar.py +++ b/Lib/_LWPCookieJar.py @@ -12,10 +12,10 @@ libwww-perl, I hope. """ import time, re -from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError, - Cookie, MISSING_FILENAME_TEXT, - join_header_words, split_header_words, - iso2time, time2isoz) +from http.cookiejar import (_warn_unhandled_exception, FileCookieJar, LoadError, + Cookie, MISSING_FILENAME_TEXT, + join_header_words, split_header_words, + iso2time, time2isoz) def lwp_cookie_str(cookie): """Return string representation of Cookie in an the LWP cookie file format. 
diff --git a/Lib/_MozillaCookieJar.py b/Lib/_MozillaCookieJar.py index 4fd6de3..a376ce0 100644 --- a/Lib/_MozillaCookieJar.py +++ b/Lib/_MozillaCookieJar.py @@ -2,8 +2,8 @@ import re, time -from cookielib import (_warn_unhandled_exception, FileCookieJar, LoadError, - Cookie, MISSING_FILENAME_TEXT) +from http.cookiejar import (_warn_unhandled_exception, FileCookieJar, LoadError, + Cookie, MISSING_FILENAME_TEXT) class MozillaCookieJar(FileCookieJar): """ @@ -73,7 +73,7 @@ class MozillaCookieJar(FileCookieJar): domain_specified = (domain_specified == "TRUE") if name == "": # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas cookielib regards it as a + # with no name, whereas http.cookiejar regards it as a # cookie with no value. name = value value = None @@ -134,7 +134,7 @@ class MozillaCookieJar(FileCookieJar): expires = "" if cookie.value is None: # cookies.txt regards 'Set-Cookie: foo' as a cookie - # with no name, whereas cookielib regards it as a + # with no name, whereas http.cookiejar regards it as a # cookie with no value. name = "" value = cookie.name diff --git a/Lib/cookielib.py b/Lib/cookielib.py deleted file mode 100644 index 6c59390..0000000 --- a/Lib/cookielib.py +++ /dev/null @@ -1,1785 +0,0 @@ -"""HTTP cookie handling for web clients. - -This module has (now fairly distant) origins in Gisle Aas' Perl module -HTTP::Cookies, from the libwww-perl library. - -Docstrings, comments and debug strings in this code refer to the -attributes of the HTTP cookie system as cookie-attributes, to distinguish -them clearly from Python attributes. 
- -Class diagram (note that BSDDBCookieJar and the MSIE* classes are not -distributed with the Python standard library, but are available from -http://wwwsearch.sf.net/): - - CookieJar____ - / \ \ - FileCookieJar \ \ - / | \ \ \ - MozillaCookieJar | LWPCookieJar \ \ - | | \ - | ---MSIEBase | \ - | / | | \ - | / MSIEDBCookieJar BSDDBCookieJar - |/ - MSIECookieJar - -""" - -__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', - 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] - -import re, urlparse, copy, time, urllib -try: - import threading as _threading -except ImportError: - import dummy_threading as _threading -import httplib # only for the default HTTP port -from calendar import timegm - -debug = False # set to True to enable debugging via the logging module -logger = None - -def _debug(*args): - if not debug: - return - global logger - if not logger: - import logging - logger = logging.getLogger("cookielib") - return logger.debug(*args) - - -DEFAULT_HTTP_PORT = str(httplib.HTTP_PORT) -MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " - "instance initialised with one)") - -def _warn_unhandled_exception(): - # There are a few catch-all except: statements in this module, for - # catching input that's bad in unexpected ways. Warn if any - # exceptions are caught there. 
- import io, warnings, traceback - f = io.StringIO() - traceback.print_exc(None, f) - msg = f.getvalue() - warnings.warn("cookielib bug!\n%s" % msg, stacklevel=2) - - -# Date/time conversion -# ----------------------------------------------------------------------------- - -EPOCH_YEAR = 1970 -def _timegm(tt): - year, month, mday, hour, min, sec = tt[:6] - if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and - (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): - return timegm(tt) - else: - return None - -DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] -MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] -MONTHS_LOWER = [] -for month in MONTHS: MONTHS_LOWER.append(month.lower()) - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec = time.gmtime(t)[:6] - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - year, mon, mday, hour, min, sec) - -def time2netscape(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. 
- - The format of the returned string is like this: - - Wed, DD-Mon-YYYY HH:MM:SS GMT - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] - return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( - DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) - - -UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} - -TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") -def offset_from_tz_string(tz): - offset = None - if tz in UTC_ZONES: - offset = 0 - else: - m = TIMEZONE_RE.search(tz) - if m: - offset = 3600 * int(m.group(2)) - if m.group(3): - offset = offset + 60 * int(m.group(3)) - if m.group(1) == '-': - offset = -offset - return offset - -def _str2time(day, mon, yr, hr, min, sec, tz): - # translate month name to number - # month numbers start with 1 (January) - try: - mon = MONTHS_LOWER.index(mon.lower())+1 - except ValueError: - # maybe it's already a number - try: - imon = int(mon) - except ValueError: - return None - if 1 <= imon <= 12: - mon = imon - else: - return None - - # make sure clock elements are defined - if hr is None: hr = 0 - if min is None: min = 0 - if sec is None: sec = 0 - - yr = int(yr) - day = int(day) - hr = int(hr) - min = int(min) - sec = int(sec) - - if yr < 1000: - # find "obvious" year - cur_yr = time.localtime(time.time())[0] - m = cur_yr % 100 - tmp = yr - yr = yr + cur_yr - m - m = m - tmp - if abs(m) > 50: - if m > 0: yr = yr + 100 - else: yr = yr - 100 - - # convert UTC time tuple to seconds since epoch (not timezone-adjusted) - t = _timegm((yr, mon, day, hr, min, sec, tz)) - - if t is not None: - # adjust time using timezone string, to get absolute time since epoch - if tz is None: - tz = "UTC" - tz = tz.upper() - offset = offset_from_tz_string(tz) - if offset is None: - return None - t = t - offset - - return t - -STRICT_DATE_RE = re.compile( - r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " - "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") -WEEKDAY_RE = re.compile( - 
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) -LOOSE_HTTP_DATE_RE = re.compile( - r"""^ - (\d\d?) # day - (?:\s+|[-\/]) - (\w+) # month - (?:\s+|[-\/]) - (\d+) # year - (?: - (?:\s+|:) # separator before clock - (\d\d?):(\d\d) # hour:min - (?::(\d\d))? # optional seconds - )? # optional clock - \s* - ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone - \s* - (?:\(\w+\))? # ASCII representation of timezone in parens. - \s*$""", re.X) -def http2time(text): - """Returns time in seconds since epoch of time represented by a string. - - Return value is an integer. - - None is returned if the format of str is unrecognized, the time is outside - the representable range, or the timezone string is not recognized. If the - string contains no timezone, UTC is assumed. - - The timezone in the string may be numerical (like "-0800" or "+0100") or a - string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the - timezone strings equivalent to UTC (zero offset) are known to the function. - - The function loosely parses the following formats: - - Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format - Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format - Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format - 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) - 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) - 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) - - The parser ignores leading and trailing whitespace. The time may be - absent. - - If the year is given with only 2 digits, the function will select the - century that makes the year closest to the current date. - - """ - # fast exit for strictly conforming string - m = STRICT_DATE_RE.search(text) - if m: - g = m.groups() - mon = MONTHS_LOWER.index(g[1].lower()) + 1 - tt = (int(g[2]), mon, int(g[0]), - int(g[3]), int(g[4]), float(g[5])) - return _timegm(tt) - - # No, we need some messy parsing... 
- - # clean up - text = text.lstrip() - text = WEEKDAY_RE.sub("", text, 1) # Useless weekday - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = LOOSE_HTTP_DATE_RE.search(text) - if m is not None: - day, mon, yr, hr, min, sec, tz = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - -ISO_DATE_RE = re.compile( - """^ - (\d{4}) # year - [-\/]? - (\d\d?) # numerical month - [-\/]? - (\d\d?) # day - (?: - (?:\s+|[-:Tt]) # separator before clock - (\d\d?):?(\d\d) # hour:min - (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) - )? # optional clock - \s* - ([-+]?\d\d?:?(:?\d\d)? - |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) - \s*$""", re.X) -def iso2time(text): - """ - As for http2time, but parses the ISO 8601 formats: - - 1994-02-03 14:15:29 -0100 -- ISO 8601 format - 1994-02-03 14:15:29 -- zone is optional - 1994-02-03 -- only date - 1994-02-03T14:15:29 -- Use T as separator - 19940203T141529Z -- ISO 8601 compact format - 19940203 -- only date - - """ - # clean up - text = text.lstrip() - - # tz is time zone specifier string - day, mon, yr, hr, min, sec, tz = [None]*7 - - # loose regexp parse - m = ISO_DATE_RE.search(text) - if m is not None: - # XXX there's an extra bit of the timezone I'm ignoring here: is - # this the right thing to do? 
- yr, mon, day, hr, min, sec, tz, _ = m.groups() - else: - return None # bad format - - return _str2time(day, mon, yr, hr, min, sec, tz) - - -# Header parsing -# ----------------------------------------------------------------------------- - -def unmatched(match): - """Return unmatched part of re.Match object.""" - start, end = match.span(0) - return match.string[:start]+match.string[end:] - -HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") -HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") -HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") -HEADER_ESCAPE_RE = re.compile(r"\\(.)") -def split_header_words(header_values): - r"""Parse header values into a list of lists containing key,value pairs. - - The function knows how to deal with ",", ";" and "=" as well as quoted - values after "=". A list of space separated tokens are parsed as if they - were separated by ";". - - If the header_values passed as argument contains multiple values, then they - are treated as if they were a single value separated by comma ",". - - This means that this function is useful for parsing header fields that - follow this syntax (BNF as from the HTTP/1.1 specification, but we relax - the requirement for tokens). - - headers = #header - header = (token | parameter) *( [";"] (token | parameter)) - - token = 1* - separators = "(" | ")" | "<" | ">" | "@" - | "," | ";" | ":" | "\" | <"> - | "/" | "[" | "]" | "?" | "=" - | "{" | "}" | SP | HT - - quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - qdtext = > - quoted-pair = "\" CHAR - - parameter = attribute "=" value - attribute = token - value = token | quoted-string - - Each header is represented by a list of key/value pairs. The value for a - simple token (not part of a parameter) is None. Syntactically incorrect - headers will not necessarily be parsed as you would want. 
- - This is easier to describe with some examples: - - >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) - [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] - >>> split_header_words(['text/html; charset="iso-8859-1"']) - [[('text/html', None), ('charset', 'iso-8859-1')]] - >>> split_header_words([r'Basic realm="\"foo\bar\""']) - [[('Basic', None), ('realm', '"foobar"')]] - - """ - assert not isinstance(header_values, str) - result = [] - for text in header_values: - orig_text = text - pairs = [] - while text: - m = HEADER_TOKEN_RE.search(text) - if m: - text = unmatched(m) - name = m.group(1) - m = HEADER_QUOTED_VALUE_RE.search(text) - if m: # quoted value - text = unmatched(m) - value = m.group(1) - value = HEADER_ESCAPE_RE.sub(r"\1", value) - else: - m = HEADER_VALUE_RE.search(text) - if m: # unquoted value - text = unmatched(m) - value = m.group(1) - value = value.rstrip() - else: - # no value, a lone token - value = None - pairs.append((name, value)) - elif text.lstrip().startswith(","): - # concatenated headers, as per RFC 2616 section 4.2 - text = text.lstrip()[1:] - if pairs: result.append(pairs) - pairs = [] - else: - # skip junk - non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) - assert nr_junk_chars > 0, ( - "split_header_words bug: '%s', '%s', %s" % - (orig_text, text, pairs)) - text = non_junk - if pairs: result.append(pairs) - return result - -HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse (almost) of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. 
- - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) - -def parse_ns_headers(ns_headers): - """Ad-hoc parser for Netscape protocol cookie-attributes. - - The old Netscape cookie format for Set-Cookie can for instance contain - an unquoted "," in the expires field, so we have to use this ad-hoc - parser instead of split_header_words. - - XXX This may not make the best possible effort to parse all the crap - that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient - parser is probably better, so could do worse than following that if - this ever gives any trouble. - - Currently, this is also used for parsing RFC 2109 cookies. - - """ - known_attrs = ("expires", "domain", "path", "secure", - # RFC 2109 attrs (may turn up in Netscape cookies, too) - "port", "max-age") - - result = [] - for ns_header in ns_headers: - pairs = [] - version_set = False - for ii, param in enumerate(re.split(r";\s*", ns_header)): - param = param.rstrip() - if param == "": continue - if "=" not in param: - k, v = param, None - else: - k, v = re.split(r"\s*=\s*", param, 1) - k = k.lstrip() - if ii != 0: - lc = k.lower() - if lc in known_attrs: - k = lc - if k == "version": - # This is an RFC 2109 cookie. 
- version_set = True - if k == "expires": - # convert expires date to seconds since epoch - if v.startswith('"'): v = v[1:] - if v.endswith('"'): v = v[:-1] - v = http2time(v) # None if invalid - pairs.append((k, v)) - - if pairs: - if not version_set: - pairs.append(("version", "0")) - result.append(pairs) - - return result - - -IPV4_RE = re.compile(r"\.\d+$") -def is_HDN(text): - """Return True if text is a host domain name.""" - # XXX - # This may well be wrong. Which RFC is HDN defined in, if any (for - # the purposes of RFC 2965)? - # For the current implementation, what about IPv6? Remember to look - # at other uses of IPV4_RE also, if change this. - if IPV4_RE.search(text): - return False - if text == "": - return False - if text[0] == "." or text[-1] == ".": - return False - return True - -def domain_match(A, B): - """Return True if domain A domain-matches domain B, according to RFC 2965. - - A and B may be host domain names or IP addresses. - - RFC 2965, section 1: - - Host names can be specified either as an IP address or a HDN string. - Sometimes we compare one host name with another. (Such comparisons SHALL - be case-insensitive.) Host A's name domain-matches host B's if - - * their host name strings string-compare equal; or - - * A is a HDN string and has the form NB, where N is a non-empty - name string, B has the form .B', and B' is a HDN string. (So, - x.y.com domain-matches .Y.com but not Y.com.) - - Note that domain-match is not a commutative operation: a.b.c.com - domain-matches .c.com, but not the reverse. - - """ - # Note that, if A or B are IP addresses, the only relevant part of the - # definition of the domain-match algorithm is the direct string-compare. 
- A = A.lower() - B = B.lower() - if A == B: - return True - if not is_HDN(A): - return False - i = A.rfind(B) - if i == -1 or i == 0: - # A does not have form NB, or N is the empty string - return False - if not B.startswith("."): - return False - if not is_HDN(B[1:]): - return False - return True - -def liberal_is_HDN(text): - """Return True if text is a sort-of-like a host domain name. - - For accepting/blocking domains. - - """ - if IPV4_RE.search(text): - return False - return True - -def user_domain_match(A, B): - """For blocking/accepting domains. - - A and B may be host domain names or IP addresses. - - """ - A = A.lower() - B = B.lower() - if not (liberal_is_HDN(A) and liberal_is_HDN(B)): - if A == B: - # equal IP addresses - return True - return False - initial_dot = B.startswith(".") - if initial_dot and A.endswith(B): - return True - if not initial_dot and A == B: - return True - return False - -cut_port_re = re.compile(r":\d+$") -def request_host(request): - """Return request-host, as defined by RFC 2965. - - Variation from RFC: returned value is lowercased, for convenient - comparison. - - """ - url = request.get_full_url() - host = urlparse.urlparse(url)[1] - if host == "": - host = request.get_header("Host", "") - - # remove port, if present - host = cut_port_re.sub("", host, 1) - return host.lower() - -def eff_request_host(request): - """Return a tuple (request-host, effective request-host name). - - As defined by RFC 2965, except both are lowercased. 
- - """ - erhn = req_host = request_host(request) - if req_host.find(".") == -1 and not IPV4_RE.search(req_host): - erhn = req_host + ".local" - return req_host, erhn - -def request_path(request): - """request-URI, as defined by RFC 2965.""" - url = request.get_full_url() - #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url) - #req_path = escape_path("".join(urlparse.urlparse(url)[2:])) - path, parameters, query, frag = urlparse.urlparse(url)[2:] - if parameters: - path = "%s;%s" % (path, parameters) - path = escape_path(path) - req_path = urlparse.urlunparse(("", "", path, "", query, frag)) - if not req_path.startswith("/"): - # fix bad RFC 2396 absoluteURI - req_path = "/"+req_path - return req_path - -def request_port(request): - host = request.get_host() - i = host.find(':') - if i >= 0: - port = host[i+1:] - try: - int(port) - except ValueError: - _debug("nonnumeric port: '%s'", port) - return None - else: - port = DEFAULT_HTTP_PORT - return port - -# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't -# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 
-HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" -ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") -def uppercase_escaped_char(match): - return "%%%s" % match.group(1).upper() -def escape_path(path): - """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" - # There's no knowing what character encoding was used to create URLs - # containing %-escapes, but since we have to pick one to escape invalid - # path characters, we pick UTF-8, as recommended in the HTML 4.0 - # specification: - # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 - # And here, kind of: draft-fielding-uri-rfc2396bis-03 - # (And in draft IRI specification: draft-duerst-iri-05) - # (And here, for new URI schemes: RFC 2718) - path = urllib.quote(path, HTTP_PATH_SAFE) - path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) - return path - -def reach(h): - """Return reach of host h, as defined by RFC 2965, section 1. - - The reach R of a host name H is defined as follows: - - * If - - - H is the host domain name of a host; and, - - - H has the form A.B; and - - - A has no embedded (that is, interior) dots; and - - - B has at least one embedded dot, or B is the string "local". - then the reach of H is .B. - - * Otherwise, the reach of H is H. - - >>> reach("www.acme.com") - '.acme.com' - >>> reach("acme.com") - 'acme.com' - >>> reach("acme.local") - '.local' - - """ - i = h.find(".") - if i >= 0: - #a = h[:i] # this line is only here to show what a is - b = h[i+1:] - i = b.find(".") - if is_HDN(h) and (i >= 0 or b == "local"): - return "."+b - return h - -def is_third_party(request): - """ - - RFC 2965, section 3.3.6: - - An unverifiable transaction is to a third-party host if its request- - host U does not domain-match the reach R of the request-host O in the - origin transaction. - - """ - req_host = request_host(request) - if not domain_match(req_host, reach(request.get_origin_req_host())): - return True - else: - return False - - -class Cookie: - """HTTP Cookie. 
- - This class represents both Netscape and RFC 2965 cookies. - - This is deliberately a very simple class. It just holds attributes. It's - possible to construct Cookie instances that don't comply with the cookie - standards. CookieJar.make_cookies is the factory function for Cookie - objects -- it deals with cookie parsing, supplying defaults, and - normalising to the representation used in this class. CookiePolicy is - responsible for checking them to see whether they should be accepted from - and returned to the server. - - Note that the port may be present in the headers, but unspecified ("Port" - rather than"Port=80", for example); if this is the case, port is None. - - """ - - def __init__(self, version, name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest, - rfc2109=False, - ): - - if version is not None: version = int(version) - if expires is not None: expires = int(expires) - if port is None and port_specified is True: - raise ValueError("if port is None, port_specified must be false") - - self.version = version - self.name = name - self.value = value - self.port = port - self.port_specified = port_specified - # normalise case, as per RFC 2965 section 3.3.3 - self.domain = domain.lower() - self.domain_specified = domain_specified - # Sigh. We need to know whether the domain given in the - # cookie-attribute had an initial dot, in order to follow RFC 2965 - # (as clarified in draft errata). Needed for the returned $Domain - # value. 
- self.domain_initial_dot = domain_initial_dot - self.path = path - self.path_specified = path_specified - self.secure = secure - self.expires = expires - self.discard = discard - self.comment = comment - self.comment_url = comment_url - self.rfc2109 = rfc2109 - - self._rest = copy.copy(rest) - - def has_nonstandard_attr(self, name): - return name in self._rest - def get_nonstandard_attr(self, name, default=None): - return self._rest.get(name, default) - def set_nonstandard_attr(self, name, value): - self._rest[name] = value - - def is_expired(self, now=None): - if now is None: now = time.time() - if (self.expires is not None) and (self.expires <= now): - return True - return False - - def __str__(self): - if self.port is None: p = "" - else: p = ":"+self.port - limit = self.domain + p + self.path - if self.value is not None: - namevalue = "%s=%s" % (self.name, self.value) - else: - namevalue = self.name - return "" % (namevalue, limit) - - def __repr__(self): - args = [] - for name in ("version", "name", "value", - "port", "port_specified", - "domain", "domain_specified", "domain_initial_dot", - "path", "path_specified", - "secure", "expires", "discard", "comment", "comment_url", - ): - attr = getattr(self, name) - args.append("%s=%s" % (name, repr(attr))) - args.append("rest=%s" % repr(self._rest)) - args.append("rfc2109=%s" % repr(self.rfc2109)) - return "Cookie(%s)" % ", ".join(args) - - -class CookiePolicy: - """Defines which cookies get accepted from and returned to server. - - May also modify cookies, though this is probably a bad idea. - - The subclass DefaultCookiePolicy defines the standard rules for Netscape - and RFC 2965 cookies -- override that if you want a customised policy. - - """ - def set_ok(self, cookie, request): - """Return true if (and only if) cookie should be accepted from server. - - Currently, pre-expired cookies never get this far -- the CookieJar - class deletes such cookies itself. 
- - """ - raise NotImplementedError() - - def return_ok(self, cookie, request): - """Return true if (and only if) cookie should be returned to server.""" - raise NotImplementedError() - - def domain_return_ok(self, domain, request): - """Return false if cookies should not be returned, given cookie domain. - """ - return True - - def path_return_ok(self, path, request): - """Return false if cookies should not be returned, given cookie path. - """ - return True - - -class DefaultCookiePolicy(CookiePolicy): - """Implements the standard rules for accepting and returning cookies.""" - - DomainStrictNoDots = 1 - DomainStrictNonDomain = 2 - DomainRFC2965Match = 4 - - DomainLiberal = 0 - DomainStrict = DomainStrictNoDots|DomainStrictNonDomain - - def __init__(self, - blocked_domains=None, allowed_domains=None, - netscape=True, rfc2965=False, - rfc2109_as_netscape=None, - hide_cookie2=False, - strict_domain=False, - strict_rfc2965_unverifiable=True, - strict_ns_unverifiable=False, - strict_ns_domain=DomainLiberal, - strict_ns_set_initial_dollar=False, - strict_ns_set_path=False, - ): - """Constructor arguments should be passed as keyword arguments only.""" - self.netscape = netscape - self.rfc2965 = rfc2965 - self.rfc2109_as_netscape = rfc2109_as_netscape - self.hide_cookie2 = hide_cookie2 - self.strict_domain = strict_domain - self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable - self.strict_ns_unverifiable = strict_ns_unverifiable - self.strict_ns_domain = strict_ns_domain - self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar - self.strict_ns_set_path = strict_ns_set_path - - if blocked_domains is not None: - self._blocked_domains = tuple(blocked_domains) - else: - self._blocked_domains = () - - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def blocked_domains(self): - """Return the sequence of blocked domains (as a tuple).""" - return self._blocked_domains - def 
set_blocked_domains(self, blocked_domains): - """Set the sequence of blocked domains.""" - self._blocked_domains = tuple(blocked_domains) - - def is_blocked(self, domain): - for blocked_domain in self._blocked_domains: - if user_domain_match(domain, blocked_domain): - return True - return False - - def allowed_domains(self): - """Return None, or the sequence of allowed domains (as a tuple).""" - return self._allowed_domains - def set_allowed_domains(self, allowed_domains): - """Set the sequence of allowed domains, or None.""" - if allowed_domains is not None: - allowed_domains = tuple(allowed_domains) - self._allowed_domains = allowed_domains - - def is_not_allowed(self, domain): - if self._allowed_domains is None: - return False - for allowed_domain in self._allowed_domains: - if user_domain_match(domain, allowed_domain): - return False - return True - - def set_ok(self, cookie, request): - """ - If you override .set_ok(), be sure to call this method. If it returns - false, so should your subclass (assuming your subclass wants to be more - strict about which cookies to accept). - - """ - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - assert cookie.name is not None - - for n in "version", "verifiability", "name", "path", "domain", "port": - fn_name = "set_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - - return True - - def set_ok_version(self, cookie, request): - if cookie.version is None: - # Version is always set to 0 by parse_ns_headers if it's a Netscape - # cookie, so this must be an invalid RFC 2965 cookie. 
- _debug(" Set-Cookie2 without version attribute (%s=%s)", - cookie.name, cookie.value) - return False - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def set_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during " - "unverifiable transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during " - "unverifiable transaction") - return False - return True - - def set_ok_name(self, cookie, request): - # Try and stop servers setting V0 cookies designed to hack other - # servers that know both V0 and V1 protocols. - if (cookie.version == 0 and self.strict_ns_set_initial_dollar and - cookie.name.startswith("$")): - _debug(" illegal name (starts with '$'): '%s'", cookie.name) - return False - return True - - def set_ok_path(self, cookie, request): - if cookie.path_specified: - req_path = request_path(request) - if ((cookie.version > 0 or - (cookie.version == 0 and self.strict_ns_set_path)) and - not req_path.startswith(cookie.path)): - _debug(" path attribute %s is not a prefix of request " - "path %s", cookie.path, req_path) - return False - return True - - def set_ok_domain(self, cookie, request): - if self.is_blocked(cookie.domain): - _debug(" domain %s is in user block-list", cookie.domain) - return False - if self.is_not_allowed(cookie.domain): - _debug(" domain %s is not in user allow-list", cookie.domain) - return False - if cookie.domain_specified: - req_host, erhn = eff_request_host(request) - domain = cookie.domain - if self.strict_domain and (domain.count(".") >= 2): - # XXX This should probably be compared with the Konqueror - # (kcookiejar.cpp) and 
Mozilla implementations, but it's a - # losing battle. - i = domain.rfind(".") - j = domain.rfind(".", 0, i) - if j == 0: # domain like .foo.bar - tld = domain[i+1:] - sld = domain[j+1:i] - if sld.lower() in ("co", "ac", "com", "edu", "org", "net", - "gov", "mil", "int", "aero", "biz", "cat", "coop", - "info", "jobs", "mobi", "museum", "name", "pro", - "travel", "eu") and len(tld) == 2: - # domain like .co.uk - _debug(" country-code second level domain %s", domain) - return False - if domain.startswith("."): - undotted_domain = domain[1:] - else: - undotted_domain = domain - embedded_dots = (undotted_domain.find(".") >= 0) - if not embedded_dots and domain != ".local": - _debug(" non-local domain %s contains no embedded dot", - domain) - return False - if cookie.version == 0: - if (not erhn.endswith(domain) and - (not erhn.startswith(".") and - not ("."+erhn).endswith(domain))): - _debug(" effective request-host %s (even with added " - "initial dot) does not end end with %s", - erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainRFC2965Match)): - if not domain_match(erhn, domain): - _debug(" effective request-host %s does not domain-match " - "%s", erhn, domain) - return False - if (cookie.version > 0 or - (self.strict_ns_domain & self.DomainStrictNoDots)): - host_prefix = req_host[:-len(domain)] - if (host_prefix.find(".") >= 0 and - not IPV4_RE.search(req_host)): - _debug(" host prefix %s for domain %s contains a dot", - host_prefix, domain) - return False - return True - - def set_ok_port(self, cookie, request): - if cookie.port_specified: - req_port = request_port(request) - if req_port is None: - req_port = "80" - else: - req_port = str(req_port) - for p in cookie.port.split(","): - try: - int(p) - except ValueError: - _debug(" bad port %s (not numeric)", p) - return False - if p == req_port: - break - else: - _debug(" request port (%s) not found in %s", - req_port, cookie.port) - return False - return True - - def 
return_ok(self, cookie, request): - """ - If you override .return_ok(), be sure to call this method. If it - returns false, so should your subclass (assuming your subclass wants to - be more strict about which cookies to return). - - """ - # Path has already been checked by .path_return_ok(), and domain - # blocking done by .domain_return_ok(). - _debug(" - checking cookie %s=%s", cookie.name, cookie.value) - - for n in "version", "verifiability", "secure", "expires", "port", "domain": - fn_name = "return_ok_"+n - fn = getattr(self, fn_name) - if not fn(cookie, request): - return False - return True - - def return_ok_version(self, cookie, request): - if cookie.version > 0 and not self.rfc2965: - _debug(" RFC 2965 cookies are switched off") - return False - elif cookie.version == 0 and not self.netscape: - _debug(" Netscape cookies are switched off") - return False - return True - - def return_ok_verifiability(self, cookie, request): - if request.is_unverifiable() and is_third_party(request): - if cookie.version > 0 and self.strict_rfc2965_unverifiable: - _debug(" third-party RFC 2965 cookie during unverifiable " - "transaction") - return False - elif cookie.version == 0 and self.strict_ns_unverifiable: - _debug(" third-party Netscape cookie during unverifiable " - "transaction") - return False - return True - - def return_ok_secure(self, cookie, request): - if cookie.secure and request.get_type() != "https": - _debug(" secure cookie with non-secure request") - return False - return True - - def return_ok_expires(self, cookie, request): - if cookie.is_expired(self._now): - _debug(" cookie expired") - return False - return True - - def return_ok_port(self, cookie, request): - if cookie.port: - req_port = request_port(request) - if req_port is None: - req_port = "80" - for p in cookie.port.split(","): - if p == req_port: - break - else: - _debug(" request port %s does not match cookie port %s", - req_port, cookie.port) - return False - return True - - def 
return_ok_domain(self, cookie, request): - req_host, erhn = eff_request_host(request) - domain = cookie.domain - - # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't - if (cookie.version == 0 and - (self.strict_ns_domain & self.DomainStrictNonDomain) and - not cookie.domain_specified and domain != erhn): - _debug(" cookie with unspecified domain does not string-compare " - "equal to request domain") - return False - - if cookie.version > 0 and not domain_match(erhn, domain): - _debug(" effective request-host name %s does not domain-match " - "RFC 2965 cookie domain %s", erhn, domain) - return False - if cookie.version == 0 and not ("."+erhn).endswith(domain): - _debug(" request-host %s does not match Netscape cookie domain " - "%s", req_host, domain) - return False - return True - - def domain_return_ok(self, domain, request): - # Liberal check of. This is here as an optimization to avoid - # having to load lots of MSIE cookie files unless necessary. - req_host, erhn = eff_request_host(request) - if not req_host.startswith("."): - req_host = "."+req_host - if not erhn.startswith("."): - erhn = "."+erhn - if not (req_host.endswith(domain) or erhn.endswith(domain)): - #_debug(" request domain %s does not match cookie domain %s", - # req_host, domain) - return False - - if self.is_blocked(domain): - _debug(" domain %s is in user block-list", domain) - return False - if self.is_not_allowed(domain): - _debug(" domain %s is not in user allow-list", domain) - return False - - return True - - def path_return_ok(self, path, request): - _debug("- checking cookie path=%s", path) - req_path = request_path(request) - if not req_path.startswith(path): - _debug(" %s does not path-match %s", req_path, path) - return False - return True - - -def vals_sorted_by_key(adict): - keys = sorted(adict.keys()) - return map(adict.get, keys) - -def deepvalues(mapping): - """Iterates over nested mapping, depth-first, in sorted order by key.""" - values = 
vals_sorted_by_key(mapping) - for obj in values: - mapping = False - try: - obj.items - except AttributeError: - pass - else: - mapping = True - for subobj in deepvalues(obj): - yield subobj - if not mapping: - yield obj - - -# Used as second parameter to dict.get() method, to distinguish absent -# dict key from one with a None value. -class Absent: pass - -class CookieJar: - """Collection of HTTP cookies. - - You may not need to know about this class: try - urllib2.build_opener(HTTPCookieProcessor).open(url). - - """ - - non_word_re = re.compile(r"\W") - quote_re = re.compile(r"([\"\\])") - strict_domain_re = re.compile(r"\.?[^.]*") - domain_re = re.compile(r"[^.]*") - dots_re = re.compile(r"^\.+") - - magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" - - def __init__(self, policy=None): - if policy is None: - policy = DefaultCookiePolicy() - self._policy = policy - - self._cookies_lock = _threading.RLock() - self._cookies = {} - - def set_policy(self, policy): - self._policy = policy - - def _cookies_for_domain(self, domain, request): - cookies = [] - if not self._policy.domain_return_ok(domain, request): - return [] - _debug("Checking %s for cookies to return", domain) - cookies_by_path = self._cookies[domain] - for path in cookies_by_path.keys(): - if not self._policy.path_return_ok(path, request): - continue - cookies_by_name = cookies_by_path[path] - for cookie in cookies_by_name.values(): - if not self._policy.return_ok(cookie, request): - _debug(" not returning cookie") - continue - _debug(" it's a match") - cookies.append(cookie) - return cookies - - def _cookies_for_request(self, request): - """Return a list of cookies to be returned to server.""" - cookies = [] - for domain in self._cookies.keys(): - cookies.extend(self._cookies_for_domain(domain, request)) - return cookies - - def _cookie_attrs(self, cookies): - """Return a list of cookie-attributes to be returned to server. - - like ['foo="bar"; $Path="/"', ...] 
- - The $Version attribute is also added when appropriate (currently only - once per request). - - """ - # add cookies in order of most specific (ie. longest) path first - cookies.sort(key=lambda a: len(a.path), reverse=True) - - version_set = False - - attrs = [] - for cookie in cookies: - # set version of Cookie header - # XXX - # What should it be if multiple matching Set-Cookie headers have - # different versions themselves? - # Answer: there is no answer; was supposed to be settled by - # RFC 2965 errata, but that may never appear... - version = cookie.version - if not version_set: - version_set = True - if version > 0: - attrs.append("$Version=%s" % version) - - # quote cookie value if necessary - # (not for Netscape protocol, which already has any quotes - # intact, due to the poorly-specified Netscape Cookie: syntax) - if ((cookie.value is not None) and - self.non_word_re.search(cookie.value) and version > 0): - value = self.quote_re.sub(r"\\\1", cookie.value) - else: - value = cookie.value - - # add cookie-attributes to be returned in Cookie header - if cookie.value is None: - attrs.append(cookie.name) - else: - attrs.append("%s=%s" % (cookie.name, value)) - if version > 0: - if cookie.path_specified: - attrs.append('$Path="%s"' % cookie.path) - if cookie.domain.startswith("."): - domain = cookie.domain - if (not cookie.domain_initial_dot and - domain.startswith(".")): - domain = domain[1:] - attrs.append('$Domain="%s"' % domain) - if cookie.port is not None: - p = "$Port" - if cookie.port_specified: - p = p + ('="%s"' % cookie.port) - attrs.append(p) - - return attrs - - def add_cookie_header(self, request): - """Add correct Cookie: header to request (urllib2.Request object). - - The Cookie2 header is also added unless policy.hide_cookie2 is true. 
- - """ - _debug("add_cookie_header") - self._cookies_lock.acquire() - try: - - self._policy._now = self._now = int(time.time()) - - cookies = self._cookies_for_request(request) - - attrs = self._cookie_attrs(cookies) - if attrs: - if not request.has_header("Cookie"): - request.add_unredirected_header( - "Cookie", "; ".join(attrs)) - - # if necessary, advertise that we know RFC 2965 - if (self._policy.rfc2965 and not self._policy.hide_cookie2 and - not request.has_header("Cookie2")): - for cookie in cookies: - if cookie.version != 1: - request.add_unredirected_header("Cookie2", '$Version="1"') - break - - finally: - self._cookies_lock.release() - - self.clear_expired_cookies() - - def _normalized_cookie_tuples(self, attrs_set): - """Return list of tuples containing normalised cookie information. - - attrs_set is the list of lists of key,value pairs extracted from - the Set-Cookie or Set-Cookie2 headers. - - Tuples are name, value, standard, rest, where name and value are the - cookie name and value, standard is a dictionary containing the standard - cookie-attributes (discard, secure, version, expires or max-age, - domain, path and port) and rest is a dictionary containing the rest of - the cookie-attributes. - - """ - cookie_tuples = [] - - boolean_attrs = "discard", "secure" - value_attrs = ("version", - "expires", "max-age", - "domain", "path", "port", - "comment", "commenturl") - - for cookie_attrs in attrs_set: - name, value = cookie_attrs[0] - - # Build dictionary of standard cookie-attributes (standard) and - # dictionary of other cookie-attributes (rest). - - # Note: expiry time is normalised to seconds since epoch. V0 - # cookies should have the Expires cookie-attribute, and V1 cookies - # should have Max-Age, but since V1 includes RFC 2109 cookies (and - # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we - # accept either (but prefer Max-Age). 
- max_age_set = False - - bad_cookie = False - - standard = {} - rest = {} - for k, v in cookie_attrs[1:]: - lc = k.lower() - # don't lose case distinction for unknown fields - if lc in value_attrs or lc in boolean_attrs: - k = lc - if k in boolean_attrs and v is None: - # boolean cookie-attribute is present, but has no value - # (like "discard", rather than "port=80") - v = True - if k in standard: - # only first value is significant - continue - if k == "domain": - if v is None: - _debug(" missing value for domain attribute") - bad_cookie = True - break - # RFC 2965 section 3.3.3 - v = v.lower() - if k == "expires": - if max_age_set: - # Prefer max-age to expires (like Mozilla) - continue - if v is None: - _debug(" missing or invalid value for expires " - "attribute: treating as session cookie") - continue - if k == "max-age": - max_age_set = True - try: - v = int(v) - except ValueError: - _debug(" missing or invalid (non-numeric) value for " - "max-age attribute") - bad_cookie = True - break - # convert RFC 2965 Max-Age to seconds since epoch - # XXX Strictly you're supposed to follow RFC 2616 - # age-calculation rules. Remember that zero Max-Age is a - # is a request to discard (old and new) cookie, though. 
- k = "expires" - v = self._now + v - if (k in value_attrs) or (k in boolean_attrs): - if (v is None and - k not in ("port", "comment", "commenturl")): - _debug(" missing value for %s attribute" % k) - bad_cookie = True - break - standard[k] = v - else: - rest[k] = v - - if bad_cookie: - continue - - cookie_tuples.append((name, value, standard, rest)) - - return cookie_tuples - - def _cookie_from_cookie_tuple(self, tup, request): - # standard is dict of standard cookie-attributes, rest is dict of the - # rest of them - name, value, standard, rest = tup - - domain = standard.get("domain", Absent) - path = standard.get("path", Absent) - port = standard.get("port", Absent) - expires = standard.get("expires", Absent) - - # set the easy defaults - version = standard.get("version", None) - if version is not None: version = int(version) - secure = standard.get("secure", False) - # (discard is also set if expires is Absent) - discard = standard.get("discard", False) - comment = standard.get("comment", None) - comment_url = standard.get("commenturl", None) - - # set default path - if path is not Absent and path != "": - path_specified = True - path = escape_path(path) - else: - path_specified = False - path = request_path(request) - i = path.rfind("/") - if i != -1: - if version == 0: - # Netscape spec parts company from reality here - path = path[:i] - else: - path = path[:i+1] - if len(path) == 0: path = "/" - - # set default domain - domain_specified = domain is not Absent - # but first we have to remember whether it starts with a dot - domain_initial_dot = False - if domain_specified: - domain_initial_dot = bool(domain.startswith(".")) - if domain is Absent: - req_host, erhn = eff_request_host(request) - domain = erhn - elif not domain.startswith("."): - domain = "."+domain - - # set default port - port_specified = False - if port is not Absent: - if port is None: - # Port attr present, but has no value: default to request port. 
- # Cookie should then only be sent back on that port. - port = request_port(request) - else: - port_specified = True - port = re.sub(r"\s+", "", port) - else: - # No port attr present. Cookie can be sent back on any port. - port = None - - # set default expires and discard - if expires is Absent: - expires = None - discard = True - elif expires <= self._now: - # Expiry date in past is request to delete cookie. This can't be - # in DefaultCookiePolicy, because can't delete cookies there. - try: - self.clear(domain, path, name) - except KeyError: - pass - _debug("Expiring cookie, domain='%s', path='%s', name='%s'", - domain, path, name) - return None - - return Cookie(version, - name, value, - port, port_specified, - domain, domain_specified, domain_initial_dot, - path, path_specified, - secure, - expires, - discard, - comment, - comment_url, - rest) - - def _cookies_from_attrs_set(self, attrs_set, request): - cookie_tuples = self._normalized_cookie_tuples(attrs_set) - - cookies = [] - for tup in cookie_tuples: - cookie = self._cookie_from_cookie_tuple(tup, request) - if cookie: cookies.append(cookie) - return cookies - - def _process_rfc2109_cookies(self, cookies): - rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) - if rfc2109_as_ns is None: - rfc2109_as_ns = not self._policy.rfc2965 - for cookie in cookies: - if cookie.version == 1: - cookie.rfc2109 = True - if rfc2109_as_ns: - # treat 2109 cookies as Netscape cookies rather than - # as RFC2965 cookies - cookie.version = 0 - - def make_cookies(self, response, request): - """Return sequence of Cookie objects extracted from response object.""" - # get cookie-attributes for RFC 2965 and Netscape protocols - headers = response.info() - rfc2965_hdrs = headers.getheaders("Set-Cookie2") - ns_hdrs = headers.getheaders("Set-Cookie") - - rfc2965 = self._policy.rfc2965 - netscape = self._policy.netscape - - if ((not rfc2965_hdrs and not ns_hdrs) or - (not ns_hdrs and not rfc2965) or - (not rfc2965_hdrs and 
not netscape) or - (not netscape and not rfc2965)): - return [] # no relevant cookie headers: quick exit - - try: - cookies = self._cookies_from_attrs_set( - split_header_words(rfc2965_hdrs), request) - except Exception: - _warn_unhandled_exception() - cookies = [] - - if ns_hdrs and netscape: - try: - # RFC 2109 and Netscape cookies - ns_cookies = self._cookies_from_attrs_set( - parse_ns_headers(ns_hdrs), request) - except Exception: - _warn_unhandled_exception() - ns_cookies = [] - self._process_rfc2109_cookies(ns_cookies) - - # Look for Netscape cookies (from Set-Cookie headers) that match - # corresponding RFC 2965 cookies (from Set-Cookie2 headers). - # For each match, keep the RFC 2965 cookie and ignore the Netscape - # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are - # bundled in with the Netscape cookies for this purpose, which is - # reasonable behaviour. - if rfc2965: - lookup = {} - for cookie in cookies: - lookup[(cookie.domain, cookie.path, cookie.name)] = None - - def no_matching_rfc2965(ns_cookie, lookup=lookup): - key = ns_cookie.domain, ns_cookie.path, ns_cookie.name - return key not in lookup - ns_cookies = filter(no_matching_rfc2965, ns_cookies) - - if ns_cookies: - cookies.extend(ns_cookies) - - return cookies - - def set_cookie_if_ok(self, cookie, request): - """Set a cookie if policy says it's OK to do so.""" - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - if self._policy.set_ok(cookie, request): - self.set_cookie(cookie) - - - finally: - self._cookies_lock.release() - - def set_cookie(self, cookie): - """Set a cookie, without checking whether or not it should be set.""" - c = self._cookies - self._cookies_lock.acquire() - try: - if cookie.domain not in c: c[cookie.domain] = {} - c2 = c[cookie.domain] - if cookie.path not in c2: c2[cookie.path] = {} - c3 = c2[cookie.path] - c3[cookie.name] = cookie - finally: - self._cookies_lock.release() - - def extract_cookies(self, response, 
request): - """Extract cookies from response, where allowable given the request.""" - _debug("extract_cookies: %s", response.info()) - self._cookies_lock.acquire() - try: - self._policy._now = self._now = int(time.time()) - - for cookie in self.make_cookies(response, request): - if self._policy.set_ok(cookie, request): - _debug(" setting cookie: %s", cookie) - self.set_cookie(cookie) - finally: - self._cookies_lock.release() - - def clear(self, domain=None, path=None, name=None): - """Clear some cookies. - - Invoking this method without arguments will clear all cookies. If - given a single argument, only cookies belonging to that domain will be - removed. If given two arguments, cookies belonging to the specified - path within that domain are removed. If given three arguments, then - the cookie with the specified name, path and domain is removed. - - Raises KeyError if no matching cookie exists. - - """ - if name is not None: - if (domain is None) or (path is None): - raise ValueError( - "domain and path must be given to remove a cookie by name") - del self._cookies[domain][path][name] - elif path is not None: - if domain is None: - raise ValueError( - "domain must be given to remove cookies by path") - del self._cookies[domain][path] - elif domain is not None: - del self._cookies[domain] - else: - self._cookies = {} - - def clear_session_cookies(self): - """Discard all session cookies. - - Note that the .save() method won't save session cookies anyway, unless - you ask otherwise by passing a true ignore_discard argument. - - """ - self._cookies_lock.acquire() - try: - for cookie in self: - if cookie.discard: - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def clear_expired_cookies(self): - """Discard all expired cookies. 
- - You probably don't need to call this method: expired cookies are never - sent back to the server (provided you're using DefaultCookiePolicy), - this method is called by CookieJar itself every so often, and the - .save() method won't save expired cookies anyway (unless you ask - otherwise by passing a true ignore_expires argument). - - """ - self._cookies_lock.acquire() - try: - now = time.time() - for cookie in self: - if cookie.is_expired(now): - self.clear(cookie.domain, cookie.path, cookie.name) - finally: - self._cookies_lock.release() - - def __iter__(self): - return deepvalues(self._cookies) - - def __len__(self): - """Return number of contained cookies.""" - i = 0 - for cookie in self: i = i + 1 - return i - - def __repr__(self): - r = [] - for cookie in self: r.append(repr(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - def __str__(self): - r = [] - for cookie in self: r.append(str(cookie)) - return "<%s[%s]>" % (self.__class__, ", ".join(r)) - - -# derives from IOError for backwards-compatibility with Python 2.4.0 -class LoadError(IOError): pass - -class FileCookieJar(CookieJar): - """CookieJar that can be loaded from and saved to a file.""" - - def __init__(self, filename=None, delayload=False, policy=None): - """ - Cookies are NOT loaded from the named file until either the .load() or - .revert() method is called. 
- - """ - CookieJar.__init__(self, policy) - if filename is not None: - try: - filename+"" - except: - raise ValueError("filename must be string-like") - self.filename = filename - self.delayload = bool(delayload) - - def save(self, filename=None, ignore_discard=False, ignore_expires=False): - """Save cookies to a file.""" - raise NotImplementedError() - - def load(self, filename=None, ignore_discard=False, ignore_expires=False): - """Load cookies from a file.""" - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - f = open(filename) - try: - self._really_load(f, filename, ignore_discard, ignore_expires) - finally: - f.close() - - def revert(self, filename=None, - ignore_discard=False, ignore_expires=False): - """Clear all cookies and reload cookies from a saved file. - - Raises LoadError (or IOError) if reversion is not successful; the - object's state will not be altered if this happens. - - """ - if filename is None: - if self.filename is not None: filename = self.filename - else: raise ValueError(MISSING_FILENAME_TEXT) - - self._cookies_lock.acquire() - try: - - old_state = copy.deepcopy(self._cookies) - self._cookies = {} - try: - self.load(filename, ignore_discard, ignore_expires) - except (LoadError, IOError): - self._cookies = old_state - raise - - finally: - self._cookies_lock.release() - -from _LWPCookieJar import LWPCookieJar, lwp_cookie_str -from _MozillaCookieJar import MozillaCookieJar diff --git a/Lib/distutils/command/upload.py b/Lib/distutils/command/upload.py index 603336c..2cad2c7 100644 --- a/Lib/distutils/command/upload.py +++ b/Lib/distutils/command/upload.py @@ -11,7 +11,7 @@ import os import socket import platform import configparser -import httplib +import http.client import base64 import urlparse @@ -151,9 +151,9 @@ class upload(PyPIRCCommand): urlparse.urlparse(self.repository) assert not params and not query and not fragments if schema == 'http': - http = 
httplib.HTTPConnection(netloc) + http = http.client.HTTPConnection(netloc) elif schema == 'https': - http = httplib.HTTPSConnection(netloc) + http = http.client.HTTPSConnection(netloc) else: raise AssertionError("unsupported schema "+schema) diff --git a/Lib/http/__init__.py b/Lib/http/__init__.py new file mode 100644 index 0000000..196d378 --- /dev/null +++ b/Lib/http/__init__.py @@ -0,0 +1 @@ +# This directory is a Python package. diff --git a/Lib/http/client.py b/Lib/http/client.py new file mode 100644 index 0000000..de27c17 --- /dev/null +++ b/Lib/http/client.py @@ -0,0 +1,1132 @@ +"""HTTP/1.1 client library + + + + +HTTPConnection goes through a number of "states", which define when a client +may legally make another request or fetch the response for a particular +request. This diagram details these state transitions: + + (null) + | + | HTTPConnection() + v + Idle + | + | putrequest() + v + Request-started + | + | ( putheader() )* endheaders() + v + Request-sent + | + | response = getresponse() + v + Unread-response [Response-headers-read] + |\____________________ + | | + | response.read() | putrequest() + v v + Idle Req-started-unread-response + ______/| + / | + response.read() | | ( putheader() )* endheaders() + v v + Request-started Req-sent-unread-response + | + | response.read() + v + Request-sent + +This diagram presents the following rules: + -- a second request may not be started until {response-headers-read} + -- a response [object] cannot be retrieved until {request-sent} + -- there is no differentiation between an unread response body and a + partially read response body + +Note: this enforcement is applied by the HTTPConnection class. The + HTTPResponse class does not enforce this state machine, which + implies sophisticated clients may accelerate the request/response + pipeline. Caution should be taken, though: accelerating the states + beyond the above pattern may imply knowledge of the server's + connection-close behavior for certain requests. 
For example, it + is impossible to tell whether the server will close the connection + UNTIL the response headers have been read; this means that further + requests cannot be placed into the pipeline until it is known that + the server will NOT be closing the connection. + +Logical State __state __response +------------- ------- ---------- +Idle _CS_IDLE None +Request-started _CS_REQ_STARTED None +Request-sent _CS_REQ_SENT None +Unread-response _CS_IDLE +Req-started-unread-response _CS_REQ_STARTED +Req-sent-unread-response _CS_REQ_SENT +""" + +import io +import mimetools +import socket +from urlparse import urlsplit +import warnings + +__all__ = ["HTTPResponse", "HTTPConnection", + "HTTPException", "NotConnected", "UnknownProtocol", + "UnknownTransferEncoding", "UnimplementedFileMode", + "IncompleteRead", "InvalidURL", "ImproperConnectionState", + "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", + "BadStatusLine", "error", "responses"] + +HTTP_PORT = 80 +HTTPS_PORT = 443 + +_UNKNOWN = 'UNKNOWN' + +# connection states +_CS_IDLE = 'Idle' +_CS_REQ_STARTED = 'Request-started' +_CS_REQ_SENT = 'Request-sent' + +# status codes +# informational +CONTINUE = 100 +SWITCHING_PROTOCOLS = 101 +PROCESSING = 102 + +# successful +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 +IM_USED = 226 + +# redirection +MULTIPLE_CHOICES = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 + +# client error +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +METHOD_NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTHENTICATION_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 
+REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 +UNPROCESSABLE_ENTITY = 422 +LOCKED = 423 +FAILED_DEPENDENCY = 424 +UPGRADE_REQUIRED = 426 + +# server error +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE = 507 +NOT_EXTENDED = 510 + +# Mapping status codes to official W3C names +responses = { + 100: 'Continue', + 101: 'Switching Protocols', + + 200: 'OK', + 201: 'Created', + 202: 'Accepted', + 203: 'Non-Authoritative Information', + 204: 'No Content', + 205: 'Reset Content', + 206: 'Partial Content', + + 300: 'Multiple Choices', + 301: 'Moved Permanently', + 302: 'Found', + 303: 'See Other', + 304: 'Not Modified', + 305: 'Use Proxy', + 306: '(Unused)', + 307: 'Temporary Redirect', + + 400: 'Bad Request', + 401: 'Unauthorized', + 402: 'Payment Required', + 403: 'Forbidden', + 404: 'Not Found', + 405: 'Method Not Allowed', + 406: 'Not Acceptable', + 407: 'Proxy Authentication Required', + 408: 'Request Timeout', + 409: 'Conflict', + 410: 'Gone', + 411: 'Length Required', + 412: 'Precondition Failed', + 413: 'Request Entity Too Large', + 414: 'Request-URI Too Long', + 415: 'Unsupported Media Type', + 416: 'Requested Range Not Satisfiable', + 417: 'Expectation Failed', + + 500: 'Internal Server Error', + 501: 'Not Implemented', + 502: 'Bad Gateway', + 503: 'Service Unavailable', + 504: 'Gateway Timeout', + 505: 'HTTP Version Not Supported', +} + +# maximal amount of data to read at one time in _safe_read +MAXAMOUNT = 1048576 + +class HTTPMessage(mimetools.Message): + + def addheader(self, key, value): + """Add header for field key handling repeats.""" + prev = self.dict.get(key) + if prev is None: + self.dict[key] = value + else: + combined = ", ".join((prev, value)) + self.dict[key] = combined + + def addcontinue(self, key, more): + """Add more field data from a continuation line.""" + prev = self.dict[key] + self.dict[key] 
= prev + "\n " + more + + def readheaders(self): + """Read header lines. + + Read header lines up to the entirely blank line that terminates them. + The (normally blank) line that ends the headers is skipped, but not + included in the returned list. If a non-header line ends the headers, + (which is an error), an attempt is made to backspace over it; it is + never included in the returned list. + + The variable self.status is set to the empty string if all went well, + otherwise it is an error message. The variable self.headers is a + completely uninterpreted list of lines contained in the header (so + printing them will reproduce the header exactly as it appears in the + file). + + If multiple header fields with the same name occur, they are combined + according to the rules in RFC 2616 sec 4.2: + + Appending each subsequent field-value to the first, each separated + by a comma. The order in which header fields with the same field-name + are received is significant to the interpretation of the combined + field value. + """ + # XXX The implementation overrides the readheaders() method of + # rfc822.Message. The base class design isn't amenable to + # customized behavior here so the method here is a copy of the + # base class code with a few small changes. 
+ + self.dict = {} + self.unixfrom = '' + self.headers = hlist = [] + self.status = '' + headerseen = "" + firstline = 1 + startofline = unread = tell = None + if hasattr(self.fp, 'unread'): + unread = self.fp.unread + elif self.seekable: + tell = self.fp.tell + while True: + if tell: + try: + startofline = tell() + except IOError: + startofline = tell = None + self.seekable = 0 + line = str(self.fp.readline(), "iso-8859-1") + if not line: + self.status = 'EOF in headers' + break + # Skip unix From name time lines + if firstline and line.startswith('From '): + self.unixfrom = self.unixfrom + line + continue + firstline = 0 + if headerseen and line[0] in ' \t': + # XXX Not sure if continuation lines are handled properly + # for http and/or for repeating headers + # It's a continuation line. + hlist.append(line) + self.addcontinue(headerseen, line.strip()) + continue + elif self.iscomment(line): + # It's a comment. Ignore it. + continue + elif self.islast(line): + # Note! No pushback here! The delimiter line gets eaten. + break + headerseen = self.isheader(line) + if headerseen: + # It's a legal header line, save it. + hlist.append(line) + self.addheader(headerseen, line[len(headerseen)+1:].strip()) + continue + else: + # It's not a header line; throw it back and stop here. + if not self.dict: + self.status = 'No headers' + else: + self.status = 'Non-header line where header expected' + # Try to undo the read. + if unread: + unread(line) + elif tell: + self.fp.seek(startofline) + else: + self.status = self.status + '; bad seek' + break + +class HTTPResponse: + + # strict: If true, raise BadStatusLine if the status line can't be + # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is + # false because it prevents clients from talking to HTTP/0.9 + # servers. Note that a response with a sufficiently corrupted + # status line will look like an HTTP/0.9 response. + + # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. 
+ + # The bytes from the socket object are iso-8859-1 strings. + # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded + # text following RFC 2047. The basic status line parsing only + # accepts iso-8859-1. + + def __init__(self, sock, debuglevel=0, strict=0, method=None): + # XXX If the response includes a content-length header, we + # need to make sure that the client doesn't read more than the + # specified number of bytes. If it does, it will block until + # the server times out and closes the connection. (The only + # applies to HTTP/1.1 connections.) Since some clients access + # self.fp directly rather than calling read(), this is a little + # tricky. + self.fp = sock.makefile("rb", 0) + self.debuglevel = debuglevel + self.strict = strict + self._method = method + + self.msg = None + + # from the Status-Line of the response + self.version = _UNKNOWN # HTTP-Version + self.status = _UNKNOWN # Status-Code + self.reason = _UNKNOWN # Reason-Phrase + + self.chunked = _UNKNOWN # is "chunked" being used? + self.chunk_left = _UNKNOWN # bytes left to read in current chunk + self.length = _UNKNOWN # number of bytes left in response + self.will_close = _UNKNOWN # conn will close at end of response + + def _read_status(self): + # Initialize with Simple-Response defaults. + line = str(self.fp.readline(), "iso-8859-1") + if self.debuglevel > 0: + print("reply:", repr(line)) + if not line: + # Presumably, the server closed the connection before + # sending a valid response. + raise BadStatusLine(line) + try: + [version, status, reason] = line.split(None, 2) + except ValueError: + try: + [version, status] = line.split(None, 1) + reason = "" + except ValueError: + # empty version will cause next test to fail and status + # will be treated as 0.9 response. + version = "" + if not version.startswith("HTTP/"): + if self.strict: + self.close() + raise BadStatusLine(line) + else: + # Assume it's a Simple-Response from an 0.9 server. 
+ # We have to convert the first line back to raw bytes + # because self.fp.readline() needs to return bytes. + self.fp = LineAndFileWrapper(bytes(line, "ascii"), self.fp) + return "HTTP/0.9", 200, "" + + # The status code is a three-digit number + try: + status = int(status) + if status < 100 or status > 999: + raise BadStatusLine(line) + except ValueError: + raise BadStatusLine(line) + return version, status, reason + + def begin(self): + if self.msg is not None: + # we've already started reading the response + return + + # read until we get a non-100 response + while True: + version, status, reason = self._read_status() + if status != CONTINUE: + break + # skip the header from the 100 response + while True: + skip = self.fp.readline().strip() + if not skip: + break + if self.debuglevel > 0: + print("header:", skip) + + self.status = status + self.reason = reason.strip() + if version == "HTTP/1.0": + self.version = 10 + elif version.startswith("HTTP/1."): + self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 + elif version == "HTTP/0.9": + self.version = 9 + else: + raise UnknownProtocol(version) + + if self.version == 9: + self.length = None + self.chunked = 0 + self.will_close = 1 + self.msg = HTTPMessage(io.BytesIO()) + return + + self.msg = HTTPMessage(self.fp, 0) + if self.debuglevel > 0: + for hdr in self.msg.headers: + print("header:", hdr, end=" ") + + # don't let the msg keep an fp + self.msg.fp = None + + # are we using the chunked-style of transfer encoding? + tr_enc = self.msg.getheader("transfer-encoding") + if tr_enc and tr_enc.lower() == "chunked": + self.chunked = 1 + self.chunk_left = None + else: + self.chunked = 0 + + # will the connection close at the end of the response? + self.will_close = self._check_close() + + # do we have a Content-Length? 
+ # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" + self.length = None + length = self.msg.getheader("content-length") + if length and not self.chunked: + try: + self.length = int(length) + except ValueError: + self.length = None + else: + if self.length < 0: # ignore nonsensical negative lengths + self.length = None + else: + self.length = None + + # does the body have a fixed length? (of zero) + if (status == NO_CONTENT or status == NOT_MODIFIED or + 100 <= status < 200 or # 1xx codes + self._method == "HEAD"): + self.length = 0 + + # if the connection remains open, and we aren't using chunked, and + # a content-length was not provided, then assume that the connection + # WILL close. + if (not self.will_close and + not self.chunked and + self.length is None): + self.will_close = 1 + + def _check_close(self): + conn = self.msg.getheader("connection") + if self.version == 11: + # An HTTP/1.1 proxy is assumed to stay open unless + # explicitly closed. + conn = self.msg.getheader("connection") + if conn and "close" in conn.lower(): + return True + return False + + # Some HTTP/1.0 implementations have support for persistent + # connections, using rules different than HTTP/1.1. + + # For older HTTP, Keep-Alive indicates persistent connection. + if self.msg.getheader("keep-alive"): + return False + + # At least Akamai returns a "Connection: Keep-Alive" header, + # which was supposed to be sent by the client. + if conn and "keep-alive" in conn.lower(): + return False + + # Proxy-Connection is a netscape hack. + pconn = self.msg.getheader("proxy-connection") + if pconn and "keep-alive" in pconn.lower(): + return False + + # otherwise, assume it will close + return True + + def close(self): + if self.fp: + self.fp.close() + self.fp = None + + # These implementations are for the benefit of io.BufferedReader. + + # XXX This class should probably be revised to act more like + # the "raw stream" that BufferedReader expects. 
+ + @property + def closed(self): + return self.isclosed() + + def flush(self): + self.fp.flush() + + # End of "raw stream" methods + + def isclosed(self): + # NOTE: it is possible that we will not ever call self.close(). This + # case occurs when will_close is TRUE, length is None, and we + # read up to the last byte, but NOT past it. + # + # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be + # called, meaning self.isclosed() is meaningful. + return self.fp is None + + # XXX It would be nice to have readline and __iter__ for this, too. + + def read(self, amt=None): + if self.fp is None: + return b"" + + if self.chunked: + return self._read_chunked(amt) + + if amt is None: + # unbounded read + if self.length is None: + s = self.fp.read() + else: + s = self._safe_read(self.length) + self.length = 0 + self.close() # we read everything + return s + + if self.length is not None: + if amt > self.length: + # clip the read to the "end of response" + amt = self.length + + # we do not use _safe_read() here because this may be a .will_close + # connection, and the user is reading more bytes than will be provided + # (for example, reading in 1k chunks) + s = self.fp.read(amt) + if self.length is not None: + self.length -= len(s) + if not self.length: + self.close() + return s + + def _read_chunked(self, amt): + assert self.chunked != _UNKNOWN + chunk_left = self.chunk_left + value = b"" + + # XXX This accumulates chunks by repeated string concatenation, + # which is not efficient as the number or size of chunks gets big. 
+ while True: + if chunk_left is None: + line = self.fp.readline() + i = line.find(b";") + if i >= 0: + line = line[:i] # strip chunk-extensions + try: + chunk_left = int(line, 16) + except ValueError: + # close the connection as protocol synchronisation is + # probably lost + self.close() + raise IncompleteRead(value) + if chunk_left == 0: + break + if amt is None: + value += self._safe_read(chunk_left) + elif amt < chunk_left: + value += self._safe_read(amt) + self.chunk_left = chunk_left - amt + return value + elif amt == chunk_left: + value += self._safe_read(amt) + self._safe_read(2) # toss the CRLF at the end of the chunk + self.chunk_left = None + return value + else: + value += self._safe_read(chunk_left) + amt -= chunk_left + + # we read the whole chunk, get another + self._safe_read(2) # toss the CRLF at the end of the chunk + chunk_left = None + + # read and discard trailer up to the CRLF terminator + ### note: we shouldn't have any trailers! + while True: + line = self.fp.readline() + if not line: + # a vanishingly small number of sites EOF without + # sending the trailer + break + if line == b"\r\n": + break + + # we read everything; close the "file" + self.close() + + return value + + def _safe_read(self, amt): + """Read the number of bytes requested, compensating for partial reads. + + Normally, we have a blocking socket, but a read() can be interrupted + by a signal (resulting in a partial read). + + Note that we cannot distinguish between EOF and an interrupt when zero + bytes have been read. IncompleteRead() will be raised in this + situation. + + This function should be used when bytes "should" be present for + reading. If the bytes are truly not available (due to EOF), then the + IncompleteRead exception can be used to detect the problem. 
+ """ + s = [] + while amt > 0: + chunk = self.fp.read(min(amt, MAXAMOUNT)) + if not chunk: + raise IncompleteRead(s) + s.append(chunk) + amt -= len(chunk) + return b"".join(s) + + def getheader(self, name, default=None): + if self.msg is None: + raise ResponseNotReady() + return self.msg.getheader(name, default) + + def getheaders(self): + """Return list of (header, value) tuples.""" + if self.msg is None: + raise ResponseNotReady() + return list(self.msg.items()) + + +class HTTPConnection: + + _http_vsn = 11 + _http_vsn_str = 'HTTP/1.1' + + response_class = HTTPResponse + default_port = HTTP_PORT + auto_open = 1 + debuglevel = 0 + strict = 0 + + def __init__(self, host, port=None, strict=None, timeout=None): + self.timeout = timeout + self.sock = None + self._buffer = [] + self.__response = None + self.__state = _CS_IDLE + self._method = None + + self._set_hostport(host, port) + if strict is not None: + self.strict = strict + + def _set_hostport(self, host, port): + if port is None: + i = host.rfind(':') + j = host.rfind(']') # ipv6 addresses have [...] + if i > j: + try: + port = int(host[i+1:]) + except ValueError: + raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) + host = host[:i] + else: + port = self.default_port + if host and host[0] == '[' and host[-1] == ']': + host = host[1:-1] + self.host = host + self.port = port + + def set_debuglevel(self, level): + self.debuglevel = level + + def connect(self): + """Connect to the host and port specified in __init__.""" + self.sock = socket.create_connection((self.host,self.port), + self.timeout) + + def close(self): + """Close the connection to the HTTP server.""" + if self.sock: + self.sock.close() # close it manually... 
there may be other refs + self.sock = None + if self.__response: + self.__response.close() + self.__response = None + self.__state = _CS_IDLE + + def send(self, str): + """Send `str' to the server.""" + if self.sock is None: + if self.auto_open: + self.connect() + else: + raise NotConnected() + + # send the data to the server. if we get a broken pipe, then close + # the socket. we want to reconnect when somebody tries to send again. + # + # NOTE: we DO propagate the error, though, because we cannot simply + # ignore the error... the caller will know if they can retry. + if self.debuglevel > 0: + print("send:", repr(str)) + try: + blocksize=8192 + if hasattr(str,'read') : + if self.debuglevel > 0: print("sendIng a read()able") + data=str.read(blocksize) + while data: + self.sock.sendall(data) + data=str.read(blocksize) + else: + self.sock.sendall(str) + except socket.error as v: + if v.args[0] == 32: # Broken pipe + self.close() + raise + + def _output(self, s): + """Add a line of output to the current request buffer. + + Assumes that the line does *not* end with \\r\\n. + """ + self._buffer.append(s) + + def _send_output(self): + """Send the currently buffered request and clear the buffer. + + Appends an extra \\r\\n to the buffer. + """ + self._buffer.extend((b"", b"")) + msg = b"\r\n".join(self._buffer) + del self._buffer[:] + self.send(msg) + + def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): + """Send a request to the server. + + `method' specifies an HTTP request method, e.g. 'GET'. + `url' specifies the object being requested, e.g. '/index.html'. + `skip_host' if True does not add automatically a 'Host:' header + `skip_accept_encoding' if True does not add automatically an + 'Accept-Encoding:' header + """ + + # if a prior response has been completed, then forget about it. + if self.__response and self.__response.isclosed(): + self.__response = None + + + # in certain cases, we cannot issue another request on this connection. 
+ # this occurs when: + # 1) we are in the process of sending a request. (_CS_REQ_STARTED) + # 2) a response to a previous request has signalled that it is going + # to close the connection upon completion. + # 3) the headers for the previous response have not been read, thus + # we cannot determine whether point (2) is true. (_CS_REQ_SENT) + # + # if there is no prior response, then we can request at will. + # + # if point (2) is true, then we will have passed the socket to the + # response (effectively meaning, "there is no prior response"), and + # will open a new one when a new request is made. + # + # Note: if a prior response exists, then we *can* start a new request. + # We are not allowed to begin fetching the response to this new + # request, however, until that prior response is complete. + # + if self.__state == _CS_IDLE: + self.__state = _CS_REQ_STARTED + else: + raise CannotSendRequest() + + # Save the method we use, we need it later in the response phase + self._method = method + if not url: + url = '/' + request = '%s %s %s' % (method, url, self._http_vsn_str) + + # Non-ASCII characters should have been eliminated earlier + self._output(request.encode('ascii')) + + if self._http_vsn == 11: + # Issue some standard headers for better HTTP/1.1 compliance + + if not skip_host: + # this header is issued *only* for HTTP/1.1 + # connections. more specifically, this means it is + # only issued when the client uses the new + # HTTPConnection() class. backwards-compat clients + # will be using HTTP/1.0 and those clients may be + # issuing this header themselves. we should NOT issue + # it twice; some web servers (such as Apache) barf + # when they see two Host: headers + + # If we need a non-standard port,include it in the + # header. If the request is going through a proxy, + # but the host of the actual URL, not the host of the + # proxy. 
+ + netloc = '' + if url.startswith('http'): + nil, netloc, nil, nil, nil = urlsplit(url) + + if netloc: + try: + netloc_enc = netloc.encode("ascii") + except UnicodeEncodeError: + netloc_enc = netloc.encode("idna") + self.putheader('Host', netloc_enc) + else: + try: + host_enc = self.host.encode("ascii") + except UnicodeEncodeError: + host_enc = self.host.encode("idna") + if self.port == HTTP_PORT: + self.putheader('Host', host_enc) + else: + host_enc = host_enc.decode("ascii") + self.putheader('Host', "%s:%s" % (host_enc, self.port)) + + # note: we are assuming that clients will not attempt to set these + # headers since *this* library must deal with the + # consequences. this also means that when the supporting + # libraries are updated to recognize other forms, then this + # code should be changed (removed or updated). + + # we only want a Content-Encoding of "identity" since we don't + # support encodings such as x-gzip or x-deflate. + if not skip_accept_encoding: + self.putheader('Accept-Encoding', 'identity') + + # we can accept "chunked" Transfer-Encodings, but no others + # NOTE: no TE header implies *only* "chunked" + #self.putheader('TE', 'chunked') + + # if TE is supplied in the header, then it must appear in a + # Connection header. + #self.putheader('Connection', 'TE') + + else: + # For HTTP/1.0, the server will assume "not chunked" + pass + + def putheader(self, header, value): + """Send a request header line to the server. 
+ + For example: h.putheader('Accept', 'text/html') + """ + if self.__state != _CS_REQ_STARTED: + raise CannotSendHeader() + + if hasattr(header, 'encode'): + header = header.encode('ascii') + if hasattr(value, 'encode'): + value = value.encode('ascii') + header = header + b': ' + value + self._output(header) + + def endheaders(self): + """Indicate that the last header line has been sent to the server.""" + + if self.__state == _CS_REQ_STARTED: + self.__state = _CS_REQ_SENT + else: + raise CannotSendHeader() + + self._send_output() + + def request(self, method, url, body=None, headers={}): + """Send a complete request to the server.""" + try: + self._send_request(method, url, body, headers) + except socket.error as v: + # trap 'Broken pipe' if we're allowed to automatically reconnect + if v.args[0] != 32 or not self.auto_open: + raise + # try one more time + self._send_request(method, url, body, headers) + + def _send_request(self, method, url, body, headers): + # honour explicitly requested Host: and Accept-Encoding headers + header_names = dict.fromkeys([k.lower() for k in headers]) + skips = {} + if 'host' in header_names: + skips['skip_host'] = 1 + if 'accept-encoding' in header_names: + skips['skip_accept_encoding'] = 1 + + self.putrequest(method, url, **skips) + + if body and ('content-length' not in header_names): + thelen = None + try: + thelen = str(len(body)) + except TypeError as te: + # If this is a file-like object, try to + # fstat its file descriptor + import os + try: + thelen = str(os.fstat(body.fileno()).st_size) + except (AttributeError, OSError): + # Don't send a length if this failed + if self.debuglevel > 0: print("Cannot stat!!") + + if thelen is not None: + self.putheader('Content-Length',thelen) + for hdr, value in headers.items(): + self.putheader(hdr, value) + self.endheaders() + + if body: + if isinstance(body, str): body = body.encode('ascii') + self.send(body) + + def getresponse(self): + """Get the response from the server.""" + + # 
if a prior response has been completed, then forget about it. + if self.__response and self.__response.isclosed(): + self.__response = None + + # + # if a prior response exists, then it must be completed (otherwise, we + # cannot read this response's header to determine the connection-close + # behavior) + # + # note: if a prior response existed, but was connection-close, then the + # socket and response were made independent of this HTTPConnection + # object since a new request requires that we open a whole new + # connection + # + # this means the prior response had one of two states: + # 1) will_close: this connection was reset and the prior socket and + # response operate independently + # 2) persistent: the response was retained and we await its + # isclosed() status to become true. + # + if self.__state != _CS_REQ_SENT or self.__response: + raise ResponseNotReady() + + if self.debuglevel > 0: + response = self.response_class(self.sock, self.debuglevel, + strict=self.strict, + method=self._method) + else: + response = self.response_class(self.sock, strict=self.strict, + method=self._method) + + response.begin() + assert response.will_close != _UNKNOWN + self.__state = _CS_IDLE + + if response.will_close: + # this effectively passes the connection to the response + self.close() + else: + # remember this, so we can tell when it is complete + self.__response = response + + return response + +try: + import ssl +except ImportError: + pass +else: + class HTTPSConnection(HTTPConnection): + "This class allows communication via SSL." + + default_port = HTTPS_PORT + + def __init__(self, host, port=None, key_file=None, cert_file=None, + strict=None, timeout=None): + HTTPConnection.__init__(self, host, port, strict, timeout) + self.key_file = key_file + self.cert_file = cert_file + + def connect(self): + "Connect to a host on a given (SSL) port." 
+ + sock = socket.create_connection((self.host, self.port), self.timeout) + self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) + + + def FakeSocket (sock, sslobj): + warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + + "Use the result of ssl.wrap_socket() directly instead.", + DeprecationWarning, stacklevel=2) + return sslobj + + __all__.append("HTTPSConnection") + +class HTTPException(Exception): + # Subclasses that define an __init__ must call Exception.__init__ + # or define self.args. Otherwise, str() will fail. + pass + +class NotConnected(HTTPException): + pass + +class InvalidURL(HTTPException): + pass + +class UnknownProtocol(HTTPException): + def __init__(self, version): + self.args = version, + self.version = version + +class UnknownTransferEncoding(HTTPException): + pass + +class UnimplementedFileMode(HTTPException): + pass + +class IncompleteRead(HTTPException): + def __init__(self, partial): + self.args = partial, + self.partial = partial + +class ImproperConnectionState(HTTPException): + pass + +class CannotSendRequest(ImproperConnectionState): + pass + +class CannotSendHeader(ImproperConnectionState): + pass + +class ResponseNotReady(ImproperConnectionState): + pass + +class BadStatusLine(HTTPException): + def __init__(self, line): + self.args = line, + self.line = line + +# for backwards compatibility +error = HTTPException + +class LineAndFileWrapper: + """A limited file-like object for HTTP/0.9 responses.""" + + # The status-line parsing code calls readline(), which normally + # get the HTTP status line. For a 0.9 response, however, this is + # actually the first line of the body! Clients need to get a + # readable file object that contains that line. 
+ + def __init__(self, line, file): + self._line = line + self._file = file + self._line_consumed = 0 + self._line_offset = 0 + self._line_left = len(line) + + def __getattr__(self, attr): + return getattr(self._file, attr) + + def _done(self): + # called when the last byte is read from the line. After the + # call, all read methods are delegated to the underlying file + # object. + self._line_consumed = 1 + self.read = self._file.read + self.readline = self._file.readline + self.readlines = self._file.readlines + + def read(self, amt=None): + if self._line_consumed: + return self._file.read(amt) + assert self._line_left + if amt is None or amt > self._line_left: + s = self._line[self._line_offset:] + self._done() + if amt is None: + return s + self._file.read() + else: + return s + self._file.read(amt - len(s)) + else: + assert amt <= self._line_left + i = self._line_offset + j = i + amt + s = self._line[i:j] + self._line_offset = j + self._line_left -= amt + if self._line_left == 0: + self._done() + return s + + def readline(self): + if self._line_consumed: + return self._file.readline() + assert self._line_left + s = self._line[self._line_offset:] + self._done() + return s + + def readlines(self, size=None): + if self._line_consumed: + return self._file.readlines(size) + assert self._line_left + L = [self._line[self._line_offset:]] + self._done() + if size is None: + return L + self._file.readlines() + else: + return L + self._file.readlines(size) diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py new file mode 100644 index 0000000..795953b --- /dev/null +++ b/Lib/http/cookiejar.py @@ -0,0 +1,1785 @@ +"""HTTP cookie handling for web clients. + +This module has (now fairly distant) origins in Gisle Aas' Perl module +HTTP::Cookies, from the libwww-perl library. + +Docstrings, comments and debug strings in this code refer to the +attributes of the HTTP cookie system as cookie-attributes, to distinguish +them clearly from Python attributes. 
+ +Class diagram (note that BSDDBCookieJar and the MSIE* classes are not +distributed with the Python standard library, but are available from +http://wwwsearch.sf.net/): + + CookieJar____ + / \ \ + FileCookieJar \ \ + / | \ \ \ + MozillaCookieJar | LWPCookieJar \ \ + | | \ + | ---MSIEBase | \ + | / | | \ + | / MSIEDBCookieJar BSDDBCookieJar + |/ + MSIECookieJar + +""" + +__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy', + 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar'] + +import re, urlparse, copy, time, urllib +try: + import threading as _threading +except ImportError: + import dummy_threading as _threading +import http.client # only for the default HTTP port +from calendar import timegm + +debug = False # set to True to enable debugging via the logging module +logger = None + +def _debug(*args): + if not debug: + return + global logger + if not logger: + import logging + logger = logging.getLogger("http.cookiejar") + return logger.debug(*args) + + +DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT) +MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar " + "instance initialised with one)") + +def _warn_unhandled_exception(): + # There are a few catch-all except: statements in this module, for + # catching input that's bad in unexpected ways. Warn if any + # exceptions are caught there. 
+ import io, warnings, traceback + f = io.StringIO() + traceback.print_exc(None, f) + msg = f.getvalue() + warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2) + + +# Date/time conversion +# ----------------------------------------------------------------------------- + +EPOCH_YEAR = 1970 +def _timegm(tt): + year, month, mday, hour, min, sec = tt[:6] + if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and + (0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)): + return timegm(tt) + else: + return None + +DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"] +MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] +MONTHS_LOWER = [] +for month in MONTHS: MONTHS_LOWER.append(month.lower()) + +def time2isoz(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. + + The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", + representing Universal Time (UTC, aka GMT). An example of this format is: + + 1994-11-24 08:49:37Z + + """ + if t is None: t = time.time() + year, mon, mday, hour, min, sec = time.gmtime(t)[:6] + return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( + year, mon, mday, hour, min, sec) + +def time2netscape(t=None): + """Return a string representing time in seconds since epoch, t. + + If the function is called without an argument, it will use the current + time. 
+ + The format of the returned string is like this: + + Wed, DD-Mon-YYYY HH:MM:SS GMT + + """ + if t is None: t = time.time() + year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] + return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( + DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) + + +UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} + +TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") +def offset_from_tz_string(tz): + offset = None + if tz in UTC_ZONES: + offset = 0 + else: + m = TIMEZONE_RE.search(tz) + if m: + offset = 3600 * int(m.group(2)) + if m.group(3): + offset = offset + 60 * int(m.group(3)) + if m.group(1) == '-': + offset = -offset + return offset + +def _str2time(day, mon, yr, hr, min, sec, tz): + # translate month name to number + # month numbers start with 1 (January) + try: + mon = MONTHS_LOWER.index(mon.lower())+1 + except ValueError: + # maybe it's already a number + try: + imon = int(mon) + except ValueError: + return None + if 1 <= imon <= 12: + mon = imon + else: + return None + + # make sure clock elements are defined + if hr is None: hr = 0 + if min is None: min = 0 + if sec is None: sec = 0 + + yr = int(yr) + day = int(day) + hr = int(hr) + min = int(min) + sec = int(sec) + + if yr < 1000: + # find "obvious" year + cur_yr = time.localtime(time.time())[0] + m = cur_yr % 100 + tmp = yr + yr = yr + cur_yr - m + m = m - tmp + if abs(m) > 50: + if m > 0: yr = yr + 100 + else: yr = yr - 100 + + # convert UTC time tuple to seconds since epoch (not timezone-adjusted) + t = _timegm((yr, mon, day, hr, min, sec, tz)) + + if t is not None: + # adjust time using timezone string, to get absolute time since epoch + if tz is None: + tz = "UTC" + tz = tz.upper() + offset = offset_from_tz_string(tz) + if offset is None: + return None + t = t - offset + + return t + +STRICT_DATE_RE = re.compile( + r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " + "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") +WEEKDAY_RE = re.compile( + 
r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) +LOOSE_HTTP_DATE_RE = re.compile( + r"""^ + (\d\d?) # day + (?:\s+|[-\/]) + (\w+) # month + (?:\s+|[-\/]) + (\d+) # year + (?: + (?:\s+|:) # separator before clock + (\d\d?):(\d\d) # hour:min + (?::(\d\d))? # optional seconds + )? # optional clock + \s* + ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone + \s* + (?:\(\w+\))? # ASCII representation of timezone in parens. + \s*$""", re.X) +def http2time(text): + """Returns time in seconds since epoch of time represented by a string. + + Return value is an integer. + + None is returned if the format of str is unrecognized, the time is outside + the representable range, or the timezone string is not recognized. If the + string contains no timezone, UTC is assumed. + + The timezone in the string may be numerical (like "-0800" or "+0100") or a + string timezone (like "UTC", "GMT", "BST" or "EST"). Currently, only the + timezone strings equivalent to UTC (zero offset) are known to the function. + + The function loosely parses the following formats: + + Wed, 09 Feb 1994 22:23:32 GMT -- HTTP format + Tuesday, 08-Feb-94 14:15:29 GMT -- old rfc850 HTTP format + Tuesday, 08-Feb-1994 14:15:29 GMT -- broken rfc850 HTTP format + 09 Feb 1994 22:23:32 GMT -- HTTP format (no weekday) + 08-Feb-94 14:15:29 GMT -- rfc850 format (no weekday) + 08-Feb-1994 14:15:29 GMT -- broken rfc850 format (no weekday) + + The parser ignores leading and trailing whitespace. The time may be + absent. + + If the year is given with only 2 digits, the function will select the + century that makes the year closest to the current date. + + """ + # fast exit for strictly conforming string + m = STRICT_DATE_RE.search(text) + if m: + g = m.groups() + mon = MONTHS_LOWER.index(g[1].lower()) + 1 + tt = (int(g[2]), mon, int(g[0]), + int(g[3]), int(g[4]), float(g[5])) + return _timegm(tt) + + # No, we need some messy parsing... 
+ + # clean up + text = text.lstrip() + text = WEEKDAY_RE.sub("", text, 1) # Useless weekday + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = LOOSE_HTTP_DATE_RE.search(text) + if m is not None: + day, mon, yr, hr, min, sec, tz = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + +ISO_DATE_RE = re.compile( + """^ + (\d{4}) # year + [-\/]? + (\d\d?) # numerical month + [-\/]? + (\d\d?) # day + (?: + (?:\s+|[-:Tt]) # separator before clock + (\d\d?):?(\d\d) # hour:min + (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) + )? # optional clock + \s* + ([-+]?\d\d?:?(:?\d\d)? + |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) + \s*$""", re.X) +def iso2time(text): + """ + As for http2time, but parses the ISO 8601 formats: + + 1994-02-03 14:15:29 -0100 -- ISO 8601 format + 1994-02-03 14:15:29 -- zone is optional + 1994-02-03 -- only date + 1994-02-03T14:15:29 -- Use T as separator + 19940203T141529Z -- ISO 8601 compact format + 19940203 -- only date + + """ + # clean up + text = text.lstrip() + + # tz is time zone specifier string + day, mon, yr, hr, min, sec, tz = [None]*7 + + # loose regexp parse + m = ISO_DATE_RE.search(text) + if m is not None: + # XXX there's an extra bit of the timezone I'm ignoring here: is + # this the right thing to do? 
+ yr, mon, day, hr, min, sec, tz, _ = m.groups() + else: + return None # bad format + + return _str2time(day, mon, yr, hr, min, sec, tz) + + +# Header parsing +# ----------------------------------------------------------------------------- + +def unmatched(match): + """Return unmatched part of re.Match object.""" + start, end = match.span(0) + return match.string[:start]+match.string[end:] + +HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)") +HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"") +HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)") +HEADER_ESCAPE_RE = re.compile(r"\\(.)") +def split_header_words(header_values): + r"""Parse header values into a list of lists containing key,value pairs. + + The function knows how to deal with ",", ";" and "=" as well as quoted + values after "=". A list of space separated tokens are parsed as if they + were separated by ";". + + If the header_values passed as argument contains multiple values, then they + are treated as if they were a single value separated by comma ",". + + This means that this function is useful for parsing header fields that + follow this syntax (BNF as from the HTTP/1.1 specification, but we relax + the requirement for tokens). + + headers = #header + header = (token | parameter) *( [";"] (token | parameter)) + + token = 1* + separators = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + + quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + qdtext = > + quoted-pair = "\" CHAR + + parameter = attribute "=" value + attribute = token + value = token | quoted-string + + Each header is represented by a list of key/value pairs. The value for a + simple token (not part of a parameter) is None. Syntactically incorrect + headers will not necessarily be parsed as you would want. 
+ + This is easier to describe with some examples: + + >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz']) + [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]] + >>> split_header_words(['text/html; charset="iso-8859-1"']) + [[('text/html', None), ('charset', 'iso-8859-1')]] + >>> split_header_words([r'Basic realm="\"foo\bar\""']) + [[('Basic', None), ('realm', '"foobar"')]] + + """ + assert not isinstance(header_values, str) + result = [] + for text in header_values: + orig_text = text + pairs = [] + while text: + m = HEADER_TOKEN_RE.search(text) + if m: + text = unmatched(m) + name = m.group(1) + m = HEADER_QUOTED_VALUE_RE.search(text) + if m: # quoted value + text = unmatched(m) + value = m.group(1) + value = HEADER_ESCAPE_RE.sub(r"\1", value) + else: + m = HEADER_VALUE_RE.search(text) + if m: # unquoted value + text = unmatched(m) + value = m.group(1) + value = value.rstrip() + else: + # no value, a lone token + value = None + pairs.append((name, value)) + elif text.lstrip().startswith(","): + # concatenated headers, as per RFC 2616 section 4.2 + text = text.lstrip()[1:] + if pairs: result.append(pairs) + pairs = [] + else: + # skip junk + non_junk, nr_junk_chars = re.subn("^[=\s;]*", "", text) + assert nr_junk_chars > 0, ( + "split_header_words bug: '%s', '%s', %s" % + (orig_text, text, pairs)) + text = non_junk + if pairs: result.append(pairs) + return result + +HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])") +def join_header_words(lists): + """Do the inverse (almost) of the conversion done by split_header_words. + + Takes a list of lists of (key, value) pairs and produces a single header + value. Attribute values are quoted if needed. 
+ + >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) + 'text/plain; charset="iso-8859/1"' + >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) + 'text/plain, charset="iso-8859/1"' + + """ + headers = [] + for pairs in lists: + attr = [] + for k, v in pairs: + if v is not None: + if not re.search(r"^\w+$", v): + v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v) # escape " and \ + v = '"%s"' % v + k = "%s=%s" % (k, v) + attr.append(k) + if attr: headers.append("; ".join(attr)) + return ", ".join(headers) + +def parse_ns_headers(ns_headers): + """Ad-hoc parser for Netscape protocol cookie-attributes. + + The old Netscape cookie format for Set-Cookie can for instance contain + an unquoted "," in the expires field, so we have to use this ad-hoc + parser instead of split_header_words. + + XXX This may not make the best possible effort to parse all the crap + that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient + parser is probably better, so could do worse than following that if + this ever gives any trouble. + + Currently, this is also used for parsing RFC 2109 cookies. + + """ + known_attrs = ("expires", "domain", "path", "secure", + # RFC 2109 attrs (may turn up in Netscape cookies, too) + "port", "max-age") + + result = [] + for ns_header in ns_headers: + pairs = [] + version_set = False + for ii, param in enumerate(re.split(r";\s*", ns_header)): + param = param.rstrip() + if param == "": continue + if "=" not in param: + k, v = param, None + else: + k, v = re.split(r"\s*=\s*", param, 1) + k = k.lstrip() + if ii != 0: + lc = k.lower() + if lc in known_attrs: + k = lc + if k == "version": + # This is an RFC 2109 cookie. 
+ version_set = True + if k == "expires": + # convert expires date to seconds since epoch + if v.startswith('"'): v = v[1:] + if v.endswith('"'): v = v[:-1] + v = http2time(v) # None if invalid + pairs.append((k, v)) + + if pairs: + if not version_set: + pairs.append(("version", "0")) + result.append(pairs) + + return result + + +IPV4_RE = re.compile(r"\.\d+$") +def is_HDN(text): + """Return True if text is a host domain name.""" + # XXX + # This may well be wrong. Which RFC is HDN defined in, if any (for + # the purposes of RFC 2965)? + # For the current implementation, what about IPv6? Remember to look + # at other uses of IPV4_RE also, if change this. + if IPV4_RE.search(text): + return False + if text == "": + return False + if text[0] == "." or text[-1] == ".": + return False + return True + +def domain_match(A, B): + """Return True if domain A domain-matches domain B, according to RFC 2965. + + A and B may be host domain names or IP addresses. + + RFC 2965, section 1: + + Host names can be specified either as an IP address or a HDN string. + Sometimes we compare one host name with another. (Such comparisons SHALL + be case-insensitive.) Host A's name domain-matches host B's if + + * their host name strings string-compare equal; or + + * A is a HDN string and has the form NB, where N is a non-empty + name string, B has the form .B', and B' is a HDN string. (So, + x.y.com domain-matches .Y.com but not Y.com.) + + Note that domain-match is not a commutative operation: a.b.c.com + domain-matches .c.com, but not the reverse. + + """ + # Note that, if A or B are IP addresses, the only relevant part of the + # definition of the domain-match algorithm is the direct string-compare. 
+ A = A.lower() + B = B.lower() + if A == B: + return True + if not is_HDN(A): + return False + i = A.rfind(B) + if i == -1 or i == 0: + # A does not have form NB, or N is the empty string + return False + if not B.startswith("."): + return False + if not is_HDN(B[1:]): + return False + return True + +def liberal_is_HDN(text): + """Return True if text is a sort-of-like a host domain name. + + For accepting/blocking domains. + + """ + if IPV4_RE.search(text): + return False + return True + +def user_domain_match(A, B): + """For blocking/accepting domains. + + A and B may be host domain names or IP addresses. + + """ + A = A.lower() + B = B.lower() + if not (liberal_is_HDN(A) and liberal_is_HDN(B)): + if A == B: + # equal IP addresses + return True + return False + initial_dot = B.startswith(".") + if initial_dot and A.endswith(B): + return True + if not initial_dot and A == B: + return True + return False + +cut_port_re = re.compile(r":\d+$") +def request_host(request): + """Return request-host, as defined by RFC 2965. + + Variation from RFC: returned value is lowercased, for convenient + comparison. + + """ + url = request.get_full_url() + host = urlparse.urlparse(url)[1] + if host == "": + host = request.get_header("Host", "") + + # remove port, if present + host = cut_port_re.sub("", host, 1) + return host.lower() + +def eff_request_host(request): + """Return a tuple (request-host, effective request-host name). + + As defined by RFC 2965, except both are lowercased. 
+ + """ + erhn = req_host = request_host(request) + if req_host.find(".") == -1 and not IPV4_RE.search(req_host): + erhn = req_host + ".local" + return req_host, erhn + +def request_path(request): + """request-URI, as defined by RFC 2965.""" + url = request.get_full_url() + #scheme, netloc, path, parameters, query, frag = urlparse.urlparse(url) + #req_path = escape_path("".join(urlparse.urlparse(url)[2:])) + path, parameters, query, frag = urlparse.urlparse(url)[2:] + if parameters: + path = "%s;%s" % (path, parameters) + path = escape_path(path) + req_path = urlparse.urlunparse(("", "", path, "", query, frag)) + if not req_path.startswith("/"): + # fix bad RFC 2396 absoluteURI + req_path = "/"+req_path + return req_path + +def request_port(request): + host = request.get_host() + i = host.find(':') + if i >= 0: + port = host[i+1:] + try: + int(port) + except ValueError: + _debug("nonnumeric port: '%s'", port) + return None + else: + port = DEFAULT_HTTP_PORT + return port + +# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't +# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738). 
+HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()" +ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])") +def uppercase_escaped_char(match): + return "%%%s" % match.group(1).upper() +def escape_path(path): + """Escape any invalid characters in HTTP URL, and uppercase all escapes.""" + # There's no knowing what character encoding was used to create URLs + # containing %-escapes, but since we have to pick one to escape invalid + # path characters, we pick UTF-8, as recommended in the HTML 4.0 + # specification: + # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1 + # And here, kind of: draft-fielding-uri-rfc2396bis-03 + # (And in draft IRI specification: draft-duerst-iri-05) + # (And here, for new URI schemes: RFC 2718) + path = urllib.quote(path, HTTP_PATH_SAFE) + path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path) + return path + +def reach(h): + """Return reach of host h, as defined by RFC 2965, section 1. + + The reach R of a host name H is defined as follows: + + * If + + - H is the host domain name of a host; and, + + - H has the form A.B; and + + - A has no embedded (that is, interior) dots; and + + - B has at least one embedded dot, or B is the string "local". + then the reach of H is .B. + + * Otherwise, the reach of H is H. + + >>> reach("www.acme.com") + '.acme.com' + >>> reach("acme.com") + 'acme.com' + >>> reach("acme.local") + '.local' + + """ + i = h.find(".") + if i >= 0: + #a = h[:i] # this line is only here to show what a is + b = h[i+1:] + i = b.find(".") + if is_HDN(h) and (i >= 0 or b == "local"): + return "."+b + return h + +def is_third_party(request): + """ + + RFC 2965, section 3.3.6: + + An unverifiable transaction is to a third-party host if its request- + host U does not domain-match the reach R of the request-host O in the + origin transaction. + + """ + req_host = request_host(request) + if not domain_match(req_host, reach(request.get_origin_req_host())): + return True + else: + return False + + +class Cookie: + """HTTP Cookie. 
+ + This class represents both Netscape and RFC 2965 cookies. + + This is deliberately a very simple class. It just holds attributes. It's + possible to construct Cookie instances that don't comply with the cookie + standards. CookieJar.make_cookies is the factory function for Cookie + objects -- it deals with cookie parsing, supplying defaults, and + normalising to the representation used in this class. CookiePolicy is + responsible for checking them to see whether they should be accepted from + and returned to the server. + + Note that the port may be present in the headers, but unspecified ("Port" + rather than"Port=80", for example); if this is the case, port is None. + + """ + + def __init__(self, version, name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest, + rfc2109=False, + ): + + if version is not None: version = int(version) + if expires is not None: expires = int(expires) + if port is None and port_specified is True: + raise ValueError("if port is None, port_specified must be false") + + self.version = version + self.name = name + self.value = value + self.port = port + self.port_specified = port_specified + # normalise case, as per RFC 2965 section 3.3.3 + self.domain = domain.lower() + self.domain_specified = domain_specified + # Sigh. We need to know whether the domain given in the + # cookie-attribute had an initial dot, in order to follow RFC 2965 + # (as clarified in draft errata). Needed for the returned $Domain + # value. 
+ self.domain_initial_dot = domain_initial_dot + self.path = path + self.path_specified = path_specified + self.secure = secure + self.expires = expires + self.discard = discard + self.comment = comment + self.comment_url = comment_url + self.rfc2109 = rfc2109 + + self._rest = copy.copy(rest) + + def has_nonstandard_attr(self, name): + return name in self._rest + def get_nonstandard_attr(self, name, default=None): + return self._rest.get(name, default) + def set_nonstandard_attr(self, name, value): + self._rest[name] = value + + def is_expired(self, now=None): + if now is None: now = time.time() + if (self.expires is not None) and (self.expires <= now): + return True + return False + + def __str__(self): + if self.port is None: p = "" + else: p = ":"+self.port + limit = self.domain + p + self.path + if self.value is not None: + namevalue = "%s=%s" % (self.name, self.value) + else: + namevalue = self.name + return "" % (namevalue, limit) + + def __repr__(self): + args = [] + for name in ("version", "name", "value", + "port", "port_specified", + "domain", "domain_specified", "domain_initial_dot", + "path", "path_specified", + "secure", "expires", "discard", "comment", "comment_url", + ): + attr = getattr(self, name) + args.append("%s=%s" % (name, repr(attr))) + args.append("rest=%s" % repr(self._rest)) + args.append("rfc2109=%s" % repr(self.rfc2109)) + return "Cookie(%s)" % ", ".join(args) + + +class CookiePolicy: + """Defines which cookies get accepted from and returned to server. + + May also modify cookies, though this is probably a bad idea. + + The subclass DefaultCookiePolicy defines the standard rules for Netscape + and RFC 2965 cookies -- override that if you want a customised policy. + + """ + def set_ok(self, cookie, request): + """Return true if (and only if) cookie should be accepted from server. + + Currently, pre-expired cookies never get this far -- the CookieJar + class deletes such cookies itself. 
+ + """ + raise NotImplementedError() + + def return_ok(self, cookie, request): + """Return true if (and only if) cookie should be returned to server.""" + raise NotImplementedError() + + def domain_return_ok(self, domain, request): + """Return false if cookies should not be returned, given cookie domain. + """ + return True + + def path_return_ok(self, path, request): + """Return false if cookies should not be returned, given cookie path. + """ + return True + + +class DefaultCookiePolicy(CookiePolicy): + """Implements the standard rules for accepting and returning cookies.""" + + DomainStrictNoDots = 1 + DomainStrictNonDomain = 2 + DomainRFC2965Match = 4 + + DomainLiberal = 0 + DomainStrict = DomainStrictNoDots|DomainStrictNonDomain + + def __init__(self, + blocked_domains=None, allowed_domains=None, + netscape=True, rfc2965=False, + rfc2109_as_netscape=None, + hide_cookie2=False, + strict_domain=False, + strict_rfc2965_unverifiable=True, + strict_ns_unverifiable=False, + strict_ns_domain=DomainLiberal, + strict_ns_set_initial_dollar=False, + strict_ns_set_path=False, + ): + """Constructor arguments should be passed as keyword arguments only.""" + self.netscape = netscape + self.rfc2965 = rfc2965 + self.rfc2109_as_netscape = rfc2109_as_netscape + self.hide_cookie2 = hide_cookie2 + self.strict_domain = strict_domain + self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable + self.strict_ns_unverifiable = strict_ns_unverifiable + self.strict_ns_domain = strict_ns_domain + self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar + self.strict_ns_set_path = strict_ns_set_path + + if blocked_domains is not None: + self._blocked_domains = tuple(blocked_domains) + else: + self._blocked_domains = () + + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def blocked_domains(self): + """Return the sequence of blocked domains (as a tuple).""" + return self._blocked_domains + def 
set_blocked_domains(self, blocked_domains): + """Set the sequence of blocked domains.""" + self._blocked_domains = tuple(blocked_domains) + + def is_blocked(self, domain): + for blocked_domain in self._blocked_domains: + if user_domain_match(domain, blocked_domain): + return True + return False + + def allowed_domains(self): + """Return None, or the sequence of allowed domains (as a tuple).""" + return self._allowed_domains + def set_allowed_domains(self, allowed_domains): + """Set the sequence of allowed domains, or None.""" + if allowed_domains is not None: + allowed_domains = tuple(allowed_domains) + self._allowed_domains = allowed_domains + + def is_not_allowed(self, domain): + if self._allowed_domains is None: + return False + for allowed_domain in self._allowed_domains: + if user_domain_match(domain, allowed_domain): + return False + return True + + def set_ok(self, cookie, request): + """ + If you override .set_ok(), be sure to call this method. If it returns + false, so should your subclass (assuming your subclass wants to be more + strict about which cookies to accept). + + """ + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + assert cookie.name is not None + + for n in "version", "verifiability", "name", "path", "domain", "port": + fn_name = "set_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + + return True + + def set_ok_version(self, cookie, request): + if cookie.version is None: + # Version is always set to 0 by parse_ns_headers if it's a Netscape + # cookie, so this must be an invalid RFC 2965 cookie. 
+ _debug(" Set-Cookie2 without version attribute (%s=%s)", + cookie.name, cookie.value) + return False + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def set_ok_verifiability(self, cookie, request): + if request.is_unverifiable() and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during " + "unverifiable transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during " + "unverifiable transaction") + return False + return True + + def set_ok_name(self, cookie, request): + # Try and stop servers setting V0 cookies designed to hack other + # servers that know both V0 and V1 protocols. + if (cookie.version == 0 and self.strict_ns_set_initial_dollar and + cookie.name.startswith("$")): + _debug(" illegal name (starts with '$'): '%s'", cookie.name) + return False + return True + + def set_ok_path(self, cookie, request): + if cookie.path_specified: + req_path = request_path(request) + if ((cookie.version > 0 or + (cookie.version == 0 and self.strict_ns_set_path)) and + not req_path.startswith(cookie.path)): + _debug(" path attribute %s is not a prefix of request " + "path %s", cookie.path, req_path) + return False + return True + + def set_ok_domain(self, cookie, request): + if self.is_blocked(cookie.domain): + _debug(" domain %s is in user block-list", cookie.domain) + return False + if self.is_not_allowed(cookie.domain): + _debug(" domain %s is not in user allow-list", cookie.domain) + return False + if cookie.domain_specified: + req_host, erhn = eff_request_host(request) + domain = cookie.domain + if self.strict_domain and (domain.count(".") >= 2): + # XXX This should probably be compared with the Konqueror + # (kcookiejar.cpp) and 
Mozilla implementations, but it's a + # losing battle. + i = domain.rfind(".") + j = domain.rfind(".", 0, i) + if j == 0: # domain like .foo.bar + tld = domain[i+1:] + sld = domain[j+1:i] + if sld.lower() in ("co", "ac", "com", "edu", "org", "net", + "gov", "mil", "int", "aero", "biz", "cat", "coop", + "info", "jobs", "mobi", "museum", "name", "pro", + "travel", "eu") and len(tld) == 2: + # domain like .co.uk + _debug(" country-code second level domain %s", domain) + return False + if domain.startswith("."): + undotted_domain = domain[1:] + else: + undotted_domain = domain + embedded_dots = (undotted_domain.find(".") >= 0) + if not embedded_dots and domain != ".local": + _debug(" non-local domain %s contains no embedded dot", + domain) + return False + if cookie.version == 0: + if (not erhn.endswith(domain) and + (not erhn.startswith(".") and + not ("."+erhn).endswith(domain))): + _debug(" effective request-host %s (even with added " + "initial dot) does not end end with %s", + erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainRFC2965Match)): + if not domain_match(erhn, domain): + _debug(" effective request-host %s does not domain-match " + "%s", erhn, domain) + return False + if (cookie.version > 0 or + (self.strict_ns_domain & self.DomainStrictNoDots)): + host_prefix = req_host[:-len(domain)] + if (host_prefix.find(".") >= 0 and + not IPV4_RE.search(req_host)): + _debug(" host prefix %s for domain %s contains a dot", + host_prefix, domain) + return False + return True + + def set_ok_port(self, cookie, request): + if cookie.port_specified: + req_port = request_port(request) + if req_port is None: + req_port = "80" + else: + req_port = str(req_port) + for p in cookie.port.split(","): + try: + int(p) + except ValueError: + _debug(" bad port %s (not numeric)", p) + return False + if p == req_port: + break + else: + _debug(" request port (%s) not found in %s", + req_port, cookie.port) + return False + return True + + def 
return_ok(self, cookie, request): + """ + If you override .return_ok(), be sure to call this method. If it + returns false, so should your subclass (assuming your subclass wants to + be more strict about which cookies to return). + + """ + # Path has already been checked by .path_return_ok(), and domain + # blocking done by .domain_return_ok(). + _debug(" - checking cookie %s=%s", cookie.name, cookie.value) + + for n in "version", "verifiability", "secure", "expires", "port", "domain": + fn_name = "return_ok_"+n + fn = getattr(self, fn_name) + if not fn(cookie, request): + return False + return True + + def return_ok_version(self, cookie, request): + if cookie.version > 0 and not self.rfc2965: + _debug(" RFC 2965 cookies are switched off") + return False + elif cookie.version == 0 and not self.netscape: + _debug(" Netscape cookies are switched off") + return False + return True + + def return_ok_verifiability(self, cookie, request): + if request.is_unverifiable() and is_third_party(request): + if cookie.version > 0 and self.strict_rfc2965_unverifiable: + _debug(" third-party RFC 2965 cookie during unverifiable " + "transaction") + return False + elif cookie.version == 0 and self.strict_ns_unverifiable: + _debug(" third-party Netscape cookie during unverifiable " + "transaction") + return False + return True + + def return_ok_secure(self, cookie, request): + if cookie.secure and request.get_type() != "https": + _debug(" secure cookie with non-secure request") + return False + return True + + def return_ok_expires(self, cookie, request): + if cookie.is_expired(self._now): + _debug(" cookie expired") + return False + return True + + def return_ok_port(self, cookie, request): + if cookie.port: + req_port = request_port(request) + if req_port is None: + req_port = "80" + for p in cookie.port.split(","): + if p == req_port: + break + else: + _debug(" request port %s does not match cookie port %s", + req_port, cookie.port) + return False + return True + + def 
return_ok_domain(self, cookie, request): + req_host, erhn = eff_request_host(request) + domain = cookie.domain + + # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't + if (cookie.version == 0 and + (self.strict_ns_domain & self.DomainStrictNonDomain) and + not cookie.domain_specified and domain != erhn): + _debug(" cookie with unspecified domain does not string-compare " + "equal to request domain") + return False + + if cookie.version > 0 and not domain_match(erhn, domain): + _debug(" effective request-host name %s does not domain-match " + "RFC 2965 cookie domain %s", erhn, domain) + return False + if cookie.version == 0 and not ("."+erhn).endswith(domain): + _debug(" request-host %s does not match Netscape cookie domain " + "%s", req_host, domain) + return False + return True + + def domain_return_ok(self, domain, request): + # Liberal check of. This is here as an optimization to avoid + # having to load lots of MSIE cookie files unless necessary. + req_host, erhn = eff_request_host(request) + if not req_host.startswith("."): + req_host = "."+req_host + if not erhn.startswith("."): + erhn = "."+erhn + if not (req_host.endswith(domain) or erhn.endswith(domain)): + #_debug(" request domain %s does not match cookie domain %s", + # req_host, domain) + return False + + if self.is_blocked(domain): + _debug(" domain %s is in user block-list", domain) + return False + if self.is_not_allowed(domain): + _debug(" domain %s is not in user allow-list", domain) + return False + + return True + + def path_return_ok(self, path, request): + _debug("- checking cookie path=%s", path) + req_path = request_path(request) + if not req_path.startswith(path): + _debug(" %s does not path-match %s", req_path, path) + return False + return True + + +def vals_sorted_by_key(adict): + keys = sorted(adict.keys()) + return map(adict.get, keys) + +def deepvalues(mapping): + """Iterates over nested mapping, depth-first, in sorted order by key.""" + values = 
vals_sorted_by_key(mapping) + for obj in values: + mapping = False + try: + obj.items + except AttributeError: + pass + else: + mapping = True + for subobj in deepvalues(obj): + yield subobj + if not mapping: + yield obj + + +# Used as second parameter to dict.get() method, to distinguish absent +# dict key from one with a None value. +class Absent: pass + +class CookieJar: + """Collection of HTTP cookies. + + You may not need to know about this class: try + urllib2.build_opener(HTTPCookieProcessor).open(url). + + """ + + non_word_re = re.compile(r"\W") + quote_re = re.compile(r"([\"\\])") + strict_domain_re = re.compile(r"\.?[^.]*") + domain_re = re.compile(r"[^.]*") + dots_re = re.compile(r"^\.+") + + magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" + + def __init__(self, policy=None): + if policy is None: + policy = DefaultCookiePolicy() + self._policy = policy + + self._cookies_lock = _threading.RLock() + self._cookies = {} + + def set_policy(self, policy): + self._policy = policy + + def _cookies_for_domain(self, domain, request): + cookies = [] + if not self._policy.domain_return_ok(domain, request): + return [] + _debug("Checking %s for cookies to return", domain) + cookies_by_path = self._cookies[domain] + for path in cookies_by_path.keys(): + if not self._policy.path_return_ok(path, request): + continue + cookies_by_name = cookies_by_path[path] + for cookie in cookies_by_name.values(): + if not self._policy.return_ok(cookie, request): + _debug(" not returning cookie") + continue + _debug(" it's a match") + cookies.append(cookie) + return cookies + + def _cookies_for_request(self, request): + """Return a list of cookies to be returned to server.""" + cookies = [] + for domain in self._cookies.keys(): + cookies.extend(self._cookies_for_domain(domain, request)) + return cookies + + def _cookie_attrs(self, cookies): + """Return a list of cookie-attributes to be returned to server. + + like ['foo="bar"; $Path="/"', ...] 
+ + The $Version attribute is also added when appropriate (currently only + once per request). + + """ + # add cookies in order of most specific (ie. longest) path first + cookies.sort(key=lambda a: len(a.path), reverse=True) + + version_set = False + + attrs = [] + for cookie in cookies: + # set version of Cookie header + # XXX + # What should it be if multiple matching Set-Cookie headers have + # different versions themselves? + # Answer: there is no answer; was supposed to be settled by + # RFC 2965 errata, but that may never appear... + version = cookie.version + if not version_set: + version_set = True + if version > 0: + attrs.append("$Version=%s" % version) + + # quote cookie value if necessary + # (not for Netscape protocol, which already has any quotes + # intact, due to the poorly-specified Netscape Cookie: syntax) + if ((cookie.value is not None) and + self.non_word_re.search(cookie.value) and version > 0): + value = self.quote_re.sub(r"\\\1", cookie.value) + else: + value = cookie.value + + # add cookie-attributes to be returned in Cookie header + if cookie.value is None: + attrs.append(cookie.name) + else: + attrs.append("%s=%s" % (cookie.name, value)) + if version > 0: + if cookie.path_specified: + attrs.append('$Path="%s"' % cookie.path) + if cookie.domain.startswith("."): + domain = cookie.domain + if (not cookie.domain_initial_dot and + domain.startswith(".")): + domain = domain[1:] + attrs.append('$Domain="%s"' % domain) + if cookie.port is not None: + p = "$Port" + if cookie.port_specified: + p = p + ('="%s"' % cookie.port) + attrs.append(p) + + return attrs + + def add_cookie_header(self, request): + """Add correct Cookie: header to request (urllib2.Request object). + + The Cookie2 header is also added unless policy.hide_cookie2 is true. 
+ + """ + _debug("add_cookie_header") + self._cookies_lock.acquire() + try: + + self._policy._now = self._now = int(time.time()) + + cookies = self._cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + if attrs: + if not request.has_header("Cookie"): + request.add_unredirected_header( + "Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if (self._policy.rfc2965 and not self._policy.hide_cookie2 and + not request.has_header("Cookie2")): + for cookie in cookies: + if cookie.version != 1: + request.add_unredirected_header("Cookie2", '$Version="1"') + break + + finally: + self._cookies_lock.release() + + self.clear_expired_cookies() + + def _normalized_cookie_tuples(self, attrs_set): + """Return list of tuples containing normalised cookie information. + + attrs_set is the list of lists of key,value pairs extracted from + the Set-Cookie or Set-Cookie2 headers. + + Tuples are name, value, standard, rest, where name and value are the + cookie name and value, standard is a dictionary containing the standard + cookie-attributes (discard, secure, version, expires or max-age, + domain, path and port) and rest is a dictionary containing the rest of + the cookie-attributes. + + """ + cookie_tuples = [] + + boolean_attrs = "discard", "secure" + value_attrs = ("version", + "expires", "max-age", + "domain", "path", "port", + "comment", "commenturl") + + for cookie_attrs in attrs_set: + name, value = cookie_attrs[0] + + # Build dictionary of standard cookie-attributes (standard) and + # dictionary of other cookie-attributes (rest). + + # Note: expiry time is normalised to seconds since epoch. V0 + # cookies should have the Expires cookie-attribute, and V1 cookies + # should have Max-Age, but since V1 includes RFC 2109 cookies (and + # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we + # accept either (but prefer Max-Age). 
+ max_age_set = False + + bad_cookie = False + + standard = {} + rest = {} + for k, v in cookie_attrs[1:]: + lc = k.lower() + # don't lose case distinction for unknown fields + if lc in value_attrs or lc in boolean_attrs: + k = lc + if k in boolean_attrs and v is None: + # boolean cookie-attribute is present, but has no value + # (like "discard", rather than "port=80") + v = True + if k in standard: + # only first value is significant + continue + if k == "domain": + if v is None: + _debug(" missing value for domain attribute") + bad_cookie = True + break + # RFC 2965 section 3.3.3 + v = v.lower() + if k == "expires": + if max_age_set: + # Prefer max-age to expires (like Mozilla) + continue + if v is None: + _debug(" missing or invalid value for expires " + "attribute: treating as session cookie") + continue + if k == "max-age": + max_age_set = True + try: + v = int(v) + except ValueError: + _debug(" missing or invalid (non-numeric) value for " + "max-age attribute") + bad_cookie = True + break + # convert RFC 2965 Max-Age to seconds since epoch + # XXX Strictly you're supposed to follow RFC 2616 + # age-calculation rules. Remember that zero Max-Age is a + # is a request to discard (old and new) cookie, though. 
+ k = "expires" + v = self._now + v + if (k in value_attrs) or (k in boolean_attrs): + if (v is None and + k not in ("port", "comment", "commenturl")): + _debug(" missing value for %s attribute" % k) + bad_cookie = True + break + standard[k] = v + else: + rest[k] = v + + if bad_cookie: + continue + + cookie_tuples.append((name, value, standard, rest)) + + return cookie_tuples + + def _cookie_from_cookie_tuple(self, tup, request): + # standard is dict of standard cookie-attributes, rest is dict of the + # rest of them + name, value, standard, rest = tup + + domain = standard.get("domain", Absent) + path = standard.get("path", Absent) + port = standard.get("port", Absent) + expires = standard.get("expires", Absent) + + # set the easy defaults + version = standard.get("version", None) + if version is not None: version = int(version) + secure = standard.get("secure", False) + # (discard is also set if expires is Absent) + discard = standard.get("discard", False) + comment = standard.get("comment", None) + comment_url = standard.get("commenturl", None) + + # set default path + if path is not Absent and path != "": + path_specified = True + path = escape_path(path) + else: + path_specified = False + path = request_path(request) + i = path.rfind("/") + if i != -1: + if version == 0: + # Netscape spec parts company from reality here + path = path[:i] + else: + path = path[:i+1] + if len(path) == 0: path = "/" + + # set default domain + domain_specified = domain is not Absent + # but first we have to remember whether it starts with a dot + domain_initial_dot = False + if domain_specified: + domain_initial_dot = bool(domain.startswith(".")) + if domain is Absent: + req_host, erhn = eff_request_host(request) + domain = erhn + elif not domain.startswith("."): + domain = "."+domain + + # set default port + port_specified = False + if port is not Absent: + if port is None: + # Port attr present, but has no value: default to request port. 
+ # Cookie should then only be sent back on that port. + port = request_port(request) + else: + port_specified = True + port = re.sub(r"\s+", "", port) + else: + # No port attr present. Cookie can be sent back on any port. + port = None + + # set default expires and discard + if expires is Absent: + expires = None + discard = True + elif expires <= self._now: + # Expiry date in past is request to delete cookie. This can't be + # in DefaultCookiePolicy, because can't delete cookies there. + try: + self.clear(domain, path, name) + except KeyError: + pass + _debug("Expiring cookie, domain='%s', path='%s', name='%s'", + domain, path, name) + return None + + return Cookie(version, + name, value, + port, port_specified, + domain, domain_specified, domain_initial_dot, + path, path_specified, + secure, + expires, + discard, + comment, + comment_url, + rest) + + def _cookies_from_attrs_set(self, attrs_set, request): + cookie_tuples = self._normalized_cookie_tuples(attrs_set) + + cookies = [] + for tup in cookie_tuples: + cookie = self._cookie_from_cookie_tuple(tup, request) + if cookie: cookies.append(cookie) + return cookies + + def _process_rfc2109_cookies(self, cookies): + rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None) + if rfc2109_as_ns is None: + rfc2109_as_ns = not self._policy.rfc2965 + for cookie in cookies: + if cookie.version == 1: + cookie.rfc2109 = True + if rfc2109_as_ns: + # treat 2109 cookies as Netscape cookies rather than + # as RFC2965 cookies + cookie.version = 0 + + def make_cookies(self, response, request): + """Return sequence of Cookie objects extracted from response object.""" + # get cookie-attributes for RFC 2965 and Netscape protocols + headers = response.info() + rfc2965_hdrs = headers.getheaders("Set-Cookie2") + ns_hdrs = headers.getheaders("Set-Cookie") + + rfc2965 = self._policy.rfc2965 + netscape = self._policy.netscape + + if ((not rfc2965_hdrs and not ns_hdrs) or + (not ns_hdrs and not rfc2965) or + (not rfc2965_hdrs and 
not netscape) or + (not netscape and not rfc2965)): + return [] # no relevant cookie headers: quick exit + + try: + cookies = self._cookies_from_attrs_set( + split_header_words(rfc2965_hdrs), request) + except Exception: + _warn_unhandled_exception() + cookies = [] + + if ns_hdrs and netscape: + try: + # RFC 2109 and Netscape cookies + ns_cookies = self._cookies_from_attrs_set( + parse_ns_headers(ns_hdrs), request) + except Exception: + _warn_unhandled_exception() + ns_cookies = [] + self._process_rfc2109_cookies(ns_cookies) + + # Look for Netscape cookies (from Set-Cookie headers) that match + # corresponding RFC 2965 cookies (from Set-Cookie2 headers). + # For each match, keep the RFC 2965 cookie and ignore the Netscape + # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are + # bundled in with the Netscape cookies for this purpose, which is + # reasonable behaviour. + if rfc2965: + lookup = {} + for cookie in cookies: + lookup[(cookie.domain, cookie.path, cookie.name)] = None + + def no_matching_rfc2965(ns_cookie, lookup=lookup): + key = ns_cookie.domain, ns_cookie.path, ns_cookie.name + return key not in lookup + ns_cookies = filter(no_matching_rfc2965, ns_cookies) + + if ns_cookies: + cookies.extend(ns_cookies) + + return cookies + + def set_cookie_if_ok(self, cookie, request): + """Set a cookie if policy says it's OK to do so.""" + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + if self._policy.set_ok(cookie, request): + self.set_cookie(cookie) + + + finally: + self._cookies_lock.release() + + def set_cookie(self, cookie): + """Set a cookie, without checking whether or not it should be set.""" + c = self._cookies + self._cookies_lock.acquire() + try: + if cookie.domain not in c: c[cookie.domain] = {} + c2 = c[cookie.domain] + if cookie.path not in c2: c2[cookie.path] = {} + c3 = c2[cookie.path] + c3[cookie.name] = cookie + finally: + self._cookies_lock.release() + + def extract_cookies(self, response, 
request): + """Extract cookies from response, where allowable given the request.""" + _debug("extract_cookies: %s", response.info()) + self._cookies_lock.acquire() + try: + self._policy._now = self._now = int(time.time()) + + for cookie in self.make_cookies(response, request): + if self._policy.set_ok(cookie, request): + _debug(" setting cookie: %s", cookie) + self.set_cookie(cookie) + finally: + self._cookies_lock.release() + + def clear(self, domain=None, path=None, name=None): + """Clear some cookies. + + Invoking this method without arguments will clear all cookies. If + given a single argument, only cookies belonging to that domain will be + removed. If given two arguments, cookies belonging to the specified + path within that domain are removed. If given three arguments, then + the cookie with the specified name, path and domain is removed. + + Raises KeyError if no matching cookie exists. + + """ + if name is not None: + if (domain is None) or (path is None): + raise ValueError( + "domain and path must be given to remove a cookie by name") + del self._cookies[domain][path][name] + elif path is not None: + if domain is None: + raise ValueError( + "domain must be given to remove cookies by path") + del self._cookies[domain][path] + elif domain is not None: + del self._cookies[domain] + else: + self._cookies = {} + + def clear_session_cookies(self): + """Discard all session cookies. + + Note that the .save() method won't save session cookies anyway, unless + you ask otherwise by passing a true ignore_discard argument. + + """ + self._cookies_lock.acquire() + try: + for cookie in self: + if cookie.discard: + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def clear_expired_cookies(self): + """Discard all expired cookies. 
+ + You probably don't need to call this method: expired cookies are never + sent back to the server (provided you're using DefaultCookiePolicy), + this method is called by CookieJar itself every so often, and the + .save() method won't save expired cookies anyway (unless you ask + otherwise by passing a true ignore_expires argument). + + """ + self._cookies_lock.acquire() + try: + now = time.time() + for cookie in self: + if cookie.is_expired(now): + self.clear(cookie.domain, cookie.path, cookie.name) + finally: + self._cookies_lock.release() + + def __iter__(self): + return deepvalues(self._cookies) + + def __len__(self): + """Return number of contained cookies.""" + i = 0 + for cookie in self: i = i + 1 + return i + + def __repr__(self): + r = [] + for cookie in self: r.append(repr(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + def __str__(self): + r = [] + for cookie in self: r.append(str(cookie)) + return "<%s[%s]>" % (self.__class__, ", ".join(r)) + + +# derives from IOError for backwards-compatibility with Python 2.4.0 +class LoadError(IOError): pass + +class FileCookieJar(CookieJar): + """CookieJar that can be loaded from and saved to a file.""" + + def __init__(self, filename=None, delayload=False, policy=None): + """ + Cookies are NOT loaded from the named file until either the .load() or + .revert() method is called. 
+ + """ + CookieJar.__init__(self, policy) + if filename is not None: + try: + filename+"" + except: + raise ValueError("filename must be string-like") + self.filename = filename + self.delayload = bool(delayload) + + def save(self, filename=None, ignore_discard=False, ignore_expires=False): + """Save cookies to a file.""" + raise NotImplementedError() + + def load(self, filename=None, ignore_discard=False, ignore_expires=False): + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + f = open(filename) + try: + self._really_load(f, filename, ignore_discard, ignore_expires) + finally: + f.close() + + def revert(self, filename=None, + ignore_discard=False, ignore_expires=False): + """Clear all cookies and reload cookies from a saved file. + + Raises LoadError (or IOError) if reversion is not successful; the + object's state will not be altered if this happens. + + """ + if filename is None: + if self.filename is not None: filename = self.filename + else: raise ValueError(MISSING_FILENAME_TEXT) + + self._cookies_lock.acquire() + try: + + old_state = copy.deepcopy(self._cookies) + self._cookies = {} + try: + self.load(filename, ignore_discard, ignore_expires) + except (LoadError, IOError): + self._cookies = old_state + raise + + finally: + self._cookies_lock.release() + +from _LWPCookieJar import LWPCookieJar, lwp_cookie_str +from _MozillaCookieJar import MozillaCookieJar diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py new file mode 100644 index 0000000..5078d33 --- /dev/null +++ b/Lib/http/cookies.py @@ -0,0 +1,733 @@ +#!/usr/bin/env python +# + +#### +# Copyright 2000 by Timothy O'Malley +# +# All Rights Reserved +# +# Permission to use, copy, modify, and distribute this software +# and its documentation for any purpose and without fee is hereby +# granted, provided that the above copyright notice appear in all +# copies and that both that copyright 
notice and this permission +# notice appear in supporting documentation, and that the name of +# Timothy O'Malley not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS +# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR +# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +# PERFORMANCE OF THIS SOFTWARE. +# +#### +# +# Id: Cookie.py,v 2.29 2000/08/23 05:28:49 timo Exp +# by Timothy O'Malley +# +# Cookie.py is a Python module for the handling of HTTP +# cookies as a Python dictionary. See RFC 2109 for more +# information on cookies. +# +# The original idea to treat Cookies as a dictionary came from +# Dave Mitchell (davem@magnet.com) in 1995, when he released the +# first version of nscookie.py. +# +#### + +r""" +Here's a sample session to show how to use this module. +At the moment, this is the only documentation. + +The Basics +---------- + +Importing is easy.. + + >>> from http import cookies + +Most of the time you start by creating a cookie. Cookies come in +three flavors, each with slightly different encoding semantics, but +more on that later. + + >>> C = cookies.SimpleCookie() + >>> C = cookies.SerialCookie() + >>> C = cookies.SmartCookie() + +[Note: Long-time users of cookies.py will remember using +cookies.Cookie() to create an Cookie object. Although deprecated, it +is still supported by the code. See the Backward Compatibility notes +for more information.] + +Once you've created your Cookie, you can add values just as if it were +a dictionary. 
+ + >>> C = cookies.SmartCookie() + >>> C["fig"] = "newton" + >>> C["sugar"] = "wafer" + >>> C.output() + 'Set-Cookie: fig=newton\r\nSet-Cookie: sugar=wafer' + +Notice that the printable representation of a Cookie is the +appropriate format for a Set-Cookie: header. This is the +default behavior. You can change the header and printed +attributes by using the .output() function + + >>> C = cookies.SmartCookie() + >>> C["rocky"] = "road" + >>> C["rocky"]["path"] = "/cookie" + >>> print(C.output(header="Cookie:")) + Cookie: rocky=road; Path=/cookie + >>> print(C.output(attrs=[], header="Cookie:")) + Cookie: rocky=road + +The load() method of a Cookie extracts cookies from a string. In a +CGI script, you would use this method to extract the cookies from the +HTTP_COOKIE environment variable. + + >>> C = cookies.SmartCookie() + >>> C.load("chips=ahoy; vienna=finger") + >>> C.output() + 'Set-Cookie: chips=ahoy\r\nSet-Cookie: vienna=finger' + +The load() method is darn-tootin smart about identifying cookies +within a string. Escaped quotation marks, nested semicolons, and other +such trickeries do not confuse it. + + >>> C = cookies.SmartCookie() + >>> C.load('keebler="E=everybody; L=\\"Loves\\"; fudge=\\012;";') + >>> print(C) + Set-Cookie: keebler="E=everybody; L=\"Loves\"; fudge=\012;" + +Each element of the Cookie also supports all of the RFC 2109 +Cookie attributes. Here's an example which sets the Path +attribute. + + >>> C = cookies.SmartCookie() + >>> C["oreo"] = "doublestuff" + >>> C["oreo"]["path"] = "/" + >>> print(C) + Set-Cookie: oreo=doublestuff; Path=/ + +Each dictionary element has a 'value' attribute, which gives you +back the value associated with the key. + + >>> C = cookies.SmartCookie() + >>> C["twix"] = "none for you" + >>> C["twix"].value + 'none for you' + + +A Bit More Advanced +------------------- + +As mentioned before, there are three different flavors of Cookie +objects, each with different encoding/decoding semantics. 
This +section briefly discusses the differences. + +SimpleCookie + +The SimpleCookie expects that all values should be standard strings. +Just to be sure, SimpleCookie invokes the str() builtin to convert +the value to a string, when the values are set dictionary-style. + + >>> C = cookies.SimpleCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + '7' + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' + + +SerialCookie + +The SerialCookie expects that all values should be serialized using +pickle. As a result of serializing, SerialCookie can save almost any +Python object to a value, and recover the exact same object when the +cookie has been returned. (SerialCookie can yield some +strange-looking cookie values, however.) + + >>> C = cookies.SerialCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number="L7\\012."\r\nSet-Cookie: string="Vseven\\012p0\\012."' + +Be warned, however, if SerialCookie cannot de-serialize a value (because +it isn't a valid pickle'd object), IT WILL RAISE AN EXCEPTION. + + +SmartCookie + +The SmartCookie combines aspects of each of the other two flavors. +When setting a value in a dictionary-fashion, the SmartCookie will +serialize (ala pickle) the value *if and only if* it isn't a +Python string. String objects are *not* serialized. Similarly, +when the load() method parses out values, it attempts to de-serialize +the value. If it fails, then it fallsback to treating the value +as a string. 
+ + >>> C = cookies.SmartCookie() + >>> C["number"] = 7 + >>> C["string"] = "seven" + >>> C["number"].value + 7 + >>> C["string"].value + 'seven' + >>> C.output() + 'Set-Cookie: number="L7\\012."\r\nSet-Cookie: string=seven' + + +Backwards Compatibility +----------------------- + +In order to keep compatibilty with earlier versions of Cookie.py, +it is still possible to use cookies.Cookie() to create a Cookie. In +fact, this simply returns a SmartCookie. + + >>> C = cookies.Cookie() + >>> print(C.__class__.__name__) + SmartCookie + + +Finis. +""" #" +# ^ +# |----helps out font-lock + +# +# Import our required modules +# +import string + +from pickle import dumps, loads + +import re, warnings + +__all__ = ["CookieError","BaseCookie","SimpleCookie","SerialCookie", + "SmartCookie","Cookie"] + +_nulljoin = ''.join +_semispacejoin = '; '.join +_spacejoin = ' '.join + +# +# Define an exception visible to External modules +# +class CookieError(Exception): + pass + + +# These quoting routines conform to the RFC2109 specification, which in +# turn references the character definitions from RFC2068. They provide +# a two-way quoting algorithm. Any non-text character is translated +# into a 4 character sequence: a forward-slash followed by the +# three-digit octal equivalent of the character. Any '\' or '"' is +# quoted with a preceeding '\' slash. +# +# These are taken from RFC2068 and RFC2109. 
+# _LegalChars is the list of chars which don't require "'s +# _Translator hash-table for fast quoting +# +_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~" +_Translator = { + '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', + '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', + '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', + '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', + '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', + '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', + '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', + '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', + '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', + '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', + '\036' : '\\036', '\037' : '\\037', + + '"' : '\\"', '\\' : '\\\\', + + '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', + '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', + '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', + '\210' : '\\210', '\211' : '\\211', '\212' : '\\212', + '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', + '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', + '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', + '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', + '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', + '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', + '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', + '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', + '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', + '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', + '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', + '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', + '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', + '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', + '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', + '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', + '\273' : '\\273', '\274' : '\\274', '\275' : 
'\\275', + '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', + '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', + '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', + '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', + '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', + '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', + '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', + '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', + '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', + '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', + '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', + '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', + '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', + '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', + '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', + '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', + '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', + '\361' : '\\361', '\362' : '\\362', '\363' : '\\363', + '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', + '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', + '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', + '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' + } + +def _quote(str, LegalChars=_LegalChars): + # + # If the string does not need to be double-quoted, + # then just return the string. Otherwise, surround + # the string in doublequotes and precede quote (with a \) + # special characters. + # + if all(c in LegalChars for c in str): + return str + else: + return '"' + _nulljoin( map(_Translator.get, str, str) ) + '"' +# end _quote + + +_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") +_QuotePatt = re.compile(r"[\\].") + +def _unquote(str): + # If there aren't any doublequotes, + # then there can't be any special characters. See RFC 2109. + if len(str) < 2: + return str + if str[0] != '"' or str[-1] != '"': + return str + + # We have to assume that we must decode this string. + # Down to work. 
+ + # Remove the "s + str = str[1:-1] + + # Check for special sequences. Examples: + # \012 --> \n + # \" --> " + # + i = 0 + n = len(str) + res = [] + while 0 <= i < n: + Omatch = _OctalPatt.search(str, i) + Qmatch = _QuotePatt.search(str, i) + if not Omatch and not Qmatch: # Neither matched + res.append(str[i:]) + break + # else: + j = k = -1 + if Omatch: j = Omatch.start(0) + if Qmatch: k = Qmatch.start(0) + if Qmatch and ( not Omatch or k < j ): # QuotePatt matched + res.append(str[i:k]) + res.append(str[k+1]) + i = k+2 + else: # OctalPatt matched + res.append(str[i:j]) + res.append( chr( int(str[j+1:j+4], 8) ) ) + i = j+4 + return _nulljoin(res) +# end _unquote + +# The _getdate() routine is used to set the expiration time in +# the cookie's HTTP header. By default, _getdate() returns the +# current time in the appropriate "expires" format for a +# Set-Cookie header. The one optional argument is an offset from +# now, in seconds. For example, an offset of -3600 means "one hour ago". +# The offset may be a floating point number. +# + +_weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + +_monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + +def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): + from time import gmtime, time + now = time() + year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) + return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % \ + (weekdayname[wd], day, monthname[month], year, hh, mm, ss) + + +# +# A class to hold ONE key,value pair. +# In a cookie, each such pair may have several attributes. +# so this class is used to keep the attributes associated +# with the appropriate key,value pair. +# This class also includes a coded_value attribute, which +# is used to hold the network representation of the +# value. This is most useful when Python objects are +# pickled for network transit. 
+# + +class Morsel(dict): + # RFC 2109 lists these attributes as reserved: + # path comment domain + # max-age secure version + # + # For historical reasons, these attributes are also reserved: + # expires + # + # This dictionary provides a mapping from the lowercase + # variant on the left to the appropriate traditional + # formatting on the right. + _reserved = { "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "secure", + "version" : "Version", + } + + def __init__(self): + # Set defaults + self.key = self.value = self.coded_value = None + + # Set default attributes + for K in self._reserved: + dict.__setitem__(self, K, "") + # end __init__ + + def __setitem__(self, K, V): + K = K.lower() + if not K in self._reserved: + raise CookieError("Invalid Attribute %s" % K) + dict.__setitem__(self, K, V) + # end __setitem__ + + def isReservedKey(self, K): + return K.lower() in self._reserved + # end isReservedKey + + def set(self, key, val, coded_val, LegalChars=_LegalChars): + # First we verify that the key isn't a reserved word + # Second we make sure it only contains legal characters + if key.lower() in self._reserved: + raise CookieError("Attempt to set a reserved key: %s" % key) + if any(c not in LegalChars for c in key): + raise CookieError("Illegal key value: %s" % key) + + # It's a good key, so save it. 
+        self.key = key
+        self.value = val
+        self.coded_value = coded_val
+    # end set
+
+    def output(self, attrs=None, header = "Set-Cookie:"):
+        return "%s %s" % ( header, self.OutputString(attrs) )
+
+    __str__ = output
+
+    def __repr__(self):
+        return '<%s: %s=%s>' % (self.__class__.__name__,
+                                self.key, repr(self.value) )
+
+    def js_output(self, attrs=None):
+        # Print javascript
+        return """
+        <script type="text/javascript">
+        <!-- begin hidden
+        document.cookie = \"%s\";
+        // end hidden -->
+        </script>
+        """ % ( self.OutputString(attrs), )
+    # end js_output()
+
+    def OutputString(self, attrs=None):
+        # Build up our result
+        #
+        result = []
+        RA = result.append
+
+        # First, the key=value pair
+        RA("%s=%s" % (self.key, self.coded_value))
+
+        # Now add any defined attributes
+        if attrs is None:
+            attrs = self._reserved
+        items = sorted(self.items())
+        for K,V in items:
+            if V == "": continue
+            if K not in attrs: continue
+            if K == "expires" and type(V) == type(1):
+                RA("%s=%s" % (self._reserved[K], _getdate(V)))
+            elif K == "max-age" and type(V) == type(1):
+                RA("%s=%d" % (self._reserved[K], V))
+            elif K == "secure":
+                RA(str(self._reserved[K]))
+            else:
+                RA("%s=%s" % (self._reserved[K], V))
+
+        # Return the result
+        return _semispacejoin(result)
+    # end OutputString
+# end Morsel class
+
+
+
+#
+# Pattern for finding cookie
+#
+# This used to be strict parsing based on the RFC2109 and RFC2068
+# specifications.  I have since discovered that MSIE 3.0x doesn't
+# follow the character rules outlined in those specs.  As a
+# result, the parsing rules here are less strict.
+#
+
+_LegalCharsPatt  = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
+_CookiePattern = re.compile(
+    r"(?x)"                       # This is a Verbose pattern
+    r"(?P<key>"                   # Start of group 'key'
+    ""+ _LegalCharsPatt +"+?"     # Any word of at least one letter, nongreedy
+    r")"                          # End of group 'key'
+    r"\s*=\s*"                    # Equal Sign
+    r"(?P<val>"                   # Start of group 'val'
+    r'"(?:[^\\"]|\\.)*"'          # Any doublequoted string
+    r"|"                          # or
+    ""+ _LegalCharsPatt +"*"      # Any word or empty string
+    r")"                          # End of group 'val'
+    r"\s*;?"
# Probably ending in a semi-colon + ) + + +# At long last, here is the cookie class. +# Using this class is almost just like using a dictionary. +# See this module's docstring for example usage. +# +class BaseCookie(dict): + # A container class for a set of Morsels + # + + def value_decode(self, val): + """real_value, coded_value = value_decode(STRING) + Called prior to setting a cookie's value from the network + representation. The VALUE is the value read from HTTP + header. + Override this function to modify the behavior of cookies. + """ + return val, val + # end value_encode + + def value_encode(self, val): + """real_value, coded_value = value_encode(VALUE) + Called prior to setting a cookie's value from the dictionary + representation. The VALUE is the value being assigned. + Override this function to modify the behavior of cookies. + """ + strval = str(val) + return strval, strval + # end value_encode + + def __init__(self, input=None): + if input: self.load(input) + # end __init__ + + def __set(self, key, real_value, coded_value): + """Private method for setting a cookie's value""" + M = self.get(key, Morsel()) + M.set(key, real_value, coded_value) + dict.__setitem__(self, key, M) + # end __set + + def __setitem__(self, key, value): + """Dictionary style assignment.""" + rval, cval = self.value_encode(value) + self.__set(key, rval, cval) + # end __setitem__ + + def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): + """Return a string suitable for HTTP.""" + result = [] + items = sorted(self.items()) + for K,V in items: + result.append( V.output(attrs, header) ) + return sep.join(result) + # end output + + __str__ = output + + def __repr__(self): + L = [] + items = sorted(self.items()) + for K,V in items: + L.append( '%s=%s' % (K,repr(V.value) ) ) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L)) + + def js_output(self, attrs=None): + """Return a string suitable for JavaScript.""" + result = [] + items = sorted(self.items()) + for 
K,V in items: + result.append( V.js_output(attrs) ) + return _nulljoin(result) + # end js_output + + def load(self, rawdata): + """Load cookies from a string (presumably HTTP_COOKIE) or + from a dictionary. Loading cookies from a dictionary 'd' + is equivalent to calling: + map(Cookie.__setitem__, d.keys(), d.values()) + """ + if type(rawdata) == type(""): + self.__ParseString(rawdata) + else: + self.update(rawdata) + return + # end load() + + def __ParseString(self, str, patt=_CookiePattern): + i = 0 # Our starting point + n = len(str) # Length of string + M = None # current morsel + + while 0 <= i < n: + # Start looking for a cookie + match = patt.search(str, i) + if not match: break # No more cookies + + K,V = match.group("key"), match.group("val") + i = match.end(0) + + # Parse the key, value in case it's metainfo + if K[0] == "$": + # We ignore attributes which pertain to the cookie + # mechanism as a whole. See RFC 2109. + # (Does anyone care?) + if M: + M[ K[1:] ] = V + elif K.lower() in Morsel._reserved: + if M: + M[ K ] = _unquote(V) + else: + rval, cval = self.value_decode(V) + self.__set(K, rval, cval) + M = self[K] + # end __ParseString +# end BaseCookie class + +class SimpleCookie(BaseCookie): + """SimpleCookie + SimpleCookie supports strings as cookie values. When setting + the value using the dictionary assignment notation, SimpleCookie + calls the builtin str() to convert the value to a string. Values + received from HTTP are kept as strings. + """ + def value_decode(self, val): + return _unquote( val ), val + def value_encode(self, val): + strval = str(val) + return strval, _quote( strval ) +# end SimpleCookie + +class SerialCookie(BaseCookie): + """SerialCookie + SerialCookie supports arbitrary objects as cookie values. All + values are serialized (using pickle) before being sent to the + client. All incoming values are assumed to be valid Pickle + representations. 
IF AN INCOMING VALUE IS NOT IN A VALID PICKLE + FORMAT, THEN AN EXCEPTION WILL BE RAISED. + + Note: Large cookie values add overhead because they must be + retransmitted on every HTTP transaction. + + Note: HTTP has a 2k limit on the size of a cookie. This class + does not check for this limit, so be careful!!! + """ + def __init__(self, input=None): + warnings.warn("SerialCookie class is insecure; do not use it", + DeprecationWarning) + BaseCookie.__init__(self, input) + # end __init__ + def value_decode(self, val): + # This could raise an exception! + return loads( _unquote(val).encode('latin-1') ), val + def value_encode(self, val): + return val, _quote( dumps(val, 0).decode('latin-1') ) +# end SerialCookie + +class SmartCookie(BaseCookie): + """SmartCookie + SmartCookie supports arbitrary objects as cookie values. If the + object is a string, then it is quoted. If the object is not a + string, however, then SmartCookie will use pickle to serialize + the object into a string representation. + + Note: Large cookie values add overhead because they must be + retransmitted on every HTTP transaction. + + Note: HTTP has a 2k limit on the size of a cookie. This class + does not check for this limit, so be careful!!! + """ + def __init__(self, input=None): + warnings.warn("Cookie/SmartCookie class is insecure; do not use it", + DeprecationWarning) + BaseCookie.__init__(self, input) + # end __init__ + def value_decode(self, val): + strval = _unquote(val) + try: + return loads(strval.encode('latin-1')), val + except: + return strval, val + def value_encode(self, val): + if isinstance(val, str): + return val, _quote(val) + else: + return val, _quote( dumps(val, 0).decode('latin-1') ) +# end SmartCookie + + +########################################################### +# Backwards Compatibility: Don't break any existing code! 
+ +# We provide Cookie() as an alias for SmartCookie() +Cookie = SmartCookie + +# +########################################################### + +def _test(): + import doctest, http.cookies + return doctest.testmod(http.cookies) + +if __name__ == "__main__": + _test() + + +#Local Variables: +#tab-width: 4 +#end: diff --git a/Lib/http/server.py b/Lib/http/server.py new file mode 100644 index 0000000..4f41a19 --- /dev/null +++ b/Lib/http/server.py @@ -0,0 +1,1141 @@ +"""HTTP server classes. + +Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see +SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST, +and CGIHTTPRequestHandler for CGI scripts. + +It does, however, optionally implement HTTP/1.1 persistent connections, +as of version 0.3. + +Notes on CGIHTTPRequestHandler +------------------------------ + +This class implements GET and POST requests to cgi-bin scripts. + +If the os.fork() function is not present (e.g. on Windows), +os.popen2() is used as a fallback, with slightly altered semantics; if +that function is not present either (e.g. on Macintosh), only Python +scripts are supported, and they are executed by the current process. + +In all cases, the implementation is intentionally naive -- all +requests are executed synchronously. + +SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL +-- it may execute arbitrary Python code or external programs. + +Note that status code 200 is sent prior to execution of a CGI script, so +scripts cannot send other status codes such as 302 (redirect). + +XXX To do: + +- log requests even later (to capture byte count) +- log user-agent header and other interesting goodies +- send error log to separate file +""" + + +# See also: +# +# HTTP Working Group T. Berners-Lee +# INTERNET-DRAFT R. T. Fielding +# H. 
Frystyk Nielsen +# Expires September 8, 1995 March 8, 1995 +# +# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt +# +# and +# +# Network Working Group R. Fielding +# Request for Comments: 2616 et al +# Obsoletes: 2068 June 1999 +# Category: Standards Track +# +# URL: http://www.faqs.org/rfcs/rfc2616.html + +# Log files +# --------- +# +# Here's a quote from the NCSA httpd docs about log file format. +# +# | The logfile format is as follows. Each line consists of: +# | +# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb +# | +# | host: Either the DNS name or the IP number of the remote client +# | rfc931: Any information returned by identd for this person, +# | - otherwise. +# | authuser: If user sent a userid for authentication, the user name, +# | - otherwise. +# | DD: Day +# | Mon: Month (calendar name) +# | YYYY: Year +# | hh: hour (24-hour format, the machine's timezone) +# | mm: minutes +# | ss: seconds +# | request: The first line of the HTTP request as sent by the client. +# | ddd: the status code returned by the server, - if not available. +# | bbbb: the total number of bytes sent, +# | *not including the HTTP/1.0 header*, - if not available +# | +# | You can determine the name of the file accessed through request. +# +# (Actually, the latter is only true if you know the server configuration +# at the time the request was made!) + +__version__ = "0.6" + +__all__ = ["HTTPServer", "BaseHTTPRequestHandler"] + +import io +import os +import sys +import cgi +import time +import socket # For gethostbyaddr() +import shutil +import urllib +import select +import mimetools +import mimetypes +import posixpath +import socketserver + +# Default error message template +DEFAULT_ERROR_MESSAGE = """\ + +Error response + + +
+</head>
+<body>
+<h1>Error response</h1>
+<p>Error code %(code)d.
+<p>Message: %(message)s.
+<p>
Error code explanation: %(code)s = %(explain)s. + +""" + +DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8" + +def _quote_html(html): + return html.replace("&", "&").replace("<", "<").replace(">", ">") + +class HTTPServer(socketserver.TCPServer): + + allow_reuse_address = 1 # Seems to make sense in testing environment + + def server_bind(self): + """Override server_bind to store the server name.""" + socketserver.TCPServer.server_bind(self) + host, port = self.socket.getsockname()[:2] + self.server_name = socket.getfqdn(host) + self.server_port = port + + +class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): + + """HTTP request handler base class. + + The following explanation of HTTP serves to guide you through the + code as well as to expose any misunderstandings I may have about + HTTP (so you don't need to read the code to figure out I'm wrong + :-). + + HTTP (HyperText Transfer Protocol) is an extensible protocol on + top of a reliable stream transport (e.g. TCP/IP). The protocol + recognizes three parts to a request: + + 1. One line identifying the request type and path + 2. An optional set of RFC-822-style headers + 3. An optional data part + + The headers and data are separated by a blank line. + + The first line of the request has the form + + + + where is a (case-sensitive) keyword such as GET or POST, + is a string containing path information for the request, + and should be the string "HTTP/1.0" or "HTTP/1.1". + is encoded using the URL encoding scheme (using %xx to signify + the ASCII character with hex code xx). + + The specification specifies that lines are separated by CRLF but + for compatibility with the widest range of clients recommends + servers also handle LF. Similarly, whitespace in the request line + is treated sensibly (allowing multiple spaces between components + and allowing trailing whitespace). + + Similarly, for output, lines ought to be separated by CRLF pairs + but most clients grok LF characters just fine. 
+ + If the first line of the request has the form + + + + (i.e. is left out) then this is assumed to be an HTTP + 0.9 request; this form has no optional headers and data part and + the reply consists of just the data. + + The reply form of the HTTP 1.x protocol again has three parts: + + 1. One line giving the response code + 2. An optional set of RFC-822-style headers + 3. The data + + Again, the headers and data are separated by a blank line. + + The response code line has the form + + + + where is the protocol version ("HTTP/1.0" or "HTTP/1.1"), + is a 3-digit response code indicating success or + failure of the request, and is an optional + human-readable string explaining what the response code means. + + This server parses the request and the headers, and then calls a + function specific to the request type (). Specifically, + a request SPAM will be handled by a method do_SPAM(). If no + such method exists the server sends an error response to the + client. If it exists, it is called with no arguments: + + do_SPAM() + + Note that the request name is case sensitive (i.e. SPAM and spam + are different requests). + + The various request details are stored in instance variables: + + - client_address is the client IP address in the form (host, + port); + + - command, path and version are the broken-down request line; + + - headers is an instance of mimetools.Message (or a derived + class) containing the header information; + + - rfile is a file object open for reading positioned at the + start of the optional input data part; + + - wfile is a file object open for writing. + + IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING! + + The first thing to be written must be the response line. Then + follow 0 or more header lines, then a blank line, and then the + actual data (if any). 
The meaning of the header lines depends on + the command executed by the server; in most cases, when data is + returned, there should be at least one header line of the form + + Content-type: / + + where and should be registered MIME types, + e.g. "text/html" or "text/plain". + + """ + + # The Python system version, truncated to its first component. + sys_version = "Python/" + sys.version.split()[0] + + # The server software version. You may want to override this. + # The format is multiple whitespace-separated strings, + # where each string is of the form name[/version]. + server_version = "BaseHTTP/" + __version__ + + error_message_format = DEFAULT_ERROR_MESSAGE + error_content_type = DEFAULT_ERROR_CONTENT_TYPE + + # The default request version. This only affects responses up until + # the point where the request line is parsed, so it mainly decides what + # the client gets back when sending a malformed request line. + # Most web servers default to HTTP 0.9, i.e. don't send a status line. + default_request_version = "HTTP/0.9" + + def parse_request(self): + """Parse a request (internal). + + The request should be stored in self.raw_requestline; the results + are in self.command, self.path, self.request_version and + self.headers. + + Return True for success, False for failure; on failure, an + error is sent back. 
+ + """ + self.command = None # set in case of error on the first line + self.request_version = version = self.default_request_version + self.close_connection = 1 + requestline = str(self.raw_requestline, 'iso-8859-1') + if requestline[-2:] == '\r\n': + requestline = requestline[:-2] + elif requestline[-1:] == '\n': + requestline = requestline[:-1] + self.requestline = requestline + words = requestline.split() + if len(words) == 3: + [command, path, version] = words + if version[:5] != 'HTTP/': + self.send_error(400, "Bad request version (%r)" % version) + return False + try: + base_version_number = version.split('/', 1)[1] + version_number = base_version_number.split(".") + # RFC 2145 section 3.1 says there can be only one "." and + # - major and minor numbers MUST be treated as + # separate integers; + # - HTTP/2.4 is a lower version than HTTP/2.13, which in + # turn is lower than HTTP/12.3; + # - Leading zeros MUST be ignored by recipients. + if len(version_number) != 2: + raise ValueError + version_number = int(version_number[0]), int(version_number[1]) + except (ValueError, IndexError): + self.send_error(400, "Bad request version (%r)" % version) + return False + if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1": + self.close_connection = 0 + if version_number >= (2, 0): + self.send_error(505, + "Invalid HTTP Version (%s)" % base_version_number) + return False + elif len(words) == 2: + [command, path] = words + self.close_connection = 1 + if command != 'GET': + self.send_error(400, + "Bad HTTP/0.9 request type (%r)" % command) + return False + elif not words: + return False + else: + self.send_error(400, "Bad request syntax (%r)" % requestline) + return False + self.command, self.path, self.request_version = command, path, version + + # Examine the headers and look for a Connection directive. + + # MessageClass (rfc822) wants to see strings rather than bytes. 
+ # But a TextIOWrapper around self.rfile would buffer too many bytes + # from the stream, bytes which we later need to read as bytes. + # So we read the correct bytes here, as bytes, then use StringIO + # to make them look like strings for MessageClass to parse. + headers = [] + while True: + line = self.rfile.readline() + headers.append(line) + if line in (b'\r\n', b'\n', b''): + break + hfile = io.StringIO(b''.join(headers).decode('iso-8859-1')) + self.headers = self.MessageClass(hfile) + + conntype = self.headers.get('Connection', "") + if conntype.lower() == 'close': + self.close_connection = 1 + elif (conntype.lower() == 'keep-alive' and + self.protocol_version >= "HTTP/1.1"): + self.close_connection = 0 + return True + + def handle_one_request(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. + + """ + self.raw_requestline = self.rfile.readline() + if not self.raw_requestline: + self.close_connection = 1 + return + if not self.parse_request(): # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + + def handle(self): + """Handle multiple requests if necessary.""" + self.close_connection = 1 + + self.handle_one_request() + while not self.close_connection: + self.handle_one_request() + + def send_error(self, code, message=None): + """Send and log an error reply. + + Arguments are the error code, and a detailed message. + The detailed message defaults to the short entry matching the + response code. + + This sends an error response (so it must be called before any + output has been generated), logs the error, and finally sends + a piece of HTML explaining the error to the user. 
+ + """ + + try: + shortmsg, longmsg = self.responses[code] + except KeyError: + shortmsg, longmsg = '???', '???' + if message is None: + message = shortmsg + explain = longmsg + self.log_error("code %d, message %s", code, message) + # using _quote_html to prevent Cross Site Scripting attacks (see bug #1100201) + content = (self.error_message_format % + {'code': code, 'message': _quote_html(message), 'explain': explain}) + self.send_response(code, message) + self.send_header("Content-Type", self.error_content_type) + self.send_header('Connection', 'close') + self.end_headers() + if self.command != 'HEAD' and code >= 200 and code not in (204, 304): + self.wfile.write(content.encode('UTF-8', 'replace')) + + def send_response(self, code, message=None): + """Send the response header and log the response code. + + Also send two standard headers with the server software + version and the current date. + + """ + self.log_request(code) + if message is None: + if code in self.responses: + message = self.responses[code][0] + else: + message = '' + if self.request_version != 'HTTP/0.9': + self.wfile.write(("%s %d %s\r\n" % + (self.protocol_version, code, message)).encode('ASCII', 'strict')) + # print (self.protocol_version, code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_header(self, keyword, value): + """Send a MIME header.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict')) + + if keyword.lower() == 'connection': + if value.lower() == 'close': + self.close_connection = 1 + elif value.lower() == 'keep-alive': + self.close_connection = 0 + + def end_headers(self): + """Send the blank line ending the MIME headers.""" + if self.request_version != 'HTTP/0.9': + self.wfile.write(b"\r\n") + + def log_request(self, code='-', size='-'): + """Log an accepted request. + + This is called by send_response(). 
+ + """ + + self.log_message('"%s" %s %s', + self.requestline, str(code), str(size)) + + def log_error(self, format, *args): + """Log an error. + + This is called when a request cannot be fulfilled. By + default it passes the message on to log_message(). + + Arguments are the same as for log_message(). + + XXX This should go to the separate error log. + + """ + + self.log_message(format, *args) + + def log_message(self, format, *args): + """Log an arbitrary message. + + This is used by all other logging functions. Override + it if you have specific logging wishes. + + The first argument, FORMAT, is a format string for the + message to be logged. If the format string contains + any % escapes requiring parameters, they should be + specified as subsequent arguments (it's just like + printf!). + + The client host and current date/time are prefixed to + every message. + + """ + + sys.stderr.write("%s - - [%s] %s\n" % + (self.address_string(), + self.log_date_time_string(), + format%args)) + + def version_string(self): + """Return the server software version string.""" + return self.server_version + ' ' + self.sys_version + + def date_time_string(self, timestamp=None): + """Return the current date and time formatted for a message header.""" + if timestamp is None: + timestamp = time.time() + year, month, day, hh, mm, ss, wd, y, z = time.gmtime(timestamp) + s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( + self.weekdayname[wd], + day, self.monthname[month], year, + hh, mm, ss) + return s + + def log_date_time_string(self): + """Return the current time formatted for logging.""" + now = time.time() + year, month, day, hh, mm, ss, x, y, z = time.localtime(now) + s = "%02d/%3s/%04d %02d:%02d:%02d" % ( + day, self.monthname[month], year, hh, mm, ss) + return s + + weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] + + monthname = [None, + 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', + 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] + + def address_string(self): + """Return 
the client address formatted for logging. + + This version looks up the full hostname using gethostbyaddr(), + and tries to find a name that contains at least one dot. + + """ + + host, port = self.client_address[:2] + return socket.getfqdn(host) + + # Essentially static class variables + + # The version of the HTTP protocol we support. + # Set this to HTTP/1.1 to enable automatic keepalive + protocol_version = "HTTP/1.0" + + # The Message-like class used to parse headers + MessageClass = mimetools.Message + + # Table mapping response codes to messages; entries have the + # form {code: (shortmessage, longmessage)}. + # See RFC 2616. + responses = { + 100: ('Continue', 'Request received, please continue'), + 101: ('Switching Protocols', + 'Switching to new protocol; obey Upgrade header'), + + 200: ('OK', 'Request fulfilled, document follows'), + 201: ('Created', 'Document created, URL follows'), + 202: ('Accepted', + 'Request accepted, processing continues off-line'), + 203: ('Non-Authoritative Information', 'Request fulfilled from cache'), + 204: ('No Content', 'Request fulfilled, nothing follows'), + 205: ('Reset Content', 'Clear input form for further input.'), + 206: ('Partial Content', 'Partial content follows.'), + + 300: ('Multiple Choices', + 'Object has several resources -- see URI list'), + 301: ('Moved Permanently', 'Object moved permanently -- see URI list'), + 302: ('Found', 'Object moved temporarily -- see URI list'), + 303: ('See Other', 'Object moved -- see Method and URL list'), + 304: ('Not Modified', + 'Document has not changed since given time'), + 305: ('Use Proxy', + 'You must use proxy specified in Location to access this ' + 'resource.'), + 307: ('Temporary Redirect', + 'Object moved temporarily -- see URI list'), + + 400: ('Bad Request', + 'Bad request syntax or unsupported method'), + 401: ('Unauthorized', + 'No permission -- see authorization schemes'), + 402: ('Payment Required', + 'No payment -- see charging schemes'), + 403: 
('Forbidden', + 'Request forbidden -- authorization will not help'), + 404: ('Not Found', 'Nothing matches the given URI'), + 405: ('Method Not Allowed', + 'Specified method is invalid for this server.'), + 406: ('Not Acceptable', 'URI not available in preferred format.'), + 407: ('Proxy Authentication Required', 'You must authenticate with ' + 'this proxy before proceeding.'), + 408: ('Request Timeout', 'Request timed out; try again later.'), + 409: ('Conflict', 'Request conflict.'), + 410: ('Gone', + 'URI no longer exists and has been permanently removed.'), + 411: ('Length Required', 'Client must specify Content-Length.'), + 412: ('Precondition Failed', 'Precondition in headers is false.'), + 413: ('Request Entity Too Large', 'Entity is too large.'), + 414: ('Request-URI Too Long', 'URI is too long.'), + 415: ('Unsupported Media Type', 'Entity body in unsupported format.'), + 416: ('Requested Range Not Satisfiable', + 'Cannot satisfy request range.'), + 417: ('Expectation Failed', + 'Expect condition could not be satisfied.'), + + 500: ('Internal Server Error', 'Server got itself in trouble'), + 501: ('Not Implemented', + 'Server does not support this operation'), + 502: ('Bad Gateway', 'Invalid responses from another server/proxy.'), + 503: ('Service Unavailable', + 'The server cannot process the request due to a high load'), + 504: ('Gateway Timeout', + 'The gateway server did not receive a timely response'), + 505: ('HTTP Version Not Supported', 'Cannot fulfill request.'), + } + + +class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): + + """Simple HTTP request handler with GET and HEAD commands. + + This serves files from the current directory and any of its + subdirectories. The MIME type for files is determined by + calling the .guess_type() method. + + The GET and HEAD requests are identical except that the HEAD + request omits the actual contents of the file. 
+ + """ + + server_version = "SimpleHTTP/" + __version__ + + def do_GET(self): + """Serve a GET request.""" + f = self.send_head() + if f: + self.copyfile(f, self.wfile) + f.close() + + def do_HEAD(self): + """Serve a HEAD request.""" + f = self.send_head() + if f: + f.close() + + def send_head(self): + """Common code for GET and HEAD commands. + + This sends the response code and MIME headers. + + Return value is either a file object (which has to be copied + to the outputfile by the caller unless the command was HEAD, + and must be closed by the caller under all circumstances), or + None, in which case the caller has nothing further to do. + + """ + path = self.translate_path(self.path) + f = None + if os.path.isdir(path): + if not self.path.endswith('/'): + # redirect browser - doing basically what apache does + self.send_response(301) + self.send_header("Location", self.path + "/") + self.end_headers() + return None + for index in "index.html", "index.htm": + index = os.path.join(path, index) + if os.path.exists(index): + path = index + break + else: + return self.list_directory(path) + ctype = self.guess_type(path) + try: + f = open(path, 'rb') + except IOError: + self.send_error(404, "File not found") + return None + self.send_response(200) + self.send_header("Content-type", ctype) + fs = os.fstat(f.fileno()) + self.send_header("Content-Length", str(fs[6])) + self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) + self.end_headers() + return f + + def list_directory(self, path): + """Helper to produce a directory listing (absent index.html). + + Return value is either a file object, or None (indicating an + error). In either case, the headers are sent, making the + interface the same as for send_head(). 
+ + """ + try: + list = os.listdir(path) + except os.error: + self.send_error(404, "No permission to list directory") + return None + list.sort(key=lambda a: a.lower()) + r = [] + displaypath = cgi.escape(urllib.unquote(self.path)) + r.append('') + r.append("\nDirectory listing for %s\n" % displaypath) + r.append("\n

Directory listing for %s

\n" % displaypath) + r.append("
\n
    \n") + for name in list: + fullname = os.path.join(path, name) + displayname = linkname = name + # Append / for directories or @ for symbolic links + if os.path.isdir(fullname): + displayname = name + "/" + linkname = name + "/" + if os.path.islink(fullname): + displayname = name + "@" + # Note: a link to a directory displays with @ and links with / + r.append('
  • %s\n' + % (urllib.quote(linkname), cgi.escape(displayname))) + r.append("
\n
\n\n\n") + enc = sys.getfilesystemencoding() + encoded = ''.join(r).encode(enc) + f = io.BytesIO() + f.write(encoded) + f.seek(0) + self.send_response(200) + self.send_header("Content-type", "text/html; charset=%s" % enc) + self.send_header("Content-Length", str(len(encoded))) + self.end_headers() + return f + + def translate_path(self, path): + """Translate a /-separated PATH to the local filename syntax. + + Components that mean special things to the local file system + (e.g. drive or directory names) are ignored. (XXX They should + probably be diagnosed.) + + """ + # abandon query parameters + path = path.split('?',1)[0] + path = path.split('#',1)[0] + path = posixpath.normpath(urllib.unquote(path)) + words = path.split('/') + words = filter(None, words) + path = os.getcwd() + for word in words: + drive, word = os.path.splitdrive(word) + head, word = os.path.split(word) + if word in (os.curdir, os.pardir): continue + path = os.path.join(path, word) + return path + + def copyfile(self, source, outputfile): + """Copy all data between two file objects. + + The SOURCE argument is a file object open for reading + (or anything with a read() method) and the DESTINATION + argument is a file object open for writing (or + anything with a write() method). + + The only reason for overriding this would be to change + the block size or perhaps to replace newlines by CRLF + -- note however that this the default server uses this + to copy binary data as well. + + """ + shutil.copyfileobj(source, outputfile) + + def guess_type(self, path): + """Guess the type of a file. + + Argument is a PATH (a filename). + + Return value is a string of the form type/subtype, + usable for a MIME Content-type header. + + The default implementation looks the file's extension + up in the table self.extensions_map, using application/octet-stream + as a default; however it would be permissible (if + slow) to look inside the data to make a better guess. 
+ + """ + + base, ext = posixpath.splitext(path) + if ext in self.extensions_map: + return self.extensions_map[ext] + ext = ext.lower() + if ext in self.extensions_map: + return self.extensions_map[ext] + else: + return self.extensions_map[''] + + if not mimetypes.inited: + mimetypes.init() # try to read system mime.types + extensions_map = mimetypes.types_map.copy() + extensions_map.update({ + '': 'application/octet-stream', # Default + '.py': 'text/plain', + '.c': 'text/plain', + '.h': 'text/plain', + }) + + +# Utilities for CGIHTTPRequestHandler + +nobody = None + +def nobody_uid(): + """Internal routine to get nobody's uid""" + global nobody + if nobody: + return nobody + try: + import pwd + except ImportError: + return -1 + try: + nobody = pwd.getpwnam('nobody')[2] + except KeyError: + nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) + return nobody + + +def executable(path): + """Test for executable file.""" + try: + st = os.stat(path) + except os.error: + return False + return st.st_mode & 0o111 != 0 + + +class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): + + """Complete HTTP server with GET, HEAD and POST commands. + + GET and HEAD also support running CGI scripts. + + The POST command is *only* implemented for CGI scripts. + + """ + + # Determine platform specifics + have_fork = hasattr(os, 'fork') + have_popen2 = hasattr(os, 'popen2') + have_popen3 = hasattr(os, 'popen3') + + # Make rfile unbuffered -- we need to read one line and then pass + # the rest to a subprocess, so we can't use buffered input. + rbufsize = 0 + + def do_POST(self): + """Serve a POST request. + + This is only implemented for CGI scripts. 
+ + """ + + if self.is_cgi(): + self.run_cgi() + else: + self.send_error(501, "Can only POST to CGI scripts") + + def send_head(self): + """Version of send_head that support CGI scripts""" + if self.is_cgi(): + return self.run_cgi() + else: + return SimpleHTTPRequestHandler.send_head(self) + + def is_cgi(self): + """Test whether self.path corresponds to a CGI script. + + Return a tuple (dir, rest) if self.path requires running a + CGI script, None if not. Note that rest begins with a + slash if it is not empty. + + The default implementation tests whether the path + begins with one of the strings in the list + self.cgi_directories (and the next character is a '/' + or the end of the string). + + """ + + path = self.path + + for x in self.cgi_directories: + i = len(x) + if path[:i] == x and (not path[i:] or path[i] == '/'): + self.cgi_info = path[:i], path[i+1:] + return True + return False + + cgi_directories = ['/cgi-bin', '/htbin'] + + def is_executable(self, path): + """Test whether argument path is an executable file.""" + return executable(path) + + def is_python(self, path): + """Test whether argument path is a Python script.""" + head, tail = os.path.splitext(path) + return tail.lower() in (".py", ".pyw") + + def run_cgi(self): + """Execute a CGI script.""" + path = self.path + dir, rest = self.cgi_info + + i = path.find('/', len(dir) + 1) + while i >= 0: + nextdir = path[:i] + nextrest = path[i+1:] + + scriptdir = self.translate_path(nextdir) + if os.path.isdir(scriptdir): + dir, rest = nextdir, nextrest + i = path.find('/', len(dir) + 1) + else: + break + + # find an explicit query string, if present. + i = rest.rfind('?') + if i >= 0: + rest, query = rest[:i], rest[i+1:] + else: + query = '' + + # dissect the part after the directory name into a script name & + # a possible additional path, to be stored in PATH_INFO. 
+ i = rest.find('/') + if i >= 0: + script, rest = rest[:i], rest[i:] + else: + script, rest = rest, '' + + scriptname = dir + '/' + script + scriptfile = self.translate_path(scriptname) + if not os.path.exists(scriptfile): + self.send_error(404, "No such CGI script (%r)" % scriptname) + return + if not os.path.isfile(scriptfile): + self.send_error(403, "CGI script is not a plain file (%r)" % + scriptname) + return + ispy = self.is_python(scriptname) + if not ispy: + if not (self.have_fork or self.have_popen2 or self.have_popen3): + self.send_error(403, "CGI script is not a Python script (%r)" % + scriptname) + return + if not self.is_executable(scriptfile): + self.send_error(403, "CGI script is not executable (%r)" % + scriptname) + return + + # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html + # XXX Much of the following could be prepared ahead of time! + env = {} + env['SERVER_SOFTWARE'] = self.version_string() + env['SERVER_NAME'] = self.server.server_name + env['GATEWAY_INTERFACE'] = 'CGI/1.1' + env['SERVER_PROTOCOL'] = self.protocol_version + env['SERVER_PORT'] = str(self.server.server_port) + env['REQUEST_METHOD'] = self.command + uqrest = urllib.unquote(rest) + env['PATH_INFO'] = uqrest + env['PATH_TRANSLATED'] = self.translate_path(uqrest) + env['SCRIPT_NAME'] = scriptname + if query: + env['QUERY_STRING'] = query + host = self.address_string() + if host != self.client_address[0]: + env['REMOTE_HOST'] = host + env['REMOTE_ADDR'] = self.client_address[0] + authorization = self.headers.getheader("authorization") + if authorization: + authorization = authorization.split() + if len(authorization) == 2: + import base64, binascii + env['AUTH_TYPE'] = authorization[0] + if authorization[0].lower() == "basic": + try: + authorization = authorization[1].encode('ascii') + authorization = base64.decodestring(authorization).\ + decode('ascii') + except (binascii.Error, UnicodeError): + pass + else: + authorization = authorization.split(':') + if len(authorization) 
== 2: + env['REMOTE_USER'] = authorization[0] + # XXX REMOTE_IDENT + if self.headers.typeheader is None: + env['CONTENT_TYPE'] = self.headers.type + else: + env['CONTENT_TYPE'] = self.headers.typeheader + length = self.headers.getheader('content-length') + if length: + env['CONTENT_LENGTH'] = length + referer = self.headers.getheader('referer') + if referer: + env['HTTP_REFERER'] = referer + accept = [] + for line in self.headers.getallmatchingheaders('accept'): + if line[:1] in "\t\n\r ": + accept.append(line.strip()) + else: + accept = accept + line[7:].split(',') + env['HTTP_ACCEPT'] = ','.join(accept) + ua = self.headers.getheader('user-agent') + if ua: + env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.getheaders('cookie')) + if co: + env['HTTP_COOKIE'] = ', '.join(co) + # XXX Other HTTP_* headers + # Since we're setting the env in the parent, provide empty + # values to override previously set values + for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH', + 'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'): + env.setdefault(k, "") + os.environ.update(env) + + self.send_response(200, "Script output follows") + + decoded_query = query.replace('+', ' ') + + if self.have_fork: + # Unix -- fork as we should + args = [script] + if '=' not in decoded_query: + args.append(decoded_query) + nobody = nobody_uid() + self.wfile.flush() # Always flush before forking + pid = os.fork() + if pid != 0: + # Parent + pid, sts = os.waitpid(pid, 0) + # throw away additional data [see bug #427345] + while select.select([self.rfile], [], [], 0)[0]: + if not self.rfile.read(1): + break + if sts: + self.log_error("CGI script exit status %#x", sts) + return + # Child + try: + try: + os.setuid(nobody) + except os.error: + pass + os.dup2(self.rfile.fileno(), 0) + os.dup2(self.wfile.fileno(), 1) + os.execve(scriptfile, args, os.environ) + except: + self.server.handle_error(self.request, self.client_address) + os._exit(127) + + elif self.have_popen2 or self.have_popen3: + 
# Windows -- use popen2 or popen3 to create a subprocess + import shutil + if self.have_popen3: + popenx = os.popen3 + else: + popenx = os.popen2 + cmdline = scriptfile + if self.is_python(scriptfile): + interp = sys.executable + if interp.lower().endswith("w.exe"): + # On Windows, use python.exe, not pythonw.exe + interp = interp[:-5] + interp[-4:] + cmdline = "%s -u %s" % (interp, cmdline) + if '=' not in query and '"' not in query: + cmdline = '%s "%s"' % (cmdline, query) + self.log_message("command: %s", cmdline) + try: + nbytes = int(length) + except (TypeError, ValueError): + nbytes = 0 + files = popenx(cmdline, 'b') + fi = files[0] + fo = files[1] + if self.have_popen3: + fe = files[2] + if self.command.lower() == "post" and nbytes > 0: + data = self.rfile.read(nbytes) + fi.write(data) + # throw away additional data [see bug #427345] + while select.select([self.rfile._sock], [], [], 0)[0]: + if not self.rfile._sock.recv(1): + break + fi.close() + shutil.copyfileobj(fo, self.wfile) + if self.have_popen3: + errors = fe.read() + fe.close() + if errors: + self.log_error('%s', errors) + sts = fo.close() + if sts: + self.log_error("CGI script exit status %#x", sts) + else: + self.log_message("CGI script exited OK") + + else: + # Other O.S. 
-- execute script in this process + save_argv = sys.argv + save_stdin = sys.stdin + save_stdout = sys.stdout + save_stderr = sys.stderr + try: + save_cwd = os.getcwd() + try: + sys.argv = [scriptfile] + if '=' not in decoded_query: + sys.argv.append(decoded_query) + sys.stdout = self.wfile + sys.stdin = self.rfile + exec(open(scriptfile).read(), {"__name__": "__main__"}) + finally: + sys.argv = save_argv + sys.stdin = save_stdin + sys.stdout = save_stdout + sys.stderr = save_stderr + os.chdir(save_cwd) + except SystemExit as sts: + self.log_error("CGI script exit status %s", str(sts)) + else: + self.log_message("CGI script exited OK") + + +def test(HandlerClass = BaseHTTPRequestHandler, + ServerClass = HTTPServer, protocol="HTTP/1.0"): + """Test the HTTP request handler class. + + This runs an HTTP server on port 8000 (or the first command line + argument). + + """ + + if sys.argv[1:]: + port = int(sys.argv[1]) + else: + port = 8000 + server_address = ('', port) + + HandlerClass.protocol_version = protocol + httpd = ServerClass(server_address, HandlerClass) + + sa = httpd.socket.getsockname() + print("Serving HTTP on", sa[0], "port", sa[1], "...") + httpd.serve_forever() + + +if __name__ == '__main__': + test(HandlerClass=BaseHTTPRequestHandler) + test(HandlerClass=SimpleHTTPRequestHandler) + test(HandlerClass=CGIHTTPRequestHandler) diff --git a/Lib/httplib.py b/Lib/httplib.py deleted file mode 100644 index de27c17..0000000 --- a/Lib/httplib.py +++ /dev/null @@ -1,1132 +0,0 @@ -"""HTTP/1.1 client library - - - - -HTTPConnection goes through a number of "states", which define when a client -may legally make another request or fetch the response for a particular -request. 
This diagram details these state transitions: - - (null) - | - | HTTPConnection() - v - Idle - | - | putrequest() - v - Request-started - | - | ( putheader() )* endheaders() - v - Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] - |\____________________ - | | - | response.read() | putrequest() - v v - Idle Req-started-unread-response - ______/| - / | - response.read() | | ( putheader() )* endheaders() - v v - Request-started Req-sent-unread-response - | - | response.read() - v - Request-sent - -This diagram presents the following rules: - -- a second request may not be started until {response-headers-read} - -- a response [object] cannot be retrieved until {request-sent} - -- there is no differentiation between an unread response body and a - partially read response body - -Note: this enforcement is applied by the HTTPConnection class. The - HTTPResponse class does not enforce this state machine, which - implies sophisticated clients may accelerate the request/response - pipeline. Caution should be taken, though: accelerating the states - beyond the above pattern may imply knowledge of the server's - connection-close behavior for certain requests. For example, it - is impossible to tell whether the server will close the connection - UNTIL the response headers have been read; this means that further - requests cannot be placed into the pipeline until it is known that - the server will NOT be closing the connection. 
- -Logical State __state __response -------------- ------- ---------- -Idle _CS_IDLE None -Request-started _CS_REQ_STARTED None -Request-sent _CS_REQ_SENT None -Unread-response _CS_IDLE -Req-started-unread-response _CS_REQ_STARTED -Req-sent-unread-response _CS_REQ_SENT -""" - -import io -import mimetools -import socket -from urlparse import urlsplit -import warnings - -__all__ = ["HTTPResponse", "HTTPConnection", - "HTTPException", "NotConnected", "UnknownProtocol", - "UnknownTransferEncoding", "UnimplementedFileMode", - "IncompleteRead", "InvalidURL", "ImproperConnectionState", - "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] - -HTTP_PORT = 80 -HTTPS_PORT = 443 - -_UNKNOWN = 'UNKNOWN' - -# connection states -_CS_IDLE = 'Idle' -_CS_REQ_STARTED = 'Request-started' -_CS_REQ_SENT = 'Request-sent' - -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED = 426 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 
-HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 - -# Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', -} - -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - -class HTTPMessage(mimetools.Message): - - def addheader(self, key, value): - """Add header for field key handling repeats.""" - prev = self.dict.get(key) - if prev is None: - self.dict[key] = value - else: - combined = ", ".join((prev, value)) - self.dict[key] = combined - - def addcontinue(self, key, more): - """Add more field data from a continuation line.""" - prev = self.dict[key] - self.dict[key] = prev + "\n " + more - - def readheaders(self): - """Read header lines. - - Read header lines up to the entirely blank line that terminates them. - The (normally blank) line that ends the headers is skipped, but not - included in the returned list. 
If a non-header line ends the headers, - (which is an error), an attempt is made to backspace over it; it is - never included in the returned list. - - The variable self.status is set to the empty string if all went well, - otherwise it is an error message. The variable self.headers is a - completely uninterpreted list of lines contained in the header (so - printing them will reproduce the header exactly as it appears in the - file). - - If multiple header fields with the same name occur, they are combined - according to the rules in RFC 2616 sec 4.2: - - Appending each subsequent field-value to the first, each separated - by a comma. The order in which header fields with the same field-name - are received is significant to the interpretation of the combined - field value. - """ - # XXX The implementation overrides the readheaders() method of - # rfc822.Message. The base class design isn't amenable to - # customized behavior here so the method here is a copy of the - # base class code with a few small changes. - - self.dict = {} - self.unixfrom = '' - self.headers = hlist = [] - self.status = '' - headerseen = "" - firstline = 1 - startofline = unread = tell = None - if hasattr(self.fp, 'unread'): - unread = self.fp.unread - elif self.seekable: - tell = self.fp.tell - while True: - if tell: - try: - startofline = tell() - except IOError: - startofline = tell = None - self.seekable = 0 - line = str(self.fp.readline(), "iso-8859-1") - if not line: - self.status = 'EOF in headers' - break - # Skip unix From name time lines - if firstline and line.startswith('From '): - self.unixfrom = self.unixfrom + line - continue - firstline = 0 - if headerseen and line[0] in ' \t': - # XXX Not sure if continuation lines are handled properly - # for http and/or for repeating headers - # It's a continuation line. - hlist.append(line) - self.addcontinue(headerseen, line.strip()) - continue - elif self.iscomment(line): - # It's a comment. Ignore it. 
- continue - elif self.islast(line): - # Note! No pushback here! The delimiter line gets eaten. - break - headerseen = self.isheader(line) - if headerseen: - # It's a legal header line, save it. - hlist.append(line) - self.addheader(headerseen, line[len(headerseen)+1:].strip()) - continue - else: - # It's not a header line; throw it back and stop here. - if not self.dict: - self.status = 'No headers' - else: - self.status = 'Non-header line where header expected' - # Try to undo the read. - if unread: - unread(line) - elif tell: - self.fp.seek(startofline) - else: - self.status = self.status + '; bad seek' - break - -class HTTPResponse: - - # strict: If true, raise BadStatusLine if the status line can't be - # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is - # false because it prevents clients from talking to HTTP/0.9 - # servers. Note that a response with a sufficiently corrupted - # status line will look like an HTTP/0.9 response. - - # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - - # The bytes from the socket object are iso-8859-1 strings. - # See RFC 2616 sec 2.2 which notes an exception for MIME-encoded - # text following RFC 2047. The basic status line parsing only - # accepts iso-8859-1. - - def __init__(self, sock, debuglevel=0, strict=0, method=None): - # XXX If the response includes a content-length header, we - # need to make sure that the client doesn't read more than the - # specified number of bytes. If it does, it will block until - # the server times out and closes the connection. (The only - # applies to HTTP/1.1 connections.) Since some clients access - # self.fp directly rather than calling read(), this is a little - # tricky. 
- self.fp = sock.makefile("rb", 0) - self.debuglevel = debuglevel - self.strict = strict - self._method = method - - self.msg = None - - # from the Status-Line of the response - self.version = _UNKNOWN # HTTP-Version - self.status = _UNKNOWN # Status-Code - self.reason = _UNKNOWN # Reason-Phrase - - self.chunked = _UNKNOWN # is "chunked" being used? - self.chunk_left = _UNKNOWN # bytes left to read in current chunk - self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response - - def _read_status(self): - # Initialize with Simple-Response defaults. - line = str(self.fp.readline(), "iso-8859-1") - if self.debuglevel > 0: - print("reply:", repr(line)) - if not line: - # Presumably, the server closed the connection before - # sending a valid response. - raise BadStatusLine(line) - try: - [version, status, reason] = line.split(None, 2) - except ValueError: - try: - [version, status] = line.split(None, 1) - reason = "" - except ValueError: - # empty version will cause next test to fail and status - # will be treated as 0.9 response. - version = "" - if not version.startswith("HTTP/"): - if self.strict: - self.close() - raise BadStatusLine(line) - else: - # Assume it's a Simple-Response from an 0.9 server. - # We have to convert the first line back to raw bytes - # because self.fp.readline() needs to return bytes. 
- self.fp = LineAndFileWrapper(bytes(line, "ascii"), self.fp) - return "HTTP/0.9", 200, "" - - # The status code is a three-digit number - try: - status = int(status) - if status < 100 or status > 999: - raise BadStatusLine(line) - except ValueError: - raise BadStatusLine(line) - return version, status, reason - - def begin(self): - if self.msg is not None: - # we've already started reading the response - return - - # read until we get a non-100 response - while True: - version, status, reason = self._read_status() - if status != CONTINUE: - break - # skip the header from the 100 response - while True: - skip = self.fp.readline().strip() - if not skip: - break - if self.debuglevel > 0: - print("header:", skip) - - self.status = status - self.reason = reason.strip() - if version == "HTTP/1.0": - self.version = 10 - elif version.startswith("HTTP/1."): - self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - elif version == "HTTP/0.9": - self.version = 9 - else: - raise UnknownProtocol(version) - - if self.version == 9: - self.length = None - self.chunked = 0 - self.will_close = 1 - self.msg = HTTPMessage(io.BytesIO()) - return - - self.msg = HTTPMessage(self.fp, 0) - if self.debuglevel > 0: - for hdr in self.msg.headers: - print("header:", hdr, end=" ") - - # don't let the msg keep an fp - self.msg.fp = None - - # are we using the chunked-style of transfer encoding? - tr_enc = self.msg.getheader("transfer-encoding") - if tr_enc and tr_enc.lower() == "chunked": - self.chunked = 1 - self.chunk_left = None - else: - self.chunked = 0 - - # will the connection close at the end of the response? - self.will_close = self._check_close() - - # do we have a Content-Length? 
- # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" - self.length = None - length = self.msg.getheader("content-length") - if length and not self.chunked: - try: - self.length = int(length) - except ValueError: - self.length = None - else: - if self.length < 0: # ignore nonsensical negative lengths - self.length = None - else: - self.length = None - - # does the body have a fixed length? (of zero) - if (status == NO_CONTENT or status == NOT_MODIFIED or - 100 <= status < 200 or # 1xx codes - self._method == "HEAD"): - self.length = 0 - - # if the connection remains open, and we aren't using chunked, and - # a content-length was not provided, then assume that the connection - # WILL close. - if (not self.will_close and - not self.chunked and - self.length is None): - self.will_close = 1 - - def _check_close(self): - conn = self.msg.getheader("connection") - if self.version == 11: - # An HTTP/1.1 proxy is assumed to stay open unless - # explicitly closed. - conn = self.msg.getheader("connection") - if conn and "close" in conn.lower(): - return True - return False - - # Some HTTP/1.0 implementations have support for persistent - # connections, using rules different than HTTP/1.1. - - # For older HTTP, Keep-Alive indicates persistent connection. - if self.msg.getheader("keep-alive"): - return False - - # At least Akamai returns a "Connection: Keep-Alive" header, - # which was supposed to be sent by the client. - if conn and "keep-alive" in conn.lower(): - return False - - # Proxy-Connection is a netscape hack. - pconn = self.msg.getheader("proxy-connection") - if pconn and "keep-alive" in pconn.lower(): - return False - - # otherwise, assume it will close - return True - - def close(self): - if self.fp: - self.fp.close() - self.fp = None - - # These implementations are for the benefit of io.BufferedReader. - - # XXX This class should probably be revised to act more like - # the "raw stream" that BufferedReader expects. 
- - @property - def closed(self): - return self.isclosed() - - def flush(self): - self.fp.flush() - - # End of "raw stream" methods - - def isclosed(self): - # NOTE: it is possible that we will not ever call self.close(). This - # case occurs when will_close is TRUE, length is None, and we - # read up to the last byte, but NOT past it. - # - # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be - # called, meaning self.isclosed() is meaningful. - return self.fp is None - - # XXX It would be nice to have readline and __iter__ for this, too. - - def read(self, amt=None): - if self.fp is None: - return b"" - - if self.chunked: - return self._read_chunked(amt) - - if amt is None: - # unbounded read - if self.length is None: - s = self.fp.read() - else: - s = self._safe_read(self.length) - self.length = 0 - self.close() # we read everything - return s - - if self.length is not None: - if amt > self.length: - # clip the read to the "end of response" - amt = self.length - - # we do not use _safe_read() here because this may be a .will_close - # connection, and the user is reading more bytes than will be provided - # (for example, reading in 1k chunks) - s = self.fp.read(amt) - if self.length is not None: - self.length -= len(s) - if not self.length: - self.close() - return s - - def _read_chunked(self, amt): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - value = b"" - - # XXX This accumulates chunks by repeated string concatenation, - # which is not efficient as the number or size of chunks gets big. 
- while True: - if chunk_left is None: - line = self.fp.readline() - i = line.find(b";") - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - chunk_left = int(line, 16) - except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self.close() - raise IncompleteRead(value) - if chunk_left == 0: - break - if amt is None: - value += self._safe_read(chunk_left) - elif amt < chunk_left: - value += self._safe_read(amt) - self.chunk_left = chunk_left - amt - return value - elif amt == chunk_left: - value += self._safe_read(amt) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return value - else: - value += self._safe_read(chunk_left) - amt -= chunk_left - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline() - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line == b"\r\n": - break - - # we read everything; close the "file" - self.close() - - return value - - def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. IncompleteRead() will be raised in this - situation. - - This function should be used when bytes "should" be present for - reading. If the bytes are truly not available (due to EOF), then the - IncompleteRead exception can be used to detect the problem. 
- """ - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(s) - s.append(chunk) - amt -= len(chunk) - return b"".join(s) - - def getheader(self, name, default=None): - if self.msg is None: - raise ResponseNotReady() - return self.msg.getheader(name, default) - - def getheaders(self): - """Return list of (header, value) tuples.""" - if self.msg is None: - raise ResponseNotReady() - return list(self.msg.items()) - - -class HTTPConnection: - - _http_vsn = 11 - _http_vsn_str = 'HTTP/1.1' - - response_class = HTTPResponse - default_port = HTTP_PORT - auto_open = 1 - debuglevel = 0 - strict = 0 - - def __init__(self, host, port=None, strict=None, timeout=None): - self.timeout = timeout - self.sock = None - self._buffer = [] - self.__response = None - self.__state = _CS_IDLE - self._method = None - - self._set_hostport(host, port) - if strict is not None: - self.strict = strict - - def _set_hostport(self, host, port): - if port is None: - i = host.rfind(':') - j = host.rfind(']') # ipv6 addresses have [...] - if i > j: - try: - port = int(host[i+1:]) - except ValueError: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) - host = host[:i] - else: - port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] - self.host = host - self.port = port - - def set_debuglevel(self, level): - self.debuglevel = level - - def connect(self): - """Connect to the host and port specified in __init__.""" - self.sock = socket.create_connection((self.host,self.port), - self.timeout) - - def close(self): - """Close the connection to the HTTP server.""" - if self.sock: - self.sock.close() # close it manually... 
there may be other refs - self.sock = None - if self.__response: - self.__response.close() - self.__response = None - self.__state = _CS_IDLE - - def send(self, str): - """Send `str' to the server.""" - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected() - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. - if self.debuglevel > 0: - print("send:", repr(str)) - try: - blocksize=8192 - if hasattr(str,'read') : - if self.debuglevel > 0: print("sendIng a read()able") - data=str.read(blocksize) - while data: - self.sock.sendall(data) - data=str.read(blocksize) - else: - self.sock.sendall(str) - except socket.error as v: - if v.args[0] == 32: # Broken pipe - self.close() - raise - - def _output(self, s): - """Add a line of output to the current request buffer. - - Assumes that the line does *not* end with \\r\\n. - """ - self._buffer.append(s) - - def _send_output(self): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - """ - self._buffer.extend((b"", b"")) - msg = b"\r\n".join(self._buffer) - del self._buffer[:] - self.send(msg) - - def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): - """Send a request to the server. - - `method' specifies an HTTP request method, e.g. 'GET'. - `url' specifies the object being requested, e.g. '/index.html'. - `skip_host' if True does not add automatically a 'Host:' header - `skip_accept_encoding' if True does not add automatically an - 'Accept-Encoding:' header - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # in certain cases, we cannot issue another request on this connection. 
- # this occurs when: - # 1) we are in the process of sending a request. (_CS_REQ_STARTED) - # 2) a response to a previous request has signalled that it is going - # to close the connection upon completion. - # 3) the headers for the previous response have not been read, thus - # we cannot determine whether point (2) is true. (_CS_REQ_SENT) - # - # if there is no prior response, then we can request at will. - # - # if point (2) is true, then we will have passed the socket to the - # response (effectively meaning, "there is no prior response"), and - # will open a new one when a new request is made. - # - # Note: if a prior response exists, then we *can* start a new request. - # We are not allowed to begin fetching the response to this new - # request, however, until that prior response is complete. - # - if self.__state == _CS_IDLE: - self.__state = _CS_REQ_STARTED - else: - raise CannotSendRequest() - - # Save the method we use, we need it later in the response phase - self._method = method - if not url: - url = '/' - request = '%s %s %s' % (method, url, self._http_vsn_str) - - # Non-ASCII characters should have been eliminated earlier - self._output(request.encode('ascii')) - - if self._http_vsn == 11: - # Issue some standard headers for better HTTP/1.1 compliance - - if not skip_host: - # this header is issued *only* for HTTP/1.1 - # connections. more specifically, this means it is - # only issued when the client uses the new - # HTTPConnection() class. backwards-compat clients - # will be using HTTP/1.0 and those clients may be - # issuing this header themselves. we should NOT issue - # it twice; some web servers (such as Apache) barf - # when they see two Host: headers - - # If we need a non-standard port,include it in the - # header. If the request is going through a proxy, - # but the host of the actual URL, not the host of the - # proxy. 
- - netloc = '' - if url.startswith('http'): - nil, netloc, nil, nil, nil = urlsplit(url) - - if netloc: - try: - netloc_enc = netloc.encode("ascii") - except UnicodeEncodeError: - netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) - else: - try: - host_enc = self.host.encode("ascii") - except UnicodeEncodeError: - host_enc = self.host.encode("idna") - if self.port == HTTP_PORT: - self.putheader('Host', host_enc) - else: - host_enc = host_enc.decode("ascii") - self.putheader('Host', "%s:%s" % (host_enc, self.port)) - - # note: we are assuming that clients will not attempt to set these - # headers since *this* library must deal with the - # consequences. this also means that when the supporting - # libraries are updated to recognize other forms, then this - # code should be changed (removed or updated). - - # we only want a Content-Encoding of "identity" since we don't - # support encodings such as x-gzip or x-deflate. - if not skip_accept_encoding: - self.putheader('Accept-Encoding', 'identity') - - # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" - #self.putheader('TE', 'chunked') - - # if TE is supplied in the header, then it must appear in a - # Connection header. - #self.putheader('Connection', 'TE') - - else: - # For HTTP/1.0, the server will assume "not chunked" - pass - - def putheader(self, header, value): - """Send a request header line to the server. 
- - For example: h.putheader('Accept', 'text/html') - """ - if self.__state != _CS_REQ_STARTED: - raise CannotSendHeader() - - if hasattr(header, 'encode'): - header = header.encode('ascii') - if hasattr(value, 'encode'): - value = value.encode('ascii') - header = header + b': ' + value - self._output(header) - - def endheaders(self): - """Indicate that the last header line has been sent to the server.""" - - if self.__state == _CS_REQ_STARTED: - self.__state = _CS_REQ_SENT - else: - raise CannotSendHeader() - - self._send_output() - - def request(self, method, url, body=None, headers={}): - """Send a complete request to the server.""" - try: - self._send_request(method, url, body, headers) - except socket.error as v: - # trap 'Broken pipe' if we're allowed to automatically reconnect - if v.args[0] != 32 or not self.auto_open: - raise - # try one more time - self._send_request(method, url, body, headers) - - def _send_request(self, method, url, body, headers): - # honour explicitly requested Host: and Accept-Encoding headers - header_names = dict.fromkeys([k.lower() for k in headers]) - skips = {} - if 'host' in header_names: - skips['skip_host'] = 1 - if 'accept-encoding' in header_names: - skips['skip_accept_encoding'] = 1 - - self.putrequest(method, url, **skips) - - if body and ('content-length' not in header_names): - thelen = None - try: - thelen = str(len(body)) - except TypeError as te: - # If this is a file-like object, try to - # fstat its file descriptor - import os - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") - - if thelen is not None: - self.putheader('Content-Length',thelen) - for hdr, value in headers.items(): - self.putheader(hdr, value) - self.endheaders() - - if body: - if isinstance(body, str): body = body.encode('ascii') - self.send(body) - - def getresponse(self): - """Get the response from the server.""" - - # 
if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - # - # if a prior response exists, then it must be completed (otherwise, we - # cannot read this response's header to determine the connection-close - # behavior) - # - # note: if a prior response existed, but was connection-close, then the - # socket and response were made independent of this HTTPConnection - # object since a new request requires that we open a whole new - # connection - # - # this means the prior response had one of two states: - # 1) will_close: this connection was reset and the prior socket and - # response operate independently - # 2) persistent: the response was retained and we await its - # isclosed() status to become true. - # - if self.__state != _CS_REQ_SENT or self.__response: - raise ResponseNotReady() - - if self.debuglevel > 0: - response = self.response_class(self.sock, self.debuglevel, - strict=self.strict, - method=self._method) - else: - response = self.response_class(self.sock, strict=self.strict, - method=self._method) - - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE - - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response - - return response - -try: - import ssl -except ImportError: - pass -else: - class HTTPSConnection(HTTPConnection): - "This class allows communication via SSL." - - default_port = HTTPS_PORT - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=None): - HTTPConnection.__init__(self, host, port, strict, timeout) - self.key_file = key_file - self.cert_file = cert_file - - def connect(self): - "Connect to a host on a given (SSL) port." 
- - sock = socket.create_connection((self.host, self.port), self.timeout) - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) - - - def FakeSocket (sock, sslobj): - warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + - "Use the result of ssl.wrap_socket() directly instead.", - DeprecationWarning, stacklevel=2) - return sslobj - - __all__.append("HTTPSConnection") - -class HTTPException(Exception): - # Subclasses that define an __init__ must call Exception.__init__ - # or define self.args. Otherwise, str() will fail. - pass - -class NotConnected(HTTPException): - pass - -class InvalidURL(HTTPException): - pass - -class UnknownProtocol(HTTPException): - def __init__(self, version): - self.args = version, - self.version = version - -class UnknownTransferEncoding(HTTPException): - pass - -class UnimplementedFileMode(HTTPException): - pass - -class IncompleteRead(HTTPException): - def __init__(self, partial): - self.args = partial, - self.partial = partial - -class ImproperConnectionState(HTTPException): - pass - -class CannotSendRequest(ImproperConnectionState): - pass - -class CannotSendHeader(ImproperConnectionState): - pass - -class ResponseNotReady(ImproperConnectionState): - pass - -class BadStatusLine(HTTPException): - def __init__(self, line): - self.args = line, - self.line = line - -# for backwards compatibility -error = HTTPException - -class LineAndFileWrapper: - """A limited file-like object for HTTP/0.9 responses.""" - - # The status-line parsing code calls readline(), which normally - # get the HTTP status line. For a 0.9 response, however, this is - # actually the first line of the body! Clients need to get a - # readable file object that contains that line. 
- - def __init__(self, line, file): - self._line = line - self._file = file - self._line_consumed = 0 - self._line_offset = 0 - self._line_left = len(line) - - def __getattr__(self, attr): - return getattr(self._file, attr) - - def _done(self): - # called when the last byte is read from the line. After the - # call, all read methods are delegated to the underlying file - # object. - self._line_consumed = 1 - self.read = self._file.read - self.readline = self._file.readline - self.readlines = self._file.readlines - - def read(self, amt=None): - if self._line_consumed: - return self._file.read(amt) - assert self._line_left - if amt is None or amt > self._line_left: - s = self._line[self._line_offset:] - self._done() - if amt is None: - return s + self._file.read() - else: - return s + self._file.read(amt - len(s)) - else: - assert amt <= self._line_left - i = self._line_offset - j = i + amt - s = self._line[i:j] - self._line_offset = j - self._line_left -= amt - if self._line_left == 0: - self._done() - return s - - def readline(self): - if self._line_consumed: - return self._file.readline() - assert self._line_left - s = self._line[self._line_offset:] - self._done() - return s - - def readlines(self, size=None): - if self._line_consumed: - return self._file.readlines(size) - assert self._line_left - L = [self._line[self._line_offset:]] - self._done() - if size is None: - return L + self._file.readlines() - else: - return L + self._file.readlines(size) diff --git a/Lib/logging/handlers.py b/Lib/logging/handlers.py index c871ed0..29c398d 100644 --- a/Lib/logging/handlers.py +++ b/Lib/logging/handlers.py @@ -1002,9 +1002,9 @@ class HTTPHandler(logging.Handler): Send the record to the Web server as an URL-encoded dictionary """ try: - import httplib, urllib + import http.client, urllib host = self.host - h = httplib.HTTP(host) + h = http.client.HTTP(host) url = self.url data = urllib.urlencode(self.mapLogRecord(record)) if self.method == "GET": diff --git a/Lib/pydoc.py 
b/Lib/pydoc.py index 140e1d8..42ba266 100755 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1921,7 +1921,7 @@ def apropos(key): # --------------------------------------------------- web browser interface def serve(port, callback=None, completer=None): - import BaseHTTPServer, mimetools, select + import http.server, mimetools, select # Patch up mimetools.Message so it doesn't break if rfc822 is reloaded. class Message(mimetools.Message): @@ -1933,7 +1933,7 @@ def serve(port, callback=None, completer=None): self.parsetype() self.parseplist() - class DocHandler(BaseHTTPServer.BaseHTTPRequestHandler): + class DocHandler(http.server.BaseHTTPRequestHandler): def send_document(self, title, contents): try: self.send_response(200) @@ -1978,7 +1978,7 @@ pydoc by Ka-Ping Yee <ping@lfw.org>''' def log_message(self, *args): pass - class DocServer(BaseHTTPServer.HTTPServer): + class DocServer(http.server.HTTPServer): def __init__(self, port, callback): host = (sys.platform == 'mac') and '127.0.0.1' or 'localhost' self.address = ('', port) @@ -1997,7 +1997,7 @@ pydoc by Ka-Ping Yee <ping@lfw.org>''' self.base.server_activate(self) if self.callback: self.callback(self) - DocServer.base = BaseHTTPServer.HTTPServer + DocServer.base = http.server.HTTPServer DocServer.handler = DocHandler DocHandler.MessageClass = Message try: diff --git a/Lib/test/test_SimpleHTTPServer.py b/Lib/test/test_SimpleHTTPServer.py index d285f27..36c2b89 100644 --- a/Lib/test/test_SimpleHTTPServer.py +++ b/Lib/test/test_SimpleHTTPServer.py @@ -4,11 +4,11 @@ We don't want to require the 'network' resource. 
""" import os, unittest -from SimpleHTTPServer import SimpleHTTPRequestHandler +from http.server import SimpleHTTPRequestHandler from test import support -class SocketlessRequestHandler (SimpleHTTPRequestHandler): +class SocketlessRequestHandler(SimpleHTTPRequestHandler): def __init__(self): pass diff --git a/Lib/test/test___all__.py b/Lib/test/test___all__.py index f3f7ba3..89395be 100644 --- a/Lib/test/test___all__.py +++ b/Lib/test/test___all__.py @@ -33,12 +33,10 @@ class AllTest(unittest.TestCase): # than an AttributeError somewhere deep in CGIHTTPServer. import _socket - self.check_all("BaseHTTPServer") - self.check_all("CGIHTTPServer") + self.check_all("http.server") self.check_all("configparser") - self.check_all("Cookie") - self.check_all("Queue") - self.check_all("SimpleHTTPServer") + self.check_all("http.cookies") + self.check_all("queue") self.check_all("socketserver") self.check_all("aifc") self.check_all("base64") @@ -77,7 +75,7 @@ class AllTest(unittest.TestCase): self.check_all("gzip") self.check_all("heapq") self.check_all("htmllib") - self.check_all("httplib") + self.check_all("http.client") self.check_all("ihooks") self.check_all("imaplib") self.check_all("imghdr") diff --git a/Lib/test/test_cookie.py b/Lib/test/test_cookie.py deleted file mode 100644 index 07d29e1..0000000 --- a/Lib/test/test_cookie.py +++ /dev/null @@ -1,82 +0,0 @@ -# Simple test suite for Cookie.py - -from test.support import run_unittest, run_doctest -import unittest -import Cookie - -import warnings -warnings.filterwarnings("ignore", - ".* class is insecure.*", - DeprecationWarning) - -class CookieTests(unittest.TestCase): - # Currently this only tests SimpleCookie - def test_basic(self): - cases = [ - { 'data': 'chips=ahoy; vienna=finger', - 'dict': {'chips':'ahoy', 'vienna':'finger'}, - 'repr': "", - 'output': 'Set-Cookie: chips=ahoy\nSet-Cookie: vienna=finger', - }, - - { 'data': 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - 'dict': {'keebler' : 'E=mc2; L="Loves"; 
fudge=\012;'}, - 'repr': '''''', - 'output': 'Set-Cookie: keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', - }, - - # Check illegal cookies that have an '=' char in an unquoted value - { 'data': 'keebler=E=mc2', - 'dict': {'keebler' : 'E=mc2'}, - 'repr': "", - 'output': 'Set-Cookie: keebler=E=mc2', - } - ] - - for case in cases: - C = Cookie.SimpleCookie() - C.load(case['data']) - self.assertEqual(repr(C), case['repr']) - self.assertEqual(C.output(sep='\n'), case['output']) - for k, v in sorted(case['dict'].items()): - self.assertEqual(C[k].value, v) - - def test_load(self): - C = Cookie.SimpleCookie() - C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') - - self.assertEqual(C['Customer'].value, 'WILE_E_COYOTE') - self.assertEqual(C['Customer']['version'], '1') - self.assertEqual(C['Customer']['path'], '/acme') - - self.assertEqual(C.output(['path']), - 'Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme') - self.assertEqual(C.js_output(), """ - - """) - self.assertEqual(C.js_output(['path']), """ - - """) - - def test_quoted_meta(self): - # Try cookie with quoted meta-data - C = Cookie.SimpleCookie() - C.load('Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"') - self.assertEqual(C['Customer'].value, 'WILE_E_COYOTE') - self.assertEqual(C['Customer']['version'], '1') - self.assertEqual(C['Customer']['path'], '/acme') - -def test_main(): - run_unittest(CookieTests) - run_doctest(Cookie) - -if __name__ == '__main__': - test_main() diff --git a/Lib/test/test_cookielib.py b/Lib/test/test_cookielib.py deleted file mode 100644 index e400bfa..0000000 --- a/Lib/test/test_cookielib.py +++ /dev/null @@ -1,1734 +0,0 @@ -"""Tests for cookielib.py.""" - -import re, os, time -from unittest import TestCase - -from test import support - -class DateTimeTests(TestCase): - - def test_time2isoz(self): - from cookielib import time2isoz - - base = 1019227000 - day = 24*3600 - self.assertEquals(time2isoz(base), "2002-04-19 14:36:40Z") - self.assertEquals(time2isoz(base+day), 
"2002-04-20 14:36:40Z") - self.assertEquals(time2isoz(base+2*day), "2002-04-21 14:36:40Z") - self.assertEquals(time2isoz(base+3*day), "2002-04-22 14:36:40Z") - - az = time2isoz() - bz = time2isoz(500000) - for text in (az, bz): - self.assert_(re.search(r"^\d{4}-\d\d-\d\d \d\d:\d\d:\d\dZ$", text), - "bad time2isoz format: %s %s" % (az, bz)) - - def test_http2time(self): - from cookielib import http2time - - def parse_date(text): - return time.gmtime(http2time(text))[:6] - - self.assertEquals(parse_date("01 Jan 2001"), (2001, 1, 1, 0, 0, 0.0)) - - # this test will break around year 2070 - self.assertEquals(parse_date("03-Feb-20"), (2020, 2, 3, 0, 0, 0.0)) - - # this test will break around year 2048 - self.assertEquals(parse_date("03-Feb-98"), (1998, 2, 3, 0, 0, 0.0)) - - def test_http2time_formats(self): - from cookielib import http2time, time2isoz - - # test http2time for supported dates. Test cases with 2 digit year - # will probably break in year 2044. - tests = [ - 'Thu, 03 Feb 1994 00:00:00 GMT', # proposed new HTTP format - 'Thursday, 03-Feb-94 00:00:00 GMT', # old rfc850 HTTP format - 'Thursday, 03-Feb-1994 00:00:00 GMT', # broken rfc850 HTTP format - - '03 Feb 1994 00:00:00 GMT', # HTTP format (no weekday) - '03-Feb-94 00:00:00 GMT', # old rfc850 (no weekday) - '03-Feb-1994 00:00:00 GMT', # broken rfc850 (no weekday) - '03-Feb-1994 00:00 GMT', # broken rfc850 (no weekday, no seconds) - '03-Feb-1994 00:00', # broken rfc850 (no weekday, no seconds, no tz) - - '03-Feb-94', # old rfc850 HTTP format (no weekday, no time) - '03-Feb-1994', # broken rfc850 HTTP format (no weekday, no time) - '03 Feb 1994', # proposed new HTTP format (no weekday, no time) - - # A few tests with extra space at various places - ' 03 Feb 1994 0:00 ', - ' 03-Feb-1994 ', - ] - - test_t = 760233600 # assume broken POSIX counting of seconds - result = time2isoz(test_t) - expected = "1994-02-03 00:00:00Z" - self.assertEquals(result, expected, - "%s => '%s' (%s)" % (test_t, result, expected)) 
- - for s in tests: - t = http2time(s) - t2 = http2time(s.lower()) - t3 = http2time(s.upper()) - - self.assert_(t == t2 == t3 == test_t, - "'%s' => %s, %s, %s (%s)" % (s, t, t2, t3, test_t)) - - def test_http2time_garbage(self): - from cookielib import http2time - - for test in [ - '', - 'Garbage', - 'Mandag 16. September 1996', - '01-00-1980', - '01-13-1980', - '00-01-1980', - '32-01-1980', - '01-01-1980 25:00:00', - '01-01-1980 00:61:00', - '01-01-1980 00:00:62', - ]: - self.assert_(http2time(test) is None, - "http2time(%s) is not None\n" - "http2time(test) %s" % (test, http2time(test)) - ) - - -class HeaderTests(TestCase): - def test_parse_ns_headers(self): - from cookielib import parse_ns_headers - - # quotes should be stripped - expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] - for hdr in [ - 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', - 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', - ]: - self.assertEquals(parse_ns_headers([hdr]), expected) - - def test_parse_ns_headers_special_names(self): - # names such as 'expires' are not special in first name=value pair - # of Set-Cookie: header - from cookielib import parse_ns_headers - - # Cookie with name 'expires' - hdr = 'expires=01 Jan 2040 22:23:32 GMT' - expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", "0")]] - self.assertEquals(parse_ns_headers([hdr]), expected) - - def test_join_header_words(self): - from cookielib import join_header_words - - joined = join_header_words([[("foo", None), ("bar", "baz")]]) - self.assertEquals(joined, "foo; bar=baz") - - self.assertEquals(join_header_words([[]]), "") - - def test_split_header_words(self): - from cookielib import split_header_words - - tests = [ - ("foo", [[("foo", None)]]), - ("foo=bar", [[("foo", "bar")]]), - (" foo ", [[("foo", None)]]), - (" foo= ", [[("foo", "")]]), - (" foo=", [[("foo", "")]]), - (" foo= ; ", [[("foo", "")]]), - (" foo= ; bar= baz ", [[("foo", ""), ("bar", "baz")]]), - ("foo=bar bar=baz", [[("foo", 
"bar"), ("bar", "baz")]]), - # doesn't really matter if this next fails, but it works ATM - ("foo= bar=baz", [[("foo", "bar=baz")]]), - ("foo=bar;bar=baz", [[("foo", "bar"), ("bar", "baz")]]), - ('foo bar baz', [[("foo", None), ("bar", None), ("baz", None)]]), - ("a, b, c", [[("a", None)], [("b", None)], [("c", None)]]), - (r'foo; bar=baz, spam=, foo="\,\;\"", bar= ', - [[("foo", None), ("bar", "baz")], - [("spam", "")], [("foo", ',;"')], [("bar", "")]]), - ] - - for arg, expect in tests: - try: - result = split_header_words([arg]) - except: - import traceback, io - f = io.StringIO() - traceback.print_exc(None, f) - result = "(error -- traceback follows)\n\n%s" % f.getvalue() - self.assertEquals(result, expect, """ -When parsing: '%s' -Expected: '%s' -Got: '%s' -""" % (arg, expect, result)) - - def test_roundtrip(self): - from cookielib import split_header_words, join_header_words - - tests = [ - ("foo", "foo"), - ("foo=bar", "foo=bar"), - (" foo ", "foo"), - ("foo=", 'foo=""'), - ("foo=bar bar=baz", "foo=bar; bar=baz"), - ("foo=bar;bar=baz", "foo=bar; bar=baz"), - ('foo bar baz', "foo; bar; baz"), - (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'), - ('foo,,,bar', 'foo, bar'), - ('foo=bar,bar=baz', 'foo=bar, bar=baz'), - - ('text/html; charset=iso-8859-1', - 'text/html; charset="iso-8859-1"'), - - ('foo="bar"; port="80,81"; discard, bar=baz', - 'foo=bar; port="80,81"; discard, bar=baz'), - - (r'Basic realm="\"foo\\\\bar\""', - r'Basic; realm="\"foo\\\\bar\""') - ] - - for arg, expect in tests: - input = split_header_words([arg]) - res = join_header_words(input) - self.assertEquals(res, expect, """ -When parsing: '%s' -Expected: '%s' -Got: '%s' -Input was: '%s' -""" % (arg, expect, res, input)) - - -class FakeResponse: - def __init__(self, headers=[], url=None): - """ - headers: list of RFC822-style 'Key: value' strings - """ - import mimetools, io - f = io.StringIO("\n".join(headers)) - self._headers = mimetools.Message(f) - self._url = url - def info(self): return 
self._headers - -def interact_2965(cookiejar, url, *set_cookie_hdrs): - return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie2") - -def interact_netscape(cookiejar, url, *set_cookie_hdrs): - return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie") - -def _interact(cookiejar, url, set_cookie_hdrs, hdr_name): - """Perform a single request / response cycle, returning Cookie: header.""" - from urllib2 import Request - req = Request(url) - cookiejar.add_cookie_header(req) - cookie_hdr = req.get_header("Cookie", "") - headers = [] - for hdr in set_cookie_hdrs: - headers.append("%s: %s" % (hdr_name, hdr)) - res = FakeResponse(headers, url) - cookiejar.extract_cookies(res, req) - return cookie_hdr - - -class FileCookieJarTests(TestCase): - def test_lwp_valueless_cookie(self): - # cookies with no value should be saved and loaded consistently - from cookielib import LWPCookieJar - filename = support.TESTFN - c = LWPCookieJar() - interact_netscape(c, "http://www.acme.com/", 'boo') - self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) - try: - c.save(filename, ignore_discard=True) - c = LWPCookieJar() - c.load(filename, ignore_discard=True) - finally: - try: os.unlink(filename) - except OSError: pass - self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) - - def test_bad_magic(self): - from cookielib import LWPCookieJar, MozillaCookieJar, LoadError - # IOErrors (eg. file doesn't exist) are allowed to propagate - filename = support.TESTFN - for cookiejar_class in LWPCookieJar, MozillaCookieJar: - c = cookiejar_class() - try: - c.load(filename="for this test to work, a file with this " - "filename should not exist") - except IOError as exc: - # exactly IOError, not LoadError - self.assertEqual(exc.__class__, IOError) - else: - self.fail("expected IOError for invalid filename") - # Invalid contents of cookies file (eg. bad magic string) - # causes a LoadError. 
- try: - f = open(filename, "w") - f.write("oops\n") - for cookiejar_class in LWPCookieJar, MozillaCookieJar: - c = cookiejar_class() - self.assertRaises(LoadError, c.load, filename) - finally: - try: os.unlink(filename) - except OSError: pass - -class CookieTests(TestCase): - # XXX - # Get rid of string comparisons where not actually testing str / repr. - # .clear() etc. - # IP addresses like 50 (single number, no dot) and domain-matching - # functions (and is_HDN)? See draft RFC 2965 errata. - # Strictness switches - # is_third_party() - # unverifiability / third-party blocking - # Netscape cookies work the same as RFC 2965 with regard to port. - # Set-Cookie with negative max age. - # If turn RFC 2965 handling off, Set-Cookie2 cookies should not clobber - # Set-Cookie cookies. - # Cookie2 should be sent if *any* cookies are not V1 (ie. V0 OR V2 etc.). - # Cookies (V1 and V0) with no expiry date should be set to be discarded. - # RFC 2965 Quoting: - # Should accept unquoted cookie-attribute values? check errata draft. - # Which are required on the way in and out? - # Should always return quoted cookie-attribute values? - # Proper testing of when RFC 2965 clobbers Netscape (waiting for errata). - # Path-match on return (same for V0 and V1). - # RFC 2965 acceptance and returning rules - # Set-Cookie2 without version attribute is rejected. - - # Netscape peculiarities list from Ronald Tschalar. - # The first two still need tests, the rest are covered. -## - Quoting: only quotes around the expires value are recognized as such -## (and yes, some folks quote the expires value); quotes around any other -## value are treated as part of the value. -## - White space: white space around names and values is ignored -## - Default path: if no path parameter is given, the path defaults to the -## path in the request-uri up to, but not including, the last '/'. Note -## that this is entirely different from what the spec says. 
-## - Commas and other delimiters: Netscape just parses until the next ';'. -## This means it will allow commas etc inside values (and yes, both -## commas and equals are commonly appear in the cookie value). This also -## means that if you fold multiple Set-Cookie header fields into one, -## comma-separated list, it'll be a headache to parse (at least my head -## starts hurting everytime I think of that code). -## - Expires: You'll get all sorts of date formats in the expires, -## including emtpy expires attributes ("expires="). Be as flexible as you -## can, and certainly don't expect the weekday to be there; if you can't -## parse it, just ignore it and pretend it's a session cookie. -## - Domain-matching: Netscape uses the 2-dot rule for _all_ domains, not -## just the 7 special TLD's listed in their spec. And folks rely on -## that... - - def test_domain_return_ok(self): - # test optimization: .domain_return_ok() should filter out most - # domains in the CookieJar before we try to access them (because that - # may require disk access -- in particular, with MSIECookieJar) - # This is only a rough check for performance reasons, so it's not too - # critical as long as it's sufficiently liberal. 
- import cookielib, urllib2 - pol = cookielib.DefaultCookiePolicy() - for url, domain, ok in [ - ("http://foo.bar.com/", "blah.com", False), - ("http://foo.bar.com/", "rhubarb.blah.com", False), - ("http://foo.bar.com/", "rhubarb.foo.bar.com", False), - ("http://foo.bar.com/", ".foo.bar.com", True), - ("http://foo.bar.com/", "foo.bar.com", True), - ("http://foo.bar.com/", ".bar.com", True), - ("http://foo.bar.com/", "com", True), - ("http://foo.com/", "rhubarb.foo.com", False), - ("http://foo.com/", ".foo.com", True), - ("http://foo.com/", "foo.com", True), - ("http://foo.com/", "com", True), - ("http://foo/", "rhubarb.foo", False), - ("http://foo/", ".foo", True), - ("http://foo/", "foo", True), - ("http://foo/", "foo.local", True), - ("http://foo/", ".local", True), - ]: - request = urllib2.Request(url) - r = pol.domain_return_ok(domain, request) - if ok: self.assert_(r) - else: self.assert_(not r) - - def test_missing_value(self): - from cookielib import MozillaCookieJar, lwp_cookie_str - - # missing = sign in Cookie: header is regarded by Mozilla as a missing - # name, and by cookielib as a missing value - filename = support.TESTFN - c = MozillaCookieJar(filename) - interact_netscape(c, "http://www.acme.com/", 'eggs') - interact_netscape(c, "http://www.acme.com/", '"spam"; path=/foo/') - cookie = c._cookies["www.acme.com"]["/"]["eggs"] - self.assert_(cookie.value is None) - self.assertEquals(cookie.name, "eggs") - cookie = c._cookies["www.acme.com"]['/foo/']['"spam"'] - self.assert_(cookie.value is None) - self.assertEquals(cookie.name, '"spam"') - self.assertEquals(lwp_cookie_str(cookie), ( - r'"spam"; path="/foo/"; domain="www.acme.com"; ' - 'path_spec; discard; version=0')) - old_str = repr(c) - c.save(ignore_expires=True, ignore_discard=True) - try: - c = MozillaCookieJar(filename) - c.revert(ignore_expires=True, ignore_discard=True) - finally: - os.unlink(c.filename) - # cookies unchanged apart from lost info re. 
whether path was specified - self.assertEquals( - repr(c), - re.sub("path_specified=%s" % True, "path_specified=%s" % False, - old_str) - ) - self.assertEquals(interact_netscape(c, "http://www.acme.com/foo/"), - '"spam"; eggs') - - def test_rfc2109_handling(self): - # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, - # dependent on policy settings - from cookielib import CookieJar, DefaultCookiePolicy - - for rfc2109_as_netscape, rfc2965, version in [ - # default according to rfc2965 if not explicitly specified - (None, False, 0), - (None, True, 1), - # explicit rfc2109_as_netscape - (False, False, None), # version None here means no cookie stored - (False, True, 1), - (True, False, 0), - (True, True, 0), - ]: - policy = DefaultCookiePolicy( - rfc2109_as_netscape=rfc2109_as_netscape, - rfc2965=rfc2965) - c = CookieJar(policy) - interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") - try: - cookie = c._cookies["www.example.com"]["/"]["ni"] - except KeyError: - self.assert_(version is None) # didn't expect a stored cookie - else: - self.assertEqual(cookie.version, version) - # 2965 cookies are unaffected - interact_2965(c, "http://www.example.com/", - "foo=bar; Version=1") - if rfc2965: - cookie2965 = c._cookies["www.example.com"]["/"]["foo"] - self.assertEqual(cookie2965.version, 1) - - def test_ns_parser(self): - from cookielib import CookieJar, DEFAULT_HTTP_PORT - - c = CookieJar() - interact_netscape(c, "http://www.acme.com/", - 'spam=eggs; DoMain=.acme.com; port; blArgh="feep"') - interact_netscape(c, "http://www.acme.com/", 'ni=ni; port=80,8080') - interact_netscape(c, "http://www.acme.com:80/", 'nini=ni') - interact_netscape(c, "http://www.acme.com:80/", 'foo=bar; expires=') - interact_netscape(c, "http://www.acme.com:80/", 'spam=eggs; ' - 'expires="Foo Bar 25 33:22:11 3022"') - - cookie = c._cookies[".acme.com"]["/"]["spam"] - self.assertEquals(cookie.domain, ".acme.com") - self.assert_(cookie.domain_specified) - 
self.assertEquals(cookie.port, DEFAULT_HTTP_PORT) - self.assert_(not cookie.port_specified) - # case is preserved - self.assert_(cookie.has_nonstandard_attr("blArgh") and - not cookie.has_nonstandard_attr("blargh")) - - cookie = c._cookies["www.acme.com"]["/"]["ni"] - self.assertEquals(cookie.domain, "www.acme.com") - self.assert_(not cookie.domain_specified) - self.assertEquals(cookie.port, "80,8080") - self.assert_(cookie.port_specified) - - cookie = c._cookies["www.acme.com"]["/"]["nini"] - self.assert_(cookie.port is None) - self.assert_(not cookie.port_specified) - - # invalid expires should not cause cookie to be dropped - foo = c._cookies["www.acme.com"]["/"]["foo"] - spam = c._cookies["www.acme.com"]["/"]["foo"] - self.assert_(foo.expires is None) - self.assert_(spam.expires is None) - - def test_ns_parser_special_names(self): - # names such as 'expires' are not special in first name=value pair - # of Set-Cookie: header - from cookielib import CookieJar - - c = CookieJar() - interact_netscape(c, "http://www.acme.com/", 'expires=eggs') - interact_netscape(c, "http://www.acme.com/", 'version=eggs; spam=eggs') - - cookies = c._cookies["www.acme.com"]["/"] - self.assert_('expires' in cookies) - self.assert_('version' in cookies) - - def test_expires(self): - from cookielib import time2netscape, CookieJar - - # if expires is in future, keep cookie... - c = CookieJar() - future = time2netscape(time.time()+3600) - interact_netscape(c, "http://www.acme.com/", 'spam="bar"; expires=%s' % - future) - self.assertEquals(len(c), 1) - now = time2netscape(time.time()-1) - # ... 
and if in past or present, discard it - interact_netscape(c, "http://www.acme.com/", 'foo="eggs"; expires=%s' % - now) - h = interact_netscape(c, "http://www.acme.com/") - self.assertEquals(len(c), 1) - self.assert_('spam="bar"' in h and "foo" not in h) - - # max-age takes precedence over expires, and zero max-age is request to - # delete both new cookie and any old matching cookie - interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; expires=%s' % - future) - interact_netscape(c, "http://www.acme.com/", 'bar="bar"; expires=%s' % - future) - self.assertEquals(len(c), 3) - interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; ' - 'expires=%s; max-age=0' % future) - interact_netscape(c, "http://www.acme.com/", 'bar="bar"; ' - 'max-age=0; expires=%s' % future) - h = interact_netscape(c, "http://www.acme.com/") - self.assertEquals(len(c), 1) - - # test expiry at end of session for cookies with no expires attribute - interact_netscape(c, "http://www.rhubarb.net/", 'whum="fizz"') - self.assertEquals(len(c), 2) - c.clear_session_cookies() - self.assertEquals(len(c), 1) - self.assert_('spam="bar"' in h) - - # XXX RFC 2965 expiry rules (some apply to V0 too) - - def test_default_path(self): - from cookielib import CookieJar, DefaultCookiePolicy - - # RFC 2965 - pol = DefaultCookiePolicy(rfc2965=True) - - c = CookieJar(pol) - interact_2965(c, "http://www.acme.com/", 'spam="bar"; Version="1"') - self.assert_("/" in c._cookies["www.acme.com"]) - - c = CookieJar(pol) - interact_2965(c, "http://www.acme.com/blah", 'eggs="bar"; Version="1"') - self.assert_("/" in c._cookies["www.acme.com"]) - - c = CookieJar(pol) - interact_2965(c, "http://www.acme.com/blah/rhubarb", - 'eggs="bar"; Version="1"') - self.assert_("/blah/" in c._cookies["www.acme.com"]) - - c = CookieJar(pol) - interact_2965(c, "http://www.acme.com/blah/rhubarb/", - 'eggs="bar"; Version="1"') - self.assert_("/blah/rhubarb/" in c._cookies["www.acme.com"]) - - # Netscape - - c = CookieJar() - 
interact_netscape(c, "http://www.acme.com/", 'spam="bar"') - self.assert_("/" in c._cookies["www.acme.com"]) - - c = CookieJar() - interact_netscape(c, "http://www.acme.com/blah", 'eggs="bar"') - self.assert_("/" in c._cookies["www.acme.com"]) - - c = CookieJar() - interact_netscape(c, "http://www.acme.com/blah/rhubarb", 'eggs="bar"') - self.assert_("/blah" in c._cookies["www.acme.com"]) - - c = CookieJar() - interact_netscape(c, "http://www.acme.com/blah/rhubarb/", 'eggs="bar"') - self.assert_("/blah/rhubarb" in c._cookies["www.acme.com"]) - - def test_escape_path(self): - from cookielib import escape_path - cases = [ - # quoted safe - ("/foo%2f/bar", "/foo%2F/bar"), - ("/foo%2F/bar", "/foo%2F/bar"), - # quoted % - ("/foo%%/bar", "/foo%%/bar"), - # quoted unsafe - ("/fo%19o/bar", "/fo%19o/bar"), - ("/fo%7do/bar", "/fo%7Do/bar"), - # unquoted safe - ("/foo/bar&", "/foo/bar&"), - ("/foo//bar", "/foo//bar"), - ("\176/foo/bar", "\176/foo/bar"), - # unquoted unsafe - ("/foo\031/bar", "/foo%19/bar"), - ("/\175foo/bar", "/%7Dfoo/bar"), - # unicode - ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"), # UTF-8 encoded - ] - for arg, result in cases: - self.assertEquals(escape_path(arg), result) - - def test_request_path(self): - from urllib2 import Request - from cookielib import request_path - # with parameters - req = Request("http://www.example.com/rheum/rhaponicum;" - "foo=bar;sing=song?apples=pears&spam=eggs#ni") - self.assertEquals(request_path(req), "/rheum/rhaponicum;" - "foo=bar;sing=song?apples=pears&spam=eggs#ni") - # without parameters - req = Request("http://www.example.com/rheum/rhaponicum?" - "apples=pears&spam=eggs#ni") - self.assertEquals(request_path(req), "/rheum/rhaponicum?" 
- "apples=pears&spam=eggs#ni") - # missing final slash - req = Request("http://www.example.com") - self.assertEquals(request_path(req), "/") - - def test_request_port(self): - from urllib2 import Request - from cookielib import request_port, DEFAULT_HTTP_PORT - req = Request("http://www.acme.com:1234/", - headers={"Host": "www.acme.com:4321"}) - self.assertEquals(request_port(req), "1234") - req = Request("http://www.acme.com/", - headers={"Host": "www.acme.com:4321"}) - self.assertEquals(request_port(req), DEFAULT_HTTP_PORT) - - def test_request_host(self): - from urllib2 import Request - from cookielib import request_host - # this request is illegal (RFC2616, 14.2.3) - req = Request("http://1.1.1.1/", - headers={"Host": "www.acme.com:80"}) - # libwww-perl wants this response, but that seems wrong (RFC 2616, - # section 5.2, point 1., and RFC 2965 section 1, paragraph 3) - #self.assertEquals(request_host(req), "www.acme.com") - self.assertEquals(request_host(req), "1.1.1.1") - req = Request("http://www.acme.com/", - headers={"Host": "irrelevant.com"}) - self.assertEquals(request_host(req), "www.acme.com") - # not actually sure this one is valid Request object, so maybe should - # remove test for no host in url in request_host function? 
- req = Request("/resource.html", - headers={"Host": "www.acme.com"}) - self.assertEquals(request_host(req), "www.acme.com") - # port shouldn't be in request-host - req = Request("http://www.acme.com:2345/resource.html", - headers={"Host": "www.acme.com:5432"}) - self.assertEquals(request_host(req), "www.acme.com") - - def test_is_HDN(self): - from cookielib import is_HDN - self.assert_(is_HDN("foo.bar.com")) - self.assert_(is_HDN("1foo2.3bar4.5com")) - self.assert_(not is_HDN("192.168.1.1")) - self.assert_(not is_HDN("")) - self.assert_(not is_HDN(".")) - self.assert_(not is_HDN(".foo.bar.com")) - self.assert_(not is_HDN("..foo")) - self.assert_(not is_HDN("foo.")) - - def test_reach(self): - from cookielib import reach - self.assertEquals(reach("www.acme.com"), ".acme.com") - self.assertEquals(reach("acme.com"), "acme.com") - self.assertEquals(reach("acme.local"), ".local") - self.assertEquals(reach(".local"), ".local") - self.assertEquals(reach(".com"), ".com") - self.assertEquals(reach("."), ".") - self.assertEquals(reach(""), "") - self.assertEquals(reach("192.168.0.1"), "192.168.0.1") - - def test_domain_match(self): - from cookielib import domain_match, user_domain_match - self.assert_(domain_match("192.168.1.1", "192.168.1.1")) - self.assert_(not domain_match("192.168.1.1", ".168.1.1")) - self.assert_(domain_match("x.y.com", "x.Y.com")) - self.assert_(domain_match("x.y.com", ".Y.com")) - self.assert_(not domain_match("x.y.com", "Y.com")) - self.assert_(domain_match("a.b.c.com", ".c.com")) - self.assert_(not domain_match(".c.com", "a.b.c.com")) - self.assert_(domain_match("example.local", ".local")) - self.assert_(not domain_match("blah.blah", "")) - self.assert_(not domain_match("", ".rhubarb.rhubarb")) - self.assert_(domain_match("", "")) - - self.assert_(user_domain_match("acme.com", "acme.com")) - self.assert_(not user_domain_match("acme.com", ".acme.com")) - self.assert_(user_domain_match("rhubarb.acme.com", ".acme.com")) - 
self.assert_(user_domain_match("www.rhubarb.acme.com", ".acme.com")) - self.assert_(user_domain_match("x.y.com", "x.Y.com")) - self.assert_(user_domain_match("x.y.com", ".Y.com")) - self.assert_(not user_domain_match("x.y.com", "Y.com")) - self.assert_(user_domain_match("y.com", "Y.com")) - self.assert_(not user_domain_match(".y.com", "Y.com")) - self.assert_(user_domain_match(".y.com", ".Y.com")) - self.assert_(user_domain_match("x.y.com", ".com")) - self.assert_(not user_domain_match("x.y.com", "com")) - self.assert_(not user_domain_match("x.y.com", "m")) - self.assert_(not user_domain_match("x.y.com", ".m")) - self.assert_(not user_domain_match("x.y.com", "")) - self.assert_(not user_domain_match("x.y.com", ".")) - self.assert_(user_domain_match("192.168.1.1", "192.168.1.1")) - # not both HDNs, so must string-compare equal to match - self.assert_(not user_domain_match("192.168.1.1", ".168.1.1")) - self.assert_(not user_domain_match("192.168.1.1", ".")) - # empty string is a special case - self.assert_(not user_domain_match("192.168.1.1", "")) - - def test_wrong_domain(self): - # Cookies whose effective request-host name does not domain-match the - # domain are rejected. - - # XXX far from complete - from cookielib import CookieJar - c = CookieJar() - interact_2965(c, "http://www.nasty.com/", - 'foo=bar; domain=friendly.org; Version="1"') - self.assertEquals(len(c), 0) - - def test_strict_domain(self): - # Cookies whose domain is a country-code tld like .co.uk should - # not be set if CookiePolicy.strict_domain is true. 
- from cookielib import CookieJar, DefaultCookiePolicy - - cp = DefaultCookiePolicy(strict_domain=True) - cj = CookieJar(policy=cp) - interact_netscape(cj, "http://example.co.uk/", 'no=problemo') - interact_netscape(cj, "http://example.co.uk/", - 'okey=dokey; Domain=.example.co.uk') - self.assertEquals(len(cj), 2) - for pseudo_tld in [".co.uk", ".org.za", ".tx.us", ".name.us"]: - interact_netscape(cj, "http://example.%s/" % pseudo_tld, - 'spam=eggs; Domain=.co.uk') - self.assertEquals(len(cj), 2) - - def test_two_component_domain_ns(self): - # Netscape: .www.bar.com, www.bar.com, .bar.com, bar.com, no domain - # should all get accepted, as should .acme.com, acme.com and no domain - # for 2-component domains like acme.com. - from cookielib import CookieJar, DefaultCookiePolicy - - c = CookieJar() - - # two-component V0 domain is OK - interact_netscape(c, "http://foo.net/", 'ns=bar') - self.assertEquals(len(c), 1) - self.assertEquals(c._cookies["foo.net"]["/"]["ns"].value, "bar") - self.assertEquals(interact_netscape(c, "http://foo.net/"), "ns=bar") - # *will* be returned to any other domain (unlike RFC 2965)... - self.assertEquals(interact_netscape(c, "http://www.foo.net/"), - "ns=bar") - # ...unless requested otherwise - pol = DefaultCookiePolicy( - strict_ns_domain=DefaultCookiePolicy.DomainStrictNonDomain) - c.set_policy(pol) - self.assertEquals(interact_netscape(c, "http://www.foo.net/"), "") - - # unlike RFC 2965, even explicit two-component domain is OK, - # because .foo.net matches foo.net - interact_netscape(c, "http://foo.net/foo/", - 'spam1=eggs; domain=foo.net') - # even if starts with a dot -- in NS rules, .foo.net matches foo.net! 
- interact_netscape(c, "http://foo.net/foo/bar/", - 'spam2=eggs; domain=.foo.net') - self.assertEquals(len(c), 3) - self.assertEquals(c._cookies[".foo.net"]["/foo"]["spam1"].value, - "eggs") - self.assertEquals(c._cookies[".foo.net"]["/foo/bar"]["spam2"].value, - "eggs") - self.assertEquals(interact_netscape(c, "http://foo.net/foo/bar/"), - "spam2=eggs; spam1=eggs; ns=bar") - - # top-level domain is too general - interact_netscape(c, "http://foo.net/", 'nini="ni"; domain=.net') - self.assertEquals(len(c), 3) - -## # Netscape protocol doesn't allow non-special top level domains (such -## # as co.uk) in the domain attribute unless there are at least three -## # dots in it. - # Oh yes it does! Real implementations don't check this, and real - # cookies (of course) rely on that behaviour. - interact_netscape(c, "http://foo.co.uk", 'nasty=trick; domain=.co.uk') -## self.assertEquals(len(c), 2) - self.assertEquals(len(c), 4) - - def test_two_component_domain_rfc2965(self): - from cookielib import CookieJar, DefaultCookiePolicy - - pol = DefaultCookiePolicy(rfc2965=True) - c = CookieJar(pol) - - # two-component V1 domain is OK - interact_2965(c, "http://foo.net/", 'foo=bar; Version="1"') - self.assertEquals(len(c), 1) - self.assertEquals(c._cookies["foo.net"]["/"]["foo"].value, "bar") - self.assertEquals(interact_2965(c, "http://foo.net/"), - "$Version=1; foo=bar") - # won't be returned to any other domain (because domain was implied) - self.assertEquals(interact_2965(c, "http://www.foo.net/"), "") - - # unless domain is given explicitly, because then it must be - # rewritten to start with a dot: foo.net --> .foo.net, which does - # not domain-match foo.net - interact_2965(c, "http://foo.net/foo", - 'spam=eggs; domain=foo.net; path=/foo; Version="1"') - self.assertEquals(len(c), 1) - self.assertEquals(interact_2965(c, "http://foo.net/foo"), - "$Version=1; foo=bar") - - # explicit foo.net from three-component domain www.foo.net *does* get - # set, because .foo.net 
domain-matches .foo.net - interact_2965(c, "http://www.foo.net/foo/", - 'spam=eggs; domain=foo.net; Version="1"') - self.assertEquals(c._cookies[".foo.net"]["/foo/"]["spam"].value, - "eggs") - self.assertEquals(len(c), 2) - self.assertEquals(interact_2965(c, "http://foo.net/foo/"), - "$Version=1; foo=bar") - self.assertEquals(interact_2965(c, "http://www.foo.net/foo/"), - '$Version=1; spam=eggs; $Domain="foo.net"') - - # top-level domain is too general - interact_2965(c, "http://foo.net/", - 'ni="ni"; domain=".net"; Version="1"') - self.assertEquals(len(c), 2) - - # RFC 2965 doesn't require blocking this - interact_2965(c, "http://foo.co.uk/", - 'nasty=trick; domain=.co.uk; Version="1"') - self.assertEquals(len(c), 3) - - def test_domain_allow(self): - from cookielib import CookieJar, DefaultCookiePolicy - from urllib2 import Request - - c = CookieJar(policy=DefaultCookiePolicy( - blocked_domains=["acme.com"], - allowed_domains=["www.acme.com"])) - - req = Request("http://acme.com/") - headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] - res = FakeResponse(headers, "http://acme.com/") - c.extract_cookies(res, req) - self.assertEquals(len(c), 0) - - req = Request("http://www.acme.com/") - res = FakeResponse(headers, "http://www.acme.com/") - c.extract_cookies(res, req) - self.assertEquals(len(c), 1) - - req = Request("http://www.coyote.com/") - res = FakeResponse(headers, "http://www.coyote.com/") - c.extract_cookies(res, req) - self.assertEquals(len(c), 1) - - # set a cookie with non-allowed domain... - req = Request("http://www.coyote.com/") - res = FakeResponse(headers, "http://www.coyote.com/") - cookies = c.make_cookies(res, req) - c.set_cookie(cookies[0]) - self.assertEquals(len(c), 2) - # ... 
and check is doesn't get returned - c.add_cookie_header(req) - self.assert_(not req.has_header("Cookie")) - - def test_domain_block(self): - from cookielib import CookieJar, DefaultCookiePolicy - from urllib2 import Request - - pol = DefaultCookiePolicy( - rfc2965=True, blocked_domains=[".acme.com"]) - c = CookieJar(policy=pol) - headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] - - req = Request("http://www.acme.com/") - res = FakeResponse(headers, "http://www.acme.com/") - c.extract_cookies(res, req) - self.assertEquals(len(c), 0) - - p = pol.set_blocked_domains(["acme.com"]) - c.extract_cookies(res, req) - self.assertEquals(len(c), 1) - - c.clear() - req = Request("http://www.roadrunner.net/") - res = FakeResponse(headers, "http://www.roadrunner.net/") - c.extract_cookies(res, req) - self.assertEquals(len(c), 1) - req = Request("http://www.roadrunner.net/") - c.add_cookie_header(req) - self.assert_((req.has_header("Cookie") and - req.has_header("Cookie2"))) - - c.clear() - pol.set_blocked_domains([".acme.com"]) - c.extract_cookies(res, req) - self.assertEquals(len(c), 1) - - # set a cookie with blocked domain... - req = Request("http://www.acme.com/") - res = FakeResponse(headers, "http://www.acme.com/") - cookies = c.make_cookies(res, req) - c.set_cookie(cookies[0]) - self.assertEquals(len(c), 2) - # ... 
and check is doesn't get returned - c.add_cookie_header(req) - self.assert_(not req.has_header("Cookie")) - - def test_secure(self): - from cookielib import CookieJar, DefaultCookiePolicy - - for ns in True, False: - for whitespace in " ", "": - c = CookieJar() - if ns: - pol = DefaultCookiePolicy(rfc2965=False) - int = interact_netscape - vs = "" - else: - pol = DefaultCookiePolicy(rfc2965=True) - int = interact_2965 - vs = "; Version=1" - c.set_policy(pol) - url = "http://www.acme.com/" - int(c, url, "foo1=bar%s%s" % (vs, whitespace)) - int(c, url, "foo2=bar%s; secure%s" % (vs, whitespace)) - self.assert_( - not c._cookies["www.acme.com"]["/"]["foo1"].secure, - "non-secure cookie registered secure") - self.assert_( - c._cookies["www.acme.com"]["/"]["foo2"].secure, - "secure cookie registered non-secure") - - def test_quote_cookie_value(self): - from cookielib import CookieJar, DefaultCookiePolicy - c = CookieJar(policy=DefaultCookiePolicy(rfc2965=True)) - interact_2965(c, "http://www.acme.com/", r'foo=\b"a"r; Version=1') - h = interact_2965(c, "http://www.acme.com/") - self.assertEquals(h, r'$Version=1; foo=\\b\"a\"r') - - def test_missing_final_slash(self): - # Missing slash from request URL's abs_path should be assumed present. 
- from cookielib import CookieJar, DefaultCookiePolicy - from urllib2 import Request - url = "http://www.acme.com" - c = CookieJar(DefaultCookiePolicy(rfc2965=True)) - interact_2965(c, url, "foo=bar; Version=1") - req = Request(url) - self.assertEquals(len(c), 1) - c.add_cookie_header(req) - self.assert_(req.has_header("Cookie")) - - def test_domain_mirror(self): - from cookielib import CookieJar, DefaultCookiePolicy - - pol = DefaultCookiePolicy(rfc2965=True) - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, "spam=eggs; Version=1") - h = interact_2965(c, url) - self.assert_("Domain" not in h, - "absent domain returned with domain present") - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, 'spam=eggs; Version=1; Domain=.bar.com') - h = interact_2965(c, url) - self.assert_('$Domain=".bar.com"' in h, "domain not returned") - - c = CookieJar(pol) - url = "http://foo.bar.com/" - # note missing initial dot in Domain - interact_2965(c, url, 'spam=eggs; Version=1; Domain=bar.com') - h = interact_2965(c, url) - self.assert_('$Domain="bar.com"' in h, "domain not returned") - - def test_path_mirror(self): - from cookielib import CookieJar, DefaultCookiePolicy - - pol = DefaultCookiePolicy(rfc2965=True) - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, "spam=eggs; Version=1") - h = interact_2965(c, url) - self.assert_("Path" not in h, - "absent path returned with path present") - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, 'spam=eggs; Version=1; Path=/') - h = interact_2965(c, url) - self.assert_('$Path="/"' in h, "path not returned") - - def test_port_mirror(self): - from cookielib import CookieJar, DefaultCookiePolicy - - pol = DefaultCookiePolicy(rfc2965=True) - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, "spam=eggs; Version=1") - h = interact_2965(c, url) - self.assert_("Port" not in h, - "absent port returned with port present") - 
- c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, "spam=eggs; Version=1; Port") - h = interact_2965(c, url) - self.assert_(re.search("\$Port([^=]|$)", h), - "port with no value not returned with no value") - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, 'spam=eggs; Version=1; Port="80"') - h = interact_2965(c, url) - self.assert_('$Port="80"' in h, - "port with single value not returned with single value") - - c = CookieJar(pol) - url = "http://foo.bar.com/" - interact_2965(c, url, 'spam=eggs; Version=1; Port="80,8080"') - h = interact_2965(c, url) - self.assert_('$Port="80,8080"' in h, - "port with multiple values not returned with multiple " - "values") - - def test_no_return_comment(self): - from cookielib import CookieJar, DefaultCookiePolicy - - c = CookieJar(DefaultCookiePolicy(rfc2965=True)) - url = "http://foo.bar.com/" - interact_2965(c, url, 'spam=eggs; Version=1; ' - 'Comment="does anybody read these?"; ' - 'CommentURL="http://foo.bar.net/comment.html"') - h = interact_2965(c, url) - self.assert_( - "Comment" not in h, - "Comment or CommentURL cookie-attributes returned to server") - - def test_Cookie_iterator(self): - from cookielib import CookieJar, Cookie, DefaultCookiePolicy - - cs = CookieJar(DefaultCookiePolicy(rfc2965=True)) - # add some random cookies - interact_2965(cs, "http://blah.spam.org/", 'foo=eggs; Version=1; ' - 'Comment="does anybody read these?"; ' - 'CommentURL="http://foo.bar.net/comment.html"') - interact_netscape(cs, "http://www.acme.com/blah/", "spam=bar; secure") - interact_2965(cs, "http://www.acme.com/blah/", - "foo=bar; secure; Version=1") - interact_2965(cs, "http://www.acme.com/blah/", - "foo=bar; path=/; Version=1") - interact_2965(cs, "http://www.sol.no", - r'bang=wallop; version=1; domain=".sol.no"; ' - r'port="90,100, 80,8080"; ' - r'max-age=100; Comment = "Just kidding! 
(\"|\\\\) "') - - versions = [1, 1, 1, 0, 1] - names = ["bang", "foo", "foo", "spam", "foo"] - domains = [".sol.no", "blah.spam.org", "www.acme.com", - "www.acme.com", "www.acme.com"] - paths = ["/", "/", "/", "/blah", "/blah/"] - - for i in range(4): - i = 0 - for c in cs: - self.assert_(isinstance(c, Cookie)) - self.assertEquals(c.version, versions[i]) - self.assertEquals(c.name, names[i]) - self.assertEquals(c.domain, domains[i]) - self.assertEquals(c.path, paths[i]) - i = i + 1 - - def test_parse_ns_headers(self): - from cookielib import parse_ns_headers - - # missing domain value (invalid cookie) - self.assertEquals( - parse_ns_headers(["foo=bar; path=/; domain"]), - [[("foo", "bar"), - ("path", "/"), ("domain", None), ("version", "0")]] - ) - # invalid expires value - self.assertEquals( - parse_ns_headers(["foo=bar; expires=Foo Bar 12 33:22:11 2000"]), - [[("foo", "bar"), ("expires", None), ("version", "0")]] - ) - # missing cookie value (valid cookie) - self.assertEquals( - parse_ns_headers(["foo"]), - [[("foo", None), ("version", "0")]] - ) - # shouldn't add version if header is empty - self.assertEquals(parse_ns_headers([""]), []) - - def test_bad_cookie_header(self): - - def cookiejar_from_cookie_headers(headers): - from cookielib import CookieJar - from urllib2 import Request - c = CookieJar() - req = Request("http://www.example.com/") - r = FakeResponse(headers, "http://www.example.com/") - c.extract_cookies(r, req) - return c - - # none of these bad headers should cause an exception to be raised - for headers in [ - ["Set-Cookie: "], # actually, nothing wrong with this - ["Set-Cookie2: "], # ditto - # missing domain value - ["Set-Cookie2: a=foo; path=/; Version=1; domain"], - # bad max-age - ["Set-Cookie: b=foo; max-age=oops"], - ]: - c = cookiejar_from_cookie_headers(headers) - # these bad cookies shouldn't be set - self.assertEquals(len(c), 0) - - # cookie with invalid expires is treated as session cookie - headers = ["Set-Cookie: c=foo; expires=Foo 
Bar 12 33:22:11 2000"] - c = cookiejar_from_cookie_headers(headers) - cookie = c._cookies["www.example.com"]["/"]["c"] - self.assert_(cookie.expires is None) - - -class LWPCookieTests(TestCase): - # Tests taken from libwww-perl, with a few modifications and additions. - - def test_netscape_example_1(self): - from cookielib import CookieJar, DefaultCookiePolicy - from urllib2 import Request - - #------------------------------------------------------------------- - # First we check that it works for the original example at - # http://www.netscape.com/newsref/std/cookie_spec.html - - # Client requests a document, and receives in the response: - # - # Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT - # - # When client requests a URL in path "/" on this server, it sends: - # - # Cookie: CUSTOMER=WILE_E_COYOTE - # - # Client requests a document, and receives in the response: - # - # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ - # - # When client requests a URL in path "/" on this server, it sends: - # - # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 - # - # Client receives: - # - # Set-Cookie: SHIPPING=FEDEX; path=/fo - # - # When client requests a URL in path "/" on this server, it sends: - # - # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 - # - # When client requests a URL in path "/foo" on this server, it sends: - # - # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001; SHIPPING=FEDEX - # - # The last Cookie is buggy, because both specifications say that the - # most specific cookie must be sent first. SHIPPING=FEDEX is the - # most specific and should thus be first. 
- - year_plus_one = time.localtime()[0] + 1 - - headers = [] - - c = CookieJar(DefaultCookiePolicy(rfc2965 = True)) - - #req = Request("http://1.1.1.1/", - # headers={"Host": "www.acme.com:80"}) - req = Request("http://www.acme.com:80/", - headers={"Host": "www.acme.com:80"}) - - headers.append( - "Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/ ; " - "expires=Wednesday, 09-Nov-%d 23:12:40 GMT" % year_plus_one) - res = FakeResponse(headers, "http://www.acme.com/") - c.extract_cookies(res, req) - - req = Request("http://www.acme.com/") - c.add_cookie_header(req) - - self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE") - self.assertEqual(req.get_header("Cookie2"), '$Version="1"') - - headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") - res = FakeResponse(headers, "http://www.acme.com/") - c.extract_cookies(res, req) - - req = Request("http://www.acme.com/foo/bar") - c.add_cookie_header(req) - - h = req.get_header("Cookie") - self.assert_("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and - "CUSTOMER=WILE_E_COYOTE" in h) - - headers.append('Set-Cookie: SHIPPING=FEDEX; path=/foo') - res = FakeResponse(headers, "http://www.acme.com") - c.extract_cookies(res, req) - - req = Request("http://www.acme.com/") - c.add_cookie_header(req) - - h = req.get_header("Cookie") - self.assert_("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and - "CUSTOMER=WILE_E_COYOTE" in h and - "SHIPPING=FEDEX" not in h) - - req = Request("http://www.acme.com/foo/") - c.add_cookie_header(req) - - h = req.get_header("Cookie") - self.assert_(("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and - "CUSTOMER=WILE_E_COYOTE" in h and - h.startswith("SHIPPING=FEDEX;"))) - - def test_netscape_example_2(self): - from cookielib import CookieJar - from urllib2 import Request - - # Second Example transaction sequence: - # - # Assume all mappings from above have been cleared. 
- # - # Client receives: - # - # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ - # - # When client requests a URL in path "/" on this server, it sends: - # - # Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001 - # - # Client receives: - # - # Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo - # - # When client requests a URL in path "/ammo" on this server, it sends: - # - # Cookie: PART_NUMBER=RIDING_ROCKET_0023; PART_NUMBER=ROCKET_LAUNCHER_0001 - # - # NOTE: There are two name/value pairs named "PART_NUMBER" due to - # the inheritance of the "/" mapping in addition to the "/ammo" mapping. - - c = CookieJar() - headers = [] - - req = Request("http://www.acme.com/") - headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") - res = FakeResponse(headers, "http://www.acme.com/") - - c.extract_cookies(res, req) - - req = Request("http://www.acme.com/") - c.add_cookie_header(req) - - self.assertEquals(req.get_header("Cookie"), - "PART_NUMBER=ROCKET_LAUNCHER_0001") - - headers.append( - "Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo") - res = FakeResponse(headers, "http://www.acme.com/") - c.extract_cookies(res, req) - - req = Request("http://www.acme.com/ammo") - c.add_cookie_header(req) - - self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*" - "PART_NUMBER=ROCKET_LAUNCHER_0001", - req.get_header("Cookie"))) - - def test_ietf_example_1(self): - from cookielib import CookieJar, DefaultCookiePolicy - #------------------------------------------------------------------- - # Then we test with the examples from draft-ietf-http-state-man-mec-03.txt - # - # 5. EXAMPLES - - c = CookieJar(DefaultCookiePolicy(rfc2965=True)) - - # - # 5.1 Example 1 - # - # Most detail of request and response headers has been omitted. Assume - # the user agent has no stored cookies. - # - # 1. User Agent -> Server - # - # POST /acme/login HTTP/1.1 - # [form data] - # - # User identifies self via a form. - # - # 2. 
Server -> User Agent - # - # HTTP/1.1 200 OK - # Set-Cookie2: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme" - # - # Cookie reflects user's identity. - - cookie = interact_2965( - c, 'http://www.acme.com/acme/login', - 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"') - self.assert_(not cookie) - - # - # 3. User Agent -> Server - # - # POST /acme/pickitem HTTP/1.1 - # Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme" - # [form data] - # - # User selects an item for ``shopping basket.'' - # - # 4. Server -> User Agent - # - # HTTP/1.1 200 OK - # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; - # Path="/acme" - # - # Shopping basket contains an item. - - cookie = interact_2965(c, 'http://www.acme.com/acme/pickitem', - 'Part_Number="Rocket_Launcher_0001"; ' - 'Version="1"; Path="/acme"'); - self.assert_(re.search( - r'^\$Version="?1"?; Customer="?WILE_E_COYOTE"?; \$Path="/acme"$', - cookie)) - - # - # 5. User Agent -> Server - # - # POST /acme/shipping HTTP/1.1 - # Cookie: $Version="1"; - # Customer="WILE_E_COYOTE"; $Path="/acme"; - # Part_Number="Rocket_Launcher_0001"; $Path="/acme" - # [form data] - # - # User selects shipping method from form. - # - # 6. Server -> User Agent - # - # HTTP/1.1 200 OK - # Set-Cookie2: Shipping="FedEx"; Version="1"; Path="/acme" - # - # New cookie reflects shipping method. - - cookie = interact_2965(c, "http://www.acme.com/acme/shipping", - 'Shipping="FedEx"; Version="1"; Path="/acme"') - - self.assert_(re.search(r'^\$Version="?1"?;', cookie)) - self.assert_(re.search(r'Part_Number="?Rocket_Launcher_0001"?;' - '\s*\$Path="\/acme"', cookie)) - self.assert_(re.search(r'Customer="?WILE_E_COYOTE"?;\s*\$Path="\/acme"', - cookie)) - - # - # 7. 
User Agent -> Server - # - # POST /acme/process HTTP/1.1 - # Cookie: $Version="1"; - # Customer="WILE_E_COYOTE"; $Path="/acme"; - # Part_Number="Rocket_Launcher_0001"; $Path="/acme"; - # Shipping="FedEx"; $Path="/acme" - # [form data] - # - # User chooses to process order. - # - # 8. Server -> User Agent - # - # HTTP/1.1 200 OK - # - # Transaction is complete. - - cookie = interact_2965(c, "http://www.acme.com/acme/process") - self.assert_( - re.search(r'Shipping="?FedEx"?;\s*\$Path="\/acme"', cookie) and - "WILE_E_COYOTE" in cookie) - - # - # The user agent makes a series of requests on the origin server, after - # each of which it receives a new cookie. All the cookies have the same - # Path attribute and (default) domain. Because the request URLs all have - # /acme as a prefix, and that matches the Path attribute, each request - # contains all the cookies received so far. - - def test_ietf_example_2(self): - from cookielib import CookieJar, DefaultCookiePolicy - - # 5.2 Example 2 - # - # This example illustrates the effect of the Path attribute. All detail - # of request and response headers has been omitted. Assume the user agent - # has no stored cookies. - - c = CookieJar(DefaultCookiePolicy(rfc2965=True)) - - # Imagine the user agent has received, in response to earlier requests, - # the response headers - # - # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; - # Path="/acme" - # - # and - # - # Set-Cookie2: Part_Number="Riding_Rocket_0023"; Version="1"; - # Path="/acme/ammo" - - interact_2965( - c, "http://www.acme.com/acme/ammo/specific", - 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"', - 'Part_Number="Riding_Rocket_0023"; Version="1"; Path="/acme/ammo"') - - # A subsequent request by the user agent to the (same) server for URLs of - # the form /acme/ammo/... 
would include the following request header: - # - # Cookie: $Version="1"; - # Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo"; - # Part_Number="Rocket_Launcher_0001"; $Path="/acme" - # - # Note that the NAME=VALUE pair for the cookie with the more specific Path - # attribute, /acme/ammo, comes before the one with the less specific Path - # attribute, /acme. Further note that the same cookie name appears more - # than once. - - cookie = interact_2965(c, "http://www.acme.com/acme/ammo/...") - self.assert_( - re.search(r"Riding_Rocket_0023.*Rocket_Launcher_0001", cookie)) - - # A subsequent request by the user agent to the (same) server for a URL of - # the form /acme/parts/ would include the following request header: - # - # Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; $Path="/acme" - # - # Here, the second cookie's Path attribute /acme/ammo is not a prefix of - # the request URL, /acme/parts/, so the cookie does not get forwarded to - # the server. - - cookie = interact_2965(c, "http://www.acme.com/acme/parts/") - self.assert_("Rocket_Launcher_0001" in cookie and - "Riding_Rocket_0023" not in cookie) - - def test_rejection(self): - # Test rejection of Set-Cookie2 responses based on domain, path, port. 
- from cookielib import DefaultCookiePolicy, LWPCookieJar - - pol = DefaultCookiePolicy(rfc2965=True) - - c = LWPCookieJar(policy=pol) - - max_age = "max-age=3600" - - # illegal domain (no embedded dots) - cookie = interact_2965(c, "http://www.acme.com", - 'foo=bar; domain=".com"; version=1') - self.assert_(not c) - - # legal domain - cookie = interact_2965(c, "http://www.acme.com", - 'ping=pong; domain="acme.com"; version=1') - self.assertEquals(len(c), 1) - - # illegal domain (host prefix "www.a" contains a dot) - cookie = interact_2965(c, "http://www.a.acme.com", - 'whiz=bang; domain="acme.com"; version=1') - self.assertEquals(len(c), 1) - - # legal domain - cookie = interact_2965(c, "http://www.a.acme.com", - 'wow=flutter; domain=".a.acme.com"; version=1') - self.assertEquals(len(c), 2) - - # can't partially match an IP-address - cookie = interact_2965(c, "http://125.125.125.125", - 'zzzz=ping; domain="125.125.125"; version=1') - self.assertEquals(len(c), 2) - - # illegal path (must be prefix of request path) - cookie = interact_2965(c, "http://www.sol.no", - 'blah=rhubarb; domain=".sol.no"; path="/foo"; ' - 'version=1') - self.assertEquals(len(c), 2) - - # legal path - cookie = interact_2965(c, "http://www.sol.no/foo/bar", - 'bing=bong; domain=".sol.no"; path="/foo"; ' - 'version=1') - self.assertEquals(len(c), 3) - - # illegal port (request-port not in list) - cookie = interact_2965(c, "http://www.sol.no", - 'whiz=ffft; domain=".sol.no"; port="90,100"; ' - 'version=1') - self.assertEquals(len(c), 3) - - # legal port - cookie = interact_2965( - c, "http://www.sol.no", - r'bang=wallop; version=1; domain=".sol.no"; ' - r'port="90,100, 80,8080"; ' - r'max-age=100; Comment = "Just kidding! 
(\"|\\\\) "') - self.assertEquals(len(c), 4) - - # port attribute without any value (current port) - cookie = interact_2965(c, "http://www.sol.no", - 'foo9=bar; version=1; domain=".sol.no"; port; ' - 'max-age=100;') - self.assertEquals(len(c), 5) - - # encoded path - # LWP has this test, but unescaping allowed path characters seems - # like a bad idea, so I think this should fail: -## cookie = interact_2965(c, "http://www.sol.no/foo/", -## r'foo8=bar; version=1; path="/%66oo"') - # but this is OK, because '<' is not an allowed HTTP URL path - # character: - cookie = interact_2965(c, "http://www.sol.no/ '%s' (%s)" % (test_t, result, expected)) + + for s in tests: + t = http2time(s) + t2 = http2time(s.lower()) + t3 = http2time(s.upper()) + + self.assert_(t == t2 == t3 == test_t, + "'%s' => %s, %s, %s (%s)" % (s, t, t2, t3, test_t)) + + def test_http2time_garbage(self): + for test in [ + '', + 'Garbage', + 'Mandag 16. September 1996', + '01-00-1980', + '01-13-1980', + '00-01-1980', + '32-01-1980', + '01-01-1980 25:00:00', + '01-01-1980 00:61:00', + '01-01-1980 00:00:62', + ]: + self.assert_(http2time(test) is None, + "http2time(%s) is not None\n" + "http2time(test) %s" % (test, http2time(test)) + ) + + +class HeaderTests(TestCase): + def test_parse_ns_headers(self): + # quotes should be stripped + expected = [[('foo', 'bar'), ('expires', 2209069412), ('version', '0')]] + for hdr in [ + 'foo=bar; expires=01 Jan 2040 22:23:32 GMT', + 'foo=bar; expires="01 Jan 2040 22:23:32 GMT"', + ]: + self.assertEquals(parse_ns_headers([hdr]), expected) + + def test_parse_ns_headers_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + # Cookie with name 'expires' + hdr = 'expires=01 Jan 2040 22:23:32 GMT' + expected = [[("expires", "01 Jan 2040 22:23:32 GMT"), ("version", "0")]] + self.assertEquals(parse_ns_headers([hdr]), expected) + + def test_join_header_words(self): + joined = join_header_words([[("foo", None), 
("bar", "baz")]]) + self.assertEquals(joined, "foo; bar=baz") + + self.assertEquals(join_header_words([[]]), "") + + def test_split_header_words(self): + tests = [ + ("foo", [[("foo", None)]]), + ("foo=bar", [[("foo", "bar")]]), + (" foo ", [[("foo", None)]]), + (" foo= ", [[("foo", "")]]), + (" foo=", [[("foo", "")]]), + (" foo= ; ", [[("foo", "")]]), + (" foo= ; bar= baz ", [[("foo", ""), ("bar", "baz")]]), + ("foo=bar bar=baz", [[("foo", "bar"), ("bar", "baz")]]), + # doesn't really matter if this next fails, but it works ATM + ("foo= bar=baz", [[("foo", "bar=baz")]]), + ("foo=bar;bar=baz", [[("foo", "bar"), ("bar", "baz")]]), + ('foo bar baz', [[("foo", None), ("bar", None), ("baz", None)]]), + ("a, b, c", [[("a", None)], [("b", None)], [("c", None)]]), + (r'foo; bar=baz, spam=, foo="\,\;\"", bar= ', + [[("foo", None), ("bar", "baz")], + [("spam", "")], [("foo", ',;"')], [("bar", "")]]), + ] + + for arg, expect in tests: + try: + result = split_header_words([arg]) + except: + import traceback, io + f = io.StringIO() + traceback.print_exc(None, f) + result = "(error -- traceback follows)\n\n%s" % f.getvalue() + self.assertEquals(result, expect, """ +When parsing: '%s' +Expected: '%s' +Got: '%s' +""" % (arg, expect, result)) + + def test_roundtrip(self): + tests = [ + ("foo", "foo"), + ("foo=bar", "foo=bar"), + (" foo ", "foo"), + ("foo=", 'foo=""'), + ("foo=bar bar=baz", "foo=bar; bar=baz"), + ("foo=bar;bar=baz", "foo=bar; bar=baz"), + ('foo bar baz', "foo; bar; baz"), + (r'foo="\"" bar="\\"', r'foo="\""; bar="\\"'), + ('foo,,,bar', 'foo, bar'), + ('foo=bar,bar=baz', 'foo=bar, bar=baz'), + + ('text/html; charset=iso-8859-1', + 'text/html; charset="iso-8859-1"'), + + ('foo="bar"; port="80,81"; discard, bar=baz', + 'foo=bar; port="80,81"; discard, bar=baz'), + + (r'Basic realm="\"foo\\\\bar\""', + r'Basic; realm="\"foo\\\\bar\""') + ] + + for arg, expect in tests: + input = split_header_words([arg]) + res = join_header_words(input) + self.assertEquals(res, expect, 
""" +When parsing: '%s' +Expected: '%s' +Got: '%s' +Input was: '%s' +""" % (arg, expect, res, input)) + + +class FakeResponse: + def __init__(self, headers=[], url=None): + """ + headers: list of RFC822-style 'Key: value' strings + """ + import mimetools, io + f = io.StringIO("\n".join(headers)) + self._headers = mimetools.Message(f) + self._url = url + def info(self): return self._headers + +def interact_2965(cookiejar, url, *set_cookie_hdrs): + return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie2") + +def interact_netscape(cookiejar, url, *set_cookie_hdrs): + return _interact(cookiejar, url, set_cookie_hdrs, "Set-Cookie") + +def _interact(cookiejar, url, set_cookie_hdrs, hdr_name): + """Perform a single request / response cycle, returning Cookie: header.""" + req = urllib2.Request(url) + cookiejar.add_cookie_header(req) + cookie_hdr = req.get_header("Cookie", "") + headers = [] + for hdr in set_cookie_hdrs: + headers.append("%s: %s" % (hdr_name, hdr)) + res = FakeResponse(headers, url) + cookiejar.extract_cookies(res, req) + return cookie_hdr + + +class FileCookieJarTests(TestCase): + def test_lwp_valueless_cookie(self): + # cookies with no value should be saved and loaded consistently + filename = support.TESTFN + c = LWPCookieJar() + interact_netscape(c, "http://www.acme.com/", 'boo') + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + try: + c.save(filename, ignore_discard=True) + c = LWPCookieJar() + c.load(filename, ignore_discard=True) + finally: + try: os.unlink(filename) + except OSError: pass + self.assertEqual(c._cookies["www.acme.com"]["/"]["boo"].value, None) + + def test_bad_magic(self): + # IOErrors (eg. 
file doesn't exist) are allowed to propagate + filename = support.TESTFN + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + try: + c.load(filename="for this test to work, a file with this " + "filename should not exist") + except IOError as exc: + # exactly IOError, not LoadError + self.assertEqual(exc.__class__, IOError) + else: + self.fail("expected IOError for invalid filename") + # Invalid contents of cookies file (eg. bad magic string) + # causes a LoadError. + try: + f = open(filename, "w") + f.write("oops\n") + for cookiejar_class in LWPCookieJar, MozillaCookieJar: + c = cookiejar_class() + self.assertRaises(LoadError, c.load, filename) + finally: + try: os.unlink(filename) + except OSError: pass + +class CookieTests(TestCase): + # XXX + # Get rid of string comparisons where not actually testing str / repr. + # .clear() etc. + # IP addresses like 50 (single number, no dot) and domain-matching + # functions (and is_HDN)? See draft RFC 2965 errata. + # Strictness switches + # is_third_party() + # unverifiability / third-party blocking + # Netscape cookies work the same as RFC 2965 with regard to port. + # Set-Cookie with negative max age. + # If turn RFC 2965 handling off, Set-Cookie2 cookies should not clobber + # Set-Cookie cookies. + # Cookie2 should be sent if *any* cookies are not V1 (ie. V0 OR V2 etc.). + # Cookies (V1 and V0) with no expiry date should be set to be discarded. + # RFC 2965 Quoting: + # Should accept unquoted cookie-attribute values? check errata draft. + # Which are required on the way in and out? + # Should always return quoted cookie-attribute values? + # Proper testing of when RFC 2965 clobbers Netscape (waiting for errata). + # Path-match on return (same for V0 and V1). + # RFC 2965 acceptance and returning rules + # Set-Cookie2 without version attribute is rejected. + + # Netscape peculiarities list from Ronald Tschalar. + # The first two still need tests, the rest are covered. 
+## - Quoting: only quotes around the expires value are recognized as such +## (and yes, some folks quote the expires value); quotes around any other +## value are treated as part of the value. +## - White space: white space around names and values is ignored +## - Default path: if no path parameter is given, the path defaults to the +## path in the request-uri up to, but not including, the last '/'. Note +## that this is entirely different from what the spec says. +## - Commas and other delimiters: Netscape just parses until the next ';'. +## This means it will allow commas etc inside values (and yes, both +## commas and equals are commonly appear in the cookie value). This also +## means that if you fold multiple Set-Cookie header fields into one, +## comma-separated list, it'll be a headache to parse (at least my head +## starts hurting everytime I think of that code). +## - Expires: You'll get all sorts of date formats in the expires, +## including emtpy expires attributes ("expires="). Be as flexible as you +## can, and certainly don't expect the weekday to be there; if you can't +## parse it, just ignore it and pretend it's a session cookie. +## - Domain-matching: Netscape uses the 2-dot rule for _all_ domains, not +## just the 7 special TLD's listed in their spec. And folks rely on +## that... + + def test_domain_return_ok(self): + # test optimization: .domain_return_ok() should filter out most + # domains in the CookieJar before we try to access them (because that + # may require disk access -- in particular, with MSIECookieJar) + # This is only a rough check for performance reasons, so it's not too + # critical as long as it's sufficiently liberal. 
+ pol = DefaultCookiePolicy() + for url, domain, ok in [ + ("http://foo.bar.com/", "blah.com", False), + ("http://foo.bar.com/", "rhubarb.blah.com", False), + ("http://foo.bar.com/", "rhubarb.foo.bar.com", False), + ("http://foo.bar.com/", ".foo.bar.com", True), + ("http://foo.bar.com/", "foo.bar.com", True), + ("http://foo.bar.com/", ".bar.com", True), + ("http://foo.bar.com/", "com", True), + ("http://foo.com/", "rhubarb.foo.com", False), + ("http://foo.com/", ".foo.com", True), + ("http://foo.com/", "foo.com", True), + ("http://foo.com/", "com", True), + ("http://foo/", "rhubarb.foo", False), + ("http://foo/", ".foo", True), + ("http://foo/", "foo", True), + ("http://foo/", "foo.local", True), + ("http://foo/", ".local", True), + ]: + request = urllib2.Request(url) + r = pol.domain_return_ok(domain, request) + if ok: self.assert_(r) + else: self.assert_(not r) + + def test_missing_value(self): + # missing = sign in Cookie: header is regarded by Mozilla as a missing + # name, and by http.cookiejar as a missing value + filename = support.TESTFN + c = MozillaCookieJar(filename) + interact_netscape(c, "http://www.acme.com/", 'eggs') + interact_netscape(c, "http://www.acme.com/", '"spam"; path=/foo/') + cookie = c._cookies["www.acme.com"]["/"]["eggs"] + self.assert_(cookie.value is None) + self.assertEquals(cookie.name, "eggs") + cookie = c._cookies["www.acme.com"]['/foo/']['"spam"'] + self.assert_(cookie.value is None) + self.assertEquals(cookie.name, '"spam"') + self.assertEquals(lwp_cookie_str(cookie), ( + r'"spam"; path="/foo/"; domain="www.acme.com"; ' + 'path_spec; discard; version=0')) + old_str = repr(c) + c.save(ignore_expires=True, ignore_discard=True) + try: + c = MozillaCookieJar(filename) + c.revert(ignore_expires=True, ignore_discard=True) + finally: + os.unlink(c.filename) + # cookies unchanged apart from lost info re. 
whether path was specified + self.assertEquals( + repr(c), + re.sub("path_specified=%s" % True, "path_specified=%s" % False, + old_str) + ) + self.assertEquals(interact_netscape(c, "http://www.acme.com/foo/"), + '"spam"; eggs') + + def test_rfc2109_handling(self): + # RFC 2109 cookies are handled as RFC 2965 or Netscape cookies, + # dependent on policy settings + for rfc2109_as_netscape, rfc2965, version in [ + # default according to rfc2965 if not explicitly specified + (None, False, 0), + (None, True, 1), + # explicit rfc2109_as_netscape + (False, False, None), # version None here means no cookie stored + (False, True, 1), + (True, False, 0), + (True, True, 0), + ]: + policy = DefaultCookiePolicy( + rfc2109_as_netscape=rfc2109_as_netscape, + rfc2965=rfc2965) + c = CookieJar(policy) + interact_netscape(c, "http://www.example.com/", "ni=ni; Version=1") + try: + cookie = c._cookies["www.example.com"]["/"]["ni"] + except KeyError: + self.assert_(version is None) # didn't expect a stored cookie + else: + self.assertEqual(cookie.version, version) + # 2965 cookies are unaffected + interact_2965(c, "http://www.example.com/", + "foo=bar; Version=1") + if rfc2965: + cookie2965 = c._cookies["www.example.com"]["/"]["foo"] + self.assertEqual(cookie2965.version, 1) + + def test_ns_parser(self): + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", + 'spam=eggs; DoMain=.acme.com; port; blArgh="feep"') + interact_netscape(c, "http://www.acme.com/", 'ni=ni; port=80,8080') + interact_netscape(c, "http://www.acme.com:80/", 'nini=ni') + interact_netscape(c, "http://www.acme.com:80/", 'foo=bar; expires=') + interact_netscape(c, "http://www.acme.com:80/", 'spam=eggs; ' + 'expires="Foo Bar 25 33:22:11 3022"') + + cookie = c._cookies[".acme.com"]["/"]["spam"] + self.assertEquals(cookie.domain, ".acme.com") + self.assert_(cookie.domain_specified) + self.assertEquals(cookie.port, DEFAULT_HTTP_PORT) + self.assert_(not cookie.port_specified) + # case is preserved + 
self.assert_(cookie.has_nonstandard_attr("blArgh") and + not cookie.has_nonstandard_attr("blargh")) + + cookie = c._cookies["www.acme.com"]["/"]["ni"] + self.assertEquals(cookie.domain, "www.acme.com") + self.assert_(not cookie.domain_specified) + self.assertEquals(cookie.port, "80,8080") + self.assert_(cookie.port_specified) + + cookie = c._cookies["www.acme.com"]["/"]["nini"] + self.assert_(cookie.port is None) + self.assert_(not cookie.port_specified) + + # invalid expires should not cause cookie to be dropped + foo = c._cookies["www.acme.com"]["/"]["foo"] + spam = c._cookies["www.acme.com"]["/"]["foo"] + self.assert_(foo.expires is None) + self.assert_(spam.expires is None) + + def test_ns_parser_special_names(self): + # names such as 'expires' are not special in first name=value pair + # of Set-Cookie: header + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", 'expires=eggs') + interact_netscape(c, "http://www.acme.com/", 'version=eggs; spam=eggs') + + cookies = c._cookies["www.acme.com"]["/"] + self.assert_('expires' in cookies) + self.assert_('version' in cookies) + + def test_expires(self): + # if expires is in future, keep cookie... + c = CookieJar() + future = time2netscape(time.time()+3600) + interact_netscape(c, "http://www.acme.com/", 'spam="bar"; expires=%s' % + future) + self.assertEquals(len(c), 1) + now = time2netscape(time.time()-1) + # ... 
and if in past or present, discard it + interact_netscape(c, "http://www.acme.com/", 'foo="eggs"; expires=%s' % + now) + h = interact_netscape(c, "http://www.acme.com/") + self.assertEquals(len(c), 1) + self.assert_('spam="bar"' in h and "foo" not in h) + + # max-age takes precedence over expires, and zero max-age is request to + # delete both new cookie and any old matching cookie + interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; expires=%s' % + future) + interact_netscape(c, "http://www.acme.com/", 'bar="bar"; expires=%s' % + future) + self.assertEquals(len(c), 3) + interact_netscape(c, "http://www.acme.com/", 'eggs="bar"; ' + 'expires=%s; max-age=0' % future) + interact_netscape(c, "http://www.acme.com/", 'bar="bar"; ' + 'max-age=0; expires=%s' % future) + h = interact_netscape(c, "http://www.acme.com/") + self.assertEquals(len(c), 1) + + # test expiry at end of session for cookies with no expires attribute + interact_netscape(c, "http://www.rhubarb.net/", 'whum="fizz"') + self.assertEquals(len(c), 2) + c.clear_session_cookies() + self.assertEquals(len(c), 1) + self.assert_('spam="bar"' in h) + + # XXX RFC 2965 expiry rules (some apply to V0 too) + + def test_default_path(self): + # RFC 2965 + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/", 'spam="bar"; Version="1"') + self.assert_("/" in c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/blah", 'eggs="bar"; Version="1"') + self.assert_("/" in c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/blah/rhubarb", + 'eggs="bar"; Version="1"') + self.assert_("/blah/" in c._cookies["www.acme.com"]) + + c = CookieJar(pol) + interact_2965(c, "http://www.acme.com/blah/rhubarb/", + 'eggs="bar"; Version="1"') + self.assert_("/blah/rhubarb/" in c._cookies["www.acme.com"]) + + # Netscape + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/", 'spam="bar"') + 
self.assert_("/" in c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah", 'eggs="bar"') + self.assert_("/" in c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah/rhubarb", 'eggs="bar"') + self.assert_("/blah" in c._cookies["www.acme.com"]) + + c = CookieJar() + interact_netscape(c, "http://www.acme.com/blah/rhubarb/", 'eggs="bar"') + self.assert_("/blah/rhubarb" in c._cookies["www.acme.com"]) + + def test_escape_path(self): + cases = [ + # quoted safe + ("/foo%2f/bar", "/foo%2F/bar"), + ("/foo%2F/bar", "/foo%2F/bar"), + # quoted % + ("/foo%%/bar", "/foo%%/bar"), + # quoted unsafe + ("/fo%19o/bar", "/fo%19o/bar"), + ("/fo%7do/bar", "/fo%7Do/bar"), + # unquoted safe + ("/foo/bar&", "/foo/bar&"), + ("/foo//bar", "/foo//bar"), + ("\176/foo/bar", "\176/foo/bar"), + # unquoted unsafe + ("/foo\031/bar", "/foo%19/bar"), + ("/\175foo/bar", "/%7Dfoo/bar"), + # unicode + ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"), # UTF-8 encoded + ] + for arg, result in cases: + self.assertEquals(escape_path(arg), result) + + def test_request_path(self): + # with parameters + req = urllib2.Request("http://www.example.com/rheum/rhaponicum;" + "foo=bar;sing=song?apples=pears&spam=eggs#ni") + self.assertEquals(request_path(req), "/rheum/rhaponicum;" + "foo=bar;sing=song?apples=pears&spam=eggs#ni") + # without parameters + req = urllib2.Request("http://www.example.com/rheum/rhaponicum?" + "apples=pears&spam=eggs#ni") + self.assertEquals(request_path(req), "/rheum/rhaponicum?" 
+ "apples=pears&spam=eggs#ni") + # missing final slash + req = urllib2.Request("http://www.example.com") + self.assertEquals(request_path(req), "/") + + def test_request_port(self): + req = urllib2.Request("http://www.acme.com:1234/", + headers={"Host": "www.acme.com:4321"}) + self.assertEquals(request_port(req), "1234") + req = urllib2.Request("http://www.acme.com/", + headers={"Host": "www.acme.com:4321"}) + self.assertEquals(request_port(req), DEFAULT_HTTP_PORT) + + def test_request_host(self): + # this request is illegal (RFC2616, 14.2.3) + req = urllib2.Request("http://1.1.1.1/", + headers={"Host": "www.acme.com:80"}) + # libwww-perl wants this response, but that seems wrong (RFC 2616, + # section 5.2, point 1., and RFC 2965 section 1, paragraph 3) + #self.assertEquals(request_host(req), "www.acme.com") + self.assertEquals(request_host(req), "1.1.1.1") + req = urllib2.Request("http://www.acme.com/", + headers={"Host": "irrelevant.com"}) + self.assertEquals(request_host(req), "www.acme.com") + # not actually sure this one is valid Request object, so maybe should + # remove test for no host in url in request_host function? 
+ req = urllib2.Request("/resource.html", + headers={"Host": "www.acme.com"}) + self.assertEquals(request_host(req), "www.acme.com") + # port shouldn't be in request-host + req = urllib2.Request("http://www.acme.com:2345/resource.html", + headers={"Host": "www.acme.com:5432"}) + self.assertEquals(request_host(req), "www.acme.com") + + def test_is_HDN(self): + self.assert_(is_HDN("foo.bar.com")) + self.assert_(is_HDN("1foo2.3bar4.5com")) + self.assert_(not is_HDN("192.168.1.1")) + self.assert_(not is_HDN("")) + self.assert_(not is_HDN(".")) + self.assert_(not is_HDN(".foo.bar.com")) + self.assert_(not is_HDN("..foo")) + self.assert_(not is_HDN("foo.")) + + def test_reach(self): + self.assertEquals(reach("www.acme.com"), ".acme.com") + self.assertEquals(reach("acme.com"), "acme.com") + self.assertEquals(reach("acme.local"), ".local") + self.assertEquals(reach(".local"), ".local") + self.assertEquals(reach(".com"), ".com") + self.assertEquals(reach("."), ".") + self.assertEquals(reach(""), "") + self.assertEquals(reach("192.168.0.1"), "192.168.0.1") + + def test_domain_match(self): + self.assert_(domain_match("192.168.1.1", "192.168.1.1")) + self.assert_(not domain_match("192.168.1.1", ".168.1.1")) + self.assert_(domain_match("x.y.com", "x.Y.com")) + self.assert_(domain_match("x.y.com", ".Y.com")) + self.assert_(not domain_match("x.y.com", "Y.com")) + self.assert_(domain_match("a.b.c.com", ".c.com")) + self.assert_(not domain_match(".c.com", "a.b.c.com")) + self.assert_(domain_match("example.local", ".local")) + self.assert_(not domain_match("blah.blah", "")) + self.assert_(not domain_match("", ".rhubarb.rhubarb")) + self.assert_(domain_match("", "")) + + self.assert_(user_domain_match("acme.com", "acme.com")) + self.assert_(not user_domain_match("acme.com", ".acme.com")) + self.assert_(user_domain_match("rhubarb.acme.com", ".acme.com")) + self.assert_(user_domain_match("www.rhubarb.acme.com", ".acme.com")) + self.assert_(user_domain_match("x.y.com", "x.Y.com")) + 
self.assert_(user_domain_match("x.y.com", ".Y.com")) + self.assert_(not user_domain_match("x.y.com", "Y.com")) + self.assert_(user_domain_match("y.com", "Y.com")) + self.assert_(not user_domain_match(".y.com", "Y.com")) + self.assert_(user_domain_match(".y.com", ".Y.com")) + self.assert_(user_domain_match("x.y.com", ".com")) + self.assert_(not user_domain_match("x.y.com", "com")) + self.assert_(not user_domain_match("x.y.com", "m")) + self.assert_(not user_domain_match("x.y.com", ".m")) + self.assert_(not user_domain_match("x.y.com", "")) + self.assert_(not user_domain_match("x.y.com", ".")) + self.assert_(user_domain_match("192.168.1.1", "192.168.1.1")) + # not both HDNs, so must string-compare equal to match + self.assert_(not user_domain_match("192.168.1.1", ".168.1.1")) + self.assert_(not user_domain_match("192.168.1.1", ".")) + # empty string is a special case + self.assert_(not user_domain_match("192.168.1.1", "")) + + def test_wrong_domain(self): + # Cookies whose effective request-host name does not domain-match the + # domain are rejected. + + # XXX far from complete + c = CookieJar() + interact_2965(c, "http://www.nasty.com/", + 'foo=bar; domain=friendly.org; Version="1"') + self.assertEquals(len(c), 0) + + def test_strict_domain(self): + # Cookies whose domain is a country-code tld like .co.uk should + # not be set if CookiePolicy.strict_domain is true. 
+ cp = DefaultCookiePolicy(strict_domain=True) + cj = CookieJar(policy=cp) + interact_netscape(cj, "http://example.co.uk/", 'no=problemo') + interact_netscape(cj, "http://example.co.uk/", + 'okey=dokey; Domain=.example.co.uk') + self.assertEquals(len(cj), 2) + for pseudo_tld in [".co.uk", ".org.za", ".tx.us", ".name.us"]: + interact_netscape(cj, "http://example.%s/" % pseudo_tld, + 'spam=eggs; Domain=.co.uk') + self.assertEquals(len(cj), 2) + + def test_two_component_domain_ns(self): + # Netscape: .www.bar.com, www.bar.com, .bar.com, bar.com, no domain + # should all get accepted, as should .acme.com, acme.com and no domain + # for 2-component domains like acme.com. + c = CookieJar() + + # two-component V0 domain is OK + interact_netscape(c, "http://foo.net/", 'ns=bar') + self.assertEquals(len(c), 1) + self.assertEquals(c._cookies["foo.net"]["/"]["ns"].value, "bar") + self.assertEquals(interact_netscape(c, "http://foo.net/"), "ns=bar") + # *will* be returned to any other domain (unlike RFC 2965)... + self.assertEquals(interact_netscape(c, "http://www.foo.net/"), + "ns=bar") + # ...unless requested otherwise + pol = DefaultCookiePolicy( + strict_ns_domain=DefaultCookiePolicy.DomainStrictNonDomain) + c.set_policy(pol) + self.assertEquals(interact_netscape(c, "http://www.foo.net/"), "") + + # unlike RFC 2965, even explicit two-component domain is OK, + # because .foo.net matches foo.net + interact_netscape(c, "http://foo.net/foo/", + 'spam1=eggs; domain=foo.net') + # even if starts with a dot -- in NS rules, .foo.net matches foo.net! 
+ interact_netscape(c, "http://foo.net/foo/bar/", + 'spam2=eggs; domain=.foo.net') + self.assertEquals(len(c), 3) + self.assertEquals(c._cookies[".foo.net"]["/foo"]["spam1"].value, + "eggs") + self.assertEquals(c._cookies[".foo.net"]["/foo/bar"]["spam2"].value, + "eggs") + self.assertEquals(interact_netscape(c, "http://foo.net/foo/bar/"), + "spam2=eggs; spam1=eggs; ns=bar") + + # top-level domain is too general + interact_netscape(c, "http://foo.net/", 'nini="ni"; domain=.net') + self.assertEquals(len(c), 3) + +## # Netscape protocol doesn't allow non-special top level domains (such +## # as co.uk) in the domain attribute unless there are at least three +## # dots in it. + # Oh yes it does! Real implementations don't check this, and real + # cookies (of course) rely on that behaviour. + interact_netscape(c, "http://foo.co.uk", 'nasty=trick; domain=.co.uk') +## self.assertEquals(len(c), 2) + self.assertEquals(len(c), 4) + + def test_two_component_domain_rfc2965(self): + pol = DefaultCookiePolicy(rfc2965=True) + c = CookieJar(pol) + + # two-component V1 domain is OK + interact_2965(c, "http://foo.net/", 'foo=bar; Version="1"') + self.assertEquals(len(c), 1) + self.assertEquals(c._cookies["foo.net"]["/"]["foo"].value, "bar") + self.assertEquals(interact_2965(c, "http://foo.net/"), + "$Version=1; foo=bar") + # won't be returned to any other domain (because domain was implied) + self.assertEquals(interact_2965(c, "http://www.foo.net/"), "") + + # unless domain is given explicitly, because then it must be + # rewritten to start with a dot: foo.net --> .foo.net, which does + # not domain-match foo.net + interact_2965(c, "http://foo.net/foo", + 'spam=eggs; domain=foo.net; path=/foo; Version="1"') + self.assertEquals(len(c), 1) + self.assertEquals(interact_2965(c, "http://foo.net/foo"), + "$Version=1; foo=bar") + + # explicit foo.net from three-component domain www.foo.net *does* get + # set, because .foo.net domain-matches .foo.net + interact_2965(c, 
"http://www.foo.net/foo/", + 'spam=eggs; domain=foo.net; Version="1"') + self.assertEquals(c._cookies[".foo.net"]["/foo/"]["spam"].value, + "eggs") + self.assertEquals(len(c), 2) + self.assertEquals(interact_2965(c, "http://foo.net/foo/"), + "$Version=1; foo=bar") + self.assertEquals(interact_2965(c, "http://www.foo.net/foo/"), + '$Version=1; spam=eggs; $Domain="foo.net"') + + # top-level domain is too general + interact_2965(c, "http://foo.net/", + 'ni="ni"; domain=".net"; Version="1"') + self.assertEquals(len(c), 2) + + # RFC 2965 doesn't require blocking this + interact_2965(c, "http://foo.co.uk/", + 'nasty=trick; domain=.co.uk; Version="1"') + self.assertEquals(len(c), 3) + + def test_domain_allow(self): + c = CookieJar(policy=DefaultCookiePolicy( + blocked_domains=["acme.com"], + allowed_domains=["www.acme.com"])) + + req = urllib2.Request("http://acme.com/") + headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] + res = FakeResponse(headers, "http://acme.com/") + c.extract_cookies(res, req) + self.assertEquals(len(c), 0) + + req = urllib2.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + self.assertEquals(len(c), 1) + + req = urllib2.Request("http://www.coyote.com/") + res = FakeResponse(headers, "http://www.coyote.com/") + c.extract_cookies(res, req) + self.assertEquals(len(c), 1) + + # set a cookie with non-allowed domain... + req = urllib2.Request("http://www.coyote.com/") + res = FakeResponse(headers, "http://www.coyote.com/") + cookies = c.make_cookies(res, req) + c.set_cookie(cookies[0]) + self.assertEquals(len(c), 2) + # ... 
and check is doesn't get returned + c.add_cookie_header(req) + self.assert_(not req.has_header("Cookie")) + + def test_domain_block(self): + pol = DefaultCookiePolicy( + rfc2965=True, blocked_domains=[".acme.com"]) + c = CookieJar(policy=pol) + headers = ["Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/"] + + req = urllib2.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + self.assertEquals(len(c), 0) + + p = pol.set_blocked_domains(["acme.com"]) + c.extract_cookies(res, req) + self.assertEquals(len(c), 1) + + c.clear() + req = urllib2.Request("http://www.roadrunner.net/") + res = FakeResponse(headers, "http://www.roadrunner.net/") + c.extract_cookies(res, req) + self.assertEquals(len(c), 1) + req = urllib2.Request("http://www.roadrunner.net/") + c.add_cookie_header(req) + self.assert_((req.has_header("Cookie") and + req.has_header("Cookie2"))) + + c.clear() + pol.set_blocked_domains([".acme.com"]) + c.extract_cookies(res, req) + self.assertEquals(len(c), 1) + + # set a cookie with blocked domain... + req = urllib2.Request("http://www.acme.com/") + res = FakeResponse(headers, "http://www.acme.com/") + cookies = c.make_cookies(res, req) + c.set_cookie(cookies[0]) + self.assertEquals(len(c), 2) + # ... 
and check is doesn't get returned + c.add_cookie_header(req) + self.assert_(not req.has_header("Cookie")) + + def test_secure(self): + for ns in True, False: + for whitespace in " ", "": + c = CookieJar() + if ns: + pol = DefaultCookiePolicy(rfc2965=False) + int = interact_netscape + vs = "" + else: + pol = DefaultCookiePolicy(rfc2965=True) + int = interact_2965 + vs = "; Version=1" + c.set_policy(pol) + url = "http://www.acme.com/" + int(c, url, "foo1=bar%s%s" % (vs, whitespace)) + int(c, url, "foo2=bar%s; secure%s" % (vs, whitespace)) + self.assert_( + not c._cookies["www.acme.com"]["/"]["foo1"].secure, + "non-secure cookie registered secure") + self.assert_( + c._cookies["www.acme.com"]["/"]["foo2"].secure, + "secure cookie registered non-secure") + + def test_quote_cookie_value(self): + c = CookieJar(policy=DefaultCookiePolicy(rfc2965=True)) + interact_2965(c, "http://www.acme.com/", r'foo=\b"a"r; Version=1') + h = interact_2965(c, "http://www.acme.com/") + self.assertEquals(h, r'$Version=1; foo=\\b\"a\"r') + + def test_missing_final_slash(self): + # Missing slash from request URL's abs_path should be assumed present. 
+ url = "http://www.acme.com" + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + interact_2965(c, url, "foo=bar; Version=1") + req = urllib2.Request(url) + self.assertEquals(len(c), 1) + c.add_cookie_header(req) + self.assert_(req.has_header("Cookie")) + + def test_domain_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assert_("Domain" not in h, + "absent domain returned with domain present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Domain=.bar.com') + h = interact_2965(c, url) + self.assert_('$Domain=".bar.com"' in h, "domain not returned") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + # note missing initial dot in Domain + interact_2965(c, url, 'spam=eggs; Version=1; Domain=bar.com') + h = interact_2965(c, url) + self.assert_('$Domain="bar.com"' in h, "domain not returned") + + def test_path_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assert_("Path" not in h, + "absent path returned with path present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Path=/') + h = interact_2965(c, url) + self.assert_('$Path="/"' in h, "path not returned") + + def test_port_mirror(self): + pol = DefaultCookiePolicy(rfc2965=True) + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1") + h = interact_2965(c, url) + self.assert_("Port" not in h, + "absent port returned with port present") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, "spam=eggs; Version=1; Port") + h = interact_2965(c, url) + self.assert_(re.search("\$Port([^=]|$)", h), + "port with no value not returned with no value") + + c = 
CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Port="80"') + h = interact_2965(c, url) + self.assert_('$Port="80"' in h, + "port with single value not returned with single value") + + c = CookieJar(pol) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; Port="80,8080"') + h = interact_2965(c, url) + self.assert_('$Port="80,8080"' in h, + "port with multiple values not returned with multiple " + "values") + + def test_no_return_comment(self): + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + url = "http://foo.bar.com/" + interact_2965(c, url, 'spam=eggs; Version=1; ' + 'Comment="does anybody read these?"; ' + 'CommentURL="http://foo.bar.net/comment.html"') + h = interact_2965(c, url) + self.assert_( + "Comment" not in h, + "Comment or CommentURL cookie-attributes returned to server") + + def test_Cookie_iterator(self): + cs = CookieJar(DefaultCookiePolicy(rfc2965=True)) + # add some random cookies + interact_2965(cs, "http://blah.spam.org/", 'foo=eggs; Version=1; ' + 'Comment="does anybody read these?"; ' + 'CommentURL="http://foo.bar.net/comment.html"') + interact_netscape(cs, "http://www.acme.com/blah/", "spam=bar; secure") + interact_2965(cs, "http://www.acme.com/blah/", + "foo=bar; secure; Version=1") + interact_2965(cs, "http://www.acme.com/blah/", + "foo=bar; path=/; Version=1") + interact_2965(cs, "http://www.sol.no", + r'bang=wallop; version=1; domain=".sol.no"; ' + r'port="90,100, 80,8080"; ' + r'max-age=100; Comment = "Just kidding! 
(\"|\\\\) "') + + versions = [1, 1, 1, 0, 1] + names = ["bang", "foo", "foo", "spam", "foo"] + domains = [".sol.no", "blah.spam.org", "www.acme.com", + "www.acme.com", "www.acme.com"] + paths = ["/", "/", "/", "/blah", "/blah/"] + + for i in range(4): + i = 0 + for c in cs: + self.assert_(isinstance(c, Cookie)) + self.assertEquals(c.version, versions[i]) + self.assertEquals(c.name, names[i]) + self.assertEquals(c.domain, domains[i]) + self.assertEquals(c.path, paths[i]) + i = i + 1 + + def test_parse_ns_headers(self): + # missing domain value (invalid cookie) + self.assertEquals( + parse_ns_headers(["foo=bar; path=/; domain"]), + [[("foo", "bar"), + ("path", "/"), ("domain", None), ("version", "0")]] + ) + # invalid expires value + self.assertEquals( + parse_ns_headers(["foo=bar; expires=Foo Bar 12 33:22:11 2000"]), + [[("foo", "bar"), ("expires", None), ("version", "0")]] + ) + # missing cookie value (valid cookie) + self.assertEquals( + parse_ns_headers(["foo"]), + [[("foo", None), ("version", "0")]] + ) + # shouldn't add version if header is empty + self.assertEquals(parse_ns_headers([""]), []) + + def test_bad_cookie_header(self): + + def cookiejar_from_cookie_headers(headers): + c = CookieJar() + req = urllib2.Request("http://www.example.com/") + r = FakeResponse(headers, "http://www.example.com/") + c.extract_cookies(r, req) + return c + + # none of these bad headers should cause an exception to be raised + for headers in [ + ["Set-Cookie: "], # actually, nothing wrong with this + ["Set-Cookie2: "], # ditto + # missing domain value + ["Set-Cookie2: a=foo; path=/; Version=1; domain"], + # bad max-age + ["Set-Cookie: b=foo; max-age=oops"], + ]: + c = cookiejar_from_cookie_headers(headers) + # these bad cookies shouldn't be set + self.assertEquals(len(c), 0) + + # cookie with invalid expires is treated as session cookie + headers = ["Set-Cookie: c=foo; expires=Foo Bar 12 33:22:11 2000"] + c = cookiejar_from_cookie_headers(headers) + cookie = 
c._cookies["www.example.com"]["/"]["c"] + self.assert_(cookie.expires is None) + + +class LWPCookieTests(TestCase): + # Tests taken from libwww-perl, with a few modifications and additions. + + def test_netscape_example_1(self): + #------------------------------------------------------------------- + # First we check that it works for the original example at + # http://www.netscape.com/newsref/std/cookie_spec.html + + # Client requests a document, and receives in the response: + # + # Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE + # + # Client requests a document, and receives in the response: + # + # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # Client receives: + # + # Set-Cookie: SHIPPING=FEDEX; path=/fo + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # When client requests a URL in path "/foo" on this server, it sends: + # + # Cookie: CUSTOMER=WILE_E_COYOTE; PART_NUMBER=ROCKET_LAUNCHER_0001; SHIPPING=FEDEX + # + # The last Cookie is buggy, because both specifications say that the + # most specific cookie must be sent first. SHIPPING=FEDEX is the + # most specific and should thus be first. 
+ + year_plus_one = time.localtime()[0] + 1 + + headers = [] + + c = CookieJar(DefaultCookiePolicy(rfc2965 = True)) + + #req = urllib2.Request("http://1.1.1.1/", + # headers={"Host": "www.acme.com:80"}) + req = urllib2.Request("http://www.acme.com:80/", + headers={"Host": "www.acme.com:80"}) + + headers.append( + "Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/ ; " + "expires=Wednesday, 09-Nov-%d 23:12:40 GMT" % year_plus_one) + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib2.Request("http://www.acme.com/") + c.add_cookie_header(req) + + self.assertEqual(req.get_header("Cookie"), "CUSTOMER=WILE_E_COYOTE") + self.assertEqual(req.get_header("Cookie2"), '$Version="1"') + + headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib2.Request("http://www.acme.com/foo/bar") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assert_("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and + "CUSTOMER=WILE_E_COYOTE" in h) + + headers.append('Set-Cookie: SHIPPING=FEDEX; path=/foo') + res = FakeResponse(headers, "http://www.acme.com") + c.extract_cookies(res, req) + + req = urllib2.Request("http://www.acme.com/") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assert_("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and + "CUSTOMER=WILE_E_COYOTE" in h and + "SHIPPING=FEDEX" not in h) + + req = urllib2.Request("http://www.acme.com/foo/") + c.add_cookie_header(req) + + h = req.get_header("Cookie") + self.assert_(("PART_NUMBER=ROCKET_LAUNCHER_0001" in h and + "CUSTOMER=WILE_E_COYOTE" in h and + h.startswith("SHIPPING=FEDEX;"))) + + def test_netscape_example_2(self): + # Second Example transaction sequence: + # + # Assume all mappings from above have been cleared. 
+ # + # Client receives: + # + # Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/ + # + # When client requests a URL in path "/" on this server, it sends: + # + # Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # Client receives: + # + # Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo + # + # When client requests a URL in path "/ammo" on this server, it sends: + # + # Cookie: PART_NUMBER=RIDING_ROCKET_0023; PART_NUMBER=ROCKET_LAUNCHER_0001 + # + # NOTE: There are two name/value pairs named "PART_NUMBER" due to + # the inheritance of the "/" mapping in addition to the "/ammo" mapping. + + c = CookieJar() + headers = [] + + req = urllib2.Request("http://www.acme.com/") + headers.append("Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/") + res = FakeResponse(headers, "http://www.acme.com/") + + c.extract_cookies(res, req) + + req = urllib2.Request("http://www.acme.com/") + c.add_cookie_header(req) + + self.assertEquals(req.get_header("Cookie"), + "PART_NUMBER=ROCKET_LAUNCHER_0001") + + headers.append( + "Set-Cookie: PART_NUMBER=RIDING_ROCKET_0023; path=/ammo") + res = FakeResponse(headers, "http://www.acme.com/") + c.extract_cookies(res, req) + + req = urllib2.Request("http://www.acme.com/ammo") + c.add_cookie_header(req) + + self.assert_(re.search(r"PART_NUMBER=RIDING_ROCKET_0023;\s*" + "PART_NUMBER=ROCKET_LAUNCHER_0001", + req.get_header("Cookie"))) + + def test_ietf_example_1(self): + #------------------------------------------------------------------- + # Then we test with the examples from draft-ietf-http-state-man-mec-03.txt + # + # 5. EXAMPLES + + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + + # + # 5.1 Example 1 + # + # Most detail of request and response headers has been omitted. Assume + # the user agent has no stored cookies. + # + # 1. User Agent -> Server + # + # POST /acme/login HTTP/1.1 + # [form data] + # + # User identifies self via a form. + # + # 2. 
Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Customer="WILE_E_COYOTE"; Version="1"; Path="/acme" + # + # Cookie reflects user's identity. + + cookie = interact_2965( + c, 'http://www.acme.com/acme/login', + 'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"') + self.assert_(not cookie) + + # + # 3. User Agent -> Server + # + # POST /acme/pickitem HTTP/1.1 + # Cookie: $Version="1"; Customer="WILE_E_COYOTE"; $Path="/acme" + # [form data] + # + # User selects an item for ``shopping basket.'' + # + # 4. Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + # Path="/acme" + # + # Shopping basket contains an item. + + cookie = interact_2965(c, 'http://www.acme.com/acme/pickitem', + 'Part_Number="Rocket_Launcher_0001"; ' + 'Version="1"; Path="/acme"'); + self.assert_(re.search( + r'^\$Version="?1"?; Customer="?WILE_E_COYOTE"?; \$Path="/acme"$', + cookie)) + + # + # 5. User Agent -> Server + # + # POST /acme/shipping HTTP/1.1 + # Cookie: $Version="1"; + # Customer="WILE_E_COYOTE"; $Path="/acme"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # [form data] + # + # User selects shipping method from form. + # + # 6. Server -> User Agent + # + # HTTP/1.1 200 OK + # Set-Cookie2: Shipping="FedEx"; Version="1"; Path="/acme" + # + # New cookie reflects shipping method. + + cookie = interact_2965(c, "http://www.acme.com/acme/shipping", + 'Shipping="FedEx"; Version="1"; Path="/acme"') + + self.assert_(re.search(r'^\$Version="?1"?;', cookie)) + self.assert_(re.search(r'Part_Number="?Rocket_Launcher_0001"?;' + '\s*\$Path="\/acme"', cookie)) + self.assert_(re.search(r'Customer="?WILE_E_COYOTE"?;\s*\$Path="\/acme"', + cookie)) + + # + # 7. 
User Agent -> Server + # + # POST /acme/process HTTP/1.1 + # Cookie: $Version="1"; + # Customer="WILE_E_COYOTE"; $Path="/acme"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme"; + # Shipping="FedEx"; $Path="/acme" + # [form data] + # + # User chooses to process order. + # + # 8. Server -> User Agent + # + # HTTP/1.1 200 OK + # + # Transaction is complete. + + cookie = interact_2965(c, "http://www.acme.com/acme/process") + self.assert_( + re.search(r'Shipping="?FedEx"?;\s*\$Path="\/acme"', cookie) and + "WILE_E_COYOTE" in cookie) + + # + # The user agent makes a series of requests on the origin server, after + # each of which it receives a new cookie. All the cookies have the same + # Path attribute and (default) domain. Because the request URLs all have + # /acme as a prefix, and that matches the Path attribute, each request + # contains all the cookies received so far. + + def test_ietf_example_2(self): + # 5.2 Example 2 + # + # This example illustrates the effect of the Path attribute. All detail + # of request and response headers has been omitted. Assume the user agent + # has no stored cookies. + + c = CookieJar(DefaultCookiePolicy(rfc2965=True)) + + # Imagine the user agent has received, in response to earlier requests, + # the response headers + # + # Set-Cookie2: Part_Number="Rocket_Launcher_0001"; Version="1"; + # Path="/acme" + # + # and + # + # Set-Cookie2: Part_Number="Riding_Rocket_0023"; Version="1"; + # Path="/acme/ammo" + + interact_2965( + c, "http://www.acme.com/acme/ammo/specific", + 'Part_Number="Rocket_Launcher_0001"; Version="1"; Path="/acme"', + 'Part_Number="Riding_Rocket_0023"; Version="1"; Path="/acme/ammo"') + + # A subsequent request by the user agent to the (same) server for URLs of + # the form /acme/ammo/... 
would include the following request header: + # + # Cookie: $Version="1"; + # Part_Number="Riding_Rocket_0023"; $Path="/acme/ammo"; + # Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # + # Note that the NAME=VALUE pair for the cookie with the more specific Path + # attribute, /acme/ammo, comes before the one with the less specific Path + # attribute, /acme. Further note that the same cookie name appears more + # than once. + + cookie = interact_2965(c, "http://www.acme.com/acme/ammo/...") + self.assert_( + re.search(r"Riding_Rocket_0023.*Rocket_Launcher_0001", cookie)) + + # A subsequent request by the user agent to the (same) server for a URL of + # the form /acme/parts/ would include the following request header: + # + # Cookie: $Version="1"; Part_Number="Rocket_Launcher_0001"; $Path="/acme" + # + # Here, the second cookie's Path attribute /acme/ammo is not a prefix of + # the request URL, /acme/parts/, so the cookie does not get forwarded to + # the server. + + cookie = interact_2965(c, "http://www.acme.com/acme/parts/") + self.assert_("Rocket_Launcher_0001" in cookie and + "Riding_Rocket_0023" not in cookie) + + def test_rejection(self): + # Test rejection of Set-Cookie2 responses based on domain, path, port. 
+ pol = DefaultCookiePolicy(rfc2965=True) + + c = LWPCookieJar(policy=pol) + + max_age = "max-age=3600" + + # illegal domain (no embedded dots) + cookie = interact_2965(c, "http://www.acme.com", + 'foo=bar; domain=".com"; version=1') + self.assert_(not c) + + # legal domain + cookie = interact_2965(c, "http://www.acme.com", + 'ping=pong; domain="acme.com"; version=1') + self.assertEquals(len(c), 1) + + # illegal domain (host prefix "www.a" contains a dot) + cookie = interact_2965(c, "http://www.a.acme.com", + 'whiz=bang; domain="acme.com"; version=1') + self.assertEquals(len(c), 1) + + # legal domain + cookie = interact_2965(c, "http://www.a.acme.com", + 'wow=flutter; domain=".a.acme.com"; version=1') + self.assertEquals(len(c), 2) + + # can't partially match an IP-address + cookie = interact_2965(c, "http://125.125.125.125", + 'zzzz=ping; domain="125.125.125"; version=1') + self.assertEquals(len(c), 2) + + # illegal path (must be prefix of request path) + cookie = interact_2965(c, "http://www.sol.no", + 'blah=rhubarb; domain=".sol.no"; path="/foo"; ' + 'version=1') + self.assertEquals(len(c), 2) + + # legal path + cookie = interact_2965(c, "http://www.sol.no/foo/bar", + 'bing=bong; domain=".sol.no"; path="/foo"; ' + 'version=1') + self.assertEquals(len(c), 3) + + # illegal port (request-port not in list) + cookie = interact_2965(c, "http://www.sol.no", + 'whiz=ffft; domain=".sol.no"; port="90,100"; ' + 'version=1') + self.assertEquals(len(c), 3) + + # legal port + cookie = interact_2965( + c, "http://www.sol.no", + r'bang=wallop; version=1; domain=".sol.no"; ' + r'port="90,100, 80,8080"; ' + r'max-age=100; Comment = "Just kidding! 
(\"|\\\\) "') + self.assertEquals(len(c), 4) + + # port attribute without any value (current port) + cookie = interact_2965(c, "http://www.sol.no", + 'foo9=bar; version=1; domain=".sol.no"; port; ' + 'max-age=100;') + self.assertEquals(len(c), 5) + + # encoded path + # LWP has this test, but unescaping allowed path characters seems + # like a bad idea, so I think this should fail: +## cookie = interact_2965(c, "http://www.sol.no/foo/", +## r'foo8=bar; version=1; path="/%66oo"') + # but this is OK, because '<' is not an allowed HTTP URL path + # character: + cookie = interact_2965(c, "http://www.sol.no/", + 'output': 'Set-Cookie: chips=ahoy\nSet-Cookie: vienna=finger', + }, + + { 'data': 'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', + 'dict': {'keebler' : 'E=mc2; L="Loves"; fudge=\012;'}, + 'repr': '''''', + 'output': 'Set-Cookie: keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"', + }, + + # Check illegal cookies that have an '=' char in an unquoted value + { 'data': 'keebler=E=mc2', + 'dict': {'keebler' : 'E=mc2'}, + 'repr': "", + 'output': 'Set-Cookie: keebler=E=mc2', + } + ] + + for case in cases: + C = cookies.SimpleCookie() + C.load(case['data']) + self.assertEqual(repr(C), case['repr']) + self.assertEqual(C.output(sep='\n'), case['output']) + for k, v in sorted(case['dict'].items()): + self.assertEqual(C[k].value, v) + + def test_load(self): + C = cookies.SimpleCookie() + C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme') + + self.assertEqual(C['Customer'].value, 'WILE_E_COYOTE') + self.assertEqual(C['Customer']['version'], '1') + self.assertEqual(C['Customer']['path'], '/acme') + + self.assertEqual(C.output(['path']), + 'Set-Cookie: Customer="WILE_E_COYOTE"; Path=/acme') + self.assertEqual(C.js_output(), """ + + """) + self.assertEqual(C.js_output(['path']), """ + + """) + + def test_quoted_meta(self): + # Try cookie with quoted meta-data + C = cookies.SimpleCookie() + C.load('Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"') + 
self.assertEqual(C['Customer'].value, 'WILE_E_COYOTE') + self.assertEqual(C['Customer']['version'], '1') + self.assertEqual(C['Customer']['path'], '/acme') + +def test_main(): + run_unittest(CookieTests) + run_doctest(cookies) + +if __name__ == '__main__': + test_main() diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 7a77cff..ff4cf9b 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1,4 +1,4 @@ -import httplib +import http.client as httplib import io import socket @@ -48,8 +48,6 @@ class HeaderTests(TestCase): # Some headers are added automatically, but should not be added by # .request() if they are explicitly set. - import httplib - class HeaderCountingBuffer(list): def __init__(self): self.count = {} diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index cc79cb8..02be57a 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -4,16 +4,15 @@ Written by Cody A.W. Somerville , Josip Dzolonga, and Michael Otteneder for the 2007/08 GHOP contest. 
""" -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -from SimpleHTTPServer import SimpleHTTPRequestHandler -from CGIHTTPServer import CGIHTTPRequestHandler +from http.server import BaseHTTPRequestHandler, HTTPServer, \ + SimpleHTTPRequestHandler, CGIHTTPRequestHandler import os import sys import base64 import shutil import urllib -import httplib +import http.client import tempfile import threading @@ -59,7 +58,7 @@ class BaseTestCase(unittest.TestCase): self.thread.stop() def request(self, uri, method='GET', body=None, headers={}): - self.connection = httplib.HTTPConnection('localhost', self.PORT) + self.connection = http.client.HTTPConnection('localhost', self.PORT) self.connection.request(method, uri, body, headers) return self.connection.getresponse() @@ -92,7 +91,7 @@ class BaseHTTPServerTestCase(BaseTestCase): def setUp(self): BaseTestCase.setUp(self) - self.con = httplib.HTTPConnection('localhost', self.PORT) + self.con = http.client.HTTPConnection('localhost', self.PORT) self.con.connect() def test_command(self): @@ -343,7 +342,7 @@ class CGIHTTPServerTestCase(BaseTestCase): def test_main(verbose=None): try: cwd = os.getcwd() - support.run_unittest(#BaseHTTPServerTestCase, + support.run_unittest(BaseHTTPServerTestCase, SimpleHTTPServerTestCase, CGIHTTPServerTestCase ) diff --git a/Lib/test/test_pyclbr.py b/Lib/test/test_pyclbr.py index ca0aefc..e8eb94e 100644 --- a/Lib/test/test_pyclbr.py +++ b/Lib/test/test_pyclbr.py @@ -168,7 +168,6 @@ class PyclbrTest(TestCase): 'getproxies_internetconfig',)) # not on all platforms cm('pickle') cm('aifc', ignore=('openfp',)) # set with = in module - cm('Cookie', ignore=('Cookie',)) # Cookie is an alias for SmartCookie cm('sre_parse', ignore=('dump',)) # from sre_constants import * cm('pdb') cm('pydoc') diff --git a/Lib/test/test_shelve.py b/Lib/test/test_shelve.py index 5fb4641..b2ed87d 100644 --- a/Lib/test/test_shelve.py +++ b/Lib/test/test_shelve.py @@ -3,7 +3,7 @@ import shelve import glob from test 
import support from collections import MutableMapping -from test.test_anydbm import dbm_iterator +from test.test_dbm import dbm_iterator def L1(s): return s.decode("latin-1") diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index d12968c..d664582 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -855,7 +855,7 @@ class UnbufferedFileObjectClassTestCase(FileObjectClassTestCase): In this case (and in this case only), it should be possible to create a file object, read a line from it, create another file object, read another line from it, without loss of data in the - first file object's buffer. Note that httplib relies on this + first file object's buffer. Note that http.client relies on this when reading multiple requests from the same socket.""" bufsize = 0 # Use unbuffered mode diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index 4f884f0..ea3efd6 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -15,8 +15,7 @@ import shutil import traceback import asyncore -from BaseHTTPServer import HTTPServer -from SimpleHTTPServer import SimpleHTTPRequestHandler +from http.server import HTTPServer, SimpleHTTPRequestHandler # Optionally test SSL support, if we have it in the tested platform skip_expected = False diff --git a/Lib/test/test_sundry.py b/Lib/test/test_sundry.py index 292722d..18101e7 100644 --- a/Lib/test/test_sundry.py +++ b/Lib/test/test_sundry.py @@ -8,7 +8,6 @@ import warnings class TestUntestedModules(unittest.TestCase): def test_at_least_import_untested_modules(self): with support.catch_warning(): - import CGIHTTPServer import aifc import bdb import cgitb diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 7db281c..b14510f 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1,7 +1,7 @@ """Regresssion tests for urllib""" import urllib -import httplib +import http.client import io import unittest from test import support @@ -107,14 +107,14 @@ class 
urlopen_HttpTests(unittest.TestCase): def readline(self, length=None): if self.closed: return b"" return io.BytesIO.readline(self, length) - class FakeHTTPConnection(httplib.HTTPConnection): + class FakeHTTPConnection(http.client.HTTPConnection): def connect(self): self.sock = FakeSocket(fakedata) - self._connection_class = httplib.HTTPConnection - httplib.HTTPConnection = FakeHTTPConnection + self._connection_class = http.client.HTTPConnection + http.client.HTTPConnection = FakeHTTPConnection def unfakehttp(self): - httplib.HTTPConnection = self._connection_class + http.client.HTTPConnection = self._connection_class def test_read(self): self.fakehttp(b"Hello!") diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index cb730e2..3386800 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -77,7 +77,7 @@ def test_request_headers_methods(): Note the case normalization of header names here, to .capitalize()-case. This should be preserved for backwards-compatibility. (In the HTTP case, normalization to .title()-case is done by urllib2 before sending headers to - httplib). + http.client). 
>>> url = "http://example.com" >>> r = Request(url, headers={"Spam-eggs": "blah"}) @@ -348,12 +348,12 @@ class MockHTTPHandler(urllib2.BaseHandler): self._count = 0 self.requests = [] def http_open(self, req): - import mimetools, httplib, copy + import mimetools, http.client, copy from io import StringIO self.requests.append(copy.deepcopy(req)) if self._count == 0: self._count = self._count + 1 - name = httplib.responses[self.code] + name = http.client.responses[self.code] msg = mimetools.Message(StringIO(self.headers)) return self.parent.error( "http", req, MockFile(), self.code, name, msg) @@ -875,9 +875,8 @@ class HandlerTests(unittest.TestCase): def test_cookie_redirect(self): # cookies shouldn't leak into redirected requests - from cookielib import CookieJar - - from test.test_cookielib import interact_netscape + from http.cookiejar import CookieJar + from test.test_http_cookiejar import interact_netscape cj = CookieJar() interact_netscape(cj, "http://www.example.com/", "spam=eggs") diff --git a/Lib/test/test_urllib2_localnet.py b/Lib/test/test_urllib2_localnet.py index 6bf1820..6c877d9 100644 --- a/Lib/test/test_urllib2_localnet.py +++ b/Lib/test/test_urllib2_localnet.py @@ -4,29 +4,29 @@ import mimetools import threading import urlparse import urllib2 -import BaseHTTPServer +import http.server import unittest import hashlib from test import support # Loopback http server infrastructure -class LoopbackHttpServer(BaseHTTPServer.HTTPServer): +class LoopbackHttpServer(http.server.HTTPServer): """HTTP server w/ a few modifications that make it useful for loopback testing purposes. """ def __init__(self, server_address, RequestHandlerClass): - BaseHTTPServer.HTTPServer.__init__(self, - server_address, - RequestHandlerClass) + http.server.HTTPServer.__init__(self, + server_address, + RequestHandlerClass) # Set the timeout of our listening socket really low so # that we can stop the server easily. 
self.socket.settimeout(1.0) def get_request(self): - """BaseHTTPServer method, overridden.""" + """HTTPServer method, overridden.""" request, client_address = self.socket.accept() @@ -188,7 +188,7 @@ class DigestAuthHandler: # Proxy test infrastructure -class FakeProxyHandler(BaseHTTPServer.BaseHTTPRequestHandler): +class FakeProxyHandler(http.server.BaseHTTPRequestHandler): """This is a 'fake proxy' that makes it look like the entire internet has gone down due to a sudden zombie invasion. It main utility is in providing us with authentication support for @@ -283,7 +283,7 @@ class ProxyAuthTests(unittest.TestCase): def GetRequestHandler(responses): - class FakeHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): + class FakeHTTPRequestHandler(http.server.BaseHTTPRequestHandler): server_version = "TestHTTP/" requests = [] diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index 89e48cb..990d3da 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -33,7 +33,7 @@ class AuthTests(unittest.TestCase): ## could be used to HTTP authentication. # # def test_basic_auth(self): -# import httplib +# import http.client # # test_url = "http://www.python.org/test/test_urllib2/basic_auth" # test_hostport = "www.python.org" @@ -61,14 +61,14 @@ class AuthTests(unittest.TestCase): # # reasons, let's not implement it! 
(it's already implemented for proxy # # specification strings (that is, URLs or authorities specifying a # # proxy), so we must keep that) -# self.assertRaises(httplib.InvalidURL, +# self.assertRaises(http.client.InvalidURL, # urllib2.urlopen, "http://evil:thing@example.com") class CloseSocketTest(unittest.TestCase): def test_close(self): - import socket, httplib, gc + import socket, http.client, gc # calling .close() on urllib2's response objects should close the # underlying socket @@ -77,7 +77,7 @@ class CloseSocketTest(unittest.TestCase): response = _urlopen_with_retry("http://www.python.org/") abused_fileobject = response.fp httpresponse = abused_fileobject.raw - self.assert_(httpresponse.__class__ is httplib.HTTPResponse) + self.assert_(httpresponse.__class__ is http.client.HTTPResponse) fileobject = httpresponse.fp self.assert_(not fileobject.closed) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 25a9c9d..8325496 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -7,7 +7,7 @@ import xmlrpc.client as xmlrpclib import xmlrpc.server import threading import mimetools -import httplib +import http.client import socket import os from test import support @@ -340,9 +340,9 @@ class SimpleServerTestCase(unittest.TestCase): # [ch] The test 404 is causing lots of false alarms. def XXXtest_404(self): - # send POST with httplib, it should return 404 header and + # send POST with http.client, it should return 404 header and # 'Not Found' message. 
- conn = httplib.HTTPConnection('localhost', PORT) + conn = http.client.HTTPConnection('localhost', PORT) conn.request('POST', '/this-is-not-valid') response = conn.getresponse() conn.close() diff --git a/Lib/test/test_xmlrpc_net.py b/Lib/test/test_xmlrpc_net.py index 2525254..f6ebbf0 100644 --- a/Lib/test/test_xmlrpc_net.py +++ b/Lib/test/test_xmlrpc_net.py @@ -6,7 +6,7 @@ import sys import unittest from test import support -import xmlrpclib.client as xmlrpclib +import xmlrpc.client as xmlrpclib class CurrentTimeTest(unittest.TestCase): diff --git a/Lib/urllib.py b/Lib/urllib.py index 500bf06..2005baf 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -22,7 +22,7 @@ used to query various info about the object, if available. (mimetools.Message objects are queried with the getheader() method.) """ -import httplib +import http.client import os import socket import sys @@ -352,7 +352,7 @@ class URLopener: try: response = http_conn.getresponse() - except httplib.BadStatusLine: + except http.client.BadStatusLine: # something went wrong with the HTTP status line raise IOError('http protocol error', 0, 'got a bad status line', None) @@ -369,7 +369,7 @@ class URLopener: def open_http(self, url, data=None): """Use HTTP protocol.""" - return self._open_generic_http(httplib.HTTPConnection, url, data) + return self._open_generic_http(http.client.HTTPConnection, url, data) def http_error(self, url, fp, errcode, errmsg, headers, data=None): """Handle http errors. 
@@ -395,9 +395,9 @@ class URLopener: if _have_ssl: def _https_connection(self, host): - return httplib.HTTPSConnection(host, - key_file=self.key_file, - cert_file=self.cert_file) + return http.client.HTTPSConnection(host, + key_file=self.key_file, + cert_file=self.cert_file) def open_https(self, url, data=None): """Use HTTPS protocol.""" diff --git a/Lib/urllib2.py b/Lib/urllib2.py index f87d364..948c6c3 100644 --- a/Lib/urllib2.py +++ b/Lib/urllib2.py @@ -89,7 +89,7 @@ f = urllib2.urlopen('http://www.python.org/') import base64 import hashlib -import httplib +import http.client import io import mimetools import os @@ -441,7 +441,7 @@ def build_opener(*handlers): default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, FTPHandler, FileHandler, HTTPErrorProcessor] - if hasattr(httplib, 'HTTPS'): + if hasattr(http.client, 'HTTPS'): default_classes.append(HTTPSHandler) skip = set() for klass in default_classes: @@ -1047,7 +1047,7 @@ class AbstractHTTPHandler(BaseHandler): def do_open(self, http_class, req): """Return an addinfourl object for the request, using http_class. - http_class must implement the HTTPConnection API from httplib. + http_class must implement the HTTPConnection API from http.client. The addinfourl return value is a file-like object. It also has methods and attributes including: - info(): return a mimetools.Message object for the headers @@ -1082,7 +1082,7 @@ class AbstractHTTPHandler(BaseHandler): # object initialized properly. # XXX Should an HTTPResponse object really be passed to - # BufferedReader? If so, we should change httplib to support + # BufferedReader? If so, we should change http.client to support # this use directly. # Add some fake methods to the reader to satisfy BufferedReader. 
@@ -1101,23 +1101,23 @@ class AbstractHTTPHandler(BaseHandler): class HTTPHandler(AbstractHTTPHandler): def http_open(self, req): - return self.do_open(httplib.HTTPConnection, req) + return self.do_open(http.client.HTTPConnection, req) http_request = AbstractHTTPHandler.do_request_ -if hasattr(httplib, 'HTTPS'): +if hasattr(http.client, 'HTTPS'): class HTTPSHandler(AbstractHTTPHandler): def https_open(self, req): - return self.do_open(httplib.HTTPSConnection, req) + return self.do_open(http.client.HTTPSConnection, req) https_request = AbstractHTTPHandler.do_request_ class HTTPCookieProcessor(BaseHandler): def __init__(self, cookiejar=None): - import cookielib + import http.cookiejar if cookiejar is None: - cookiejar = cookielib.CookieJar() + cookiejar = http.cookiejar.CookieJar() self.cookiejar = cookiejar def http_request(self, request): diff --git a/Lib/wsgiref/simple_server.py b/Lib/wsgiref/simple_server.py index 980f97a..e74f576 100644 --- a/Lib/wsgiref/simple_server.py +++ b/Lib/wsgiref/simple_server.py @@ -10,7 +10,7 @@ For example usage, see the 'if __name__=="__main__"' block at the end of the module. See also the BaseHTTPServer module docs for other API information. 
""" -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer +from http.server import BaseHTTPRequestHandler, HTTPServer import urllib, sys from wsgiref.handlers import SimpleHandler diff --git a/Lib/xmlrpc/client.py b/Lib/xmlrpc/client.py index 138d86d..43e7a6c 100644 --- a/Lib/xmlrpc/client.py +++ b/Lib/xmlrpc/client.py @@ -135,7 +135,7 @@ Exported functions: """ import re, time, operator -import httplib +import http.client # -------------------------------------------------------------------- # Internal stuff @@ -1196,7 +1196,7 @@ class Transport: def send_request(self, host, handler, request_body, debug): host, extra_headers, x509 = self.get_host_info(host) - connection = httplib.HTTPConnection(host) + connection = http.client.HTTPConnection(host) if debug: connection.set_debuglevel(1) headers = {} @@ -1261,10 +1261,10 @@ class SafeTransport(Transport): import socket if not hasattr(socket, "ssl"): raise NotImplementedError( - "your version of httplib doesn't support HTTPS") + "your version of http.client doesn't support HTTPS") host, extra_headers, x509 = self.get_host_info(host) - connection = httplib.HTTPSConnection(host, None, **(x509 or {})) + connection = http.client.HTTPSConnection(host, None, **(x509 or {})) if debug: connection.set_debuglevel(1) headers = {} diff --git a/Lib/xmlrpc/server.py b/Lib/xmlrpc/server.py index 9668c8c..4ddc004 100644 --- a/Lib/xmlrpc/server.py +++ b/Lib/xmlrpc/server.py @@ -105,8 +105,9 @@ server.handle_request() # Based on code written by Fredrik Lundh. from xmlrpc.client import Fault, dumps, loads +from http.server import BaseHTTPRequestHandler +import http.server import socketserver -import BaseHTTPServer import sys import os import re @@ -408,7 +409,7 @@ class SimpleXMLRPCDispatcher: else: raise Exception('method "%s" is not supported' % method) -class SimpleXMLRPCRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): +class SimpleXMLRPCRequestHandler(BaseHTTPRequestHandler): """Simple XML-RPC request handler class. 
Handles all HTTP POST requests and attempts to decode them as @@ -500,7 +501,7 @@ class SimpleXMLRPCRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): """Selectively log an accepted request.""" if self.server.logRequests: - BaseHTTPServer.BaseHTTPRequestHandler.log_request(self, code, size) + BaseHTTPRequestHandler.log_request(self, code, size) class SimpleXMLRPCServer(socketserver.TCPServer, SimpleXMLRPCDispatcher): @@ -560,10 +561,9 @@ class CGIXMLRPCRequestHandler(SimpleXMLRPCDispatcher): """ code = 400 - message, explain = \ - BaseHTTPServer.BaseHTTPRequestHandler.responses[code] + message, explain = BaseHTTPRequestHandler.responses[code] - response = BaseHTTPServer.DEFAULT_ERROR_MESSAGE % \ + response = http.server.DEFAULT_ERROR_MESSAGE % \ { 'code' : code, 'message' : message, diff --git a/Misc/cheatsheet b/Misc/cheatsheet index ed7c98a..e9a2639 100644 --- a/Misc/cheatsheet +++ b/Misc/cheatsheet @@ -1795,14 +1795,10 @@ List of modules and packages in base distribution Standard library modules Operation Result aifc Stuff to parse AIFF-C and AIFF files. -dbm Generic interface to all dbm clones. (dbm.bsd, dbm.gnu, - dbm.ndbm, dbm.dumb) asynchat Support for 'chat' style protocols asyncore Asynchronous File I/O (in select style) atexit Register functions to be called at exit of Python interpreter. base64 Conversions to/from base64 RFC-MIME transport encoding . -BaseHTTPServer Base class forhttp services. -Bastion "Bastionification" utility (control access to instance vars) bdb A generic Python debugger base class. binhex Macintosh binhex compression/decompression. bisect List bisection algorithms. @@ -1810,7 +1806,6 @@ bz2 Support for bz2 compression/decompression. calendar Calendar printing functions. cgi Wraps the WWW Forms Common Gateway Interface (CGI). cgitb Utility for handling CGI tracebacks. -CGIHTTPServer CGI http services. cmd A generic class to build line-oriented command interpreters. datetime Basic date and time types. 
code Utilities needed to emulate Python's interactive interpreter @@ -1818,10 +1813,12 @@ codecs Lookup existing Unicode encodings and register new ones. colorsys Conversion functions between RGB and other color systems. commands Tools for executing UNIX commands . compileall Force "compilation" of all .py files in a directory. -ConfigParser Configuration file parser (much like windows .ini files) +configparser Configuration file parser (much like windows .ini files) copy Generic shallow and deep copying operations. -copy_reg Helper to provide extensibility for pickle/cPickle. +copyreg Helper to provide extensibility for pickle/cPickle. csv Read and write files with comma separated values. +dbm Generic interface to all dbm clones (dbm.bsd, dbm.gnu, + dbm.ndbm, dbm.dumb). dircache Sorted list of files in a dir, using a cache. difflib Tool for creating delta between sequences. dis Bytecode disassembler. @@ -1844,11 +1841,11 @@ getpass Utilities to get a password and/or the current user name. glob filename globbing. gzip Read & write gzipped files. heapq Priority queue implemented using lists organized as heaps. -HMAC Keyed-Hashing for Message Authentication -- RFC 2104. -htmlentitydefs Proposed entity definitions for HTML. -htmllib HTML parsing utilities. -HTMLParser A parser for HTML and XHTML. -httplib HTTP client class. +hmac Keyed-Hashing for Message Authentication -- RFC 2104. +html.entities HTML entity definitions. +html.parser A parser for HTML and XHTML. +http.client HTTP client class. +http.server HTTP server services. ihooks Hooks into the "import" mechanism. imaplib IMAP4 client.Based on RFC 2060. imghdr Recognizing image files based on their first few bytes. @@ -1864,7 +1861,6 @@ macurl2path Mac specific module for conversion between pathnames and URLs. mailbox A class to handle a unix-style or mmdf-style mailbox. mailcap Mailcap file handling (RFC 1524). mhlib MH (mailbox) interface. -mimetools Various tools used by MIME-reading or MIME-writing programs. 
mimetypes Guess the MIME type of a file. mmap Interface to memory-mapped files - they behave like mutable strings./font> @@ -1892,13 +1888,11 @@ pty Pseudo terminal utilities. pyexpat Interface to the Expay XML parser. py_compile Routine to "compile" a .py file to a .pyc file. pyclbr Parse a Python file and retrieve classes and methods. -Queue A multi-producer, multi-consumer queue. +queue A multi-producer, multi-consumer queue. quopri Conversions to/from quoted-printable transport encoding. random Random variable generators re Regular Expressions. -repr Redo repr() but with limits on most sizes. -rexec Restricted execution facilities ("safe" exec, eval, etc). -rfc822 RFC-822 message manipulation class. +reprlib Redo repr() but with limits on most sizes. rlcompleter Word completion for GNU readline 2.0. robotparser Parse robots.txt files, useful for web spiders. sched A generally useful event scheduler class. @@ -1906,7 +1900,6 @@ sgmllib A parser for SGML. shelve Manage shelves of pickled objects. shlex Lexical analyzer class for simple shell-like syntaxes. shutil Utility functions usable in a shell-like program. -SimpleHTTPServer Simple extension to base http class site Append module search paths for third-party packages to sys.path. smtplib SMTP Client class (RFC 821) @@ -1916,8 +1909,6 @@ stat Constants and functions for interpreting stat/lstat struct. statvfs Constants for interpreting statvfs struct as returned by os.statvfs()and os.fstatvfs() (if they exist). string A collection of string operations. -StringIO File-like objects that read/write a string buffer (a fasterC - implementation exists in built-in module: cStringIO). sunau Stuff to parse Sun and NeXT audio files. sunaudio Interpret sun audio headers. symbol Non-terminal symbols of Python grammar (from "graminit.h"). @@ -1927,7 +1918,6 @@ telnetlib TELNET client class. Based on RFC 854. tempfile Temporary file name allocation. textwrap Object for wrapping and filling text. 
threading Proposed new higher-level threading interfaces -threading_api (doc of the threading module) token Tokens (from "token.h"). tokenize Compiles a regular expression that recognizes Python tokens. traceback Format and print Python stack traces. @@ -1939,17 +1929,13 @@ unicodedata Interface to unicode properties. urllib Open an arbitrary URL. urlparse Parse URLs according to latest draft of standard. user Hook to allow user-specified customization code to run. -UserDict A wrapper to allow subclassing of built-in dict class. -UserList A wrapper to allow subclassing of built-in list class. -UserString A wrapper to allow subclassing of built-in string class. uu UUencode/UUdecode. unittest Utilities for implementing unit testing. wave Stuff to parse WAVE files. weakref Tools for creating and managing weakly referenced objects. webbrowser Platform independent URL launcher. xdrlib Implements (a subset of) Sun XDR (eXternal Data - Representation) -xmllib A parser for XML, using the derived class as static DTD. + Representation). xml.dom Classes for processing XML using the Document Object Model. xml.sax Classes for processing XML using the SAX API. xmlrpc.client Support for remote procedure calls using XML. -- cgit v0.12