diff options
Diffstat (limited to 'Lib/http')
-rw-r--r-- | Lib/http/client.py | 282 | ||||
-rw-r--r-- | Lib/http/cookiejar.py | 28 | ||||
-rw-r--r-- | Lib/http/cookies.py | 248 | ||||
-rw-r--r-- | Lib/http/server.py | 160 |
4 files changed, 354 insertions, 364 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index 4a65125..5466d06 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -71,6 +71,7 @@ import email.message import io import os import socket +import collections from urllib.parse import urlsplit import warnings @@ -257,13 +258,10 @@ def parse_headers(fp, _class=HTTPMessage): hstring = b''.join(headers).decode('iso-8859-1') return email.parser.Parser(_class=_class).parsestr(hstring) -class HTTPResponse(io.RawIOBase): - # strict: If true, raise BadStatusLine if the status line can't be - # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is - # false because it prevents clients from talking to HTTP/0.9 - # servers. Note that a response with a sufficiently corrupted - # status line will look like an HTTP/0.9 response. +_strict_sentinel = object() + +class HTTPResponse(io.RawIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -272,7 +270,7 @@ class HTTPResponse(io.RawIOBase): # text following RFC 2047. The basic status line parsing only # accepts iso-8859-1. - def __init__(self, sock, debuglevel=0, strict=0, method=None, url=None): + def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): # If the response includes a content-length header, we need to # make sure that the client doesn't read more than the # specified number of bytes. If it does, it will block until @@ -282,7 +280,10 @@ class HTTPResponse(io.RawIOBase): # clients unless they know what they are doing. self.fp = sock.makefile("rb") self.debuglevel = debuglevel - self.strict = strict + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) self._method = method # The HTTPResponse object is returned via urllib. The clients @@ -304,8 +305,9 @@ class HTTPResponse(io.RawIOBase): self.will_close = _UNKNOWN # conn will close at end of response def _read_status(self): - # Initialize with Simple-Response defaults. - line = str(self.fp.readline(), "iso-8859-1") + line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") + if len(line) > _MAXLINE: + raise LineTooLong("status line") if self.debuglevel > 0: print("reply:", repr(line)) if not line: @@ -313,25 +315,17 @@ class HTTPResponse(io.RawIOBase): # sending a valid response. raise BadStatusLine(line) try: - [version, status, reason] = line.split(None, 2) + version, status, reason = line.split(None, 2) except ValueError: try: - [version, status] = line.split(None, 1) + version, status = line.split(None, 1) reason = "" except ValueError: - # empty version will cause next test to fail and status - # will be treated as 0.9 response. + # empty version will cause next test to fail. version = "" if not version.startswith("HTTP/"): - if self.strict: - self.close() - raise BadStatusLine(line) - else: - # Assume it's a Simple-Response from an 0.9 server. - # We have to convert the first line back to raw bytes - # because self.fp.readline() needs to return bytes. - self.fp = LineAndFileWrapper(bytes(line, "ascii"), self.fp) - return "HTTP/0.9", 200, "" + self._close_conn() + raise BadStatusLine(line) # The status code is a three-digit number try: @@ -365,22 +359,14 @@ class HTTPResponse(io.RawIOBase): self.code = self.status = status self.reason = reason.strip() - if version == "HTTP/1.0": + if version in ("HTTP/1.0", "HTTP/0.9"): + # Some servers might still return "0.9", treat it as 1.0 anyway self.version = 10 elif version.startswith("HTTP/1."): self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - elif version == "HTTP/0.9": - self.version = 9 else: raise UnknownProtocol(version) - if self.version == 9: - self.length = None - self.chunked = False - self.will_close = True - self.headers = self.msg = email.message_from_string('') - return - self.headers = self.msg = parse_headers(self.fp) if self.debuglevel > 0: @@ -460,22 +446,25 @@ class HTTPResponse(io.RawIOBase): # otherwise, assume it will close return True + def _close_conn(self): + fp = self.fp + self.fp = None + fp.close() + def close(self): + super().close() # set "closed" flag if self.fp: - self.fp.close() - self.fp = None + self._close_conn() # These implementations are for the benefit of io.BufferedReader. # XXX This class should probably be revised to act more like # the "raw stream" that BufferedReader expects. - @property - def closed(self): - return self.isclosed() - def flush(self): - self.fp.flush() + super().flush() + if self.fp: + self.fp.flush() def readable(self): return True @@ -483,6 +472,7 @@ class HTTPResponse(io.RawIOBase): # End of "raw stream" methods def isclosed(self): + """True if the connection is closed.""" # NOTE: it is possible that we will not ever call self.close(). This # case occurs when will_close is TRUE, length is None, and we # read up to the last byte, but NOT past it. @@ -496,7 +486,7 @@ class HTTPResponse(io.RawIOBase): return b"" if self._method == "HEAD": - self.close() + self._close_conn() return b"" if self.chunked: @@ -507,9 +497,13 @@ class HTTPResponse(io.RawIOBase): if self.length is None: s = self.fp.read() else: - s = self._safe_read(self.length) + try: + s = self._safe_read(self.length) + except IncompleteRead: + self._close_conn() + raise self.length = 0 - self.close() # we read everything + self._close_conn() # we read everything return s if self.length is not None: @@ -521,10 +515,15 @@ class HTTPResponse(io.RawIOBase): # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) s = self.fp.read(amt) - if self.length is not None: + if not s: + # Ideally, we would raise IncompleteRead if the content-length + # wasn't satisfied, but it might break compatibility. + self._close_conn() + elif self.length is not None: self.length -= len(s) if not self.length: - self.close() + self._close_conn() + return s def _read_chunked(self, amt): @@ -544,7 +543,7 @@ class HTTPResponse(io.RawIOBase): except ValueError: # close the connection as protocol synchronisation is # probably lost - self.close() + self._close_conn() raise IncompleteRead(b''.join(value)) if chunk_left == 0: break @@ -577,11 +576,11 @@ class HTTPResponse(io.RawIOBase): # a vanishingly small number of sites EOF without # sending the trailer break - if line == b"\r\n": + if line in (b'\r\n', b'\n', b''): break # we read everything; close the "file" - self.close() + self._close_conn() return b''.join(value) @@ -651,11 +650,15 @@ class HTTPConnection: default_port = HTTP_PORT auto_open = 1 debuglevel = 0 - strict = 0 - def __init__(self, host, port=None, strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + def __init__(self, host, port=None, strict=_strict_sentinel, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) self.timeout = timeout + self.source_address = source_address self.sock = None self._buffer = [] self.__response = None @@ -666,10 +669,13 @@ class HTTPConnection: self._tunnel_headers = {} self._set_hostport(host, port) - if strict is not None: - self.strict = strict - def _set_tunnel(self, host, port=None, headers=None): + def set_tunnel(self, host, port=None, headers=None): + """ Sets up the host and the port for the HTTP CONNECT Tunnelling. + + The headers argument should be a mapping of extra HTTP headers + to send with the CONNECT request. + """ self._tunnel_host = host self._tunnel_port = port if headers: @@ -685,7 +691,10 @@ class HTTPConnection: try: port = int(host[i+1:]) except ValueError: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) + if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ + port = self.default_port + else: + raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) host = host[:i] else: port = self.default_port @@ -704,12 +713,11 @@ class HTTPConnection: self.send(connect_bytes) for header, value in self._tunnel_headers.items(): header_str = "%s: %s\r\n" % (header, value) - header_bytes = header_str.encode("ascii") + header_bytes = header_str.encode("latin1") self.send(header_bytes) self.send(b'\r\n') - response = self.response_class(self.sock, strict = self.strict, - method = self._method) + response = self.response_class(self.sock, method=self._method) (version, code, message) = response._read_status() if code != 200: @@ -720,13 +728,16 @@ class HTTPConnection: line = response.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: raise LineTooLong("header line") - if line == b'\r\n': + if not line: + # for sites which EOF without sending a trailer + break + if line in (b'\r\n', b'\n', b''): break def connect(self): """Connect to the host and port specified in __init__.""" self.sock = socket.create_connection((self.host,self.port), - self.timeout) + self.timeout, self.source_address) if self._tunnel_host: self._tunnel() @@ -741,18 +752,17 @@ class HTTPConnection: self.__state = _CS_IDLE def send(self, data): - """Send `data' to the server.""" + """Send `data' to the server. + ``data`` can be a string object, a bytes object, an array object, a + file-like object that supports a .read() method, or an iterable object. + """ + if self.sock is None: if self.auto_open: self.connect() else: raise NotConnected() - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. if self.debuglevel > 0: print("send:", repr(data)) blocksize = 8192 @@ -778,8 +788,16 @@ class HTTPConnection: if encode: datablock = datablock.encode("iso-8859-1") self.sock.sendall(datablock) - else: + + try: self.sock.sendall(data) + except TypeError: + if isinstance(data, collections.Iterable): + for d in data: + self.sock.sendall(d) + else: + raise TypeError("data should be a bytes-like object " + "or an iterable, got %r" % type(data)) def _output(self, s): """Add a line of output to the current request buffer. @@ -938,7 +956,7 @@ class HTTPConnection: values = list(values) for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): - values[i] = one_value.encode('ascii') + values[i] = one_value.encode('latin1') elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') value = b'\r\n\t'.join(values) @@ -948,11 +966,11 @@ class HTTPConnection: def endheaders(self, message_body=None): """Indicate that the last header line has been sent to the server. - This method sends the request to the server. The optional - message_body argument can be used to pass message body - associated with the request. The message body will be sent in - the same packet as the message headers if possible. The - message_body should be a string. + This method sends the request to the server. The optional message_body + argument can be used to pass a message body associated with the + request. The message body will be sent in the same packet as the + message headers if it is a string, otherwise it is sent as a separate + packet. """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT @@ -992,7 +1010,7 @@ class HTTPConnection: self.putrequest(method, url, **skips) - if body and ('content-length' not in header_names): + if body is not None and ('content-length' not in header_names): self._set_content_length(body) for hdr, value in headers.items(): self.putheader(hdr, value) @@ -1040,11 +1058,9 @@ class HTTPConnection: if self.debuglevel > 0: response = self.response_class(self.sock, self.debuglevel, - strict=self.strict, method=self._method) else: - response = self.response_class(self.sock, strict=self.strict, - method=self._method) + response = self.response_class(self.sock, method=self._method) response.begin() assert response.will_close != _UNKNOWN @@ -1069,30 +1085,50 @@ else: default_port = HTTPS_PORT + # XXX Should key_file and cert_file be deprecated in favour of context? + def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, *, context=None, check_hostname=None): + super(HTTPSConnection, self).__init__(host, port, strict, timeout, + source_address) self.key_file = key_file self.cert_file = cert_file + if context is None: + # Some reasonable defaults + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + will_verify = context.verify_mode != ssl.CERT_NONE + if check_hostname is None: + check_hostname = will_verify + elif check_hostname and not will_verify: + raise ValueError("check_hostname needs a SSL context with " + "either CERT_OPTIONAL or CERT_REQUIRED") + if key_file or cert_file: + context.load_cert_chain(cert_file, key_file) + self._context = context + self._check_hostname = check_hostname def connect(self): "Connect to a host on a given (SSL) port." sock = socket.create_connection((self.host, self.port), - self.timeout) + self.timeout, self.source_address) if self._tunnel_host: self.sock = sock self._tunnel() - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) - - - def FakeSocket (sock, sslobj): - warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + - "Use the result of ssl.wrap_socket() directly instead.", - DeprecationWarning, stacklevel=2) - return sslobj + server_hostname = self.host if ssl.HAS_SNI else None + self.sock = self._context.wrap_socket(sock, + server_hostname=server_hostname) + try: + if self._check_hostname: + ssl.match_hostname(self.sock.getpeercert(), self.host) + except Exception: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise __all__.append("HTTPSConnection") @@ -1146,6 +1182,8 @@ class ResponseNotReady(ImproperConnectionState): class BadStatusLine(HTTPException): def __init__(self, line): + if not line: + line = repr(line) self.args = line, self.line = line @@ -1156,71 +1194,3 @@ class LineTooLong(HTTPException): # for backwards compatibility error = HTTPException - -class LineAndFileWrapper: - """A limited file-like object for HTTP/0.9 responses.""" - - # The status-line parsing code calls readline(), which normally - # get the HTTP status line. For a 0.9 response, however, this is - # actually the first line of the body! Clients need to get a - # readable file object that contains that line. - - def __init__(self, line, file): - self._line = line - self._file = file - self._line_consumed = 0 - self._line_offset = 0 - self._line_left = len(line) - - def __getattr__(self, attr): - return getattr(self._file, attr) - - def _done(self): - # called when the last byte is read from the line. After the - # call, all read methods are delegated to the underlying file - # object. - self._line_consumed = 1 - self.read = self._file.read - self.readline = self._file.readline - self.readlines = self._file.readlines - - def read(self, amt=None): - if self._line_consumed: - return self._file.read(amt) - assert self._line_left - if amt is None or amt > self._line_left: - s = self._line[self._line_offset:] - self._done() - if amt is None: - return s + self._file.read() - else: - return s + self._file.read(amt - len(s)) - else: - assert amt <= self._line_left - i = self._line_offset - j = i + amt - s = self._line[i:j] - self._line_offset = j - self._line_left -= amt - if self._line_left == 0: - self._done() - return s - - def readline(self): - if self._line_consumed: - return self._file.readline() - assert self._line_left - s = self._line[self._line_offset:] - self._done() - return s - - def readlines(self, size=None): - if self._line_consumed: - return self._file.readlines(size) - assert self._line_left - L = [self._line[self._line_offset:]] - self._done() - if size is None: - return L + self._file.readlines() - else: - return L + self._file.readlines(size) diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py index e7f0b4b..b6cfc35 100644 --- a/Lib/http/cookiejar.py +++ b/Lib/http/cookiejar.py @@ -1,4 +1,4 @@ -"""HTTP cookie handling for web clients. +r"""HTTP cookie handling for web clients. This module has (now fairly distant) origins in Gisle Aas' Perl module HTTP::Cookies, from the libwww-perl library. @@ -442,6 +442,13 @@ def join_header_words(lists): if attr: headers.append("; ".join(attr)) return ", ".join(headers) +def strip_quotes(text): + if text.startswith('"'): + text = text[1:] + if text.endswith('"'): + text = text[:-1] + return text + def parse_ns_headers(ns_headers): """Ad-hoc parser for Netscape protocol cookie-attributes. @@ -459,7 +466,7 @@ def parse_ns_headers(ns_headers): """ known_attrs = ("expires", "domain", "path", "secure", # RFC 2109 attrs (may turn up in Netscape cookies, too) - "port", "max-age") + "version", "port", "max-age") result = [] for ns_header in ns_headers: @@ -479,12 +486,11 @@ def parse_ns_headers(ns_headers): k = lc if k == "version": # This is an RFC 2109 cookie. + v = strip_quotes(v) version_set = True if k == "expires": # convert expires date to seconds since epoch - if v.startswith('"'): v = v[1:] - if v.endswith('"'): v = v[:-1] - v = http2time(v) # None if invalid + v = http2time(strip_quotes(v)) # None if invalid pairs.append((k, v)) if pairs: @@ -1014,7 +1020,7 @@ class DefaultCookiePolicy(CookiePolicy): (not erhn.startswith(".") and not ("."+erhn).endswith(domain))): _debug(" effective request-host %s (even with added " - "initial dot) does not end end with %s", + "initial dot) does not end with %s", erhn, domain) return False if (cookie.version > 0 or @@ -1449,7 +1455,11 @@ class CookieJar: # set the easy defaults version = standard.get("version", None) - if version is not None: version = int(version) + if version is not None: + try: + version = int(version) + except ValueError: + return None # invalid version, ignore cookie secure = standard.get("secure", False) # (discard is also set if expires is Absent) discard = standard.get("discard", False) @@ -1815,7 +1825,7 @@ def lwp_cookie_str(cookie): class LWPCookieJar(FileCookieJar): """ - The LWPCookieJar saves a sequence of"Set-Cookie3" lines. + The LWPCookieJar saves a sequence of "Set-Cookie3" lines. "Set-Cookie3" is the format used by the libwww-perl libary, not known to be compatible with any browser, but which is easy to read and doesn't lose information about RFC 2965 cookies. @@ -1827,7 +1837,7 @@ class LWPCookieJar(FileCookieJar): """ def as_lwp_str(self, ignore_discard=True, ignore_expires=True): - """Return cookies as a string of "\n"-separated "Set-Cookie3" headers. + """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers. ignore_discard and ignore_expires: see docstring for FileCookieJar.save diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py index 0d9e6d0..ddbcbf8 100644 --- a/Lib/http/cookies.py +++ b/Lib/http/cookies.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # #### @@ -46,7 +46,7 @@ At the moment, this is the only documentation. The Basics ---------- -Importing is easy.. +Importing is easy... >>> from http import cookies @@ -127,19 +127,14 @@ the value to a string, when the values are set dictionary-style. 'Set-Cookie: number=7\r\nSet-Cookie: string=seven' Finis. -""" #" -# ^ -# |----helps out font-lock +""" # # Import our required modules # +import re import string -from pickle import dumps, loads - -import re, warnings - __all__ = ["CookieError", "BaseCookie", "SimpleCookie"] _nulljoin = ''.join @@ -231,17 +226,16 @@ _Translator = { } def _quote(str, LegalChars=_LegalChars): - # - # If the string does not need to be double-quoted, - # then just return the string. Otherwise, surround - # the string in doublequotes and precede quote (with a \) - # special characters. - # + r"""Quote a string for use in a cookie header. + + If the string does not need to be double-quoted, then just return the + string. Otherwise, surround the string in doublequotes and quote + (with a \) special characters. + """ if all(c in LegalChars for c in str): return str else: - return '"' + _nulljoin( map(_Translator.get, str, str) ) + '"' -# end _quote + return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' _OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") @@ -250,7 +244,7 @@ _QuotePatt = re.compile(r"[\\].") def _unquote(str): # If there aren't any doublequotes, # then there can't be any special characters. See RFC 2109. - if len(str) < 2: + if len(str) < 2: return str if str[0] != '"' or str[-1] != '"': return str @@ -269,32 +263,32 @@ def _unquote(str): n = len(str) res = [] while 0 <= i < n: - Omatch = _OctalPatt.search(str, i) - Qmatch = _QuotePatt.search(str, i) - if not Omatch and not Qmatch: # Neither matched + o_match = _OctalPatt.search(str, i) + q_match = _QuotePatt.search(str, i) + if not o_match and not q_match: # Neither matched res.append(str[i:]) break # else: j = k = -1 - if Omatch: j = Omatch.start(0) - if Qmatch: k = Qmatch.start(0) - if Qmatch and ( not Omatch or k < j ): # QuotePatt matched + if o_match: + j = o_match.start(0) + if q_match: + k = q_match.start(0) + if q_match and (not o_match or k < j): # QuotePatt matched res.append(str[i:k]) res.append(str[k+1]) - i = k+2 + i = k + 2 else: # OctalPatt matched res.append(str[i:j]) - res.append( chr( int(str[j+1:j+4], 8) ) ) - i = j+4 + res.append(chr(int(str[j+1:j+4], 8))) + i = j + 4 return _nulljoin(res) -# end _unquote - -# The _getdate() routine is used to set the expiration time in -# the cookie's HTTP header. By default, _getdate() returns the -# current time in the appropriate "expires" format for a -# Set-Cookie header. The one optional argument is an offset from -# now, in seconds. For example, an offset of -3600 means "one hour ago". -# The offset may be a floating point number. + +# The _getdate() routine is used to set the expiration time in the cookie's HTTP +# header. By default, _getdate() returns the current time in the appropriate +# "expires" format for a Set-Cookie header. The one optional argument is an +# offset from now, in seconds. For example, an offset of -3600 means "one hour +# ago". The offset may be a floating point number. # _weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] @@ -307,22 +301,19 @@ def _getdate(future=0, weekdayname=_weekdayname, monthname=_monthname): from time import gmtime, time now = time() year, month, day, hh, mm, ss, wd, y, z = gmtime(now + future) - return "%s, %02d-%3s-%4d %02d:%02d:%02d GMT" % \ + return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \ (weekdayname[wd], day, monthname[month], year, hh, mm, ss) -# -# A class to hold ONE key,value pair. -# In a cookie, each such pair may have several attributes. -# so this class is used to keep the attributes associated -# with the appropriate key,value pair. -# This class also includes a coded_value attribute, which -# is used to hold the network representation of the -# value. This is most useful when Python objects are -# pickled for network transit. -# - class Morsel(dict): + """A class to hold ONE (key, value) pair. + + In a cookie, each such pair may have several attributes, so this class is + used to keep the attributes associated with the appropriate key,value pair. + This class also includes a coded_value attribute, which is used to hold + the network representation of the value. This is most useful when Python + objects are pickled for network transit. + """ # RFC 2109 lists these attributes as reserved: # path comment domain # max-age secure version @@ -336,35 +327,33 @@ class Morsel(dict): # This dictionary provides a mapping from the lowercase # variant on the left to the appropriate traditional # formatting on the right. - _reserved = { "expires" : "expires", - "path" : "Path", - "comment" : "Comment", - "domain" : "Domain", - "max-age" : "Max-Age", - "secure" : "secure", - "httponly" : "httponly", - "version" : "Version", - } + _reserved = { + "expires" : "expires", + "path" : "Path", + "comment" : "Comment", + "domain" : "Domain", + "max-age" : "Max-Age", + "secure" : "secure", + "httponly" : "httponly", + "version" : "Version", + } def __init__(self): # Set defaults self.key = self.value = self.coded_value = None # Set default attributes - for K in self._reserved: - dict.__setitem__(self, K, "") - # end __init__ + for key in self._reserved: + dict.__setitem__(self, key, "") def __setitem__(self, K, V): K = K.lower() if not K in self._reserved: raise CookieError("Invalid Attribute %s" % K) dict.__setitem__(self, K, V) - # end __setitem__ def isReservedKey(self, K): return K.lower() in self._reserved - # end isReservedKey def set(self, key, val, coded_val, LegalChars=_LegalChars): # First we verify that the key isn't a reserved word @@ -375,19 +364,18 @@ class Morsel(dict): raise CookieError("Illegal key value: %s" % key) # It's a good key, so save it. - self.key = key - self.value = val - self.coded_value = coded_val - # end set + self.key = key + self.value = val + self.coded_value = coded_val - def output(self, attrs=None, header = "Set-Cookie:"): - return "%s %s" % ( header, self.OutputString(attrs) ) + def output(self, attrs=None, header="Set-Cookie:"): + return "%s %s" % (header, self.OutputString(attrs)) __str__ = output def __repr__(self): return '<%s: %s=%s>' % (self.__class__.__name__, - self.key, repr(self.value) ) + self.key, repr(self.value)) def js_output(self, attrs=None): # Print javascript @@ -397,41 +385,39 @@ class Morsel(dict): document.cookie = \"%s\"; // end hiding --> </script> - """ % ( self.OutputString(attrs).replace('"',r'\"')) - # end js_output() + """ % (self.OutputString(attrs).replace('"', r'\"')) def OutputString(self, attrs=None): # Build up our result # result = [] - RA = result.append + append = result.append # First, the key=value pair - RA("%s=%s" % (self.key, self.coded_value)) + append("%s=%s" % (self.key, self.coded_value)) # Now add any defined attributes if attrs is None: attrs = self._reserved items = sorted(self.items()) - for K,V in items: - if V == "": continue - if K not in attrs: continue - if K == "expires" and type(V) == type(1): - RA("%s=%s" % (self._reserved[K], _getdate(V))) - elif K == "max-age" and type(V) == type(1): - RA("%s=%d" % (self._reserved[K], V)) - elif K == "secure": - RA(str(self._reserved[K])) - elif K == "httponly": - RA(str(self._reserved[K])) + for key, value in items: + if value == "": + continue + if key not in attrs: + continue + if key == "expires" and isinstance(value, int): + append("%s=%s" % (self._reserved[key], _getdate(value))) + elif key == "max-age" and isinstance(value, int): + append("%s=%d" % (self._reserved[key], value)) + elif key == "secure": + append(str(self._reserved[key])) + elif key == "httponly": + append(str(self._reserved[key])) else: - RA("%s=%s" % (self._reserved[K], V)) + append("%s=%s" % (self._reserved[key], value)) # Return the result return _semispacejoin(result) - # end OutputString -# end Morsel class - # @@ -453,7 +439,7 @@ _CookiePattern = re.compile(r""" (?P<val> # Start of group 'val' "(?:[^\\"]|\\.)*" # Any doublequoted string | # or - \w{3},\s[\w\d-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr + \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr | # or """ + _LegalCharsPatt + r"""* # Any word or empty string ) # End of group 'val' @@ -461,13 +447,11 @@ _CookiePattern = re.compile(r""" """, re.ASCII) # May be removed if safe. -# At long last, here is the cookie class. -# Using this class is almost just like using a dictionary. -# See this module's docstring for example usage. +# At long last, here is the cookie class. Using this class is almost just like +# using a dictionary. See this module's docstring for example usage. # class BaseCookie(dict): - # A container class for a set of Morsels - # + """A container class for a set of Morsels.""" def value_decode(self, val): """real_value, coded_value = value_decode(STRING) @@ -477,7 +461,6 @@ class BaseCookie(dict): Override this function to modify the behavior of cookies. """ return val, val - # end value_encode def value_encode(self, val): """real_value, coded_value = value_encode(VALUE) @@ -487,51 +470,46 @@ class BaseCookie(dict): """ strval = str(val) return strval, strval - # end value_encode def __init__(self, input=None): - if input: self.load(input) - # end __init__ + if input: + self.load(input) def __set(self, key, real_value, coded_value): """Private method for setting a cookie's value""" M = self.get(key, Morsel()) M.set(key, real_value, coded_value) dict.__setitem__(self, key, M) - # end __set def __setitem__(self, key, value): """Dictionary style assignment.""" rval, cval = self.value_encode(value) self.__set(key, rval, cval) - # end __setitem__ def output(self, attrs=None, header="Set-Cookie:", sep="\015\012"): """Return a string suitable for HTTP.""" result = [] items = sorted(self.items()) - for K,V in items: - result.append( V.output(attrs, header) ) + for key, value in items: + result.append(value.output(attrs, header)) return sep.join(result) - # end output __str__ = output def __repr__(self): - L = [] + l = [] items = sorted(self.items()) - for K,V in items: - L.append( '%s=%s' % (K,repr(V.value) ) ) - return '<%s: %s>' % (self.__class__.__name__, _spacejoin(L)) + for key, value in items: + l.append('%s=%s' % (key, repr(value.value))) + return '<%s: %s>' % (self.__class__.__name__, _spacejoin(l)) def js_output(self, attrs=None): """Return a string suitable for JavaScript.""" result = [] items = sorted(self.items()) - for K,V in items: - result.append( V.js_output(attrs) ) + for key, value in items: + result.append(value.js_output(attrs)) return _nulljoin(result) - # end js_output def load(self, rawdata): """Load cookies from a string (presumably HTTP_COOKIE) or @@ -539,16 +517,15 @@ class BaseCookie(dict): is equivalent to calling: map(Cookie.__setitem__, d.keys(), d.values()) """ - if type(rawdata) == type(""): - self.__ParseString(rawdata) + if isinstance(rawdata, str): + self.__parse_string(rawdata) else: # self.update() wouldn't call our custom __setitem__ - for k, v in rawdata.items(): - self[k] = v + for key, value in rawdata.items(): + self[key] = value return - # end load() - def __ParseString(self, str, patt=_CookiePattern): + def __parse_string(self, str, patt=_CookiePattern): i = 0 # Our starting point n = len(str) # Length of string M = None # current morsel @@ -556,48 +533,39 @@ class BaseCookie(dict): while 0 <= i < n: # Start looking for a cookie match = patt.search(str, i) - if not match: break # No more cookies + if not match: + # No more cookies + break - K,V = match.group("key"), match.group("val") + key, value = match.group("key"), match.group("val") i = match.end(0) # Parse the key, value in case it's metainfo - if K[0] == "$": + if key[0] == "$": # We ignore attributes which pertain to the cookie # mechanism as a whole. See RFC 2109. # (Does anyone care?) if M: - M[ K[1:] ] = V - elif K.lower() in Morsel._reserved: + M[key[1:]] = value + elif key.lower() in Morsel._reserved: if M: - M[ K ] = _unquote(V) + M[key] = _unquote(value) else: - rval, cval = self.value_decode(V) - self.__set(K, rval, cval) - M = self[K] - # end __ParseString -# end BaseCookie class + rval, cval = self.value_decode(value) + self.__set(key, rval, cval) + M = self[key] + class SimpleCookie(BaseCookie): - """SimpleCookie + """ SimpleCookie supports strings as cookie values. When setting the value using the dictionary assignment notation, SimpleCookie calls the builtin str() to convert the value to a string. Values received from HTTP are kept as strings. """ def value_decode(self, val): - return _unquote( val ), val + return _unquote(val), val + def value_encode(self, val): strval = str(val) - return strval, _quote( strval ) -# end SimpleCookie - -# -########################################################### - -def _test(): - import doctest, http.cookies - return doctest.testmod(http.cookies) - -if __name__ == "__main__": - _test() + return strval, _quote(strval) diff --git a/Lib/http/server.py b/Lib/http/server.py index fccdc4c..5569037 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -84,7 +84,7 @@ __version__ = "0.6" __all__ = ["HTTPServer", "BaseHTTPRequestHandler"] -import cgi +import html import email.message import email.parser import http.client @@ -271,14 +271,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): self.request_version = version = self.default_request_version self.close_connection = 1 requestline = str(self.raw_requestline, 'iso-8859-1') - if requestline[-2:] == '\r\n': - requestline = requestline[:-2] - elif requestline[-1:] == '\n': - requestline = requestline[:-1] + requestline = requestline.rstrip('\r\n') self.requestline = requestline words = requestline.split() if len(words) == 3: - [command, path, version] = words + command, path, version = words if version[:5] != 'HTTP/': self.send_error(400, "Bad request version (%r)" % version) return False @@ -304,7 +301,7 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): "Invalid HTTP Version (%s)" % base_version_number) return False elif len(words) == 2: - [command, path] = words + command, path = words self.close_connection = 1 if command != 'GET': self.send_error(400, @@ -331,6 +328,30 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): elif (conntype.lower() == 'keep-alive' and self.protocol_version >= "HTTP/1.1"): self.close_connection = 0 + # Examine the headers and look for an Expect directive + expect = self.headers.get('Expect', "") + if (expect.lower() == "100-continue" and + self.protocol_version >= "HTTP/1.1" and + self.request_version >= "HTTP/1.1"): + if not self.handle_expect_100(): + return False + return True + + def handle_expect_100(self): + """Decide what to do with an "Expect: 100-continue" header. + + If the client is expecting a 100 Continue response, we must + respond with either a 100 Continue or a final response before + waiting for the request body. The default is to always respond + with a 100 Continue. You can behave differently (for example, + reject unauthorized requests) by overriding this method. + + This method should either return True (possibly after sending + a 100 Continue response) or send an error response and return + False. + + """ + self.send_response_only(100) return True def handle_one_request(self): @@ -341,24 +362,32 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): commands such as GET and POST. """ - self.raw_requestline = self.rfile.readline(65537) - if len(self.raw_requestline) > 65536: - self.requestline = '' - self.request_version = '' - self.command = '' - self.send_error(414) - return - if not self.raw_requestline: + try: + self.raw_requestline = self.rfile.readline(65537) + if len(self.raw_requestline) > 65536: + self.requestline = '' + self.request_version = '' + self.command = '' + self.send_error(414) + return + if not self.raw_requestline: + self.close_connection = 1 + return + if not self.parse_request(): + # An error code has been sent, just exit + return + mname = 'do_' + self.command + if not hasattr(self, mname): + self.send_error(501, "Unsupported method (%r)" % self.command) + return + method = getattr(self, mname) + method() + self.wfile.flush() #actually send the response if not already done. + except socket.timeout as e: + #a read or a write timed out. Discard this connection + self.log_error("Request timed out: %r", e) self.close_connection = 1 return - if not self.parse_request(): # An error code has been sent, just exit - return - mname = 'do_' + self.command - if not hasattr(self, mname): - self.send_error(501, "Unsupported method (%r)" % self.command) - return - method = getattr(self, mname) - method() def handle(self): """Handle multiple requests if necessary.""" @@ -407,6 +436,12 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): """ self.log_request(code) + self.send_response_only(code, message) + self.send_header('Server', self.version_string()) + self.send_header('Date', self.date_time_string()) + + def send_response_only(self, code, message=None): + """Send the response header only.""" if message is None: if code in self.responses: message = self.responses[code][0] @@ -414,15 +449,15 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): message = '' if self.request_version != 'HTTP/0.9': self.wfile.write(("%s %d %s\r\n" % - (self.protocol_version, code, message)).encode('ASCII', 'strict')) - # print (self.protocol_version, code, message) - self.send_header('Server', self.version_string()) - self.send_header('Date', self.date_time_string()) + (self.protocol_version, code, message)).encode('latin1', 'strict')) def send_header(self, keyword, value): """Send a MIME header.""" if self.request_version != 'HTTP/0.9': - self.wfile.write(("%s: %s\r\n" % (keyword, value)).encode('ASCII', 'strict')) + if not hasattr(self, '_headers_buffer'): + self._headers_buffer = [] + self._headers_buffer.append( + ("%s: %s\r\n" % (keyword, value)).encode('latin1', 'strict')) if keyword.lower() == 'connection': if value.lower() == 'close': @@ -433,7 +468,9 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): def end_headers(self): """Send the blank line ending the MIME headers.""" if self.request_version != 'HTTP/0.9': - self.wfile.write(b"\r\n") + self._headers_buffer.append(b"\r\n") + self.wfile.write(b"".join(self._headers_buffer)) + self._headers_buffer = [] def log_request(self, code='-', size='-'): """Log an accepted request. @@ -471,13 +508,13 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): specified as subsequent arguments (it's just like printf!). - The client host and current date/time are prefixed to + The client ip and current date/time are prefixed to every message. """ sys.stderr.write("%s - - [%s] %s\n" % - (self.address_string(), + (self.client_address[0], self.log_date_time_string(), format%args)) @@ -684,7 +721,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): return None list.sort(key=lambda a: a.lower()) r = [] - displaypath = cgi.escape(urllib.parse.unquote(self.path)) + displaypath = html.escape(urllib.parse.unquote(self.path)) r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">') r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath) r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath) @@ -700,7 +737,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): displayname = name + "@" # Note: a link to a directory displays with @ and links with / r.append('<li><a href="%s">%s</a>\n' - % (urllib.parse.quote(linkname), cgi.escape(displayname))) + % (urllib.parse.quote(linkname), html.escape(displayname))) r.append("</ul>\n<hr>\n</body>\n</html>\n") enc = sys.getfilesystemencoding() encoded = ''.join(r).encode(enc) @@ -788,44 +825,47 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): # Utilities for CGIHTTPRequestHandler -# TODO(gregory.p.smith): Move this into an appropriate library. -def _url_collapse_path_split(path): +def _url_collapse_path(path): """ Given a URL path, remove extra '/'s and '.' path elements and collapse - any '..' references. + any '..' references and returns a colllapsed path. Implements something akin to RFC-2396 5.2 step 6 to parse relative paths. + The utility of this function is limited to is_cgi method and helps + preventing some security attacks. Returns: A tuple of (head, tail) where tail is everything after the final / and head is everything before it. Head will always start with a '/' and, if it contains anything else, never have a trailing '/'. Raises: IndexError if too many '..' occur within the path. + """ # Similar to os.path.split(os.path.normpath(path)) but specific to URL # path semantics rather than local operating system semantics. - path_parts = [] - for part in path.split('/'): - if part == '.': - path_parts.append('') - else: - path_parts.append(part) - # Filter out blank non trailing parts before consuming the '..'. - path_parts = [part for part in path_parts[:-1] if part] + path_parts[-1:] + path_parts = path.split('/') + head_parts = [] + for part in path_parts[:-1]: + if part == '..': + head_parts.pop() # IndexError if more '..' than prior parts + elif part and part != '.': + head_parts.append( part ) if path_parts: tail_part = path_parts.pop() + if tail_part: + if tail_part == '..': + head_parts.pop() + tail_part = '' + elif tail_part == '.': + tail_part = '' else: tail_part = '' - head_parts = [] - for part in path_parts: - if part == '..': - head_parts.pop() - else: - head_parts.append(part) - if tail_part and tail_part == '..': - head_parts.pop() - tail_part = '' - return ('/' + '/'.join(head_parts), tail_part) + + splitpath = ('/' + '/'.join(head_parts), tail_part) + collapsed_path = "/".join(splitpath) + + return collapsed_path + nobody = None @@ -842,7 +882,7 @@ def nobody_uid(): try: nobody = pwd.getpwnam('nobody')[2] except KeyError: - nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) + nobody = 1 + max(x[2] for x in pwd.getpwall()) return nobody @@ -906,13 +946,15 @@ class CGIHTTPRequestHandler(SimpleHTTPRequestHandler): (and the next character is a '/' or the end of the string). """ - - splitpath = _url_collapse_path_split(self.path) - if splitpath[0] in self.cgi_directories: - self.cgi_info = splitpath + collapsed_path = _url_collapse_path(self.path) + dir_sep = collapsed_path.find('/', 1) + head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] + if head in self.cgi_directories: + self.cgi_info = head, tail return True return False + cgi_directories = ['/cgi-bin', '/htbin'] def is_executable(self, path): |