diff options
Diffstat (limited to 'Lib/http/client.py')
-rw-r--r-- | Lib/http/client.py | 282 |
1 files changed, 126 insertions, 156 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index 4a65125..5466d06 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -71,6 +71,7 @@ import email.message import io import os import socket +import collections from urllib.parse import urlsplit import warnings @@ -257,13 +258,10 @@ def parse_headers(fp, _class=HTTPMessage): hstring = b''.join(headers).decode('iso-8859-1') return email.parser.Parser(_class=_class).parsestr(hstring) -class HTTPResponse(io.RawIOBase): - # strict: If true, raise BadStatusLine if the status line can't be - # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is - # false because it prevents clients from talking to HTTP/0.9 - # servers. Note that a response with a sufficiently corrupted - # status line will look like an HTTP/0.9 response. +_strict_sentinel = object() + +class HTTPResponse(io.RawIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -272,7 +270,7 @@ class HTTPResponse(io.RawIOBase): # text following RFC 2047. The basic status line parsing only # accepts iso-8859-1. - def __init__(self, sock, debuglevel=0, strict=0, method=None, url=None): + def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): # If the response includes a content-length header, we need to # make sure that the client doesn't read more than the # specified number of bytes. If it does, it will block until @@ -282,7 +280,10 @@ class HTTPResponse(io.RawIOBase): # clients unless they know what they are doing. self.fp = sock.makefile("rb") self.debuglevel = debuglevel - self.strict = strict + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) self._method = method # The HTTPResponse object is returned via urllib. The clients @@ -304,8 +305,9 @@ class HTTPResponse(io.RawIOBase): self.will_close = _UNKNOWN # conn will close at end of response def _read_status(self): - # Initialize with Simple-Response defaults. - line = str(self.fp.readline(), "iso-8859-1") + line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") + if len(line) > _MAXLINE: + raise LineTooLong("status line") if self.debuglevel > 0: print("reply:", repr(line)) if not line: @@ -313,25 +315,17 @@ class HTTPResponse(io.RawIOBase): # sending a valid response. raise BadStatusLine(line) try: - [version, status, reason] = line.split(None, 2) + version, status, reason = line.split(None, 2) except ValueError: try: - [version, status] = line.split(None, 1) + version, status = line.split(None, 1) reason = "" except ValueError: - # empty version will cause next test to fail and status - # will be treated as 0.9 response. + # empty version will cause next test to fail. version = "" if not version.startswith("HTTP/"): - if self.strict: - self.close() - raise BadStatusLine(line) - else: - # Assume it's a Simple-Response from an 0.9 server. - # We have to convert the first line back to raw bytes - # because self.fp.readline() needs to return bytes. - self.fp = LineAndFileWrapper(bytes(line, "ascii"), self.fp) - return "HTTP/0.9", 200, "" + self._close_conn() + raise BadStatusLine(line) # The status code is a three-digit number try: @@ -365,22 +359,14 @@ class HTTPResponse(io.RawIOBase): self.code = self.status = status self.reason = reason.strip() - if version == "HTTP/1.0": + if version in ("HTTP/1.0", "HTTP/0.9"): + # Some servers might still return "0.9", treat it as 1.0 anyway self.version = 10 elif version.startswith("HTTP/1."): self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - elif version == "HTTP/0.9": - self.version = 9 else: raise UnknownProtocol(version) - if self.version == 9: - self.length = None - self.chunked = False - self.will_close = True - self.headers = self.msg = email.message_from_string('') - return - self.headers = self.msg = parse_headers(self.fp) if self.debuglevel > 0: @@ -460,22 +446,25 @@ class HTTPResponse(io.RawIOBase): # otherwise, assume it will close return True + def _close_conn(self): + fp = self.fp + self.fp = None + fp.close() + def close(self): + super().close() # set "closed" flag if self.fp: - self.fp.close() - self.fp = None + self._close_conn() # These implementations are for the benefit of io.BufferedReader. # XXX This class should probably be revised to act more like # the "raw stream" that BufferedReader expects. - @property - def closed(self): - return self.isclosed() - def flush(self): - self.fp.flush() + super().flush() + if self.fp: + self.fp.flush() def readable(self): return True @@ -483,6 +472,7 @@ class HTTPResponse(io.RawIOBase): # End of "raw stream" methods def isclosed(self): + """True if the connection is closed.""" # NOTE: it is possible that we will not ever call self.close(). This # case occurs when will_close is TRUE, length is None, and we # read up to the last byte, but NOT past it. @@ -496,7 +486,7 @@ class HTTPResponse(io.RawIOBase): return b"" if self._method == "HEAD": - self.close() + self._close_conn() return b"" if self.chunked: @@ -507,9 +497,13 @@ class HTTPResponse(io.RawIOBase): if self.length is None: s = self.fp.read() else: - s = self._safe_read(self.length) + try: + s = self._safe_read(self.length) + except IncompleteRead: + self._close_conn() + raise self.length = 0 - self.close() # we read everything + self._close_conn() # we read everything return s if self.length is not None: @@ -521,10 +515,15 @@ class HTTPResponse(io.RawIOBase): # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) s = self.fp.read(amt) - if self.length is not None: + if not s: + # Ideally, we would raise IncompleteRead if the content-length + # wasn't satisfied, but it might break compatibility. + self._close_conn() + elif self.length is not None: self.length -= len(s) if not self.length: - self.close() + self._close_conn() + return s def _read_chunked(self, amt): @@ -544,7 +543,7 @@ class HTTPResponse(io.RawIOBase): except ValueError: # close the connection as protocol synchronisation is # probably lost - self.close() + self._close_conn() raise IncompleteRead(b''.join(value)) if chunk_left == 0: break @@ -577,11 +576,11 @@ class HTTPResponse(io.RawIOBase): # a vanishingly small number of sites EOF without # sending the trailer break - if line == b"\r\n": + if line in (b'\r\n', b'\n', b''): break # we read everything; close the "file" - self.close() + self._close_conn() return b''.join(value) @@ -651,11 +650,15 @@ class HTTPConnection: default_port = HTTP_PORT auto_open = 1 debuglevel = 0 - strict = 0 - def __init__(self, host, port=None, strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + def __init__(self, host, port=None, strict=_strict_sentinel, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): + if strict is not _strict_sentinel: + warnings.warn("the 'strict' argument isn't supported anymore; " + "http.client now always assumes HTTP/1.x compliant servers.", + DeprecationWarning, 2) self.timeout = timeout + self.source_address = source_address self.sock = None self._buffer = [] self.__response = None @@ -666,10 +669,13 @@ class HTTPConnection: self._tunnel_headers = {} self._set_hostport(host, port) - if strict is not None: - self.strict = strict - def _set_tunnel(self, host, port=None, headers=None): + def set_tunnel(self, host, port=None, headers=None): + """ Sets up the host and the port for the HTTP CONNECT Tunnelling. + + The headers argument should be a mapping of extra HTTP headers + to send with the CONNECT request. + """ self._tunnel_host = host self._tunnel_port = port if headers: @@ -685,7 +691,10 @@ class HTTPConnection: try: port = int(host[i+1:]) except ValueError: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) + if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ + port = self.default_port + else: + raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) host = host[:i] else: port = self.default_port @@ -704,12 +713,11 @@ class HTTPConnection: self.send(connect_bytes) for header, value in self._tunnel_headers.items(): header_str = "%s: %s\r\n" % (header, value) - header_bytes = header_str.encode("ascii") + header_bytes = header_str.encode("latin1") self.send(header_bytes) self.send(b'\r\n') - response = self.response_class(self.sock, strict = self.strict, - method = self._method) + response = self.response_class(self.sock, method=self._method) (version, code, message) = response._read_status() if code != 200: @@ -720,13 +728,16 @@ class HTTPConnection: line = response.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: raise LineTooLong("header line") - if line == b'\r\n': + if not line: + # for sites which EOF without sending a trailer + break + if line in (b'\r\n', b'\n', b''): break def connect(self): """Connect to the host and port specified in __init__.""" self.sock = socket.create_connection((self.host,self.port), - self.timeout) + self.timeout, self.source_address) if self._tunnel_host: self._tunnel() @@ -741,18 +752,17 @@ class HTTPConnection: self.__state = _CS_IDLE def send(self, data): - """Send `data' to the server.""" + """Send `data' to the server. + ``data`` can be a string object, a bytes object, an array object, a + file-like object that supports a .read() method, or an iterable object. + """ + if self.sock is None: if self.auto_open: self.connect() else: raise NotConnected() - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - # - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. if self.debuglevel > 0: print("send:", repr(data)) blocksize = 8192 @@ -778,8 +788,16 @@ class HTTPConnection: if encode: datablock = datablock.encode("iso-8859-1") self.sock.sendall(datablock) - else: + + try: self.sock.sendall(data) + except TypeError: + if isinstance(data, collections.Iterable): + for d in data: + self.sock.sendall(d) + else: + raise TypeError("data should be a bytes-like object " + "or an iterable, got %r" % type(data)) def _output(self, s): """Add a line of output to the current request buffer. @@ -938,7 +956,7 @@ class HTTPConnection: values = list(values) for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): - values[i] = one_value.encode('ascii') + values[i] = one_value.encode('latin1') elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') value = b'\r\n\t'.join(values) @@ -948,11 +966,11 @@ class HTTPConnection: def endheaders(self, message_body=None): """Indicate that the last header line has been sent to the server. - This method sends the request to the server. The optional - message_body argument can be used to pass message body - associated with the request. The message body will be sent in - the same packet as the message headers if possible. The - message_body should be a string. + This method sends the request to the server. The optional message_body + argument can be used to pass a message body associated with the + request. The message body will be sent in the same packet as the + message headers if it is a string, otherwise it is sent as a separate + packet. """ if self.__state == _CS_REQ_STARTED: self.__state = _CS_REQ_SENT @@ -992,7 +1010,7 @@ class HTTPConnection: self.putrequest(method, url, **skips) - if body and ('content-length' not in header_names): + if body is not None and ('content-length' not in header_names): self._set_content_length(body) for hdr, value in headers.items(): self.putheader(hdr, value) @@ -1040,11 +1058,9 @@ class HTTPConnection: if self.debuglevel > 0: response = self.response_class(self.sock, self.debuglevel, - strict=self.strict, method=self._method) else: - response = self.response_class(self.sock, strict=self.strict, - method=self._method) + response = self.response_class(self.sock, method=self._method) response.begin() assert response.will_close != _UNKNOWN @@ -1069,30 +1085,50 @@ else: default_port = HTTPS_PORT + # XXX Should key_file and cert_file be deprecated in favour of context? + def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): - HTTPConnection.__init__(self, host, port, strict, timeout) + strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, *, context=None, check_hostname=None): + super(HTTPSConnection, self).__init__(host, port, strict, timeout, + source_address) self.key_file = key_file self.cert_file = cert_file + if context is None: + # Some reasonable defaults + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + context.options |= ssl.OP_NO_SSLv2 + will_verify = context.verify_mode != ssl.CERT_NONE + if check_hostname is None: + check_hostname = will_verify + elif check_hostname and not will_verify: + raise ValueError("check_hostname needs a SSL context with " + "either CERT_OPTIONAL or CERT_REQUIRED") + if key_file or cert_file: + context.load_cert_chain(cert_file, key_file) + self._context = context + self._check_hostname = check_hostname def connect(self): "Connect to a host on a given (SSL) port." sock = socket.create_connection((self.host, self.port), - self.timeout) + self.timeout, self.source_address) if self._tunnel_host: self.sock = sock self._tunnel() - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file) - - - def FakeSocket (sock, sslobj): - warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + - "Use the result of ssl.wrap_socket() directly instead.", - DeprecationWarning, stacklevel=2) - return sslobj + server_hostname = self.host if ssl.HAS_SNI else None + self.sock = self._context.wrap_socket(sock, + server_hostname=server_hostname) + try: + if self._check_hostname: + ssl.match_hostname(self.sock.getpeercert(), self.host) + except Exception: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise __all__.append("HTTPSConnection") @@ -1146,6 +1182,8 @@ class ResponseNotReady(ImproperConnectionState): class BadStatusLine(HTTPException): def __init__(self, line): + if not line: + line = repr(line) self.args = line, self.line = line @@ -1156,71 +1194,3 @@ class LineTooLong(HTTPException): # for backwards compatibility error = HTTPException - -class LineAndFileWrapper: - """A limited file-like object for HTTP/0.9 responses.""" - - # The status-line parsing code calls readline(), which normally - # get the HTTP status line. For a 0.9 response, however, this is - # actually the first line of the body! Clients need to get a - # readable file object that contains that line. - - def __init__(self, line, file): - self._line = line - self._file = file - self._line_consumed = 0 - self._line_offset = 0 - self._line_left = len(line) - - def __getattr__(self, attr): - return getattr(self._file, attr) - - def _done(self): - # called when the last byte is read from the line. After the - # call, all read methods are delegated to the underlying file - # object. - self._line_consumed = 1 - self.read = self._file.read - self.readline = self._file.readline - self.readlines = self._file.readlines - - def read(self, amt=None): - if self._line_consumed: - return self._file.read(amt) - assert self._line_left - if amt is None or amt > self._line_left: - s = self._line[self._line_offset:] - self._done() - if amt is None: - return s + self._file.read() - else: - return s + self._file.read(amt - len(s)) - else: - assert amt <= self._line_left - i = self._line_offset - j = i + amt - s = self._line[i:j] - self._line_offset = j - self._line_left -= amt - if self._line_left == 0: - self._done() - return s - - def readline(self): - if self._line_consumed: - return self._file.readline() - assert self._line_left - s = self._line[self._line_offset:] - self._done() - return s - - def readlines(self, size=None): - if self._line_consumed: - return self._file.readlines(size) - assert self._line_left - L = [self._line[self._line_offset:]] - self._done() - if size is None: - return L + self._file.readlines() - else: - return L + self._file.readlines(size) |