diff options
Diffstat (limited to 'Lib/http/client.py')
| -rw-r--r-- | Lib/http/client.py | 263 |
1 files changed, 172 insertions, 91 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index e05c84d..1c69dcb 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -70,17 +70,19 @@ import email.parser import email.message import io import os +import re import socket import collections from urllib.parse import urlsplit -import warnings +# HTTPMessage, parse_headers(), and the HTTP status code constants are +# intentionally omitted for simplicity __all__ = ["HTTPResponse", "HTTPConnection", "HTTPException", "NotConnected", "UnknownProtocol", "UnknownTransferEncoding", "UnimplementedFileMode", "IncompleteRead", "InvalidURL", "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] + "BadStatusLine", "LineTooLong", "error", "responses"] HTTP_PORT = 80 HTTPS_PORT = 443 @@ -216,6 +218,38 @@ MAXAMOUNT = 1048576 _MAXLINE = 65536 _MAXHEADERS = 100 +# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2) +# +# VCHAR = %x21-7E +# obs-text = %x80-FF +# header-field = field-name ":" OWS field-value OWS +# field-name = token +# field-value = *( field-content / obs-fold ) +# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] +# field-vchar = VCHAR / obs-text +# +# obs-fold = CRLF 1*( SP / HTAB ) +# ; obsolete line folding +# ; see Section 3.2.4 + +# token = 1*tchar +# +# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" +# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~" +# / DIGIT / ALPHA +# ; any VCHAR, except delimiters +# +# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1 + +# the patterns for both name and value are more leniant than RFC +# definitions to allow for backwards compatibility +_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch +_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search + +# We always set the Content-Length header for these methods because some +# servers will otherwise respond with a 411 +_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'} + class HTTPMessage(email.message.Message): # XXX The only usage of this method is in @@ -271,8 +305,6 @@ def parse_headers(fp, _class=HTTPMessage): return email.parser.Parser(_class=_class).parsestr(hstring) -_strict_sentinel = object() - class HTTPResponse(io.RawIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -282,7 +314,7 @@ class HTTPResponse(io.RawIOBase): # text following RFC 2047. The basic status line parsing only # accepts iso-8859-1. - def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None): + def __init__(self, sock, debuglevel=0, method=None, url=None): # If the response includes a content-length header, we need to # make sure that the client doesn't read more than the # specified number of bytes. If it does, it will block until @@ -292,10 +324,6 @@ class HTTPResponse(io.RawIOBase): # clients unless they know what they are doing. self.fp = sock.makefile("rb") self.debuglevel = debuglevel - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) self._method = method # The HTTPResponse object is returned via urllib. The clients @@ -464,9 +492,11 @@ class HTTPResponse(io.RawIOBase): fp.close() def close(self): - super().close() # set "closed" flag - if self.fp: - self._close_conn() + try: + super().close() # set "closed" flag + finally: + if self.fp: + self._close_conn() # These implementations are for the benefit of io.BufferedReader. @@ -732,13 +762,17 @@ class HTTPConnection: default_port = HTTP_PORT auto_open = 1 debuglevel = 0 - - def __init__(self, host, port=None, strict=_strict_sentinel, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - if strict is not _strict_sentinel: - warnings.warn("the 'strict' argument isn't supported anymore; " - "http.client now always assumes HTTP/1.x compliant servers.", - DeprecationWarning, 2) + # TCP Maximum Segment Size (MSS) is determined by the TCP stack on + # a per-connection basis. There is no simple and efficient + # platform independent mechanism for determining the MSS, so + # instead a reasonable estimate is chosen. The getsockopt() + # interface using the TCP_MAXSEG parameter may be a suitable + # approach on some operating systems. A value of 16KiB is chosen + # as a reasonable estimate of the maximum MSS. + mss = 16384 + + def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None): self.timeout = timeout self.source_address = source_address self.sock = None @@ -750,22 +784,37 @@ class HTTPConnection: self._tunnel_port = None self._tunnel_headers = {} - self._set_hostport(host, port) + (self.host, self.port) = self._get_hostport(host, port) + + # This is stored as an instance variable to allow unit + # tests to replace it with a suitable mockup + self._create_connection = socket.create_connection def set_tunnel(self, host, port=None, headers=None): - """ Sets up the host and the port for the HTTP CONNECT Tunnelling. + """Set up host and port for HTTP CONNECT tunnelling. + + In a connection that uses HTTP CONNECT tunneling, the host passed to the + constructor is used as a proxy server that relays all communication to + the endpoint passed to `set_tunnel`. This done by sending an HTTP + CONNECT request to the proxy server when the connection is established. + + This method must be called before the HTML connection has been + established. - The headers argument should be a mapping of extra HTTP headers - to send with the CONNECT request. + The headers argument should be a mapping of extra HTTP headers to send + with the CONNECT request. """ - self._tunnel_host = host - self._tunnel_port = port + + if self.sock: + raise RuntimeError("Can't set up tunnel for established connection") + + self._tunnel_host, self._tunnel_port = self._get_hostport(host, port) if headers: self._tunnel_headers = headers else: self._tunnel_headers.clear() - def _set_hostport(self, host, port): + def _get_hostport(self, host, port): if port is None: i = host.rfind(':') j = host.rfind(']') # ipv6 addresses have [...] @@ -782,15 +831,15 @@ class HTTPConnection: port = self.default_port if host and host[0] == '[' and host[-1] == ']': host = host[1:-1] - self.host = host - self.port = port + + return (host, port) def set_debuglevel(self, level): self.debuglevel = level def _tunnel(self): - self._set_hostport(self._tunnel_host, self._tunnel_port) - connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port) + connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host, + self._tunnel_port) connect_bytes = connect_str.encode("ascii") self.send(connect_bytes) for header, value in self._tunnel_headers.items(): @@ -804,8 +853,8 @@ class HTTPConnection: if code != 200: self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) + raise OSError("Tunnel connection failed: %d %s" % (code, + message.strip())) while True: line = response.fp.readline(_MAXLINE + 1) if len(line) > _MAXLINE: @@ -818,20 +867,25 @@ class HTTPConnection: def connect(self): """Connect to the host and port specified in __init__.""" - self.sock = socket.create_connection((self.host,self.port), - self.timeout, self.source_address) + self.sock = self._create_connection((self.host,self.port), + self.timeout, self.source_address) + if self._tunnel_host: self._tunnel() def close(self): """Close the connection to the HTTP server.""" - if self.sock: - self.sock.close() # close it manually... there may be other refs - self.sock = None - if self.__response: - self.__response.close() - self.__response = None self.__state = _CS_IDLE + try: + sock = self.sock + if sock: + self.sock = None + sock.close() # close it manually... there may be other refs + finally: + response = self.__response + if response: + self.__response = None + response.close() def send(self, data): """Send `data' to the server. @@ -899,8 +953,11 @@ class HTTPConnection: del self._buffer[:] # If msg and message_body are sent in a single send() call, # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. - if isinstance(message_body, bytes): + # between delayed ack and the Nagle algorithm. However, + # there is no performance gain if the message is larger + # than MSS (and there is a memory penalty for the message + # copy). + if isinstance(message_body, bytes) and len(message_body) < self.mss: msg += message_body message_body = None self.send(msg) @@ -985,22 +1042,29 @@ class HTTPConnection: netloc_enc = netloc.encode("idna") self.putheader('Host', netloc_enc) else: + if self._tunnel_host: + host = self._tunnel_host + port = self._tunnel_port + else: + host = self.host + port = self.port + try: - host_enc = self.host.encode("ascii") + host_enc = host.encode("ascii") except UnicodeEncodeError: - host_enc = self.host.encode("idna") + host_enc = host.encode("idna") # As per RFC 273, IPv6 address should be wrapped with [] # when used as Host header - if self.host.find(':') >= 0: + if host.find(':') >= 0: host_enc = b'[' + host_enc + b']' - if self.port == self.default_port: + if port == self.default_port: self.putheader('Host', host_enc) else: host_enc = host_enc.decode("ascii") - self.putheader('Host', "%s:%s" % (host_enc, self.port)) + self.putheader('Host', "%s:%s" % (host_enc, port)) # note: we are assuming that clients will not attempt to set these # headers since *this* library must deal with the @@ -1035,12 +1099,20 @@ class HTTPConnection: if hasattr(header, 'encode'): header = header.encode('ascii') + + if not _is_legal_header_name(header): + raise ValueError('Invalid header name %r' % (header,)) + values = list(values) for i, one_value in enumerate(values): if hasattr(one_value, 'encode'): values[i] = one_value.encode('latin-1') elif isinstance(one_value, int): values[i] = str(one_value).encode('ascii') + + if _is_illegal_header_value(values[i]): + raise ValueError('Invalid header value %r' % (values[i],)) + value = b'\r\n\t'.join(values) header = header + b': ' + value self._output(header) @@ -1064,19 +1136,26 @@ class HTTPConnection: """Send a complete request to the server.""" self._send_request(method, url, body, headers) - def _set_content_length(self, body): - # Set the content-length based on the body. + def _set_content_length(self, body, method): + # Set the content-length based on the body. If the body is "empty", we + # set Content-Length: 0 for methods that expect a body (RFC 7230, + # Section 3.3.2). If the body is set for other methods, we set the + # header provided we can figure out what the length is. thelen = None - try: - thelen = str(len(body)) - except TypeError as te: - # If this is a file-like object, try to - # fstat its file descriptor + method_expects_body = method.upper() in _METHODS_EXPECTING_BODY + if body is None and method_expects_body: + thelen = '0' + elif body is not None: try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print("Cannot stat!!") + thelen = str(len(body)) + except TypeError: + # If this is a file-like object, try to + # fstat its file descriptor + try: + thelen = str(os.fstat(body.fileno()).st_size) + except (AttributeError, OSError): + # Don't send a length if this failed + if self.debuglevel > 0: print("Cannot stat!!") if thelen is not None: self.putheader('Content-Length', thelen) @@ -1092,8 +1171,8 @@ class HTTPConnection: self.putrequest(method, url, **skips) - if body is not None and ('content-length' not in header_names): - self._set_content_length(body) + if 'content-length' not in header_names: + self._set_content_length(body, method) for hdr, value in headers.items(): self.putheader(hdr, value) if isinstance(body, str): @@ -1144,18 +1223,22 @@ class HTTPConnection: else: response = self.response_class(self.sock, method=self._method) - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE + try: + response.begin() + assert response.will_close != _UNKNOWN + self.__state = _CS_IDLE - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response + if response.will_close: + # this effectively passes the connection to the response + self.close() + else: + # remember this, so we can tell when it is complete + self.__response = response - return response + return response + except: + response.close() + raise try: import ssl @@ -1170,20 +1253,19 @@ else: # XXX Should key_file and cert_file be deprecated in favour of context? def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, *, context=None, check_hostname=None): - super(HTTPSConnection, self).__init__(host, port, strict, timeout, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + source_address=None, *, context=None, + check_hostname=None): + super(HTTPSConnection, self).__init__(host, port, timeout, source_address) self.key_file = key_file self.cert_file = cert_file if context is None: - # Some reasonable defaults - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - context.options |= ssl.OP_NO_SSLv2 + context = ssl._create_default_https_context() will_verify = context.verify_mode != ssl.CERT_NONE if check_hostname is None: - check_hostname = will_verify - elif check_hostname and not will_verify: + check_hostname = context.check_hostname + if check_hostname and not will_verify: raise ValueError("check_hostname needs a SSL context with " "either CERT_OPTIONAL or CERT_REQUIRED") if key_file or cert_file: @@ -1194,23 +1276,22 @@ else: def connect(self): "Connect to a host on a given (SSL) port." - sock = socket.create_connection((self.host, self.port), - self.timeout, self.source_address) + super().connect() if self._tunnel_host: - self.sock = sock - self._tunnel() + server_hostname = self._tunnel_host + else: + server_hostname = self.host - server_hostname = self.host if ssl.HAS_SNI else None - self.sock = self._context.wrap_socket(sock, + self.sock = self._context.wrap_socket(self.sock, server_hostname=server_hostname) - try: - if self._check_hostname: - ssl.match_hostname(self.sock.getpeercert(), self.host) - except Exception: - self.sock.shutdown(socket.SHUT_RDWR) - self.sock.close() - raise + if not self._context.check_hostname and self._check_hostname: + try: + ssl.match_hostname(self.sock.getpeercert(), server_hostname) + except Exception: + self.sock.shutdown(socket.SHUT_RDWR) + self.sock.close() + raise __all__.append("HTTPSConnection") |
