summary refs log tree commit diff stats
path: root/Lib/http/client.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/http/client.py')
-rw-r--r-- Lib/http/client.py 263
1 files changed, 172 insertions, 91 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py
index e05c84d..1c69dcb 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -70,17 +70,19 @@ import email.parser
import email.message
import io
import os
+import re
import socket
import collections
from urllib.parse import urlsplit
-import warnings
+# HTTPMessage, parse_headers(), and the HTTP status code constants are
+# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
"HTTPException", "NotConnected", "UnknownProtocol",
"UnknownTransferEncoding", "UnimplementedFileMode",
"IncompleteRead", "InvalidURL", "ImproperConnectionState",
"CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
- "BadStatusLine", "error", "responses"]
+ "BadStatusLine", "LineTooLong", "error", "responses"]
HTTP_PORT = 80
HTTPS_PORT = 443
@@ -216,6 +218,38 @@ MAXAMOUNT = 1048576
_MAXLINE = 65536
_MAXHEADERS = 100
+# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
+#
+# VCHAR = %x21-7E
+# obs-text = %x80-FF
+# header-field = field-name ":" OWS field-value OWS
+# field-name = token
+# field-value = *( field-content / obs-fold )
+# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
+# field-vchar = VCHAR / obs-text
+#
+# obs-fold = CRLF 1*( SP / HTAB )
+# ; obsolete line folding
+# ; see Section 3.2.4
+
+# token = 1*tchar
+#
+# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+# / DIGIT / ALPHA
+# ; any VCHAR, except delimiters
+#
+# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
+
+# the patterns for both name and value are more lenient than RFC
+# definitions to allow for backwards compatibility
+_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
+_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
+
+# We always set the Content-Length header for these methods because some
+# servers will otherwise respond with a 411
+_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
+
class HTTPMessage(email.message.Message):
# XXX The only usage of this method is in
@@ -271,8 +305,6 @@ def parse_headers(fp, _class=HTTPMessage):
return email.parser.Parser(_class=_class).parsestr(hstring)
-_strict_sentinel = object()
-
class HTTPResponse(io.RawIOBase):
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
@@ -282,7 +314,7 @@ class HTTPResponse(io.RawIOBase):
# text following RFC 2047. The basic status line parsing only
# accepts iso-8859-1.
- def __init__(self, sock, debuglevel=0, strict=_strict_sentinel, method=None, url=None):
+ def __init__(self, sock, debuglevel=0, method=None, url=None):
# If the response includes a content-length header, we need to
# make sure that the client doesn't read more than the
# specified number of bytes. If it does, it will block until
@@ -292,10 +324,6 @@ class HTTPResponse(io.RawIOBase):
# clients unless they know what they are doing.
self.fp = sock.makefile("rb")
self.debuglevel = debuglevel
- if strict is not _strict_sentinel:
- warnings.warn("the 'strict' argument isn't supported anymore; "
- "http.client now always assumes HTTP/1.x compliant servers.",
- DeprecationWarning, 2)
self._method = method
# The HTTPResponse object is returned via urllib. The clients
@@ -464,9 +492,11 @@ class HTTPResponse(io.RawIOBase):
fp.close()
def close(self):
- super().close() # set "closed" flag
- if self.fp:
- self._close_conn()
+ try:
+ super().close() # set "closed" flag
+ finally:
+ if self.fp:
+ self._close_conn()
# These implementations are for the benefit of io.BufferedReader.
@@ -732,13 +762,17 @@ class HTTPConnection:
default_port = HTTP_PORT
auto_open = 1
debuglevel = 0
-
- def __init__(self, host, port=None, strict=_strict_sentinel,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
- if strict is not _strict_sentinel:
- warnings.warn("the 'strict' argument isn't supported anymore; "
- "http.client now always assumes HTTP/1.x compliant servers.",
- DeprecationWarning, 2)
+ # TCP Maximum Segment Size (MSS) is determined by the TCP stack on
+ # a per-connection basis. There is no simple and efficient
+ # platform independent mechanism for determining the MSS, so
+ # instead a reasonable estimate is chosen. The getsockopt()
+ # interface using the TCP_MAXSEG parameter may be a suitable
+ # approach on some operating systems. A value of 16KiB is chosen
+ # as a reasonable estimate of the maximum MSS.
+ mss = 16384
+
+ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None):
self.timeout = timeout
self.source_address = source_address
self.sock = None
@@ -750,22 +784,37 @@ class HTTPConnection:
self._tunnel_port = None
self._tunnel_headers = {}
- self._set_hostport(host, port)
+ (self.host, self.port) = self._get_hostport(host, port)
+
+ # This is stored as an instance variable to allow unit
+ # tests to replace it with a suitable mockup
+ self._create_connection = socket.create_connection
def set_tunnel(self, host, port=None, headers=None):
- """ Sets up the host and the port for the HTTP CONNECT Tunnelling.
+ """Set up host and port for HTTP CONNECT tunnelling.
+
+ In a connection that uses HTTP CONNECT tunneling, the host passed to the
+ constructor is used as a proxy server that relays all communication to
+the endpoint passed to `set_tunnel`. This is done by sending an HTTP
+ CONNECT request to the proxy server when the connection is established.
+
+This method must be called before the HTTP connection has been
+ established.
- The headers argument should be a mapping of extra HTTP headers
- to send with the CONNECT request.
+ The headers argument should be a mapping of extra HTTP headers to send
+ with the CONNECT request.
"""
- self._tunnel_host = host
- self._tunnel_port = port
+
+ if self.sock:
+ raise RuntimeError("Can't set up tunnel for established connection")
+
+ self._tunnel_host, self._tunnel_port = self._get_hostport(host, port)
if headers:
self._tunnel_headers = headers
else:
self._tunnel_headers.clear()
- def _set_hostport(self, host, port):
+ def _get_hostport(self, host, port):
if port is None:
i = host.rfind(':')
j = host.rfind(']') # ipv6 addresses have [...]
@@ -782,15 +831,15 @@ class HTTPConnection:
port = self.default_port
if host and host[0] == '[' and host[-1] == ']':
host = host[1:-1]
- self.host = host
- self.port = port
+
+ return (host, port)
def set_debuglevel(self, level):
self.debuglevel = level
def _tunnel(self):
- self._set_hostport(self._tunnel_host, self._tunnel_port)
- connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)
+ connect_str = "CONNECT %s:%d HTTP/1.0\r\n" % (self._tunnel_host,
+ self._tunnel_port)
connect_bytes = connect_str.encode("ascii")
self.send(connect_bytes)
for header, value in self._tunnel_headers.items():
@@ -804,8 +853,8 @@ class HTTPConnection:
if code != 200:
self.close()
- raise socket.error("Tunnel connection failed: %d %s" % (code,
- message.strip()))
+ raise OSError("Tunnel connection failed: %d %s" % (code,
+ message.strip()))
while True:
line = response.fp.readline(_MAXLINE + 1)
if len(line) > _MAXLINE:
@@ -818,20 +867,25 @@ class HTTPConnection:
def connect(self):
"""Connect to the host and port specified in __init__."""
- self.sock = socket.create_connection((self.host,self.port),
- self.timeout, self.source_address)
+ self.sock = self._create_connection((self.host,self.port),
+ self.timeout, self.source_address)
+
if self._tunnel_host:
self._tunnel()
def close(self):
"""Close the connection to the HTTP server."""
- if self.sock:
- self.sock.close() # close it manually... there may be other refs
- self.sock = None
- if self.__response:
- self.__response.close()
- self.__response = None
self.__state = _CS_IDLE
+ try:
+ sock = self.sock
+ if sock:
+ self.sock = None
+ sock.close() # close it manually... there may be other refs
+ finally:
+ response = self.__response
+ if response:
+ self.__response = None
+ response.close()
def send(self, data):
"""Send `data' to the server.
@@ -899,8 +953,11 @@ class HTTPConnection:
del self._buffer[:]
# If msg and message_body are sent in a single send() call,
# it will avoid performance problems caused by the interaction
- # between delayed ack and the Nagle algorithm.
- if isinstance(message_body, bytes):
+ # between delayed ack and the Nagle algorithm. However,
+ # there is no performance gain if the message is larger
+ # than MSS (and there is a memory penalty for the message
+ # copy).
+ if isinstance(message_body, bytes) and len(message_body) < self.mss:
msg += message_body
message_body = None
self.send(msg)
@@ -985,22 +1042,29 @@ class HTTPConnection:
netloc_enc = netloc.encode("idna")
self.putheader('Host', netloc_enc)
else:
+ if self._tunnel_host:
+ host = self._tunnel_host
+ port = self._tunnel_port
+ else:
+ host = self.host
+ port = self.port
+
try:
- host_enc = self.host.encode("ascii")
+ host_enc = host.encode("ascii")
except UnicodeEncodeError:
- host_enc = self.host.encode("idna")
+ host_enc = host.encode("idna")
# As per RFC 2732, IPv6 address should be wrapped with []
# when used as Host header
- if self.host.find(':') >= 0:
+ if host.find(':') >= 0:
host_enc = b'[' + host_enc + b']'
- if self.port == self.default_port:
+ if port == self.default_port:
self.putheader('Host', host_enc)
else:
host_enc = host_enc.decode("ascii")
- self.putheader('Host', "%s:%s" % (host_enc, self.port))
+ self.putheader('Host', "%s:%s" % (host_enc, port))
# note: we are assuming that clients will not attempt to set these
# headers since *this* library must deal with the
@@ -1035,12 +1099,20 @@ class HTTPConnection:
if hasattr(header, 'encode'):
header = header.encode('ascii')
+
+ if not _is_legal_header_name(header):
+ raise ValueError('Invalid header name %r' % (header,))
+
values = list(values)
for i, one_value in enumerate(values):
if hasattr(one_value, 'encode'):
values[i] = one_value.encode('latin-1')
elif isinstance(one_value, int):
values[i] = str(one_value).encode('ascii')
+
+ if _is_illegal_header_value(values[i]):
+ raise ValueError('Invalid header value %r' % (values[i],))
+
value = b'\r\n\t'.join(values)
header = header + b': ' + value
self._output(header)
@@ -1064,19 +1136,26 @@ class HTTPConnection:
"""Send a complete request to the server."""
self._send_request(method, url, body, headers)
- def _set_content_length(self, body):
- # Set the content-length based on the body.
+ def _set_content_length(self, body, method):
+ # Set the content-length based on the body. If the body is "empty", we
+ # set Content-Length: 0 for methods that expect a body (RFC 7230,
+ # Section 3.3.2). If the body is set for other methods, we set the
+ # header provided we can figure out what the length is.
thelen = None
- try:
- thelen = str(len(body))
- except TypeError as te:
- # If this is a file-like object, try to
- # fstat its file descriptor
+ method_expects_body = method.upper() in _METHODS_EXPECTING_BODY
+ if body is None and method_expects_body:
+ thelen = '0'
+ elif body is not None:
try:
- thelen = str(os.fstat(body.fileno()).st_size)
- except (AttributeError, OSError):
- # Don't send a length if this failed
- if self.debuglevel > 0: print("Cannot stat!!")
+ thelen = str(len(body))
+ except TypeError:
+ # If this is a file-like object, try to
+ # fstat its file descriptor
+ try:
+ thelen = str(os.fstat(body.fileno()).st_size)
+ except (AttributeError, OSError):
+ # Don't send a length if this failed
+ if self.debuglevel > 0: print("Cannot stat!!")
if thelen is not None:
self.putheader('Content-Length', thelen)
@@ -1092,8 +1171,8 @@ class HTTPConnection:
self.putrequest(method, url, **skips)
- if body is not None and ('content-length' not in header_names):
- self._set_content_length(body)
+ if 'content-length' not in header_names:
+ self._set_content_length(body, method)
for hdr, value in headers.items():
self.putheader(hdr, value)
if isinstance(body, str):
@@ -1144,18 +1223,22 @@ class HTTPConnection:
else:
response = self.response_class(self.sock, method=self._method)
- response.begin()
- assert response.will_close != _UNKNOWN
- self.__state = _CS_IDLE
+ try:
+ response.begin()
+ assert response.will_close != _UNKNOWN
+ self.__state = _CS_IDLE
- if response.will_close:
- # this effectively passes the connection to the response
- self.close()
- else:
- # remember this, so we can tell when it is complete
- self.__response = response
+ if response.will_close:
+ # this effectively passes the connection to the response
+ self.close()
+ else:
+ # remember this, so we can tell when it is complete
+ self.__response = response
- return response
+ return response
+ except:
+ response.close()
+ raise
try:
import ssl
@@ -1170,20 +1253,19 @@ else:
# XXX Should key_file and cert_file be deprecated in favour of context?
def __init__(self, host, port=None, key_file=None, cert_file=None,
- strict=_strict_sentinel, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- source_address=None, *, context=None, check_hostname=None):
- super(HTTPSConnection, self).__init__(host, port, strict, timeout,
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ source_address=None, *, context=None,
+ check_hostname=None):
+ super(HTTPSConnection, self).__init__(host, port, timeout,
source_address)
self.key_file = key_file
self.cert_file = cert_file
if context is None:
- # Some reasonable defaults
- context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
- context.options |= ssl.OP_NO_SSLv2
+ context = ssl._create_default_https_context()
will_verify = context.verify_mode != ssl.CERT_NONE
if check_hostname is None:
- check_hostname = will_verify
- elif check_hostname and not will_verify:
+ check_hostname = context.check_hostname
+ if check_hostname and not will_verify:
raise ValueError("check_hostname needs a SSL context with "
"either CERT_OPTIONAL or CERT_REQUIRED")
if key_file or cert_file:
@@ -1194,23 +1276,22 @@ else:
def connect(self):
"Connect to a host on a given (SSL) port."
- sock = socket.create_connection((self.host, self.port),
- self.timeout, self.source_address)
+ super().connect()
if self._tunnel_host:
- self.sock = sock
- self._tunnel()
+ server_hostname = self._tunnel_host
+ else:
+ server_hostname = self.host
- server_hostname = self.host if ssl.HAS_SNI else None
- self.sock = self._context.wrap_socket(sock,
+ self.sock = self._context.wrap_socket(self.sock,
server_hostname=server_hostname)
- try:
- if self._check_hostname:
- ssl.match_hostname(self.sock.getpeercert(), self.host)
- except Exception:
- self.sock.shutdown(socket.SHUT_RDWR)
- self.sock.close()
- raise
+ if not self._context.check_hostname and self._check_hostname:
+ try:
+ ssl.match_hostname(self.sock.getpeercert(), server_hostname)
+ except Exception:
+ self.sock.shutdown(socket.SHUT_RDWR)
+ self.sock.close()
+ raise
__all__.append("HTTPSConnection")