diff options
Diffstat (limited to 'Lib/http/client.py')
-rw-r--r-- | Lib/http/client.py | 358 |
1 files changed, 157 insertions, 201 deletions
diff --git a/Lib/http/client.py b/Lib/http/client.py index 1c69dcb..80c80cf 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -20,10 +20,12 @@ request. This diagram details these state transitions: | ( putheader() )* endheaders() v Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] + |\_____________________________ + | | getresponse() raises + | response = getresponse() | ConnectionError + v v + Unread-response Idle + [Response-headers-read] |\____________________ | | | response.read() | putrequest() @@ -68,6 +70,7 @@ Req-sent-unread-response _CS_REQ_SENT <response_class> import email.parser import email.message +import http import io import os import re @@ -82,7 +85,8 @@ __all__ = ["HTTPResponse", "HTTPConnection", "UnknownTransferEncoding", "UnimplementedFileMode", "IncompleteRead", "InvalidURL", "ImproperConnectionState", "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "LineTooLong", "error", "responses"] + "BadStatusLine", "LineTooLong", "RemoteDisconnected", "error", + "responses"] HTTP_PORT = 80 HTTPS_PORT = 443 @@ -94,122 +98,13 @@ _CS_IDLE = 'Idle' _CS_REQ_STARTED = 'Request-started' _CS_REQ_SENT = 'Request-sent' -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 
-REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED = 426 -PRECONDITION_REQUIRED = 428 -TOO_MANY_REQUESTS = 429 -REQUEST_HEADER_FIELDS_TOO_LARGE = 431 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 -NETWORK_AUTHENTICATION_REQUIRED = 511 +# hack to maintain backwards compatibility +globals().update(http.HTTPStatus.__members__) + +# another hack to maintain backwards compatibility # Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - 428: 'Precondition Required', - 429: 'Too Many Requests', - 431: 'Request Header Fields Too Large', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', - 511: 'Network Authentication Required', -} +responses = 
{v: v.phrase for v in http.HTTPStatus.__members__.values()} # maximal amount of data to read at one time in _safe_read MAXAMOUNT = 1048576 @@ -305,7 +200,7 @@ def parse_headers(fp, _class=HTTPMessage): return email.parser.Parser(_class=_class).parsestr(hstring) -class HTTPResponse(io.RawIOBase): +class HTTPResponse(io.BufferedIOBase): # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. @@ -353,7 +248,8 @@ class HTTPResponse(io.RawIOBase): if not line: # Presumably, the server closed the connection before # sending a valid response. - raise BadStatusLine(line) + raise RemoteDisconnected("Remote end closed connection without" + " response") try: version, status, reason = line.split(None, 2) except ValueError: @@ -532,9 +428,10 @@ class HTTPResponse(io.RawIOBase): return b"" if amt is not None: - # Amount is given, so call base class version - # (which is implemented in terms of self.readinto) - return super(HTTPResponse, self).read(amt) + # Amount is given, implement using readinto + b = bytearray(amt) + n = self.readinto(b) + return memoryview(b)[:n].tobytes() else: # Amount is not given (unbounded read) so we must check self.length # and self.chunked @@ -614,71 +511,67 @@ class HTTPResponse(io.RawIOBase): if line in (b'\r\n', b'\n', b''): break + def _get_chunk_left(self): + # return self.chunk_left, reading a new chunk if necessary. + # chunk_left == 0: at the end of the current chunk, need to close it + # chunk_left == None: No current chunk, should read next. + # This function returns non-zero or None if the last chunk has + # been read. + chunk_left = self.chunk_left + if not chunk_left: # Can be 0 or None + if chunk_left is not None: + # We are at the end of chunk. 
discard chunk end + self._safe_read(2) # toss the CRLF at the end of the chunk + try: + chunk_left = self._read_next_chunk_size() + except ValueError: + raise IncompleteRead(b'') + if chunk_left == 0: + # last chunk: 1*("0") [ chunk-extension ] CRLF + self._read_and_discard_trailer() + # we read everything; close the "file" + self._close_conn() + chunk_left = None + self.chunk_left = chunk_left + return chunk_left + def _readall_chunked(self): assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left value = [] - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(b''.join(value)) - value.append(self._safe_read(chunk_left)) - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return b''.join(value) + try: + while True: + chunk_left = self._get_chunk_left() + if chunk_left is None: + break + value.append(self._safe_read(chunk_left)) + self.chunk_left = 0 + return b''.join(value) + except IncompleteRead: + raise IncompleteRead(b''.join(value)) def _readinto_chunked(self, b): assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - total_bytes = 0 mvb = memoryview(b) - while True: - if chunk_left is None: - try: - chunk_left = self._read_next_chunk_size() - if chunk_left == 0: - break - except ValueError: - raise IncompleteRead(bytes(b[0:total_bytes])) - - if len(mvb) < chunk_left: - n = self._safe_readinto(mvb) - self.chunk_left = chunk_left - n - return total_bytes + n - elif len(mvb) == chunk_left: - n = self._safe_readinto(mvb) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return total_bytes + n - else: - temp_mvb = mvb[0:chunk_left] + try: + while True: + chunk_left = self._get_chunk_left() + if chunk_left is None: + return 
total_bytes + + if len(mvb) <= chunk_left: + n = self._safe_readinto(mvb) + self.chunk_left = chunk_left - n + return total_bytes + n + + temp_mvb = mvb[:chunk_left] n = self._safe_readinto(temp_mvb) mvb = mvb[n:] total_bytes += n + self.chunk_left = 0 - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - self._read_and_discard_trailer() - - # we read everything; close the "file" - self._close_conn() - - return total_bytes + except IncompleteRead: + raise IncompleteRead(bytes(b[0:total_bytes])) def _safe_read(self, amt): """Read the number of bytes requested, compensating for partial reads. @@ -719,6 +612,73 @@ class HTTPResponse(io.RawIOBase): total_bytes += n return total_bytes + def read1(self, n=-1): + """Read with at most one underlying system call. If at least one + byte is buffered, return that instead. + """ + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + return self._read1_chunked(n) + try: + result = self.fp.read1(n) + except ValueError: + if n >= 0: + raise + # some implementations, like BufferedReader, don't support -1 + # Read an arbitrarily selected largeish chunk. 
+ result = self.fp.read1(16*1024) + if not result and n: + self._close_conn() + return result + + def peek(self, n=-1): + # Having this enables IOBase.readline() to read more than one + # byte at a time + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + return self._peek_chunked(n) + return self.fp.peek(n) + + def readline(self, limit=-1): + if self.fp is None or self._method == "HEAD": + return b"" + if self.chunked: + # Fallback to IOBase readline which uses peek() and read() + return super().readline(limit) + result = self.fp.readline(limit) + if not result and limit: + self._close_conn() + return result + + def _read1_chunked(self, n): + # Strictly speaking, _get_chunk_left() may cause more than one read, + # but that is ok, since that is to satisfy the chunked protocol. + chunk_left = self._get_chunk_left() + if chunk_left is None or n == 0: + return b'' + if not (0 <= n <= chunk_left): + n = chunk_left # if n is negative or larger than chunk_left + read = self.fp.read1(n) + self.chunk_left -= len(read) + if not read: + raise IncompleteRead(b"") + return read + + def _peek_chunked(self, n): + # Strictly speaking, _get_chunk_left() may cause more than one read, + # but that is ok, since that is to satisfy the chunked protocol. + try: + chunk_left = self._get_chunk_left() + except IncompleteRead: + return b'' # peek doesn't worry about protocol + if chunk_left is None: + return b'' # eof + # peek is allowed to return more than requested. Just request the + # entire chunk, and truncate what we get. + return self.fp.peek(chunk_left)[:chunk_left] + def fileno(self): return self.fp.fileno() @@ -762,14 +722,6 @@ class HTTPConnection: default_port = HTTP_PORT auto_open = 1 debuglevel = 0 - # TCP Maximum Segment Size (MSS) is determined by the TCP stack on - # a per-connection basis. There is no simple and efficient - # platform independent mechanism for determining the MSS, so - # instead a reasonable estimate is chosen. 
The getsockopt() - # interface using the TCP_MAXSEG parameter may be a suitable - # approach on some operating systems. A value of 16KiB is chosen - # as a reasonable estimate of the maximum MSS. - mss = 16384 def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): @@ -851,7 +803,7 @@ class HTTPConnection: response = self.response_class(self.sock, method=self._method) (version, code, message) = response._read_status() - if code != 200: + if code != http.HTTPStatus.OK: self.close() raise OSError("Tunnel connection failed: %d %s" % (code, message.strip())) @@ -865,10 +817,14 @@ class HTTPConnection: if line in (b'\r\n', b'\n', b''): break + if self.debuglevel > 0: + print('header:', line.decode()) + def connect(self): """Connect to the host and port specified in __init__.""" - self.sock = self._create_connection((self.host,self.port), - self.timeout, self.source_address) + self.sock = self._create_connection( + (self.host,self.port), self.timeout, self.source_address) + self.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) if self._tunnel_host: self._tunnel() @@ -951,19 +907,9 @@ class HTTPConnection: self._buffer.extend((b"", b"")) msg = b"\r\n".join(self._buffer) del self._buffer[:] - # If msg and message_body are sent in a single send() call, - # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. However, - # there is no performance gain if the message is larger - # than MSS (and there is a memory penalty for the message - # copy). - if isinstance(message_body, bytes) and len(message_body) < self.mss: - msg += message_body - message_body = None + self.send(msg) if message_body is not None: - # message_body was not a string (i.e. it is a file), and - # we must run the risk of Nagle. 
self.send(message_body) def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): @@ -1224,7 +1170,11 @@ class HTTPConnection: response = self.response_class(self.sock, method=self._method) try: - response.begin() + try: + response.begin() + except ConnectionError: + self.close() + raise assert response.will_close != _UNKNOWN self.__state = _CS_IDLE @@ -1327,7 +1277,8 @@ class IncompleteRead(HTTPException): e = ', %i more expected' % self.expected else: e = '' - return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) + return '%s(%i bytes read%s)' % (self.__class__.__name__, + len(self.partial), e) def __str__(self): return repr(self) @@ -1355,5 +1306,10 @@ class LineTooLong(HTTPException): HTTPException.__init__(self, "got more than %d bytes when reading %s" % (_MAXLINE, line_type)) +class RemoteDisconnected(ConnectionResetError, BadStatusLine): + def __init__(self, *pos, **kw): + BadStatusLine.__init__(self, "") + ConnectionResetError.__init__(self, *pos, **kw) + # for backwards compatibility error = HTTPException |