From 153365d864c411f6fb523efa752ccb3497d815ca Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 28 Jul 2021 22:27:49 +0900 Subject: [3.9] bpo-42853: Fix http.client fails to download >2GiB data over TLS (GH-27405) Revert "bpo-36050: optimize HTTPResponse.read() (GH-12698)" This reverts commit d6bf6f2d0c83f0c64ce86e7b9340278627798090. --- Lib/http/client.py | 42 ++++++++++++++++------ .../2021-07-28-15-50-59.bpo-42853.8SYiF_.rst | 1 + 2 files changed, 33 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index 9752925..0fd9021 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -105,6 +105,9 @@ globals().update(http.HTTPStatus.__members__) # Mapping status codes to official W3C names responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()} +# maximal amount of data to read at one time in _safe_read +MAXAMOUNT = 1048576 + # maximal line length when calling readline(). _MAXLINE = 65536 _MAXHEADERS = 100 @@ -604,24 +607,43 @@ class HTTPResponse(io.BufferedIOBase): raise IncompleteRead(bytes(b[0:total_bytes])) def _safe_read(self, amt): - """Read the number of bytes requested. + """Read the number of bytes requested, compensating for partial reads. + + Normally, we have a blocking socket, but a read() can be interrupted + by a signal (resulting in a partial read). + + Note that we cannot distinguish between EOF and an interrupt when zero + bytes have been read. IncompleteRead() will be raised in this + situation. This function should be used when bytes "should" be present for reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem.
""" - data = self.fp.read(amt) - if len(data) < amt: - raise IncompleteRead(data, amt-len(data)) - return data + s = [] + while amt > 0: + chunk = self.fp.read(min(amt, MAXAMOUNT)) + if not chunk: + raise IncompleteRead(b''.join(s), amt) + s.append(chunk) + amt -= len(chunk) + return b"".join(s) def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" - amt = len(b) - n = self.fp.readinto(b) - if n < amt: - raise IncompleteRead(bytes(b[:n]), amt-n) - return n + total_bytes = 0 + mvb = memoryview(b) + while total_bytes < len(b): + if MAXAMOUNT < len(mvb): + temp_mvb = mvb[0:MAXAMOUNT] + n = self.fp.readinto(temp_mvb) + else: + n = self.fp.readinto(mvb) + if not n: + raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) + mvb = mvb[n:] + total_bytes += n + return total_bytes def read1(self, n=-1): """Read with at most one underlying system call. If at least one diff --git a/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst b/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst new file mode 100644 index 0000000..aaf8af0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-07-28-15-50-59.bpo-42853.8SYiF_.rst @@ -0,0 +1 @@ +Fix ``http.client.HTTPSConnection`` failing to download >2GiB data. -- cgit v0.12