From d6bf6f2d0c83f0c64ce86e7b9340278627798090 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 6 Apr 2019 18:06:19 +0900 Subject: bpo-36050: optimize HTTPResponse.read() (GH-12698) * No need to chunking for now. * No need to partial read caused by EINTR for now. --- Lib/http/client.py | 42 ++++++---------------- .../2019-04-05-21-29-53.bpo-36050.x9DRKE.rst | 2 ++ 2 files changed, 12 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index 1de151c..5a22252 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -105,9 +105,6 @@ globals().update(http.HTTPStatus.__members__) # Mapping status codes to official W3C names responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()} -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - # maximal line length when calling readline(). _MAXLINE = 65536 _MAXHEADERS = 100 @@ -592,43 +589,24 @@ class HTTPResponse(io.BufferedIOBase): raise IncompleteRead(bytes(b[0:total_bytes])) def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. IncompleteRead() will be raised in this - situation. + """Read the number of bytes requested. This function should be used when bytes "should" be present for reading. If the bytes are truly not available (due to EOF), then the IncompleteRead exception can be used to detect the problem. """ - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(b''.join(s), amt) - s.append(chunk) - amt -= len(chunk) - return b"".join(s) + data = self.fp.read(amt) + if len(data) < amt: + raise IncompleteRead(data, amt-len(data)) + return data def _safe_readinto(self, b): """Same as _safe_read, but for reading into a buffer.""" - total_bytes = 0 - mvb = memoryview(b) - while total_bytes < len(b): - if MAXAMOUNT < len(mvb): - temp_mvb = mvb[0:MAXAMOUNT] - n = self.fp.readinto(temp_mvb) - else: - n = self.fp.readinto(mvb) - if not n: - raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) - mvb = mvb[n:] - total_bytes += n - return total_bytes + amt = len(b) + n = self.fp.readinto(b) + if n < amt: + raise IncompleteRead(bytes(b[:n]), amt-n) + return n def read1(self, n=-1): """Read with at most one underlying system call. If at least one diff --git a/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst b/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst new file mode 100644 index 0000000..92318f8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-04-05-21-29-53.bpo-36050.x9DRKE.rst @@ -0,0 +1,2 @@ +Optimized ``http.client.HTTPResponse.read()`` for large response. Patch by +Inada Naoki. -- cgit v0.12