From 38d9643d5e225dd7c342c6ccaae3e6e01f705e91 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 6 Dec 2011 22:33:57 +0100 Subject: Issue #13464: Add a readinto() method to http.client.HTTPResponse. Patch by Jon Kuhn. --- Doc/library/http.client.rst | 6 ++ Lib/http/client.py | 163 +++++++++++++++++++++++++++++++------------- Lib/test/test_httplib.py | 86 +++++++++++++++++++++++ Misc/ACKS | 1 + Misc/NEWS | 3 + 5 files changed, 213 insertions(+), 46 deletions(-) diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index c1ce15b..7fce91f 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -502,6 +502,12 @@ statement. Reads and returns the response body, or up to the next *amt* bytes. +.. method:: HTTPResponse.readinto(b) + + Reads up to the next len(b) bytes of the response body into the buffer *b*. + Returns the number of bytes read. + + .. versionadded:: 3.3 .. method:: HTTPResponse.getheader(name, default=None) diff --git a/Lib/http/client.py b/Lib/http/client.py index 88da550..7062569 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -485,11 +485,17 @@ class HTTPResponse(io.RawIOBase): self.close() return b"" - if self.chunked: - return self._read_chunked(amt) + if amt is not None: + # Amount is given, so call base class version + # (which is implemented in terms of self.readinto) + return super(HTTPResponse, self).read(amt) + else: + # Amount is not given (unbounded read) so we must check self.length + # and self.chunked + + if self.chunked: + return self._readall_chunked() - if amt is None: - # unbounded read if self.length is None: s = self.fp.read() else: @@ -498,78 +504,127 @@ class HTTPResponse(io.RawIOBase): self.close() # we read everything return s + def readinto(self, b): + if self.fp is None: + return 0 + + if self._method == "HEAD": + self.close() + return 0 + + if self.chunked: + return self._readinto_chunked(b) + if self.length is not None: - if amt > self.length: + if len(b) > self.length: # clip the read to the "end of response" - amt = self.length + b = memoryview(b)[0:self.length] # we do not use _safe_read() here because this may be a .will_close # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) - s = self.fp.read(amt) + n = self.fp.readinto(b) if self.length is not None: - self.length -= len(s) + self.length -= n if not self.length: self.close() - return s + return n + + def _read_next_chunk_size(self): + # Read the next chunk size from the file + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("chunk size") + i = line.find(b";") + if i >= 0: + line = line[:i] # strip chunk-extensions + try: + return int(line, 16) + except ValueError: + # close the connection as protocol synchronisation is + # probably lost + self.close() + raise - def _read_chunked(self, amt): + def _read_and_discard_trailer(self): + # read and discard trailer up to the CRLF terminator + ### note: we shouldn't have any trailers! + while True: + line = self.fp.readline(_MAXLINE + 1) + if len(line) > _MAXLINE: + raise LineTooLong("trailer line") + if not line: + # a vanishingly small number of sites EOF without + # sending the trailer + break + if line == b"\r\n": + break + + def _readall_chunked(self): assert self.chunked != _UNKNOWN chunk_left = self.chunk_left value = [] while True: if chunk_left is None: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("chunk size") - i = line.find(b";") - if i >= 0: - line = line[:i] # strip chunk-extensions try: - chunk_left = int(line, 16) + chunk_left = self._read_next_chunk_size() + if chunk_left == 0: + break except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self.close() raise IncompleteRead(b''.join(value)) - if chunk_left == 0: - break - if amt is None: - value.append(self._safe_read(chunk_left)) - elif amt < chunk_left: - value.append(self._safe_read(amt)) - self.chunk_left = chunk_left - amt - return b''.join(value) - elif amt == chunk_left: - value.append(self._safe_read(amt)) + value.append(self._safe_read(chunk_left)) + + # we read the whole chunk, get another + self._safe_read(2) # toss the CRLF at the end of the chunk + chunk_left = None + + self._read_and_discard_trailer() + + # we read everything; close the "file" + self.close() + + return b''.join(value) + + def _readinto_chunked(self, b): + assert self.chunked != _UNKNOWN + chunk_left = self.chunk_left + + total_bytes = 0 + mvb = memoryview(b) + while True: + if chunk_left is None: + try: + chunk_left = self._read_next_chunk_size() + if chunk_left == 0: + break + except ValueError: + raise IncompleteRead(bytes(b[0:total_bytes])) + + if len(mvb) < chunk_left: + n = self._safe_readinto(mvb) + self.chunk_left = chunk_left - n + return n + elif len(mvb) == chunk_left: + n = self._safe_readinto(mvb) self._safe_read(2) # toss the CRLF at the end of the chunk self.chunk_left = None - return b''.join(value) + return n else: - value.append(self._safe_read(chunk_left)) - amt -= chunk_left + temp_mvb = mvb[0:chunk_left] + n = self._safe_readinto(temp_mvb) + mvb = mvb[n:] + total_bytes += n # we read the whole chunk, get another self._safe_read(2) # toss the CRLF at the end of the chunk chunk_left = None - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("trailer line") - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line == b"\r\n": - break + self._read_and_discard_trailer() # we read everything; close the "file" self.close() - return b''.join(value) + return total_bytes def _safe_read(self, amt): """Read the number of bytes requested, compensating for partial reads. @@ -594,6 +649,22 @@ class HTTPResponse(io.RawIOBase): amt -= len(chunk) return b"".join(s) + def _safe_readinto(self, b): + """Same as _safe_read, but for reading into a buffer.""" + total_bytes = 0 + mvb = memoryview(b) + while total_bytes < len(b): + if MAXAMOUNT < len(mvb): + temp_mvb = mvb[0:MAXAMOUNT] + n = self.fp.readinto(temp_mvb) + else: + n = self.fp.readinto(mvb) + if not n: + raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b)) + mvb = mvb[n:] + total_bytes += n + return total_bytes + def fileno(self): return self.fp.fileno() diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index a10c09d..425c716 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -158,6 +158,23 @@ class BasicTest(TestCase): self.assertEqual(resp.read(2), b'xt') self.assertTrue(resp.isclosed()) + def test_partial_readintos(self): + # if we have a lenght, the system knows when to close itself + # same behaviour than when we read the whole thing with read() + body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText" + sock = FakeSocket(body) + resp = client.HTTPResponse(sock) + resp.begin() + b = bytearray(2) + n = resp.readinto(b) + self.assertEqual(n, 2) + self.assertEqual(bytes(b), b'Te') + self.assertFalse(resp.isclosed()) + n = resp.readinto(b) + self.assertEqual(n, 2) + self.assertEqual(bytes(b), b'xt') + self.assertTrue(resp.isclosed()) + def test_host_port(self): # Check invalid host_port @@ -206,6 +223,21 @@ class BasicTest(TestCase): if resp.read(): self.fail("Did not expect response from HEAD request") + def test_readinto_head(self): + # Test that the library doesn't attempt to read any data + # from a HEAD request. (Tickles SF bug #622042.) + sock = FakeSocket( + 'HTTP/1.1 200 OK\r\n' + 'Content-Length: 14432\r\n' + '\r\n', + NoEOFStringIO) + resp = client.HTTPResponse(sock, method="HEAD") + resp.begin() + b = bytearray(5) + if resp.readinto(b) != 0: + self.fail("Did not expect response from HEAD request") + self.assertEqual(bytes(b), b'\x00'*5) + def test_send_file(self): expected = (b'GET /foo HTTP/1.1\r\nHost: example.com\r\n' b'Accept-Encoding: identity\r\nContent-Length:') @@ -285,6 +317,40 @@ class BasicTest(TestCase): finally: resp.close() + def test_readinto_chunked(self): + chunked_start = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello worl\r\n' + '1\r\n' + 'd\r\n' + ) + sock = FakeSocket(chunked_start + '0\r\n') + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + b = bytearray(16) + n = resp.readinto(b) + self.assertEqual(b[:11], b'hello world') + self.assertEqual(n, 11) + resp.close() + + for x in ('', 'foo\r\n'): + sock = FakeSocket(chunked_start + x) + resp = client.HTTPResponse(sock, method="GET") + resp.begin() + try: + b = bytearray(16) + n = resp.readinto(b) + except client.IncompleteRead as i: + self.assertEqual(i.partial, b'hello world') + self.assertEqual(repr(i),'IncompleteRead(11 bytes read)') + self.assertEqual(str(i),'IncompleteRead(11 bytes read)') + else: + self.fail('IncompleteRead expected') + finally: + resp.close() + def test_chunked_head(self): chunked_start = ( 'HTTP/1.1 200 OK\r\n' @@ -302,6 +368,26 @@ class BasicTest(TestCase): self.assertEqual(resp.reason, 'OK') self.assertTrue(resp.isclosed()) + def test_readinto_chunked_head(self): + chunked_start = ( + 'HTTP/1.1 200 OK\r\n' + 'Transfer-Encoding: chunked\r\n\r\n' + 'a\r\n' + 'hello world\r\n' + '1\r\n' + 'd\r\n' + ) + sock = FakeSocket(chunked_start + '0\r\n') + resp = client.HTTPResponse(sock, method="HEAD") + resp.begin() + b = bytearray(5) + n = resp.readinto(b) + self.assertEqual(n, 0) + self.assertEqual(bytes(b), b'\x00'*5) + self.assertEqual(resp.status, 200) + self.assertEqual(resp.reason, 'OK') + self.assertTrue(resp.isclosed()) + def test_negative_content_length(self): sock = FakeSocket( 'HTTP/1.1 200 OK\r\nContent-Length: -1\r\n\r\nHello\r\n') diff --git a/Misc/ACKS b/Misc/ACKS index 048e549..0beb5c9 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -547,6 +547,7 @@ Hannu Krosing Andrej Krpic Ivan Krstić Andrew Kuchling +Jon Kuhn Vladimir Kushnir Ross Lagerwall Cameron Laird diff --git a/Misc/NEWS b/Misc/NEWS index c98c956..2444ced 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -406,6 +406,9 @@ Core and Builtins Library ------- +- Issue #13464: Add a readinto() method to http.client.HTTPResponse. Patch + by Jon Kuhn. + - tarfile.py: Correctly detect bzip2 compressed streams with blocksizes other than 900k. -- cgit v0.12