summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2011-12-06 21:33:57 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2011-12-06 21:33:57 (GMT)
commit38d9643d5e225dd7c342c6ccaae3e6e01f705e91 (patch)
treeb4b50d8504c87e4cd325625eb7495a4fd0157195
parent0f6492250c37b639ce5aa92486aa3b05b61f90d6 (diff)
downloadcpython-38d9643d5e225dd7c342c6ccaae3e6e01f705e91.zip
cpython-38d9643d5e225dd7c342c6ccaae3e6e01f705e91.tar.gz
cpython-38d9643d5e225dd7c342c6ccaae3e6e01f705e91.tar.bz2
Issue #13464: Add a readinto() method to http.client.HTTPResponse.
Patch by Jon Kuhn.
-rw-r--r--Doc/library/http.client.rst6
-rw-r--r--Lib/http/client.py163
-rw-r--r--Lib/test/test_httplib.py86
-rw-r--r--Misc/ACKS1
-rw-r--r--Misc/NEWS3
5 files changed, 213 insertions, 46 deletions
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
index c1ce15b..7fce91f 100644
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -502,6 +502,12 @@ statement.
Reads and returns the response body, or up to the next *amt* bytes.
+.. method:: HTTPResponse.readinto(b)
+
+ Reads up to the next len(b) bytes of the response body into the buffer *b*.
+ Returns the number of bytes read.
+
+ .. versionadded:: 3.3
.. method:: HTTPResponse.getheader(name, default=None)
diff --git a/Lib/http/client.py b/Lib/http/client.py
index 88da550..7062569 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -485,11 +485,17 @@ class HTTPResponse(io.RawIOBase):
self.close()
return b""
- if self.chunked:
- return self._read_chunked(amt)
+ if amt is not None:
+ # Amount is given, so call base class version
+ # (which is implemented in terms of self.readinto)
+ return super(HTTPResponse, self).read(amt)
+ else:
+ # Amount is not given (unbounded read) so we must check self.length
+ # and self.chunked
+
+ if self.chunked:
+ return self._readall_chunked()
- if amt is None:
- # unbounded read
if self.length is None:
s = self.fp.read()
else:
@@ -498,78 +504,127 @@ class HTTPResponse(io.RawIOBase):
self.close() # we read everything
return s
+ def readinto(self, b):
+ if self.fp is None:
+ return 0
+
+ if self._method == "HEAD":
+ self.close()
+ return 0
+
+ if self.chunked:
+ return self._readinto_chunked(b)
+
if self.length is not None:
- if amt > self.length:
+ if len(b) > self.length:
# clip the read to the "end of response"
- amt = self.length
+ b = memoryview(b)[0:self.length]
# we do not use _safe_read() here because this may be a .will_close
# connection, and the user is reading more bytes than will be provided
# (for example, reading in 1k chunks)
- s = self.fp.read(amt)
+ n = self.fp.readinto(b)
if self.length is not None:
- self.length -= len(s)
+ self.length -= n
if not self.length:
self.close()
- return s
+ return n
+
+ def _read_next_chunk_size(self):
+ # Read the next chunk size from the file
+ line = self.fp.readline(_MAXLINE + 1)
+ if len(line) > _MAXLINE:
+ raise LineTooLong("chunk size")
+ i = line.find(b";")
+ if i >= 0:
+ line = line[:i] # strip chunk-extensions
+ try:
+ return int(line, 16)
+ except ValueError:
+ # close the connection as protocol synchronisation is
+ # probably lost
+ self.close()
+ raise
- def _read_chunked(self, amt):
+ def _read_and_discard_trailer(self):
+ # read and discard trailer up to the CRLF terminator
+ ### note: we shouldn't have any trailers!
+ while True:
+ line = self.fp.readline(_MAXLINE + 1)
+ if len(line) > _MAXLINE:
+ raise LineTooLong("trailer line")
+ if not line:
+ # a vanishingly small number of sites EOF without
+ # sending the trailer
+ break
+ if line == b"\r\n":
+ break
+
+ def _readall_chunked(self):
assert self.chunked != _UNKNOWN
chunk_left = self.chunk_left
value = []
while True:
if chunk_left is None:
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("chunk size")
- i = line.find(b";")
- if i >= 0:
- line = line[:i] # strip chunk-extensions
try:
- chunk_left = int(line, 16)
+ chunk_left = self._read_next_chunk_size()
+ if chunk_left == 0:
+ break
except ValueError:
- # close the connection as protocol synchronisation is
- # probably lost
- self.close()
raise IncompleteRead(b''.join(value))
- if chunk_left == 0:
- break
- if amt is None:
- value.append(self._safe_read(chunk_left))
- elif amt < chunk_left:
- value.append(self._safe_read(amt))
- self.chunk_left = chunk_left - amt
- return b''.join(value)
- elif amt == chunk_left:
- value.append(self._safe_read(amt))
+ value.append(self._safe_read(chunk_left))
+
+ # we read the whole chunk, get another
+ self._safe_read(2) # toss the CRLF at the end of the chunk
+ chunk_left = None
+
+ self._read_and_discard_trailer()
+
+ # we read everything; close the "file"
+ self.close()
+
+ return b''.join(value)
+
+ def _readinto_chunked(self, b):
+ assert self.chunked != _UNKNOWN
+ chunk_left = self.chunk_left
+
+ total_bytes = 0
+ mvb = memoryview(b)
+ while True:
+ if chunk_left is None:
+ try:
+ chunk_left = self._read_next_chunk_size()
+ if chunk_left == 0:
+ break
+ except ValueError:
+ raise IncompleteRead(bytes(b[0:total_bytes]))
+
+ if len(mvb) < chunk_left:
+ n = self._safe_readinto(mvb)
+ self.chunk_left = chunk_left - n
+ return n
+ elif len(mvb) == chunk_left:
+ n = self._safe_readinto(mvb)
self._safe_read(2) # toss the CRLF at the end of the chunk
self.chunk_left = None
- return b''.join(value)
+ return n
else:
- value.append(self._safe_read(chunk_left))
- amt -= chunk_left
+ temp_mvb = mvb[0:chunk_left]
+ n = self._safe_readinto(temp_mvb)
+ mvb = mvb[n:]
+ total_bytes += n
# we read the whole chunk, get another
self._safe_read(2) # toss the CRLF at the end of the chunk
chunk_left = None
- # read and discard trailer up to the CRLF terminator
- ### note: we shouldn't have any trailers!
- while True:
- line = self.fp.readline(_MAXLINE + 1)
- if len(line) > _MAXLINE:
- raise LineTooLong("trailer line")
- if not line:
- # a vanishingly small number of sites EOF without
- # sending the trailer
- break
- if line == b"\r\n":
- break
+ self._read_and_discard_trailer()
# we read everything; close the "file"
self.close()
- return b''.join(value)
+ return total_bytes
def _safe_read(self, amt):
"""Read the number of bytes requested, compensating for partial reads.
@@ -594,6 +649,22 @@ class HTTPResponse(io.RawIOBase):
amt -= len(chunk)
return b"".join(s)
+ def _safe_readinto(self, b):
+ """Same as _safe_read, but for reading into a buffer."""
+ total_bytes = 0
+ mvb = memoryview(b)
+ while total_bytes < len(b):
+ if MAXAMOUNT < len(mvb):
+ temp_mvb = mvb[0:MAXAMOUNT]
+ n = self.fp.readinto(temp_mvb)
+ else:
+ n = self.fp.readinto(mvb)
+ if not n:
+ raise IncompleteRead(bytes(mvb[0:total_bytes]), len(b))
+ mvb = mvb[n:]
+ total_bytes += n
+ return total_bytes
+
def fileno(self):
return self.fp.fileno()
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index a10c09d..425c716 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -158,6 +158,23 @@ class BasicTest(TestCase):
self.assertEqual(resp.read(2), b'xt')
self.assertTrue(resp.isclosed())
+ def test_partial_readintos(self):
+ # if we have a lenght, the system knows when to close itself
+ # same behaviour than when we read the whole thing with read()
+ body = "HTTP/1.1 200 Ok\r\nContent-Length: 4\r\n\r\nText"
+ sock = FakeSocket(body)
+ resp = client.HTTPResponse(sock)
+ resp.begin()
+ b = bytearray(2)
+ n = resp.readinto(b)
+ self.assertEqual(n, 2)
+ self.assertEqual(bytes(b), b'Te')
+ self.assertFalse(resp.isclosed())
+ n = resp.readinto(b)
+ self.assertEqual(n, 2)
+ self.assertEqual(bytes(b), b'xt')
+ self.assertTrue(resp.isclosed())
+
def test_host_port(self):
# Check invalid host_port
@@ -206,6 +223,21 @@ class BasicTest(TestCase):
if resp.read():
self.fail("Did not expect response from HEAD request")
+ def test_readinto_head(self):
+ # Test that the library doesn't attempt to read any data
+ # from a HEAD request. (Tickles SF bug #622042.)
+ sock = FakeSocket(
+ 'HTTP/1.1 200 OK\r\n'
+ 'Content-Length: 14432\r\n'
+ '\r\n',
+ NoEOFStringIO)
+ resp = client.HTTPResponse(sock, method="HEAD")
+ resp.begin()
+ b = bytearray(5)
+ if resp.readinto(b) != 0:
+ self.fail("Did not expect response from HEAD request")
+ self.assertEqual(bytes(b), b'\x00'*5)
+
def test_send_file(self):
expected = (b'GET /foo HTTP/1.1\r\nHost: example.com\r\n'
b'Accept-Encoding: identity\r\nContent-Length:')
@@ -285,6 +317,40 @@ class BasicTest(TestCase):
finally:
resp.close()
+ def test_readinto_chunked(self):
+ chunked_start = (
+ 'HTTP/1.1 200 OK\r\n'
+ 'Transfer-Encoding: chunked\r\n\r\n'
+ 'a\r\n'
+ 'hello worl\r\n'
+ '1\r\n'
+ 'd\r\n'
+ )
+ sock = FakeSocket(chunked_start + '0\r\n')
+ resp = client.HTTPResponse(sock, method="GET")
+ resp.begin()
+ b = bytearray(16)
+ n = resp.readinto(b)
+ self.assertEqual(b[:11], b'hello world')
+ self.assertEqual(n, 11)
+ resp.close()
+
+ for x in ('', 'foo\r\n'):
+ sock = FakeSocket(chunked_start + x)
+ resp = client.HTTPResponse(sock, method="GET")
+ resp.begin()
+ try:
+ b = bytearray(16)
+ n = resp.readinto(b)
+ except client.IncompleteRead as i:
+ self.assertEqual(i.partial, b'hello world')
+ self.assertEqual(repr(i),'IncompleteRead(11 bytes read)')
+ self.assertEqual(str(i),'IncompleteRead(11 bytes read)')
+ else:
+ self.fail('IncompleteRead expected')
+ finally:
+ resp.close()
+
def test_chunked_head(self):
chunked_start = (
'HTTP/1.1 200 OK\r\n'
@@ -302,6 +368,26 @@ class BasicTest(TestCase):
self.assertEqual(resp.reason, 'OK')
self.assertTrue(resp.isclosed())
+ def test_readinto_chunked_head(self):
+ chunked_start = (
+ 'HTTP/1.1 200 OK\r\n'
+ 'Transfer-Encoding: chunked\r\n\r\n'
+ 'a\r\n'
+ 'hello world\r\n'
+ '1\r\n'
+ 'd\r\n'
+ )
+ sock = FakeSocket(chunked_start + '0\r\n')
+ resp = client.HTTPResponse(sock, method="HEAD")
+ resp.begin()
+ b = bytearray(5)
+ n = resp.readinto(b)
+ self.assertEqual(n, 0)
+ self.assertEqual(bytes(b), b'\x00'*5)
+ self.assertEqual(resp.status, 200)
+ self.assertEqual(resp.reason, 'OK')
+ self.assertTrue(resp.isclosed())
+
def test_negative_content_length(self):
sock = FakeSocket(
'HTTP/1.1 200 OK\r\nContent-Length: -1\r\n\r\nHello\r\n')
diff --git a/Misc/ACKS b/Misc/ACKS
index 048e549..0beb5c9 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -547,6 +547,7 @@ Hannu Krosing
Andrej Krpic
Ivan Krstić
Andrew Kuchling
+Jon Kuhn
Vladimir Kushnir
Ross Lagerwall
Cameron Laird
diff --git a/Misc/NEWS b/Misc/NEWS
index c98c956..2444ced 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -406,6 +406,9 @@ Core and Builtins
Library
-------
+- Issue #13464: Add a readinto() method to http.client.HTTPResponse. Patch
+ by Jon Kuhn.
+
- tarfile.py: Correctly detect bzip2 compressed streams with blocksizes
other than 900k.