From b353c12a9caa20e90466af6a8f17a227b72e4f23 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 11 Feb 2009 00:39:14 +0000 Subject: Issue #4631: Fix urlopen() result when an HTTP response uses chunked encoding. --- Lib/http/client.py | 12 +++++++++--- Lib/io.py | 2 -- Lib/test/test_httplib.py | 4 +--- Lib/test/test_io.py | 1 + Lib/test/test_urllib2_localnet.py | 32 +++++++++++++++++++++++++++----- Lib/test/test_urllib2net.py | 8 ++++---- Lib/urllib/request.py | 6 +++--- Lib/urllib/response.py | 3 ++- Misc/NEWS | 3 +++ 9 files changed, 50 insertions(+), 21 deletions(-) diff --git a/Lib/http/client.py b/Lib/http/client.py index f816d58..14a6c35 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -249,7 +249,7 @@ def parse_headers(fp): return email.parser.Parser(_class=HTTPMessage).parsestr(hstring) -class HTTPResponse: +class HTTPResponse(io.RawIOBase): # strict: If true, raise BadStatusLine if the status line can't be # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is @@ -471,8 +471,6 @@ class HTTPResponse: # called, meaning self.isclosed() is meaningful. return self.fp is None - # XXX It would be nice to have readline and __iter__ for this, too. - def read(self, amt=None): if self.fp is None: return b"" @@ -585,6 +583,9 @@ class HTTPResponse: amt -= len(chunk) return b"".join(s) + def fileno(self): + return self.fp.fileno() + def getheader(self, name, default=None): if self.msg is None: raise ResponseNotReady() @@ -596,6 +597,11 @@ class HTTPResponse: raise ResponseNotReady() return list(self.msg.items()) + # We override IOBase.__iter__ so that it doesn't check for closed-ness + + def __iter__(self): + return self + class HTTPConnection: diff --git a/Lib/io.py b/Lib/io.py index 439ca95..c2c1067 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -491,7 +491,6 @@ class IOBase(metaclass=abc.ABCMeta): terminator(s) recognized. """ # For backwards compatibility, a (slowish) readline(). - self._checkClosed() if hasattr(self, "peek"): def nreadahead(): readahead = self.peek(1) @@ -533,7 +532,6 @@ class IOBase(metaclass=abc.ABCMeta): lines will be read if the total size (in bytes/characters) of all lines so far exceeds hint. """ - self._checkClosed() if hint is None or hint <= 0: return list(self) n = 0 diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 591f956..099f803 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -42,7 +42,6 @@ class NoEOFStringIO(io.BytesIO): raise AssertionError('caller tried to read past EOF') return data - class HeaderTests(TestCase): def test_auto_headers(self): # Some headers are added automatically, but should not be added by @@ -245,7 +244,6 @@ class TimeoutTest(TestCase): self.assertEqual(httpConn.sock.gettimeout(), 30) httpConn.close() - class HTTPSTimeoutTest(TestCase): # XXX Here should be tests for HTTPS, there isn't any right now! @@ -257,7 +255,7 @@ class HTTPSTimeoutTest(TestCase): def test_main(verbose=None): support.run_unittest(HeaderTests, OfflineTest, BasicTest, TimeoutTest, - HTTPSTimeoutTest) + HTTPSTimeoutTest) if __name__ == '__main__': test_main() diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 6265258..30869ac 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -1363,6 +1363,7 @@ class MiscIOTest(unittest.TestCase): self.assertRaises(ValueError, f.fileno) self.assertRaises(ValueError, f.isatty) self.assertRaises(ValueError, f.__iter__) + self.assertRaises(ValueError, next, f) if hasattr(f, "peek"): self.assertRaises(ValueError, f.peek, 1) self.assertRaises(ValueError, f.read) diff --git a/Lib/test/test_urllib2_localnet.py b/Lib/test/test_urllib2_localnet.py index 94f057f..90d3c88 100644 --- a/Lib/test/test_urllib2_localnet.py +++ b/Lib/test/test_urllib2_localnet.py @@ -310,7 +310,7 @@ def GetRequestHandler(responses): self.send_response(response_code) for (header, value) in headers: - self.send_header(header, value % self.port) + self.send_header(header, value % {'port':self.port}) if body: self.send_header("Content-type", "text/plain") self.end_headers() @@ -341,10 +341,17 @@ class TestUrlopen(unittest.TestCase): self.server.stop() def urlopen(self, url, data=None): + l = [] f = urllib.request.urlopen(url, data) - result = f.read() - f.close() - return result + try: + # Exercise various methods + l.extend(f.readlines(200)) + l.append(f.readline()) + l.append(f.read(1024)) + l.append(f.read()) + finally: + f.close() + return b"".join(l) def start_server(self, responses=None): if responses is None: @@ -361,7 +368,8 @@ class TestUrlopen(unittest.TestCase): def test_redirection(self): expected_response = b"We got here..." responses = [ - (302, [("Location", "http://localhost:%s/somewhere_else")], ""), + (302, [("Location", "http://localhost:%(port)s/somewhere_else")], + ""), (200, [], expected_response) ] @@ -370,6 +378,20 @@ class TestUrlopen(unittest.TestCase): self.assertEquals(data, expected_response) self.assertEquals(handler.requests, ["/", "/somewhere_else"]) + def test_chunked(self): + expected_response = b"hello world" + chunked_start = ( + b'a\r\n' + b'hello worl\r\n' + b'1\r\n' + b'd\r\n' + b'0\r\n' + ) + response = [(200, [("Transfer-Encoding", "chunked")], chunked_start)] + handler = self.start_server(response) + data = self.urlopen("http://localhost:%s/" % handler.port) + self.assertEquals(data, expected_response) + def test_404(self): expected_response = b"Bad bad bad..." handler = self.start_server([(404, [], expected_response)]) diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index ee0f67e..16ca301 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -195,7 +195,7 @@ class TimeoutTest(unittest.TestCase): def test_http_basic(self): self.assertTrue(socket.getdefaulttimeout() is None) u = _urlopen_with_retry("http://www.python.org") - self.assertTrue(u.fp.raw._sock.gettimeout() is None) + self.assertTrue(u.fp.fp.raw._sock.gettimeout() is None) def test_http_default_timeout(self): self.assertTrue(socket.getdefaulttimeout() is None) @@ -204,7 +204,7 @@ class TimeoutTest(unittest.TestCase): u = _urlopen_with_retry("http://www.python.org") finally: socket.setdefaulttimeout(None) - self.assertEqual(u.fp.raw._sock.gettimeout(), 60) + self.assertEqual(u.fp.fp.raw._sock.gettimeout(), 60) def test_http_no_timeout(self): self.assertTrue(socket.getdefaulttimeout() is None) @@ -213,11 +213,11 @@ class TimeoutTest(unittest.TestCase): u = _urlopen_with_retry("http://www.python.org", timeout=None) finally: socket.setdefaulttimeout(None) - self.assertTrue(u.fp.raw._sock.gettimeout() is None) + self.assertTrue(u.fp.fp.raw._sock.gettimeout() is None) def test_http_timeout(self): u = _urlopen_with_retry("http://www.python.org", timeout=120) - self.assertEqual(u.fp.raw._sock.gettimeout(), 120) + self.assertEqual(u.fp.fp.raw._sock.gettimeout(), 120) FTP_HOST = "ftp://ftp.mirror.nl/pub/mirror/gnu/" diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 63ce6d4..b86d8f2 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -333,7 +333,6 @@ class OpenerDirector: handlers = chain.get(kind, ()) for handler in handlers: func = getattr(handler, meth_name) - result = func(*args) if result is not None: return result @@ -1070,7 +1069,8 @@ class AbstractHTTPHandler(BaseHandler): except socket.error as err: # XXX what error? raise URLError(err) - resp = addinfourl(r.fp, r.msg, req.get_full_url()) +## resp = addinfourl(r.fp, r.msg, req.get_full_url()) + resp = addinfourl(r, r.msg, req.get_full_url()) resp.code = r.status resp.msg = r.reason return resp @@ -1606,7 +1606,7 @@ class URLopener: # According to RFC 2616, "2xx" code indicates that the client's # request was successfully received, understood, and accepted. if 200 <= response.status < 300: - return addinfourl(response.fp, response.msg, "http:" + url, + return addinfourl(response, response.msg, "http:" + url, response.status) else: return self.http_error( diff --git a/Lib/urllib/response.py b/Lib/urllib/response.py index 1352622..52eeed0 100644 --- a/Lib/urllib/response.py +++ b/Lib/urllib/response.py @@ -17,7 +17,8 @@ class addbase(object): self.read = self.fp.read self.readline = self.fp.readline # TODO(jhylton): Make sure an object with readlines() is also iterable - if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines + if hasattr(self.fp, "readlines"): + self.readlines = self.fp.readlines if hasattr(self.fp, "fileno"): self.fileno = self.fp.fileno else: diff --git a/Misc/NEWS b/Misc/NEWS index 185e5cf..1be4001 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -163,6 +163,9 @@ Core and Builtins Library ------- +- Issue #4631: Fix urlopen() result when an HTTP response uses chunked + encoding. + - Issue #5203: Fixed ctypes segfaults when passing a unicode string to a function without argtypes (only occurs if HAVE_USABLE_WCHAR_T is false). -- cgit v0.12