From 6b0bdab4293684a2a7e47e8e8e70076c8670eb9b Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sat, 10 Nov 2012 13:43:44 -0800 Subject: Fixes issue #16409: The reporthook callback made by the legacy urllib.request.urlretrieve API now properly supplies a constant non-zero block_size as it did in Python 3.2 and 2.7. This matches the behavior of urllib.request.URLopener.retrieve. --- Doc/library/urllib.request.rst | 3 ++- Lib/test/test_urllibnet.py | 34 ++++++++++++++++++++++++++++++---- Lib/urllib/request.py | 4 ++-- Misc/NEWS | 5 +++++ 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst index 898fe71..21255e5 100644 --- a/Doc/library/urllib.request.rst +++ b/Doc/library/urllib.request.rst @@ -1305,7 +1305,8 @@ some point in the future. *filename* is not given, the filename is the output of :func:`tempfile.mktemp` with a suffix that matches the suffix of the last path component of the input URL. If *reporthook* is given, it must be a function accepting three numeric - parameters. It will be called after each chunk of data is read from the + parameters: A chunk number, the maximum size chunks are read in and the total size of the download + (-1 if unknown). It will be called once at the start and after each chunk of data is read from the network. *reporthook* is ignored for local URLs. If the *url* uses the :file:`http:` scheme identifier, the optional *data* diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py index 383b2af..d3fe69d 100644 --- a/Lib/test/test_urllibnet.py +++ b/Lib/test/test_urllibnet.py @@ -137,10 +137,10 @@ class urlretrieveNetworkTests(unittest.TestCase): """Tests urllib.request.urlretrieve using the network.""" @contextlib.contextmanager - def urlretrieve(self, *args): + def urlretrieve(self, *args, **kwargs): resource = args[0] with support.transient_internet(resource): - file_location, info = urllib.request.urlretrieve(*args) + file_location, info = urllib.request.urlretrieve(*args, **kwargs) try: yield file_location, info finally: @@ -170,9 +170,10 @@ class urlretrieveNetworkTests(unittest.TestCase): self.assertIsInstance(info, email.message.Message, "info is not an instance of email.message.Message") + logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png" + def test_data_header(self): - logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png" - with self.urlretrieve(logo) as (file_location, fileheaders): + with self.urlretrieve(self.logo) as (file_location, fileheaders): datevalue = fileheaders.get('Date') dateformat = '%a, %d %b %Y %H:%M:%S GMT' try: @@ -180,6 +181,31 @@ class urlretrieveNetworkTests(unittest.TestCase): except ValueError: self.fail('Date value not in %r format', dateformat) + def test_reporthook(self): + records = [] + def recording_reporthook(blocks, block_size, total_size): + records.append((blocks, block_size, total_size)) + + with self.urlretrieve(self.logo, reporthook=recording_reporthook) as ( + file_location, fileheaders): + expected_size = int(fileheaders['Content-Length']) + + records_repr = repr(records) # For use in error messages. + self.assertGreater(len(records), 1, msg="There should always be two " + "calls; the first one before the transfer starts.") + self.assertEqual(records[0][0], 0) + self.assertGreater(records[0][1], 0, + msg="block size can't be 0 in %s" % records_repr) + self.assertEqual(records[0][2], expected_size) + self.assertEqual(records[-1][2], expected_size) + + block_sizes = {block_size for _, block_size, _ in records} + self.assertEqual({records[0][1]}, block_sizes, + msg="block sizes in %s must be equal" % records_repr) + self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size, + msg="number of blocks * block size must be" + " >= total size in %s" % records_repr) + def test_main(): support.requires('network') diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 27ab2b9..5ddec5f 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -208,7 +208,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): size = int(headers["Content-Length"]) if reporthook: - reporthook(blocknum, 0, size) + reporthook(blocknum, bs, size) while True: block = fp.read(bs) @@ -218,7 +218,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): tfp.write(block) blocknum += 1 if reporthook: - reporthook(blocknum, len(block), size) + reporthook(blocknum, bs, size) if size >= 0 and read < size: raise ContentTooShortError( diff --git a/Misc/NEWS b/Misc/NEWS index bf201cf..0646a74 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -80,6 +80,11 @@ Core and Builtins Library ------- +- Issue #16409: The reporthook callback made by the legacy + urllib.request.urlretrieve API now properly supplies a constant non-zero + block_size as it did in Python 3.2 and 2.7. This matches the behavior of + urllib.request.URLopener.retrieve. + - Issue #16431: Use the type information when constructing a Decimal subtype from a Decimal argument. -- cgit v0.12