summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Gregory P. Smith <greg@krypto.org> 2012-11-10 21:43:44 (GMT)
committer: Gregory P. Smith <greg@krypto.org> 2012-11-10 21:43:44 (GMT)
commit: 6b0bdab4293684a2a7e47e8e8e70076c8670eb9b (patch)
tree: 6ec957a8c657b0e66d3fed6d6c944068137afc59
parent: 296c2fd065f3ad026dd7169d5fd323a0fee40b31 (diff)
download: cpython-6b0bdab4293684a2a7e47e8e8e70076c8670eb9b.zip
          cpython-6b0bdab4293684a2a7e47e8e8e70076c8670eb9b.tar.gz
          cpython-6b0bdab4293684a2a7e47e8e8e70076c8670eb9b.tar.bz2
Fixes issue #16409: The reporthook callback made by the legacy
urllib.request.urlretrieve API now properly supplies a constant non-zero block_size as it did in Python 3.2 and 2.7. This matches the behavior of urllib.request.URLopener.retrieve.
-rw-r--r-- Doc/library/urllib.request.rst 3
-rw-r--r-- Lib/test/test_urllibnet.py 34
-rw-r--r-- Lib/urllib/request.py 4
-rw-r--r-- Misc/NEWS 5
4 files changed, 39 insertions, 7 deletions
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
index 898fe71..21255e5 100644
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -1305,7 +1305,8 @@ some point in the future.
*filename* is not given, the filename is the output of :func:`tempfile.mktemp`
with a suffix that matches the suffix of the last path component of the input
URL. If *reporthook* is given, it must be a function accepting three numeric
- parameters. It will be called after each chunk of data is read from the
+ parameters: A chunk number, the maximum size chunks are read in and the total size of the download
+ (-1 if unknown). It will be called once at the start and after each chunk of data is read from the
network. *reporthook* is ignored for local URLs.
If the *url* uses the :file:`http:` scheme identifier, the optional *data*
diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py
index 383b2af..d3fe69d 100644
--- a/Lib/test/test_urllibnet.py
+++ b/Lib/test/test_urllibnet.py
@@ -137,10 +137,10 @@ class urlretrieveNetworkTests(unittest.TestCase):
"""Tests urllib.request.urlretrieve using the network."""
@contextlib.contextmanager
- def urlretrieve(self, *args):
+ def urlretrieve(self, *args, **kwargs):
resource = args[0]
with support.transient_internet(resource):
- file_location, info = urllib.request.urlretrieve(*args)
+ file_location, info = urllib.request.urlretrieve(*args, **kwargs)
try:
yield file_location, info
finally:
@@ -170,9 +170,10 @@ class urlretrieveNetworkTests(unittest.TestCase):
self.assertIsInstance(info, email.message.Message,
"info is not an instance of email.message.Message")
+ logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
+
def test_data_header(self):
- logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
- with self.urlretrieve(logo) as (file_location, fileheaders):
+ with self.urlretrieve(self.logo) as (file_location, fileheaders):
datevalue = fileheaders.get('Date')
dateformat = '%a, %d %b %Y %H:%M:%S GMT'
try:
@@ -180,6 +181,31 @@ class urlretrieveNetworkTests(unittest.TestCase):
except ValueError:
self.fail('Date value not in %r format', dateformat)
+ def test_reporthook(self):
+ records = []
+ def recording_reporthook(blocks, block_size, total_size):
+ records.append((blocks, block_size, total_size))
+
+ with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
+ file_location, fileheaders):
+ expected_size = int(fileheaders['Content-Length'])
+
+ records_repr = repr(records) # For use in error messages.
+ self.assertGreater(len(records), 1, msg="There should always be two "
+ "calls; the first one before the transfer starts.")
+ self.assertEqual(records[0][0], 0)
+ self.assertGreater(records[0][1], 0,
+ msg="block size can't be 0 in %s" % records_repr)
+ self.assertEqual(records[0][2], expected_size)
+ self.assertEqual(records[-1][2], expected_size)
+
+ block_sizes = {block_size for _, block_size, _ in records}
+ self.assertEqual({records[0][1]}, block_sizes,
+ msg="block sizes in %s must be equal" % records_repr)
+ self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
+ msg="number of blocks * block size must be"
+ " >= total size in %s" % records_repr)
+
def test_main():
support.requires('network')
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 27ab2b9..5ddec5f 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -208,7 +208,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None):
size = int(headers["Content-Length"])
if reporthook:
- reporthook(blocknum, 0, size)
+ reporthook(blocknum, bs, size)
while True:
block = fp.read(bs)
@@ -218,7 +218,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None):
tfp.write(block)
blocknum += 1
if reporthook:
- reporthook(blocknum, len(block), size)
+ reporthook(blocknum, bs, size)
if size >= 0 and read < size:
raise ContentTooShortError(
diff --git a/Misc/NEWS b/Misc/NEWS
index bf201cf..0646a74 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -80,6 +80,11 @@ Core and Builtins
Library
-------
+- Issue #16409: The reporthook callback made by the legacy
+ urllib.request.urlretrieve API now properly supplies a constant non-zero
+ block_size as it did in Python 3.2 and 2.7. This matches the behavior of
+ urllib.request.URLopener.retrieve.
+
- Issue #16431: Use the type information when constructing a Decimal subtype
from a Decimal argument.