Issue10050 - urlretrieve uses newer urlopen. reporthook of urlretrieve takes, block number, block read size, file_size

author: Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
committer: Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
commit: e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch)
tree: c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib
parent: a2251aadaa6de54eaf9663451afd16806a0712f3 (diff)
download: cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2
2 files changed, 89 insertions, 25 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index b2680ed..85f8f84 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -384,11 +384,11 @@ class urlretrieve_FileTests(unittest.TestCase):
 
     def test_reporthook(self):
         # Make sure that the reporthook works.
-        def hooktester(count, block_size, total_size, count_holder=[0]):
-            self.assertIsInstance(count, int)
-            self.assertIsInstance(block_size, int)
-            self.assertIsInstance(total_size, int)
-            self.assertEqual(count, count_holder[0])
+        def hooktester(block_count, block_read_size, file_size, count_holder=[0]):
+            self.assertIsInstance(block_count, int)
+            self.assertIsInstance(block_read_size, int)
+            self.assertIsInstance(file_size, int)
+            self.assertEqual(block_count, count_holder[0])
             count_holder[0] = count_holder[0] + 1
         second_temp = "%s.2" % support.TESTFN
         self.registerFileForCleanUp(second_temp)
@@ -399,8 +399,8 @@ class urlretrieve_FileTests(unittest.TestCase):
     def test_reporthook_0_bytes(self):
         # Test on zero length file. Should call reporthook only 1 time.
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile()
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
@@ -410,31 +410,31 @@ class urlretrieve_FileTests(unittest.TestCase):
     def test_reporthook_5_bytes(self):
         # Test on 5 byte file. Should call reporthook only 2 times (once when
         # the "network connection" is established and once when the block is
-        # read). Since the block size is 8192 bytes, only one block read is
-        # required to read the entire file.
+        # read).
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile(b"x" * 5)
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
         self.assertEqual(len(report), 2)
-        self.assertEqual(report[0][1], 8192)
-        self.assertEqual(report[0][2], 5)
+        self.assertEqual(report[0][1], 0)
+        self.assertEqual(report[1][1], 5)
 
     def test_reporthook_8193_bytes(self):
         # Test on 8193 byte file. Should call reporthook only 3 times (once
         # when the "network connection" is established, once for the next 8192
         # bytes, and once for the last byte).
         report = []
-        def hooktester(count, block_size, total_size, _report=report):
-            _report.append((count, block_size, total_size))
+        def hooktester(block_count, block_read_size, file_size, _report=report):
+            _report.append((block_count, block_read_size, file_size))
         srcFileName = self.createNewTempFile(b"x" * 8193)
         urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
             support.TESTFN, hooktester)
         self.assertEqual(len(report), 3)
-        self.assertEqual(report[0][1], 8192)
-        self.assertEqual(report[0][2], 8193)
+        self.assertEqual(report[0][1], 0)
+        self.assertEqual(report[1][1], 8192)
+        self.assertEqual(report[2][1], 1)
 
 
 class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 90dfcff..c220a7d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,9 @@ import socket
 import sys
 import time
 import collections
+import tempfile
+import contextlib
+
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -156,17 +159,78 @@ def install_opener(opener):
     global _opener
     _opener = opener
 
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
 def urlretrieve(url, filename=None, reporthook=None, data=None):
-    global _urlopener
-    if not _urlopener:
-        _urlopener = FancyURLopener()
-    return _urlopener.retrieve(url, filename, reporthook, data)
+    """
+    Retrieve a URL into a temporary location on disk.
+
+    Requires a URL argument. If a filename is passed, it is used as
+    the temporary file location. The reporthook argument should be
+    a callable that accepts a block number, a read size, and the
+    total file size of the URL target. The data argument should be
+    valid URL encoded data.
+
+    If a filename is passed and the URL points to a local resource,
+    the result is a copy from local file to new file.
+
+    Returns a tuple containing the path to the newly created
+    data file as well as the resulting HTTPMessage object.
+    """
+    url_type, path = splittype(url)
+
+    with contextlib.closing(urlopen(url, data)) as fp:
+        headers = fp.info()
+
+        # Just return the local path and the "headers" for file://
+        # URLs. No sense in performing a copy unless requested.
+        if url_type == "file" and not filename:
+            return os.path.normpath(path), headers
+
+        # Handle temporary file setup.
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            tfp = tempfile.NamedTemporaryFile(delete=False)
+            filename = tfp.name
+            _url_tempfiles.append(filename)
+
+        with tfp:
+            result = filename, headers
+            bs = 1024*8
+            size = -1
+            read = 0
+            blocknum = 0
+            if "content-length" in headers:
+                size = int(headers["Content-Length"])
+
+            if reporthook:
+                reporthook(blocknum, 0, size)
+
+            while True:
+                block = fp.read(bs)
+                if not block:
+                    break
+                read += len(block)
+                tfp.write(block)
+                blocknum += 1
+                if reporthook:
+                    reporthook(blocknum, len(block), size)
+
+    if size >= 0 and read < size:
+        raise ContentTooShortError(
+            "retrieval incomplete: got only %i out of %i bytes"
+            % (read, size), result)
+
+    return result
 
 def urlcleanup():
-    if _urlopener:
-        _urlopener.cleanup()
+    for temp_file in _url_tempfiles:
+        try:
+            os.unlink(temp_file)
+        except EnvironmentError:
+            pass
+
+    del _url_tempfiles[:]
     global _opener
     if _opener:
         _opener = None
author	Senthil Kumaran <senthil@uthcode.com>	2012-03-14 02:29:33 (GMT)
committer	Senthil Kumaran <senthil@uthcode.com>	2012-03-14 02:29:33 (GMT)
commit	e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch)
tree	c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib
parent	a2251aadaa6de54eaf9663451afd16806a0712f3 (diff)
download	cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2