Issue #10050: urlretrieve now uses the newer urlopen. The reporthook of urlretrieve takes the block number, the block read size, and the file size.

author: Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
committer: Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
commit: e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch)
tree: c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib/urllib
parent: a2251aadaa6de54eaf9663451afd16806a0712f3 (diff)
download: cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2
1 files changed, 72 insertions, 8 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 90dfcff..c220a7d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,9 @@ import socket
 import sys
 import time
 import collections
+import tempfile
+import contextlib
+
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -156,17 +159,78 @@ def install_opener(opener):
     global _opener
     _opener = opener
 
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []  # temp files made by urlretrieve(); see urlcleanup()
 def urlretrieve(url, filename=None, reporthook=None, data=None):
-    global _urlopener
-    if not _urlopener:
-        _urlopener = FancyURLopener()
-    return _urlopener.retrieve(url, filename, reporthook, data)
+    """
+    Retrieve a URL into a local file and return (filename, headers).
+
+    If filename is given the data is written there; otherwise a
+    temporary file is created and recorded for urlcleanup(). For a
+    file: URL with no filename nothing is copied and the normalized
+    local path is returned. data is passed through to urlopen().
+
+    reporthook, if given, is called as reporthook(blocknum, read_size,
+    total_size): once before reading with read_size 0, then after each
+    block. total_size is -1 when there is no Content-Length header.
+
+    Raises ContentTooShortError if fewer bytes were read than the
+    Content-Length header announced.
+    """
+    url_type, path = splittype(url)
+
+    with contextlib.closing(urlopen(url, data)) as fp:
+        headers = fp.info()
+
+        # Just return the local path and the "headers" for file://
+        # URLs. No sense in performing a copy unless requested.
+        if url_type == "file" and not filename:
+            return os.path.normpath(path), headers
+
+        # Handle temporary file setup.
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            tfp = tempfile.NamedTemporaryFile(delete=False)
+            filename = tfp.name
+            _url_tempfiles.append(filename)  # removed by urlcleanup()
+
+        with tfp:
+            result = filename, headers
+            bs = 1024*8  # read in 8 KiB blocks
+            size = -1  # stays -1 when Content-Length is absent
+            read = 0  # total bytes received so far
+            blocknum = 0
+            if "content-length" in headers:
+                size = int(headers["Content-Length"])
+
+            if reporthook:
+                reporthook(blocknum, 0, size)  # initial call, nothing read
+
+            while True:
+                block = fp.read(bs)
+                if not block:
+                    break  # empty read: end of data
+                read += len(block)
+                tfp.write(block)
+                blocknum += 1
+                if reporthook:
+                    reporthook(blocknum, len(block), size)
+
+    if size >= 0 and read < size:  # got less than Content-Length
+        raise ContentTooShortError(
+            "retrieval incomplete: got only %i out of %i bytes"
+            % (read, size), result)
+
+    return result
 
 def urlcleanup():
-    if _urlopener:
-        _urlopener.cleanup()
+    for temp_file in _url_tempfiles:  # temp files made by urlretrieve()
+        try:
+            os.unlink(temp_file)
+        except EnvironmentError:
+            pass  # best effort: skip files that cannot be removed
+
+    del _url_tempfiles[:]  # empty the module-level list in place
     global _opener
     if _opener:
         _opener = None
author	Senthil Kumaran <senthil@uthcode.com>	2012-03-14 02:29:33 (GMT)
committer	Senthil Kumaran <senthil@uthcode.com>	2012-03-14 02:29:33 (GMT)
commit	e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch)
tree	c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib/urllib
parent	a2251aadaa6de54eaf9663451afd16806a0712f3 (diff)
download	cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2