summaryrefslogtreecommitdiffstats
path: root/Lib/urllib
diff options
context:
space:
mode:
author Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
committer Senthil Kumaran <senthil@uthcode.com> 2012-03-14 02:29:33 (GMT)
commit e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch)
tree c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib/urllib
parent a2251aadaa6de54eaf9663451afd16806a0712f3 (diff)
download cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz
cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2
Issue #10050: urlretrieve uses the newer urlopen. The reporthook of urlretrieve takes three arguments: block number, block read size, and file size.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- Lib/urllib/request.py 80
1 file changed, 72 insertions, 8 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 90dfcff..c220a7d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,9 @@ import socket
import sys
import time
import collections
+import tempfile
+import contextlib
+
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
@@ -156,17 +159,78 @@ def install_opener(opener):
global _opener
_opener = opener
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
- global _urlopener
- if not _urlopener:
- _urlopener = FancyURLopener()
- return _urlopener.retrieve(url, filename, reporthook, data)
+ """
+ Retrieve a URL into a temporary location on disk.
+
+ Requires a URL argument. If a filename is passed, it is used as
+ the temporary file location. The reporthook argument should be
+ a callable that accepts a block number, a read size, and the
+ total file size of the URL target. The data argument should be
+ valid URL encoded data.
+
+ If a filename is passed and the URL points to a local resource,
+ the result is a copy from local file to new file.
+
+ Returns a tuple containing the path to the newly created
+ data file as well as the resulting HTTPMessage object.
+ """
+ url_type, path = splittype(url)
+
+ with contextlib.closing(urlopen(url, data)) as fp:
+ headers = fp.info()
+
+ # Just return the local path and the "headers" for file://
+ # URLs. No sense in performing a copy unless requested.
+ if url_type == "file" and not filename:
+ return os.path.normpath(path), headers
+
+ # Handle temporary file setup.
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ tfp = tempfile.NamedTemporaryFile(delete=False)
+ filename = tfp.name
+ _url_tempfiles.append(filename)
+
+ with tfp:
+ result = filename, headers
+ bs = 1024*8
+ size = -1
+ read = 0
+ blocknum = 0
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
+
+ if reporthook:
+ reporthook(blocknum, 0, size)
+
+ while True:
+ block = fp.read(bs)
+ if not block:
+ break
+ read += len(block)
+ tfp.write(block)
+ blocknum += 1
+ if reporthook:
+ reporthook(blocknum, len(block), size)
+
+ if size >= 0 and read < size:
+ raise ContentTooShortError(
+ "retrieval incomplete: got only %i out of %i bytes"
+ % (read, size), result)
+
+ return result
def urlcleanup():
- if _urlopener:
- _urlopener.cleanup()
+ for temp_file in _url_tempfiles:
+ try:
+ os.unlink(temp_file)
+ except EnvironmentError:
+ pass
+
+ del _url_tempfiles[:]
global _opener
if _opener:
_opener = None