diff options
author | Senthil Kumaran <senthil@uthcode.com> | 2012-03-14 02:29:33 (GMT) |
---|---|---|
committer | Senthil Kumaran <senthil@uthcode.com> | 2012-03-14 02:29:33 (GMT) |
commit | e24f96a05973ddbb59d88c03570aef8545c5ef10 (patch) | |
tree | c3c8721fe8e2bf99cf6bb2c6761ad14adc88eb48 /Lib/urllib | |
parent | a2251aadaa6de54eaf9663451afd16806a0712f3 (diff) | |
download | cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.zip cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.gz cpython-e24f96a05973ddbb59d88c03570aef8545c5ef10.tar.bz2 |
Issue #10050: urlretrieve now uses the newer urlopen. The reporthook of urlretrieve takes the block number, the block read size, and the file size.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/request.py | 80 |
1 files changed, 72 insertions, 8 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 90dfcff..c220a7d 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,9 @@ import socket
 import sys
 import time
 import collections
+import tempfile
+import contextlib
+
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -156,17 +159,78 @@ def install_opener(opener):
     global _opener
     _opener = opener
 
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
 def urlretrieve(url, filename=None, reporthook=None, data=None):
-    global _urlopener
-    if not _urlopener:
-        _urlopener = FancyURLopener()
-    return _urlopener.retrieve(url, filename, reporthook, data)
+    """
+    Retrieve a URL into a temporary location on disk.
+
+    Requires a URL argument. If a filename is passed, it is used as
+    the temporary file location. The reporthook argument should be
+    a callable that accepts a block number, a read size, and the
+    total file size of the URL target. The data argument should be
+    valid URL encoded data.
+
+    If a filename is passed and the URL points to a local resource,
+    the result is a copy from local file to new file.
+
+    Returns a tuple containing the path to the newly created
+    data file as well as the resulting HTTPMessage object.
+    """
+    url_type, path = splittype(url)
+
+    with contextlib.closing(urlopen(url, data)) as fp:
+        headers = fp.info()
+
+        # Just return the local path and the "headers" for file://
+        # URLs. No sense in performing a copy unless requested.
+        if url_type == "file" and not filename:
+            return os.path.normpath(path), headers
+
+        # Handle temporary file setup.
+        if filename:
+            tfp = open(filename, 'wb')
+        else:
+            tfp = tempfile.NamedTemporaryFile(delete=False)
+            filename = tfp.name
+            _url_tempfiles.append(filename)
+
+        with tfp:
+            result = filename, headers
+            bs = 1024*8
+            size = -1
+            read = 0
+            blocknum = 0
+            if "content-length" in headers:
+                size = int(headers["Content-Length"])
+
+            if reporthook:
+                reporthook(blocknum, 0, size)
+
+            while True:
+                block = fp.read(bs)
+                if not block:
+                    break
+                read += len(block)
+                tfp.write(block)
+                blocknum += 1
+                if reporthook:
+                    reporthook(blocknum, len(block), size)
+
+    if size >= 0 and read < size:
+        raise ContentTooShortError(
+            "retrieval incomplete: got only %i out of %i bytes"
+            % (read, size), result)
+
+    return result
 
 def urlcleanup():
-    if _urlopener:
-        _urlopener.cleanup()
+    for temp_file in _url_tempfiles:
+        try:
+            os.unlink(temp_file)
+        except EnvironmentError:
+            pass
+
+    del _url_tempfiles[:]
     global _opener
     if _opener:
         _opener = None