diff options
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r-- | Lib/urllib/request.py | 210 |
1 files changed, 161 insertions, 49 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index eb45c7e..5ddec5f 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -89,14 +89,16 @@ import http.client import io import os import posixpath -import random import re import socket import sys import time import collections +import tempfile +import contextlib import warnings + from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, @@ -112,21 +114,40 @@ except ImportError: else: _have_ssl = True +__all__ = [ + # Classes + 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', + 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', + 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', + 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', + 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'UnknownHandler', 'HTTPErrorProcessor', + # Functions + 'urlopen', 'install_opener', 'build_opener', + 'pathname2url', 'url2pathname', 'getproxies', + # Legacy interface + 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', +] + # used in User-Agent header sent __version__ = sys.version[:3] _opener = None def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - *, cafile=None, capath=None): + *, cafile=None, capath=None, cadefault=False): global _opener - if cafile or capath: + if cafile or capath or cadefault: if not _have_ssl: raise ValueError('SSL support not available') context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 - if cafile or capath: + if cafile or capath or cadefault: context.verify_mode = ssl.CERT_REQUIRED - context.load_verify_locations(cafile, capath) + if cafile or capath: + context.load_verify_locations(cafile, capath) + else: + context.set_default_verify_paths() check_hostname = True else: check_hostname = False @@ -142,17 +163,78 @@ def install_opener(opener): global _opener _opener = opener -# TODO(jhylton): Make this work with the same global opener. -_urlopener = None +_url_tempfiles = [] def urlretrieve(url, filename=None, reporthook=None, data=None): - global _urlopener - if not _urlopener: - _urlopener = FancyURLopener() - return _urlopener.retrieve(url, filename, reporthook, data) + """ + Retrieve a URL into a temporary location on disk. + + Requires a URL argument. If a filename is passed, it is used as + the temporary file location. The reporthook argument should be + a callable that accepts a block number, a read size, and the + total file size of the URL target. The data argument should be + valid URL encoded data. + + If a filename is passed and the URL points to a local resource, + the result is a copy from local file to new file. + + Returns a tuple containing the path to the newly created + data file as well as the resulting HTTPMessage object. + """ + url_type, path = splittype(url) + + with contextlib.closing(urlopen(url, data)) as fp: + headers = fp.info() + + # Just return the local path and the "headers" for file:// + # URLs. No sense in performing a copy unless requested. + if url_type == "file" and not filename: + return os.path.normpath(path), headers + + # Handle temporary file setup. + if filename: + tfp = open(filename, 'wb') + else: + tfp = tempfile.NamedTemporaryFile(delete=False) + filename = tfp.name + _url_tempfiles.append(filename) + + with tfp: + result = filename, headers + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + + if reporthook: + reporthook(blocknum, bs, size) + + while True: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, bs, size) + + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result def urlcleanup(): - if _urlopener: - _urlopener.cleanup() + for temp_file in _url_tempfiles: + try: + os.unlink(temp_file) + except EnvironmentError: + pass + + del _url_tempfiles[:] global _opener if _opener: _opener = None @@ -178,7 +260,8 @@ def request_host(request): class Request: def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False): + origin_req_host=None, unverifiable=False, + method=None): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.full_url = unwrap(url) self.full_url, self.fragment = splittag(self.full_url) @@ -192,6 +275,7 @@ class Request: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable + self.method = method self._parse() def _parse(self): @@ -203,41 +287,60 @@ class Request: self.host = unquote(self.host) def get_method(self): - if self.data is not None: + """Return a string indicating the HTTP request method.""" + if self.method is not None: + return self.method + elif self.data is not None: return "POST" else: return "GET" + def get_full_url(self): + if self.fragment: + return '%s#%s' % (self.full_url, self.fragment) + else: + return self.full_url + # Begin deprecated methods def add_data(self, data): + msg = "Request.add_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) self.data = data def has_data(self): + msg = "Request.has_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.data is not None def get_data(self): + msg = "Request.get_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.data - def get_full_url(self): - if self.fragment: - return '%s#%s' % (self.full_url, self.fragment) - else: - return self.full_url - def get_type(self): + msg = "Request.get_type method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.type def get_host(self): + msg = "Request.get_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.host def get_selector(self): + msg = "Request.get_selector method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.selector def is_unverifiable(self): + msg = "Request.is_unverifiable method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.unverifiable def get_origin_req_host(self): + msg = "Request.get_origin_req_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.origin_req_host # End deprecated methods @@ -682,8 +785,8 @@ class ProxyHandler(BaseHandler): self.proxies = proxies for type, url in proxies.items(): setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: \ - meth(r, proxy, type)) + lambda r, proxy=url, type=type, meth=self.proxy_open: + meth(r, proxy, type)) def proxy_open(self, req, proxy, type): orig_type = req.type @@ -825,17 +928,23 @@ class AbstractBasicAuthHandler: self.retried += 1 if authreq: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, quote, realm = mo.groups() - if quote not in ["'", '"']: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 2) - if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response + scheme = authreq.split()[0] + if scheme.lower() != 'basic': + raise ValueError("AbstractBasicAuthHandler does not" + " support the following scheme: '%s'" % + scheme) + else: + mo = AbstractBasicAuthHandler.rx.search(authreq) + if mo: + scheme, quote, realm = mo.groups() + if quote not in ['"',"'"]: + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 2) + if scheme.lower() == 'basic': + response = self.retry_http_basic_auth(host, req, realm) + if response and response.code != 401: + self.retried = 0 + return response def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) @@ -878,9 +987,9 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): return response -def randombytes(n): - """Return n random bytes.""" - return os.urandom(n) +# Return n random bytes. +_randombytes = os.urandom + class AbstractDigestAuthHandler: # Digest authentication is specified in RFC 2617. @@ -921,6 +1030,9 @@ class AbstractDigestAuthHandler: scheme = authreq.split()[0] if scheme.lower() == 'digest': return self.retry_http_digest_auth(req, authreq) + elif scheme.lower() != 'basic': + raise ValueError("AbstractDigestAuthHandler does not support" + " the following scheme: '%s'" % scheme) def retry_http_digest_auth(self, req, auth): token, challenge = auth.split(' ', 1) @@ -941,7 +1053,7 @@ class AbstractDigestAuthHandler: # authentication, and to provide some message integrity protection. # This isn't a fabulous effort, but it's probably Good Enough. s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime()) - b = s.encode("ascii") + randombytes(8) + b = s.encode("ascii") + _randombytes(8) dig = hashlib.sha1(b).hexdigest() return dig[:16] @@ -1066,7 +1178,7 @@ class AbstractHTTPHandler(BaseHandler): if request.data is not None: # POST data = request.data if isinstance(data, str): - msg = "POST data should be bytes or an iterable of bytes. "\ + msg = "POST data should be bytes or an iterable of bytes. " \ "It cannot be of type str." raise TypeError(msg) if not request.has_header('Content-type'): @@ -1162,7 +1274,6 @@ class HTTPHandler(AbstractHTTPHandler): http_request = AbstractHTTPHandler.do_request_ if hasattr(http.client, 'HTTPSConnection'): - import ssl class HTTPSHandler(AbstractHTTPHandler): @@ -1177,6 +1288,8 @@ if hasattr(http.client, 'HTTPSConnection'): https_request = AbstractHTTPHandler.do_request_ + __all__.append('HTTPSHandler') + class HTTPCookieProcessor(BaseHandler): def __init__(self, cookiejar=None): import http.cookiejar @@ -1463,6 +1576,9 @@ class URLopener: # Constructor def __init__(self, proxies=None, **x509): + msg = "%(class)s style of invoking requests is deprecated. " \ + "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} + warnings.warn(msg, DeprecationWarning, stacklevel=3) if proxies is None: proxies = getproxies() assert hasattr(proxies, 'keys'), "proxies must be a mapping" @@ -1542,6 +1658,8 @@ class URLopener: return getattr(self, name)(url) else: return getattr(self, name)(url, data) + except HTTPError: + raise except socket.error as msg: raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) @@ -1760,8 +1878,8 @@ class URLopener: def open_local_file(self, url): """Use local file.""" - import mimetypes, email.utils - from io import StringIO + import email.utils + import mimetypes host, file = splithost(url) localname = url2pathname(file) try: @@ -1877,7 +1995,7 @@ class URLopener: msg.append('Content-type: %s' % type) if encoding == 'base64': # XXX is this encoding/decoding ok? - data = base64.decodebytes(data.encode('ascii')).decode('latin1') + data = base64.decodebytes(data.encode('ascii')).decode('latin-1') else: data = unquote(data) msg.append('Content-Length: %d' % len(data)) @@ -1970,7 +2088,6 @@ class FancyURLopener(URLopener): URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['www-authenticate'] - import re match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) if not match: URLopener.http_error_default(self, url, fp, @@ -1996,7 +2113,6 @@ class FancyURLopener(URLopener): URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['proxy-authenticate'] - import re match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) if not match: URLopener.http_error_default(self, url, fp, @@ -2282,8 +2398,6 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] } """ - import re - import socket from fnmatch import fnmatch hostonly, port = splitport(host) @@ -2386,7 +2500,6 @@ elif os.name == 'nt': for p in proxyServer.split(';'): protocol, address = p.split('=', 1) # See if address has a type:// prefix - import re if not re.match('^([^/:]+)://', address): address = '%s://%s' % (protocol, address) proxies[protocol] = address @@ -2418,7 +2531,6 @@ elif os.name == 'nt': def proxy_bypass_registry(host): try: import winreg - import re except ImportError: # Std modules, so should be around - but you never know! return 0 |