diff options
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/error.py | 3 | ||||
-rw-r--r-- | Lib/urllib/request.py | 213 | ||||
-rw-r--r-- | Lib/urllib/response.py | 7 |
3 files changed, 169 insertions, 54 deletions
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py index 40add41..c1dfc40 100644 --- a/Lib/urllib/error.py +++ b/Lib/urllib/error.py @@ -13,6 +13,9 @@ response. import urllib.response +__all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] + + # do these error classes make sense? # make sure all of the IOError stuff is overridden. we just want to be # subtypes. diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 796b700..3896aa0 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -89,14 +89,16 @@ import http.client import io import os import posixpath -import random import re import socket import sys import time import collections +import tempfile +import contextlib import warnings + from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, @@ -112,21 +114,40 @@ except ImportError: else: _have_ssl = True +__all__ = [ + # Classes + 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler', + 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler', + 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', + 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', + 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'UnknownHandler', 'HTTPErrorProcessor', + # Functions + 'urlopen', 'install_opener', 'build_opener', + 'pathname2url', 'url2pathname', 'getproxies', + # Legacy interface + 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', +] + # used in User-Agent header sent __version__ = sys.version[:3] _opener = None def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - *, cafile=None, capath=None): + *, cafile=None, capath=None, cadefault=False): global _opener - if cafile or capath: + if cafile or capath or cadefault: if not _have_ssl: raise ValueError('SSL support not available') context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) context.options |= ssl.OP_NO_SSLv2 - if cafile or capath: + if cafile or capath or cadefault: context.verify_mode = ssl.CERT_REQUIRED - context.load_verify_locations(cafile, capath) + if cafile or capath: + context.load_verify_locations(cafile, capath) + else: + context.set_default_verify_paths() check_hostname = True else: check_hostname = False @@ -142,17 +163,78 @@ def install_opener(opener): global _opener _opener = opener -# TODO(jhylton): Make this work with the same global opener. -_urlopener = None +_url_tempfiles = [] def urlretrieve(url, filename=None, reporthook=None, data=None): - global _urlopener - if not _urlopener: - _urlopener = FancyURLopener() - return _urlopener.retrieve(url, filename, reporthook, data) + """ + Retrieve a URL into a temporary location on disk. + + Requires a URL argument. If a filename is passed, it is used as + the temporary file location. The reporthook argument should be + a callable that accepts a block number, a read size, and the + total file size of the URL target. The data argument should be + valid URL encoded data. + + If a filename is passed and the URL points to a local resource, + the result is a copy from local file to new file. + + Returns a tuple containing the path to the newly created + data file as well as the resulting HTTPMessage object. + """ + url_type, path = splittype(url) + + with contextlib.closing(urlopen(url, data)) as fp: + headers = fp.info() + + # Just return the local path and the "headers" for file:// + # URLs. No sense in performing a copy unless requested. + if url_type == "file" and not filename: + return os.path.normpath(path), headers + + # Handle temporary file setup. + if filename: + tfp = open(filename, 'wb') + else: + tfp = tempfile.NamedTemporaryFile(delete=False) + filename = tfp.name + _url_tempfiles.append(filename) + + with tfp: + result = filename, headers + bs = 1024*8 + size = -1 + read = 0 + blocknum = 0 + if "content-length" in headers: + size = int(headers["Content-Length"]) + + if reporthook: + reporthook(blocknum, 0, size) + + while True: + block = fp.read(bs) + if not block: + break + read += len(block) + tfp.write(block) + blocknum += 1 + if reporthook: + reporthook(blocknum, len(block), size) + + if size >= 0 and read < size: + raise ContentTooShortError( + "retrieval incomplete: got only %i out of %i bytes" + % (read, size), result) + + return result def urlcleanup(): - if _urlopener: - _urlopener.cleanup() + for temp_file in _url_tempfiles: + try: + os.unlink(temp_file) + except EnvironmentError: + pass + + del _url_tempfiles[:] global _opener if _opener: _opener = None @@ -178,7 +260,8 @@ def request_host(request): class Request: def __init__(self, url, data=None, headers={}, - origin_req_host=None, unverifiable=False): + origin_req_host=None, unverifiable=False, + method=None): # unwrap('<URL:type://host/path>') --> 'type://host/path' self.full_url = unwrap(to_bytes(url)) self.full_url = quote(self.full_url, safe="%/:=&?~#+!$,;'@()*[]|") @@ -193,6 +276,7 @@ class Request: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable + self.method = method self._parse() def _parse(self): @@ -204,41 +288,60 @@ class Request: self.host = unquote(self.host) def get_method(self): - if self.data is not None: + """Return a string indicating the HTTP request method.""" + if self.method is not None: + return self.method + elif self.data is not None: return "POST" else: return "GET" + def get_full_url(self): + if self.fragment: + return '%s#%s' % (self.full_url, self.fragment) + else: + return self.full_url + # Begin deprecated methods def add_data(self, data): + msg = "Request.add_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) self.data = data def has_data(self): + msg = "Request.has_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.data is not None def get_data(self): + msg = "Request.get_data method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.data - def get_full_url(self): - if self.fragment: - return '%s#%s' % (self.full_url, self.fragment) - else: - return self.full_url - def get_type(self): + msg = "Request.get_type method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.type def get_host(self): + msg = "Request.get_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.host def get_selector(self): + msg = "Request.get_selector method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.selector def is_unverifiable(self): + msg = "Request.is_unverifiable method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.unverifiable def get_origin_req_host(self): + msg = "Request.get_origin_req_host method is deprecated." + warnings.warn(msg, DeprecationWarning, stacklevel=1) return self.origin_req_host # End deprecated methods @@ -683,8 +786,8 @@ class ProxyHandler(BaseHandler): self.proxies = proxies for type, url in proxies.items(): setattr(self, '%s_open' % type, - lambda r, proxy=url, type=type, meth=self.proxy_open: \ - meth(r, proxy, type)) + lambda r, proxy=url, type=type, meth=self.proxy_open: + meth(r, proxy, type)) def proxy_open(self, req, proxy, type): orig_type = req.type @@ -826,17 +929,23 @@ class AbstractBasicAuthHandler: self.retried += 1 if authreq: - mo = AbstractBasicAuthHandler.rx.search(authreq) - if mo: - scheme, quote, realm = mo.groups() - if quote not in ["'", '"']: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 2) - if scheme.lower() == 'basic': - response = self.retry_http_basic_auth(host, req, realm) - if response and response.code != 401: - self.retried = 0 - return response + scheme = authreq.split()[0] + if scheme.lower() != 'basic': + raise ValueError("AbstractBasicAuthHandler does not" + " support the following scheme: '%s'" % + scheme) + else: + mo = AbstractBasicAuthHandler.rx.search(authreq) + if mo: + scheme, quote, realm = mo.groups() + if quote not in ['"',"'"]: + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 2) + if scheme.lower() == 'basic': + response = self.retry_http_basic_auth(host, req, realm) + if response and response.code != 401: + self.retried = 0 + return response def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) @@ -879,9 +988,9 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler): return response -def randombytes(n): - """Return n random bytes.""" - return os.urandom(n) +# Return n random bytes. +_randombytes = os.urandom + class AbstractDigestAuthHandler: # Digest authentication is specified in RFC 2617. @@ -922,6 +1031,9 @@ class AbstractDigestAuthHandler: scheme = authreq.split()[0] if scheme.lower() == 'digest': return self.retry_http_digest_auth(req, authreq) + elif scheme.lower() != 'basic': + raise ValueError("AbstractDigestAuthHandler does not support" + " the following scheme: '%s'" % scheme) def retry_http_digest_auth(self, req, auth): token, challenge = auth.split(' ', 1) @@ -942,7 +1054,7 @@ class AbstractDigestAuthHandler: # authentication, and to provide some message integrity protection. # This isn't a fabulous effort, but it's probably Good Enough. s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime()) - b = s.encode("ascii") + randombytes(8) + b = s.encode("ascii") + _randombytes(8) dig = hashlib.sha1(b).hexdigest() return dig[:16] @@ -1067,7 +1179,7 @@ class AbstractHTTPHandler(BaseHandler): if request.data is not None: # POST data = request.data if isinstance(data, str): - msg = "POST data should be bytes or an iterable of bytes. "\ + msg = "POST data should be bytes or an iterable of bytes. " \ "It cannot be of type str." raise TypeError(msg) if not request.has_header('Content-type'): @@ -1163,7 +1275,6 @@ class HTTPHandler(AbstractHTTPHandler): http_request = AbstractHTTPHandler.do_request_ if hasattr(http.client, 'HTTPSConnection'): - import ssl class HTTPSHandler(AbstractHTTPHandler): @@ -1178,6 +1289,8 @@ if hasattr(http.client, 'HTTPSConnection'): https_request = AbstractHTTPHandler.do_request_ + __all__.append('HTTPSHandler') + class HTTPCookieProcessor(BaseHandler): def __init__(self, cookiejar=None): import http.cookiejar @@ -1464,6 +1577,9 @@ class URLopener: # Constructor def __init__(self, proxies=None, **x509): + msg = "%(class)s style of invoking requests is deprecated. " \ + "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} + warnings.warn(msg, DeprecationWarning, stacklevel=3) if proxies is None: proxies = getproxies() assert hasattr(proxies, 'keys'), "proxies must be a mapping" @@ -1543,6 +1659,8 @@ class URLopener: return getattr(self, name)(url) else: return getattr(self, name)(url, data) + except HTTPError: + raise except socket.error as msg: raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) @@ -1663,7 +1781,6 @@ class URLopener: if proxy_bypass(realhost): host = realhost - #print "proxy via http:", host, selector if not host: raise IOError('http error', 'no host given') if proxy_passwd: @@ -1762,8 +1879,8 @@ class URLopener: def open_local_file(self, url): """Use local file.""" - import mimetypes, email.utils - from io import StringIO + import email.utils + import mimetypes host, file = splithost(url) localname = url2pathname(file) try: @@ -1797,7 +1914,6 @@ class URLopener: if not isinstance(url, str): raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented') import mimetypes - from io import StringIO host, path = splithost(url) if not host: raise URLError('ftp error', 'no host given') host, port = splitport(host) @@ -1880,7 +1996,7 @@ class URLopener: msg.append('Content-type: %s' % type) if encoding == 'base64': # XXX is this encoding/decoding ok? - data = base64.decodebytes(data.encode('ascii')).decode('latin1') + data = base64.decodebytes(data.encode('ascii')).decode('latin-1') else: data = unquote(data) msg.append('Content-Length: %d' % len(data)) @@ -1973,7 +2089,6 @@ class FancyURLopener(URLopener): URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['www-authenticate'] - import re match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) if not match: URLopener.http_error_default(self, url, fp, @@ -1999,7 +2114,6 @@ class FancyURLopener(URLopener): URLopener.http_error_default(self, url, fp, errcode, errmsg, headers) stuff = headers['proxy-authenticate'] - import re match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) if not match: URLopener.http_error_default(self, url, fp, @@ -2291,8 +2405,6 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] } """ - import re - import socket from fnmatch import fnmatch hostonly, port = splitport(host) @@ -2395,7 +2507,6 @@ elif os.name == 'nt': for p in proxyServer.split(';'): protocol, address = p.split('=', 1) # See if address has a type:// prefix - import re if not re.match('^([^/:]+)://', address): address = '%s://%s' % (protocol, address) proxies[protocol] = address @@ -2427,7 +2538,6 @@ elif os.name == 'nt': def proxy_bypass_registry(host): try: import winreg - import re except ImportError: # Std modules, so should be around - but you never know! return 0 @@ -2471,7 +2581,6 @@ elif os.name == 'nt': test = test.replace("*", r".*") # change glob sequence test = test.replace("?", r".") # change glob char for val in host: - # print "%s <--> %s" %( test, val ) if re.match(test, val, re.I): return 1 return 0 diff --git a/Lib/urllib/response.py b/Lib/urllib/response.py index ffaa5fa..1cf1d1a 100644 --- a/Lib/urllib/response.py +++ b/Lib/urllib/response.py @@ -37,12 +37,15 @@ class addbase(object): id(self), self.fp) def close(self): + if self.fp: + self.fp.close() + self.fp = None self.read = None self.readline = None self.readlines = None self.fileno = None - if self.fp: self.fp.close() - self.fp = None + self.__iter__ = None + self.__next__ = None def __enter__(self): if self.fp is None: |