Diffstat (limited to 'Lib/urllib/request.py')
| -rw-r--r-- | Lib/urllib/request.py | 227 |
1 files changed, 154 insertions, 73 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index e13381c..5325d62 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,7 @@ import re
 import socket
 import sys
 import time
+import collections
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
@@ -114,11 +115,27 @@ else:
 __version__ = sys.version[:3]
 
 _opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+            *, cafile=None, capath=None):
     global _opener
-    if _opener is None:
-        _opener = build_opener()
-    return _opener.open(url, data, timeout)
+    if cafile or capath:
+        if not _have_ssl:
+            raise ValueError('SSL support not available')
+        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+        context.options |= ssl.OP_NO_SSLv2
+        if cafile or capath:
+            context.verify_mode = ssl.CERT_REQUIRED
+            context.load_verify_locations(cafile, capath)
+            check_hostname = True
+        else:
+            check_hostname = False
+        https_handler = HTTPSHandler(context=context, check_hostname=check_hostname)
+        opener = build_opener(https_handler)
+    elif _opener is None:
+        _opener = opener = build_opener()
+    else:
+        opener = _opener
+    return opener.open(url, data, timeout)
 
 def install_opener(opener):
     global _opener
@@ -163,7 +180,7 @@ class Request:
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.full_url = unwrap(url)
-        self.full_url, fragment = splittag(self.full_url)
+        self.full_url, self.fragment = splittag(self.full_url)
         self.data = data
         self.headers = {}
         self._tunnel_host = None
@@ -202,7 +219,10 @@ class Request:
         return self.data
 
     def get_full_url(self):
-        return self.full_url
+        if self.fragment:
+            return '%s#%s' % (self.full_url, self.fragment)
+        else:
+            return self.full_url
 
     def get_type(self):
         return self.type
@@ -528,6 +548,17 @@ class HTTPRedirectHandler(BaseHandler):
 
         # fix a possible malformed URL
         urlparts = urlparse(newurl)
+
+        # For security reasons we don't allow redirection to anything other
+        # than http, https or ftp.
+
+        if not urlparts.scheme in ('http', 'https', 'ftp'):
+            raise HTTPError(newurl, code,
+                            msg +
+                            " - Redirection to url '%s' is not allowed" %
+                            newurl,
+                            headers, fp)
+
         if not urlparts.path:
             urlparts = list(urlparts)
             urlparts[2] = "/"
@@ -1031,13 +1062,24 @@ class AbstractHTTPHandler(BaseHandler):
 
         if request.data is not None:  # POST
             data = request.data
+            if isinstance(data, str):
+                raise TypeError("POST data should be bytes"
+                        " or an iterable of bytes. It cannot be str.")
             if not request.has_header('Content-type'):
                 request.add_unredirected_header(
                     'Content-type',
                     'application/x-www-form-urlencoded')
             if not request.has_header('Content-length'):
-                request.add_unredirected_header(
-                    'Content-length', '%d' % len(data))
+                try:
+                    mv = memoryview(data)
+                except TypeError:
+                    if isinstance(data, collections.Iterable):
+                        raise ValueError("Content-Length should be specified "
+                                "for iterable data of type %r %r" % (type(data),
+                                data))
+                else:
+                    request.add_unredirected_header(
+                            'Content-length', '%d' % (len(mv) * mv.itemsize))
 
         sel_host = host
         if request.has_proxy():
@@ -1052,7 +1094,7 @@ class AbstractHTTPHandler(BaseHandler):
 
         return request
 
-    def do_open(self, http_class, req):
+    def do_open(self, http_class, req, **http_conn_args):
         """Return an HTTPResponse object for the request, using http_class.
 
         http_class must implement the HTTPConnection API from http.client.
@@ -1061,7 +1103,8 @@ class AbstractHTTPHandler(BaseHandler):
         if not host:
             raise URLError('no host given')
 
-        h = http_class(host, timeout=req.timeout) # will parse host:port
+        # will parse host:port
+        h = http_class(host, timeout=req.timeout, **http_conn_args)
 
         headers = dict(req.unredirected_hdrs)
         headers.update(dict((k, v) for k, v in req.headers.items()
@@ -1087,7 +1130,7 @@ class AbstractHTTPHandler(BaseHandler):
                 # Proxy-Authorization should not be sent to origin
                 # server.
                 del headers[proxy_auth_hdr]
-            h._set_tunnel(req._tunnel_host, headers=tunnel_headers)
+            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
 
         try:
             h.request(req.get_method(), req.selector, req.data, headers)
@@ -1095,7 +1138,7 @@ class AbstractHTTPHandler(BaseHandler):
         except socket.error as err:
             raise URLError(err)
 
-        r.url = req.full_url
+        r.url = req.get_full_url()
         # This line replaces the .msg attribute of the HTTPResponse
         # with .headers, because urllib clients expect the response to
         # have the reason in .msg. It would be good to mark this
@@ -1113,10 +1156,18 @@ class HTTPHandler(AbstractHTTPHandler):
     http_request = AbstractHTTPHandler.do_request_
 
 if hasattr(http.client, 'HTTPSConnection'):
+    import ssl
+
     class HTTPSHandler(AbstractHTTPHandler):
 
+        def __init__(self, debuglevel=0, context=None, check_hostname=None):
+            AbstractHTTPHandler.__init__(self, debuglevel)
+            self._context = context
+            self._check_hostname = check_hostname
+
         def https_open(self, req):
-            return self.do_open(http.client.HTTPSConnection, req)
+            return self.do_open(http.client.HTTPSConnection, req,
+                context=self._context, check_hostname=self._check_hostname)
 
         https_request = AbstractHTTPHandler.do_request_
@@ -1202,8 +1253,8 @@ class FileHandler(BaseHandler):
         url = req.selector
         if url[:2] == '//' and url[2:3] != '/' and (req.host and
                 req.host != 'localhost'):
-            req.type = 'ftp'
-            return self.parent.open(req)
+            if not req.host is self.get_names():
+                raise URLError("file:// scheme is supported only on localhost")
         else:
             return self.open_local_file(req)
@@ -1364,9 +1415,7 @@ class CacheFTPHandler(FTPHandler):
 MAXFTPCACHE = 10  # Trim the ftp cache beyond this size
 
 # Helper for non-unix systems
-if os.name == 'mac':
-    from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
+if os.name == 'nt':
     from nturl2path import url2pathname, pathname2url
 else:
     def url2pathname(pathname):
@@ -1505,7 +1554,7 @@ class URLopener:
             try:
                 fp = self.open_local_file(url1)
                 hdrs = fp.info()
-                del fp
+                fp.close()
                 return url2pathname(splithost(url1)[1]), hdrs
             except IOError as msg:
                 pass
@@ -1549,8 +1598,6 @@ class URLopener:
                 tfp.close()
         finally:
             fp.close()
-        del fp
-        del tfp
 
         # raise exception if actual size does not match content-length header
         if size >= 0 and read < size:
@@ -1624,6 +1671,12 @@ class URLopener:
             headers["Authorization"] = "Basic %s" % auth
         if realhost:
             headers["Host"] = realhost
+
+        # Add Connection:close as we don't support persistent connections yet.
+        # This helps in closing the socket and avoiding ResourceWarning
+
+        headers["Connection"] = "close"
+
         for header, value in self.addheaders:
             headers[header] = value
@@ -1690,7 +1743,7 @@ class URLopener:
         if not isinstance(url, str):
             raise URLError('file error', 'proxy support for file protocol currently not implemented')
         if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
-            return self.open_ftp(url)
+            raise ValueError("file:// scheme is supported only on localhost")
         else:
             return self.open_local_file(url)
@@ -1864,8 +1917,24 @@ class FancyURLopener(URLopener):
             return
         void = fp.read()
         fp.close()
+
         # In case the server sent a relative URL, join with original:
         newurl = urljoin(self.type + ":" + url, newurl)
+
+        urlparts = urlparse(newurl)
+
+        # For security reasons, we don't allow redirection to anything other
+        # than http, https and ftp.
+
+        # We are using newer HTTPError with older redirect_internal method
+        # This older method will get deprecated in 3.3
+
+        if not urlparts.scheme in ('http', 'https', 'ftp'):
+            raise HTTPError(newurl, errcode,
+                            errmsg +
+                            " Redirection to url '%s' is not allowed." % newurl,
+                            headers, fp)
+
         return self.open(newurl)
 
     def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
@@ -2097,7 +2166,7 @@ class ftpwrapper:
             # Try to retrieve as a file
             try:
                 cmd = 'RETR ' + file
-                conn = self.ftp.ntransfercmd(cmd)
+                conn, retrlen = self.ftp.ntransfercmd(cmd)
             except ftplib.error_perm as reason:
                 if str(reason)[:3] != '550':
                     raise URLError('ftp error', reason).with_traceback(
@@ -2118,10 +2187,14 @@ class ftpwrapper:
                 cmd = 'LIST ' + file
             else:
                 cmd = 'LIST'
-            conn = self.ftp.ntransfercmd(cmd)
+            conn, retrlen = self.ftp.ntransfercmd(cmd)
         self.busy = 1
+
+        ftpobj = addclosehook(conn.makefile('rb'), self.endtransfer)
+        conn.close()
         # Pass back both a suitably decorated object and a retrieval length
-        return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
+        return (ftpobj, retrlen)
+
     def endtransfer(self):
         if not self.busy:
             return
@@ -2175,68 +2248,76 @@ def proxy_bypass_environment(host):
     return 0
 
-if sys.platform == 'darwin':
-    from _scproxy import _get_proxy_settings, _get_proxies
+# This code tests an OSX specific data structure but is testable on all
+# platforms
+def _proxy_bypass_macosx_sysconf(host, proxy_settings):
+    """
+    Return True iff this host shouldn't be accessed using a proxy
 
-    def proxy_bypass_macosx_sysconf(host):
-        """
-        Return True iff this host shouldn't be accessed using a proxy
+    This function uses the MacOSX framework SystemConfiguration
+    to fetch the proxy information.
 
-        This function uses the MacOSX framework SystemConfiguration
-        to fetch the proxy information.
-        """
-        import re
-        import socket
-        from fnmatch import fnmatch
-
-        hostonly, port = splitport(host)
+    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
+    { 'exclude_simple': bool,
+      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
+    }
+    """
+    import re
+    import socket
+    from fnmatch import fnmatch
 
-        def ip2num(ipAddr):
-            parts = ipAddr.split('.')
-            parts = list(map(int, parts))
-            if len(parts) != 4:
-                parts = (parts + [0, 0, 0, 0])[:4]
-            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
+    hostonly, port = splitport(host)
 
-        proxy_settings = _get_proxy_settings()
+    def ip2num(ipAddr):
+        parts = ipAddr.split('.')
+        parts = list(map(int, parts))
+        if len(parts) != 4:
+            parts = (parts + [0, 0, 0, 0])[:4]
+        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
 
-        # Check for simple host names:
-        if '.' not in host:
-            if proxy_settings['exclude_simple']:
-                return True
+    # Check for simple host names:
+    if '.' not in host:
+        if proxy_settings['exclude_simple']:
+            return True
 
-        hostIP = None
+    hostIP = None
 
-        for value in proxy_settings.get('exceptions', ()):
-            # Items in the list are strings like these: *.local, 169.254/16
-            if not value: continue
+    for value in proxy_settings.get('exceptions', ()):
+        # Items in the list are strings like these: *.local, 169.254/16
+        if not value: continue
 
-            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
-            if m is not None:
-                if hostIP is None:
-                    try:
-                        hostIP = socket.gethostbyname(hostonly)
-                        hostIP = ip2num(hostIP)
-                    except socket.error:
-                        continue
+        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
+        if m is not None:
+            if hostIP is None:
+                try:
+                    hostIP = socket.gethostbyname(hostonly)
+                    hostIP = ip2num(hostIP)
+                except socket.error:
+                    continue
+
+            base = ip2num(m.group(1))
+            mask = m.group(2)
+            if mask is None:
+                mask = 8 * (m.group(1).count('.') + 1)
+            else:
+                mask = int(mask[1:])
+            mask = 32 - mask
 
-                base = ip2num(m.group(1))
-                mask = m.group(2)
-                if mask is None:
-                    mask = 8 * (m.group(1).count('.') + 1)
+            if (hostIP >> mask) == (base >> mask):
+                return True
 
-                else:
-                    mask = int(mask[1:])
-                    mask = 32 - mask
+        elif fnmatch(host, value):
+            return True
 
-                if (hostIP >> mask) == (base >> mask):
-                    return True
+    return False
 
-            elif fnmatch(host, value):
-                return True
-        return False
 
+if sys.platform == 'darwin':
+    from _scproxy import _get_proxy_settings, _get_proxies
+
+    def proxy_bypass_macosx_sysconf(host):
+        proxy_settings = _get_proxy_settings()
+        return _proxy_bypass_macosx_sysconf(host, proxy_settings)
 
     def getproxies_macosx_sysconf():
         """Return a dictionary of scheme -> proxy server URL mappings.

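Stepping back from the individual hunks, the two changes a caller is most likely to notice are the keyword-only cafile/capath arguments on urlopen() (which switch on certificate verification for HTTPS) and the stricter handling of POST bodies in do_request_(). A minimal usage sketch against the patched module; the URLs and the CA-bundle path are illustrative placeholders:

    import urllib.request
    from urllib.parse import urlencode

    # HTTPS with certificate verification via the new cafile keyword
    # (the CA bundle path is a placeholder for a real bundle on disk).
    resp = urllib.request.urlopen("https://www.example.org/",
                                  cafile="/etc/ssl/certs/ca-certificates.crt")
    print(resp.getcode())
    resp.close()

    # POST bodies must be bytes: a plain str now raises TypeError, and an
    # iterable body without an explicit Content-Length raises ValueError.
    body = urlencode({"q": "python"}).encode("ascii")
    resp = urllib.request.urlopen("http://www.example.org/search", data=body)
    print(resp.getcode())
    resp.close()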