diff options
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/error.py | 14 | ||||
-rw-r--r-- | Lib/urllib/parse.py | 9 | ||||
-rw-r--r-- | Lib/urllib/request.py | 180 |
3 files changed, 114 insertions, 89 deletions
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py index b712ebb..45b7169 100644 --- a/Lib/urllib/error.py +++ b/Lib/urllib/error.py @@ -1,6 +1,6 @@ """Exception classes raised by urllib. -The base exception class is URLError, which inherits from IOError. It +The base exception class is URLError, which inherits from OSError. It doesn't define any behavior of its own, but is the base class for all exceptions defined in this package. @@ -17,11 +17,11 @@ __all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] # do these error classes make sense? -# make sure all of the IOError stuff is overridden. we just want to be +# make sure all of the OSError stuff is overridden. we just want to be # subtypes. -class URLError(IOError): - # URLError is a sub-type of IOError, but it doesn't share any of +class URLError(OSError): + # URLError is a sub-type of OSError, but it doesn't share any of # the implementation. need to override __init__ and __str__. # It sets self.args for compatibility with other EnvironmentError # subclasses, but args doesn't have the typical format with errno in @@ -61,9 +61,13 @@ class HTTPError(URLError, urllib.response.addinfourl): def reason(self): return self.msg - def info(self): + @property + def headers(self): return self.hdrs + @headers.setter + def headers(self, headers): + self.hdrs = headers # exception raised when downloaded size does not match content-length class ContentTooShortError(URLError): diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 9a2df41..5328b9d 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -846,7 +846,6 @@ def splittype(url): """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" global _typeprog if _typeprog is None: - import re _typeprog = re.compile('^([^/:]+):') match = _typeprog.match(url) @@ -860,7 +859,6 @@ def splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: - import re _hostprog = re.compile('^//([^/?]*)(.*)$') match = _hostprog.match(url) @@ -877,7 +875,6 @@ def splituser(host): """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" global _userprog if _userprog is None: - import re _userprog = re.compile('^(.*)@(.*)$') match = _userprog.match(host) @@ -889,7 +886,6 @@ def splitpasswd(user): """splitpasswd('user:passwd') -> 'user', 'passwd'.""" global _passwdprog if _passwdprog is None: - import re _passwdprog = re.compile('^([^:]*):(.*)$',re.S) match = _passwdprog.match(user) @@ -902,7 +898,6 @@ def splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: - import re _portprog = re.compile('^(.*):([0-9]+)$') match = _portprog.match(host) @@ -917,7 +912,6 @@ def splitnport(host, defport=-1): Return None if ':' but not a valid number.""" global _nportprog if _nportprog is None: - import re _nportprog = re.compile('^(.*):(.*)$') match = _nportprog.match(host) @@ -936,7 +930,6 @@ def splitquery(url): """splitquery('/path?query') --> '/path', 'query'.""" global _queryprog if _queryprog is None: - import re _queryprog = re.compile('^(.*)\?([^?]*)$') match = _queryprog.match(url) @@ -948,7 +941,6 @@ def splittag(url): """splittag('/path#tag') --> '/path', 'tag'.""" global _tagprog if _tagprog is None: - import re _tagprog = re.compile('^(.*)#([^#]*)$') match = _tagprog.match(url) @@ -966,7 +958,6 @@ def splitvalue(attr): """splitvalue('attr=value') --> 'attr', 'value'.""" global _valueprog if _valueprog is None: - import re _valueprog = re.compile('^([^=]*)=(.*)$') match = _valueprog.match(attr) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index a7445d1..4765a94 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -18,7 +18,7 @@ urlopen(url, data=None) -- Basic usage is the same as original urllib. pass the url and optionally data to post to an HTTP URL, and get a file-like object back. One difference is that you can also pass a Request instance instead of URL. Raises a URLError (subclass of -IOError); for HTTP errors, raises an HTTPError, which can also be +OSError); for HTTP errors, raises an HTTPError, which can also be treated as a valid response. build_opener -- Function that creates a new OpenerDirector instance. @@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, splittype, splithost, splitport, splituser, splitpasswd, - splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) + splitattr, splitquery, splitvalue, splittag, to_bytes, + unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook # check for SSL @@ -121,7 +122,7 @@ __all__ = [ 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', 'UnknownHandler', 'HTTPErrorProcessor', # Functions 'urlopen', 'install_opener', 'build_opener', @@ -227,7 +228,7 @@ def urlcleanup(): for temp_file in _url_tempfiles: try: os.unlink(temp_file) - except EnvironmentError: + except OSError: pass del _url_tempfiles[:] @@ -258,24 +259,59 @@ class Request: def __init__(self, url, data=None, headers={}, origin_req_host=None, unverifiable=False, method=None): - # unwrap('<URL:type://host/path>') --> 'type://host/path' - self.full_url = unwrap(url) - self.full_url, self.fragment = splittag(self.full_url) - self.data = data + self.full_url = url self.headers = {} + self.unredirected_hdrs = {} + self._data = None + self.data = data self._tunnel_host = None for key, value in headers.items(): self.add_header(key, value) - self.unredirected_hdrs = {} if origin_req_host is None: origin_req_host = request_host(self) self.origin_req_host = origin_req_host self.unverifiable = unverifiable self.method = method + + @property + def full_url(self): + if self.fragment: + return '{}#{}'.format(self._full_url, self.fragment) + return self._full_url + + @full_url.setter + def full_url(self, url): + # unwrap('<URL:type://host/path>') --> 'type://host/path' + self._full_url = unwrap(url) + self._full_url, self.fragment = splittag(self._full_url) self._parse() + @full_url.deleter + def full_url(self): + self._full_url = None + self.fragment = None + self.selector = '' + + @property + def data(self): + return self._data + + @data.setter + def data(self, data): + if data != self._data: + self._data = data + # issue 16464 + # if we change data we need to remove content-length header + # (cause it's most probably calculated for previous value) + if self.has_header("Content-length"): + self.remove_header("Content-length") + + @data.deleter + def data(self): + self.data = None + def _parse(self): - self.type, rest = splittype(self.full_url) + self.type, rest = splittype(self._full_url) if self.type is None: raise ValueError("unknown url type: %r" % self.full_url) self.host, self.selector = splithost(rest) @@ -292,54 +328,7 @@ class Request: return "GET" def get_full_url(self): - if self.fragment: - return '%s#%s' % (self.full_url, self.fragment) - else: - return self.full_url - - # Begin deprecated methods - - def add_data(self, data): - msg = "Request.add_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - self.data = data - - def has_data(self): - msg = "Request.has_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data is not None - - def get_data(self): - msg = "Request.get_data method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.data - - def get_type(self): - msg = "Request.get_type method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.type - - def get_host(self): - msg = "Request.get_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.host - - def get_selector(self): - msg = "Request.get_selector method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.selector - - def is_unverifiable(self): - msg = "Request.is_unverifiable method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.unverifiable - - def get_origin_req_host(self): - msg = "Request.get_origin_req_host method is deprecated." - warnings.warn(msg, DeprecationWarning, stacklevel=1) - return self.origin_req_host - - # End deprecated methods + return self.full_url def set_proxy(self, host, type): if self.type == 'https' and not self._tunnel_host: @@ -369,6 +358,10 @@ class Request: header_name, self.unredirected_hdrs.get(header_name, default)) + def remove_header(self, header_name): + self.headers.pop(header_name, None) + self.unredirected_hdrs.pop(header_name, None) + def header_items(self): hdrs = self.unredirected_hdrs.copy() hdrs.update(self.headers) @@ -531,7 +524,8 @@ def build_opener(*handlers): opener = OpenerDirector() default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler, HTTPErrorProcessor] + FTPHandler, FileHandler, HTTPErrorProcessor, + DataHandler] if hasattr(http.client, "HTTPSConnection"): default_classes.append(HTTPSHandler) skip = set() @@ -1246,11 +1240,17 @@ class AbstractHTTPHandler(BaseHandler): try: h.request(req.get_method(), req.selector, req.data, headers) - except socket.error as err: # timeout error + except OSError as err: # timeout error h.close() raise URLError(err) else: r = h.getresponse() + # If the server does not send us a 'Connection: close' header, + # HTTPConnection assumes the socket should be left open. Manually + # mark the socket to be closed when this response object goes away. + if h.sock: + h.sock.close() + h.sock = None r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse @@ -1445,7 +1445,7 @@ class FTPHandler(BaseHandler): try: host = socket.gethostbyname(host) - except socket.error as msg: + except OSError as msg: raise URLError(msg) path, attrs = splitattr(req.selector) dirs = path.split('/') @@ -1531,6 +1531,36 @@ class CacheFTPHandler(FTPHandler): self.cache.clear() self.timeout.clear() +class DataHandler(BaseHandler): + def data_open(self, req): + # data URLs as specified in RFC 2397. + # + # ignores POSTed data + # + # syntax: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + url = req.full_url + + scheme, data = url.split(":",1) + mediatype, data = data.split(",",1) + + # even base64 encoded data URLs might be quoted so unquote in any case: + data = unquote_to_bytes(data) + if mediatype.endswith(";base64"): + data = base64.decodebytes(data) + mediatype = mediatype[:-7] + + if not mediatype: + mediatype = "text/plain;charset=US-ASCII" + + headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" % + (mediatype, len(data))) + + return addinfourl(io.BytesIO(data), headers, url) + # Code move from the old urllib module @@ -1654,20 +1684,20 @@ class URLopener: return getattr(self, name)(url) else: return getattr(self, name)(url, data) - except HTTPError: + except (HTTPError, URLError): raise - except socket.error as msg: - raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) + except OSError as msg: + raise OSError('socket error', msg).with_traceback(sys.exc_info()[2]) def open_unknown(self, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'unknown url type', type) + raise OSError('url error', 'unknown url type', type) def open_unknown_proxy(self, proxy, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'invalid proxy for %s' % type, proxy) + raise OSError('url error', 'invalid proxy for %s' % type, proxy) # External interface def retrieve(self, url, filename=None, reporthook=None, data=None): @@ -1683,7 +1713,7 @@ class URLopener: hdrs = fp.info() fp.close() return url2pathname(splithost(url1)[1]), hdrs - except IOError as msg: + except OSError as msg: pass fp = self.open(url, data) try: @@ -1776,7 +1806,7 @@ class URLopener: if proxy_bypass(realhost): host = realhost - if not host: raise IOError('http error', 'no host given') + if not host: raise OSError('http error', 'no host given') if proxy_passwd: proxy_passwd = unquote(proxy_passwd) @@ -1849,7 +1879,7 @@ class URLopener: return self.http_error_default(url, fp, errcode, errmsg, headers) def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" + """Default error handler: close the connection and raise OSError.""" fp.close() raise HTTPError(url, errcode, errmsg, headers, None) @@ -1976,7 +2006,7 @@ class URLopener: try: [type, data] = url.split(',', 1) except ValueError: - raise IOError('data error', 'bad data URL') + raise OSError('data error', 'bad data URL') if not type: type = 'text/plain;charset=US-ASCII' semi = type.rfind(';') @@ -2425,7 +2455,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): try: hostIP = socket.gethostbyname(hostonly) hostIP = ip2num(hostIP) - except socket.error: + except OSError: continue base = ip2num(m.group(1)) @@ -2511,7 +2541,7 @@ elif os.name == 'nt': proxies['https'] = 'https://%s' % proxyServer proxies['ftp'] = 'ftp://%s' % proxyServer internetSettings.Close() - except (WindowsError, ValueError, TypeError): + except (OSError, ValueError, TypeError): # Either registry key not found etc, or the value in an # unexpected format. # proxies already set up to be empty so nothing to do @@ -2541,7 +2571,7 @@ elif os.name == 'nt': proxyOverride = str(winreg.QueryValueEx(internetSettings, 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: + except OSError: return 0 if not proxyEnable or not proxyOverride: return 0 @@ -2552,13 +2582,13 @@ elif os.name == 'nt': addr = socket.gethostbyname(rawHost) if addr != rawHost: host.append(addr) - except socket.error: + except OSError: pass try: fqdn = socket.getfqdn(rawHost) if fqdn != rawHost: host.append(fqdn) - except socket.error: + except OSError: pass # make a check value list from the registry entry: replace the # '<local>' string by the localhost entry and the corresponding |