diff options
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/error.py | 14 | ||||
-rw-r--r-- | Lib/urllib/request.py | 107 |
2 files changed, 93 insertions, 28 deletions
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py index b712ebb..45b7169 100644 --- a/Lib/urllib/error.py +++ b/Lib/urllib/error.py @@ -1,6 +1,6 @@ """Exception classes raised by urllib. -The base exception class is URLError, which inherits from IOError. It +The base exception class is URLError, which inherits from OSError. It doesn't define any behavior of its own, but is the base class for all exceptions defined in this package. @@ -17,11 +17,11 @@ __all__ = ['URLError', 'HTTPError', 'ContentTooShortError'] # do these error classes make sense? -# make sure all of the IOError stuff is overridden. we just want to be +# make sure all of the OSError stuff is overridden. we just want to be # subtypes. -class URLError(IOError): - # URLError is a sub-type of IOError, but it doesn't share any of +class URLError(OSError): + # URLError is a sub-type of OSError, but it doesn't share any of # the implementation. need to override __init__ and __str__. # It sets self.args for compatibility with other EnvironmentError # subclasses, but args doesn't have the typical format with errno in @@ -61,9 +61,13 @@ class HTTPError(URLError, urllib.response.addinfourl): def reason(self): return self.msg - def info(self): + @property + def headers(self): return self.hdrs + @headers.setter + def headers(self, headers): + self.hdrs = headers # exception raised when downloaded size does not match content-length class ContentTooShortError(URLError): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 5ddec5f..8035f7c 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -18,7 +18,7 @@ urlopen(url, data=None) -- Basic usage is the same as original urllib. pass the url and optionally data to post to an HTTP URL, and get a file-like object back. One difference is that you can also pass a Request instance instead of URL. Raises a URLError (subclass of -IOError); for HTTP errors, raises an HTTPError, which can also be +OSError); for HTTP errors, raises an HTTPError, which can also be treated as a valid response. build_opener -- Function that creates a new OpenerDirector instance. @@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, splittype, splithost, splitport, splituser, splitpasswd, - splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse) + splitattr, splitquery, splitvalue, splittag, to_bytes, + unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook # check for SSL @@ -121,7 +122,7 @@ __all__ = [ 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm', 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', - 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', + 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler', 'UnknownHandler', 'HTTPErrorProcessor', # Functions 'urlopen', 'install_opener', 'build_opener', @@ -231,7 +232,7 @@ def urlcleanup(): for temp_file in _url_tempfiles: try: os.unlink(temp_file) - except EnvironmentError: + except OSError: pass del _url_tempfiles[:] @@ -265,12 +266,13 @@ class Request: # unwrap('<URL:type://host/path>') --> 'type://host/path' self.full_url = unwrap(url) self.full_url, self.fragment = splittag(self.full_url) - self.data = data self.headers = {} + self.unredirected_hdrs = {} + self._data = None + self.data = data self._tunnel_host = None for key, value in headers.items(): self.add_header(key, value) - self.unredirected_hdrs = {} if origin_req_host is None: origin_req_host = request_host(self) self.origin_req_host = origin_req_host @@ -278,6 +280,24 @@ class Request: self.method = method self._parse() + @property + def data(self): + return self._data + + @data.setter + def data(self, data): + if data != self._data: + self._data = data + # issue 16464 + # if we change data we need to remove content-length header + # (cause it's most probably calculated for previous value) + if self.has_header("Content-length"): + self.remove_header("Content-length") + + @data.deleter + def data(self): + self._data = None + def _parse(self): self.type, rest = splittype(self.full_url) if self.type is None: @@ -373,6 +393,10 @@ class Request: header_name, self.unredirected_hdrs.get(header_name, default)) + def remove_header(self, header_name): + self.headers.pop(header_name, None) + self.unredirected_hdrs.pop(header_name, None) + def header_items(self): hdrs = self.unredirected_hdrs.copy() hdrs.update(self.headers) @@ -535,7 +559,8 @@ def build_opener(*handlers): opener = OpenerDirector() default_classes = [ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler, HTTPRedirectHandler, - FTPHandler, FileHandler, HTTPErrorProcessor] + FTPHandler, FileHandler, HTTPErrorProcessor, + DataHandler] if hasattr(http.client, "HTTPSConnection"): default_classes.append(HTTPSHandler) skip = set() @@ -1250,11 +1275,17 @@ class AbstractHTTPHandler(BaseHandler): try: h.request(req.get_method(), req.selector, req.data, headers) - except socket.error as err: # timeout error + except OSError as err: # timeout error h.close() raise URLError(err) else: r = h.getresponse() + # If the server does not send us a 'Connection: close' header, + # HTTPConnection assumes the socket should be left open. Manually + # mark the socket to be closed when this response object goes away. + if h.sock: + h.sock.close() + h.sock = None r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse @@ -1449,7 +1480,7 @@ class FTPHandler(BaseHandler): try: host = socket.gethostbyname(host) - except socket.error as msg: + except OSError as msg: raise URLError(msg) path, attrs = splitattr(req.selector) dirs = path.split('/') @@ -1535,6 +1566,36 @@ class CacheFTPHandler(FTPHandler): self.cache.clear() self.timeout.clear() +class DataHandler(BaseHandler): + def data_open(self, req): + # data URLs as specified in RFC 2397. + # + # ignores POSTed data + # + # syntax: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + url = req.full_url + + scheme, data = url.split(":",1) + mediatype, data = data.split(",",1) + + # even base64 encoded data URLs might be quoted so unquote in any case: + data = unquote_to_bytes(data) + if mediatype.endswith(";base64"): + data = base64.decodebytes(data) + mediatype = mediatype[:-7] + + if not mediatype: + mediatype = "text/plain;charset=US-ASCII" + + headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" % + (mediatype, len(data))) + + return addinfourl(io.BytesIO(data), headers, url) + # Code move from the old urllib module @@ -1658,20 +1719,20 @@ class URLopener: return getattr(self, name)(url) else: return getattr(self, name)(url, data) - except HTTPError: + except (HTTPError, URLError): raise - except socket.error as msg: - raise IOError('socket error', msg).with_traceback(sys.exc_info()[2]) + except OSError as msg: + raise OSError('socket error', msg).with_traceback(sys.exc_info()[2]) def open_unknown(self, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'unknown url type', type) + raise OSError('url error', 'unknown url type', type) def open_unknown_proxy(self, proxy, fullurl, data=None): """Overridable interface to open unknown URL type.""" type, url = splittype(fullurl) - raise IOError('url error', 'invalid proxy for %s' % type, proxy) + raise OSError('url error', 'invalid proxy for %s' % type, proxy) # External interface def retrieve(self, url, filename=None, reporthook=None, data=None): @@ -1687,7 +1748,7 @@ class URLopener: hdrs = fp.info() fp.close() return url2pathname(splithost(url1)[1]), hdrs - except IOError as msg: + except OSError as msg: pass fp = self.open(url, data) try: @@ -1780,7 +1841,7 @@ class URLopener: if proxy_bypass(realhost): host = realhost - if not host: raise IOError('http error', 'no host given') + if not host: raise OSError('http error', 'no host given') if proxy_passwd: proxy_passwd = unquote(proxy_passwd) @@ -1853,7 +1914,7 @@ class URLopener: return self.http_error_default(url, fp, errcode, errmsg, headers) def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise IOError.""" + """Default error handler: close the connection and raise OSError.""" fp.close() raise HTTPError(url, errcode, errmsg, headers, None) @@ -1980,7 +2041,7 @@ class URLopener: try: [type, data] = url.split(',', 1) except ValueError: - raise IOError('data error', 'bad data URL') + raise OSError('data error', 'bad data URL') if not type: type = 'text/plain;charset=US-ASCII' semi = type.rfind(';') @@ -2426,7 +2487,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): try: hostIP = socket.gethostbyname(hostonly) hostIP = ip2num(hostIP) - except socket.error: + except OSError: continue base = ip2num(m.group(1)) @@ -2512,7 +2573,7 @@ elif os.name == 'nt': proxies['https'] = 'https://%s' % proxyServer proxies['ftp'] = 'ftp://%s' % proxyServer internetSettings.Close() - except (WindowsError, ValueError, TypeError): + except (OSError, ValueError, TypeError): # Either registry key not found etc, or the value in an # unexpected format. # proxies already set up to be empty so nothing to do @@ -2542,7 +2603,7 @@ elif os.name == 'nt': proxyOverride = str(winreg.QueryValueEx(internetSettings, 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII - except WindowsError: + except OSError: return 0 if not proxyEnable or not proxyOverride: return 0 @@ -2553,13 +2614,13 @@ elif os.name == 'nt': addr = socket.gethostbyname(rawHost) if addr != rawHost: host.append(addr) - except socket.error: + except OSError: pass try: fqdn = socket.getfqdn(rawHost) if fqdn != rawHost: host.append(fqdn) - except socket.error: + except OSError: pass # make a check value list from the registry entry: replace the # '<local>' string by the localhost entry and the corresponding |