summaryrefslogtreecommitdiffstats
path: root/Lib/urllib
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib')
-rw-r--r--Lib/urllib/error.py14
-rw-r--r--Lib/urllib/parse.py9
-rw-r--r--Lib/urllib/request.py180
3 files changed, 114 insertions, 89 deletions
diff --git a/Lib/urllib/error.py b/Lib/urllib/error.py
index b712ebb..45b7169 100644
--- a/Lib/urllib/error.py
+++ b/Lib/urllib/error.py
@@ -1,6 +1,6 @@
"""Exception classes raised by urllib.
-The base exception class is URLError, which inherits from IOError. It
+The base exception class is URLError, which inherits from OSError. It
doesn't define any behavior of its own, but is the base class for all
exceptions defined in this package.
@@ -17,11 +17,11 @@ __all__ = ['URLError', 'HTTPError', 'ContentTooShortError']
# do these error classes make sense?
-# make sure all of the IOError stuff is overridden. we just want to be
+# make sure all of the OSError stuff is overridden. we just want to be
# subtypes.
-class URLError(IOError):
- # URLError is a sub-type of IOError, but it doesn't share any of
+class URLError(OSError):
+ # URLError is a sub-type of OSError, but it doesn't share any of
# the implementation. need to override __init__ and __str__.
# It sets self.args for compatibility with other EnvironmentError
# subclasses, but args doesn't have the typical format with errno in
@@ -61,9 +61,13 @@ class HTTPError(URLError, urllib.response.addinfourl):
def reason(self):
return self.msg
- def info(self):
+ @property
+ def headers(self):
return self.hdrs
+ @headers.setter
+ def headers(self, headers):
+ self.hdrs = headers
# exception raised when downloaded size does not match content-length
class ContentTooShortError(URLError):
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index abe5d0d..b49b7a1 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -846,7 +846,6 @@ def splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
if _typeprog is None:
- import re
_typeprog = re.compile('^([^/:]+):')
match = _typeprog.match(url)
@@ -860,7 +859,6 @@ def splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
- import re
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
@@ -877,7 +875,6 @@ def splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
global _userprog
if _userprog is None:
- import re
_userprog = re.compile('^(.*)@(.*)$')
match = _userprog.match(host)
@@ -889,7 +886,6 @@ def splitpasswd(user):
"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
global _passwdprog
if _passwdprog is None:
- import re
_passwdprog = re.compile('^([^:]*):(.*)$',re.S)
match = _passwdprog.match(user)
@@ -902,7 +898,6 @@ def splitport(host):
"""splitport('host:port') --> 'host', 'port'."""
global _portprog
if _portprog is None:
- import re
_portprog = re.compile('^(.*):([0-9]+)$')
match = _portprog.match(host)
@@ -917,7 +912,6 @@ def splitnport(host, defport=-1):
Return None if ':' but not a valid number."""
global _nportprog
if _nportprog is None:
- import re
_nportprog = re.compile('^(.*):(.*)$')
match = _nportprog.match(host)
@@ -936,7 +930,6 @@ def splitquery(url):
"""splitquery('/path?query') --> '/path', 'query'."""
global _queryprog
if _queryprog is None:
- import re
_queryprog = re.compile('^(.*)\?([^?]*)$')
match = _queryprog.match(url)
@@ -948,7 +941,6 @@ def splittag(url):
"""splittag('/path#tag') --> '/path', 'tag'."""
global _tagprog
if _tagprog is None:
- import re
_tagprog = re.compile('^(.*)#([^#]*)$')
match = _tagprog.match(url)
@@ -966,7 +958,6 @@ def splitvalue(attr):
"""splitvalue('attr=value') --> 'attr', 'value'."""
global _valueprog
if _valueprog is None:
- import re
_valueprog = re.compile('^([^=]*)=(.*)$')
match = _valueprog.match(attr)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 1279322..1de6aae 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -18,7 +18,7 @@ urlopen(url, data=None) -- Basic usage is the same as original
urllib. pass the url and optionally data to post to an HTTP URL, and
get a file-like object back. One difference is that you can also pass
a Request instance instead of URL. Raises a URLError (subclass of
-IOError); for HTTP errors, raises an HTTPError, which can also be
+OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
build_opener -- Function that creates a new OpenerDirector instance.
@@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
splittype, splithost, splitport, splituser, splitpasswd,
- splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
+ splitattr, splitquery, splitvalue, splittag, to_bytes,
+ unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook
# check for SSL
@@ -121,7 +122,7 @@ __all__ = [
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
- 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+ 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
'UnknownHandler', 'HTTPErrorProcessor',
# Functions
'urlopen', 'install_opener', 'build_opener',
@@ -227,7 +228,7 @@ def urlcleanup():
for temp_file in _url_tempfiles:
try:
os.unlink(temp_file)
- except EnvironmentError:
+ except OSError:
pass
del _url_tempfiles[:]
@@ -258,24 +259,59 @@ class Request:
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False,
method=None):
- # unwrap('<URL:type://host/path>') --> 'type://host/path'
- self.full_url = unwrap(url)
- self.full_url, self.fragment = splittag(self.full_url)
- self.data = data
+ self.full_url = url
self.headers = {}
+ self.unredirected_hdrs = {}
+ self._data = None
+ self.data = data
self._tunnel_host = None
for key, value in headers.items():
self.add_header(key, value)
- self.unredirected_hdrs = {}
if origin_req_host is None:
origin_req_host = request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
self.method = method
+
+ @property
+ def full_url(self):
+ if self.fragment:
+ return '{}#{}'.format(self._full_url, self.fragment)
+ return self._full_url
+
+ @full_url.setter
+ def full_url(self, url):
+ # unwrap('<URL:type://host/path>') --> 'type://host/path'
+ self._full_url = unwrap(url)
+ self._full_url, self.fragment = splittag(self._full_url)
self._parse()
+ @full_url.deleter
+ def full_url(self):
+ self._full_url = None
+ self.fragment = None
+ self.selector = ''
+
+ @property
+ def data(self):
+ return self._data
+
+ @data.setter
+ def data(self, data):
+ if data != self._data:
+ self._data = data
+ # issue 16464
+ # if we change data we need to remove content-length header
+ # (cause it's most probably calculated for previous value)
+ if self.has_header("Content-length"):
+ self.remove_header("Content-length")
+
+ @data.deleter
+ def data(self):
+ self.data = None
+
def _parse(self):
- self.type, rest = splittype(self.full_url)
+ self.type, rest = splittype(self._full_url)
if self.type is None:
raise ValueError("unknown url type: %r" % self.full_url)
self.host, self.selector = splithost(rest)
@@ -292,54 +328,7 @@ class Request:
return "GET"
def get_full_url(self):
- if self.fragment:
- return '%s#%s' % (self.full_url, self.fragment)
- else:
- return self.full_url
-
- # Begin deprecated methods
-
- def add_data(self, data):
- msg = "Request.add_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- self.data = data
-
- def has_data(self):
- msg = "Request.has_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data is not None
-
- def get_data(self):
- msg = "Request.get_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data
-
- def get_type(self):
- msg = "Request.get_type method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.type
-
- def get_host(self):
- msg = "Request.get_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.host
-
- def get_selector(self):
- msg = "Request.get_selector method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.selector
-
- def is_unverifiable(self):
- msg = "Request.is_unverifiable method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.unverifiable
-
- def get_origin_req_host(self):
- msg = "Request.get_origin_req_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.origin_req_host
-
- # End deprecated methods
+ return self.full_url
def set_proxy(self, host, type):
if self.type == 'https' and not self._tunnel_host:
@@ -369,6 +358,10 @@ class Request:
header_name,
self.unredirected_hdrs.get(header_name, default))
+ def remove_header(self, header_name):
+ self.headers.pop(header_name, None)
+ self.unredirected_hdrs.pop(header_name, None)
+
def header_items(self):
hdrs = self.unredirected_hdrs.copy()
hdrs.update(self.headers)
@@ -531,7 +524,8 @@ def build_opener(*handlers):
opener = OpenerDirector()
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
HTTPDefaultErrorHandler, HTTPRedirectHandler,
- FTPHandler, FileHandler, HTTPErrorProcessor]
+ FTPHandler, FileHandler, HTTPErrorProcessor,
+ DataHandler]
if hasattr(http.client, "HTTPSConnection"):
default_classes.append(HTTPSHandler)
skip = set()
@@ -1246,11 +1240,17 @@ class AbstractHTTPHandler(BaseHandler):
try:
h.request(req.get_method(), req.selector, req.data, headers)
- except socket.error as err: # timeout error
+ except OSError as err: # timeout error
h.close()
raise URLError(err)
else:
r = h.getresponse()
+ # If the server does not send us a 'Connection: close' header,
+ # HTTPConnection assumes the socket should be left open. Manually
+ # mark the socket to be closed when this response object goes away.
+ if h.sock:
+ h.sock.close()
+ h.sock = None
r.url = req.get_full_url()
# This line replaces the .msg attribute of the HTTPResponse
@@ -1445,7 +1445,7 @@ class FTPHandler(BaseHandler):
try:
host = socket.gethostbyname(host)
- except socket.error as msg:
+ except OSError as msg:
raise URLError(msg)
path, attrs = splitattr(req.selector)
dirs = path.split('/')
@@ -1531,6 +1531,36 @@ class CacheFTPHandler(FTPHandler):
self.cache.clear()
self.timeout.clear()
+class DataHandler(BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.full_url
+
+ scheme, data = url.split(":",1)
+ mediatype, data = data.split(",",1)
+
+ # even base64 encoded data URLs might be quoted so unquote in any case:
+ data = unquote_to_bytes(data)
+ if mediatype.endswith(";base64"):
+ data = base64.decodebytes(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = "text/plain;charset=US-ASCII"
+
+ headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
+ (mediatype, len(data)))
+
+ return addinfourl(io.BytesIO(data), headers, url)
+
# Code move from the old urllib module
@@ -1654,20 +1684,20 @@ class URLopener:
return getattr(self, name)(url)
else:
return getattr(self, name)(url, data)
- except HTTPError:
+ except (HTTPError, URLError):
raise
- except socket.error as msg:
- raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
+ except OSError as msg:
+ raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
def open_unknown(self, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
- raise IOError('url error', 'unknown url type', type)
+ raise OSError('url error', 'unknown url type', type)
def open_unknown_proxy(self, proxy, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
- raise IOError('url error', 'invalid proxy for %s' % type, proxy)
+ raise OSError('url error', 'invalid proxy for %s' % type, proxy)
# External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
@@ -1683,7 +1713,7 @@ class URLopener:
hdrs = fp.info()
fp.close()
return url2pathname(splithost(url1)[1]), hdrs
- except IOError as msg:
+ except OSError as msg:
pass
fp = self.open(url, data)
try:
@@ -1776,7 +1806,7 @@ class URLopener:
if proxy_bypass(realhost):
host = realhost
- if not host: raise IOError('http error', 'no host given')
+ if not host: raise OSError('http error', 'no host given')
if proxy_passwd:
proxy_passwd = unquote(proxy_passwd)
@@ -1849,7 +1879,7 @@ class URLopener:
return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handler: close the connection and raise IOError."""
+ """Default error handler: close the connection and raise OSError."""
fp.close()
raise HTTPError(url, errcode, errmsg, headers, None)
@@ -1976,7 +2006,7 @@ class URLopener:
try:
[type, data] = url.split(',', 1)
except ValueError:
- raise IOError('data error', 'bad data URL')
+ raise OSError('data error', 'bad data URL')
if not type:
type = 'text/plain;charset=US-ASCII'
semi = type.rfind(';')
@@ -2421,7 +2451,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
try:
hostIP = socket.gethostbyname(hostonly)
hostIP = ip2num(hostIP)
- except socket.error:
+ except OSError:
continue
base = ip2num(m.group(1))
@@ -2507,7 +2537,7 @@ elif os.name == 'nt':
proxies['https'] = 'https://%s' % proxyServer
proxies['ftp'] = 'ftp://%s' % proxyServer
internetSettings.Close()
- except (WindowsError, ValueError, TypeError):
+ except (OSError, ValueError, TypeError):
# Either registry key not found etc, or the value in an
# unexpected format.
# proxies already set up to be empty so nothing to do
@@ -2537,7 +2567,7 @@ elif os.name == 'nt':
proxyOverride = str(winreg.QueryValueEx(internetSettings,
'ProxyOverride')[0])
# ^^^^ Returned as Unicode but problems if not converted to ASCII
- except WindowsError:
+ except OSError:
return 0
if not proxyEnable or not proxyOverride:
return 0
@@ -2548,13 +2578,13 @@ elif os.name == 'nt':
addr = socket.gethostbyname(rawHost)
if addr != rawHost:
host.append(addr)
- except socket.error:
+ except OSError:
pass
try:
fqdn = socket.getfqdn(rawHost)
if fqdn != rawHost:
host.append(fqdn)
- except socket.error:
+ except OSError:
pass
# make a check value list from the registry entry: replace the
# '<local>' string by the localhost entry and the corresponding