Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r--  Lib/urllib/request.py  195
1 file changed, 106 insertions(+), 89 deletions(-)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index ef62acc..5995cbe 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -18,7 +18,7 @@ urlopen(url, data=None) -- Basic usage is the same as original
urllib. pass the url and optionally data to post to an HTTP URL, and
get a file-like object back. One difference is that you can also pass
a Request instance instead of URL. Raises a URLError (subclass of
-IOError); for HTTP errors, raises an HTTPError, which can also be
+OSError); for HTTP errors, raises an HTTPError, which can also be
treated as a valid response.
build_opener -- Function that creates a new OpenerDirector instance.
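
For context, the exception hierarchy this docstring describes lets callers treat HTTP error responses separately from other failures. A minimal illustration (the URL is a placeholder):

    from urllib.request import urlopen
    from urllib.error import HTTPError, URLError

    try:
        with urlopen("http://example.com/might-404") as resp:   # placeholder URL
            body = resp.read()
    except HTTPError as e:
        # An HTTP error status; the object still carries headers and a body.
        print(e.code, e.reason)
    except URLError as e:
        # A lower-level failure; URLError is a subclass of OSError,
        # as the docstring now states.
        print("failed to reach server:", e.reason)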
@@ -103,7 +103,8 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
splittype, splithost, splitport, splituser, splitpasswd,
- splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
+ splitattr, splitquery, splitvalue, splittag, to_bytes,
+ unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook
# check for SSL
@@ -121,7 +122,7 @@ __all__ = [
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
- 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+ 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
'UnknownHandler', 'HTTPErrorProcessor',
# Functions
'urlopen', 'install_opener', 'build_opener',
@@ -140,13 +141,9 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
if cafile or capath or cadefault:
if not _have_ssl:
raise ValueError('SSL support not available')
- context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
- context.options |= ssl.OP_NO_SSLv2
- context.verify_mode = ssl.CERT_REQUIRED
- if cafile or capath:
- context.load_verify_locations(cafile, capath)
- else:
- context.set_default_verify_paths()
+ context = ssl._create_stdlib_context(cert_reqs=ssl.CERT_REQUIRED,
+ cafile=cafile,
+ capath=capath)
https_handler = HTTPSHandler(context=context, check_hostname=True)
opener = build_opener(https_handler)
elif _opener is None:
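
The removed lines show what the private ssl._create_stdlib_context() helper now does on urlopen's behalf. A caller-side sketch using only public APIs, equivalent to the old inline code (the CA bundle path is a placeholder):

    import ssl
    from urllib.request import build_opener, HTTPSHandler

    ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    ctx.options |= ssl.OP_NO_SSLv2
    ctx.verify_mode = ssl.CERT_REQUIRED
    ctx.load_verify_locations(cafile="/path/to/ca-bundle.crt")   # placeholder path
    opener = build_opener(HTTPSHandler(context=ctx, check_hostname=True))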
@@ -227,7 +224,7 @@ def urlcleanup():
for temp_file in _url_tempfiles:
try:
os.unlink(temp_file)
- except EnvironmentError:
+ except OSError:
pass
del _url_tempfiles[:]
@@ -258,24 +255,60 @@ class Request:
def __init__(self, url, data=None, headers={},
origin_req_host=None, unverifiable=False,
method=None):
- # unwrap('<URL:type://host/path>') --> 'type://host/path'
- self.full_url = unwrap(url)
- self.full_url, self.fragment = splittag(self.full_url)
- self.data = data
+ self.full_url = url
self.headers = {}
+ self.unredirected_hdrs = {}
+ self._data = None
+ self.data = data
self._tunnel_host = None
for key, value in headers.items():
self.add_header(key, value)
- self.unredirected_hdrs = {}
if origin_req_host is None:
origin_req_host = request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
- self.method = method
+ if method:
+ self.method = method
+
+ @property
+ def full_url(self):
+ if self.fragment:
+ return '{}#{}'.format(self._full_url, self.fragment)
+ return self._full_url
+
+ @full_url.setter
+ def full_url(self, url):
+ # unwrap('<URL:type://host/path>') --> 'type://host/path'
+ self._full_url = unwrap(url)
+ self._full_url, self.fragment = splittag(self._full_url)
self._parse()
+ @full_url.deleter
+ def full_url(self):
+ self._full_url = None
+ self.fragment = None
+ self.selector = ''
+
+ @property
+ def data(self):
+ return self._data
+
+ @data.setter
+ def data(self, data):
+ if data != self._data:
+ self._data = data
+ # issue 16464
+ # if we change data we need to remove content-length header
+ # (cause it's most probably calculated for previous value)
+ if self.has_header("Content-length"):
+ self.remove_header("Content-length")
+
+ @data.deleter
+ def data(self):
+ self.data = None
+
def _parse(self):
- self.type, rest = splittype(self.full_url)
+ self.type, rest = splittype(self._full_url)
if self.type is None:
raise ValueError("unknown url type: %r" % self.full_url)
self.host, self.selector = splithost(rest)
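
The net effect of turning full_url and data into properties, sketched with a placeholder URL: the fragment is stripped for parsing but reattached by the getter, and re-assigning data discards a stale Content-length header (issue 16464):

    from urllib.request import Request

    req = Request("http://example.com/path#frag")       # placeholder URL
    assert req.full_url == "http://example.com/path#frag"
    assert req.selector == "/path"          # the fragment is not sent on the wire

    req.add_header("Content-length", "3")
    req.data = b"new payload"               # data changed -> stale header removed
    assert not req.has_header("Content-length")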
@@ -284,62 +317,11 @@ class Request:
def get_method(self):
"""Return a string indicating the HTTP request method."""
- if self.method is not None:
- return self.method
- elif self.data is not None:
- return "POST"
- else:
- return "GET"
+ default_method = "POST" if self.data is not None else "GET"
+ return getattr(self, 'method', default_method)
def get_full_url(self):
- if self.fragment:
- return '%s#%s' % (self.full_url, self.fragment)
- else:
- return self.full_url
-
- # Begin deprecated methods
-
- def add_data(self, data):
- msg = "Request.add_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- self.data = data
-
- def has_data(self):
- msg = "Request.has_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data is not None
-
- def get_data(self):
- msg = "Request.get_data method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.data
-
- def get_type(self):
- msg = "Request.get_type method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.type
-
- def get_host(self):
- msg = "Request.get_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.host
-
- def get_selector(self):
- msg = "Request.get_selector method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.selector
-
- def is_unverifiable(self):
- msg = "Request.is_unverifiable method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.unverifiable
-
- def get_origin_req_host(self):
- msg = "Request.get_origin_req_host method is deprecated."
- warnings.warn(msg, DeprecationWarning, stacklevel=1)
- return self.origin_req_host
-
- # End deprecated methods
+ return self.full_url
def set_proxy(self, host, type):
if self.type == 'https' and not self._tunnel_host:
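
With the deprecated accessor methods gone, the same information is read and written as plain attributes, and get_method() now falls back via getattr(). A brief sketch (placeholder URL):

    from urllib.request import Request

    req = Request("http://example.com/api", data=b"x=1")   # placeholder URL
    print(req.type, req.host, req.selector)   # http example.com /api
    print(req.get_method())                   # POST, because data is set

    req.method = "PUT"                        # an explicit method always wins
    print(req.get_method())                   # PUT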
@@ -369,6 +351,10 @@ class Request:
header_name,
self.unredirected_hdrs.get(header_name, default))
+ def remove_header(self, header_name):
+ self.headers.pop(header_name, None)
+ self.unredirected_hdrs.pop(header_name, None)
+
def header_items(self):
hdrs = self.unredirected_hdrs.copy()
hdrs.update(self.headers)
@@ -531,7 +517,8 @@ def build_opener(*handlers):
opener = OpenerDirector()
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
HTTPDefaultErrorHandler, HTTPRedirectHandler,
- FTPHandler, FileHandler, HTTPErrorProcessor]
+ FTPHandler, FileHandler, HTTPErrorProcessor,
+ DataHandler]
if hasattr(http.client, "HTTPSConnection"):
default_classes.append(HTTPSHandler)
skip = set()
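
Because DataHandler is now in the default handler list, an opener built with no arguments (and therefore urlopen itself) can resolve data: URLs; for example:

    from urllib.request import build_opener

    opener = build_opener()                    # DataHandler included by default
    resp = opener.open("data:,Hello%2C%20World!")
    print(resp.read())                         # b'Hello, World!'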
@@ -1246,7 +1233,7 @@ class AbstractHTTPHandler(BaseHandler):
try:
h.request(req.get_method(), req.selector, req.data, headers)
- except socket.error as err: # timeout error
+ except OSError as err: # timeout error
h.close()
raise URLError(err)
else:
@@ -1451,7 +1438,7 @@ class FTPHandler(BaseHandler):
try:
host = socket.gethostbyname(host)
- except socket.error as msg:
+ except OSError as msg:
raise URLError(msg)
path, attrs = splitattr(req.selector)
dirs = path.split('/')
@@ -1537,6 +1524,36 @@ class CacheFTPHandler(FTPHandler):
self.cache.clear()
self.timeout.clear()
+class DataHandler(BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.full_url
+
+ scheme, data = url.split(":",1)
+ mediatype, data = data.split(",",1)
+
+ # even base64 encoded data URLs might be quoted so unquote in any case:
+ data = unquote_to_bytes(data)
+ if mediatype.endswith(";base64"):
+ data = base64.decodebytes(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = "text/plain;charset=US-ASCII"
+
+ headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
+ (mediatype, len(data)))
+
+ return addinfourl(io.BytesIO(data), headers, url)
+
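
Tracing data_open() with a small base64 payload ("hello" encoded as aGVsbG8=) shows the decoding step and the synthesized headers:

    from urllib.request import urlopen

    resp = urlopen("data:text/plain;base64,aGVsbG8=")
    print(resp.read())                      # b'hello'
    print(resp.headers["Content-type"])     # text/plain
    print(resp.headers["Content-length"])   # 5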
# Code move from the old urllib module
@@ -1660,20 +1677,20 @@ class URLopener:
return getattr(self, name)(url)
else:
return getattr(self, name)(url, data)
- except HTTPError:
+ except (HTTPError, URLError):
raise
- except socket.error as msg:
- raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
+ except OSError as msg:
+ raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
def open_unknown(self, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
- raise IOError('url error', 'unknown url type', type)
+ raise OSError('url error', 'unknown url type', type)
def open_unknown_proxy(self, proxy, fullurl, data=None):
"""Overridable interface to open unknown URL type."""
type, url = splittype(fullurl)
- raise IOError('url error', 'invalid proxy for %s' % type, proxy)
+ raise OSError('url error', 'invalid proxy for %s' % type, proxy)
# External interface
def retrieve(self, url, filename=None, reporthook=None, data=None):
@@ -1689,7 +1706,7 @@ class URLopener:
hdrs = fp.info()
fp.close()
return url2pathname(splithost(url1)[1]), hdrs
- except IOError as msg:
+ except OSError as msg:
pass
fp = self.open(url, data)
try:
@@ -1782,7 +1799,7 @@ class URLopener:
if proxy_bypass(realhost):
host = realhost
- if not host: raise IOError('http error', 'no host given')
+ if not host: raise OSError('http error', 'no host given')
if proxy_passwd:
proxy_passwd = unquote(proxy_passwd)
@@ -1855,7 +1872,7 @@ class URLopener:
return self.http_error_default(url, fp, errcode, errmsg, headers)
def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handler: close the connection and raise IOError."""
+ """Default error handler: close the connection and raise OSError."""
fp.close()
raise HTTPError(url, errcode, errmsg, headers, None)
@@ -1982,7 +1999,7 @@ class URLopener:
try:
[type, data] = url.split(',', 1)
except ValueError:
- raise IOError('data error', 'bad data URL')
+ raise OSError('data error', 'bad data URL')
if not type:
type = 'text/plain;charset=US-ASCII'
semi = type.rfind(';')
@@ -2431,7 +2448,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
try:
hostIP = socket.gethostbyname(hostonly)
hostIP = ip2num(hostIP)
- except socket.error:
+ except OSError:
continue
base = ip2num(m.group(1))
@@ -2517,7 +2534,7 @@ elif os.name == 'nt':
proxies['https'] = 'https://%s' % proxyServer
proxies['ftp'] = 'ftp://%s' % proxyServer
internetSettings.Close()
- except (WindowsError, ValueError, TypeError):
+ except (OSError, ValueError, TypeError):
# Either registry key not found etc, or the value in an
# unexpected format.
# proxies already set up to be empty so nothing to do
@@ -2547,7 +2564,7 @@ elif os.name == 'nt':
proxyOverride = str(winreg.QueryValueEx(internetSettings,
'ProxyOverride')[0])
# ^^^^ Returned as Unicode but problems if not converted to ASCII
- except WindowsError:
+ except OSError:
return 0
if not proxyEnable or not proxyOverride:
return 0
@@ -2558,13 +2575,13 @@ elif os.name == 'nt':
addr = socket.gethostbyname(rawHost)
if addr != rawHost:
host.append(addr)
- except socket.error:
+ except OSError:
pass
try:
fqdn = socket.getfqdn(rawHost)
if fqdn != rawHost:
host.append(fqdn)
- except socket.error:
+ except OSError:
pass
# make a check value list from the registry entry: replace the
# '<local>' string by the localhost entry and the corresponding