summary | refs | log | tree | commit | diff | stats
path: root/Lib/urllib/request.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r-- Lib/urllib/request.py 227
1 file changed, 154 insertions, 73 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index e13381c..5325d62 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,7 @@ import re
import socket
import sys
import time
+import collections
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
@@ -114,11 +115,27 @@ else:
__version__ = sys.version[:3]
_opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ *, cafile=None, capath=None):
global _opener
- if _opener is None:
- _opener = build_opener()
- return _opener.open(url, data, timeout)
+ if cafile or capath:
+ if not _have_ssl:
+ raise ValueError('SSL support not available')
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ if cafile or capath:
+ context.verify_mode = ssl.CERT_REQUIRED
+ context.load_verify_locations(cafile, capath)
+ check_hostname = True
+ else:
+ check_hostname = False
+ https_handler = HTTPSHandler(context=context, check_hostname=check_hostname)
+ opener = build_opener(https_handler)
+ elif _opener is None:
+ _opener = opener = build_opener()
+ else:
+ opener = _opener
+ return opener.open(url, data, timeout)
def install_opener(opener):
global _opener
@@ -163,7 +180,7 @@ class Request:
origin_req_host=None, unverifiable=False):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
self.full_url = unwrap(url)
- self.full_url, fragment = splittag(self.full_url)
+ self.full_url, self.fragment = splittag(self.full_url)
self.data = data
self.headers = {}
self._tunnel_host = None
@@ -202,7 +219,10 @@ class Request:
return self.data
def get_full_url(self):
- return self.full_url
+ if self.fragment:
+ return '%s#%s' % (self.full_url, self.fragment)
+ else:
+ return self.full_url
def get_type(self):
return self.type
@@ -528,6 +548,17 @@ class HTTPRedirectHandler(BaseHandler):
# fix a possible malformed URL
urlparts = urlparse(newurl)
+
+ # For security reasons we don't allow redirection to anything other
+ # than http, https or ftp.
+
+ if not urlparts.scheme in ('http', 'https', 'ftp'):
+ raise HTTPError(newurl, code,
+ msg +
+ " - Redirection to url '%s' is not allowed" %
+ newurl,
+ headers, fp)
+
if not urlparts.path:
urlparts = list(urlparts)
urlparts[2] = "/"
@@ -1031,13 +1062,24 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
+ if isinstance(data, str):
+ raise TypeError("POST data should be bytes"
+ " or an iterable of bytes. It cannot be str.")
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
if not request.has_header('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
+ try:
+ mv = memoryview(data)
+ except TypeError:
+ if isinstance(data, collections.Iterable):
+ raise ValueError("Content-Length should be specified "
+ "for iterable data of type %r %r" % (type(data),
+ data))
+ else:
+ request.add_unredirected_header(
+ 'Content-length', '%d' % (len(mv) * mv.itemsize))
sel_host = host
if request.has_proxy():
@@ -1052,7 +1094,7 @@ class AbstractHTTPHandler(BaseHandler):
return request
- def do_open(self, http_class, req):
+ def do_open(self, http_class, req, **http_conn_args):
"""Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
@@ -1061,7 +1103,8 @@ class AbstractHTTPHandler(BaseHandler):
if not host:
raise URLError('no host given')
- h = http_class(host, timeout=req.timeout) # will parse host:port
+ # will parse host:port
+ h = http_class(host, timeout=req.timeout, **http_conn_args)
headers = dict(req.unredirected_hdrs)
headers.update(dict((k, v) for k, v in req.headers.items()
@@ -1087,7 +1130,7 @@ class AbstractHTTPHandler(BaseHandler):
# Proxy-Authorization should not be sent to origin
# server.
del headers[proxy_auth_hdr]
- h._set_tunnel(req._tunnel_host, headers=tunnel_headers)
+ h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
try:
h.request(req.get_method(), req.selector, req.data, headers)
@@ -1095,7 +1138,7 @@ class AbstractHTTPHandler(BaseHandler):
except socket.error as err:
raise URLError(err)
- r.url = req.full_url
+ r.url = req.get_full_url()
# This line replaces the .msg attribute of the HTTPResponse
# with .headers, because urllib clients expect the response to
# have the reason in .msg. It would be good to mark this
@@ -1113,10 +1156,18 @@ class HTTPHandler(AbstractHTTPHandler):
http_request = AbstractHTTPHandler.do_request_
if hasattr(http.client, 'HTTPSConnection'):
+ import ssl
+
class HTTPSHandler(AbstractHTTPHandler):
+ def __init__(self, debuglevel=0, context=None, check_hostname=None):
+ AbstractHTTPHandler.__init__(self, debuglevel)
+ self._context = context
+ self._check_hostname = check_hostname
+
def https_open(self, req):
- return self.do_open(http.client.HTTPSConnection, req)
+ return self.do_open(http.client.HTTPSConnection, req,
+ context=self._context, check_hostname=self._check_hostname)
https_request = AbstractHTTPHandler.do_request_
@@ -1202,8 +1253,8 @@ class FileHandler(BaseHandler):
url = req.selector
if url[:2] == '//' and url[2:3] != '/' and (req.host and
req.host != 'localhost'):
- req.type = 'ftp'
- return self.parent.open(req)
+ if not req.host is self.get_names():
+ raise URLError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(req)
@@ -1364,9 +1415,7 @@ class CacheFTPHandler(FTPHandler):
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
+if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
else:
def url2pathname(pathname):
@@ -1505,7 +1554,7 @@ class URLopener:
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
- del fp
+ fp.close()
return url2pathname(splithost(url1)[1]), hdrs
except IOError as msg:
pass
@@ -1549,8 +1598,6 @@ class URLopener:
tfp.close()
finally:
fp.close()
- del fp
- del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
@@ -1624,6 +1671,12 @@ class URLopener:
headers["Authorization"] = "Basic %s" % auth
if realhost:
headers["Host"] = realhost
+
+ # Add Connection:close as we don't support persistent connections yet.
+ # This helps in closing the socket and avoiding ResourceWarning
+
+ headers["Connection"] = "close"
+
for header, value in self.addheaders:
headers[header] = value
@@ -1690,7 +1743,7 @@ class URLopener:
if not isinstance(url, str):
raise URLError('file error', 'proxy support for file protocol currently not implemented')
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
- return self.open_ftp(url)
+ raise ValueError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(url)
@@ -1864,8 +1917,24 @@ class FancyURLopener(URLopener):
return
void = fp.read()
fp.close()
+
# In case the server sent a relative URL, join with original:
newurl = urljoin(self.type + ":" + url, newurl)
+
+ urlparts = urlparse(newurl)
+
+ # For security reasons, we don't allow redirection to anything other
+ # than http, https and ftp.
+
+ # We are using newer HTTPError with older redirect_internal method
+ # This older method will get deprecated in 3.3
+
+ if not urlparts.scheme in ('http', 'https', 'ftp'):
+ raise HTTPError(newurl, errcode,
+ errmsg +
+ " Redirection to url '%s' is not allowed." % newurl,
+ headers, fp)
+
return self.open(newurl)
def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
@@ -2097,7 +2166,7 @@ class ftpwrapper:
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm as reason:
if str(reason)[:3] != '550':
raise URLError('ftp error', reason).with_traceback(
@@ -2118,10 +2187,14 @@ class ftpwrapper:
cmd = 'LIST ' + file
else:
cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
self.busy = 1
+
+ ftpobj = addclosehook(conn.makefile('rb'), self.endtransfer)
+ conn.close()
# Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
+ return (ftpobj, retrlen)
+
def endtransfer(self):
if not self.busy:
return
@@ -2175,68 +2248,76 @@ def proxy_bypass_environment(host):
return 0
-if sys.platform == 'darwin':
- from _scproxy import _get_proxy_settings, _get_proxies
+# This code tests an OSX specific data structure but is testable on all
+# platforms
+def _proxy_bypass_macosx_sysconf(host, proxy_settings):
+ """
+ Return True iff this host shouldn't be accessed using a proxy
- def proxy_bypass_macosx_sysconf(host):
- """
- Return True iff this host shouldn't be accessed using a proxy
+ This function uses the MacOSX framework SystemConfiguration
+ to fetch the proxy information.
- This function uses the MacOSX framework SystemConfiguration
- to fetch the proxy information.
- """
- import re
- import socket
- from fnmatch import fnmatch
-
- hostonly, port = splitport(host)
+ proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
+ { 'exclude_simple': bool,
+ 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
+ }
+ """
+ import re
+ import socket
+ from fnmatch import fnmatch
- def ip2num(ipAddr):
- parts = ipAddr.split('.')
- parts = list(map(int, parts))
- if len(parts) != 4:
- parts = (parts + [0, 0, 0, 0])[:4]
- return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
+ hostonly, port = splitport(host)
- proxy_settings = _get_proxy_settings()
+ def ip2num(ipAddr):
+ parts = ipAddr.split('.')
+ parts = list(map(int, parts))
+ if len(parts) != 4:
+ parts = (parts + [0, 0, 0, 0])[:4]
+ return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
- # Check for simple host names:
- if '.' not in host:
- if proxy_settings['exclude_simple']:
- return True
+ # Check for simple host names:
+ if '.' not in host:
+ if proxy_settings['exclude_simple']:
+ return True
- hostIP = None
+ hostIP = None
- for value in proxy_settings.get('exceptions', ()):
- # Items in the list are strings like these: *.local, 169.254/16
- if not value: continue
+ for value in proxy_settings.get('exceptions', ()):
+ # Items in the list are strings like these: *.local, 169.254/16
+ if not value: continue
- m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
- if m is not None:
- if hostIP is None:
- try:
- hostIP = socket.gethostbyname(hostonly)
- hostIP = ip2num(hostIP)
- except socket.error:
- continue
+ m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
+ if m is not None:
+ if hostIP is None:
+ try:
+ hostIP = socket.gethostbyname(hostonly)
+ hostIP = ip2num(hostIP)
+ except socket.error:
+ continue
+
+ base = ip2num(m.group(1))
+ mask = m.group(2)
+ if mask is None:
+ mask = 8 * (m.group(1).count('.') + 1)
+ else:
+ mask = int(mask[1:])
+ mask = 32 - mask
- base = ip2num(m.group(1))
- mask = m.group(2)
- if mask is None:
- mask = 8 * (m.group(1).count('.') + 1)
+ if (hostIP >> mask) == (base >> mask):
+ return True
- else:
- mask = int(mask[1:])
- mask = 32 - mask
+ elif fnmatch(host, value):
+ return True
- if (hostIP >> mask) == (base >> mask):
- return True
+ return False
- elif fnmatch(host, value):
- return True
- return False
+if sys.platform == 'darwin':
+ from _scproxy import _get_proxy_settings, _get_proxies
+ def proxy_bypass_macosx_sysconf(host):
+ proxy_settings = _get_proxy_settings()
+ return _proxy_bypass_macosx_sysconf(host, proxy_settings)
def getproxies_macosx_sysconf():
"""Return a dictionary of scheme -> proxy server URL mappings.