Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r--  Lib/urllib/request.py | 206
1 file changed, 134 insertions(+), 72 deletions(-)
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 6b29901..eb45c7e 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -94,6 +94,8 @@ import re
import socket
import sys
import time
+import collections
+import warnings
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
@@ -114,11 +116,27 @@ else:
__version__ = sys.version[:3]
_opener = None
-def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ *, cafile=None, capath=None):
global _opener
- if _opener is None:
- _opener = build_opener()
- return _opener.open(url, data, timeout)
+ if cafile or capath:
+ if not _have_ssl:
+ raise ValueError('SSL support not available')
+ context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
+ context.options |= ssl.OP_NO_SSLv2
+ if cafile or capath:
+ context.verify_mode = ssl.CERT_REQUIRED
+ context.load_verify_locations(cafile, capath)
+ check_hostname = True
+ else:
+ check_hostname = False
+ https_handler = HTTPSHandler(context=context, check_hostname=check_hostname)
+ opener = build_opener(https_handler)
+ elif _opener is None:
+ _opener = opener = build_opener()
+ else:
+ opener = _opener
+ return opener.open(url, data, timeout)
def install_opener(opener):
global _opener
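For illustration, a minimal sketch of calling the extended urlopen() with the new keyword-only arguments; the URL and CA bundle path below are placeholders:

    import urllib.request

    # Certificate verification is enabled whenever cafile/capath is given;
    # both the URL and the bundle path are illustrative.
    response = urllib.request.urlopen("https://www.example.com/",
                                      cafile="/etc/ssl/certs/ca-bundle.crt")
    try:
        body = response.read()
    finally:
        response.close()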
@@ -535,12 +553,11 @@ class HTTPRedirectHandler(BaseHandler):
# For security reasons we don't allow redirection to anything other
# than http, https or ftp.
- if not urlparts.scheme in ('http', 'https', 'ftp'):
- raise HTTPError(newurl, code,
- msg +
- " - Redirection to url '%s' is not allowed" %
- newurl,
- headers, fp)
+ if urlparts.scheme not in ('http', 'https', 'ftp', ''):
+ raise HTTPError(
+ newurl, code,
+ "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
+ headers, fp)
if not urlparts.path:
urlparts = list(urlparts)
@@ -705,7 +722,7 @@ class HTTPPasswordMgr:
# uri could be a single URI or a sequence
if isinstance(uri, str):
uri = [uri]
- if not realm in self.passwd:
+ if realm not in self.passwd:
self.passwd[realm] = {}
for default_port in True, False:
reduced_uri = tuple(
@@ -778,7 +795,7 @@ class AbstractBasicAuthHandler:
# allow for double- and single-quoted realm values
# (single quotes are a violation of the RFC, but appear in the wild)
rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
- 'realm=(["\'])(.*?)\\2', re.I)
+ 'realm=(["\']?)([^"\']*)\\2', re.I)
# XXX could pre-emptively send auth info already accepted (RFC 2617,
# end of section 2, and section 1.2 immediately after "credentials"
@@ -811,6 +828,9 @@ class AbstractBasicAuthHandler:
mo = AbstractBasicAuthHandler.rx.search(authreq)
if mo:
scheme, quote, realm = mo.groups()
+ if quote not in ["'", '"']:
+ warnings.warn("Basic Auth Realm was unquoted",
+ UserWarning, 2)
if scheme.lower() == 'basic':
response = self.retry_http_basic_auth(host, req, realm)
if response and response.code != 401:
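A quick check of what the relaxed pattern accepts, using the regex above verbatim; an unquoted realm now matches and is what triggers the UserWarning in http_error_auth_reqed():

    import re

    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # Both the RFC-compliant quoted form and the unquoted form seen in the
    # wild now match; an empty `quote` group is what the warning keys on.
    for challenge in ('Basic realm="WallyWorld"', 'Basic realm=WallyWorld'):
        scheme, quote, realm = rx.search(challenge).groups()
        print(scheme, repr(quote), realm)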
@@ -1045,13 +1065,25 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
+ if isinstance(data, str):
+ msg = "POST data should be bytes or an iterable of bytes. "\
+ "It cannot be of type str."
+ raise TypeError(msg)
if not request.has_header('Content-type'):
request.add_unredirected_header(
'Content-type',
'application/x-www-form-urlencoded')
if not request.has_header('Content-length'):
- request.add_unredirected_header(
- 'Content-length', '%d' % len(data))
+ try:
+ mv = memoryview(data)
+ except TypeError:
+ if isinstance(data, collections.Iterable):
+ raise ValueError("Content-Length should be specified "
+ "for iterable data of type %r %r" % (type(data),
+ data))
+ else:
+ request.add_unredirected_header(
+ 'Content-length', '%d' % (len(mv) * mv.itemsize))
sel_host = host
if request.has_proxy():
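A sketch of the stricter POST handling (URL is a placeholder): str bodies are rejected, bytes-like bodies get Content-Length computed via memoryview, and iterable bodies must carry an explicit Content-Length:

    from urllib.parse import urlencode
    from urllib.request import Request

    # urlencode() returns str; it must be encoded to bytes before use,
    # otherwise do_request_() now raises TypeError.
    data = urlencode({'q': 'python'}).encode('ascii')
    req = Request('http://www.example.com/search', data=data)
    # For an iterable body, the length cannot be inferred, so a
    # Content-Length header must be supplied or a ValueError is raised.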
@@ -1066,7 +1098,7 @@ class AbstractHTTPHandler(BaseHandler):
return request
- def do_open(self, http_class, req):
+ def do_open(self, http_class, req, **http_conn_args):
"""Return an HTTPResponse object for the request, using http_class.
http_class must implement the HTTPConnection API from http.client.
@@ -1075,7 +1107,8 @@ class AbstractHTTPHandler(BaseHandler):
if not host:
raise URLError('no host given')
- h = http_class(host, timeout=req.timeout) # will parse host:port
+ # will parse host:port
+ h = http_class(host, timeout=req.timeout, **http_conn_args)
headers = dict(req.unredirected_hdrs)
headers.update(dict((k, v) for k, v in req.headers.items()
@@ -1101,13 +1134,15 @@ class AbstractHTTPHandler(BaseHandler):
# Proxy-Authorization should not be sent to origin
# server.
del headers[proxy_auth_hdr]
- h._set_tunnel(req._tunnel_host, headers=tunnel_headers)
+ h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
try:
h.request(req.get_method(), req.selector, req.data, headers)
- r = h.getresponse() # an HTTPResponse instance
- except socket.error as err:
+ except socket.error as err: # timeout error
+ h.close()
raise URLError(err)
+ else:
+ r = h.getresponse()
r.url = req.get_full_url()
# This line replaces the .msg attribute of the HTTPResponse
@@ -1127,10 +1162,18 @@ class HTTPHandler(AbstractHTTPHandler):
http_request = AbstractHTTPHandler.do_request_
if hasattr(http.client, 'HTTPSConnection'):
+ import ssl
+
class HTTPSHandler(AbstractHTTPHandler):
+ def __init__(self, debuglevel=0, context=None, check_hostname=None):
+ AbstractHTTPHandler.__init__(self, debuglevel)
+ self._context = context
+ self._check_hostname = check_hostname
+
def https_open(self, req):
- return self.do_open(http.client.HTTPSConnection, req)
+ return self.do_open(http.client.HTTPSConnection, req,
+ context=self._context, check_hostname=self._check_hostname)
https_request = AbstractHTTPHandler.do_request_
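The handler-level equivalent of the urlopen(cafile=...) path shown earlier, mirroring the context that urlopen() builds internally; the CA bundle path is a placeholder:

    import ssl
    import urllib.request

    context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
    context.options |= ssl.OP_NO_SSLv2
    context.verify_mode = ssl.CERT_REQUIRED
    context.load_verify_locations('/etc/ssl/certs/ca-bundle.crt')  # placeholder

    opener = urllib.request.build_opener(
        urllib.request.HTTPSHandler(context=context, check_hostname=True))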
@@ -1216,8 +1259,8 @@ class FileHandler(BaseHandler):
url = req.selector
if url[:2] == '//' and url[2:3] != '/' and (req.host and
req.host != 'localhost'):
- req.type = 'ftp'
- return self.parent.open(req)
+ if req.host not in self.get_names():
+ raise URLError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(req)
@@ -1257,9 +1300,9 @@ class FileHandler(BaseHandler):
else:
origurl = 'file://' + filename
return addinfourl(open(localfile, 'rb'), headers, origurl)
- except OSError as msg:
+ except OSError as exp:
# users shouldn't expect OSErrors coming from urlopen()
- raise URLError(msg)
+ raise URLError(exp)
raise URLError('file not on local host')
def _safe_gethostbyname(host):
@@ -1318,13 +1361,13 @@ class FTPHandler(BaseHandler):
headers += "Content-length: %d\n" % retrlen
headers = email.message_from_string(headers)
return addinfourl(fp, headers, req.full_url)
- except ftplib.all_errors as msg:
- exc = URLError('ftp error: %s' % msg)
+ except ftplib.all_errors as exp:
+ exc = URLError('ftp error: %r' % exp)
raise exc.with_traceback(sys.exc_info()[2])
def connect_ftp(self, user, passwd, host, port, dirs, timeout):
- fw = ftpwrapper(user, passwd, host, port, dirs, timeout)
- return fw
+ return ftpwrapper(user, passwd, host, port, dirs, timeout,
+ persistent=False)
class CacheFTPHandler(FTPHandler):
# XXX would be nice to have pluggable cache strategies
@@ -1373,14 +1416,19 @@ class CacheFTPHandler(FTPHandler):
break
self.soonest = min(list(self.timeout.values()))
+ def clear_cache(self):
+ for conn in self.cache.values():
+ conn.close()
+ self.cache.clear()
+ self.timeout.clear()
+
+
# Code moved from the old urllib module
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif os.name == 'nt':
+if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
else:
def url2pathname(pathname):
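A hedged sketch of using the new CacheFTPHandler.clear_cache() above to tear down cached FTP connections explicitly; the FTP URL is illustrative:

    import urllib.request

    ftp_handler = urllib.request.CacheFTPHandler()
    opener = urllib.request.build_opener(ftp_handler)
    # ... opener.open('ftp://ftp.example.com/pub/README') as needed ...
    ftp_handler.clear_cache()   # closes every cached connection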
@@ -1519,7 +1567,7 @@ class URLopener:
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
- del fp
+ fp.close()
return url2pathname(splithost(url1)[1]), hdrs
except IOError as msg:
pass
@@ -1546,9 +1594,9 @@ class URLopener:
size = -1
read = 0
blocknum = 0
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
if reporthook:
- if "content-length" in headers:
- size = int(headers["Content-Length"])
reporthook(blocknum, bs, size)
while 1:
block = fp.read(bs)
@@ -1563,8 +1611,6 @@ class URLopener:
tfp.close()
finally:
fp.close()
- del fp
- del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
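With the Content-Length parsing moved out of the reporthook branch, the size-mismatch check at the end of retrieve() now works even when no hook is installed. For reference, a reporthook sketch (URL and filename are placeholders):

    import urllib.request

    def show_progress(block_num, block_size, total_size):
        # total_size is -1 when the server sends no Content-Length header.
        print(block_num, block_size, total_size)

    urllib.request.urlretrieve('http://www.example.com/file.bin',
                               'file.bin', reporthook=show_progress)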
@@ -1616,17 +1662,16 @@ class URLopener:
if proxy_bypass(realhost):
host = realhost
- #print "proxy via http:", host, selector
if not host: raise IOError('http error', 'no host given')
if proxy_passwd:
- import base64
+ proxy_passwd = unquote(proxy_passwd)
proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
else:
proxy_auth = None
if user_passwd:
- import base64
+ user_passwd = unquote(user_passwd)
auth = base64.b64encode(user_passwd.encode()).decode('ascii')
else:
auth = None
@@ -1638,6 +1683,12 @@ class URLopener:
headers["Authorization"] = "Basic %s" % auth
if realhost:
headers["Host"] = realhost
+
+ # Add Connection:close as we don't support persistent connections yet.
+ # This helps in closing the socket and avoiding ResourceWarning
+
+ headers["Connection"] = "close"
+
for header, value in self.addheaders:
headers[header] = value
@@ -1685,7 +1736,6 @@ class URLopener:
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handler: close the connection and raise IOError."""
- void = fp.read()
fp.close()
raise HTTPError(url, errcode, errmsg, headers, None)
@@ -1702,9 +1752,9 @@ class URLopener:
def open_file(self, url):
"""Use local file or FTP depending on form of URL."""
if not isinstance(url, str):
- raise URLError('file error', 'proxy support for file protocol currently not implemented')
+ raise URLError('file error: proxy support for file protocol currently not implemented')
if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
- return self.open_ftp(url)
+ raise ValueError("file:// scheme is supported only on localhost")
else:
return self.open_local_file(url)
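A sketch of the new behaviour of URLopener.open_file() for a file URL naming a non-local host (host name is illustrative); the silent FTP fallback is gone and a ValueError is raised instead:

    import urllib.request

    opener = urllib.request.URLopener()
    try:
        opener.open('file://remote.example.com/etc/hosts')
    except ValueError as exc:
        print(exc)   # file:// scheme is supported only on localhost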
@@ -1717,7 +1767,7 @@ class URLopener:
try:
stats = os.stat(localname)
except OSError as e:
- raise URLError(e.errno, e.strerror, e.filename)
+ raise URLError(e.strerror, e.filename)
size = stats.st_size
modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
mtype = mimetypes.guess_type(url)[0]
@@ -1731,21 +1781,22 @@ class URLopener:
return addinfourl(open(localname, 'rb'), headers, urlfile)
host, port = splitport(host)
if (not port
- and socket.gethostbyname(host) in (localhost() + thishost())):
+ and socket.gethostbyname(host) in ((localhost(),) + thishost())):
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
+ elif file[:2] == './':
+ raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
return addinfourl(open(localname, 'rb'), headers, urlfile)
- raise URLError('local file error', 'not on local host')
+ raise URLError('local file error: not on local host')
def open_ftp(self, url):
"""Use FTP protocol."""
if not isinstance(url, str):
- raise URLError('ftp error', 'proxy support for ftp protocol currently not implemented')
+ raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
import mimetypes
- from io import StringIO
host, path = splithost(url)
- if not host: raise URLError('ftp error', 'no host given')
+ if not host: raise URLError('ftp error: no host given')
host, port = splitport(host)
user, host = splituser(host)
if user: user, passwd = splitpasswd(user)
@@ -1775,7 +1826,7 @@ class URLopener:
del self.ftpcache[k]
v.close()
try:
- if not key in self.ftpcache:
+ if key not in self.ftpcache:
self.ftpcache[key] = \
ftpwrapper(user, passwd, host, port, dirs)
if not file: type = 'D'
@@ -1794,13 +1845,13 @@ class URLopener:
headers += "Content-Length: %d\n" % retrlen
headers = email.message_from_string(headers)
return addinfourl(fp, headers, "ftp:" + url)
- except ftperrors() as msg:
- raise URLError('ftp error', msg).with_traceback(sys.exc_info()[2])
+ except ftperrors() as exp:
+ raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
def open_data(self, url, data=None):
"""Use "data" URL."""
if not isinstance(url, str):
- raise URLError('data error', 'proxy support for data protocol currently not implemented')
+ raise URLError('data error: proxy support for data protocol currently not implemented')
# ignore POSTed data
#
# syntax of data URLs:
@@ -1825,7 +1876,6 @@ class URLopener:
time.gmtime(time.time())))
msg.append('Content-type: %s' % type)
if encoding == 'base64':
- import base64
# XXX is this encoding/decoding ok?
data = base64.decodebytes(data.encode('ascii')).decode('latin1')
else:
@@ -1876,7 +1926,6 @@ class FancyURLopener(URLopener):
newurl = headers['uri']
else:
return
- void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
@@ -1890,7 +1939,7 @@ class FancyURLopener(URLopener):
# We are using newer HTTPError with older redirect_internal method
# This older method will get deprecated in 3.3
- if not urlparts.scheme in ('http', 'https', 'ftp'):
+ if urlparts.scheme not in ('http', 'https', 'ftp', ''):
raise HTTPError(newurl, errcode,
errmsg +
" Redirection to url '%s' is not allowed." % newurl,
@@ -1917,7 +1966,7 @@ class FancyURLopener(URLopener):
retry=False):
"""Error 401 -- authentication required.
This function supports Basic authentication only."""
- if not 'www-authenticate' in headers:
+ if 'www-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
@@ -1943,7 +1992,7 @@ class FancyURLopener(URLopener):
retry=False):
"""Error 407 -- proxy authentication required.
This function supports Basic authentication only."""
- if not 'proxy-authenticate' in headers:
+ if 'proxy-authenticate' not in headers:
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
@@ -2068,7 +2117,7 @@ def thishost():
"""Return the IP addresses of the current host."""
global _thishost
if _thishost is None:
- _thishost = tuple(socket.gethostbyname_ex(socket.gethostname()[2]))
+ _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
return _thishost
_ftperrors = None
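The fix above moves the [2] subscript onto the tuple returned by gethostbyname_ex() rather than onto the host name string; for reference:

    import socket

    # gethostbyname_ex() returns (hostname, aliaslist, ipaddrlist);
    # thishost() wants the address list, hence indexing the result with [2].
    hostname, aliases, ipaddrs = socket.gethostbyname_ex(socket.gethostname())
    print(tuple(ipaddrs))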
@@ -2094,13 +2143,16 @@ def noheaders():
class ftpwrapper:
"""Class used by open_ftp() for cache of open FTP connections."""
- def __init__(self, user, passwd, host, port, dirs, timeout=None):
+ def __init__(self, user, passwd, host, port, dirs, timeout=None,
+ persistent=True):
self.user = user
self.passwd = passwd
self.host = host
self.port = port
self.dirs = dirs
self.timeout = timeout
+ self.refcount = 0
+ self.keepalive = persistent
self.init()
def init(self):
@@ -2127,10 +2179,10 @@ class ftpwrapper:
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm as reason:
if str(reason)[:3] != '550':
- raise URLError('ftp error', reason).with_traceback(
+ raise URLError('ftp error: %r' % reason).with_traceback(
sys.exc_info()[2])
if not conn:
# Set transfer mode to ASCII!
@@ -2142,26 +2194,36 @@ class ftpwrapper:
try:
self.ftp.cwd(file)
except ftplib.error_perm as reason:
- raise URLError('ftp error', reason) from reason
+ raise URLError('ftp error: %r' % reason) from reason
finally:
self.ftp.cwd(pwd)
cmd = 'LIST ' + file
else:
cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
self.busy = 1
+
+ ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
+ self.refcount += 1
+ conn.close()
# Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'), self.endtransfer), conn[1])
+ return (ftpobj, retrlen)
+
def endtransfer(self):
- if not self.busy:
- return
self.busy = 0
- try:
- self.ftp.voidresp()
- except ftperrors():
- pass
def close(self):
+ self.keepalive = False
+ if self.refcount <= 0:
+ self.real_close()
+
+ def file_close(self):
+ self.endtransfer()
+ self.refcount -= 1
+ if self.refcount <= 0 and not self.keepalive:
+ self.real_close()
+
+ def real_close(self):
self.endtransfer()
try:
self.ftp.close()
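A hedged sketch of the new ftpwrapper lifecycle: each file handed out by retrfile() bumps refcount, its close hook calls file_close(), and the control connection is torn down once the count reaches zero with keepalive off. Host, credentials and path are placeholders:

    from urllib.request import ftpwrapper

    ftp = ftpwrapper('anonymous', 'guest@', 'ftp.example.com', 21, ['pub'],
                     timeout=30, persistent=False)
    fp, length = ftp.retrfile('README', 'I')   # refcount -> 1
    data = fp.read()
    fp.close()   # file_close(): refcount -> 0, real_close() runs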
@@ -2198,7 +2260,8 @@ def proxy_bypass_environment(host):
# strip port off host
hostonly, port = splitport(host)
# check if the host ends with any of the DNS suffixes
- for name in no_proxy.split(','):
+ no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
+ for name in no_proxy_list:
if name and (hostonly.endswith(name) or host.endswith(name)):
return 1
# otherwise, don't bypass
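Entries in no_proxy may now carry whitespace after the separating commas; a quick check (values are illustrative):

    import os
    import urllib.request

    os.environ['no_proxy'] = 'localhost, .example.com'
    print(urllib.request.proxy_bypass_environment('www.example.com'))  # 1
    print(urllib.request.proxy_bypass_environment('www.python.org'))   # 0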
@@ -2399,7 +2462,6 @@ elif os.name == 'nt':
test = test.replace("*", r".*") # change glob sequence
test = test.replace("?", r".") # change glob char
for val in host:
- # print "%s <--> %s" %( test, val )
if re.match(test, val, re.I):
return 1
return 0