summaryrefslogtreecommitdiffstats
path: root/Lib/urllib/request.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/urllib/request.py')
-rw-r--r--Lib/urllib/request.py210
1 files changed, 161 insertions, 49 deletions
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index eb45c7e..5ddec5f 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -89,14 +89,16 @@ import http.client
import io
import os
import posixpath
-import random
import re
import socket
import sys
import time
import collections
+import tempfile
+import contextlib
import warnings
+
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
@@ -112,21 +114,40 @@ except ImportError:
else:
_have_ssl = True
+__all__ = [
+ # Classes
+ 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
+ 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
+ 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
+ 'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
+ 'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
+ 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+ 'UnknownHandler', 'HTTPErrorProcessor',
+ # Functions
+ 'urlopen', 'install_opener', 'build_opener',
+ 'pathname2url', 'url2pathname', 'getproxies',
+ # Legacy interface
+ 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
+]
+
# used in User-Agent header sent
__version__ = sys.version[:3]
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
- *, cafile=None, capath=None):
+ *, cafile=None, capath=None, cadefault=False):
global _opener
- if cafile or capath:
+ if cafile or capath or cadefault:
if not _have_ssl:
raise ValueError('SSL support not available')
context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
context.options |= ssl.OP_NO_SSLv2
- if cafile or capath:
+ if cafile or capath or cadefault:
context.verify_mode = ssl.CERT_REQUIRED
- context.load_verify_locations(cafile, capath)
+ if cafile or capath:
+ context.load_verify_locations(cafile, capath)
+ else:
+ context.set_default_verify_paths()
check_hostname = True
else:
check_hostname = False
@@ -142,17 +163,78 @@ def install_opener(opener):
global _opener
_opener = opener
-# TODO(jhylton): Make this work with the same global opener.
-_urlopener = None
+_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
- global _urlopener
- if not _urlopener:
- _urlopener = FancyURLopener()
- return _urlopener.retrieve(url, filename, reporthook, data)
+ """
+ Retrieve a URL into a temporary location on disk.
+
+ Requires a URL argument. If a filename is passed, it is used as
+ the temporary file location. The reporthook argument should be
+ a callable that accepts a block number, a read size, and the
+ total file size of the URL target. The data argument should be
+ valid URL encoded data.
+
+ If a filename is passed and the URL points to a local resource,
+ the result is a copy from local file to new file.
+
+ Returns a tuple containing the path to the newly created
+ data file as well as the resulting HTTPMessage object.
+ """
+ url_type, path = splittype(url)
+
+ with contextlib.closing(urlopen(url, data)) as fp:
+ headers = fp.info()
+
+ # Just return the local path and the "headers" for file://
+ # URLs. No sense in performing a copy unless requested.
+ if url_type == "file" and not filename:
+ return os.path.normpath(path), headers
+
+ # Handle temporary file setup.
+ if filename:
+ tfp = open(filename, 'wb')
+ else:
+ tfp = tempfile.NamedTemporaryFile(delete=False)
+ filename = tfp.name
+ _url_tempfiles.append(filename)
+
+ with tfp:
+ result = filename, headers
+ bs = 1024*8
+ size = -1
+ read = 0
+ blocknum = 0
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
+
+ if reporthook:
+ reporthook(blocknum, bs, size)
+
+ while True:
+ block = fp.read(bs)
+ if not block:
+ break
+ read += len(block)
+ tfp.write(block)
+ blocknum += 1
+ if reporthook:
+ reporthook(blocknum, bs, size)
+
+ if size >= 0 and read < size:
+ raise ContentTooShortError(
+ "retrieval incomplete: got only %i out of %i bytes"
+ % (read, size), result)
+
+ return result
def urlcleanup():
- if _urlopener:
- _urlopener.cleanup()
+ for temp_file in _url_tempfiles:
+ try:
+ os.unlink(temp_file)
+ except EnvironmentError:
+ pass
+
+ del _url_tempfiles[:]
global _opener
if _opener:
_opener = None
@@ -178,7 +260,8 @@ def request_host(request):
class Request:
def __init__(self, url, data=None, headers={},
- origin_req_host=None, unverifiable=False):
+ origin_req_host=None, unverifiable=False,
+ method=None):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
self.full_url = unwrap(url)
self.full_url, self.fragment = splittag(self.full_url)
@@ -192,6 +275,7 @@ class Request:
origin_req_host = request_host(self)
self.origin_req_host = origin_req_host
self.unverifiable = unverifiable
+ self.method = method
self._parse()
def _parse(self):
@@ -203,41 +287,60 @@ class Request:
self.host = unquote(self.host)
def get_method(self):
- if self.data is not None:
+ """Return a string indicating the HTTP request method."""
+ if self.method is not None:
+ return self.method
+ elif self.data is not None:
return "POST"
else:
return "GET"
+ def get_full_url(self):
+ if self.fragment:
+ return '%s#%s' % (self.full_url, self.fragment)
+ else:
+ return self.full_url
+
# Begin deprecated methods
def add_data(self, data):
+ msg = "Request.add_data method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
self.data = data
def has_data(self):
+ msg = "Request.has_data method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.data is not None
def get_data(self):
+ msg = "Request.get_data method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.data
- def get_full_url(self):
- if self.fragment:
- return '%s#%s' % (self.full_url, self.fragment)
- else:
- return self.full_url
-
def get_type(self):
+ msg = "Request.get_type method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.type
def get_host(self):
+ msg = "Request.get_host method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.host
def get_selector(self):
+ msg = "Request.get_selector method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.selector
def is_unverifiable(self):
+ msg = "Request.is_unverifiable method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.unverifiable
def get_origin_req_host(self):
+ msg = "Request.get_origin_req_host method is deprecated."
+ warnings.warn(msg, DeprecationWarning, stacklevel=1)
return self.origin_req_host
# End deprecated methods
@@ -682,8 +785,8 @@ class ProxyHandler(BaseHandler):
self.proxies = proxies
for type, url in proxies.items():
setattr(self, '%s_open' % type,
- lambda r, proxy=url, type=type, meth=self.proxy_open: \
- meth(r, proxy, type))
+ lambda r, proxy=url, type=type, meth=self.proxy_open:
+ meth(r, proxy, type))
def proxy_open(self, req, proxy, type):
orig_type = req.type
@@ -825,17 +928,23 @@ class AbstractBasicAuthHandler:
self.retried += 1
if authreq:
- mo = AbstractBasicAuthHandler.rx.search(authreq)
- if mo:
- scheme, quote, realm = mo.groups()
- if quote not in ["'", '"']:
- warnings.warn("Basic Auth Realm was unquoted",
- UserWarning, 2)
- if scheme.lower() == 'basic':
- response = self.retry_http_basic_auth(host, req, realm)
- if response and response.code != 401:
- self.retried = 0
- return response
+ scheme = authreq.split()[0]
+ if scheme.lower() != 'basic':
+ raise ValueError("AbstractBasicAuthHandler does not"
+ " support the following scheme: '%s'" %
+ scheme)
+ else:
+ mo = AbstractBasicAuthHandler.rx.search(authreq)
+ if mo:
+ scheme, quote, realm = mo.groups()
+ if quote not in ['"',"'"]:
+ warnings.warn("Basic Auth Realm was unquoted",
+ UserWarning, 2)
+ if scheme.lower() == 'basic':
+ response = self.retry_http_basic_auth(host, req, realm)
+ if response and response.code != 401:
+ self.retried = 0
+ return response
def retry_http_basic_auth(self, host, req, realm):
user, pw = self.passwd.find_user_password(realm, host)
@@ -878,9 +987,9 @@ class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
return response
-def randombytes(n):
- """Return n random bytes."""
- return os.urandom(n)
+# Return n random bytes.
+_randombytes = os.urandom
+
class AbstractDigestAuthHandler:
# Digest authentication is specified in RFC 2617.
@@ -921,6 +1030,9 @@ class AbstractDigestAuthHandler:
scheme = authreq.split()[0]
if scheme.lower() == 'digest':
return self.retry_http_digest_auth(req, authreq)
+ elif scheme.lower() != 'basic':
+ raise ValueError("AbstractDigestAuthHandler does not support"
+ " the following scheme: '%s'" % scheme)
def retry_http_digest_auth(self, req, auth):
token, challenge = auth.split(' ', 1)
@@ -941,7 +1053,7 @@ class AbstractDigestAuthHandler:
# authentication, and to provide some message integrity protection.
# This isn't a fabulous effort, but it's probably Good Enough.
s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
- b = s.encode("ascii") + randombytes(8)
+ b = s.encode("ascii") + _randombytes(8)
dig = hashlib.sha1(b).hexdigest()
return dig[:16]
@@ -1066,7 +1178,7 @@ class AbstractHTTPHandler(BaseHandler):
if request.data is not None: # POST
data = request.data
if isinstance(data, str):
- msg = "POST data should be bytes or an iterable of bytes. "\
+ msg = "POST data should be bytes or an iterable of bytes. " \
"It cannot be of type str."
raise TypeError(msg)
if not request.has_header('Content-type'):
@@ -1162,7 +1274,6 @@ class HTTPHandler(AbstractHTTPHandler):
http_request = AbstractHTTPHandler.do_request_
if hasattr(http.client, 'HTTPSConnection'):
- import ssl
class HTTPSHandler(AbstractHTTPHandler):
@@ -1177,6 +1288,8 @@ if hasattr(http.client, 'HTTPSConnection'):
https_request = AbstractHTTPHandler.do_request_
+ __all__.append('HTTPSHandler')
+
class HTTPCookieProcessor(BaseHandler):
def __init__(self, cookiejar=None):
import http.cookiejar
@@ -1463,6 +1576,9 @@ class URLopener:
# Constructor
def __init__(self, proxies=None, **x509):
+ msg = "%(class)s style of invoking requests is deprecated. " \
+ "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
+ warnings.warn(msg, DeprecationWarning, stacklevel=3)
if proxies is None:
proxies = getproxies()
assert hasattr(proxies, 'keys'), "proxies must be a mapping"
@@ -1542,6 +1658,8 @@ class URLopener:
return getattr(self, name)(url)
else:
return getattr(self, name)(url, data)
+ except HTTPError:
+ raise
except socket.error as msg:
raise IOError('socket error', msg).with_traceback(sys.exc_info()[2])
@@ -1760,8 +1878,8 @@ class URLopener:
def open_local_file(self, url):
"""Use local file."""
- import mimetypes, email.utils
- from io import StringIO
+ import email.utils
+ import mimetypes
host, file = splithost(url)
localname = url2pathname(file)
try:
@@ -1877,7 +1995,7 @@ class URLopener:
msg.append('Content-type: %s' % type)
if encoding == 'base64':
# XXX is this encoding/decoding ok?
- data = base64.decodebytes(data.encode('ascii')).decode('latin1')
+ data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
else:
data = unquote(data)
msg.append('Content-Length: %d' % len(data))
@@ -1970,7 +2088,6 @@ class FancyURLopener(URLopener):
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['www-authenticate']
- import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
@@ -1996,7 +2113,6 @@ class FancyURLopener(URLopener):
URLopener.http_error_default(self, url, fp,
errcode, errmsg, headers)
stuff = headers['proxy-authenticate']
- import re
match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
if not match:
URLopener.http_error_default(self, url, fp,
@@ -2282,8 +2398,6 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
}
"""
- import re
- import socket
from fnmatch import fnmatch
hostonly, port = splitport(host)
@@ -2386,7 +2500,6 @@ elif os.name == 'nt':
for p in proxyServer.split(';'):
protocol, address = p.split('=', 1)
# See if address has a type:// prefix
- import re
if not re.match('^([^/:]+)://', address):
address = '%s://%s' % (protocol, address)
proxies[protocol] = address
@@ -2418,7 +2531,6 @@ elif os.name == 'nt':
def proxy_bypass_registry(host):
try:
import winreg
- import re
except ImportError:
# Std modules, so should be around - but you never know!
return 0