diff options
author | Barney Gale <barney.gale@gmail.com> | 2024-11-19 14:01:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-19 14:01:49 (GMT) |
commit | 4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c (patch) | |
tree | 0bedd1382fcf31e644dcaf66bf5be0c3dcce9c1d /Lib | |
parent | a99dd23c1f5b9254651d9895714596d5e7942389 (diff) | |
download | cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.zip cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.gz cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.bz2 |
GH-84850: Remove `urllib.request.URLopener` and `FancyURLopener` (#125739)
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/test/test_urllib.py | 192 | ||||
-rw-r--r-- | Lib/test/test_urllibnet.py | 12 | ||||
-rw-r--r-- | Lib/urllib/request.py | 691 |
3 files changed, 33 insertions, 862 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 2c53ce3..71084a4 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -7,11 +7,9 @@ import http.client import email.message import io import unittest -from unittest.mock import patch from test import support from test.support import os_helper from test.support import socket_helper -from test.support import warnings_helper import os try: import ssl @@ -20,7 +18,6 @@ except ImportError: import sys import tempfile -from base64 import b64encode import collections @@ -35,32 +32,6 @@ def hexescape(char): hex_repr = "0%s" % hex_repr return "%" + hex_repr -# Shortcut for testing FancyURLopener -_urlopener = None - - -def urlopen(url, data=None, proxies=None): - """urlopen(url [, data]) -> open file-like object""" - global _urlopener - if proxies is not None: - opener = urllib.request.FancyURLopener(proxies=proxies) - elif not _urlopener: - opener = FancyURLopener() - _urlopener = opener - else: - opener = _urlopener - if data is None: - return opener.open(url) - else: - return opener.open(url, data) - - -def FancyURLopener(): - with warnings_helper.check_warnings( - ('FancyURLopener style of invoking requests is deprecated.', - DeprecationWarning)): - return urllib.request.FancyURLopener() - def fakehttp(fakedata, mock_close=False): class FakeSocket(io.BytesIO): @@ -119,26 +90,6 @@ class FakeHTTPMixin(object): http.client.HTTPConnection = self._connection_class -class FakeFTPMixin(object): - def fakeftp(self): - class FakeFtpWrapper(object): - def __init__(self, user, passwd, host, port, dirs, timeout=None, - persistent=True): - pass - - def retrfile(self, file, type): - return io.BytesIO(), 0 - - def close(self): - pass - - self._ftpwrapper_class = urllib.request.ftpwrapper - urllib.request.ftpwrapper = FakeFtpWrapper - - def unfakeftp(self): - urllib.request.ftpwrapper = self._ftpwrapper_class - - class urlopen_FileTests(unittest.TestCase): """Test urlopen() opening a temporary file. @@ -158,7 +109,7 @@ class urlopen_FileTests(unittest.TestCase): f.close() self.pathname = os_helper.TESTFN self.quoted_pathname = urllib.parse.quote(self.pathname) - self.returned_obj = urlopen("file:%s" % self.quoted_pathname) + self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname) def tearDown(self): """Shut down the open object""" @@ -205,7 +156,7 @@ class urlopen_FileTests(unittest.TestCase): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -214,7 +165,7 @@ class urlopen_FileTests(unittest.TestCase): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) @@ -339,13 +290,13 @@ class ProxyTests_withOrderedEnv(unittest.TestCase): self.assertEqual('http://somewhere:3128', proxies['http']) -class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): +class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): """Test urlopen() opening a fake http connection.""" def check_read(self, ver): self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!") try: - fp = urlopen("http://python.org/") + fp = urllib.request.urlopen("http://python.org/") self.assertEqual(fp.readline(), b"Hello!") self.assertEqual(fp.readline(), b"") self.assertEqual(fp.geturl(), 'http://python.org/') @@ -366,8 +317,8 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): def test_willclose(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") try: - resp = urlopen("http://www.python.org") - self.assertTrue(resp.fp.will_close) + resp = urllib.request.urlopen("http://www.python.org") + self.assertTrue(resp.will_close) finally: self.unfakehttp() @@ -392,9 +343,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): with self.assertRaisesRegex( InvalidURL, f"contain control.*{escaped_char_repr}"): urllib.request.urlopen(f"https:{schemeless_url}") - # This code path quotes the URL so there is no injection. - resp = urlopen(f"http:{schemeless_url}") - self.assertNotIn(char, resp.geturl()) finally: self.unfakehttp() @@ -416,11 +364,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): urllib.request.urlopen(f"https:{schemeless_url}") - # This code path quotes the URL so there is no injection. - resp = urlopen(f"http:{schemeless_url}") - self.assertNotIn(' ', resp.geturl()) - self.assertNotIn('\r', resp.geturl()) - self.assertNotIn('\n', resp.geturl()) finally: self.unfakehttp() @@ -435,9 +378,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): InvalidURL = http.client.InvalidURL with self.assertRaisesRegex( InvalidURL, f"contain control.*{escaped_char_repr}"): - urlopen(f"http:{schemeless_url}") + urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): - urlopen(f"https:{schemeless_url}") + urllib.request.urlopen(f"https:{schemeless_url}") finally: self.unfakehttp() @@ -450,9 +393,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin): InvalidURL = http.client.InvalidURL with self.assertRaisesRegex( InvalidURL, r"contain control.*\\r"): - urlopen(f"http:{schemeless_url}") + urllib.request.urlopen(f"http:{schemeless_url}") with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): - urlopen(f"https:{schemeless_url}") + urllib.request.urlopen(f"https:{schemeless_url}") finally: self.unfakehttp() @@ -476,7 +419,7 @@ Connection: close Content-Type: text/html; charset=iso-8859-1 ''', mock_close=True) try: - self.assertRaises(OSError, urlopen, "http://python.org/") + self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/") finally: self.unfakehttp() @@ -492,20 +435,20 @@ Content-Type: text/html; charset=iso-8859-1 try: msg = "Redirection to url 'file:" with self.assertRaisesRegex(urllib.error.HTTPError, msg): - urlopen("http://python.org/") + urllib.request.urlopen("http://python.org/") finally: self.unfakehttp() def test_redirect_limit_independent(self): # Ticket #12923: make sure independent requests each use their # own retry limit. - for i in range(FancyURLopener().maxtries): + for i in range(urllib.request.HTTPRedirectHandler.max_redirections): self.fakehttp(b'''HTTP/1.1 302 Found Location: file://guidocomputer.athome.com:/python/license Connection: close ''', mock_close=True) try: - self.assertRaises(urllib.error.HTTPError, urlopen, + self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen, "http://something") finally: self.unfakehttp() @@ -515,14 +458,14 @@ Connection: close # data. (#1680230) self.fakehttp(b'') try: - self.assertRaises(OSError, urlopen, "http://something") + self.assertRaises(OSError, urllib.request.urlopen, "http://something") finally: self.unfakehttp() def test_missing_localfile(self): # Test for #10836 with self.assertRaises(urllib.error.URLError) as e: - urlopen('file://localhost/a/file/which/doesnot/exists.py') + urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py') self.assertTrue(e.exception.filename) self.assertTrue(e.exception.reason) @@ -531,71 +474,28 @@ Connection: close tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') try: self.assertTrue(os.path.exists(tmp_file)) - with urlopen(tmp_fileurl) as fobj: + with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) finally: os.close(fd) os.unlink(tmp_file) self.assertFalse(os.path.exists(tmp_file)) with self.assertRaises(urllib.error.URLError): - urlopen(tmp_fileurl) + urllib.request.urlopen(tmp_fileurl) def test_ftp_nohost(self): test_ftp_url = 'ftp:///path' with self.assertRaises(urllib.error.URLError) as e: - urlopen(test_ftp_url) + urllib.request.urlopen(test_ftp_url) self.assertFalse(e.exception.filename) self.assertTrue(e.exception.reason) def test_ftp_nonexisting(self): with self.assertRaises(urllib.error.URLError) as e: - urlopen('ftp://localhost/a/file/which/doesnot/exists.py') + urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py') self.assertFalse(e.exception.filename) self.assertTrue(e.exception.reason) - @patch.object(urllib.request, 'MAXFTPCACHE', 0) - def test_ftp_cache_pruning(self): - self.fakeftp() - try: - urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, []) - urlopen('ftp://localhost') - finally: - self.unfakeftp() - - def test_userpass_inurl(self): - self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") - try: - fp = urlopen("http://user:pass@python.org/") - self.assertEqual(fp.readline(), b"Hello!") - self.assertEqual(fp.readline(), b"") - self.assertEqual(fp.geturl(), 'http://user:pass@python.org/') - self.assertEqual(fp.getcode(), 200) - finally: - self.unfakehttp() - - def test_userpass_inurl_w_spaces(self): - self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!") - try: - userpass = "a b:c d" - url = "http://{}@python.org/".format(userpass) - fakehttp_wrapper = http.client.HTTPConnection - authorization = ("Authorization: Basic %s\r\n" % - b64encode(userpass.encode("ASCII")).decode("ASCII")) - fp = urlopen(url) - # The authorization header must be in place - self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8")) - self.assertEqual(fp.readline(), b"Hello!") - self.assertEqual(fp.readline(), b"") - # the spaces are quoted in URL so no match - self.assertNotEqual(fp.geturl(), url) - self.assertEqual(fp.getcode(), 200) - finally: - self.unfakehttp() - - def test_URLopener_deprecation(self): - with warnings_helper.check_warnings(('',DeprecationWarning)): - urllib.request.URLopener() - class urlopen_DataTests(unittest.TestCase): """Test urlopen() opening a data URL.""" @@ -1620,56 +1520,6 @@ class Utility_Tests(unittest.TestCase): self.assertIsInstance(urllib.request.thishost(), tuple) -class URLopener_Tests(FakeHTTPMixin, unittest.TestCase): - """Testcase to test the open method of URLopener class.""" - - def test_quoted_open(self): - class DummyURLopener(urllib.request.URLopener): - def open_spam(self, url): - return url - with warnings_helper.check_warnings( - ('DummyURLopener style of invoking requests is deprecated.', - DeprecationWarning)): - self.assertEqual(DummyURLopener().open( - 'spam://example/ /'),'//example/%20/') - - # test the safe characters are not quoted by urlopen - self.assertEqual(DummyURLopener().open( - "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"), - "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/") - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_urlopener_retrieve_file(self): - with os_helper.temp_dir() as tmpdir: - fd, tmpfile = tempfile.mkstemp(dir=tmpdir) - os.close(fd) - fileurl = "file:" + urllib.request.pathname2url(tmpfile) - filename, _ = urllib.request.URLopener().retrieve(fileurl) - # Some buildbots have TEMP folder that uses a lowercase drive letter. - self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile)) - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_urlopener_retrieve_remote(self): - url = "http://www.python.org/file.txt" - self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!") - self.addCleanup(self.unfakehttp) - filename, _ = urllib.request.URLopener().retrieve(url) - self.assertEqual(os.path.splitext(filename)[1], ".txt") - - @warnings_helper.ignore_warnings(category=DeprecationWarning) - def test_local_file_open(self): - # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme - class DummyURLopener(urllib.request.URLopener): - def open_local_file(self, url): - return url - for url in ('local_file://example', 'local-file://example'): - self.assertRaises(OSError, urllib.request.urlopen, url) - self.assertRaises(OSError, urllib.request.URLopener().open, url) - self.assertRaises(OSError, urllib.request.URLopener().retrieve, url) - self.assertRaises(OSError, DummyURLopener().open, url) - self.assertRaises(OSError, DummyURLopener().retrieve, url) - - class RequestTests(unittest.TestCase): """Unit tests for urllib.request.Request.""" diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py index 49a3b5a..f824ddd 100644 --- a/Lib/test/test_urllibnet.py +++ b/Lib/test/test_urllibnet.py @@ -5,6 +5,7 @@ from test.support import socket_helper import contextlib import socket +import urllib.error import urllib.parse import urllib.request import os @@ -101,13 +102,10 @@ class urlopenNetworkTests(unittest.TestCase): # test getcode() with the fancy opener to get 404 error codes URL = self.url + "XXXinvalidXXX" with socket_helper.transient_internet(URL): - with self.assertWarns(DeprecationWarning): - open_url = urllib.request.FancyURLopener().open(URL) - try: - code = open_url.getcode() - finally: - open_url.close() - self.assertEqual(code, 404) + with self.assertRaises(urllib.error.URLError) as e: + with urllib.request.urlopen(URL): + pass + self.assertEqual(e.exception.code, 404) @support.requires_resource('walltime') def test_bad_address(self): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 18a837d..5c061a2 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -83,6 +83,7 @@ f = urllib.request.urlopen('https://www.python.org/') import base64 import bisect +import contextlib import email import hashlib import http.client @@ -94,15 +95,13 @@ import string import sys import time import tempfile -import contextlib -import warnings from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, _splittype, _splithost, _splitport, _splituser, _splitpasswd, - _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, + _splitattr, _splitvalue, _splittag, unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook @@ -128,7 +127,7 @@ __all__ = [ 'urlopen', 'install_opener', 'build_opener', 'pathname2url', 'url2pathname', 'getproxies', # Legacy interface - 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener', + 'urlretrieve', 'urlcleanup', ] # used in User-Agent header sent @@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, the reason phrase returned by the server --- instead of the response headers as it is specified in the documentation for HTTPResponse. - For FTP, file, and data URLs and requests explicitly handled by legacy - URLopener and FancyURLopener classes, this function returns a + For FTP, file, and data URLs, this function returns a urllib.response.addinfourl object. Note that None may be returned if no handler handles the request (though @@ -940,6 +938,7 @@ class AbstractBasicAuthHandler: for mo in AbstractBasicAuthHandler.rx.finditer(header): scheme, quote, realm = mo.groups() if quote not in ['"', "'"]: + import warnings warnings.warn("Basic Auth Realm was unquoted", UserWarning, 3) @@ -1495,7 +1494,7 @@ class FileHandler(BaseHandler): origurl = 'file://' + filename return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: - raise URLError(exp) + raise URLError(exp, exp.filename) raise URLError('file not on local host') def _safe_gethostbyname(host): @@ -1647,8 +1646,6 @@ class DataHandler(BaseHandler): # Code move from the old urllib module -MAXFTPCACHE = 10 # Trim the ftp cache beyond this size - # Helper for non-unix systems if os.name == 'nt': from nturl2path import url2pathname, pathname2url @@ -1668,678 +1665,6 @@ else: return quote(pathname) -ftpcache = {} - - -class URLopener: - """Class to open URLs. - This is a class rather than just a subroutine because we may need - more than one set of global protocol-specific options. - Note -- this is a base class for those who don't want the - automatic handling of errors type 302 (relocated) and 401 - (authorization needed).""" - - __tempfiles = None - - version = "Python-urllib/%s" % __version__ - - # Constructor - def __init__(self, proxies=None, **x509): - msg = "%(class)s style of invoking requests is deprecated. " \ - "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} - warnings.warn(msg, DeprecationWarning, stacklevel=3) - if proxies is None: - proxies = getproxies() - assert hasattr(proxies, 'keys'), "proxies must be a mapping" - self.proxies = proxies - self.key_file = x509.get('key_file') - self.cert_file = x509.get('cert_file') - self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] - self.__tempfiles = [] - self.__unlink = os.unlink # See cleanup() - self.tempcache = None - # Undocumented feature: if you assign {} to tempcache, - # it is used to cache files retrieved with - # self.retrieve(). This is not enabled by default - # since it does not work for changing documents (and I - # haven't got the logic to check expiration headers - # yet). - self.ftpcache = ftpcache - # Undocumented feature: you can use a different - # ftp cache by assigning to the .ftpcache member; - # in case you want logically independent URL openers - # XXX This is not threadsafe. Bah. - - def __del__(self): - self.close() - - def close(self): - self.cleanup() - - def cleanup(self): - # This code sometimes runs when the rest of this module - # has already been deleted, so it can't use any globals - # or import anything. - if self.__tempfiles: - for file in self.__tempfiles: - try: - self.__unlink(file) - except OSError: - pass - del self.__tempfiles[:] - if self.tempcache: - self.tempcache.clear() - - def addheader(self, *args): - """Add a header to be used by the HTTP interface only - e.g. u.addheader('Accept', 'sound/basic')""" - self.addheaders.append(args) - - # External interface - def open(self, fullurl, data=None): - """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(_to_bytes(fullurl)) - fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") - if self.tempcache and fullurl in self.tempcache: - filename, headers = self.tempcache[fullurl] - fp = open(filename, 'rb') - return addinfourl(fp, headers, fullurl) - urltype, url = _splittype(fullurl) - if not urltype: - urltype = 'file' - if urltype in self.proxies: - proxy = self.proxies[urltype] - urltype, proxyhost = _splittype(proxy) - host, selector = _splithost(proxyhost) - url = (host, fullurl) # Signal special case to open_*() - else: - proxy = None - name = 'open_' + urltype - self.type = urltype - name = name.replace('-', '_') - if not hasattr(self, name) or name == 'open_local_file': - if proxy: - return self.open_unknown_proxy(proxy, fullurl, data) - else: - return self.open_unknown(fullurl, data) - try: - if data is None: - return getattr(self, name)(url) - else: - return getattr(self, name)(url, data) - except (HTTPError, URLError): - raise - except OSError as msg: - raise OSError('socket error', msg) from msg - - def open_unknown(self, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) - raise OSError('url error', 'unknown url type', type) - - def open_unknown_proxy(self, proxy, fullurl, data=None): - """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) - raise OSError('url error', 'invalid proxy for %s' % type, proxy) - - # External interface - def retrieve(self, url, filename=None, reporthook=None, data=None): - """retrieve(url) returns (filename, headers) for a local object - or (tempfilename, headers) for a remote object.""" - url = unwrap(_to_bytes(url)) - if self.tempcache and url in self.tempcache: - return self.tempcache[url] - type, url1 = _splittype(url) - if filename is None and (not type or type == 'file'): - try: - fp = self.open_local_file(url1) - hdrs = fp.info() - fp.close() - return url2pathname(_splithost(url1)[1]), hdrs - except OSError: - pass - fp = self.open(url, data) - try: - headers = fp.info() - if filename: - tfp = open(filename, 'wb') - else: - garbage, path = _splittype(url) - garbage, path = _splithost(path or "") - path, garbage = _splitquery(path or "") - path, garbage = _splitattr(path or "") - suffix = os.path.splitext(path)[1] - (fd, filename) = tempfile.mkstemp(suffix) - self.__tempfiles.append(filename) - tfp = os.fdopen(fd, 'wb') - try: - result = filename, headers - if self.tempcache is not None: - self.tempcache[url] = result - bs = 1024*8 - size = -1 - read = 0 - blocknum = 0 - if "content-length" in headers: - size = int(headers["Content-Length"]) - if reporthook: - reporthook(blocknum, bs, size) - while block := fp.read(bs): - read += len(block) - tfp.write(block) - blocknum += 1 - if reporthook: - reporthook(blocknum, bs, size) - finally: - tfp.close() - finally: - fp.close() - - # raise exception if actual size does not match content-length header - if size >= 0 and read < size: - raise ContentTooShortError( - "retrieval incomplete: got only %i out of %i bytes" - % (read, size), result) - - return result - - # Each method named open_<type> knows how to open that type of URL - - def _open_generic_http(self, connection_factory, url, data): - """Make an HTTP connection using connection_class. - - This is an internal method that should be called from - open_http() or open_https(). - - Arguments: - - connection_factory should take a host name and return an - HTTPConnection instance. - - url is the url to retrieval or a host, relative-path pair. - - data is payload for a POST request or None. - """ - - user_passwd = None - proxy_passwd= None - if isinstance(url, str): - host, selector = _splithost(url) - if host: - user_passwd, host = _splituser(host) - host = unquote(host) - realhost = host - else: - host, selector = url - # check whether the proxy contains authorization information - proxy_passwd, host = _splituser(host) - # now we proceed with the url we want to obtain - urltype, rest = _splittype(selector) - url = rest - user_passwd = None - if urltype.lower() != 'http': - realhost = None - else: - realhost, rest = _splithost(rest) - if realhost: - user_passwd, realhost = _splituser(realhost) - if user_passwd: - selector = "%s://%s%s" % (urltype, realhost, rest) - if proxy_bypass(realhost): - host = realhost - - if not host: raise OSError('http error', 'no host given') - - if proxy_passwd: - proxy_passwd = unquote(proxy_passwd) - proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') - else: - proxy_auth = None - - if user_passwd: - user_passwd = unquote(user_passwd) - auth = base64.b64encode(user_passwd.encode()).decode('ascii') - else: - auth = None - http_conn = connection_factory(host) - headers = {} - if proxy_auth: - headers["Proxy-Authorization"] = "Basic %s" % proxy_auth - if auth: - headers["Authorization"] = "Basic %s" % auth - if realhost: - headers["Host"] = realhost - - # Add Connection:close as we don't support persistent connections yet. - # This helps in closing the socket and avoiding ResourceWarning - - headers["Connection"] = "close" - - for header, value in self.addheaders: - headers[header] = value - - if data is not None: - headers["Content-Type"] = "application/x-www-form-urlencoded" - http_conn.request("POST", selector, data, headers) - else: - http_conn.request("GET", selector, headers=headers) - - try: - response = http_conn.getresponse() - except http.client.BadStatusLine: - # something went wrong with the HTTP status line - raise URLError("http protocol error: bad status line") - - # According to RFC 2616, "2xx" code indicates that the client's - # request was successfully received, understood, and accepted. - if 200 <= response.status < 300: - return addinfourl(response, response.msg, "http:" + url, - response.status) - else: - return self.http_error( - url, response.fp, - response.status, response.reason, response.msg, data) - - def open_http(self, url, data=None): - """Use HTTP protocol.""" - return self._open_generic_http(http.client.HTTPConnection, url, data) - - def http_error(self, url, fp, errcode, errmsg, headers, data=None): - """Handle http errors. - - Derived class can override this, or provide specific handlers - named http_error_DDD where DDD is the 3-digit error code.""" - # First check if there's a specific handler for this error - name = 'http_error_%d' % errcode - if hasattr(self, name): - method = getattr(self, name) - if data is None: - result = method(url, fp, errcode, errmsg, headers) - else: - result = method(url, fp, errcode, errmsg, headers, data) - if result: return result - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handler: close the connection and raise OSError.""" - fp.close() - raise HTTPError(url, errcode, errmsg, headers, None) - - if _have_ssl: - def _https_connection(self, host): - if self.key_file or self.cert_file: - http_version = http.client.HTTPSConnection._http_vsn - context = http.client._create_https_context(http_version) - context.load_cert_chain(self.cert_file, self.key_file) - # cert and key file means the user wants to authenticate. - # enable TLS 1.3 PHA implicitly even for custom contexts. - if context.post_handshake_auth is not None: - context.post_handshake_auth = True - else: - context = None - return http.client.HTTPSConnection(host, context=context) - - def open_https(self, url, data=None): - """Use HTTPS protocol.""" - return self._open_generic_http(self._https_connection, url, data) - - def open_file(self, url): - """Use local file or FTP depending on form of URL.""" - if not isinstance(url, str): - raise URLError('file error: proxy support for file protocol currently not implemented') - if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': - raise ValueError("file:// scheme is supported only on localhost") - else: - return self.open_local_file(url) - - def open_local_file(self, url): - """Use local file.""" - import email.utils - import mimetypes - host, file = _splithost(url) - localname = url2pathname(file) - try: - stats = os.stat(localname) - except OSError as e: - raise URLError(e.strerror, e.filename) - size = stats.st_size - modified = email.utils.formatdate(stats.st_mtime, usegmt=True) - mtype = mimetypes.guess_type(url)[0] - headers = email.message_from_string( - 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % - (mtype or 'text/plain', size, modified)) - if not host: - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - return addinfourl(open(localname, 'rb'), headers, urlfile) - host, port = _splitport(host) - if (not port - and socket.gethostbyname(host) in ((localhost(),) + thishost())): - urlfile = file - if file[:1] == '/': - urlfile = 'file://' + file - elif file[:2] == './': - raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) - return addinfourl(open(localname, 'rb'), headers, urlfile) - raise URLError('local file error: not on local host') - - def open_ftp(self, url): - """Use FTP protocol.""" - if not isinstance(url, str): - raise URLError('ftp error: proxy support for ftp protocol currently not implemented') - import mimetypes - host, path = _splithost(url) - if not host: raise URLError('ftp error: no host given') - host, port = _splitport(host) - user, host = _splituser(host) - if user: user, passwd = _splitpasswd(user) - else: passwd = None - host = unquote(host) - user = unquote(user or '') - passwd = unquote(passwd or '') - host = socket.gethostbyname(host) - if not port: - import ftplib - port = ftplib.FTP_PORT - else: - port = int(port) - path, attrs = _splitattr(path) - path = unquote(path) - dirs = path.split('/') - dirs, file = dirs[:-1], dirs[-1] - if dirs and not dirs[0]: dirs = dirs[1:] - if dirs and not dirs[0]: dirs[0] = '/' - key = user, host, port, '/'.join(dirs) - # XXX thread unsafe! - if len(self.ftpcache) > MAXFTPCACHE: - # Prune the cache, rather arbitrarily - for k in list(self.ftpcache): - if k != key: - v = self.ftpcache[k] - del self.ftpcache[k] - v.close() - try: - if key not in self.ftpcache: - self.ftpcache[key] = \ - ftpwrapper(user, passwd, host, port, dirs) - if not file: type = 'D' - else: type = 'I' - for attr in attrs: - attr, value = _splitvalue(attr) - if attr.lower() == 'type' and \ - value in ('a', 'A', 'i', 'I', 'd', 'D'): - type = value.upper() - (fp, retrlen) = self.ftpcache[key].retrfile(file, type) - mtype = mimetypes.guess_type("ftp:" + url)[0] - headers = "" - if mtype: - headers += "Content-Type: %s\n" % mtype - if retrlen is not None and retrlen >= 0: - headers += "Content-Length: %d\n" % retrlen - headers = email.message_from_string(headers) - return addinfourl(fp, headers, "ftp:" + url) - except ftperrors() as exp: - raise URLError(f'ftp error: {exp}') from exp - - def open_data(self, url, data=None): - """Use "data" URL.""" - if not isinstance(url, str): - raise URLError('data error: proxy support for data protocol currently not implemented') - # ignore POSTed data - # - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - try: - [type, data] = url.split(',', 1) - except ValueError: - raise OSError('data error', 'bad data URL') - if not type: - type = 'text/plain;charset=US-ASCII' - semi = type.rfind(';') - if semi >= 0 and '=' not in type[semi:]: - encoding = type[semi+1:] - type = type[:semi] - else: - encoding = '' - msg = [] - msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', - time.gmtime(time.time()))) - msg.append('Content-type: %s' % type) - if encoding == 'base64': - # XXX is this encoding/decoding ok? - data = base64.decodebytes(data.encode('ascii')).decode('latin-1') - else: - data = unquote(data) - msg.append('Content-Length: %d' % len(data)) - msg.append('') - msg.append(data) - msg = '\n'.join(msg) - headers = email.message_from_string(msg) - f = io.StringIO(msg) - #f.fileno = None # needed for addinfourl - return addinfourl(f, headers, url) - - -class FancyURLopener(URLopener): - """Derived class with handlers for errors we can handle (perhaps).""" - - def __init__(self, *args, **kwargs): - URLopener.__init__(self, *args, **kwargs) - self.auth_cache = {} - self.tries = 0 - self.maxtries = 10 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - """Default error handling -- don't raise an exception.""" - return addinfourl(fp, headers, "http:" + url, errcode) - - def http_error_302(self, url, fp, errcode, errmsg, headers, data=None): - """Error 302 -- relocated (temporarily).""" - self.tries += 1 - try: - if self.maxtries and self.tries >= self.maxtries: - if hasattr(self, "http_error_500"): - meth = self.http_error_500 - else: - meth = self.http_error_default - return meth(url, fp, 500, - "Internal Server Error: Redirect Recursion", - headers) - result = self.redirect_internal(url, fp, errcode, errmsg, - headers, data) - return result - finally: - self.tries = 0 - - def redirect_internal(self, url, fp, errcode, errmsg, headers, data): - if 'location' in headers: - newurl = headers['location'] - elif 'uri' in headers: - newurl = headers['uri'] - else: - return - fp.close() - - # In case the server sent a relative URL, join with original: - newurl = urljoin(self.type + ":" + url, newurl) - - urlparts = urlparse(newurl) - - # For security reasons, we don't allow redirection to anything other - # than http, https and ftp. - - # We are using newer HTTPError with older redirect_internal method - # This older method will get deprecated in 3.3 - - if urlparts.scheme not in ('http', 'https', 'ftp', ''): - raise HTTPError(newurl, errcode, - errmsg + - " Redirection to url '%s' is not allowed." % newurl, - headers, fp) - - return self.open(newurl) - - def http_error_301(self, url, fp, errcode, errmsg, headers, data=None): - """Error 301 -- also relocated (permanently).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_303(self, url, fp, errcode, errmsg, headers, data=None): - """Error 303 -- also relocated (essentially identical to 302).""" - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - - def http_error_307(self, url, fp, errcode, errmsg, headers, data=None): - """Error 307 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_302(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_308(self, url, fp, errcode, errmsg, headers, data=None): - """Error 308 -- relocated, but turn POST into error.""" - if data is None: - return self.http_error_301(url, fp, errcode, errmsg, headers, data) - else: - return self.http_error_default(url, fp, errcode, errmsg, headers) - - def http_error_401(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 401 -- authentication required. - This function supports Basic authentication only.""" - if 'www-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['www-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def http_error_407(self, url, fp, errcode, errmsg, headers, data=None, - retry=False): - """Error 407 -- proxy authentication required. - This function supports Basic authentication only.""" - if 'proxy-authenticate' not in headers: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - stuff = headers['proxy-authenticate'] - match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff) - if not match: - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - scheme, realm = match.groups() - if scheme.lower() != 'basic': - URLopener.http_error_default(self, url, fp, - errcode, errmsg, headers) - if not retry: - URLopener.http_error_default(self, url, fp, errcode, errmsg, - headers) - name = 'retry_proxy_' + self.type + '_basic_auth' - if data is None: - return getattr(self,name)(url, realm) - else: - return getattr(self,name)(url, realm, data) - - def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - newurl = 'http://' + host + selector - proxy = self.proxies['http'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['http'] = 'http://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - newurl = 'https://' + host + selector - proxy = self.proxies['https'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) - i = proxyhost.find('@') + 1 - proxyhost = proxyhost[i:] - user, passwd = self.get_user_passwd(proxyhost, realm, i) - if not (user or passwd): return None - proxyhost = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), proxyhost) - self.proxies['https'] = 'https://' + proxyhost + proxyselector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'http://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def retry_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) - i = host.find('@') + 1 - host = host[i:] - user, passwd = self.get_user_passwd(host, realm, i) - if not (user or passwd): return None - host = "%s:%s@%s" % (quote(user, safe=''), - quote(passwd, safe=''), host) - newurl = 'https://' + host + selector - if data is None: - return self.open(newurl) - else: - return self.open(newurl, data) - - def get_user_passwd(self, host, realm, clear_cache=0): - key = realm + '@' + host.lower() - if key in self.auth_cache: - if clear_cache: - del self.auth_cache[key] - else: - return self.auth_cache[key] - user, passwd = self.prompt_user_passwd(host, realm) - if user or passwd: self.auth_cache[key] = (user, passwd) - return user, passwd - - def prompt_user_passwd(self, host, realm): - """Override this in a GUI environment!""" - import getpass - try: - user = input("Enter username for %s at %s: " % (realm, host)) - passwd = getpass.getpass("Enter password for %s in %s at %s: " % - (user, realm, host)) - return user, passwd - except KeyboardInterrupt: - print() - return None, None - - # Utility functions _localhost = None @@ -2485,9 +1810,7 @@ def getproxies_environment(): """Return a dictionary of scheme -> proxy server URL mappings. Scan the environment for variables named <scheme>_proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. + this seems to be the standard convention. """ # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only |