GH-84850: Remove `urllib.request.URLopener` and `FancyURLopener` (#125739)

author: Barney Gale <barney.gale@gmail.com> 2024-11-19 14:01:49 (GMT)
committer: GitHub <noreply@github.com> 2024-11-19 14:01:49 (GMT)
commit: 4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c (patch)
tree: 0bedd1382fcf31e644dcaf66bf5be0c3dcce9c1d /Lib
parent: a99dd23c1f5b9254651d9895714596d5e7942389 (diff)
download: cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.zip,
          cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.gz,
          cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.bz2
3 files changed, 33 insertions, 862 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 2c53ce3..71084a4 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -7,11 +7,9 @@ import http.client
 import email.message
 import io
 import unittest
-from unittest.mock import patch
 from test import support
 from test.support import os_helper
 from test.support import socket_helper
-from test.support import warnings_helper
 import os
 try:
     import ssl
@@ -20,7 +18,6 @@ except ImportError:
 import sys
 import tempfile
 
-from base64 import b64encode
 import collections
 
 
@@ -35,32 +32,6 @@ def hexescape(char):
         hex_repr = "0%s" % hex_repr
     return "%" + hex_repr
 
-# Shortcut for testing FancyURLopener
-_urlopener = None
-
-
-def urlopen(url, data=None, proxies=None):
-    """urlopen(url [, data]) -> open file-like object"""
-    global _urlopener
-    if proxies is not None:
-        opener = urllib.request.FancyURLopener(proxies=proxies)
-    elif not _urlopener:
-        opener = FancyURLopener()
-        _urlopener = opener
-    else:
-        opener = _urlopener
-    if data is None:
-        return opener.open(url)
-    else:
-        return opener.open(url, data)
-
-
-def FancyURLopener():
-    with warnings_helper.check_warnings(
-            ('FancyURLopener style of invoking requests is deprecated.',
-            DeprecationWarning)):
-        return urllib.request.FancyURLopener()
-
 
 def fakehttp(fakedata, mock_close=False):
     class FakeSocket(io.BytesIO):
@@ -119,26 +90,6 @@ class FakeHTTPMixin(object):
         http.client.HTTPConnection = self._connection_class
 
 
-class FakeFTPMixin(object):
-    def fakeftp(self):
-        class FakeFtpWrapper(object):
-            def __init__(self,  user, passwd, host, port, dirs, timeout=None,
-                     persistent=True):
-                pass
-
-            def retrfile(self, file, type):
-                return io.BytesIO(), 0
-
-            def close(self):
-                pass
-
-        self._ftpwrapper_class = urllib.request.ftpwrapper
-        urllib.request.ftpwrapper = FakeFtpWrapper
-
-    def unfakeftp(self):
-        urllib.request.ftpwrapper = self._ftpwrapper_class
-
-
 class urlopen_FileTests(unittest.TestCase):
     """Test urlopen() opening a temporary file.
 
@@ -158,7 +109,7 @@ class urlopen_FileTests(unittest.TestCase):
             f.close()
         self.pathname = os_helper.TESTFN
         self.quoted_pathname = urllib.parse.quote(self.pathname)
-        self.returned_obj = urlopen("file:%s" % self.quoted_pathname)
+        self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname)
 
     def tearDown(self):
         """Shut down the open object"""
@@ -205,7 +156,7 @@ class urlopen_FileTests(unittest.TestCase):
         self.assertIsInstance(self.returned_obj.headers, email.message.Message)
 
     def test_url(self):
-        self.assertEqual(self.returned_obj.url, self.quoted_pathname)
+        self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname)
 
     def test_status(self):
         self.assertIsNone(self.returned_obj.status)
@@ -214,7 +165,7 @@ class urlopen_FileTests(unittest.TestCase):
         self.assertIsInstance(self.returned_obj.info(), email.message.Message)
 
     def test_geturl(self):
-        self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)
+        self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname)
 
     def test_getcode(self):
         self.assertIsNone(self.returned_obj.getcode())
@@ -339,13 +290,13 @@ class ProxyTests_withOrderedEnv(unittest.TestCase):
         self.assertEqual('http://somewhere:3128', proxies['http'])
 
 
-class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
+class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
     """Test urlopen() opening a fake http connection."""
 
     def check_read(self, ver):
         self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
         try:
-            fp = urlopen("http://python.org/")
+            fp = urllib.request.urlopen("http://python.org/")
             self.assertEqual(fp.readline(), b"Hello!")
             self.assertEqual(fp.readline(), b"")
             self.assertEqual(fp.geturl(), 'http://python.org/')
@@ -366,8 +317,8 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
     def test_willclose(self):
         self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
         try:
-            resp = urlopen("http://www.python.org")
-            self.assertTrue(resp.fp.will_close)
+            resp = urllib.request.urlopen("http://www.python.org")
+            self.assertTrue(resp.will_close)
         finally:
             self.unfakehttp()
 
@@ -392,9 +343,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
                 with self.assertRaisesRegex(
                     InvalidURL, f"contain control.*{escaped_char_repr}"):
                     urllib.request.urlopen(f"https:{schemeless_url}")
-                # This code path quotes the URL so there is no injection.
-                resp = urlopen(f"http:{schemeless_url}")
-                self.assertNotIn(char, resp.geturl())
             finally:
                 self.unfakehttp()
 
@@ -416,11 +364,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
                 urllib.request.urlopen(f"http:{schemeless_url}")
             with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
                 urllib.request.urlopen(f"https:{schemeless_url}")
-            # This code path quotes the URL so there is no injection.
-            resp = urlopen(f"http:{schemeless_url}")
-            self.assertNotIn(' ', resp.geturl())
-            self.assertNotIn('\r', resp.geturl())
-            self.assertNotIn('\n', resp.geturl())
         finally:
             self.unfakehttp()
 
@@ -435,9 +378,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
                 InvalidURL = http.client.InvalidURL
                 with self.assertRaisesRegex(
                     InvalidURL, f"contain control.*{escaped_char_repr}"):
-                    urlopen(f"http:{schemeless_url}")
+                    urllib.request.urlopen(f"http:{schemeless_url}")
                 with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
-                    urlopen(f"https:{schemeless_url}")
+                    urllib.request.urlopen(f"https:{schemeless_url}")
             finally:
                 self.unfakehttp()
 
@@ -450,9 +393,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
             InvalidURL = http.client.InvalidURL
             with self.assertRaisesRegex(
                 InvalidURL, r"contain control.*\\r"):
-                urlopen(f"http:{schemeless_url}")
+                urllib.request.urlopen(f"http:{schemeless_url}")
             with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
-                urlopen(f"https:{schemeless_url}")
+                urllib.request.urlopen(f"https:{schemeless_url}")
         finally:
             self.unfakehttp()
 
@@ -476,7 +419,7 @@ Connection: close
 Content-Type: text/html; charset=iso-8859-1
 ''', mock_close=True)
         try:
-            self.assertRaises(OSError, urlopen, "http://python.org/")
+            self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/")
         finally:
             self.unfakehttp()
 
@@ -492,20 +435,20 @@ Content-Type: text/html; charset=iso-8859-1
         try:
             msg = "Redirection to url 'file:"
             with self.assertRaisesRegex(urllib.error.HTTPError, msg):
-                urlopen("http://python.org/")
+                urllib.request.urlopen("http://python.org/")
         finally:
             self.unfakehttp()
 
     def test_redirect_limit_independent(self):
         # Ticket #12923: make sure independent requests each use their
         # own retry limit.
-        for i in range(FancyURLopener().maxtries):
+        for i in range(urllib.request.HTTPRedirectHandler.max_redirections):
             self.fakehttp(b'''HTTP/1.1 302 Found
 Location: file://guidocomputer.athome.com:/python/license
 Connection: close
 ''', mock_close=True)
             try:
-                self.assertRaises(urllib.error.HTTPError, urlopen,
+                self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen,
                     "http://something")
             finally:
                 self.unfakehttp()
@@ -515,14 +458,14 @@ Connection: close
         # data. (#1680230)
         self.fakehttp(b'')
         try:
-            self.assertRaises(OSError, urlopen, "http://something")
+            self.assertRaises(OSError, urllib.request.urlopen, "http://something")
         finally:
             self.unfakehttp()
 
     def test_missing_localfile(self):
         # Test for #10836
         with self.assertRaises(urllib.error.URLError) as e:
-            urlopen('file://localhost/a/file/which/doesnot/exists.py')
+            urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py')
         self.assertTrue(e.exception.filename)
         self.assertTrue(e.exception.reason)
 
@@ -531,71 +474,28 @@ Connection: close
         tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
         try:
             self.assertTrue(os.path.exists(tmp_file))
-            with urlopen(tmp_fileurl) as fobj:
+            with urllib.request.urlopen(tmp_fileurl) as fobj:
                 self.assertTrue(fobj)
         finally:
             os.close(fd)
             os.unlink(tmp_file)
         self.assertFalse(os.path.exists(tmp_file))
         with self.assertRaises(urllib.error.URLError):
-            urlopen(tmp_fileurl)
+            urllib.request.urlopen(tmp_fileurl)
 
     def test_ftp_nohost(self):
         test_ftp_url = 'ftp:///path'
         with self.assertRaises(urllib.error.URLError) as e:
-            urlopen(test_ftp_url)
+            urllib.request.urlopen(test_ftp_url)
         self.assertFalse(e.exception.filename)
         self.assertTrue(e.exception.reason)
 
     def test_ftp_nonexisting(self):
         with self.assertRaises(urllib.error.URLError) as e:
-            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
+            urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
         self.assertFalse(e.exception.filename)
         self.assertTrue(e.exception.reason)
 
-    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
-    def test_ftp_cache_pruning(self):
-        self.fakeftp()
-        try:
-            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
-            urlopen('ftp://localhost')
-        finally:
-            self.unfakeftp()
-
-    def test_userpass_inurl(self):
-        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
-        try:
-            fp = urlopen("http://user:pass@python.org/")
-            self.assertEqual(fp.readline(), b"Hello!")
-            self.assertEqual(fp.readline(), b"")
-            self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
-            self.assertEqual(fp.getcode(), 200)
-        finally:
-            self.unfakehttp()
-
-    def test_userpass_inurl_w_spaces(self):
-        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
-        try:
-            userpass = "a b:c d"
-            url = "http://{}@python.org/".format(userpass)
-            fakehttp_wrapper = http.client.HTTPConnection
-            authorization = ("Authorization: Basic %s\r\n" %
-                             b64encode(userpass.encode("ASCII")).decode("ASCII"))
-            fp = urlopen(url)
-            # The authorization header must be in place
-            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
-            self.assertEqual(fp.readline(), b"Hello!")
-            self.assertEqual(fp.readline(), b"")
-            # the spaces are quoted in URL so no match
-            self.assertNotEqual(fp.geturl(), url)
-            self.assertEqual(fp.getcode(), 200)
-        finally:
-            self.unfakehttp()
-
-    def test_URLopener_deprecation(self):
-        with warnings_helper.check_warnings(('',DeprecationWarning)):
-            urllib.request.URLopener()
-
 
 class urlopen_DataTests(unittest.TestCase):
     """Test urlopen() opening a data URL."""
@@ -1620,56 +1520,6 @@ class Utility_Tests(unittest.TestCase):
         self.assertIsInstance(urllib.request.thishost(), tuple)
 
 
-class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
-    """Testcase to test the open method of URLopener class."""
-
-    def test_quoted_open(self):
-        class DummyURLopener(urllib.request.URLopener):
-            def open_spam(self, url):
-                return url
-        with warnings_helper.check_warnings(
-                ('DummyURLopener style of invoking requests is deprecated.',
-                DeprecationWarning)):
-            self.assertEqual(DummyURLopener().open(
-                'spam://example/ /'),'//example/%20/')
-
-            # test the safe characters are not quoted by urlopen
-            self.assertEqual(DummyURLopener().open(
-                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
-                "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
-
-    @warnings_helper.ignore_warnings(category=DeprecationWarning)
-    def test_urlopener_retrieve_file(self):
-        with os_helper.temp_dir() as tmpdir:
-            fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
-            os.close(fd)
-            fileurl = "file:" + urllib.request.pathname2url(tmpfile)
-            filename, _ = urllib.request.URLopener().retrieve(fileurl)
-            # Some buildbots have TEMP folder that uses a lowercase drive letter.
-            self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
-
-    @warnings_helper.ignore_warnings(category=DeprecationWarning)
-    def test_urlopener_retrieve_remote(self):
-        url = "http://www.python.org/file.txt"
-        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
-        self.addCleanup(self.unfakehttp)
-        filename, _ = urllib.request.URLopener().retrieve(url)
-        self.assertEqual(os.path.splitext(filename)[1], ".txt")
-
-    @warnings_helper.ignore_warnings(category=DeprecationWarning)
-    def test_local_file_open(self):
-        # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
-        class DummyURLopener(urllib.request.URLopener):
-            def open_local_file(self, url):
-                return url
-        for url in ('local_file://example', 'local-file://example'):
-            self.assertRaises(OSError, urllib.request.urlopen, url)
-            self.assertRaises(OSError, urllib.request.URLopener().open, url)
-            self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
-            self.assertRaises(OSError, DummyURLopener().open, url)
-            self.assertRaises(OSError, DummyURLopener().retrieve, url)
-
-
 class RequestTests(unittest.TestCase):
     """Unit tests for urllib.request.Request."""
 
diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py
index 49a3b5a..f824ddd 100644
--- a/Lib/test/test_urllibnet.py
+++ b/Lib/test/test_urllibnet.py
@@ -5,6 +5,7 @@ from test.support import socket_helper
 
 import contextlib
 import socket
+import urllib.error
 import urllib.parse
 import urllib.request
 import os
@@ -101,13 +102,10 @@ class urlopenNetworkTests(unittest.TestCase):
         # test getcode() with the fancy opener to get 404 error codes
         URL = self.url + "XXXinvalidXXX"
         with socket_helper.transient_internet(URL):
-            with self.assertWarns(DeprecationWarning):
-                open_url = urllib.request.FancyURLopener().open(URL)
-            try:
-                code = open_url.getcode()
-            finally:
-                open_url.close()
-            self.assertEqual(code, 404)
+            with self.assertRaises(urllib.error.URLError) as e:
+                with urllib.request.urlopen(URL):
+                    pass
+            self.assertEqual(e.exception.code, 404)
 
     @support.requires_resource('walltime')
     def test_bad_address(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 18a837d..5c061a2 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -83,6 +83,7 @@ f = urllib.request.urlopen('https://www.python.org/')
 
 import base64
 import bisect
+import contextlib
 import email
 import hashlib
 import http.client
@@ -94,15 +95,13 @@ import string
 import sys
 import time
 import tempfile
-import contextlib
-import warnings
 
 
 from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
     _splittype, _splithost, _splitport, _splituser, _splitpasswd,
-    _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
+    _splitattr, _splitvalue, _splittag,
     unquote_to_bytes, urlunparse)
 from urllib.response import addinfourl, addclosehook
 
@@ -128,7 +127,7 @@ __all__ = [
     'urlopen', 'install_opener', 'build_opener',
     'pathname2url', 'url2pathname', 'getproxies',
     # Legacy interface
-    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
+    'urlretrieve', 'urlcleanup',
 ]
 
 # used in User-Agent header sent
@@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
     the reason phrase returned by the server --- instead of the response
     headers as it is specified in the documentation for HTTPResponse.
 
-    For FTP, file, and data URLs and requests explicitly handled by legacy
-    URLopener and FancyURLopener classes, this function returns a
+    For FTP, file, and data URLs, this function returns a
     urllib.response.addinfourl object.
 
     Note that None may be returned if no handler handles the request (though
@@ -940,6 +938,7 @@ class AbstractBasicAuthHandler:
         for mo in AbstractBasicAuthHandler.rx.finditer(header):
             scheme, quote, realm = mo.groups()
             if quote not in ['"', "'"]:
+                import warnings
                 warnings.warn("Basic Auth Realm was unquoted",
                               UserWarning, 3)
 
@@ -1495,7 +1494,7 @@ class FileHandler(BaseHandler):
                     origurl = 'file://' + filename
                 return addinfourl(open(localfile, 'rb'), headers, origurl)
         except OSError as exp:
-            raise URLError(exp)
+            raise URLError(exp, exp.filename)
         raise URLError('file not on local host')
 
 def _safe_gethostbyname(host):
@@ -1647,8 +1646,6 @@ class DataHandler(BaseHandler):
 
 # Code move from the old urllib module
 
-MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
-
 # Helper for non-unix systems
 if os.name == 'nt':
     from nturl2path import url2pathname, pathname2url
@@ -1668,678 +1665,6 @@ else:
         return quote(pathname)
 
 
-ftpcache = {}
-
-
-class URLopener:
-    """Class to open URLs.
-    This is a class rather than just a subroutine because we may need
-    more than one set of global protocol-specific options.
-    Note -- this is a base class for those who don't want the
-    automatic handling of errors type 302 (relocated) and 401
-    (authorization needed)."""
-
-    __tempfiles = None
-
-    version = "Python-urllib/%s" % __version__
-
-    # Constructor
-    def __init__(self, proxies=None, **x509):
-        msg = "%(class)s style of invoking requests is deprecated. " \
-              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
-        warnings.warn(msg, DeprecationWarning, stacklevel=3)
-        if proxies is None:
-            proxies = getproxies()
-        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
-        self.proxies = proxies
-        self.key_file = x509.get('key_file')
-        self.cert_file = x509.get('cert_file')
-        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
-        self.__tempfiles = []
-        self.__unlink = os.unlink # See cleanup()
-        self.tempcache = None
-        # Undocumented feature: if you assign {} to tempcache,
-        # it is used to cache files retrieved with
-        # self.retrieve().  This is not enabled by default
-        # since it does not work for changing documents (and I
-        # haven't got the logic to check expiration headers
-        # yet).
-        self.ftpcache = ftpcache
-        # Undocumented feature: you can use a different
-        # ftp cache by assigning to the .ftpcache member;
-        # in case you want logically independent URL openers
-        # XXX This is not threadsafe.  Bah.
-
-    def __del__(self):
-        self.close()
-
-    def close(self):
-        self.cleanup()
-
-    def cleanup(self):
-        # This code sometimes runs when the rest of this module
-        # has already been deleted, so it can't use any globals
-        # or import anything.
-        if self.__tempfiles:
-            for file in self.__tempfiles:
-                try:
-                    self.__unlink(file)
-                except OSError:
-                    pass
-            del self.__tempfiles[:]
-        if self.tempcache:
-            self.tempcache.clear()
-
-    def addheader(self, *args):
-        """Add a header to be used by the HTTP interface only
-        e.g. u.addheader('Accept', 'sound/basic')"""
-        self.addheaders.append(args)
-
-    # External interface
-    def open(self, fullurl, data=None):
-        """Use URLopener().open(file) instead of open(file, 'r')."""
-        fullurl = unwrap(_to_bytes(fullurl))
-        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
-        if self.tempcache and fullurl in self.tempcache:
-            filename, headers = self.tempcache[fullurl]
-            fp = open(filename, 'rb')
-            return addinfourl(fp, headers, fullurl)
-        urltype, url = _splittype(fullurl)
-        if not urltype:
-            urltype = 'file'
-        if urltype in self.proxies:
-            proxy = self.proxies[urltype]
-            urltype, proxyhost = _splittype(proxy)
-            host, selector = _splithost(proxyhost)
-            url = (host, fullurl) # Signal special case to open_*()
-        else:
-            proxy = None
-        name = 'open_' + urltype
-        self.type = urltype
-        name = name.replace('-', '_')
-        if not hasattr(self, name) or name == 'open_local_file':
-            if proxy:
-                return self.open_unknown_proxy(proxy, fullurl, data)
-            else:
-                return self.open_unknown(fullurl, data)
-        try:
-            if data is None:
-                return getattr(self, name)(url)
-            else:
-                return getattr(self, name)(url, data)
-        except (HTTPError, URLError):
-            raise
-        except OSError as msg:
-            raise OSError('socket error', msg) from msg
-
-    def open_unknown(self, fullurl, data=None):
-        """Overridable interface to open unknown URL type."""
-        type, url = _splittype(fullurl)
-        raise OSError('url error', 'unknown url type', type)
-
-    def open_unknown_proxy(self, proxy, fullurl, data=None):
-        """Overridable interface to open unknown URL type."""
-        type, url = _splittype(fullurl)
-        raise OSError('url error', 'invalid proxy for %s' % type, proxy)
-
-    # External interface
-    def retrieve(self, url, filename=None, reporthook=None, data=None):
-        """retrieve(url) returns (filename, headers) for a local object
-        or (tempfilename, headers) for a remote object."""
-        url = unwrap(_to_bytes(url))
-        if self.tempcache and url in self.tempcache:
-            return self.tempcache[url]
-        type, url1 = _splittype(url)
-        if filename is None and (not type or type == 'file'):
-            try:
-                fp = self.open_local_file(url1)
-                hdrs = fp.info()
-                fp.close()
-                return url2pathname(_splithost(url1)[1]), hdrs
-            except OSError:
-                pass
-        fp = self.open(url, data)
-        try:
-            headers = fp.info()
-            if filename:
-                tfp = open(filename, 'wb')
-            else:
-                garbage, path = _splittype(url)
-                garbage, path = _splithost(path or "")
-                path, garbage = _splitquery(path or "")
-                path, garbage = _splitattr(path or "")
-                suffix = os.path.splitext(path)[1]
-                (fd, filename) = tempfile.mkstemp(suffix)
-                self.__tempfiles.append(filename)
-                tfp = os.fdopen(fd, 'wb')
-            try:
-                result = filename, headers
-                if self.tempcache is not None:
-                    self.tempcache[url] = result
-                bs = 1024*8
-                size = -1
-                read = 0
-                blocknum = 0
-                if "content-length" in headers:
-                    size = int(headers["Content-Length"])
-                if reporthook:
-                    reporthook(blocknum, bs, size)
-                while block := fp.read(bs):
-                    read += len(block)
-                    tfp.write(block)
-                    blocknum += 1
-                    if reporthook:
-                        reporthook(blocknum, bs, size)
-            finally:
-                tfp.close()
-        finally:
-            fp.close()
-
-        # raise exception if actual size does not match content-length header
-        if size >= 0 and read < size:
-            raise ContentTooShortError(
-                "retrieval incomplete: got only %i out of %i bytes"
-                % (read, size), result)
-
-        return result
-
-    # Each method named open_<type> knows how to open that type of URL
-
-    def _open_generic_http(self, connection_factory, url, data):
-        """Make an HTTP connection using connection_class.
-
-        This is an internal method that should be called from
-        open_http() or open_https().
-
-        Arguments:
-        - connection_factory should take a host name and return an
-          HTTPConnection instance.
-        - url is the url to retrieval or a host, relative-path pair.
-        - data is payload for a POST request or None.
-        """
-
-        user_passwd = None
-        proxy_passwd= None
-        if isinstance(url, str):
-            host, selector = _splithost(url)
-            if host:
-                user_passwd, host = _splituser(host)
-                host = unquote(host)
-            realhost = host
-        else:
-            host, selector = url
-            # check whether the proxy contains authorization information
-            proxy_passwd, host = _splituser(host)
-            # now we proceed with the url we want to obtain
-            urltype, rest = _splittype(selector)
-            url = rest
-            user_passwd = None
-            if urltype.lower() != 'http':
-                realhost = None
-            else:
-                realhost, rest = _splithost(rest)
-                if realhost:
-                    user_passwd, realhost = _splituser(realhost)
-                if user_passwd:
-                    selector = "%s://%s%s" % (urltype, realhost, rest)
-                if proxy_bypass(realhost):
-                    host = realhost
-
-        if not host: raise OSError('http error', 'no host given')
-
-        if proxy_passwd:
-            proxy_passwd = unquote(proxy_passwd)
-            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
-        else:
-            proxy_auth = None
-
-        if user_passwd:
-            user_passwd = unquote(user_passwd)
-            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
-        else:
-            auth = None
-        http_conn = connection_factory(host)
-        headers = {}
-        if proxy_auth:
-            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
-        if auth:
-            headers["Authorization"] =  "Basic %s" % auth
-        if realhost:
-            headers["Host"] = realhost
-
-        # Add Connection:close as we don't support persistent connections yet.
-        # This helps in closing the socket and avoiding ResourceWarning
-
-        headers["Connection"] = "close"
-
-        for header, value in self.addheaders:
-            headers[header] = value
-
-        if data is not None:
-            headers["Content-Type"] = "application/x-www-form-urlencoded"
-            http_conn.request("POST", selector, data, headers)
-        else:
-            http_conn.request("GET", selector, headers=headers)
-
-        try:
-            response = http_conn.getresponse()
-        except http.client.BadStatusLine:
-            # something went wrong with the HTTP status line
-            raise URLError("http protocol error: bad status line")
-
-        # According to RFC 2616, "2xx" code indicates that the client's
-        # request was successfully received, understood, and accepted.
-        if 200 <= response.status < 300:
-            return addinfourl(response, response.msg, "http:" + url,
-                              response.status)
-        else:
-            return self.http_error(
-                url, response.fp,
-                response.status, response.reason, response.msg, data)
-
-    def open_http(self, url, data=None):
-        """Use HTTP protocol."""
-        return self._open_generic_http(http.client.HTTPConnection, url, data)
-
-    def http_error(self, url, fp, errcode, errmsg, headers, data=None):
-        """Handle http errors.
-
-        Derived class can override this, or provide specific handlers
-        named http_error_DDD where DDD is the 3-digit error code."""
-        # First check if there's a specific handler for this error
-        name = 'http_error_%d' % errcode
-        if hasattr(self, name):
-            method = getattr(self, name)
-            if data is None:
-                result = method(url, fp, errcode, errmsg, headers)
-            else:
-                result = method(url, fp, errcode, errmsg, headers, data)
-            if result: return result
-        return self.http_error_default(url, fp, errcode, errmsg, headers)
-
-    def http_error_default(self, url, fp, errcode, errmsg, headers):
-        """Default error handler: close the connection and raise OSError."""
-        fp.close()
-        raise HTTPError(url, errcode, errmsg, headers, None)
-
-    if _have_ssl:
-        def _https_connection(self, host):
-            if self.key_file or self.cert_file:
-                http_version = http.client.HTTPSConnection._http_vsn
-                context = http.client._create_https_context(http_version)
-                context.load_cert_chain(self.cert_file, self.key_file)
-                # cert and key file means the user wants to authenticate.
-                # enable TLS 1.3 PHA implicitly even for custom contexts.
-                if context.post_handshake_auth is not None:
-                    context.post_handshake_auth = True
-            else:
-                context = None
-            return http.client.HTTPSConnection(host, context=context)
-
-        def open_https(self, url, data=None):
-            """Use HTTPS protocol."""
-            return self._open_generic_http(self._https_connection, url, data)
-
-    def open_file(self, url):
-        """Use local file or FTP depending on form of URL."""
-        if not isinstance(url, str):
-            raise URLError('file error: proxy support for file protocol currently not implemented')
-        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
-            raise ValueError("file:// scheme is supported only on localhost")
-        else:
-            return self.open_local_file(url)
-
-    def open_local_file(self, url):
-        """Use local file."""
-        import email.utils
-        import mimetypes
-        host, file = _splithost(url)
-        localname = url2pathname(file)
-        try:
-            stats = os.stat(localname)
-        except OSError as e:
-            raise URLError(e.strerror, e.filename)
-        size = stats.st_size
-        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
-        mtype = mimetypes.guess_type(url)[0]
-        headers = email.message_from_string(
-            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
-            (mtype or 'text/plain', size, modified))
-        if not host:
-            urlfile = file
-            if file[:1] == '/':
-                urlfile = 'file://' + file
-            return addinfourl(open(localname, 'rb'), headers, urlfile)
-        host, port = _splitport(host)
-        if (not port
-           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
-            urlfile = file
-            if file[:1] == '/':
-                urlfile = 'file://' + file
-            elif file[:2] == './':
-                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
-            return addinfourl(open(localname, 'rb'), headers, urlfile)
-        raise URLError('local file error: not on local host')
-
-    def open_ftp(self, url):
-        """Use FTP protocol."""
-        if not isinstance(url, str):
-            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
-        import mimetypes
-        host, path = _splithost(url)
-        if not host: raise URLError('ftp error: no host given')
-        host, port = _splitport(host)
-        user, host = _splituser(host)
-        if user: user, passwd = _splitpasswd(user)
-        else: passwd = None
-        host = unquote(host)
-        user = unquote(user or '')
-        passwd = unquote(passwd or '')
-        host = socket.gethostbyname(host)
-        if not port:
-            import ftplib
-            port = ftplib.FTP_PORT
-        else:
-            port = int(port)
-        path, attrs = _splitattr(path)
-        path = unquote(path)
-        dirs = path.split('/')
-        dirs, file = dirs[:-1], dirs[-1]
-        if dirs and not dirs[0]: dirs = dirs[1:]
-        if dirs and not dirs[0]: dirs[0] = '/'
-        key = user, host, port, '/'.join(dirs)
-        # XXX thread unsafe!
-        if len(self.ftpcache) > MAXFTPCACHE:
-            # Prune the cache, rather arbitrarily
-            for k in list(self.ftpcache):
-                if k != key:
-                    v = self.ftpcache[k]
-                    del self.ftpcache[k]
-                    v.close()
-        try:
-            if key not in self.ftpcache:
-                self.ftpcache[key] = \
-                    ftpwrapper(user, passwd, host, port, dirs)
-            if not file: type = 'D'
-            else: type = 'I'
-            for attr in attrs:
-                attr, value = _splitvalue(attr)
-                if attr.lower() == 'type' and \
-                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
-                    type = value.upper()
-            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
-            mtype = mimetypes.guess_type("ftp:" + url)[0]
-            headers = ""
-            if mtype:
-                headers += "Content-Type: %s\n" % mtype
-            if retrlen is not None and retrlen >= 0:
-                headers += "Content-Length: %d\n" % retrlen
-            headers = email.message_from_string(headers)
-            return addinfourl(fp, headers, "ftp:" + url)
-        except ftperrors() as exp:
-            raise URLError(f'ftp error: {exp}') from exp
-
-    def open_data(self, url, data=None):
-        """Use "data" URL."""
-        if not isinstance(url, str):
-            raise URLError('data error: proxy support for data protocol currently not implemented')
-        # ignore POSTed data
-        #
-        # syntax of data URLs:
-        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
-        # mediatype := [ type "/" subtype ] *( ";" parameter )
-        # data      := *urlchar
-        # parameter := attribute "=" value
-        try:
-            [type, data] = url.split(',', 1)
-        except ValueError:
-            raise OSError('data error', 'bad data URL')
-        if not type:
-            type = 'text/plain;charset=US-ASCII'
-        semi = type.rfind(';')
-        if semi >= 0 and '=' not in type[semi:]:
-            encoding = type[semi+1:]
-            type = type[:semi]
-        else:
-            encoding = ''
-        msg = []
-        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
-                                            time.gmtime(time.time())))
-        msg.append('Content-type: %s' % type)
-        if encoding == 'base64':
-            # XXX is this encoding/decoding ok?
-            data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
-        else:
-            data = unquote(data)
-        msg.append('Content-Length: %d' % len(data))
-        msg.append('')
-        msg.append(data)
-        msg = '\n'.join(msg)
-        headers = email.message_from_string(msg)
-        f = io.StringIO(msg)
-        #f.fileno = None     # needed for addinfourl
-        return addinfourl(f, headers, url)
-
-
-class FancyURLopener(URLopener):
-    """Derived class with handlers for errors we can handle (perhaps)."""
-
-    def __init__(self, *args, **kwargs):
-        URLopener.__init__(self, *args, **kwargs)
-        self.auth_cache = {}
-        self.tries = 0
-        self.maxtries = 10
-
-    def http_error_default(self, url, fp, errcode, errmsg, headers):
-        """Default error handling -- don't raise an exception."""
-        return addinfourl(fp, headers, "http:" + url, errcode)
-
-    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
-        """Error 302 -- relocated (temporarily)."""
-        self.tries += 1
-        try:
-            if self.maxtries and self.tries >= self.maxtries:
-                if hasattr(self, "http_error_500"):
-                    meth = self.http_error_500
-                else:
-                    meth = self.http_error_default
-                return meth(url, fp, 500,
-                            "Internal Server Error: Redirect Recursion",
-                            headers)
-            result = self.redirect_internal(url, fp, errcode, errmsg,
-                                            headers, data)
-            return result
-        finally:
-            self.tries = 0
-
-    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
-        if 'location' in headers:
-            newurl = headers['location']
-        elif 'uri' in headers:
-            newurl = headers['uri']
-        else:
-            return
-        fp.close()
-
-        # In case the server sent a relative URL, join with original:
-        newurl = urljoin(self.type + ":" + url, newurl)
-
-        urlparts = urlparse(newurl)
-
-        # For security reasons, we don't allow redirection to anything other
-        # than http, https and ftp.
-
-        # We are using newer HTTPError with older redirect_internal method
-        # This older method will get deprecated in 3.3
-
-        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
-            raise HTTPError(newurl, errcode,
-                            errmsg +
-                            " Redirection to url '%s' is not allowed." % newurl,
-                            headers, fp)
-
-        return self.open(newurl)
-
-    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
-        """Error 301 -- also relocated (permanently)."""
-        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
-    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
-        """Error 303 -- also relocated (essentially identical to 302)."""
-        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
-    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
-        """Error 307 -- relocated, but turn POST into error."""
-        if data is None:
-            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-        else:
-            return self.http_error_default(url, fp, errcode, errmsg, headers)
-
-    def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
-        """Error 308 -- relocated, but turn POST into error."""
-        if data is None:
-            return self.http_error_301(url, fp, errcode, errmsg, headers, data)
-        else:
-            return self.http_error_default(url, fp, errcode, errmsg, headers)
-
-    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
-            retry=False):
-        """Error 401 -- authentication required.
-        This function supports Basic authentication only."""
-        if 'www-authenticate' not in headers:
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        stuff = headers['www-authenticate']
-        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
-        if not match:
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        scheme, realm = match.groups()
-        if scheme.lower() != 'basic':
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        if not retry:
-            URLopener.http_error_default(self, url, fp, errcode, errmsg,
-                    headers)
-        name = 'retry_' + self.type + '_basic_auth'
-        if data is None:
-            return getattr(self,name)(url, realm)
-        else:
-            return getattr(self,name)(url, realm, data)
-
-    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
-            retry=False):
-        """Error 407 -- proxy authentication required.
-        This function supports Basic authentication only."""
-        if 'proxy-authenticate' not in headers:
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        stuff = headers['proxy-authenticate']
-        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
-        if not match:
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        scheme, realm = match.groups()
-        if scheme.lower() != 'basic':
-            URLopener.http_error_default(self, url, fp,
-                                         errcode, errmsg, headers)
-        if not retry:
-            URLopener.http_error_default(self, url, fp, errcode, errmsg,
-                    headers)
-        name = 'retry_proxy_' + self.type + '_basic_auth'
-        if data is None:
-            return getattr(self,name)(url, realm)
-        else:
-            return getattr(self,name)(url, realm, data)
-
-    def retry_proxy_http_basic_auth(self, url, realm, data=None):
-        host, selector = _splithost(url)
-        newurl = 'http://' + host + selector
-        proxy = self.proxies['http']
-        urltype, proxyhost = _splittype(proxy)
-        proxyhost, proxyselector = _splithost(proxyhost)
-        i = proxyhost.find('@') + 1
-        proxyhost = proxyhost[i:]
-        user, passwd = self.get_user_passwd(proxyhost, realm, i)
-        if not (user or passwd): return None
-        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
-                                  quote(passwd, safe=''), proxyhost)
-        self.proxies['http'] = 'http://' + proxyhost + proxyselector
-        if data is None:
-            return self.open(newurl)
-        else:
-            return self.open(newurl, data)
-
-    def retry_proxy_https_basic_auth(self, url, realm, data=None):
-        host, selector = _splithost(url)
-        newurl = 'https://' + host + selector
-        proxy = self.proxies['https']
-        urltype, proxyhost = _splittype(proxy)
-        proxyhost, proxyselector = _splithost(proxyhost)
-        i = proxyhost.find('@') + 1
-        proxyhost = proxyhost[i:]
-        user, passwd = self.get_user_passwd(proxyhost, realm, i)
-        if not (user or passwd): return None
-        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
-                                  quote(passwd, safe=''), proxyhost)
-        self.proxies['https'] = 'https://' + proxyhost + proxyselector
-        if data is None:
-            return self.open(newurl)
-        else:
-            return self.open(newurl, data)
-
-    def retry_http_basic_auth(self, url, realm, data=None):
-        host, selector = _splithost(url)
-        i = host.find('@') + 1
-        host = host[i:]
-        user, passwd = self.get_user_passwd(host, realm, i)
-        if not (user or passwd): return None
-        host = "%s:%s@%s" % (quote(user, safe=''),
-                             quote(passwd, safe=''), host)
-        newurl = 'http://' + host + selector
-        if data is None:
-            return self.open(newurl)
-        else:
-            return self.open(newurl, data)
-
-    def retry_https_basic_auth(self, url, realm, data=None):
-        host, selector = _splithost(url)
-        i = host.find('@') + 1
-        host = host[i:]
-        user, passwd = self.get_user_passwd(host, realm, i)
-        if not (user or passwd): return None
-        host = "%s:%s@%s" % (quote(user, safe=''),
-                             quote(passwd, safe=''), host)
-        newurl = 'https://' + host + selector
-        if data is None:
-            return self.open(newurl)
-        else:
-            return self.open(newurl, data)
-
-    def get_user_passwd(self, host, realm, clear_cache=0):
-        key = realm + '@' + host.lower()
-        if key in self.auth_cache:
-            if clear_cache:
-                del self.auth_cache[key]
-            else:
-                return self.auth_cache[key]
-        user, passwd = self.prompt_user_passwd(host, realm)
-        if user or passwd: self.auth_cache[key] = (user, passwd)
-        return user, passwd
-
-    def prompt_user_passwd(self, host, realm):
-        """Override this in a GUI environment!"""
-        import getpass
-        try:
-            user = input("Enter username for %s at %s: " % (realm, host))
-            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
-                (user, realm, host))
-            return user, passwd
-        except KeyboardInterrupt:
-            print()
-            return None, None
-
-
 # Utility functions
 
 _localhost = None
@@ -2485,9 +1810,7 @@ def getproxies_environment():
     """Return a dictionary of scheme -> proxy server URL mappings.
 
     Scan the environment for variables named <scheme>_proxy;
-    this seems to be the standard convention.  If you need a
-    different way, you can pass a proxies dictionary to the
-    [Fancy]URLopener constructor.
+    this seems to be the standard convention.
     """
     # in order to prefer lowercase variables, process environment in
     # two passes: first matches any, second pass matches lowercase only
author	Barney Gale <barney.gale@gmail.com>	2024-11-19 14:01:49 (GMT)
committer	GitHub <noreply@github.com>	2024-11-19 14:01:49 (GMT)
commit	4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c (patch)
tree	0bedd1382fcf31e644dcaf66bf5be0c3dcce9c1d /Lib
parent	a99dd23c1f5b9254651d9895714596d5e7942389 (diff)
download	cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.zip cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.gz cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.bz2