summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author: Barney Gale <barney.gale@gmail.com>, 2024-11-19 14:01:49 (GMT)
committer: GitHub <noreply@github.com>, 2024-11-19 14:01:49 (GMT)
commit: 4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c (patch)
tree: 0bedd1382fcf31e644dcaf66bf5be0c3dcce9c1d /Lib
parent: a99dd23c1f5b9254651d9895714596d5e7942389 (diff)
download: cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.zip
cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.gz
cpython-4d771977b17e5ffaa9c2e8a2e6f5d393f68fc63c.tar.bz2
GH-84850: Remove `urllib.request.URLopener` and `FancyURLopener` (#125739)
Diffstat (limited to 'Lib')
-rw-r--r--  Lib/test/test_urllib.py     192
-rw-r--r--  Lib/test/test_urllibnet.py   12
-rw-r--r--  Lib/urllib/request.py       691
3 files changed, 33 insertions, 862 deletions
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 2c53ce3..71084a4 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -7,11 +7,9 @@ import http.client
import email.message
import io
import unittest
-from unittest.mock import patch
from test import support
from test.support import os_helper
from test.support import socket_helper
-from test.support import warnings_helper
import os
try:
import ssl
@@ -20,7 +18,6 @@ except ImportError:
import sys
import tempfile
-from base64 import b64encode
import collections
@@ -35,32 +32,6 @@ def hexescape(char):
hex_repr = "0%s" % hex_repr
return "%" + hex_repr
-# Shortcut for testing FancyURLopener
-_urlopener = None
-
-
-def urlopen(url, data=None, proxies=None):
- """urlopen(url [, data]) -> open file-like object"""
- global _urlopener
- if proxies is not None:
- opener = urllib.request.FancyURLopener(proxies=proxies)
- elif not _urlopener:
- opener = FancyURLopener()
- _urlopener = opener
- else:
- opener = _urlopener
- if data is None:
- return opener.open(url)
- else:
- return opener.open(url, data)
-
-
-def FancyURLopener():
- with warnings_helper.check_warnings(
- ('FancyURLopener style of invoking requests is deprecated.',
- DeprecationWarning)):
- return urllib.request.FancyURLopener()
-
def fakehttp(fakedata, mock_close=False):
class FakeSocket(io.BytesIO):
@@ -119,26 +90,6 @@ class FakeHTTPMixin(object):
http.client.HTTPConnection = self._connection_class
-class FakeFTPMixin(object):
- def fakeftp(self):
- class FakeFtpWrapper(object):
- def __init__(self, user, passwd, host, port, dirs, timeout=None,
- persistent=True):
- pass
-
- def retrfile(self, file, type):
- return io.BytesIO(), 0
-
- def close(self):
- pass
-
- self._ftpwrapper_class = urllib.request.ftpwrapper
- urllib.request.ftpwrapper = FakeFtpWrapper
-
- def unfakeftp(self):
- urllib.request.ftpwrapper = self._ftpwrapper_class
-
-
class urlopen_FileTests(unittest.TestCase):
"""Test urlopen() opening a temporary file.
@@ -158,7 +109,7 @@ class urlopen_FileTests(unittest.TestCase):
f.close()
self.pathname = os_helper.TESTFN
self.quoted_pathname = urllib.parse.quote(self.pathname)
- self.returned_obj = urlopen("file:%s" % self.quoted_pathname)
+ self.returned_obj = urllib.request.urlopen("file:%s" % self.quoted_pathname)
def tearDown(self):
"""Shut down the open object"""
@@ -205,7 +156,7 @@ class urlopen_FileTests(unittest.TestCase):
self.assertIsInstance(self.returned_obj.headers, email.message.Message)
def test_url(self):
- self.assertEqual(self.returned_obj.url, self.quoted_pathname)
+ self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname)
def test_status(self):
self.assertIsNone(self.returned_obj.status)
@@ -214,7 +165,7 @@ class urlopen_FileTests(unittest.TestCase):
self.assertIsInstance(self.returned_obj.info(), email.message.Message)
def test_geturl(self):
- self.assertEqual(self.returned_obj.geturl(), self.quoted_pathname)
+ self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname)
def test_getcode(self):
self.assertIsNone(self.returned_obj.getcode())
@@ -339,13 +290,13 @@ class ProxyTests_withOrderedEnv(unittest.TestCase):
self.assertEqual('http://somewhere:3128', proxies['http'])
-class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
+class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin):
"""Test urlopen() opening a fake http connection."""
def check_read(self, ver):
self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
try:
- fp = urlopen("http://python.org/")
+ fp = urllib.request.urlopen("http://python.org/")
self.assertEqual(fp.readline(), b"Hello!")
self.assertEqual(fp.readline(), b"")
self.assertEqual(fp.geturl(), 'http://python.org/')
@@ -366,8 +317,8 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
def test_willclose(self):
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
try:
- resp = urlopen("http://www.python.org")
- self.assertTrue(resp.fp.will_close)
+ resp = urllib.request.urlopen("http://www.python.org")
+ self.assertTrue(resp.will_close)
finally:
self.unfakehttp()
@@ -392,9 +343,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
with self.assertRaisesRegex(
InvalidURL, f"contain control.*{escaped_char_repr}"):
urllib.request.urlopen(f"https:{schemeless_url}")
- # This code path quotes the URL so there is no injection.
- resp = urlopen(f"http:{schemeless_url}")
- self.assertNotIn(char, resp.geturl())
finally:
self.unfakehttp()
@@ -416,11 +364,6 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
urllib.request.urlopen(f"http:{schemeless_url}")
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
urllib.request.urlopen(f"https:{schemeless_url}")
- # This code path quotes the URL so there is no injection.
- resp = urlopen(f"http:{schemeless_url}")
- self.assertNotIn(' ', resp.geturl())
- self.assertNotIn('\r', resp.geturl())
- self.assertNotIn('\n', resp.geturl())
finally:
self.unfakehttp()
@@ -435,9 +378,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
InvalidURL = http.client.InvalidURL
with self.assertRaisesRegex(
InvalidURL, f"contain control.*{escaped_char_repr}"):
- urlopen(f"http:{schemeless_url}")
+ urllib.request.urlopen(f"http:{schemeless_url}")
with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
- urlopen(f"https:{schemeless_url}")
+ urllib.request.urlopen(f"https:{schemeless_url}")
finally:
self.unfakehttp()
@@ -450,9 +393,9 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
InvalidURL = http.client.InvalidURL
with self.assertRaisesRegex(
InvalidURL, r"contain control.*\\r"):
- urlopen(f"http:{schemeless_url}")
+ urllib.request.urlopen(f"http:{schemeless_url}")
with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
- urlopen(f"https:{schemeless_url}")
+ urllib.request.urlopen(f"https:{schemeless_url}")
finally:
self.unfakehttp()
@@ -476,7 +419,7 @@ Connection: close
Content-Type: text/html; charset=iso-8859-1
''', mock_close=True)
try:
- self.assertRaises(OSError, urlopen, "http://python.org/")
+ self.assertRaises(OSError, urllib.request.urlopen, "http://python.org/")
finally:
self.unfakehttp()
@@ -492,20 +435,20 @@ Content-Type: text/html; charset=iso-8859-1
try:
msg = "Redirection to url 'file:"
with self.assertRaisesRegex(urllib.error.HTTPError, msg):
- urlopen("http://python.org/")
+ urllib.request.urlopen("http://python.org/")
finally:
self.unfakehttp()
def test_redirect_limit_independent(self):
# Ticket #12923: make sure independent requests each use their
# own retry limit.
- for i in range(FancyURLopener().maxtries):
+ for i in range(urllib.request.HTTPRedirectHandler.max_redirections):
self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''', mock_close=True)
try:
- self.assertRaises(urllib.error.HTTPError, urlopen,
+ self.assertRaises(urllib.error.HTTPError, urllib.request.urlopen,
"http://something")
finally:
self.unfakehttp()
@@ -515,14 +458,14 @@ Connection: close
# data. (#1680230)
self.fakehttp(b'')
try:
- self.assertRaises(OSError, urlopen, "http://something")
+ self.assertRaises(OSError, urllib.request.urlopen, "http://something")
finally:
self.unfakehttp()
def test_missing_localfile(self):
# Test for #10836
with self.assertRaises(urllib.error.URLError) as e:
- urlopen('file://localhost/a/file/which/doesnot/exists.py')
+ urllib.request.urlopen('file://localhost/a/file/which/doesnot/exists.py')
self.assertTrue(e.exception.filename)
self.assertTrue(e.exception.reason)
@@ -531,71 +474,28 @@ Connection: close
tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
try:
self.assertTrue(os.path.exists(tmp_file))
- with urlopen(tmp_fileurl) as fobj:
+ with urllib.request.urlopen(tmp_fileurl) as fobj:
self.assertTrue(fobj)
finally:
os.close(fd)
os.unlink(tmp_file)
self.assertFalse(os.path.exists(tmp_file))
with self.assertRaises(urllib.error.URLError):
- urlopen(tmp_fileurl)
+ urllib.request.urlopen(tmp_fileurl)
def test_ftp_nohost(self):
test_ftp_url = 'ftp:///path'
with self.assertRaises(urllib.error.URLError) as e:
- urlopen(test_ftp_url)
+ urllib.request.urlopen(test_ftp_url)
self.assertFalse(e.exception.filename)
self.assertTrue(e.exception.reason)
def test_ftp_nonexisting(self):
with self.assertRaises(urllib.error.URLError) as e:
- urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
+ urllib.request.urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
self.assertFalse(e.exception.filename)
self.assertTrue(e.exception.reason)
- @patch.object(urllib.request, 'MAXFTPCACHE', 0)
- def test_ftp_cache_pruning(self):
- self.fakeftp()
- try:
- urllib.request.ftpcache['test'] = urllib.request.ftpwrapper('user', 'pass', 'localhost', 21, [])
- urlopen('ftp://localhost')
- finally:
- self.unfakeftp()
-
- def test_userpass_inurl(self):
- self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
- try:
- fp = urlopen("http://user:pass@python.org/")
- self.assertEqual(fp.readline(), b"Hello!")
- self.assertEqual(fp.readline(), b"")
- self.assertEqual(fp.geturl(), 'http://user:pass@python.org/')
- self.assertEqual(fp.getcode(), 200)
- finally:
- self.unfakehttp()
-
- def test_userpass_inurl_w_spaces(self):
- self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
- try:
- userpass = "a b:c d"
- url = "http://{}@python.org/".format(userpass)
- fakehttp_wrapper = http.client.HTTPConnection
- authorization = ("Authorization: Basic %s\r\n" %
- b64encode(userpass.encode("ASCII")).decode("ASCII"))
- fp = urlopen(url)
- # The authorization header must be in place
- self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
- self.assertEqual(fp.readline(), b"Hello!")
- self.assertEqual(fp.readline(), b"")
- # the spaces are quoted in URL so no match
- self.assertNotEqual(fp.geturl(), url)
- self.assertEqual(fp.getcode(), 200)
- finally:
- self.unfakehttp()
-
- def test_URLopener_deprecation(self):
- with warnings_helper.check_warnings(('',DeprecationWarning)):
- urllib.request.URLopener()
-
class urlopen_DataTests(unittest.TestCase):
"""Test urlopen() opening a data URL."""
@@ -1620,56 +1520,6 @@ class Utility_Tests(unittest.TestCase):
self.assertIsInstance(urllib.request.thishost(), tuple)
-class URLopener_Tests(FakeHTTPMixin, unittest.TestCase):
- """Testcase to test the open method of URLopener class."""
-
- def test_quoted_open(self):
- class DummyURLopener(urllib.request.URLopener):
- def open_spam(self, url):
- return url
- with warnings_helper.check_warnings(
- ('DummyURLopener style of invoking requests is deprecated.',
- DeprecationWarning)):
- self.assertEqual(DummyURLopener().open(
- 'spam://example/ /'),'//example/%20/')
-
- # test the safe characters are not quoted by urlopen
- self.assertEqual(DummyURLopener().open(
- "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/"),
- "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
-
- @warnings_helper.ignore_warnings(category=DeprecationWarning)
- def test_urlopener_retrieve_file(self):
- with os_helper.temp_dir() as tmpdir:
- fd, tmpfile = tempfile.mkstemp(dir=tmpdir)
- os.close(fd)
- fileurl = "file:" + urllib.request.pathname2url(tmpfile)
- filename, _ = urllib.request.URLopener().retrieve(fileurl)
- # Some buildbots have TEMP folder that uses a lowercase drive letter.
- self.assertEqual(os.path.normcase(filename), os.path.normcase(tmpfile))
-
- @warnings_helper.ignore_warnings(category=DeprecationWarning)
- def test_urlopener_retrieve_remote(self):
- url = "http://www.python.org/file.txt"
- self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
- self.addCleanup(self.unfakehttp)
- filename, _ = urllib.request.URLopener().retrieve(url)
- self.assertEqual(os.path.splitext(filename)[1], ".txt")
-
- @warnings_helper.ignore_warnings(category=DeprecationWarning)
- def test_local_file_open(self):
- # bpo-35907, CVE-2019-9948: urllib must reject local_file:// scheme
- class DummyURLopener(urllib.request.URLopener):
- def open_local_file(self, url):
- return url
- for url in ('local_file://example', 'local-file://example'):
- self.assertRaises(OSError, urllib.request.urlopen, url)
- self.assertRaises(OSError, urllib.request.URLopener().open, url)
- self.assertRaises(OSError, urllib.request.URLopener().retrieve, url)
- self.assertRaises(OSError, DummyURLopener().open, url)
- self.assertRaises(OSError, DummyURLopener().retrieve, url)
-
-
class RequestTests(unittest.TestCase):
"""Unit tests for urllib.request.Request."""
diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py
index 49a3b5a..f824ddd 100644
--- a/Lib/test/test_urllibnet.py
+++ b/Lib/test/test_urllibnet.py
@@ -5,6 +5,7 @@ from test.support import socket_helper
import contextlib
import socket
+import urllib.error
import urllib.parse
import urllib.request
import os
@@ -101,13 +102,10 @@ class urlopenNetworkTests(unittest.TestCase):
# test getcode() with the fancy opener to get 404 error codes
URL = self.url + "XXXinvalidXXX"
with socket_helper.transient_internet(URL):
- with self.assertWarns(DeprecationWarning):
- open_url = urllib.request.FancyURLopener().open(URL)
- try:
- code = open_url.getcode()
- finally:
- open_url.close()
- self.assertEqual(code, 404)
+ with self.assertRaises(urllib.error.URLError) as e:
+ with urllib.request.urlopen(URL):
+ pass
+ self.assertEqual(e.exception.code, 404)
@support.requires_resource('walltime')
def test_bad_address(self):
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index 18a837d..5c061a2 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -83,6 +83,7 @@ f = urllib.request.urlopen('https://www.python.org/')
import base64
import bisect
+import contextlib
import email
import hashlib
import http.client
@@ -94,15 +95,13 @@ import string
import sys
import time
import tempfile
-import contextlib
-import warnings
from urllib.error import URLError, HTTPError, ContentTooShortError
from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
_splittype, _splithost, _splitport, _splituser, _splitpasswd,
- _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes,
+ _splitattr, _splitvalue, _splittag,
unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook
@@ -128,7 +127,7 @@ __all__ = [
'urlopen', 'install_opener', 'build_opener',
'pathname2url', 'url2pathname', 'getproxies',
# Legacy interface
- 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
+ 'urlretrieve', 'urlcleanup',
]
# used in User-Agent header sent
@@ -165,8 +164,7 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
the reason phrase returned by the server --- instead of the response
headers as it is specified in the documentation for HTTPResponse.
- For FTP, file, and data URLs and requests explicitly handled by legacy
- URLopener and FancyURLopener classes, this function returns a
+ For FTP, file, and data URLs, this function returns a
urllib.response.addinfourl object.
Note that None may be returned if no handler handles the request (though
@@ -940,6 +938,7 @@ class AbstractBasicAuthHandler:
for mo in AbstractBasicAuthHandler.rx.finditer(header):
scheme, quote, realm = mo.groups()
if quote not in ['"', "'"]:
+ import warnings
warnings.warn("Basic Auth Realm was unquoted",
UserWarning, 3)
@@ -1495,7 +1494,7 @@ class FileHandler(BaseHandler):
origurl = 'file://' + filename
return addinfourl(open(localfile, 'rb'), headers, origurl)
except OSError as exp:
- raise URLError(exp)
+ raise URLError(exp, exp.filename)
raise URLError('file not on local host')
def _safe_gethostbyname(host):
@@ -1647,8 +1646,6 @@ class DataHandler(BaseHandler):
# Code move from the old urllib module
-MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
-
# Helper for non-unix systems
if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
@@ -1668,678 +1665,6 @@ else:
return quote(pathname)
-ftpcache = {}
-
-
-class URLopener:
- """Class to open URLs.
- This is a class rather than just a subroutine because we may need
- more than one set of global protocol-specific options.
- Note -- this is a base class for those who don't want the
- automatic handling of errors type 302 (relocated) and 401
- (authorization needed)."""
-
- __tempfiles = None
-
- version = "Python-urllib/%s" % __version__
-
- # Constructor
- def __init__(self, proxies=None, **x509):
- msg = "%(class)s style of invoking requests is deprecated. " \
- "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
- warnings.warn(msg, DeprecationWarning, stacklevel=3)
- if proxies is None:
- proxies = getproxies()
- assert hasattr(proxies, 'keys'), "proxies must be a mapping"
- self.proxies = proxies
- self.key_file = x509.get('key_file')
- self.cert_file = x509.get('cert_file')
- self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
- self.__tempfiles = []
- self.__unlink = os.unlink # See cleanup()
- self.tempcache = None
- # Undocumented feature: if you assign {} to tempcache,
- # it is used to cache files retrieved with
- # self.retrieve(). This is not enabled by default
- # since it does not work for changing documents (and I
- # haven't got the logic to check expiration headers
- # yet).
- self.ftpcache = ftpcache
- # Undocumented feature: you can use a different
- # ftp cache by assigning to the .ftpcache member;
- # in case you want logically independent URL openers
- # XXX This is not threadsafe. Bah.
-
- def __del__(self):
- self.close()
-
- def close(self):
- self.cleanup()
-
- def cleanup(self):
- # This code sometimes runs when the rest of this module
- # has already been deleted, so it can't use any globals
- # or import anything.
- if self.__tempfiles:
- for file in self.__tempfiles:
- try:
- self.__unlink(file)
- except OSError:
- pass
- del self.__tempfiles[:]
- if self.tempcache:
- self.tempcache.clear()
-
- def addheader(self, *args):
- """Add a header to be used by the HTTP interface only
- e.g. u.addheader('Accept', 'sound/basic')"""
- self.addheaders.append(args)
-
- # External interface
- def open(self, fullurl, data=None):
- """Use URLopener().open(file) instead of open(file, 'r')."""
- fullurl = unwrap(_to_bytes(fullurl))
- fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
- if self.tempcache and fullurl in self.tempcache:
- filename, headers = self.tempcache[fullurl]
- fp = open(filename, 'rb')
- return addinfourl(fp, headers, fullurl)
- urltype, url = _splittype(fullurl)
- if not urltype:
- urltype = 'file'
- if urltype in self.proxies:
- proxy = self.proxies[urltype]
- urltype, proxyhost = _splittype(proxy)
- host, selector = _splithost(proxyhost)
- url = (host, fullurl) # Signal special case to open_*()
- else:
- proxy = None
- name = 'open_' + urltype
- self.type = urltype
- name = name.replace('-', '_')
- if not hasattr(self, name) or name == 'open_local_file':
- if proxy:
- return self.open_unknown_proxy(proxy, fullurl, data)
- else:
- return self.open_unknown(fullurl, data)
- try:
- if data is None:
- return getattr(self, name)(url)
- else:
- return getattr(self, name)(url, data)
- except (HTTPError, URLError):
- raise
- except OSError as msg:
- raise OSError('socket error', msg) from msg
-
- def open_unknown(self, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = _splittype(fullurl)
- raise OSError('url error', 'unknown url type', type)
-
- def open_unknown_proxy(self, proxy, fullurl, data=None):
- """Overridable interface to open unknown URL type."""
- type, url = _splittype(fullurl)
- raise OSError('url error', 'invalid proxy for %s' % type, proxy)
-
- # External interface
- def retrieve(self, url, filename=None, reporthook=None, data=None):
- """retrieve(url) returns (filename, headers) for a local object
- or (tempfilename, headers) for a remote object."""
- url = unwrap(_to_bytes(url))
- if self.tempcache and url in self.tempcache:
- return self.tempcache[url]
- type, url1 = _splittype(url)
- if filename is None and (not type or type == 'file'):
- try:
- fp = self.open_local_file(url1)
- hdrs = fp.info()
- fp.close()
- return url2pathname(_splithost(url1)[1]), hdrs
- except OSError:
- pass
- fp = self.open(url, data)
- try:
- headers = fp.info()
- if filename:
- tfp = open(filename, 'wb')
- else:
- garbage, path = _splittype(url)
- garbage, path = _splithost(path or "")
- path, garbage = _splitquery(path or "")
- path, garbage = _splitattr(path or "")
- suffix = os.path.splitext(path)[1]
- (fd, filename) = tempfile.mkstemp(suffix)
- self.__tempfiles.append(filename)
- tfp = os.fdopen(fd, 'wb')
- try:
- result = filename, headers
- if self.tempcache is not None:
- self.tempcache[url] = result
- bs = 1024*8
- size = -1
- read = 0
- blocknum = 0
- if "content-length" in headers:
- size = int(headers["Content-Length"])
- if reporthook:
- reporthook(blocknum, bs, size)
- while block := fp.read(bs):
- read += len(block)
- tfp.write(block)
- blocknum += 1
- if reporthook:
- reporthook(blocknum, bs, size)
- finally:
- tfp.close()
- finally:
- fp.close()
-
- # raise exception if actual size does not match content-length header
- if size >= 0 and read < size:
- raise ContentTooShortError(
- "retrieval incomplete: got only %i out of %i bytes"
- % (read, size), result)
-
- return result
-
- # Each method named open_<type> knows how to open that type of URL
-
- def _open_generic_http(self, connection_factory, url, data):
- """Make an HTTP connection using connection_class.
-
- This is an internal method that should be called from
- open_http() or open_https().
-
- Arguments:
- - connection_factory should take a host name and return an
- HTTPConnection instance.
- - url is the url to retrieval or a host, relative-path pair.
- - data is payload for a POST request or None.
- """
-
- user_passwd = None
- proxy_passwd= None
- if isinstance(url, str):
- host, selector = _splithost(url)
- if host:
- user_passwd, host = _splituser(host)
- host = unquote(host)
- realhost = host
- else:
- host, selector = url
- # check whether the proxy contains authorization information
- proxy_passwd, host = _splituser(host)
- # now we proceed with the url we want to obtain
- urltype, rest = _splittype(selector)
- url = rest
- user_passwd = None
- if urltype.lower() != 'http':
- realhost = None
- else:
- realhost, rest = _splithost(rest)
- if realhost:
- user_passwd, realhost = _splituser(realhost)
- if user_passwd:
- selector = "%s://%s%s" % (urltype, realhost, rest)
- if proxy_bypass(realhost):
- host = realhost
-
- if not host: raise OSError('http error', 'no host given')
-
- if proxy_passwd:
- proxy_passwd = unquote(proxy_passwd)
- proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
- else:
- proxy_auth = None
-
- if user_passwd:
- user_passwd = unquote(user_passwd)
- auth = base64.b64encode(user_passwd.encode()).decode('ascii')
- else:
- auth = None
- http_conn = connection_factory(host)
- headers = {}
- if proxy_auth:
- headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
- if auth:
- headers["Authorization"] = "Basic %s" % auth
- if realhost:
- headers["Host"] = realhost
-
- # Add Connection:close as we don't support persistent connections yet.
- # This helps in closing the socket and avoiding ResourceWarning
-
- headers["Connection"] = "close"
-
- for header, value in self.addheaders:
- headers[header] = value
-
- if data is not None:
- headers["Content-Type"] = "application/x-www-form-urlencoded"
- http_conn.request("POST", selector, data, headers)
- else:
- http_conn.request("GET", selector, headers=headers)
-
- try:
- response = http_conn.getresponse()
- except http.client.BadStatusLine:
- # something went wrong with the HTTP status line
- raise URLError("http protocol error: bad status line")
-
- # According to RFC 2616, "2xx" code indicates that the client's
- # request was successfully received, understood, and accepted.
- if 200 <= response.status < 300:
- return addinfourl(response, response.msg, "http:" + url,
- response.status)
- else:
- return self.http_error(
- url, response.fp,
- response.status, response.reason, response.msg, data)
-
- def open_http(self, url, data=None):
- """Use HTTP protocol."""
- return self._open_generic_http(http.client.HTTPConnection, url, data)
-
- def http_error(self, url, fp, errcode, errmsg, headers, data=None):
- """Handle http errors.
-
- Derived class can override this, or provide specific handlers
- named http_error_DDD where DDD is the 3-digit error code."""
- # First check if there's a specific handler for this error
- name = 'http_error_%d' % errcode
- if hasattr(self, name):
- method = getattr(self, name)
- if data is None:
- result = method(url, fp, errcode, errmsg, headers)
- else:
- result = method(url, fp, errcode, errmsg, headers, data)
- if result: return result
- return self.http_error_default(url, fp, errcode, errmsg, headers)
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handler: close the connection and raise OSError."""
- fp.close()
- raise HTTPError(url, errcode, errmsg, headers, None)
-
- if _have_ssl:
- def _https_connection(self, host):
- if self.key_file or self.cert_file:
- http_version = http.client.HTTPSConnection._http_vsn
- context = http.client._create_https_context(http_version)
- context.load_cert_chain(self.cert_file, self.key_file)
- # cert and key file means the user wants to authenticate.
- # enable TLS 1.3 PHA implicitly even for custom contexts.
- if context.post_handshake_auth is not None:
- context.post_handshake_auth = True
- else:
- context = None
- return http.client.HTTPSConnection(host, context=context)
-
- def open_https(self, url, data=None):
- """Use HTTPS protocol."""
- return self._open_generic_http(self._https_connection, url, data)
-
- def open_file(self, url):
- """Use local file or FTP depending on form of URL."""
- if not isinstance(url, str):
- raise URLError('file error: proxy support for file protocol currently not implemented')
- if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
- raise ValueError("file:// scheme is supported only on localhost")
- else:
- return self.open_local_file(url)
-
- def open_local_file(self, url):
- """Use local file."""
- import email.utils
- import mimetypes
- host, file = _splithost(url)
- localname = url2pathname(file)
- try:
- stats = os.stat(localname)
- except OSError as e:
- raise URLError(e.strerror, e.filename)
- size = stats.st_size
- modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
- mtype = mimetypes.guess_type(url)[0]
- headers = email.message_from_string(
- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified))
- if not host:
- urlfile = file
- if file[:1] == '/':
- urlfile = 'file://' + file
- return addinfourl(open(localname, 'rb'), headers, urlfile)
- host, port = _splitport(host)
- if (not port
- and socket.gethostbyname(host) in ((localhost(),) + thishost())):
- urlfile = file
- if file[:1] == '/':
- urlfile = 'file://' + file
- elif file[:2] == './':
- raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
- return addinfourl(open(localname, 'rb'), headers, urlfile)
- raise URLError('local file error: not on local host')
-
- def open_ftp(self, url):
- """Use FTP protocol."""
- if not isinstance(url, str):
- raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
- import mimetypes
- host, path = _splithost(url)
- if not host: raise URLError('ftp error: no host given')
- host, port = _splitport(host)
- user, host = _splituser(host)
- if user: user, passwd = _splitpasswd(user)
- else: passwd = None
- host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
- host = socket.gethostbyname(host)
- if not port:
- import ftplib
- port = ftplib.FTP_PORT
- else:
- port = int(port)
- path, attrs = _splitattr(path)
- path = unquote(path)
- dirs = path.split('/')
- dirs, file = dirs[:-1], dirs[-1]
- if dirs and not dirs[0]: dirs = dirs[1:]
- if dirs and not dirs[0]: dirs[0] = '/'
- key = user, host, port, '/'.join(dirs)
- # XXX thread unsafe!
- if len(self.ftpcache) > MAXFTPCACHE:
- # Prune the cache, rather arbitrarily
- for k in list(self.ftpcache):
- if k != key:
- v = self.ftpcache[k]
- del self.ftpcache[k]
- v.close()
- try:
- if key not in self.ftpcache:
- self.ftpcache[key] = \
- ftpwrapper(user, passwd, host, port, dirs)
- if not file: type = 'D'
- else: type = 'I'
- for attr in attrs:
- attr, value = _splitvalue(attr)
- if attr.lower() == 'type' and \
- value in ('a', 'A', 'i', 'I', 'd', 'D'):
- type = value.upper()
- (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
- mtype = mimetypes.guess_type("ftp:" + url)[0]
- headers = ""
- if mtype:
- headers += "Content-Type: %s\n" % mtype
- if retrlen is not None and retrlen >= 0:
- headers += "Content-Length: %d\n" % retrlen
- headers = email.message_from_string(headers)
- return addinfourl(fp, headers, "ftp:" + url)
- except ftperrors() as exp:
- raise URLError(f'ftp error: {exp}') from exp
-
- def open_data(self, url, data=None):
- """Use "data" URL."""
- if not isinstance(url, str):
- raise URLError('data error: proxy support for data protocol currently not implemented')
- # ignore POSTed data
- #
- # syntax of data URLs:
- # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
- # mediatype := [ type "/" subtype ] *( ";" parameter )
- # data := *urlchar
- # parameter := attribute "=" value
- try:
- [type, data] = url.split(',', 1)
- except ValueError:
- raise OSError('data error', 'bad data URL')
- if not type:
- type = 'text/plain;charset=US-ASCII'
- semi = type.rfind(';')
- if semi >= 0 and '=' not in type[semi:]:
- encoding = type[semi+1:]
- type = type[:semi]
- else:
- encoding = ''
- msg = []
- msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
- time.gmtime(time.time())))
- msg.append('Content-type: %s' % type)
- if encoding == 'base64':
- # XXX is this encoding/decoding ok?
- data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
- else:
- data = unquote(data)
- msg.append('Content-Length: %d' % len(data))
- msg.append('')
- msg.append(data)
- msg = '\n'.join(msg)
- headers = email.message_from_string(msg)
- f = io.StringIO(msg)
- #f.fileno = None # needed for addinfourl
- return addinfourl(f, headers, url)
-
-
-class FancyURLopener(URLopener):
- """Derived class with handlers for errors we can handle (perhaps)."""
-
- def __init__(self, *args, **kwargs):
- URLopener.__init__(self, *args, **kwargs)
- self.auth_cache = {}
- self.tries = 0
- self.maxtries = 10
-
- def http_error_default(self, url, fp, errcode, errmsg, headers):
- """Default error handling -- don't raise an exception."""
- return addinfourl(fp, headers, "http:" + url, errcode)
-
- def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 302 -- relocated (temporarily)."""
- self.tries += 1
- try:
- if self.maxtries and self.tries >= self.maxtries:
- if hasattr(self, "http_error_500"):
- meth = self.http_error_500
- else:
- meth = self.http_error_default
- return meth(url, fp, 500,
- "Internal Server Error: Redirect Recursion",
- headers)
- result = self.redirect_internal(url, fp, errcode, errmsg,
- headers, data)
- return result
- finally:
- self.tries = 0
-
- def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
- if 'location' in headers:
- newurl = headers['location']
- elif 'uri' in headers:
- newurl = headers['uri']
- else:
- return
- fp.close()
-
- # In case the server sent a relative URL, join with original:
- newurl = urljoin(self.type + ":" + url, newurl)
-
- urlparts = urlparse(newurl)
-
- # For security reasons, we don't allow redirection to anything other
- # than http, https and ftp.
-
- # We are using newer HTTPError with older redirect_internal method
- # This older method will get deprecated in 3.3
-
- if urlparts.scheme not in ('http', 'https', 'ftp', ''):
- raise HTTPError(newurl, errcode,
- errmsg +
- " Redirection to url '%s' is not allowed." % newurl,
- headers, fp)
-
- return self.open(newurl)
-
- def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 301 -- also relocated (permanently)."""
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
- def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 303 -- also relocated (essentially identical to 302)."""
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
-
- def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 307 -- relocated, but turn POST into error."""
- if data is None:
- return self.http_error_302(url, fp, errcode, errmsg, headers, data)
- else:
- return self.http_error_default(url, fp, errcode, errmsg, headers)
-
- def http_error_308(self, url, fp, errcode, errmsg, headers, data=None):
- """Error 308 -- relocated, but turn POST into error."""
- if data is None:
- return self.http_error_301(url, fp, errcode, errmsg, headers, data)
- else:
- return self.http_error_default(url, fp, errcode, errmsg, headers)
-
- def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
- retry=False):
- """Error 401 -- authentication required.
- This function supports Basic authentication only."""
- if 'www-authenticate' not in headers:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- stuff = headers['www-authenticate']
- match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
- if not match:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- scheme, realm = match.groups()
- if scheme.lower() != 'basic':
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- if not retry:
- URLopener.http_error_default(self, url, fp, errcode, errmsg,
- headers)
- name = 'retry_' + self.type + '_basic_auth'
- if data is None:
- return getattr(self,name)(url, realm)
- else:
- return getattr(self,name)(url, realm, data)
-
- def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
- retry=False):
- """Error 407 -- proxy authentication required.
- This function supports Basic authentication only."""
- if 'proxy-authenticate' not in headers:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- stuff = headers['proxy-authenticate']
- match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
- if not match:
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- scheme, realm = match.groups()
- if scheme.lower() != 'basic':
- URLopener.http_error_default(self, url, fp,
- errcode, errmsg, headers)
- if not retry:
- URLopener.http_error_default(self, url, fp, errcode, errmsg,
- headers)
- name = 'retry_proxy_' + self.type + '_basic_auth'
- if data is None:
- return getattr(self,name)(url, realm)
- else:
- return getattr(self,name)(url, realm, data)
-
- def retry_proxy_http_basic_auth(self, url, realm, data=None):
- host, selector = _splithost(url)
- newurl = 'http://' + host + selector
- proxy = self.proxies['http']
- urltype, proxyhost = _splittype(proxy)
- proxyhost, proxyselector = _splithost(proxyhost)
- i = proxyhost.find('@') + 1
- proxyhost = proxyhost[i:]
- user, passwd = self.get_user_passwd(proxyhost, realm, i)
- if not (user or passwd): return None
- proxyhost = "%s:%s@%s" % (quote(user, safe=''),
- quote(passwd, safe=''), proxyhost)
- self.proxies['http'] = 'http://' + proxyhost + proxyselector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_proxy_https_basic_auth(self, url, realm, data=None):
- host, selector = _splithost(url)
- newurl = 'https://' + host + selector
- proxy = self.proxies['https']
- urltype, proxyhost = _splittype(proxy)
- proxyhost, proxyselector = _splithost(proxyhost)
- i = proxyhost.find('@') + 1
- proxyhost = proxyhost[i:]
- user, passwd = self.get_user_passwd(proxyhost, realm, i)
- if not (user or passwd): return None
- proxyhost = "%s:%s@%s" % (quote(user, safe=''),
- quote(passwd, safe=''), proxyhost)
- self.proxies['https'] = 'https://' + proxyhost + proxyselector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_http_basic_auth(self, url, realm, data=None):
- host, selector = _splithost(url)
- i = host.find('@') + 1
- host = host[i:]
- user, passwd = self.get_user_passwd(host, realm, i)
- if not (user or passwd): return None
- host = "%s:%s@%s" % (quote(user, safe=''),
- quote(passwd, safe=''), host)
- newurl = 'http://' + host + selector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def retry_https_basic_auth(self, url, realm, data=None):
- host, selector = _splithost(url)
- i = host.find('@') + 1
- host = host[i:]
- user, passwd = self.get_user_passwd(host, realm, i)
- if not (user or passwd): return None
- host = "%s:%s@%s" % (quote(user, safe=''),
- quote(passwd, safe=''), host)
- newurl = 'https://' + host + selector
- if data is None:
- return self.open(newurl)
- else:
- return self.open(newurl, data)
-
- def get_user_passwd(self, host, realm, clear_cache=0):
- key = realm + '@' + host.lower()
- if key in self.auth_cache:
- if clear_cache:
- del self.auth_cache[key]
- else:
- return self.auth_cache[key]
- user, passwd = self.prompt_user_passwd(host, realm)
- if user or passwd: self.auth_cache[key] = (user, passwd)
- return user, passwd
-
- def prompt_user_passwd(self, host, realm):
- """Override this in a GUI environment!"""
- import getpass
- try:
- user = input("Enter username for %s at %s: " % (realm, host))
- passwd = getpass.getpass("Enter password for %s in %s at %s: " %
- (user, realm, host))
- return user, passwd
- except KeyboardInterrupt:
- print()
- return None, None
-
-
# Utility functions
_localhost = None
@@ -2485,9 +1810,7 @@ def getproxies_environment():
"""Return a dictionary of scheme -> proxy server URL mappings.
Scan the environment for variables named <scheme>_proxy;
- this seems to be the standard convention. If you need a
- different way, you can pass a proxies dictionary to the
- [Fancy]URLopener constructor.
+ this seems to be the standard convention.
"""
# in order to prefer lowercase variables, process environment in
# two passes: first matches any, second pass matches lowercase only