summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWeii Wang <weii.wang@canonical.com>2024-02-28 20:15:52 (GMT)
committerGitHub <noreply@github.com>2024-02-28 20:15:52 (GMT)
commitc43b26d02eaa103756c250e8d36829d388c5f3be (patch)
tree035e743458a83d1bb135b5ed078b38ad7dfaaf05
parent6c1c94dc517b77afcebb25436a4b7b0d13b6eb4d (diff)
downloadcpython-c43b26d02eaa103756c250e8d36829d388c5f3be.zip
cpython-c43b26d02eaa103756c250e8d36829d388c5f3be.tar.gz
cpython-c43b26d02eaa103756c250e8d36829d388c5f3be.tar.bz2
gh-115197: Stop resolving host in urllib.request proxy bypass (GH-115210)
Using a proxy is intended to defer DNS resolution of the target hosts to the proxy itself, rather than having the local host perform DNS resolution for any reason, which is a potential information leak. Proxy bypass lists are strictly name based, and most implementations of proxy support agree.
-rw-r--r--Lib/test/test_urllib2.py29
-rw-r--r--Lib/urllib/request.py77
-rw-r--r--Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst2
3 files changed, 64 insertions, 44 deletions
diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index fa528a6..739c15d 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -15,10 +15,11 @@ import tempfile
import subprocess
import urllib.request
-# The proxy bypass method imported below has logic specific to the OSX
-# proxy config data structure but is testable on all platforms.
+# The proxy bypass method imported below has logic specific to the
+# corresponding system but is testable on all platforms.
from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler,
HTTPPasswordMgrWithPriorAuth, _parse_proxy,
+ _proxy_bypass_winreg_override,
_proxy_bypass_macosx_sysconf,
AbstractDigestAuthHandler)
from urllib.parse import urlparse
@@ -1485,6 +1486,30 @@ class HandlerTests(unittest.TestCase):
self.assertEqual(req.host, "proxy.example.com:3128")
self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
+ @unittest.skipUnless(os.name == "nt", "only relevant for Windows")
+ def test_winreg_proxy_bypass(self):
+ proxy_override = "www.example.com;*.example.net; 192.168.0.1"
+ proxy_bypass = _proxy_bypass_winreg_override
+ for host in ("www.example.com", "www.example.net", "192.168.0.1"):
+ self.assertTrue(proxy_bypass(host, proxy_override),
+ "expected bypass of %s to be true" % host)
+
+ for host in ("example.com", "www.example.org", "example.net",
+ "192.168.0.2"):
+ self.assertFalse(proxy_bypass(host, proxy_override),
+ "expected bypass of %s to be False" % host)
+
+ # check intranet address bypass
+ proxy_override = "example.com; <local>"
+ self.assertTrue(proxy_bypass("example.com", proxy_override),
+ "expected bypass of %s to be true" % host)
+ self.assertFalse(proxy_bypass("example.net", proxy_override),
+ "expected bypass of %s to be False" % host)
+ for host in ("test", "localhost"):
+ self.assertTrue(proxy_bypass(host, proxy_override),
+ "expect <local> to bypass intranet address '%s'"
+ % host)
+
@unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX")
def test_osx_proxy_bypass(self):
bypass = {
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
index bca5944..d22af66 100644
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -2563,6 +2563,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
}
"""
from fnmatch import fnmatch
+ from ipaddress import AddressValueError, IPv4Address
hostonly, port = _splitport(host)
@@ -2579,20 +2580,17 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
return True
hostIP = None
+ try:
+ hostIP = int(IPv4Address(hostonly))
+ except AddressValueError:
+ pass
for value in proxy_settings.get('exceptions', ()):
# Items in the list are strings like these: *.local, 169.254/16
if not value: continue
m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
- if m is not None:
- if hostIP is None:
- try:
- hostIP = socket.gethostbyname(hostonly)
- hostIP = ip2num(hostIP)
- except OSError:
- continue
-
+ if m is not None and hostIP is not None:
base = ip2num(m.group(1))
mask = m.group(2)
if mask is None:
@@ -2615,6 +2613,31 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings):
return False
+# Same as _proxy_bypass_macosx_sysconf, testable on all platforms
+def _proxy_bypass_winreg_override(host, override):
+ """Return True if the host should bypass the proxy server.
+
+ The proxy override list is obtained from the Windows
+ Internet settings proxy override registry value.
+
+ An example of a proxy override value is:
+ "www.example.com;*.example.net; 192.168.0.1"
+ """
+ from fnmatch import fnmatch
+
+ host, _ = _splitport(host)
+ proxy_override = override.split(';')
+ for test in proxy_override:
+ test = test.strip()
+ # "<local>" should bypass the proxy server for all intranet addresses
+ if test == '<local>':
+ if '.' not in host:
+ return True
+ elif fnmatch(host, test):
+ return True
+ return False
+
+
if sys.platform == 'darwin':
from _scproxy import _get_proxy_settings, _get_proxies
@@ -2713,7 +2736,7 @@ elif os.name == 'nt':
import winreg
except ImportError:
# Std modules, so should be around - but you never know!
- return 0
+ return False
try:
internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
@@ -2723,40 +2746,10 @@ elif os.name == 'nt':
'ProxyOverride')[0])
# ^^^^ Returned as Unicode but problems if not converted to ASCII
except OSError:
- return 0
+ return False
if not proxyEnable or not proxyOverride:
- return 0
- # try to make a host list from name and IP address.
- rawHost, port = _splitport(host)
- host = [rawHost]
- try:
- addr = socket.gethostbyname(rawHost)
- if addr != rawHost:
- host.append(addr)
- except OSError:
- pass
- try:
- fqdn = socket.getfqdn(rawHost)
- if fqdn != rawHost:
- host.append(fqdn)
- except OSError:
- pass
- # make a check value list from the registry entry: replace the
- # '<local>' string by the localhost entry and the corresponding
- # canonical entry.
- proxyOverride = proxyOverride.split(';')
- # now check if we match one of the registry values.
- for test in proxyOverride:
- if test == '<local>':
- if '.' not in rawHost:
- return 1
- test = test.replace(".", r"\.") # mask dots
- test = test.replace("*", r".*") # change glob sequence
- test = test.replace("?", r".") # change glob char
- for val in host:
- if re.match(test, val, re.I):
- return 1
- return 0
+ return False
+ return _proxy_bypass_winreg_override(host, proxyOverride)
def proxy_bypass(host):
"""Return True, if host should be bypassed.
diff --git a/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst
new file mode 100644
index 0000000..e6ca3cc
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst
@@ -0,0 +1,2 @@
+``urllib.request`` no longer resolves the hostname before checking it
+against the system's proxy bypass list on macOS and Windows.