From b31c87bfcfc8b904ef05d998b4c38122b8651763 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Mon, 25 Apr 2016 09:17:54 -0700 Subject: backport fix for Issue #26804. urllib.request will prefer lower_case proxy environment variables over UPPER_CASE or Mixed_Case ones. --- Doc/library/urllib.rst | 2 ++ Lib/test/test_urllib.py | 43 ++++++++++++++++++++++++++++++++++++++++- Lib/urllib.py | 51 +++++++++++++++++++++++++++++++++++-------------- Misc/NEWS | 3 +++ 4 files changed, 84 insertions(+), 15 deletions(-) diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst index 91deb39..4c34b90 100644 --- a/Doc/library/urllib.rst +++ b/Doc/library/urllib.rst @@ -292,6 +292,8 @@ Utility functions in case insensitive way, for all operating systems first, and when it cannot find it, looks for proxy information from Mac OSX System Configuration for Mac OS X and Windows Systems Registry for Windows. + If both lowercase and uppercase environment variables exist (and disagree), + lowercase is preferred. .. note:: urllib also exposes certain utility functions like splittype, splithost and diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index e14cccc..e8c14c2 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -1,5 +1,6 @@ """Regresssion tests for urllib""" +import collections import urllib import httplib import unittest @@ -158,8 +159,46 @@ class ProxyTests(unittest.TestCase): # getproxies_environment use lowered case truncated (no '_proxy') keys self.assertEqual('localhost', proxies['no']) # List of no_proxies with space. - self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com') + self.env.set('NO_PROXY', 'localhost, anotherdomain.com, newdomain.com:1234') self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com')) + self.assertTrue(urllib.proxy_bypass_environment('anotherdomain.com:8888')) + self.assertTrue(urllib.proxy_bypass_environment('newdomain.com:1234')) + + +class ProxyTests_withOrderedEnv(unittest.TestCase): + + def setUp(self): + # We need to test conditions, where variable order _is_ significant + self._saved_env = os.environ + # Monkey patch os.environ, start with empty fake environment + os.environ = collections.OrderedDict() + + def tearDown(self): + os.environ = self._saved_env + + def test_getproxies_environment_prefer_lowercase(self): + # Test lowercase preference with removal + os.environ['no_proxy'] = '' + os.environ['No_Proxy'] = 'localhost' + self.assertFalse(urllib.proxy_bypass_environment('localhost')) + self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + os.environ['http_proxy'] = '' + os.environ['HTTP_PROXY'] = 'http://somewhere:3128' + proxies = urllib.getproxies_environment() + self.assertEqual({}, proxies) + # Test lowercase preference of proxy bypass and correct matching including ports + os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234' + os.environ['No_Proxy'] = 'xyz.com' + self.assertTrue(urllib.proxy_bypass_environment('localhost')) + self.assertTrue(urllib.proxy_bypass_environment('noproxy.com:5678')) + self.assertTrue(urllib.proxy_bypass_environment('my.proxy:1234')) + self.assertFalse(urllib.proxy_bypass_environment('my.proxy')) + self.assertFalse(urllib.proxy_bypass_environment('arbitrary')) + # Test lowercase preference with replacement + os.environ['http_proxy'] = 'http://somewhere:3128' + os.environ['Http_Proxy'] = 'http://somewhereelse:3128' + proxies = urllib.getproxies_environment() + self.assertEqual('http://somewhere:3128', proxies['http']) class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin): @@ -1030,6 +1069,8 @@ def test_main(): Pathname_Tests, Utility_Tests, URLopener_Tests, + ProxyTests, + ProxyTests_withOrderedEnv, #FTPWrapperTests, ) diff --git a/Lib/urllib.py b/Lib/urllib.py index 0cb3df9..6d17e6f 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1379,25 +1379,42 @@ def getproxies_environment(): """Return a dictionary of scheme -> proxy server URL mappings. Scan the environment for variables named _proxy; - this seems to be the standard convention. If you need a - different way, you can pass a proxies dictionary to the - [Fancy]URLopener constructor. + this seems to be the standard convention. In order to prefer lowercase + variables, we process the environment in two passes, first matches any + and second matches only lower case proxies. + If you need a different way, you can pass a proxies dictionary to the + [Fancy]URLopener constructor. """ proxies = {} for name, value in os.environ.items(): name = name.lower() if value and name[-6:] == '_proxy': proxies[name[:-6]] = value + + for name, value in os.environ.items(): + if name[-6:] == '_proxy': + name = name.lower() + if value: + proxies[name[:-6]] = value + else: + proxies.pop(name[:-6], None) + return proxies -def proxy_bypass_environment(host): +def proxy_bypass_environment(host, proxies=None): """Test if proxies should not be used for a particular host. - Checks the environment for a variable named no_proxy, which should - be a list of DNS suffixes separated by commas, or '*' for all hosts. + Checks the proxies dict for the value of no_proxy, which should be a + list of comma separated DNS suffixes, or '*' for all hosts. """ - no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') + if proxies is None: + proxies = getproxies_environment() + # don't bypass, if no_proxy isn't specified + try: + no_proxy = proxies['no'] + except KeyError: + return 0 # '*' is special case for always bypass if no_proxy == '*': return 1 @@ -1483,8 +1500,14 @@ if sys.platform == 'darwin': return _get_proxies() def proxy_bypass(host): - if getproxies_environment(): - return proxy_bypass_environment(host) + """Return True, if a host should be bypassed. + + Checks proxy settings gathered from the environment, if specified, or + from the MacOSX framework SystemConfiguration. + """ + proxies = getproxies_environment() + if proxies: + return proxy_bypass_environment(host, proxies) else: return proxy_bypass_macosx_sysconf(host) @@ -1600,14 +1623,14 @@ elif os.name == 'nt': return 0 def proxy_bypass(host): - """Return a dictionary of scheme -> proxy server URL mappings. + """Return True, if the host should be bypassed. - Returns settings gathered from the environment, if specified, + Checks proxy settings gathered from the environment, if specified, or the registry. - """ - if getproxies_environment(): - return proxy_bypass_environment(host) + proxies = getproxies_environment() + if proxies: + return proxy_bypass_environment(host, proxies) else: return proxy_bypass_registry(host) diff --git a/Misc/NEWS b/Misc/NEWS index 6380bf9..6f29bca 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -77,6 +77,9 @@ Core and Builtins Library ------- +- Issue #26804: urllib will prefer lower_case proxy environment variables over + UPPER_CASE or Mixed_Case ones. Patch contributed by Hans-Peter Jansen. + - Issue #26837: assertSequenceEqual() now correctly outputs non-stringified differing items. This affects assertListEqual() and assertTupleEqual(). -- cgit v0.12