From 2235011d49bc543ced855804ac9b87c0e98a7b19 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Sun, 20 Jan 2008 12:05:43 +0000 Subject: #856047: respect the ``no_proxy`` env var when checking for proxies in urllib and using the other ``_proxy`` env vars. Original patch by Donovan Baarda. --- Doc/library/urllib.rst | 9 +++++++++ Lib/urllib.py | 52 ++++++++++++++++++++++++++++++++++++++++++-------- Misc/NEWS | 3 +++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst index 3ec6e88..2718d80 100644 --- a/Doc/library/urllib.rst +++ b/Doc/library/urllib.rst @@ -79,6 +79,11 @@ High-level interface % python ... + The :envvar:`no_proxy` environment variable can be used to specify hosts which + shouldn't be reached via proxy; if set, it should be a comma-separated list + of hostname suffixes, optionally with ``:port`` appended, for example + ``cern.ch,ncsa.uiuc.edu,some.host:8080``. + In a Windows environment, if no proxy environment variables are set, proxy settings are obtained from the registry's Internet Settings section. @@ -112,6 +117,10 @@ High-level interface .. versionchanged:: 2.3 Added the *proxies* support. + .. versionchanged:: 2.6 + Added :meth:`getcode` to returned object and support for the + :envvar:`no_proxy` environment variable. + .. function:: urlretrieve(url[, filename[, reporthook[, data]]]) diff --git a/Lib/urllib.py b/Lib/urllib.py index 4a49fc0..2e720ac 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1283,10 +1283,33 @@ def getproxies_environment(): proxies = {} for name, value in os.environ.items(): name = name.lower() + if name == 'no_proxy': + # handled in proxy_bypass_environment + continue if value and name[-6:] == '_proxy': proxies[name[:-6]] = value return proxies +def proxy_bypass_environment(host): + """Test if proxies should not be used for a particular host. 
+ + Checks the environment for a variable named no_proxy, which should + be a list of DNS suffixes separated by commas, or '*' for all hosts. + """ + no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '') + # '*' is special case for always bypass + if no_proxy == '*': + return 1 + # strip port off host + hostonly, port = splitport(host) + # check if the host ends with any of the DNS suffixes + for name in no_proxy.split(','): + if name and (hostonly.endswith(name) or host.endswith(name)): + return 1 + # otherwise, don't bypass + return 0 + + if sys.platform == 'darwin': def getproxies_internetconfig(): """Return a dictionary of scheme -> proxy server URL mappings. @@ -1314,12 +1337,15 @@ if sys.platform == 'darwin': pass else: proxies['http'] = 'http://%s' % value - # FTP: XXXX To be done. - # Gopher: XXXX To be done. + # FTP: XXX To be done. + # Gopher: XXX To be done. return proxies - def proxy_bypass(x): - return 0 + def proxy_bypass(host): + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return 0 def getproxies(): return getproxies_environment() or getproxies_internetconfig() @@ -1379,7 +1405,7 @@ elif os.name == 'nt': """ return getproxies_environment() or getproxies_registry() - def proxy_bypass(host): + def proxy_bypass_registry(host): try: import _winreg import re @@ -1438,12 +1464,22 @@ elif os.name == 'nt': return 1 return 0 + def proxy_bypass(host): + """Return a true value if the proxy should be bypassed for host. + + Checks proxy settings gathered from the environment, if specified, + or the registry. 
+ + """ + if getproxies_environment(): + return proxy_bypass_environment(host) + else: + return proxy_bypass_registry(host) + else: # By default use environment variables getproxies = getproxies_environment - - def proxy_bypass(host): - return 0 + proxy_bypass = proxy_bypass_environment # Test and time quote() and unquote() def test1(): diff --git a/Misc/NEWS b/Misc/NEWS index 3ff4f3e..7e4a12e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -369,6 +369,9 @@ Core and builtins Library ------- +- #856047: respect the ``no_proxy`` environment variable when using the + ``http_proxy`` etc. environment variables in urllib. + - #1178141: add a getcode() method to the addinfourls that urllib.open() returns so that you can retrieve the HTTP status code. -- cgit v0.12