From aeb28f51304ebe2ad9fd6a61b6e4e5a03d288aa1 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 5 Oct 2022 19:57:52 +0200 Subject: gh-91539: improve performance of get_proxies_environment (#91566) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * improve performance of get_proxies_environment when there are many environment variables * 📜🤖 Added by blurb_it. * fix case of short env name * fix formatting * fix whitespace * whitespace * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer * whitespace * Update Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst Co-authored-by: Carl Meyer * Update Lib/urllib/request.py Co-authored-by: Carl Meyer Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Carl Meyer --- Lib/urllib/request.py | 26 +++++++++++++--------- .../2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst | 1 + 2 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 1761e95..e2d5b8c 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2508,28 +2508,34 @@ def getproxies_environment(): this seems to be the standard convention. If you need a different way, you can pass a proxies dictionary to the [Fancy]URLopener constructor. - """ - proxies = {} # in order to prefer lowercase variables, process environment in # two passes: first matches any, second pass matches lowercase only - for name, value in os.environ.items(): - name = name.lower() - if value and name[-6:] == '_proxy': - proxies[name[:-6]] = value + + # select only environment variables which end in (after making lowercase) _proxy + proxies = {} + environment = [] + for name in os.environ.keys(): + # fast screen underscore position before more expensive case-folding + if len(name) > 5 and name[-6] == "_" and name[-5:].lower() == "proxy": + value = os.environ[name] + proxy_name = name[:-6].lower() + environment.append((name, value, proxy_name)) + if value: + proxies[proxy_name] = value # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY # (non-all-lowercase) as it may be set from the web server by a "Proxy:" # header from the client # If "proxy" is lowercase, it will still be used thanks to the next block if 'REQUEST_METHOD' in os.environ: proxies.pop('http', None) - for name, value in os.environ.items(): + for name, value, proxy_name in environment: + # not case-folded, checking here for lower-case env vars only if name[-6:] == '_proxy': - name = name.lower() if value: - proxies[name[:-6]] = value + proxies[proxy_name] = value else: - proxies.pop(name[:-6], None) + proxies.pop(proxy_name, None) return proxies def proxy_bypass_environment(host, proxies=None): diff --git a/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst new file mode 100644 index 0000000..16d61f1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-15-11-29-38.gh-issue-91539.7WgVuA.rst @@ -0,0 +1 @@ +Improve performance of ``urllib.request.getproxies_environment`` when there are many environment variables -- cgit v0.12