diff options
author | Gregory P. Smith <greg@krypto.org> | 2022-09-19 23:06:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-19 23:06:25 (GMT) |
commit | e61ca2243163d829ab04d91d8e67940ea850aefa (patch) | |
tree | 4897f3571641d511770aad2e2d1fff505160792e /Lib/urllib | |
parent | 04f4977f508583954ad7b9cb09076ee1e57461f8 (diff) | |
download | cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.zip cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.gz cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.bz2 |
gh-95865: Further reduce quote_from_bytes memory consumption (#96860)
on large input values. Based on Dennis Sweeney's chunking idea.
Diffstat (limited to 'Lib/urllib')
-rw-r--r-- | Lib/urllib/parse.py | 10 |
1 file changed, 9 insertions, 1 deletion
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f25c770..3734c73 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior. from collections import namedtuple import functools +import math import re import types import warnings @@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'): if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): return bs.decode() quoter = _byte_quoter_factory(safe) - return ''.join(map(quoter, bs)) + if (bs_len := len(bs)) < 200_000: + return ''.join(map(quoter, bs)) + else: + # This saves memory - https://github.com/python/cpython/issues/95865 + chunk_size = math.isqrt(bs_len) + chunks = [''.join(map(quoter, bs[i:i+chunk_size])) + for i in range(0, bs_len, chunk_size)] + return ''.join(chunks) def urlencode(query, doseq=False, safe='', encoding=None, errors=None, quote_via=quote_plus): |