diff options
author | Gregory P. Smith <greg@krypto.org> | 2022-09-19 23:06:25 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-19 23:06:25 (GMT) |
commit | e61ca2243163d829ab04d91d8e67940ea850aefa (patch) | |
tree | 4897f3571641d511770aad2e2d1fff505160792e | |
parent | 04f4977f508583954ad7b9cb09076ee1e57461f8 (diff) | |
download | cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.zip cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.gz cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.bz2 |
gh-95865: Further reduce quote_from_bytes memory consumption (#96860)
on large input values. Based on Dennis Sweeney's chunking idea.
-rw-r--r-- | Lib/test/test_urlparse.py | 4 | ||||
-rw-r--r-- | Lib/urllib/parse.py | 10 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst | 3 |
3 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 2f629c7..81d6018 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -985,6 +985,10 @@ class UrlParseTestCase(unittest.TestCase): self.assertEqual(result, 'archaeological%20arcana') result = urllib.parse.quote_from_bytes(b'') self.assertEqual(result, '') + result = urllib.parse.quote_from_bytes(b'A'*10_000) + self.assertEqual(result, 'A'*10_000) + result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183) + self.assertEqual(result, 'z%01/%20'*253_183) def test_unquote_to_bytes(self): result = urllib.parse.unquote_to_bytes('abc%20def') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index f25c770..3734c73 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior. from collections import namedtuple import functools +import math import re import types import warnings @@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'): if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe): return bs.decode() quoter = _byte_quoter_factory(safe) - return ''.join(map(quoter, bs)) + if (bs_len := len(bs)) < 200_000: + return ''.join(map(quoter, bs)) + else: + # This saves memory - https://github.com/python/cpython/issues/95865 + chunk_size = math.isqrt(bs_len) + chunks = [''.join(map(quoter, bs[i:i+chunk_size])) + for i in range(0, bs_len, chunk_size)] + return ''.join(chunks) def urlencode(query, doseq=False, safe='', encoding=None, errors=None, quote_via=quote_plus): diff --git a/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst b/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst new file mode 100644 index 0000000..03a5be7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-09-16-07-53-29.gh-issue-95865.oHjX0A.rst @@ -0,0 +1,3 @@ +Reduce :func:`urllib.parse.quote_from_bytes` memory use on large values. + +Contributed by Dennis Sweeney. |