summary | refs | log | tree | commit | diff | stats
path: root/Lib
diff options
context:
space:
mode:
author Gregory P. Smith <greg@krypto.org> 2022-09-19 23:06:25 (GMT)
committer GitHub <noreply@github.com> 2022-09-19 23:06:25 (GMT)
commit e61ca2243163d829ab04d91d8e67940ea850aefa (patch)
tree 4897f3571641d511770aad2e2d1fff505160792e /Lib
parent 04f4977f508583954ad7b9cb09076ee1e57461f8 (diff)
download cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.zip
cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.gz
cpython-e61ca2243163d829ab04d91d8e67940ea850aefa.tar.bz2
gh-95865: Further reduce quote_from_bytes memory consumption (#96860)
on large input values. Based on Dennis Sweeney's chunking idea.
Diffstat (limited to 'Lib')
-rw-r--r-- Lib/test/test_urlparse.py 4
-rw-r--r-- Lib/urllib/parse.py 10
2 files changed, 13 insertions, 1 deletions
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 2f629c7..81d6018 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -985,6 +985,10 @@ class UrlParseTestCase(unittest.TestCase):
self.assertEqual(result, 'archaeological%20arcana')
result = urllib.parse.quote_from_bytes(b'')
self.assertEqual(result, '')
+ result = urllib.parse.quote_from_bytes(b'A'*10_000)
+ self.assertEqual(result, 'A'*10_000)
+ result = urllib.parse.quote_from_bytes(b'z\x01/ '*253_183)
+ self.assertEqual(result, 'z%01/%20'*253_183)
def test_unquote_to_bytes(self):
result = urllib.parse.unquote_to_bytes('abc%20def')
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index f25c770..3734c73 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -29,6 +29,7 @@ test_urlparse.py provides a good indicator of parsing behavior.
from collections import namedtuple
import functools
+import math
import re
import types
import warnings
@@ -906,7 +907,14 @@ def quote_from_bytes(bs, safe='/'):
if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
return bs.decode()
quoter = _byte_quoter_factory(safe)
- return ''.join(map(quoter, bs))
+ if (bs_len := len(bs)) < 200_000:
+ return ''.join(map(quoter, bs))
+ else:
+ # This saves memory - https://github.com/python/cpython/issues/95865
+ chunk_size = math.isqrt(bs_len)
+ chunks = [''.join(map(quoter, bs[i:i+chunk_size]))
+ for i in range(0, bs_len, chunk_size)]
+ return ''.join(chunks)
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
quote_via=quote_plus):