diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-05-05 05:20:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-05 05:20:06 (GMT) |
commit | 711c80bfca5dd17cb7c6ec26f0e44848b33aec04 (patch) | |
tree | 62d8956a00dd3f7c8c5a81fd0e3229ca7c2018ea /Lib | |
parent | 5dd36732c850084ce262b7869ed90d73a281296a (diff) | |
download | cpython-711c80bfca5dd17cb7c6ec26f0e44848b33aec04.zip cpython-711c80bfca5dd17cb7c6ec26f0e44848b33aec04.tar.gz cpython-711c80bfca5dd17cb7c6ec26f0e44848b33aec04.tar.bz2 |
gh-118164: Break a loop between _pydecimal and _pylong and optimize int to str conversion (GH-118483)
For converting large ints to strings, CPython invokes a function in _pylong.py,
which uses the decimal module to implement an asymptotically waaaaay
sub-quadratic algorithm. But if the C decimal module isn't available, CPython
uses _pydecimal.py instead. Which in turn frequently does str(int). If the int
is very large, _pylong ends up doing the work, which in turn asks decimal to do
"big" arithmetic, which in turn calls str(big_int), which in turn ... it can
become infinite mutual recursion.
This change introduces a different int->str function that doesn't use decimal.
It's asymptotically worse, "Karatsuba time" instead of quadratic time, so
still a huge improvement. _pylong switches to that when the C decimal isn't
available. It is also used for not too large integers (less than 450_000 bits),
where it is faster (up to 2 times for 30_000 bits) than the asymptotically
better implementation that uses the C decimal.
Co-authored-by: Tim Peters <tim.peters@gmail.com>
Diffstat (limited to 'Lib')
-rw-r--r-- | Lib/_pylong.py | 46 | ||||
-rw-r--r-- | Lib/test/test_int.py | 31 |
2 files changed, 66 insertions, 11 deletions
diff --git a/Lib/_pylong.py b/Lib/_pylong.py index 936346e..30bee6f 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -14,6 +14,10 @@ maximum performance, they should use something like gmpy2.""" import re import decimal +try: + import _decimal +except ImportError: + _decimal = None def int_to_decimal(n): @@ -82,7 +86,47 @@ def int_to_decimal(n): def int_to_decimal_string(n): """Asymptotically fast conversion of an 'int' to a decimal string.""" - return str(int_to_decimal(n)) + w = n.bit_length() + if w > 450_000 and _decimal is not None: + # It is only usable with the C decimal implementation. + # _pydecimal.py calls str() on very large integers, which in its + # turn calls int_to_decimal_string(), causing very deep recursion. + return str(int_to_decimal(n)) + + # Fallback algorithm for the case when the C decimal module isn't + # available. This algorithm is asymptotically worse than the algorithm + # using the decimal module, but better than the quadratic time + # implementation in longobject.c. + def inner(n, w): + if w <= 1000: + return str(n) + w2 = w >> 1 + d = pow10_cache.get(w2) + if d is None: + d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i + hi, lo = divmod(n, d) + return inner(hi, w - w2) + inner(lo, w2).zfill(w2) + + # The estimation of the number of decimal digits. + # There is no harm in small error. If we guess too large, there may + # be leading 0's that need to be stripped. If we guess too small, we + # may need to call str() recursively for the remaining highest digits, + # which can still potentially be a large integer. This is manifested + # only if the number has way more than 10**15 digits, that exceeds + # the 52-bit physical address limit in both Intel64 and AMD64. + w = int(w * 0.3010299956639812 + 1) # log10(2) + pow10_cache = {} + if n < 0: + n = -n + sign = '-' + else: + sign = '' + s = inner(n, w) + if s[0] == '0' and n: + # If our guess of w is too large, there may be leading 0's that + # need to be stripped. + s = s.lstrip('0') + return sign + s def _str_to_int_inner(s): diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 47fc50a..c862639 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -829,17 +829,28 @@ class PyLongModuleTests(unittest.TestCase): sys.set_int_max_str_digits(self._previous_limit) super().tearDown() - def test_pylong_int_to_decimal(self): - n = (1 << 100_000) - 1 - suffix = '9883109375' + def _test_pylong_int_to_decimal(self, n, suffix): s = str(n) - assert s[-10:] == suffix - s = str(-n) - assert s[-10:] == suffix - s = '%d' % n - assert s[-10:] == suffix - s = b'%d' % n - assert s[-10:] == suffix.encode('ascii') + self.assertEqual(s[-10:], suffix) + s2 = str(-n) + self.assertEqual(s2, '-' + s) + s3 = '%d' % n + self.assertEqual(s3, s) + s4 = b'%d' % n + self.assertEqual(s4, s.encode('ascii')) + + def test_pylong_int_to_decimal(self): + self._test_pylong_int_to_decimal((1 << 100_000), '9883109376') + self._test_pylong_int_to_decimal((1 << 100_000) - 1, '9883109375') + self._test_pylong_int_to_decimal(10**30_000, '0000000000') + self._test_pylong_int_to_decimal(10**30_000 - 1, '9999999999') + self._test_pylong_int_to_decimal(3**60_000, '9313200001') + + @support.requires_resource('cpu') + def test_pylong_int_to_decimal_2(self): + self._test_pylong_int_to_decimal(2**1_000_000, '2747109376') + self._test_pylong_int_to_decimal(10**300_000, '0000000000') + self._test_pylong_int_to_decimal(3**600_000, '3132000001') def test_pylong_int_divmod(self): n = (1 << 100_000) |