summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Doc/library/functions.rst7
-rw-r--r--Doc/library/json.rst11
-rw-r--r--Doc/library/stdtypes.rst166
-rw-r--r--Doc/library/sys.rst57
-rw-r--r--Doc/library/test.rst10
-rw-r--r--Doc/using/cmdline.rst13
-rw-r--r--Doc/whatsnew/3.12.rst11
-rw-r--r--Include/internal/pycore_global_strings.h1
-rw-r--r--Include/internal/pycore_initconfig.h2
-rw-r--r--Include/internal/pycore_interp.h2
-rw-r--r--Include/internal/pycore_long.h35
-rw-r--r--Include/internal/pycore_runtime_init_generated.h7
-rw-r--r--Lib/idlelib/idle_test/test_sidebar.py4
-rw-r--r--Lib/test/support/__init__.py11
-rw-r--r--Lib/test/test_ast.py8
-rw-r--r--Lib/test/test_cmd_line.py33
-rw-r--r--Lib/test/test_compile.py13
-rw-r--r--Lib/test/test_decimal.py18
-rw-r--r--Lib/test/test_int.py114
-rw-r--r--Lib/test/test_json/test_decode.py9
-rw-r--r--Lib/test/test_sys.py10
-rw-r--r--Lib/test/test_xmlrpc.py10
-rw-r--r--Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst14
-rw-r--r--Objects/longobject.c45
-rw-r--r--Parser/pegen.c23
-rw-r--r--Python/clinic/sysmodule.c.h78
-rw-r--r--Python/initconfig.c65
-rw-r--r--Python/sysmodule.c46
28 files changed, 803 insertions, 20 deletions
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
index e86e185..b9cf02e 100644
--- a/Doc/library/functions.rst
+++ b/Doc/library/functions.rst
@@ -910,6 +910,13 @@ are always available. They are listed here in alphabetical order.
.. versionchanged:: 3.11
The delegation to :meth:`__trunc__` is deprecated.
+ .. versionchanged:: 3.12
+ :class:`int` string inputs and string representations can be limited to
+ help avoid denial of service attacks. A :exc:`ValueError` is raised when
+ the limit is exceeded while converting a string *x* to an :class:`int` or
+ when converting an :class:`int` into a string would exceed the limit.
+ See the :ref:`integer string conversion length limitation
+ <int_max_str_digits>` documentation.
.. function:: isinstance(object, classinfo)
diff --git a/Doc/library/json.rst b/Doc/library/json.rst
index 467d5d9..d05d62e 100644
--- a/Doc/library/json.rst
+++ b/Doc/library/json.rst
@@ -23,6 +23,11 @@ is a lightweight data interchange format inspired by
`JavaScript <https://en.wikipedia.org/wiki/JavaScript>`_ object literal syntax
(although it is not a strict subset of JavaScript [#rfc-errata]_ ).
+.. warning::
+ Be cautious when parsing JSON data from untrusted sources. A malicious
+ JSON string may cause the decoder to consume considerable CPU and memory
+ resources. Limiting the size of data to be parsed is recommended.
+
:mod:`json` exposes an API familiar to users of the standard library
:mod:`marshal` and :mod:`pickle` modules.
@@ -253,6 +258,12 @@ Basic Usage
be used to use another datatype or parser for JSON integers
(e.g. :class:`float`).
+ .. versionchanged:: 3.12
+ The default *parse_int* of :func:`int` now limits the maximum length of
+ the integer string via the interpreter's :ref:`integer string
+ conversion length limitation <int_max_str_digits>` to help avoid denial
+ of service attacks.
+
*parse_constant*, if specified, will be called with one of the following
strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.
This can be used to raise an exception if invalid JSON numbers
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
index f68cf46..163ac70 100644
--- a/Doc/library/stdtypes.rst
+++ b/Doc/library/stdtypes.rst
@@ -622,6 +622,13 @@ class`. float also has the following additional methods.
:exc:`OverflowError` on infinities and a :exc:`ValueError` on
NaNs.
+ .. note::
+
+ The values returned by ``as_integer_ratio()`` can be huge. Attempts
+ to render such integers into decimal strings may bump into the
+ :ref:`integer string conversion length limitation
+ <int_max_str_digits>`.
+
.. method:: float.is_integer()
Return ``True`` if the float instance is finite with integral
@@ -5460,6 +5467,165 @@ types, where they are relevant. Some of these are not reported by the
[<class 'bool'>]
+.. _int_max_str_digits:
+
+Integer string conversion length limitation
+===========================================
+
+CPython has a global limit for converting between :class:`int` and :class:`str`
+to mitigate denial of service attacks. This limit *only* applies to decimal or
+other non-power-of-two number bases. Hexadecimal, octal, and binary conversions
+are unlimited. The limit can be configured.
+
+The :class:`int` type in CPython is an abitrary length number stored in binary
+form (commonly known as a "bignum"). There exists no algorithm that can convert
+a string to a binary integer or a binary integer to a string in linear time,
+*unless* the base is a power of 2. Even the best known algorithms for base 10
+have sub-quadratic complexity. Converting a large value such as ``int('1' *
+500_000)`` can take over a second on a fast CPU.
+
+Limiting conversion size offers a practical way to avoid `CVE-2020-10735
+<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+
+The limit is applied to the number of digit characters in the input or output
+string when a non-linear conversion algorithm would be involved. Underscores
+and the sign are not counted towards the limit.
+
+When an operation would exceed the limit, a :exc:`ValueError` is raised:
+
+.. doctest::
+
+ >>> import sys
+ >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default.
+ >>> _ = int('2' * 5432)
+ Traceback (most recent call last):
+ ...
+ ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits.
+ >>> i = int('2' * 4300)
+ >>> len(str(i))
+ 4300
+ >>> i_squared = i*i
+ >>> len(str(i_squared))
+ Traceback (most recent call last):
+ ...
+ ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits.
+ >>> len(hex(i_squared))
+ 7144
+ >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited.
+
+The default limit is 4300 digits as provided in
+:data:`sys.int_info.default_max_str_digits <sys.int_info>`.
+The lowest limit that can be configured is 640 digits as provided in
+:data:`sys.int_info.str_digits_check_threshold <sys.int_info>`.
+
+Verification:
+
+.. doctest::
+
+ >>> import sys
+ >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info
+ >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info
+ >>> msg = int('578966293710682886880994035146873798396722250538762761564'
+ ... '9252925514383915483333812743580549779436104706260696366600'
+ ... '571186405732').to_bytes(53, 'big')
+ ...
+
+.. versionadded:: 3.12
+
+Affected APIs
+-------------
+
+The limition only applies to potentially slow conversions between :class:`int`
+and :class:`str` or :class:`bytes`:
+
+* ``int(string)`` with default base 10.
+* ``int(string, base)`` for all bases that are not a power of 2.
+* ``str(integer)``.
+* ``repr(integer)``
+* any other string conversion to base 10, for example ``f"{integer}"``,
+ ``"{}".format(integer)``, or ``b"%d" % integer``.
+
+The limitations do not apply to functions with a linear algorithm:
+
+* ``int(string, base)`` with base 2, 4, 8, 16, or 32.
+* :func:`int.from_bytes` and :func:`int.to_bytes`.
+* :func:`hex`, :func:`oct`, :func:`bin`.
+* :ref:`formatspec` for hex, octal, and binary numbers.
+* :class:`str` to :class:`float`.
+* :class:`str` to :class:`decimal.Decimal`.
+
+Configuring the limit
+---------------------
+
+Before Python starts up you can use an environment variable or an interpreter
+command line flag to configure the limit:
+
+* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g.
+ ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or
+ ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation.
+* :option:`-X int_max_str_digits <-X>`, e.g.
+ ``python3 -X int_max_str_digits=640``
+* :data:`sys.flags.int_max_str_digits` contains the value of
+ :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`.
+ If both the env var and the ``-X`` option are set, the ``-X`` option takes
+ precedence. A value of *-1* indicates that both were unset, thus a value of
+ :data:`sys.int_info.default_max_str_digits` was used during initilization.
+
+From code, you can inspect the current limit and set a new one using these
+:mod:`sys` APIs:
+
+* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are
+ a getter and setter for the interpreter-wide limit. Subinterpreters have
+ their own limit.
+
+Information about the default and minimum can be found in :attr:`sys.int_info`:
+
+* :data:`sys.int_info.default_max_str_digits <sys.int_info>` is the compiled-in
+ default limit.
+* :data:`sys.int_info.str_digits_check_threshold <sys.int_info>` is the lowest
+ accepted value for the limit (other than 0 which disables it).
+
+.. versionadded:: 3.12
+
+.. caution::
+
+ Setting a low limit *can* lead to problems. While rare, code exists that
+ contains integer constants in decimal in their source that exceed the
+ minimum threshold. A consequence of setting the limit is that Python source
+ code containing decimal integer literals longer than the limit will
+ encounter an error during parsing, usually at startup time or import time or
+ even at installation time - anytime an up to date ``.pyc`` does not already
+ exist for the code. A workaround for source that contains such large
+ constants is to convert them to ``0x`` hexadecimal form as it has no limit.
+
+ Test your application thoroughly if you use a low limit. Ensure your tests
+ run with the limit set early via the environment or flag so that it applies
+ during startup and even during any installation step that may invoke Python
+ to precompile ``.py`` sources to ``.pyc`` files.
+
+Recommended configuration
+-------------------------
+
+The default :data:`sys.int_info.default_max_str_digits` is expected to be
+reasonable for most applications. If your application requires a different
+limit, set it from your main entry point using Python version agnostic code as
+these APIs were added in security patch releases in versions before 3.12.
+
+Example::
+
+ >>> import sys
+ >>> if hasattr(sys, "set_int_max_str_digits"):
+ ... upper_bound = 68000
+ ... lower_bound = 4004
+ ... current_limit = sys.get_int_max_str_digits()
+ ... if current_limit == 0 or current_limit > upper_bound:
+ ... sys.set_int_max_str_digits(upper_bound)
+ ... elif current_limit < lower_bound:
+ ... sys.set_int_max_str_digits(lower_bound)
+
+If you need to disable it entirely, set it to ``0``.
+
+
.. rubric:: Footnotes
.. [1] Additional information on these special methods may be found in the Python
diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 43db4ba..cc41b99 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -502,9 +502,9 @@ always available.
The :term:`named tuple` *flags* exposes the status of command line
flags. The attributes are read only.
- ============================= ================================================================
+ ============================= ==============================================================================================================
attribute flag
- ============================= ================================================================
+ ============================= ==============================================================================================================
:const:`debug` :option:`-d`
:const:`inspect` :option:`-i`
:const:`interactive` :option:`-i`
@@ -521,7 +521,8 @@ always available.
:const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode <devmode>`)
:const:`utf8_mode` :option:`-X utf8 <-X>`
:const:`safe_path` :option:`-P`
- ============================= ================================================================
+ :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation <int_max_str_digits>`)
+ ============================= ==============================================================================================================
.. versionchanged:: 3.2
Added ``quiet`` attribute for the new :option:`-q` flag.
@@ -543,6 +544,9 @@ always available.
.. versionchanged:: 3.11
Added the ``safe_path`` attribute for :option:`-P` option.
+ .. versionchanged:: 3.12
+ Added the ``int_max_str_digits`` attribute.
+
.. data:: float_info
@@ -723,6 +727,13 @@ always available.
.. versionadded:: 3.6
+.. function:: get_int_max_str_digits()
+
+ Returns the current value for the :ref:`integer string conversion length
+ limitation <int_max_str_digits>`. See also :func:`set_int_max_str_digits`.
+
+ .. versionadded:: 3.12
+
.. function:: getrefcount(object)
Return the reference count of the *object*. The count returned is generally one
@@ -996,19 +1007,31 @@ always available.
.. tabularcolumns:: |l|L|
- +-------------------------+----------------------------------------------+
- | Attribute | Explanation |
- +=========================+==============================================+
- | :const:`bits_per_digit` | number of bits held in each digit. Python |
- | | integers are stored internally in base |
- | | ``2**int_info.bits_per_digit`` |
- +-------------------------+----------------------------------------------+
- | :const:`sizeof_digit` | size in bytes of the C type used to |
- | | represent a digit |
- +-------------------------+----------------------------------------------+
+ +----------------------------------------+-----------------------------------------------+
+ | Attribute | Explanation |
+ +========================================+===============================================+
+ | :const:`bits_per_digit` | number of bits held in each digit. Python |
+ | | integers are stored internally in base |
+ | | ``2**int_info.bits_per_digit`` |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`sizeof_digit` | size in bytes of the C type used to |
+ | | represent a digit |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`default_max_str_digits` | default value for |
+ | | :func:`sys.get_int_max_str_digits` when it |
+ | | is not otherwise explicitly configured. |
+ +----------------------------------------+-----------------------------------------------+
+ | :const:`str_digits_check_threshold` | minimum non-zero value for |
+ | | :func:`sys.set_int_max_str_digits`, |
+ | | :envvar:`PYTHONINTMAXSTRDIGITS`, or |
+ | | :option:`-X int_max_str_digits <-X>`. |
+ +----------------------------------------+-----------------------------------------------+
.. versionadded:: 3.1
+ .. versionchanged:: 3.12
+ Added ``default_max_str_digits`` and ``str_digits_check_threshold``.
+
.. data:: __interactivehook__
@@ -1308,6 +1331,14 @@ always available.
.. availability:: Unix.
+.. function:: set_int_max_str_digits(n)
+
+ Set the :ref:`integer string conversion length limitation
+ <int_max_str_digits>` used by this interpreter. See also
+ :func:`get_int_max_str_digits`.
+
+ .. versionadded:: 3.12
+
.. function:: setprofile(profilefunc)
.. index::
diff --git a/Doc/library/test.rst b/Doc/library/test.rst
index f3bc7e7..eff3751 100644
--- a/Doc/library/test.rst
+++ b/Doc/library/test.rst
@@ -1011,6 +1011,16 @@ The :mod:`test.support` module defines the following functions:
.. versionadded:: 3.10
+.. function:: adjust_int_max_str_digits(max_digits)
+
+ This function returns a context manager that will change the global
+ :func:`sys.set_int_max_str_digits` setting for the duration of the
+ context to allow execution of test code that needs a different limit
+ on the number of digits when converting between an integer and string.
+
+ .. versionadded:: 3.12
+
+
The :mod:`test.support` module defines the following classes:
diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index fa2b07e..6a33d98 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -505,6 +505,9 @@ Miscellaneous options
stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start
tracing with a traceback limit of *NFRAME* frames. See the
:func:`tracemalloc.start` for more information.
+ * ``-X int_max_str_digits`` configures the :ref:`integer string conversion
+ length limitation <int_max_str_digits>`. See also
+ :envvar:`PYTHONINTMAXSTRDIGITS`.
* ``-X importtime`` to show how long each import takes. It shows module
name, cumulative time (including nested imports) and self time (excluding
nested imports). Note that its output may be broken in multi-threaded
@@ -583,6 +586,9 @@ Miscellaneous options
The ``-X frozen_modules`` option.
.. versionadded:: 3.12
+ The ``-X int_max_str_digits`` option.
+
+ .. versionadded:: 3.12
The ``-X perf`` option.
@@ -763,6 +769,13 @@ conflict.
.. versionadded:: 3.2.3
+.. envvar:: PYTHONINTMAXSTRDIGITS
+
+ If this variable is set to an integer, it is used to configure the
+ interpreter's global :ref:`integer string conversion length limitation
+ <int_max_str_digits>`.
+
+ .. versionadded:: 3.12
.. envvar:: PYTHONIOENCODING
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index f9fa8ac..70a1104 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -83,6 +83,17 @@ Other Language Changes
mapping is hashable.
(Contributed by Serhiy Storchaka in :gh:`87995`.)
+* Converting between :class:`int` and :class:`str` in bases other than 2
+ (binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal)
+ now raises a :exc:`ValueError` if the number of digits in string form is
+ above a limit to avoid potential denial of service attacks due to the
+ algorithmic complexity. This is a mitigation for `CVE-2020-10735
+ <https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+ This limit can be configured or disabled by environment variable, command
+ line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion
+ length limitation <int_max_str_digits>` documentation. The default limit
+ is 4300 digits in string form.
+
New Modules
===========
diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h
index c736bfe..03f6b90 100644
--- a/Include/internal/pycore_global_strings.h
+++ b/Include/internal/pycore_global_strings.h
@@ -451,6 +451,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(mapping)
STRUCT_FOR_ID(match)
STRUCT_FOR_ID(max_length)
+ STRUCT_FOR_ID(maxdigits)
STRUCT_FOR_ID(maxevents)
STRUCT_FOR_ID(maxmem)
STRUCT_FOR_ID(maxsplit)
diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h
index 69f88d7..6e49126 100644
--- a/Include/internal/pycore_initconfig.h
+++ b/Include/internal/pycore_initconfig.h
@@ -170,6 +170,8 @@ extern void _Py_DumpPathConfig(PyThreadState *tstate);
PyAPI_FUNC(PyObject*) _Py_Get_Getpath_CodeObject(void);
+extern int _Py_global_config_int_max_str_digits; // TODO(gpshead): move this into PyConfig in 3.12 after the backports ship.
+
/* --- Function used for testing ---------------------------------- */
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h
index d713869..a5ddcf2 100644
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -176,6 +176,8 @@ struct _is {
struct types_state types;
struct callable_cache callable_cache;
+ int int_max_str_digits;
+
/* The following fields are here to avoid allocation during init.
The data is exposed through PyInterpreterState pointer fields.
These fields should not be accessed directly outside of init.
diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
index 67dd5c3..76c9a5f 100644
--- a/Include/internal/pycore_long.h
+++ b/Include/internal/pycore_long.h
@@ -11,6 +11,41 @@ extern "C" {
#include "pycore_global_objects.h" // _PY_NSMALLNEGINTS
#include "pycore_runtime.h" // _PyRuntime
+/*
+ * Default int base conversion size limitation: Denial of Service prevention.
+ *
+ * Chosen such that this isn't wildly slow on modern hardware and so that
+ * everyone's existing deployed numpy test suite passes before
+ * https://github.com/numpy/numpy/issues/22098 is widely available.
+ *
+ * $ python -m timeit -s 's = * "1"*4300' 'int(s)'
+ * 2000 loops, best of 5: 125 usec per loop
+ * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)'
+ * 1000 loops, best of 5: 311 usec per loop
+ * (zen2 cloud VM)
+ *
+ * 4300 decimal digits fits a ~14284 bit number.
+ */
+#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300
+/*
+ * Threshold for max digits check. For performance reasons int() and
+ * int.__str__() don't checks values that are smaller than this
+ * threshold. Acts as a guaranteed minimum size limit for bignums that
+ * applications can expect from CPython.
+ *
+ * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))'
+ * 20000 loops, best of 5: 12 usec per loop
+ *
+ * "640 digits should be enough for anyone." - gps
+ * fits a ~2126 bit decimal number.
+ */
+#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640
+
+#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \
+ (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD))
+# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold."
+#endif
+
/* runtime lifecycle */
diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h
index 58d9e93..65ab098 100644
--- a/Include/internal/pycore_runtime_init_generated.h
+++ b/Include/internal/pycore_runtime_init_generated.h
@@ -960,6 +960,7 @@ extern "C" {
INIT_ID(mapping), \
INIT_ID(match), \
INIT_ID(max_length), \
+ INIT_ID(maxdigits), \
INIT_ID(maxevents), \
INIT_ID(maxmem), \
INIT_ID(maxsplit), \
@@ -2224,6 +2225,8 @@ _PyUnicode_InitStaticStrings(void) {
PyUnicode_InternInPlace(&string);
string = &_Py_ID(max_length);
PyUnicode_InternInPlace(&string);
+ string = &_Py_ID(maxdigits);
+ PyUnicode_InternInPlace(&string);
string = &_Py_ID(maxevents);
PyUnicode_InternInPlace(&string);
string = &_Py_ID(maxmem);
@@ -6373,6 +6376,10 @@ _PyStaticObjects_CheckRefcnt(void) {
_PyObject_Dump((PyObject *)&_Py_ID(max_length));
Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
};
+ if (Py_REFCNT((PyObject *)&_Py_ID(maxdigits)) < _PyObject_IMMORTAL_REFCNT) {
+ _PyObject_Dump((PyObject *)&_Py_ID(maxdigits));
+ Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
+ };
if (Py_REFCNT((PyObject *)&_Py_ID(maxevents)) < _PyObject_IMMORTAL_REFCNT) {
_PyObject_Dump((PyObject *)&_Py_ID(maxevents));
Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT");
diff --git a/Lib/idlelib/idle_test/test_sidebar.py b/Lib/idlelib/idle_test/test_sidebar.py
index 290e037..049531e 100644
--- a/Lib/idlelib/idle_test/test_sidebar.py
+++ b/Lib/idlelib/idle_test/test_sidebar.py
@@ -6,6 +6,7 @@ from itertools import chain
import unittest
import unittest.mock
from test.support import requires, swap_attr
+from test import support
import tkinter as tk
from idlelib.idle_test.tkinter_testing_utils import run_in_tk_mainloop
@@ -612,7 +613,8 @@ class ShellSidebarTest(unittest.TestCase):
@run_in_tk_mainloop()
def test_very_long_wrapped_line(self):
- with swap_attr(self.shell, 'squeezer', None):
+ with support.adjust_int_max_str_digits(11_111), \
+ swap_attr(self.shell, 'squeezer', None):
self.do_input('x = ' + '1'*10_000 + '\n')
yield
self.assertEqual(self.get_sidebar_lines(), ['>>>'])
diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py
index 2409fb0..573dce5 100644
--- a/Lib/test/support/__init__.py
+++ b/Lib/test/support/__init__.py
@@ -2341,3 +2341,14 @@ def sleeping_retry(timeout, err_msg=None, /,
time.sleep(delay)
delay = min(delay * 2, max_delay)
+
+
+@contextlib.contextmanager
+def adjust_int_max_str_digits(max_digits):
+ """Temporarily change the integer string conversion length limit."""
+ current = sys.get_int_max_str_digits()
+ try:
+ sys.set_int_max_str_digits(max_digits)
+ yield
+ finally:
+ sys.set_int_max_str_digits(current)
diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py
index 4cfefe4..68617b1 100644
--- a/Lib/test/test_ast.py
+++ b/Lib/test/test_ast.py
@@ -1136,6 +1136,14 @@ Module(
self.assertRaises(ValueError, ast.literal_eval, '+True')
self.assertRaises(ValueError, ast.literal_eval, '2+3')
+ def test_literal_eval_str_int_limit(self):
+ with support.adjust_int_max_str_digits(4000):
+ ast.literal_eval('3'*4000) # no error
+ with self.assertRaises(SyntaxError) as err_ctx:
+ ast.literal_eval('3'*4001)
+ self.assertIn('Exceeds the limit ', str(err_ctx.exception))
+ self.assertIn(' Consider hexadecimal ', str(err_ctx.exception))
+
def test_literal_eval_complex(self):
# Issue #4907
self.assertEqual(ast.literal_eval('6j'), 6j)
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 6446976..db96708 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -865,6 +865,39 @@ class CmdLineTest(unittest.TestCase):
self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr)
self.assertNotEqual(proc.returncode, 0)
+ def test_int_max_str_digits(self):
+ code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())"
+
+ assert_python_failure('-X', 'int_max_str_digits', '-c', code)
+ assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code)
+ assert_python_failure('-X', 'int_max_str_digits=100', '-c', code)
+
+ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo')
+ assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100')
+
+ def res2int(res):
+ out = res.out.strip().decode("utf-8")
+ return tuple(int(i) for i in out.split())
+
+ res = assert_python_ok('-c', code)
+ self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits()))
+ res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code)
+ self.assertEqual(res2int(res), (0, 0))
+ res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code)
+ self.assertEqual(res2int(res), (4000, 4000))
+ res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code)
+ self.assertEqual(res2int(res), (100000, 100000))
+
+ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0')
+ self.assertEqual(res2int(res), (0, 0))
+ res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000')
+ self.assertEqual(res2int(res), (4000, 4000))
+ res = assert_python_ok(
+ '-X', 'int_max_str_digits=6000', '-c', code,
+ PYTHONINTMAXSTRDIGITS='4000'
+ )
+ self.assertEqual(res2int(res), (6000, 6000))
+
@unittest.skipIf(interpreter_requires_environment(),
'Cannot run -I tests when PYTHON env vars are required.')
diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py
index d7b78f6..3ed57c2 100644
--- a/Lib/test/test_compile.py
+++ b/Lib/test/test_compile.py
@@ -199,6 +199,19 @@ if 1:
self.assertEqual(eval("0o777"), 511)
self.assertEqual(eval("-0o0000010"), -8)
+ def test_int_literals_too_long(self):
+ n = 3000
+ source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4"
+ with support.adjust_int_max_str_digits(n):
+ compile(source, "<long_int_pass>", "exec") # no errors.
+ with support.adjust_int_max_str_digits(n-1):
+ with self.assertRaises(SyntaxError) as err_ctx:
+ compile(source, "<long_int_fail>", "exec")
+ exc = err_ctx.exception
+ self.assertEqual(exc.lineno, 3)
+ self.assertIn('Exceeds the limit ', str(exc))
+ self.assertIn(' Consider hexadecimal ', str(exc))
+
def test_unary_minus(self):
# Verify treatment of unary minus on negative numbers SF bug #660455
if sys.maxsize == 2147483647:
diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py
index 7c5964e..67ccaab 100644
--- a/Lib/test/test_decimal.py
+++ b/Lib/test/test_decimal.py
@@ -2526,6 +2526,15 @@ class CUsabilityTest(UsabilityTest):
class PyUsabilityTest(UsabilityTest):
decimal = P
+ def setUp(self):
+ super().setUp()
+ self._previous_int_limit = sys.get_int_max_str_digits()
+ sys.set_int_max_str_digits(7000)
+
+ def tearDown(self):
+ sys.set_int_max_str_digits(self._previous_int_limit)
+ super().tearDown()
+
class PythonAPItests(unittest.TestCase):
def test_abc(self):
@@ -4626,6 +4635,15 @@ class CCoverage(Coverage):
class PyCoverage(Coverage):
decimal = P
+ def setUp(self):
+ super().setUp()
+ self._previous_int_limit = sys.get_int_max_str_digits()
+ sys.set_int_max_str_digits(7000)
+
+ def tearDown(self):
+ sys.set_int_max_str_digits(self._previous_int_limit)
+ super().tearDown()
+
class PyFunctionality(unittest.TestCase):
"""Extra functionality in decimal.py"""
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py
index a72699c..e9561b0 100644
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -577,5 +577,119 @@ class IntTestCases(unittest.TestCase):
self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807)
+class IntStrDigitLimitsTests(unittest.TestCase):
+
+ int_class = int # Override this in subclasses to reuse the suite.
+
+ def setUp(self):
+ super().setUp()
+ self._previous_limit = sys.get_int_max_str_digits()
+ sys.set_int_max_str_digits(2048)
+
+ def tearDown(self):
+ sys.set_int_max_str_digits(self._previous_limit)
+ super().tearDown()
+
+ def test_disabled_limit(self):
+ self.assertGreater(sys.get_int_max_str_digits(), 0)
+ self.assertLess(sys.get_int_max_str_digits(), 20_000)
+ with support.adjust_int_max_str_digits(0):
+ self.assertEqual(sys.get_int_max_str_digits(), 0)
+ i = self.int_class('1' * 20_000)
+ str(i)
+ self.assertGreater(sys.get_int_max_str_digits(), 0)
+
+ def test_max_str_digits_edge_cases(self):
+ """Ignore the +/- sign and space padding."""
+ int_class = self.int_class
+ maxdigits = sys.get_int_max_str_digits()
+
+ int_class('1' * maxdigits)
+ int_class(' ' + '1' * maxdigits)
+ int_class('1' * maxdigits + ' ')
+ int_class('+' + '1' * maxdigits)
+ int_class('-' + '1' * maxdigits)
+ self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits)
+
+ def check(self, i, base=None):
+ with self.assertRaises(ValueError):
+ if base is None:
+ self.int_class(i)
+ else:
+ self.int_class(i, base)
+
+ def test_max_str_digits(self):
+ maxdigits = sys.get_int_max_str_digits()
+
+ self.check('1' * (maxdigits + 1))
+ self.check(' ' + '1' * (maxdigits + 1))
+ self.check('1' * (maxdigits + 1) + ' ')
+ self.check('+' + '1' * (maxdigits + 1))
+ self.check('-' + '1' * (maxdigits + 1))
+ self.check('1' * (maxdigits + 1))
+
+ i = 10 ** maxdigits
+ with self.assertRaises(ValueError):
+ str(i)
+
+ def test_power_of_two_bases_unlimited(self):
+ """The limit does not apply to power of 2 bases."""
+ maxdigits = sys.get_int_max_str_digits()
+
+ for base in (2, 4, 8, 16, 32):
+ with self.subTest(base=base):
+ self.int_class('1' * (maxdigits + 1), base)
+ assert maxdigits < 100_000
+ self.int_class('1' * 100_000, base)
+
+ def test_underscores_ignored(self):
+ maxdigits = sys.get_int_max_str_digits()
+
+ triples = maxdigits // 3
+ s = '111' * triples
+ s_ = '1_11' * triples
+ self.int_class(s) # succeeds
+ self.int_class(s_) # succeeds
+ self.check(f'{s}111')
+ self.check(f'{s_}_111')
+
+ def test_sign_not_counted(self):
+ int_class = self.int_class
+ max_digits = sys.get_int_max_str_digits()
+ s = '5' * max_digits
+ i = int_class(s)
+ pos_i = int_class(f'+{s}')
+ assert i == pos_i
+ neg_i = int_class(f'-{s}')
+ assert -pos_i == neg_i
+ str(pos_i)
+ str(neg_i)
+
+ def _other_base_helper(self, base):
+ int_class = self.int_class
+ max_digits = sys.get_int_max_str_digits()
+ s = '2' * max_digits
+ i = int_class(s, base)
+ if base > 10:
+ with self.assertRaises(ValueError):
+ str(i)
+ elif base < 10:
+ str(i)
+ with self.assertRaises(ValueError) as err:
+ int_class(f'{s}1', base)
+
+ def test_int_from_other_bases(self):
+ base = 3
+ with self.subTest(base=base):
+ self._other_base_helper(base)
+ base = 36
+ with self.subTest(base=base):
+ self._other_base_helper(base)
+
+
+class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests):
+ int_class = IntSubclass
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py
index fdb9e62..124045b 100644
--- a/Lib/test/test_json/test_decode.py
+++ b/Lib/test/test_json/test_decode.py
@@ -2,6 +2,7 @@ import decimal
from io import StringIO
from collections import OrderedDict
from test.test_json import PyTest, CTest
+from test import support
class TestDecode:
@@ -95,5 +96,13 @@ class TestDecode:
d = self.json.JSONDecoder()
self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000)
+ def test_limit_int(self):
+ maxdigits = 5000
+ with support.adjust_int_max_str_digits(maxdigits):
+ self.loads('1' * maxdigits)
+ with self.assertRaises(ValueError):
+ self.loads('1' * (maxdigits + 1))
+
+
class TestPyDecode(TestDecode, PyTest): pass
class TestCDecode(TestDecode, CTest): pass
diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py
index 202fb30..4148273 100644
--- a/Lib/test/test_sys.py
+++ b/Lib/test/test_sys.py
@@ -550,11 +550,17 @@ class SysModuleTest(unittest.TestCase):
self.assertIsInstance(sys.executable, str)
self.assertEqual(len(sys.float_info), 11)
self.assertEqual(sys.float_info.radix, 2)
- self.assertEqual(len(sys.int_info), 2)
+ self.assertEqual(len(sys.int_info), 4)
self.assertTrue(sys.int_info.bits_per_digit % 5 == 0)
self.assertTrue(sys.int_info.sizeof_digit >= 1)
+ self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500)
+ self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100)
+ self.assertGreater(sys.int_info.default_max_str_digits,
+ sys.int_info.str_digits_check_threshold)
self.assertEqual(type(sys.int_info.bits_per_digit), int)
self.assertEqual(type(sys.int_info.sizeof_digit), int)
+ self.assertIsInstance(sys.int_info.default_max_str_digits, int)
+ self.assertIsInstance(sys.int_info.str_digits_check_threshold, int)
self.assertIsInstance(sys.hexversion, int)
self.assertEqual(len(sys.hash_info), 9)
@@ -677,7 +683,7 @@ class SysModuleTest(unittest.TestCase):
"dont_write_bytecode", "no_user_site", "no_site",
"ignore_environment", "verbose", "bytes_warning", "quiet",
"hash_randomization", "isolated", "dev_mode", "utf8_mode",
- "warn_default_encoding", "safe_path")
+ "warn_default_encoding", "safe_path", "int_max_str_digits")
for attr in attrs:
self.assertTrue(hasattr(sys.flags, attr), attr)
attr_type = bool if attr in ("dev_mode", "safe_path") else int
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
index 6c4601d..9ff5545 100644
--- a/Lib/test/test_xmlrpc.py
+++ b/Lib/test/test_xmlrpc.py
@@ -289,6 +289,16 @@ class XMLRPCTestCase(unittest.TestCase):
check('<bigdecimal>9876543210.0123456789</bigdecimal>',
decimal.Decimal('9876543210.0123456789'))
+ def test_limit_int(self):
+ check = self.check_loads
+ maxdigits = 5000
+ with support.adjust_int_max_str_digits(maxdigits):
+ s = '1' * (maxdigits + 1)
+ with self.assertRaises(ValueError):
+ check(f'<int>{s}</int>', None)
+ with self.assertRaises(ValueError):
+ check(f'<biginteger>{s}</biginteger>', None)
+
def test_get_host_info(self):
# see bug #3613, this raised a TypeError
transp = xmlrpc.client.Transport()
diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst
new file mode 100644
index 0000000..ea3b85d
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst
@@ -0,0 +1,14 @@
+Converting between :class:`int` and :class:`str` in bases other than 2
+(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now
+raises a :exc:`ValueError` if the number of digits in string form is above a
+limit to avoid potential denial of service attacks due to the algorithmic
+complexity. This is a mitigation for `CVE-2020-10735
+<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_.
+
+This new limit can be configured or disabled by environment variable, command
+line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length
+limitation <int_max_str_digits>` documentation. The default limit is 4300
+digits in string form.
+
+Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback
+from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily.
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 90ed02b..6c6e2ea 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -36,6 +36,8 @@ medium_value(PyLongObject *x)
#define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS)
#define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS)
+#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
+
static inline void
_Py_DECREF_INT(PyLongObject *op)
{
@@ -1815,6 +1817,17 @@ long_to_decimal_string_internal(PyObject *aa,
tenpow *= 10;
strlen++;
}
+ if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ Py_ssize_t strlen_nosign = strlen - negative;
+ if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
+ Py_DECREF(scratch);
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, strlen_nosign);
+ return -1;
+ }
+ }
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
Py_DECREF(scratch);
@@ -2328,6 +2341,7 @@ PyLong_FromString(const char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0) {
+ /* binary bases are not limited by int_max_str_digits */
int res = long_from_binary_base(&str, base, &z);
if (res < 0) {
/* Syntax error. */
@@ -2479,6 +2493,17 @@ digit beyond the first.
goto onError;
}
+ /* Limit the size to avoid excessive computation attacks. */
+ if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) && (digits > max_str_digits)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, digits);
+ return NULL;
+ }
+ }
+
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@@ -5355,6 +5380,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
}
return PyLong_FromLong(0L);
}
+ /* default base and limit, forward to standard implementation */
if (obase == NULL)
return PyNumber_Long(x);
@@ -6090,6 +6116,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field int_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
+ {"default_max_str_digits", "maximum string conversion digits limitation"},
+ {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
{NULL, NULL}
};
@@ -6097,7 +6125,7 @@ static PyStructSequence_Desc int_info_desc = {
"sys.int_info", /* name */
int_info__doc__, /* doc */
int_info_fields, /* fields */
- 2 /* number of fields */
+ 4 /* number of fields */
};
PyObject *
@@ -6112,6 +6140,17 @@ PyLong_GetInfo(void)
PyLong_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(sizeof(digit)));
+ /*
+ * The following two fields were added after investigating uses of
+ * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
+ * numba using sys.int_info.bits_per_digit as attribute access rather than
+ * sequence unpacking. Cython and sympy also refer to sys.int_info but only
+ * as info for debugging. No concern about adding these in a backport.
+ */
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(int_info);
return NULL;
@@ -6139,6 +6178,10 @@ _PyLong_InitTypes(PyInterpreterState *interp)
return _PyStatus_ERR("can't init int info type");
}
}
+ interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
+ if (interp->int_max_str_digits == -1) {
+ interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
+ }
return _PyStatus_OK();
}
diff --git a/Parser/pegen.c b/Parser/pegen.c
index 31e3ec0..a5d123d 100644
--- a/Parser/pegen.c
+++ b/Parser/pegen.c
@@ -1,5 +1,6 @@
#include <Python.h>
#include "pycore_ast.h" // _PyAST_Validate(),
+#include "pycore_pystate.h" // _PyThreadState_GET()
#include <errcode.h>
#include "tokenizer.h"
@@ -645,6 +646,28 @@ _PyPegen_number_token(Parser *p)
if (c == NULL) {
p->error_indicator = 1;
+ PyThreadState *tstate = _PyThreadState_GET();
+ // The only way a ValueError should happen in _this_ code is via
+ // PyLong_FromString hitting a length limit.
+ if (tstate->curexc_type == PyExc_ValueError &&
+ tstate->curexc_value != NULL) {
+ PyObject *type, *value, *tb;
+ // This acts as PyErr_Clear() as we're replacing curexc.
+ PyErr_Fetch(&type, &value, &tb);
+ Py_XDECREF(tb);
+ Py_DECREF(type);
+ /* Intentionally omitting columns to avoid a wall of 1000s of '^'s
+ * on the error message. Nobody is going to overlook their huge
+ * numeric literal once given the line. */
+ RAISE_ERROR_KNOWN_LOCATION(
+ p, PyExc_SyntaxError,
+ t->lineno, -1 /* col_offset */,
+ t->end_lineno, -1 /* end_col_offset */,
+ "%S - Consider hexadecimal for huge integer literals "
+ "to avoid decimal conversion limits.",
+ value);
+ Py_DECREF(value);
+ }
return NULL;
}
diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h
index ddf01a7..6864b8b 100644
--- a/Python/clinic/sysmodule.c.h
+++ b/Python/clinic/sysmodule.c.h
@@ -745,6 +745,82 @@ exit:
#endif /* defined(USE_MALLOPT) */
+PyDoc_STRVAR(sys_get_int_max_str_digits__doc__,
+"get_int_max_str_digits($module, /)\n"
+"--\n"
+"\n"
+"Set the maximum string digits limit for non-binary int<->str conversions.");
+
+#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \
+ {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__},
+
+static PyObject *
+sys_get_int_max_str_digits_impl(PyObject *module);
+
+static PyObject *
+sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+ return sys_get_int_max_str_digits_impl(module);
+}
+
+PyDoc_STRVAR(sys_set_int_max_str_digits__doc__,
+"set_int_max_str_digits($module, /, maxdigits)\n"
+"--\n"
+"\n"
+"Set the maximum string digits limit for non-binary int<->str conversions.");
+
+#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \
+ {"set_int_max_str_digits", _PyCFunction_CAST(sys_set_int_max_str_digits), METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__},
+
+static PyObject *
+sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits);
+
+static PyObject *
+sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
+
+ #define NUM_KEYWORDS 1
+ static struct {
+ PyGC_Head _this_is_not_used;
+ PyObject_VAR_HEAD
+ PyObject *ob_item[NUM_KEYWORDS];
+ } _kwtuple = {
+ .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
+ .ob_item = { &_Py_ID(maxdigits), },
+ };
+ #undef NUM_KEYWORDS
+ #define KWTUPLE (&_kwtuple.ob_base.ob_base)
+
+ #else // !Py_BUILD_CORE
+ # define KWTUPLE NULL
+ #endif // !Py_BUILD_CORE
+
+ static const char * const _keywords[] = {"maxdigits", NULL};
+ static _PyArg_Parser _parser = {
+ .keywords = _keywords,
+ .fname = "set_int_max_str_digits",
+ .kwtuple = KWTUPLE,
+ };
+ #undef KWTUPLE
+ PyObject *argsbuf[1];
+ int maxdigits;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ maxdigits = _PyLong_AsInt(args[0]);
+ if (maxdigits == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = sys_set_int_max_str_digits_impl(module, maxdigits);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(sys_getrefcount__doc__,
"getrefcount($module, object, /)\n"
"--\n"
@@ -1267,4 +1343,4 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored))
#ifndef SYS_GETANDROIDAPILEVEL_METHODDEF
#define SYS_GETANDROIDAPILEVEL_METHODDEF
#endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */
-/*[clinic end generated code: output=43b44240211afe95 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=15318cdd96b62b06 input=a9049054013a1b77]*/
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 33a8f27..f18ec40 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -3,6 +3,7 @@
#include "pycore_getopt.h" // _PyOS_GetOpt()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_interp.h" // _PyInterpreterState.runtime
+#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD
#include "pycore_pathconfig.h" // _Py_path_config
#include "pycore_pyerrors.h" // _PyErr_Fetch()
#include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig()
@@ -124,7 +125,11 @@ The following implementation-specific options are available:\n\
do nothing if is not supported on the current system. The default value is \"off\".\n\
\n\
-X frozen_modules=[on|off]: whether or not frozen modules should be used.\n\
- The default is \"on\" (or \"off\" if you are running a local build).";
+ The default is \"on\" (or \"off\" if you are running a local build).\n\
+\n\
+-X int_max_str_digits=number: limit the size of int<->str conversions.\n\
+ This helps avoid denial of service attacks when parsing untrusted data.\n\
+ The default is sys.int_info.default_max_str_digits. 0 disables.";
/* Envvars that don't have equivalent command-line options are listed first */
static const char usage_envvars[] =
@@ -144,6 +149,10 @@ static const char usage_envvars[] =
" to seed the hashes of str and bytes objects. It can also be set to an\n"
" integer in the range [0,4294967295] to get hash values with a\n"
" predictable seed.\n"
+"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n"
+" when converting from a string and when converting an int back to a str.\n"
+" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n"
+" 16, and 32 are never limited.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
" hooks.\n"
@@ -784,6 +793,10 @@ _PyConfig_InitCompatConfig(PyConfig *config)
config->code_debug_ranges = 1;
}
+/* Excluded from public struct PyConfig for backporting reasons. */
+/* default to unconfigured, _PyLong_InitTypes() does the rest */
+int _Py_global_config_int_max_str_digits = -1;
+
static void
config_init_defaults(PyConfig *config)
@@ -1762,6 +1775,48 @@ config_init_tracemalloc(PyConfig *config)
return _PyStatus_OK();
}
+static PyStatus
+config_init_int_max_str_digits(PyConfig *config)
+{
+ int maxdigits;
+ int valid = 0;
+
+ const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS");
+ if (env) {
+ if (!_Py_str_to_int(env, &maxdigits)) {
+ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD));
+ }
+ if (!valid) {
+#define STRINGIFY(VAL) _STRINGIFY(VAL)
+#define _STRINGIFY(VAL) #VAL
+ return _PyStatus_ERR(
+ "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= "
+ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)
+ " or 0 for unlimited.");
+ }
+ _Py_global_config_int_max_str_digits = maxdigits;
+ }
+
+ const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits");
+ if (xoption) {
+ const wchar_t *sep = wcschr(xoption, L'=');
+ if (sep) {
+ if (!config_wstr_to_int(sep + 1, &maxdigits)) {
+ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD));
+ }
+ }
+ if (!valid) {
+ return _PyStatus_ERR(
+ "-X int_max_str_digits: invalid limit; must be >= "
+ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)
+ " or 0 for unlimited.");
+#undef _STRINGIFY
+#undef STRINGIFY
+ }
+ _Py_global_config_int_max_str_digits = maxdigits;
+ }
+ return _PyStatus_OK();
+}
static PyStatus
config_init_pycache_prefix(PyConfig *config)
@@ -1818,6 +1873,7 @@ config_read_complex_options(PyConfig *config)
return status;
}
}
+
if (config->perf_profiling < 0) {
status = config_init_perf_profiling(config);
if (_PyStatus_EXCEPTION(status)) {
@@ -1825,6 +1881,13 @@ config_read_complex_options(PyConfig *config)
}
}
+ if (_Py_global_config_int_max_str_digits < 0) {
+ status = config_init_int_max_str_digits(config);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+ }
+
if (config->pycache_prefix == NULL) {
status = config_init_pycache_prefix(config);
if (_PyStatus_EXCEPTION(status)) {
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index 75e6455..a33abb2 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -20,6 +20,7 @@ Data members:
#include "pycore_code.h" // _Py_QuickenedCount
#include "pycore_frame.h" // _PyInterpreterFrame
#include "pycore_initconfig.h" // _PyStatus_EXCEPTION()
+#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD
#include "pycore_namespace.h" // _PyNamespace_New()
#include "pycore_object.h" // _PyObject_IS_GC()
#include "pycore_pathconfig.h" // _PyPathConfig_ComputeSysPath0()
@@ -1671,6 +1672,45 @@ sys_mdebug_impl(PyObject *module, int flag)
}
#endif /* USE_MALLOPT */
+
+/*[clinic input]
+sys.get_int_max_str_digits
+
+Set the maximum string digits limit for non-binary int<->str conversions.
+[clinic start generated code]*/
+
+static PyObject *
+sys_get_int_max_str_digits_impl(PyObject *module)
+/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/
+{
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ return PyLong_FromSsize_t(interp->int_max_str_digits);
+}
+
+/*[clinic input]
+sys.set_int_max_str_digits
+
+ maxdigits: int
+
+Set the maximum string digits limit for non-binary int<->str conversions.
+[clinic start generated code]*/
+
+static PyObject *
+sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits)
+/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/
+{
+ PyThreadState *tstate = _PyThreadState_GET();
+ if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) {
+ tstate->interp->int_max_str_digits = maxdigits;
+ Py_RETURN_NONE;
+ } else {
+ PyErr_Format(
+ PyExc_ValueError, "maxdigits must be 0 or larger than %d",
+ _PY_LONG_MAX_STR_DIGITS_THRESHOLD);
+ return NULL;
+ }
+}
+
size_t
_PySys_GetSizeOf(PyObject *o)
{
@@ -2186,6 +2226,8 @@ static PyMethodDef sys_methods[] = {
SYS_DEACTIVATE_STACK_TRAMPOLINE_METHODDEF
SYS_IS_STACK_TRAMPOLINE_ACTIVE_METHODDEF
SYS_UNRAISABLEHOOK_METHODDEF
+ SYS_GET_INT_MAX_STR_DIGITS_METHODDEF
+ SYS_SET_INT_MAX_STR_DIGITS_METHODDEF
#ifdef Py_STATS
SYS__STATS_ON_METHODDEF
SYS__STATS_OFF_METHODDEF
@@ -2686,6 +2728,7 @@ static PyStructSequence_Field flags_fields[] = {
{"utf8_mode", "-X utf8"},
{"warn_default_encoding", "-X warn_default_encoding"},
{"safe_path", "-P"},
+ {"int_max_str_digits", "-X int_max_str_digits"},
{0}
};
@@ -2693,7 +2736,7 @@ static PyStructSequence_Desc flags_desc = {
"sys.flags", /* name */
flags__doc__, /* doc */
flags_fields, /* fields */
- 17
+ 18
};
static int
@@ -2734,6 +2777,7 @@ set_flags_from_config(PyInterpreterState *interp, PyObject *flags)
SetFlag(preconfig->utf8_mode);
SetFlag(config->warn_default_encoding);
SetFlagObj(PyBool_FromLong(config->safe_path));
+ SetFlag(_Py_global_config_int_max_str_digits);
#undef SetFlagObj
#undef SetFlag
return 0;