diff options
author | Gregory P. Smith <gps@google.com> | 2022-09-02 16:51:49 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-02 16:51:49 (GMT) |
commit | 8f0fa4bd10aba723aff988720cd26b93be99bc12 (patch) | |
tree | 533e993997f3f0135df42dfca9796996361ca504 /Objects/longobject.c | |
parent | bbcb03e7b07ecf6f3ed0c308f72bc10f928c85a8 (diff) | |
download | cpython-8f0fa4bd10aba723aff988720cd26b93be99bc12.zip cpython-8f0fa4bd10aba723aff988720cd26b93be99bc12.tar.gz cpython-8f0fa4bd10aba723aff988720cd26b93be99bc12.tar.bz2 |
[3.10] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96501)
Integer to and from text conversions via CPython's bignum `int` type are not safe against denial of service attacks due to malicious input. Very large input strings with hundreds of thousands of digits can consume several CPU seconds.
This PR comes fresh from a pile of work done in our private Python Security Response Team (PSRT) repo.
This backports https://github.com/python/cpython/pull/96499 aka 511ca9452033ef95bc7d7fc404b8161068226002
Signed-off-by: Christian Heimes [Red Hat] <christian@python.org>
Tons-of-polishing-up-by: Gregory P. Smith [Google] <greg@krypto.org>
Reviewed in the private PSRT repo by many others (see the NEWS entry in the PR).
<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->
I wrote up [a one pager for the release managers](https://docs.google.com/document/d/1KjuF_aXlzPUxTK4BMgezGJ2Pn7uevfX7g0_mvgHlL7Y/edit#).
Diffstat (limited to 'Objects/longobject.c')
-rw-r--r-- | Objects/longobject.c | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c index 685bd56..780ea81 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -4,6 +4,7 @@ #include "Python.h" #include "pycore_bitutils.h" // _Py_popcount32() +#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits #include "pycore_interp.h" // _PY_NSMALLPOSINTS #include "pycore_long.h" // __PyLong_GetSmallInt_internal() #include "pycore_object.h" // _PyObject_InitVar() @@ -35,6 +36,8 @@ _Py_IDENTIFIER(big); #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" + static PyObject * get_small_int(sdigit ival) { @@ -1660,6 +1663,17 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, strlen_nosign); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2173,6 +2187,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2324,6 +2339,17 @@ digit beyond the first. goto onError; } + /* Limit the size to avoid excessive computation attacks. 
*/ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, digits); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -4944,6 +4970,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); @@ -5674,6 +5701,8 @@ internal representation of integers. The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -5681,7 +5710,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -5696,6 +5725,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. 
+ */ + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -5720,6 +5760,10 @@ _PyLong_Init(PyInterpreterState *interp) interp->small_ints[i] = v; } + interp->int_max_str_digits = _Py_global_config_int_max_str_digits; + if (interp->int_max_str_digits == -1) { + interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 0; } |