diff options
author | Gregory P. Smith <greg@krypto.org> | 2022-09-05 09:21:03 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-05 09:21:03 (GMT) |
commit | cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15 (patch) | |
tree | e8c95e4984b8f9d67f78ecc425e839302aa4c87e /Objects | |
parent | d348afa15d5a997e7a8e51c0f789f41cb15cc651 (diff) | |
download | cpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.zip cpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.tar.gz cpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.tar.bz2 |
[3.9] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96502)
* Correctly pre-check for int-to-str conversion (#96537)
Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =)
The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact.
The justification for the current check. The C code check is:
```c
max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10
```
In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is:
$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$
From this it follows that
$$\frac{M}{3L} < \frac{s-1}{10}$$
hence that
$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$
So
$$2^{L(s-1)} > 10^M.$$
But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check.
<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->
Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Christian Heimes <christian@python.org>
Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/longobject.c | 65 |
1 files changed, 64 insertions, 1 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c index cf13b2c..ec18ec3 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3,7 +3,9 @@ /* XXX The functional organization of this file is terrible */ #include "Python.h" +#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits #include "pycore_interp.h" // _PY_NSMALLPOSINTS +#include "pycore_long.h" #include "pycore_pystate.h" // _Py_IsMainInterpreter() #include "longintrepr.h" @@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL; #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" + static PyObject * get_small_int(sdigit ival) { @@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2441,6 +2475,17 @@ digit beyond the first. goto onError; } + /* Limit the size to avoid excessive computation attacks. */ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, + max_str_digits, digits); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); @@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -5745,6 +5793,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. + */ + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate) } } } + tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits; + if (tstate->interp->int_max_str_digits == -1) { + tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 1; } |