summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorGregory P. Smith <greg@krypto.org>2022-09-05 20:26:09 (GMT)
committerGitHub <noreply@github.com>2022-09-05 20:26:09 (GMT)
commitb5e331fdb38684808ffc540d53e8595bdc408b89 (patch)
treefff15beb4402c977a0a4dc51aaeab8976039650b /Objects
parent4f100fe9f1c691145e3fa959ef324646e303cdf3 (diff)
downloadcpython-b5e331fdb38684808ffc540d53e8595bdc408b89.zip
cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.gz
cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.bz2
[3.8] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96503)
* Correctly pre-check for int-to-str conversion Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DoS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check is as follows. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ Because $x < \lfloor x \rfloor + 1$ and $\lfloor y \rfloor \le y$ for all real $x$ and $y$, from this it follows that $$\frac{M}{3L} < \left\lfloor\frac{M}{3L}\right\rfloor + 1 \le \left\lfloor\frac{s - 11}{10}\right\rfloor + 1 \le \frac{s - 11}{10} + 1 = \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Christian Heimes <christian@python.org> Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r--Objects/longobject.c68
1 files changed, 67 insertions, 1 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 67dce97..a58a2e1 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3,6 +3,9 @@
/* XXX The functional organization of this file is terrible */
#include "Python.h"
+#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits
+#include "pycore_pystate.h"
+#include "pycore_long.h"
#include "longintrepr.h"
#include <float.h>
@@ -45,6 +48,9 @@ static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];
Py_ssize_t _Py_quick_int_allocs, _Py_quick_neg_int_allocs;
#endif
+#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
+#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"
+
static PyObject *
get_small_int(sdigit ival)
{
@@ -1765,6 +1771,23 @@ long_to_decimal_string_internal(PyObject *aa,
size_a = Py_ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;
+ /* quick and dirty pre-check for overflowing the decimal digit limit,
+ based on the inequality 10/3 >= log2(10)
+
+ explanation in https://github.com/python/cpython/pull/96537
+ */
+ if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
+ / (3 * PyLong_SHIFT) + 2) {
+ PyInterpreterState *interp = _PyInterpreterState_Get();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) &&
+ (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
+ max_str_digits);
+ return -1;
+ }
+ }
+
/* quick and dirty upper bound for the number of digits
required to express a in base _PyLong_DECIMAL_BASE:
@@ -1824,6 +1847,17 @@ long_to_decimal_string_internal(PyObject *aa,
tenpow *= 10;
strlen++;
}
+ if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_Get();
+ int max_str_digits = interp->int_max_str_digits;
+ Py_ssize_t strlen_nosign = strlen - negative;
+ if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
+ Py_DECREF(scratch);
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
+ max_str_digits);
+ return -1;
+ }
+ }
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
Py_DECREF(scratch);
@@ -2337,6 +2371,7 @@ PyLong_FromString(const char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0) {
+ /* binary bases are not limited by int_max_str_digits */
int res = long_from_binary_base(&str, base, &z);
if (res < 0) {
/* Syntax error. */
@@ -2488,6 +2523,17 @@ digit beyond the first.
goto onError;
}
+ /* Limit the size to avoid excessive computation attacks. */
+ if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_Get();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) && (digits > max_str_digits)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
+ max_str_digits, digits);
+ return NULL;
+ }
+ }
+
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@@ -5115,6 +5161,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
}
return PyLong_FromLong(0L);
}
+ /* default base and limit, forward to standard implementation */
if (obase == NULL)
return PyNumber_Long(x);
@@ -5766,6 +5813,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field int_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
+ {"default_max_str_digits", "maximum string conversion digits limitation"},
+ {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
{NULL, NULL}
};
@@ -5773,7 +5822,7 @@ static PyStructSequence_Desc int_info_desc = {
"sys.int_info", /* name */
int_info__doc__, /* doc */
int_info_fields, /* fields */
- 2 /* number of fields */
+ 4 /* number of fields */
};
PyObject *
@@ -5788,6 +5837,17 @@ PyLong_GetInfo(void)
PyLong_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(sizeof(digit)));
+ /*
+ * The following two fields were added after investigating uses of
+ * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
+ * numba using sys.int_info.bits_per_digit as attribute access rather than
+ * sequence unpacking. Cython and sympy also refer to sys.int_info but only
+ * as info for debugging. No concern about adding these in a backport.
+ */
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(int_info);
return NULL;
@@ -5798,6 +5858,7 @@ PyLong_GetInfo(void)
int
_PyLong_Init(void)
{
+ PyInterpreterState *interp;
#if NSMALLNEGINTS + NSMALLPOSINTS > 0
int ival, size;
PyLongObject *v = small_ints;
@@ -5840,6 +5901,11 @@ _PyLong_Init(void)
return 0;
}
}
+ interp = _PyInterpreterState_Get();
+ interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
+ if (interp->int_max_str_digits == -1) {
+ interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
+ }
return 1;
}