summaryrefslogtreecommitdiffstats
path: root/Objects
diff options
context:
space:
mode:
authorGregory P. Smith <greg@krypto.org>2022-09-05 09:21:03 (GMT)
committerGitHub <noreply@github.com>2022-09-05 09:21:03 (GMT)
commitcec1e9dfd769bd3a16142d0fdd1a36f19c77ed15 (patch)
treee8c95e4984b8f9d67f78ecc425e839302aa4c87e /Objects
parentd348afa15d5a997e7a8e51c0f789f41cb15cc651 (diff)
downloadcpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.zip
cpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.tar.gz
cpython-cec1e9dfd769bd3a16142d0fdd1a36f19c77ed15.tar.bz2
[3.9] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96502)
* Correctly pre-check for int-to-str conversion (#96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Christian Heimes <christian@python.org> Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
Diffstat (limited to 'Objects')
-rw-r--r--Objects/longobject.c65
1 files changed, 64 insertions, 1 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c
index cf13b2c..ec18ec3 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -3,7 +3,9 @@
/* XXX The functional organization of this file is terrible */
#include "Python.h"
+#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits
#include "pycore_interp.h" // _PY_NSMALLPOSINTS
+#include "pycore_long.h"
#include "pycore_pystate.h" // _Py_IsMainInterpreter()
#include "longintrepr.h"
@@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL;
#define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS)
#define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS)
+#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
+#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion"
+
static PyObject *
get_small_int(sdigit ival)
{
@@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa,
size_a = Py_ABS(Py_SIZE(a));
negative = Py_SIZE(a) < 0;
+ /* quick and dirty pre-check for overflowing the decimal digit limit,
+ based on the inequality 10/3 >= log2(10)
+
+ explanation in https://github.com/python/cpython/pull/96537
+ */
+ if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD
+ / (3 * PyLong_SHIFT) + 2) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) &&
+ (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
+ max_str_digits);
+ return -1;
+ }
+ }
+
/* quick and dirty upper bound for the number of digits
required to express a in base _PyLong_DECIMAL_BASE:
@@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa,
tenpow *= 10;
strlen++;
}
+ if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ Py_ssize_t strlen_nosign = strlen - negative;
+ if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
+ Py_DECREF(scratch);
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,
+ max_str_digits);
+ return -1;
+ }
+ }
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
Py_DECREF(scratch);
@@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0) {
+ /* binary bases are not limited by int_max_str_digits */
int res = long_from_binary_base(&str, base, &z);
if (res < 0) {
/* Syntax error. */
@@ -2441,6 +2475,17 @@ digit beyond the first.
goto onError;
}
+ /* Limit the size to avoid excessive computation attacks. */
+ if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) && (digits > max_str_digits)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT,
+ max_str_digits, digits);
+ return NULL;
+ }
+ }
+
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
}
return PyLong_FromLong(0L);
}
+ /* default base and limit, forward to standard implementation */
if (obase == NULL)
return PyNumber_Long(x);
@@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field int_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
+ {"default_max_str_digits", "maximum string conversion digits limitation"},
+ {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
{NULL, NULL}
};
@@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = {
"sys.int_info", /* name */
int_info__doc__, /* doc */
int_info_fields, /* fields */
- 2 /* number of fields */
+ 4 /* number of fields */
};
PyObject *
@@ -5745,6 +5793,17 @@ PyLong_GetInfo(void)
PyLong_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(sizeof(digit)));
+ /*
+ * The following two fields were added after investigating uses of
+ * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
+ * numba using sys.int_info.bits_per_digit as attribute access rather than
+ * sequence unpacking. Cython and sympy also refer to sys.int_info but only
+ * as info for debugging. No concern about adding these in a backport.
+ */
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(int_info);
return NULL;
@@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate)
}
}
}
+ tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
+ if (tstate->interp->int_max_str_digits == -1) {
+ tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
+ }
return 1;
}