summary | refs | log | tree | commit | diff | stats
path: root/Objects
diff options
context:
space:
mode:
author: Gregory P. Smith <gps@google.com> 2022-09-02 16:35:08 (GMT)
committer: GitHub <noreply@github.com> 2022-09-02 16:35:08 (GMT)
commit: 511ca9452033ef95bc7d7fc404b8161068226002 (patch)
tree: cefd49e0c9c75f912fa28d05eae15335273aaa8e /Objects
parent: 656167db81a53934da55d90ed431449d8a4fc14b (diff)
download: cpython-511ca9452033ef95bc7d7fc404b8161068226002.zip
cpython-511ca9452033ef95bc7d7fc404b8161068226002.tar.gz
cpython-511ca9452033ef95bc7d7fc404b8161068226002.tar.bz2
gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96499)
Integer to and from text conversions via CPython's bignum `int` type are not safe against denial-of-service attacks due to malicious input. Very large input strings with hundreds of thousands of digits can consume several CPU seconds. This PR comes fresh from a pile of work done in our private PSRT security response team repo. Signed-off-by: Christian Heimes [Red Hat] <christian@python.org> Tons-of-polishing-up-by: Gregory P. Smith [Google] <greg@krypto.org> Reviews via the private PSRT repo by many others (see the NEWS entry in the PR). <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> I wrote up [a one-pager for the release managers](https://docs.google.com/document/d/1KjuF_aXlzPUxTK4BMgezGJ2Pn7uevfX7g0_mvgHlL7Y/edit#). Much of that text wound up in the Issue. Backport PRs already exist. See the issue for links.
Diffstat (limited to 'Objects')
-rw-r--r-- Objects/longobject.c 45
1 files changed, 44 insertions, 1 deletions
diff --git a/Objects/longobject.c b/Objects/longobject.c
index 90ed02b..6c6e2ea 100644
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -36,6 +36,8 @@ medium_value(PyLongObject *x)
#define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS)
#define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS)
+#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits"
+
static inline void
_Py_DECREF_INT(PyLongObject *op)
{
@@ -1815,6 +1817,17 @@ long_to_decimal_string_internal(PyObject *aa,
tenpow *= 10;
strlen++;
}
+ if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ Py_ssize_t strlen_nosign = strlen - negative;
+ if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) {
+ Py_DECREF(scratch);
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, strlen_nosign);
+ return -1;
+ }
+ }
if (writer) {
if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) {
Py_DECREF(scratch);
@@ -2328,6 +2341,7 @@ PyLong_FromString(const char *str, char **pend, int base)
start = str;
if ((base & (base - 1)) == 0) {
+ /* binary bases are not limited by int_max_str_digits */
int res = long_from_binary_base(&str, base, &z);
if (res < 0) {
/* Syntax error. */
@@ -2479,6 +2493,17 @@ digit beyond the first.
goto onError;
}
+ /* Limit the size to avoid excessive computation attacks. */
+ if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) {
+ PyInterpreterState *interp = _PyInterpreterState_GET();
+ int max_str_digits = interp->int_max_str_digits;
+ if ((max_str_digits > 0) && (digits > max_str_digits)) {
+ PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT,
+ max_str_digits, digits);
+ return NULL;
+ }
+ }
+
/* Create an int object that can contain the largest possible
* integer with this base and length. Note that there's no
* need to initialize z->ob_digit -- no slot is read up before
@@ -5355,6 +5380,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase)
}
return PyLong_FromLong(0L);
}
+ /* default base and limit, forward to standard implementation */
if (obase == NULL)
return PyNumber_Long(x);
@@ -6090,6 +6116,8 @@ internal representation of integers. The attributes are read only.");
static PyStructSequence_Field int_info_fields[] = {
{"bits_per_digit", "size of a digit in bits"},
{"sizeof_digit", "size in bytes of the C type used to represent a digit"},
+ {"default_max_str_digits", "maximum string conversion digits limitation"},
+ {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"},
{NULL, NULL}
};
@@ -6097,7 +6125,7 @@ static PyStructSequence_Desc int_info_desc = {
"sys.int_info", /* name */
int_info__doc__, /* doc */
int_info_fields, /* fields */
- 2 /* number of fields */
+ 4 /* number of fields */
};
PyObject *
@@ -6112,6 +6140,17 @@ PyLong_GetInfo(void)
PyLong_FromLong(PyLong_SHIFT));
PyStructSequence_SET_ITEM(int_info, field++,
PyLong_FromLong(sizeof(digit)));
+ /*
+ * The following two fields were added after investigating uses of
+ * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was
+ * numba using sys.int_info.bits_per_digit as attribute access rather than
+ * sequence unpacking. Cython and sympy also refer to sys.int_info but only
+ * as info for debugging. No concern about adding these in a backport.
+ */
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS));
+ PyStructSequence_SET_ITEM(int_info, field++,
+ PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD));
if (PyErr_Occurred()) {
Py_CLEAR(int_info);
return NULL;
@@ -6139,6 +6178,10 @@ _PyLong_InitTypes(PyInterpreterState *interp)
return _PyStatus_ERR("can't init int info type");
}
}
+ interp->int_max_str_digits = _Py_global_config_int_max_str_digits;
+ if (interp->int_max_str_digits == -1) {
+ interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS;
+ }
return _PyStatus_OK();
}