summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authorGregory P. Smith <greg@krypto.org>2022-09-05 20:26:09 (GMT)
committerGitHub <noreply@github.com>2022-09-05 20:26:09 (GMT)
commitb5e331fdb38684808ffc540d53e8595bdc408b89 (patch)
treefff15beb4402c977a0a4dc51aaeab8976039650b /Python
parent4f100fe9f1c691145e3fa959ef324646e303cdf3 (diff)
downloadcpython-b5e331fdb38684808ffc540d53e8595bdc408b89.zip
cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.gz
cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.bz2
[3.8] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96503)
* Correctly pre-check for int-to-str conversion Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. <!-- gh-issue-number: gh-95778 --> * Issue: gh-95778 <!-- /gh-issue-number --> Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org> Co-authored-by: Christian Heimes <christian@python.org> Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
Diffstat (limited to 'Python')
-rw-r--r--Python/ast.c19
-rw-r--r--Python/clinic/sysmodule.c.h60
-rw-r--r--Python/initconfig.c60
-rw-r--r--Python/sysmodule.c46
4 files changed, 182 insertions, 3 deletions
diff --git a/Python/ast.c b/Python/ast.c
index 7c1d24d..63563ce 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -2460,8 +2460,25 @@ ast_for_atom(struct compiling *c, const node *n)
return NULL;
}
pynum = parsenumber(c, STR(ch));
- if (!pynum)
+ if (!pynum) {
+ PyThreadState *tstate = PyThreadState_GET();
+ // The only way a ValueError should happen in _this_ code is via
+ // PyLong_FromString hitting a length limit.
+ if (tstate->curexc_type == PyExc_ValueError &&
+ tstate->curexc_value != NULL) {
+ PyObject *type, *value, *tb;
+ // This acts as PyErr_Clear() as we're replacing curexc.
+ PyErr_Fetch(&type, &value, &tb);
+ Py_XDECREF(tb);
+ Py_DECREF(type);
+ ast_error(c, ch,
+ "%S - Consider hexadecimal for huge integer literals "
+ "to avoid decimal conversion limits.",
+ value);
+ Py_DECREF(value);
+ }
return NULL;
+ }
if (PyArena_AddPyObject(c->c_arena, pynum) < 0) {
Py_DECREF(pynum);
diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h
index d2d1503..e41a9f3 100644
--- a/Python/clinic/sysmodule.c.h
+++ b/Python/clinic/sysmodule.c.h
@@ -723,6 +723,64 @@ exit:
#endif /* defined(USE_MALLOPT) */
+PyDoc_STRVAR(sys_get_int_max_str_digits__doc__,
+"get_int_max_str_digits($module, /)\n"
+"--\n"
+"\n"
+"Set the maximum string digits limit for non-binary int<->str conversions.");
+
+#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \
+ {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__},
+
+static PyObject *
+sys_get_int_max_str_digits_impl(PyObject *module);
+
+static PyObject *
+sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+ return sys_get_int_max_str_digits_impl(module);
+}
+
+PyDoc_STRVAR(sys_set_int_max_str_digits__doc__,
+"set_int_max_str_digits($module, /, maxdigits)\n"
+"--\n"
+"\n"
+"Set the maximum string digits limit for non-binary int<->str conversions.");
+
+#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \
+ {"set_int_max_str_digits", (PyCFunction)(void(*)(void))sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__},
+
+static PyObject *
+sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits);
+
+static PyObject *
+sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+ PyObject *return_value = NULL;
+ static const char * const _keywords[] = {"maxdigits", NULL};
+ static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0};
+ PyObject *argsbuf[1];
+ int maxdigits;
+
+ args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
+ if (!args) {
+ goto exit;
+ }
+ if (PyFloat_Check(args[0])) {
+ PyErr_SetString(PyExc_TypeError,
+ "integer argument expected, got float" );
+ goto exit;
+ }
+ maxdigits = _PyLong_AsInt(args[0]);
+ if (maxdigits == -1 && PyErr_Occurred()) {
+ goto exit;
+ }
+ return_value = sys_set_int_max_str_digits_impl(module, maxdigits);
+
+exit:
+ return return_value;
+}
+
PyDoc_STRVAR(sys_getrefcount__doc__,
"getrefcount($module, object, /)\n"
"--\n"
@@ -1088,4 +1146,4 @@ sys_getandroidapilevel(PyObject *module, PyObject *Py_UNUSED(ignored))
#ifndef SYS_GETANDROIDAPILEVEL_METHODDEF
#define SYS_GETANDROIDAPILEVEL_METHODDEF
#endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */
-/*[clinic end generated code: output=273f9cec8bfcab91 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=c41f7fa36ead9409 input=a9049054013a1b77]*/
diff --git a/Python/initconfig.c b/Python/initconfig.c
index 69711d8..c2203c8 100644
--- a/Python/initconfig.c
+++ b/Python/initconfig.c
@@ -3,6 +3,7 @@
#include "pycore_fileutils.h"
#include "pycore_getopt.h"
#include "pycore_initconfig.h"
+#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD
#include "pycore_pathconfig.h"
#include "pycore_pyerrors.h"
#include "pycore_pylifecycle.h"
@@ -94,6 +95,9 @@ static const char usage_3[] = "\
otherwise activate automatically)\n\
-X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\
given directory instead of to the code tree\n\
+ -X int_max_str_digits=number: limit the size of int<->str conversions.\n\
+ This helps avoid denial of service attacks when parsing untrusted data.\n\
+ The default is sys.int_info.default_max_str_digits. 0 disables.\n\
\n\
--check-hash-based-pycs always|default|never:\n\
control how Python invalidates hash-based .pyc files\n\
@@ -119,6 +123,10 @@ static const char usage_6[] =
" to seed the hashes of str and bytes objects. It can also be set to an\n"
" integer in the range [0,4294967295] to get hash values with a\n"
" predictable seed.\n"
+"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n"
+" when converting from a string and when converting an int back to a str.\n"
+" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n"
+" 16, and 32 are never limited.\n"
"PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n"
" on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n"
" hooks.\n"
@@ -637,6 +645,10 @@ _PyConfig_InitCompatConfig(PyConfig *config)
#endif
}
+/* Excluded from public struct PyConfig for backporting reasons. */
+/* default to unconfigured, _PyLong_Init() does the rest */
+int _Py_global_config_int_max_str_digits = -1;
+
static void
config_init_defaults(PyConfig *config)
@@ -1387,6 +1399,48 @@ config_init_tracemalloc(PyConfig *config)
return _PyStatus_OK();
}
+static PyStatus
+config_init_int_max_str_digits(PyConfig *config)
+{
+ int maxdigits;
+ int valid = 0;
+
+ const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS");
+ if (env) {
+ if (!_Py_str_to_int(env, &maxdigits)) {
+ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD));
+ }
+ if (!valid) {
+#define STRINGIFY(VAL) _STRINGIFY(VAL)
+#define _STRINGIFY(VAL) #VAL
+ return _PyStatus_ERR(
+ "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= "
+ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)
+ " or 0 for unlimited.");
+ }
+ _Py_global_config_int_max_str_digits = maxdigits;
+ }
+
+ const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits");
+ if (xoption) {
+ const wchar_t *sep = wcschr(xoption, L'=');
+ if (sep) {
+ if (!config_wstr_to_int(sep + 1, &maxdigits)) {
+ valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD));
+ }
+ }
+ if (!valid) {
+ return _PyStatus_ERR(
+ "-X int_max_str_digits: invalid limit; must be >= "
+ STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)
+ " or 0 for unlimited.");
+#undef _STRINGIFY
+#undef STRINGIFY
+ }
+ _Py_global_config_int_max_str_digits = maxdigits;
+ }
+ return _PyStatus_OK();
+}
static PyStatus
config_init_pycache_prefix(PyConfig *config)
@@ -1438,6 +1492,12 @@ config_read_complex_options(PyConfig *config)
return status;
}
}
+ if (_Py_global_config_int_max_str_digits < 0) {
+ status = config_init_int_max_str_digits(config);
+ if (_PyStatus_EXCEPTION(status)) {
+ return status;
+ }
+ }
if (config->pycache_prefix == NULL) {
status = config_init_pycache_prefix(config);
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index b544f2b..ffda714 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -23,6 +23,7 @@ Data members:
#include "pycore_pathconfig.h"
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
+#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD
#include "pythread.h"
#include "pydtrace.h"
@@ -1608,6 +1609,45 @@ sys_mdebug_impl(PyObject *module, int flag)
}
#endif /* USE_MALLOPT */
+
+/*[clinic input]
+sys.get_int_max_str_digits
+
+Set the maximum string digits limit for non-binary int<->str conversions.
+[clinic start generated code]*/
+
+static PyObject *
+sys_get_int_max_str_digits_impl(PyObject *module)
+/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/
+{
+ PyInterpreterState *interp = _PyInterpreterState_Get();
+ return PyLong_FromSsize_t(interp->int_max_str_digits);
+}
+
+/*[clinic input]
+sys.set_int_max_str_digits
+
+ maxdigits: int
+
+Set the maximum string digits limit for non-binary int<->str conversions.
+[clinic start generated code]*/
+
+static PyObject *
+sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits)
+/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/
+{
+ PyInterpreterState *interp = _PyInterpreterState_Get();
+ if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) {
+ interp->int_max_str_digits = maxdigits;
+ Py_RETURN_NONE;
+ } else {
+ PyErr_Format(
+ PyExc_ValueError, "maxdigits must be 0 or larger than %d",
+ _PY_LONG_MAX_STR_DIGITS_THRESHOLD);
+ return NULL;
+ }
+}
+
size_t
_PySys_GetSizeOf(PyObject *o)
{
@@ -1999,6 +2039,8 @@ static PyMethodDef sys_methods[] = {
SYS_GET_ASYNCGEN_HOOKS_METHODDEF
SYS_GETANDROIDAPILEVEL_METHODDEF
SYS_UNRAISABLEHOOK_METHODDEF
+ SYS_GET_INT_MAX_STR_DIGITS_METHODDEF
+ SYS_SET_INT_MAX_STR_DIGITS_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -2454,6 +2496,7 @@ static PyStructSequence_Field flags_fields[] = {
{"isolated", "-I"},
{"dev_mode", "-X dev"},
{"utf8_mode", "-X utf8"},
+ {"int_max_str_digits", "-X int_max_str_digits"},
{0}
};
@@ -2461,7 +2504,7 @@ static PyStructSequence_Desc flags_desc = {
"sys.flags", /* name */
flags__doc__, /* doc */
flags_fields, /* fields */
- 15
+ 16
};
static PyObject*
@@ -2496,6 +2539,7 @@ make_flags(_PyRuntimeState *runtime, PyInterpreterState *interp)
SetFlag(config->isolated);
PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode));
SetFlag(preconfig->utf8_mode);
+ SetFlag(_Py_global_config_int_max_str_digits);
#undef SetFlag
if (PyErr_Occurred()) {