diff options
| author | Gregory P. Smith <greg@krypto.org> | 2022-09-05 20:26:09 (GMT) |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-09-05 20:26:09 (GMT) |
| commit | b5e331fdb38684808ffc540d53e8595bdc408b89 (patch) | |
| tree | fff15beb4402c977a0a4dc51aaeab8976039650b /Python | |
| parent | 4f100fe9f1c691145e3fa959ef324646e303cdf3 (diff) | |
| download | cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.zip cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.gz cpython-b5e331fdb38684808ffc540d53e8595bdc408b89.tar.bz2 | |
[3.8] gh-95778: CVE-2020-10735: Prevent DoS by very large int() (#96503)
* Correctly pre-check for int-to-str conversion
Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =)
The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact.
The justification for the current check. The C code check is:
```c
max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10
```
In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is:
$$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$
From this it follows that
$$\frac{M}{3L} < \frac{s-1}{10}$$
hence that
$$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$
So
$$2^{L(s-1)} > 10^M.$$
But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check.
<!-- gh-issue-number: gh-95778 -->
* Issue: gh-95778
<!-- /gh-issue-number -->
Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Christian Heimes <christian@python.org>
Co-authored-by: Mark Dickinson <dickinsm@gmail.com>
Diffstat (limited to 'Python')
| -rw-r--r-- | Python/ast.c | 19 | ||||
| -rw-r--r-- | Python/clinic/sysmodule.c.h | 60 | ||||
| -rw-r--r-- | Python/initconfig.c | 60 | ||||
| -rw-r--r-- | Python/sysmodule.c | 46 |
4 files changed, 182 insertions, 3 deletions
diff --git a/Python/ast.c b/Python/ast.c index 7c1d24d..63563ce 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -2460,8 +2460,25 @@ ast_for_atom(struct compiling *c, const node *n) return NULL; } pynum = parsenumber(c, STR(ch)); - if (!pynum) + if (!pynum) { + PyThreadState *tstate = PyThreadState_GET(); + // The only way a ValueError should happen in _this_ code is via + // PyLong_FromString hitting a length limit. + if (tstate->curexc_type == PyExc_ValueError && + tstate->curexc_value != NULL) { + PyObject *type, *value, *tb; + // This acts as PyErr_Clear() as we're replacing curexc. + PyErr_Fetch(&type, &value, &tb); + Py_XDECREF(tb); + Py_DECREF(type); + ast_error(c, ch, + "%S - Consider hexadecimal for huge integer literals " + "to avoid decimal conversion limits.", + value); + Py_DECREF(value); + } return NULL; + } if (PyArena_AddPyObject(c->c_arena, pynum) < 0) { Py_DECREF(pynum); diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index d2d1503..e41a9f3 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -723,6 +723,64 @@ exit: #endif /* defined(USE_MALLOPT) */ +PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, +"get_int_max_str_digits($module, /)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module); + +static PyObject * +sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_get_int_max_str_digits_impl(module); +} + +PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, +"set_int_max_str_digits($module, /, maxdigits)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ + {"set_int_max_str_digits", (PyCFunction)(void(*)(void))sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); + +static PyObject * +sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"maxdigits", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0}; + PyObject *argsbuf[1]; + int maxdigits; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_Check(args[0])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + maxdigits = _PyLong_AsInt(args[0]); + if (maxdigits == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = sys_set_int_max_str_digits_impl(module, maxdigits); + +exit: + return return_value; +} + PyDoc_STRVAR(sys_getrefcount__doc__, "getrefcount($module, object, /)\n" "--\n" @@ -1088,4 +1146,4 @@ sys_getandroidapilevel(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=273f9cec8bfcab91 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c41f7fa36ead9409 input=a9049054013a1b77]*/ diff --git a/Python/initconfig.c b/Python/initconfig.c index 69711d8..c2203c8 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3,6 +3,7 @@ #include "pycore_fileutils.h" #include "pycore_getopt.h" #include "pycore_initconfig.h" +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_pathconfig.h" #include "pycore_pyerrors.h" #include "pycore_pylifecycle.h" @@ -94,6 +95,9 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ + -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ + This helps avoid denial of service attacks when parsing untrusted data.\n\ + The default is sys.int_info.default_max_str_digits. 0 disables.\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -119,6 +123,10 @@ static const char usage_6[] = " to seed the hashes of str and bytes objects. It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" +" when converting from a string and when converting an int back to a str.\n" +" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" +" 16, and 32 are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -637,6 +645,10 @@ _PyConfig_InitCompatConfig(PyConfig *config) #endif } +/* Excluded from public struct PyConfig for backporting reasons. */ +/* default to unconfigured, _PyLong_Init() does the rest */ +int _Py_global_config_int_max_str_digits = -1; + static void config_init_defaults(PyConfig *config) @@ -1387,6 +1399,48 @@ config_init_tracemalloc(PyConfig *config) return _PyStatus_OK(); } +static PyStatus +config_init_int_max_str_digits(PyConfig *config) +{ + int maxdigits; + int valid = 0; + + const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); + if (env) { + if (!_Py_str_to_int(env, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + if (!valid) { +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + return _PyStatus_ERR( + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); + } + _Py_global_config_int_max_str_digits = maxdigits; + } + + const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); + if (xoption) { + const wchar_t *sep = wcschr(xoption, L'='); + if (sep) { + if (!config_wstr_to_int(sep + 1, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + } + if (!valid) { + return _PyStatus_ERR( + "-X int_max_str_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY + } + _Py_global_config_int_max_str_digits = maxdigits; + } + return _PyStatus_OK(); +} static PyStatus config_init_pycache_prefix(PyConfig *config) @@ -1438,6 +1492,12 @@ config_read_complex_options(PyConfig *config) return status; } } + if (_Py_global_config_int_max_str_digits < 0) { + status = config_init_int_max_str_digits(config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } if (config->pycache_prefix == NULL) { status = config_init_pycache_prefix(config); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b544f2b..ffda714 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -23,6 +23,7 @@ Data members: #include "pycore_pathconfig.h" #include "pycore_pystate.h" #include "pycore_tupleobject.h" +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pythread.h" #include "pydtrace.h" @@ -1608,6 +1609,45 @@ sys_mdebug_impl(PyObject *module, int flag) } #endif /* USE_MALLOPT */ + +/*[clinic input] +sys.get_int_max_str_digits + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module) +/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_Get(); + return PyLong_FromSsize_t(interp->int_max_str_digits); +} + +/*[clinic input] +sys.set_int_max_str_digits + + maxdigits: int + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_Get(); + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + interp->int_max_str_digits = maxdigits; + Py_RETURN_NONE; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); + return NULL; + } +} + size_t _PySys_GetSizeOf(PyObject *o) { @@ -1999,6 +2039,8 @@ static PyMethodDef sys_methods[] = { SYS_GET_ASYNCGEN_HOOKS_METHODDEF SYS_GETANDROIDAPILEVEL_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF + SYS_GET_INT_MAX_STR_DIGITS_METHODDEF + SYS_SET_INT_MAX_STR_DIGITS_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2454,6 +2496,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, + {"int_max_str_digits", "-X int_max_str_digits"}, {0} }; @@ -2461,7 +2504,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 15 + 16 }; static PyObject* @@ -2496,6 +2539,7 @@ make_flags(_PyRuntimeState *runtime, PyInterpreterState *interp) SetFlag(config->isolated); PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode)); SetFlag(preconfig->utf8_mode); + SetFlag(_Py_global_config_int_max_str_digits); #undef SetFlag if (PyErr_Occurred()) { |
