From 6cc800d3634fdd002b986c3ffe6a3d5540f311a0 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 26 May 2021 13:15:40 -0600 Subject: bpo-43693: Clean up the PyCodeObject fields. (GH-26364) * Move up the comment about fields using in hashing/comparision. * Group the fields more clearly. * Add co_ncellvars and co_nfreevars. * Raise ValueError if nlocals != len(varnames), rather than aborting. --- Include/cpython/code.h | 57 ++++++++++++++++++++++++++++++++++++++------------ Lib/test/test_code.py | 46 ++++++++++++++++++++++++++++++++++++++-- Objects/codeobject.c | 51 +++++++++++++++++++++++++------------------- Objects/frameobject.c | 24 ++++++++------------- Objects/funcobject.c | 14 ++++++------- Objects/typeobject.c | 18 ++++------------ Python/ceval.c | 17 +++++++-------- 7 files changed, 146 insertions(+), 81 deletions(-) diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 575a4b7..990f367 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -17,31 +17,62 @@ typedef struct _PyOpcache _PyOpcache; /* Bytecode object */ struct PyCodeObject { PyObject_HEAD + + /* Note only the following fields are used in hash and/or comparisons + * + * - co_name + * - co_argcount + * - co_posonlyargcount + * - co_kwonlyargcount + * - co_nlocals + * - co_stacksize + * - co_flags + * - co_firstlineno + * - co_code + * - co_consts + * - co_names + * - co_varnames + * - co_freevars + * - co_cellvars + * + * This is done to preserve the name and line number for tracebacks + * and debuggers; otherwise, constant de-duplication would collapse + * identical functions/lambdas defined on different lines. + */ + + /* These fields are set with provided values on new code objects. */ + + // The hottest fields (in the eval loop) are grouped here at the top. + PyObject *co_code; /* instruction opcodes */ + PyObject *co_consts; /* list (constants used) */ + PyObject *co_names; /* list of strings (names used) */ + int co_flags; /* CO_..., see below */ + // The rest are not so impactful on performance. int co_argcount; /* #arguments, except *args */ int co_posonlyargcount; /* #positional only arguments */ int co_kwonlyargcount; /* #keyword only arguments */ - int co_nlocals; /* #local variables */ int co_stacksize; /* #entries needed for evaluation stack */ - int co_flags; /* CO_..., see below */ int co_firstlineno; /* first source line number */ - PyObject *co_code; /* instruction opcodes */ - PyObject *co_consts; /* list (constants used) */ - PyObject *co_names; /* list of strings (names used) */ PyObject *co_varnames; /* tuple of strings (local variable names) */ - PyObject *co_freevars; /* tuple of strings (free variable names) */ PyObject *co_cellvars; /* tuple of strings (cell variable names) */ - /* The rest aren't used in either hash or comparisons, except for co_name, - used in both. This is done to preserve the name and line number - for tracebacks and debuggers; otherwise, constant de-duplication - would collapse identical functions/lambdas defined on different lines. - */ - Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */ + PyObject *co_freevars; /* tuple of strings (free variable names) */ PyObject *co_filename; /* unicode (where it was loaded from) */ PyObject *co_name; /* unicode (name, for reference) */ PyObject *co_linetable; /* string (encoding addr<->lineno mapping) See Objects/lnotab_notes.txt for details. */ - int co_nlocalsplus; /* Number of locals + free + cell variables */ PyObject *co_exceptiontable; /* Byte string encoding exception handling table */ + + /* These fields are set with computed values on new code objects. */ + + Py_ssize_t *co_cell2arg; /* Maps cell vars which are arguments. */ + // These are redundant but offer some performance benefit. + int co_nlocalsplus; /* number of local + cell + free variables */ + int co_nlocals; /* number of local variables */ + int co_ncellvars; /* number of cell variables */ + int co_nfreevars; /* number of free variables */ + + /* The remaining fields are zeroed out on new code objects. */ + PyObject *co_weakreflist; /* to support weakrefs to code objects */ /* Scratch space for extra data relating to the code object. Type is a void* to keep the format private in codeobject.c to force diff --git a/Lib/test/test_code.py b/Lib/test/test_code.py index c3dd678..244acab 100644 --- a/Lib/test/test_code.py +++ b/Lib/test/test_code.py @@ -240,6 +240,7 @@ class CodeTest(unittest.TestCase): # different co_name, co_varnames, co_consts def func2(): y = 2 + z = 3 return y code2 = func2.__code__ @@ -247,14 +248,14 @@ class CodeTest(unittest.TestCase): ("co_argcount", 0), ("co_posonlyargcount", 0), ("co_kwonlyargcount", 0), - ("co_nlocals", 0), + ("co_nlocals", 1), ("co_stacksize", 0), ("co_flags", code.co_flags | inspect.CO_COROUTINE), ("co_firstlineno", 100), ("co_code", code2.co_code), ("co_consts", code2.co_consts), ("co_names", ("myname",)), - ("co_varnames", code2.co_varnames), + ("co_varnames", ('spam',)), ("co_freevars", ("freevar",)), ("co_cellvars", ("cellvar",)), ("co_filename", "newfilename"), @@ -265,6 +266,47 @@ class CodeTest(unittest.TestCase): new_code = code.replace(**{attr: value}) self.assertEqual(getattr(new_code, attr), value) + new_code = code.replace(co_varnames=code2.co_varnames, + co_nlocals=code2.co_nlocals) + self.assertEqual(new_code.co_varnames, code2.co_varnames) + self.assertEqual(new_code.co_nlocals, code2.co_nlocals) + + def test_nlocals_mismatch(self): + def func(): + x = 1 + return x + co = func.__code__ + assert co.co_nlocals > 0; + + # First we try the constructor. + CodeType = type(co) + for diff in (-1, 1): + with self.assertRaises(ValueError): + CodeType(co.co_argcount, + co.co_posonlyargcount, + co.co_kwonlyargcount, + # This is the only change. + co.co_nlocals + diff, + co.co_stacksize, + co.co_flags, + co.co_code, + co.co_consts, + co.co_names, + co.co_varnames, + co.co_filename, + co.co_name, + co.co_firstlineno, + co.co_lnotab, + co.co_exceptiontable, + co.co_freevars, + co.co_cellvars, + ) + # Then we try the replace method. + with self.assertRaises(ValueError): + co.replace(co_nlocals=co.co_nlocals - 1) + with self.assertRaises(ValueError): + co.replace(co_nlocals=co.co_nlocals + 1) + def test_empty_linetable(self): def func(): pass diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 8b2cee2..65df3e3 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -164,7 +164,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, { PyCodeObject *co; Py_ssize_t *cell2arg = NULL; - Py_ssize_t i, n_cellvars, n_varnames, total_args; + Py_ssize_t i, total_args; + int ncellvars, nfreevars; /* Check argument types */ if (argcount < posonlyargcount || posonlyargcount < 0 || @@ -184,6 +185,19 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, return NULL; } + /* Make sure that code is indexable with an int, this is + a long running assumption in ceval.c and many parts of + the interpreter. */ + if (PyBytes_GET_SIZE(code) > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX"); + return NULL; + } + + if (nlocals != PyTuple_GET_SIZE(varnames)) { + PyErr_SetString(PyExc_ValueError, "co_nlocals != len(co_varnames)"); + return NULL; + } + /* Ensure that strings are ready Unicode string */ if (PyUnicode_READY(name) < 0) { return NULL; @@ -208,46 +222,40 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, return NULL; } - /* Make sure that code is indexable with an int, this is - a long running assumption in ceval.c and many parts of - the interpreter. */ - if (PyBytes_GET_SIZE(code) > INT_MAX) { - PyErr_SetString(PyExc_OverflowError, "co_code larger than INT_MAX"); - return NULL; - } - /* Check for any inner or outer closure references */ - n_cellvars = PyTuple_GET_SIZE(cellvars); - if (!n_cellvars && !PyTuple_GET_SIZE(freevars)) { + assert(PyTuple_GET_SIZE(cellvars) < INT_MAX); + ncellvars = (int)PyTuple_GET_SIZE(cellvars); + assert(PyTuple_GET_SIZE(freevars) < INT_MAX); + nfreevars = (int)PyTuple_GET_SIZE(freevars); + if (!ncellvars && !nfreevars) { flags |= CO_NOFREE; } else { flags &= ~CO_NOFREE; } - n_varnames = PyTuple_GET_SIZE(varnames); - if (argcount <= n_varnames && kwonlyargcount <= n_varnames) { + if (argcount <= nlocals && kwonlyargcount <= nlocals) { /* Never overflows. */ total_args = (Py_ssize_t)argcount + (Py_ssize_t)kwonlyargcount + ((flags & CO_VARARGS) != 0) + ((flags & CO_VARKEYWORDS) != 0); } else { - total_args = n_varnames + 1; + total_args = nlocals + 1; } - if (total_args > n_varnames) { + if (total_args > nlocals) { PyErr_SetString(PyExc_ValueError, "code: varnames is too small"); return NULL; } /* Create mapping between cells and arguments if needed. */ - if (n_cellvars) { + if (ncellvars) { bool used_cell2arg = false; - cell2arg = PyMem_NEW(Py_ssize_t, n_cellvars); + cell2arg = PyMem_NEW(Py_ssize_t, ncellvars); if (cell2arg == NULL) { PyErr_NoMemory(); return NULL; } /* Find cells which are also arguments. */ - for (i = 0; i < n_cellvars; i++) { + for (i = 0; i < ncellvars; i++) { Py_ssize_t j; PyObject *cell = PyTuple_GET_ITEM(cellvars, i); cell2arg[i] = CO_CELL_NOT_AN_ARG; @@ -279,9 +287,10 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount, co->co_argcount = argcount; co->co_posonlyargcount = posonlyargcount; co->co_kwonlyargcount = kwonlyargcount; + co->co_nlocalsplus = nlocals + ncellvars + nfreevars; co->co_nlocals = nlocals; - co->co_nlocalsplus = nlocals + - (int)PyTuple_GET_SIZE(freevars) + (int)PyTuple_GET_SIZE(cellvars); + co->co_ncellvars = ncellvars; + co->co_nfreevars = nfreevars; co->co_stacksize = stacksize; co->co_flags = flags; Py_INCREF(code); @@ -1139,7 +1148,7 @@ code_sizeof(PyCodeObject *co, PyObject *Py_UNUSED(args)) _PyCodeObjectExtra *co_extra = (_PyCodeObjectExtra*) co->co_extra; if (co->co_cell2arg != NULL && co->co_cellvars != NULL) { - res += PyTuple_GET_SIZE(co->co_cellvars) * sizeof(Py_ssize_t); + res += co->co_ncellvars * sizeof(Py_ssize_t); } if (co_extra != NULL) { res += sizeof(_PyCodeObjectExtra) + diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 87c4852..1bfae90 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1023,7 +1023,6 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f) PyObject **fast; PyCodeObject *co; Py_ssize_t j; - Py_ssize_t ncells, nfreevars; if (f == NULL) { PyErr_BadInternalCall(); @@ -1051,10 +1050,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f) if (map_to_dict(map, j, locals, fast, 0) < 0) return -1; } - ncells = PyTuple_GET_SIZE(co->co_cellvars); - nfreevars = PyTuple_GET_SIZE(co->co_freevars); - if (ncells || nfreevars) { - if (map_to_dict(co->co_cellvars, ncells, + if (co->co_ncellvars || co->co_nfreevars) { + if (map_to_dict(co->co_cellvars, co->co_ncellvars, locals, fast + co->co_nlocals, 1)) return -1; @@ -1067,8 +1064,8 @@ PyFrame_FastToLocalsWithError(PyFrameObject *f) into the locals dict used by the class. */ if (co->co_flags & CO_OPTIMIZED) { - if (map_to_dict(co->co_freevars, nfreevars, - locals, fast + co->co_nlocals + ncells, 1) < 0) + if (map_to_dict(co->co_freevars, co->co_nfreevars, locals, + fast + co->co_nlocals + co->co_ncellvars, 1) < 0) return -1; } } @@ -1096,7 +1093,6 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) PyObject *error_type, *error_value, *error_traceback; PyCodeObject *co; Py_ssize_t j; - Py_ssize_t ncells, nfreevars; if (f == NULL) return; locals = _PyFrame_Specials(f)[FRAME_SPECIALS_LOCALS_OFFSET]; @@ -1113,16 +1109,14 @@ PyFrame_LocalsToFast(PyFrameObject *f, int clear) j = co->co_nlocals; if (co->co_nlocals) dict_to_map(co->co_varnames, j, locals, fast, 0, clear); - ncells = PyTuple_GET_SIZE(co->co_cellvars); - nfreevars = PyTuple_GET_SIZE(co->co_freevars); - if (ncells || nfreevars) { - dict_to_map(co->co_cellvars, ncells, + if (co->co_ncellvars || co->co_nfreevars) { + dict_to_map(co->co_cellvars, co->co_ncellvars, locals, fast + co->co_nlocals, 1, clear); /* Same test as in PyFrame_FastToLocals() above. */ if (co->co_flags & CO_OPTIMIZED) { - dict_to_map(co->co_freevars, nfreevars, - locals, fast + co->co_nlocals + ncells, 1, - clear); + dict_to_map(co->co_freevars, co->co_nfreevars, locals, + fast + co->co_nlocals + co->co_ncellvars, 1, + clear); } } PyErr_Restore(error_type, error_value, error_traceback); diff --git a/Objects/funcobject.c b/Objects/funcobject.c index f0b0b67..dc27215 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -279,7 +279,8 @@ func_get_code(PyFunctionObject *op, void *Py_UNUSED(ignored)) static int func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored)) { - Py_ssize_t nfree, nclosure; + Py_ssize_t nclosure; + int nfree; /* Not legal to del f.func_code or to set it to anything * other than a code object. */ @@ -294,7 +295,7 @@ func_set_code(PyFunctionObject *op, PyObject *value, void *Py_UNUSED(ignored)) return -1; } - nfree = PyCode_GetNumFree((PyCodeObject *)value); + nfree = ((PyCodeObject *)value)->co_nfreevars; nclosure = (op->func_closure == NULL ? 0 : PyTuple_GET_SIZE(op->func_closure)); if (nclosure != nfree) { @@ -538,7 +539,7 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals, /*[clinic end generated code: output=99c6d9da3a24e3be input=93611752fc2daf11]*/ { PyFunctionObject *newfunc; - Py_ssize_t nfree, nclosure; + Py_ssize_t nclosure; if (name != Py_None && !PyUnicode_Check(name)) { PyErr_SetString(PyExc_TypeError, @@ -550,9 +551,8 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals, "arg 4 (defaults) must be None or tuple"); return NULL; } - nfree = PyTuple_GET_SIZE(code->co_freevars); if (!PyTuple_Check(closure)) { - if (nfree && closure == Py_None) { + if (code->co_nfreevars && closure == Py_None) { PyErr_SetString(PyExc_TypeError, "arg 5 (closure) must be tuple"); return NULL; @@ -566,10 +566,10 @@ func_new_impl(PyTypeObject *type, PyCodeObject *code, PyObject *globals, /* check that the closure is well-formed */ nclosure = closure == Py_None ? 0 : PyTuple_GET_SIZE(closure); - if (nfree != nclosure) + if (code->co_nfreevars != nclosure) return PyErr_Format(PyExc_ValueError, "%U requires closure of length %zd, not %zd", - code->co_name, nfree, nclosure); + code->co_name, code->co_nfreevars, nclosure); if (nclosure) { Py_ssize_t i; for (i = 0; i < nclosure; i++) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 1460085..feb25aa 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -8848,11 +8848,10 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co, } PyObject *obj = f->f_localsptr[0]; - Py_ssize_t i, n; + Py_ssize_t i; if (obj == NULL && co->co_cell2arg) { /* The first argument might be a cell. */ - n = PyTuple_GET_SIZE(co->co_cellvars); - for (i = 0; i < n; i++) { + for (i = 0; i < co->co_ncellvars; i++) { if (co->co_cell2arg[i] == 0) { PyObject *cell = f->f_localsptr[co->co_nlocals + i]; assert(PyCell_Check(cell)); @@ -8867,21 +8866,12 @@ super_init_without_args(PyFrameObject *f, PyCodeObject *co, return -1; } - if (co->co_freevars == NULL) { - n = 0; - } - else { - assert(PyTuple_Check(co->co_freevars)); - n = PyTuple_GET_SIZE(co->co_freevars); - } - PyTypeObject *type = NULL; - for (i = 0; i < n; i++) { + for (i = 0; i < co->co_nfreevars; i++) { PyObject *name = PyTuple_GET_ITEM(co->co_freevars, i); assert(PyUnicode_Check(name)); if (_PyUnicode_EqualToASCIIId(name, &PyId___class__)) { - Py_ssize_t index = co->co_nlocals + - PyTuple_GET_SIZE(co->co_cellvars) + i; + Py_ssize_t index = co->co_nlocals + co->co_ncellvars + i; PyObject *cell = f->f_localsptr[index]; if (cell == NULL || !PyCell_Check(cell)) { PyErr_SetString(PyExc_RuntimeError, diff --git a/Python/ceval.c b/Python/ceval.c index e4a6a65..3bbcfe9 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3081,9 +3081,9 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag) PyObject *name, *value, *locals = LOCALS(); Py_ssize_t idx; assert(locals); - assert(oparg >= PyTuple_GET_SIZE(co->co_cellvars)); - idx = oparg - PyTuple_GET_SIZE(co->co_cellvars); - assert(idx >= 0 && idx < PyTuple_GET_SIZE(co->co_freevars)); + assert(oparg >= co->co_ncellvars); + idx = oparg - co->co_ncellvars; + assert(idx >= 0 && idx < co->co_nfreevars); name = PyTuple_GET_ITEM(co->co_freevars, idx); if (PyDict_CheckExact(locals)) { value = PyDict_GetItemWithError(locals, name); @@ -5062,7 +5062,7 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, /* Allocate and initialize storage for cell vars, and copy free vars into frame. */ - for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) { + for (i = 0; i < co->co_ncellvars; ++i) { PyObject *c; Py_ssize_t arg; /* Possibly account for the cell variable being an argument. */ @@ -5081,10 +5081,10 @@ initialize_locals(PyThreadState *tstate, PyFrameConstructor *con, } /* Copy closure variables to free variables */ - for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { + for (i = 0; i < co->co_nfreevars; ++i) { PyObject *o = PyTuple_GET_ITEM(con->fc_closure, i); Py_INCREF(o); - freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; + freevars[co->co_ncellvars + i] = o; } return 0; @@ -6417,7 +6417,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg) /* Don't stomp existing exception */ if (_PyErr_Occurred(tstate)) return; - if (oparg < PyTuple_GET_SIZE(co->co_cellvars)) { + if (oparg < co->co_ncellvars) { name = PyTuple_GET_ITEM(co->co_cellvars, oparg); format_exc_check_arg(tstate, @@ -6425,8 +6425,7 @@ format_exc_unbound(PyThreadState *tstate, PyCodeObject *co, int oparg) UNBOUNDLOCAL_ERROR_MSG, name); } else { - name = PyTuple_GET_ITEM(co->co_freevars, oparg - - PyTuple_GET_SIZE(co->co_cellvars)); + name = PyTuple_GET_ITEM(co->co_freevars, oparg - co->co_ncellvars); format_exc_check_arg(tstate, PyExc_NameError, UNBOUNDFREE_ERROR_MSG, name); } -- cgit v0.12