From 0567786d26348aa7eaf0ab1b5d038fdabe409d92 Mon Sep 17 00:00:00 2001 From: Jeroen Demeyer Date: Fri, 16 Aug 2019 12:41:27 +0200 Subject: bpo-37540: vectorcall: keyword names must be strings (GH-14682) The fact that keyword names are strings is now part of the vectorcall and `METH_FASTCALL` protocols. The biggest concrete change is that `_PyStack_UnpackDict` now checks that and raises `TypeError` if not. CC @markshannon @vstinner https://bugs.python.org/issue37540 --- Doc/c-api/object.rst | 4 ++-- Doc/c-api/structures.rst | 1 + Doc/library/dis.rst | 6 ++++-- Include/cpython/abstract.h | 3 +-- Lib/test/test_extcall.py | 2 +- Lib/test/test_unpack_ex.py | 2 +- .../C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst | 2 ++ Objects/call.c | 25 ++++++++++++++++------ Python/ceval.c | 24 ++++++++------------- Python/getargs.c | 20 +++-------------- 10 files changed, 43 insertions(+), 46 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 2cf0328..fd1e9c6 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -400,8 +400,8 @@ Object Protocol :c:func:`PyVectorcall_NARGS(nargsf) `. *kwnames* can be either NULL (no keyword arguments) or a tuple of keyword - names. In the latter case, the values of the keyword arguments are stored - in *args* after the positional arguments. + names, which must be strings. In the latter case, the values of the keyword + arguments are stored in *args* after the positional arguments. The number of keyword arguments does not influence *nargsf*. *kwnames* must contain only objects of type ``str`` (not a subclass), diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 5184ad5..d4e65af 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -204,6 +204,7 @@ also keyword arguments. So there are a total of 6 calling conventions: Keyword arguments are passed the same way as in the vectorcall protocol: there is an additional fourth :c:type:`PyObject\*` parameter which is a tuple representing the names of the keyword arguments + (which are guaranteed to be strings) or possibly *NULL* if there are no keywords. The values of the keyword arguments are stored in the *args* array, after the positional arguments. diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 39a3e13..4a20245 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1142,8 +1142,10 @@ All of the following opcodes use their arguments. Calls a callable object with positional (if any) and keyword arguments. *argc* indicates the total number of positional and keyword arguments. - The top element on the stack contains a tuple of keyword argument names. - Below that are keyword arguments in the order corresponding to the tuple. + The top element on the stack contains a tuple with the names of the + keyword arguments, which must be strings. + Below that are the values for the keyword arguments, + in the order corresponding to the tuple. Below that are positional arguments, with the right-most parameter on top. Below the arguments is a callable object to call. ``CALL_FUNCTION_KW`` pops all arguments and the callable object off the stack, diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h index d57aa54..62a113f 100644 --- a/Include/cpython/abstract.h +++ b/Include/cpython/abstract.h @@ -88,8 +88,7 @@ _PyVectorcall_Function(PyObject *callable) of keyword arguments does not change nargsf). kwnames can also be NULL if there are no keyword arguments. - keywords must only contains str strings (no subclass), and all keys must - be unique. + keywords must only contain strings and all keys must be unique. Return the result on success. Raise an exception and return NULL on error. */ diff --git a/Lib/test/test_extcall.py b/Lib/test/test_extcall.py index 3cac3bd..d9dcb70 100644 --- a/Lib/test/test_extcall.py +++ b/Lib/test/test_extcall.py @@ -237,7 +237,7 @@ What about willful misconduct? >>> f(**{1:2}) Traceback (most recent call last): ... - TypeError: f() keywords must be strings + TypeError: keywords must be strings >>> h(**{'e': 2}) Traceback (most recent call last): diff --git a/Lib/test/test_unpack_ex.py b/Lib/test/test_unpack_ex.py index 45cf051..87fea59 100644 --- a/Lib/test/test_unpack_ex.py +++ b/Lib/test/test_unpack_ex.py @@ -256,7 +256,7 @@ Overridden parameters >>> f(**{1: 3}, **{1: 5}) Traceback (most recent call last): ... - TypeError: f() keywords must be strings + TypeError: f() got multiple values for keyword argument '1' Unpacking non-sequence diff --git a/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst b/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst new file mode 100644 index 0000000..1a09c7e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2019-07-10-12-27-28.bpo-37540.E8Z773.rst @@ -0,0 +1,2 @@ +The vectorcall protocol now requires that the caller passes only strings as +keyword names. diff --git a/Objects/call.c b/Objects/call.c index 7d91789..8a60b3e 100644 --- a/Objects/call.c +++ b/Objects/call.c @@ -322,8 +322,7 @@ _PyFunction_Vectorcall(PyObject *func, PyObject* const* stack, assert(nargs >= 0); assert(kwnames == NULL || PyTuple_CheckExact(kwnames)); assert((nargs == 0 && nkwargs == 0) || stack != NULL); - /* kwnames must only contains str strings, no subclass, and all keys must - be unique */ + /* kwnames must only contain strings and all keys must be unique */ if (co->co_kwonlyargcount == 0 && nkwargs == 0 && (co->co_flags & ~PyCF_MASK) == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) @@ -943,12 +942,12 @@ _PyStack_AsDict(PyObject *const *values, PyObject *kwnames) vector; return NULL with exception set on error. Return the keyword names tuple in *p_kwnames. - The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET. + This also checks that all keyword names are strings. If not, a TypeError is + raised. - When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) + The newly allocated argument vector supports PY_VECTORCALL_ARGUMENTS_OFFSET. - The type of keyword keys is not checked, these checks should be done - later (ex: _PyArg_ParseStackAndKeywords). */ + When done, you must call _PyStack_UnpackDict_Free(stack, nargs, kwnames) */ static PyObject *const * _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs, PyObject **p_kwnames) @@ -994,7 +993,9 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs, called in the performance critical hot code. */ Py_ssize_t pos = 0, i = 0; PyObject *key, *value; + unsigned long keys_are_strings = Py_TPFLAGS_UNICODE_SUBCLASS; while (PyDict_Next(kwargs, &pos, &key, &value)) { + keys_are_strings &= Py_TYPE(key)->tp_flags; Py_INCREF(key); Py_INCREF(value); PyTuple_SET_ITEM(kwnames, i, key); @@ -1002,6 +1003,18 @@ _PyStack_UnpackDict(PyObject *const *args, Py_ssize_t nargs, PyObject *kwargs, i++; } + /* keys_are_strings has the value Py_TPFLAGS_UNICODE_SUBCLASS if that + * flag is set for all keys. Otherwise, keys_are_strings equals 0. + * We do this check once at the end instead of inside the loop above + * because it simplifies the deallocation in the failing case. + * It happens to also make the loop above slightly more efficient. */ + if (!keys_are_strings) { + PyErr_SetString(PyExc_TypeError, + "keywords must be strings"); + _PyStack_UnpackDict_Free(stack, nargs, kwnames); + return NULL; + } + *p_kwnames = kwnames; return stack; } diff --git a/Python/ceval.c b/Python/ceval.c index 7c73591..ee03350 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -3504,7 +3504,9 @@ main_loop: PyObject **sp, *res, *names; names = POP(); - assert(PyTuple_CheckExact(names) && PyTuple_GET_SIZE(names) <= oparg); + assert(PyTuple_Check(names)); + assert(PyTuple_GET_SIZE(names) <= oparg); + /* We assume without checking that names contains only strings */ sp = stack_pointer; res = call_function(tstate, &sp, oparg, names); stack_pointer = sp; @@ -5372,20 +5374,12 @@ format_kwargs_error(PyThreadState *tstate, PyObject *func, PyObject *kwargs) _PyErr_Fetch(tstate, &exc, &val, &tb); if (val && PyTuple_Check(val) && PyTuple_GET_SIZE(val) == 1) { PyObject *key = PyTuple_GET_ITEM(val, 0); - if (!PyUnicode_Check(key)) { - _PyErr_Format(tstate, PyExc_TypeError, - "%.200s%.200s keywords must be strings", - PyEval_GetFuncName(func), - PyEval_GetFuncDesc(func)); - } - else { - _PyErr_Format(tstate, PyExc_TypeError, - "%.200s%.200s got multiple " - "values for keyword argument '%U'", - PyEval_GetFuncName(func), - PyEval_GetFuncDesc(func), - key); - } + _PyErr_Format(tstate, PyExc_TypeError, + "%.200s%.200s got multiple " + "values for keyword argument '%S'", + PyEval_GetFuncName(func), + PyEval_GetFuncDesc(func), + key); Py_XDECREF(exc); Py_XDECREF(val); Py_XDECREF(tb); diff --git a/Python/getargs.c b/Python/getargs.c index 59f0fda..cdc16d4 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -2043,11 +2043,7 @@ find_keyword(PyObject *kwnames, PyObject *const *kwstack, PyObject *key) if (kwname == key) { return kwstack[i]; } - if (!PyUnicode_Check(kwname)) { - /* ignore non-string keyword keys: - an error will be raised below */ - continue; - } + assert(PyUnicode_Check(kwname)); if (_PyUnicode_EQ(kwname, key)) { return kwstack[i]; } @@ -2275,16 +2271,11 @@ vgetargskeywordsfast_impl(PyObject *const *args, Py_ssize_t nargs, j++; } - if (!PyUnicode_Check(keyword)) { - PyErr_SetString(PyExc_TypeError, - "keywords must be strings"); - return cleanreturn(0, &freelist); - } match = PySequence_Contains(kwtuple, keyword); if (match <= 0) { if (!match) { PyErr_Format(PyExc_TypeError, - "'%U' is an invalid keyword " + "'%S' is an invalid keyword " "argument for %.200s%s", keyword, (parser->fname == NULL) ? "this function" : parser->fname, @@ -2505,16 +2496,11 @@ _PyArg_UnpackKeywords(PyObject *const *args, Py_ssize_t nargs, j++; } - if (!PyUnicode_Check(keyword)) { - PyErr_SetString(PyExc_TypeError, - "keywords must be strings"); - return NULL; - } match = PySequence_Contains(kwtuple, keyword); if (match <= 0) { if (!match) { PyErr_Format(PyExc_TypeError, - "'%U' is an invalid keyword " + "'%S' is an invalid keyword " "argument for %.200s%s", keyword, (parser->fname == NULL) ? "this function" : parser->fname, -- cgit v0.12