summaryrefslogtreecommitdiffstats
path: root/Modules
diff options
context:
space:
mode:
authorJeroen Demeyer <J.Demeyer@UGent.be>2019-07-13 14:39:18 (GMT)
committerMiss Islington (bot) <31488909+miss-islington@users.noreply.github.com>2019-07-13 14:39:18 (GMT)
commited184c06e2e610e12050c5d5c9b0c1c2ecabb930 (patch)
treed6fa97b514508fbede5a0822590a53bd2af33328 /Modules
parentdc3f99fa77f415077c20a9c2b3e953e5cd894076 (diff)
downloadcpython-ed184c06e2e610e12050c5d5c9b0c1c2ecabb930.zip
cpython-ed184c06e2e610e12050c5d5c9b0c1c2ecabb930.tar.gz
cpython-ed184c06e2e610e12050c5d5c9b0c1c2ecabb930.tar.bz2
bpo-37358: Use vectorcall for functools.partial (GH-14284)
https://bugs.python.org/issue37358
Diffstat (limited to 'Modules')
-rw-r--r--Modules/_functoolsmodule.c164
1 files changed, 98 insertions, 66 deletions
diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c
index a101363..17d49ac 100644
--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@@ -18,13 +18,15 @@ typedef struct {
PyObject *fn;
PyObject *args;
PyObject *kw;
- PyObject *dict;
+ PyObject *dict; /* __dict__ */
PyObject *weakreflist; /* List of weak references */
- int use_fastcall;
+ vectorcallfunc vectorcall;
} partialobject;
static PyTypeObject partial_type;
+static void partial_setvectorcall(partialobject *pto);
+
static PyObject *
partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
{
@@ -107,8 +109,7 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw)
return NULL;
}
- pto->use_fastcall = (_PyVectorcall_Function(func) != NULL);
-
+ partial_setvectorcall(pto);
return (PyObject *)pto;
}
@@ -126,77 +127,107 @@ partial_dealloc(partialobject *pto)
Py_TYPE(pto)->tp_free(pto);
}
+
+/* Merging keyword arguments using the vectorcall convention is messy, so
+ * if we would need to do that, we stop using vectorcall and fall back
+ * to using partial_call() instead. */
+_Py_NO_INLINE static PyObject *
+partial_vectorcall_fallback(partialobject *pto, PyObject *const *args,
+ size_t nargsf, PyObject *kwnames)
+{
+ pto->vectorcall = NULL;
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+ return _PyObject_MakeTpCall((PyObject *)pto, args, nargs, kwnames);
+}
+
static PyObject *
-partial_fastcall(partialobject *pto, PyObject **args, Py_ssize_t nargs,
- PyObject *kwargs)
+partial_vectorcall(partialobject *pto, PyObject *const *args,
+ size_t nargsf, PyObject *kwnames)
{
- PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
- PyObject *ret;
- PyObject **stack, **stack_buf = NULL;
- Py_ssize_t nargs2, pto_nargs;
+ /* pto->kw is mutable, so need to check every time */
+ if (PyDict_GET_SIZE(pto->kw)) {
+ return partial_vectorcall_fallback(pto, args, nargsf, kwnames);
+ }
+
+ Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+ Py_ssize_t nargs_total = nargs;
+ if (kwnames != NULL) {
+ nargs_total += PyTuple_GET_SIZE(kwnames);
+ }
+
+ PyObject **pto_args = _PyTuple_ITEMS(pto->args);
+ Py_ssize_t pto_nargs = PyTuple_GET_SIZE(pto->args);
- pto_nargs = PyTuple_GET_SIZE(pto->args);
- nargs2 = pto_nargs + nargs;
+ /* Fast path if we're called without arguments */
+ if (nargs_total == 0) {
+ return _PyObject_Vectorcall(pto->fn, pto_args, pto_nargs, NULL);
+ }
- if (pto_nargs == 0) {
- stack = args;
+ /* Fast path using PY_VECTORCALL_ARGUMENTS_OFFSET to prepend a single
+ * positional argument */
+ if (pto_nargs == 1 && (nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
+ PyObject **newargs = (PyObject **)args - 1;
+ PyObject *tmp = newargs[0];
+ newargs[0] = pto_args[0];
+ PyObject *ret = _PyObject_Vectorcall(pto->fn, newargs, nargs + 1, kwnames);
+ newargs[0] = tmp;
+ return ret;
}
- else if (nargs == 0) {
- stack = _PyTuple_ITEMS(pto->args);
+
+ Py_ssize_t newnargs_total = pto_nargs + nargs_total;
+
+ PyObject *small_stack[_PY_FASTCALL_SMALL_STACK];
+ PyObject *ret;
+ PyObject **stack;
+
+ if (newnargs_total <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
+ stack = small_stack;
}
else {
- if (nargs2 <= (Py_ssize_t)Py_ARRAY_LENGTH(small_stack)) {
- stack = small_stack;
- }
- else {
- stack_buf = PyMem_Malloc(nargs2 * sizeof(PyObject *));
- if (stack_buf == NULL) {
- PyErr_NoMemory();
- return NULL;
- }
- stack = stack_buf;
+ stack = PyMem_Malloc(newnargs_total * sizeof(PyObject *));
+ if (stack == NULL) {
+ PyErr_NoMemory();
+ return NULL;
}
-
- /* use borrowed references */
- memcpy(stack,
- _PyTuple_ITEMS(pto->args),
- pto_nargs * sizeof(PyObject*));
- memcpy(&stack[pto_nargs],
- args,
- nargs * sizeof(PyObject*));
}
- ret = _PyObject_FastCallDict(pto->fn, stack, nargs2, kwargs);
- PyMem_Free(stack_buf);
+ /* Copy to new stack, using borrowed references */
+ memcpy(stack, pto_args, pto_nargs * sizeof(PyObject*));
+ memcpy(stack + pto_nargs, args, nargs_total * sizeof(PyObject*));
+
+ ret = _PyObject_Vectorcall(pto->fn, stack, pto_nargs + nargs, kwnames);
+ if (stack != small_stack) {
+ PyMem_Free(stack);
+ }
return ret;
}
-static PyObject *
-partial_call_impl(partialobject *pto, PyObject *args, PyObject *kwargs)
+/* Set pto->vectorcall depending on the parameters of the partial object */
+static void
+partial_setvectorcall(partialobject *pto)
{
- PyObject *ret, *args2;
-
- /* Note: tupleconcat() is optimized for empty tuples */
- args2 = PySequence_Concat(pto->args, args);
- if (args2 == NULL) {
- return NULL;
+ if (_PyVectorcall_Function(pto->fn) == NULL) {
+ /* Don't use vectorcall if the underlying function doesn't support it */
+ pto->vectorcall = NULL;
+ }
+ /* We could have a special case if there are no arguments,
+ * but that is unlikely (why use partial without arguments?),
+ * so we don't optimize that */
+ else {
+ pto->vectorcall = (vectorcallfunc)partial_vectorcall;
}
- assert(PyTuple_Check(args2));
-
- ret = PyObject_Call(pto->fn, args2, kwargs);
- Py_DECREF(args2);
- return ret;
}
+
static PyObject *
partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
{
- PyObject *kwargs2, *res;
-
- assert (PyCallable_Check(pto->fn));
- assert (PyTuple_Check(pto->args));
- assert (PyDict_Check(pto->kw));
+ assert(PyCallable_Check(pto->fn));
+ assert(PyTuple_Check(pto->args));
+ assert(PyDict_Check(pto->kw));
+ /* Merge keywords */
+ PyObject *kwargs2;
if (PyDict_GET_SIZE(pto->kw) == 0) {
/* kwargs can be NULL */
kwargs2 = kwargs;
@@ -219,16 +250,16 @@ partial_call(partialobject *pto, PyObject *args, PyObject *kwargs)
}
}
-
- if (pto->use_fastcall) {
- res = partial_fastcall(pto,
- _PyTuple_ITEMS(args),
- PyTuple_GET_SIZE(args),
- kwargs2);
- }
- else {
- res = partial_call_impl(pto, args, kwargs2);
+ /* Merge positional arguments */
+ /* Note: tupleconcat() is optimized for empty tuples */
+ PyObject *args2 = PySequence_Concat(pto->args, args);
+ if (args2 == NULL) {
+ Py_XDECREF(kwargs2);
+ return NULL;
}
+
+ PyObject *res = PyObject_Call(pto->fn, args2, kwargs2);
+ Py_DECREF(args2);
Py_XDECREF(kwargs2);
return res;
}
@@ -365,11 +396,11 @@ partial_setstate(partialobject *pto, PyObject *state)
Py_INCREF(dict);
Py_INCREF(fn);
- pto->use_fastcall = (_PyVectorcall_Function(fn) != NULL);
Py_SETREF(pto->fn, fn);
Py_SETREF(pto->args, fnargs);
Py_SETREF(pto->kw, kw);
Py_XSETREF(pto->dict, dict);
+ partial_setvectorcall(pto);
Py_RETURN_NONE;
}
@@ -386,7 +417,7 @@ static PyTypeObject partial_type = {
0, /* tp_itemsize */
/* methods */
(destructor)partial_dealloc, /* tp_dealloc */
- 0, /* tp_vectorcall_offset */
+ offsetof(partialobject, vectorcall),/* tp_vectorcall_offset */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_as_async */
@@ -401,7 +432,8 @@ static PyTypeObject partial_type = {
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
- Py_TPFLAGS_BASETYPE, /* tp_flags */
+ Py_TPFLAGS_BASETYPE |
+ _Py_TPFLAGS_HAVE_VECTORCALL, /* tp_flags */
partial_doc, /* tp_doc */
(traverseproc)partial_traverse, /* tp_traverse */
0, /* tp_clear */