From 74330d992be26829dba65ab83d698d42b2f2a2ee Mon Sep 17 00:00:00 2001
From: Wenzel Jakob <wenzel.jakob@epfl.ch>
Date: Sat, 14 Sep 2024 00:40:25 +0900
Subject: gh-100554: Add ``Py_tp_vectorcall`` slot to set
 ``PyTypeObject.tp_vectorcall`` using the ``PyType_FromSpec`` function family.
 (#123332)

---
 Doc/c-api/type.rst                                 | 13 ++--
 Doc/c-api/typeobj.rst                              | 39 ++++++++--
 Include/typeslots.h                                |  4 +
 Lib/test/test_capi/test_misc.py                    |  8 ++
 .../2024-08-26-13-01-20.gh-issue-100554.0ku85o.rst |  4 +
 Misc/stable_abi.toml                               |  2 +
 Modules/_testcapi/heaptype.c                       | 86 ++++++++++++++++++++++
 Objects/typeslots.inc                              |  1 +
 8 files changed, 147 insertions(+), 10 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C_API/2024-08-26-13-01-20.gh-issue-100554.0ku85o.rst

diff --git a/Doc/c-api/type.rst b/Doc/c-api/type.rst
index fa04d6c..04aaf7f 100644
--- a/Doc/c-api/type.rst
+++ b/Doc/c-api/type.rst
@@ -504,11 +504,8 @@ The following functions and structs are used to create
          See :ref:`PyMemberDef documentation <pymemberdef-offsets>`
          for details.
 
-      The following fields cannot be set at all when creating a heap type:
-
-      * :c:member:`~PyTypeObject.tp_vectorcall`
-        (use :c:member:`~PyTypeObject.tp_new` and/or
-        :c:member:`~PyTypeObject.tp_init`)
+      The following fields cannot be set at all when creating a heap
+      type:
 
       * Internal fields:
         :c:member:`~PyTypeObject.tp_dict`,
@@ -531,6 +528,12 @@ The following functions and structs are used to create
         :c:member:`~PyBufferProcs.bf_releasebuffer` are now available
         under the :ref:`limited API <limited-c-api>`.
 
+      .. versionchanged:: 3.14
+
+         The field :c:member:`~PyTypeObject.tp_vectorcall` can now be set
+         using ``Py_tp_vectorcall``.  See the field's documentation
+         for details.
+
    .. c:member:: void *pfunc
 
       The desired value of the slot. In most cases, this is a pointer
diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst
index b7b1418..cfe4563 100644
--- a/Doc/c-api/typeobj.rst
+++ b/Doc/c-api/typeobj.rst
@@ -2137,11 +2137,40 @@ and :c:data:`PyType_Type` effectively act as defaults.)
 
 .. c:member:: vectorcallfunc PyTypeObject.tp_vectorcall
 
-   Vectorcall function to use for calls of this type object.
-   In other words, it is used to implement
-   :ref:`vectorcall <vectorcall>` for ``type.__call__``.
-   If ``tp_vectorcall`` is ``NULL``, the default call implementation
-   using :meth:`~object.__new__` and :meth:`~object.__init__` is used.
+   A :ref:`vectorcall function <vectorcall>` to use for calls of this type
+   object (rather than instances).
+   In other words, ``tp_vectorcall`` can be used to optimize ``type.__call__``,
+   which typically returns a new instance of *type*.
+
+   As with any vectorcall function, if ``tp_vectorcall`` is ``NULL``,
+   the *tp_call* protocol (``Py_TYPE(type)->tp_call``) is used instead.
+
+   .. note::
+
+      The :ref:`vectorcall protocol <vectorcall>` requires that the vectorcall
+      function has the same behavior as the corresponding ``tp_call``.
+      This means that ``type->tp_vectorcall`` must match the behavior of
+      ``Py_TYPE(type)->tp_call``.
+
+      Specifically, if *type* uses the default metaclass,
+      ``type->tp_vectorcall`` must behave the same as
+      :c:expr:`PyType_Type->tp_call`, which:
+
+      - calls ``type->tp_new``,
+
+      - if the result is a subclass of *type*, calls ``type->tp_init``
+        on the result of ``tp_new``, and
+
+      - returns the result of ``tp_new``.
+
+      Typically, ``tp_vectorcall`` is overridden to optimize this process
+      for specific :c:member:`~PyTypeObject.tp_new` and
+      :c:member:`~PyTypeObject.tp_init`.
+      When doing this for user-subclassable types, note that both can be
+      overridden (using :py:func:`~object.__new__` and
+      :py:func:`~object.__init__`, respectively).
+
+
 
    **Inheritance:**
 
diff --git a/Include/typeslots.h b/Include/typeslots.h
index 506b055..e91caa1 100644
--- a/Include/typeslots.h
+++ b/Include/typeslots.h
@@ -86,3 +86,7 @@
 /* New in 3.10 */
 #define Py_am_send 81
 #endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030E0000
+/* New in 3.14 */
+#define Py_tp_vectorcall 82
+#endif
diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py
index 18392c4..ebc0a8a 100644
--- a/Lib/test/test_capi/test_misc.py
+++ b/Lib/test/test_capi/test_misc.py
@@ -733,6 +733,14 @@ class CAPITest(unittest.TestCase):
         with self.assertRaisesRegex(TypeError, msg):
             sub = _testcapi.make_type_with_base(Base)
 
+    def test_heaptype_with_tp_vectorcall(self):
+        tp = _testcapi.HeapCTypeVectorcall
+        v0 = tp.__new__(tp)  # explicit two-step construction, step 1: tp_new
+        v0.__init__()  # step 2: tp_init, which stores 2 in ``value``
+        v1 = tp()  # calling the type is expected to go through tp_vectorcall, which stores 1
+        self.assertEqual(v0.value, 2)
+        self.assertEqual(v1.value, 1)
+
     def test_multiple_inheritance_ctypes_with_weakref_or_dict(self):
         for weakref_cls in (_testcapi.HeapCTypeWithWeakref,
                             _testlimitedcapi.HeapCTypeWithRelativeWeakref):
diff --git a/Misc/NEWS.d/next/C_API/2024-08-26-13-01-20.gh-issue-100554.0ku85o.rst b/Misc/NEWS.d/next/C_API/2024-08-26-13-01-20.gh-issue-100554.0ku85o.rst
new file mode 100644
index 0000000..97138de
--- /dev/null
+++ b/Misc/NEWS.d/next/C_API/2024-08-26-13-01-20.gh-issue-100554.0ku85o.rst
@@ -0,0 +1,4 @@
+Added a slot ``Py_tp_vectorcall`` to set
+:c:member:`~PyTypeObject.tp_vectorcall` via the :c:func:`PyType_FromSpec`
+function family. Limited API extensions can use this feature to provide more
+efficient vector call-based implementations of ``__new__`` and ``__init__``.
diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml
index 8bf638c..6036fc9 100644
--- a/Misc/stable_abi.toml
+++ b/Misc/stable_abi.toml
@@ -2526,3 +2526,5 @@
     added = '3.14'
 [function.PyLong_AsUInt64]
     added = '3.14'
+[const.Py_tp_vectorcall]
+    added = '3.14'
\ No newline at end of file
diff --git a/Modules/_testcapi/heaptype.c b/Modules/_testcapi/heaptype.c
index b45b890..b3fb9ec 100644
--- a/Modules/_testcapi/heaptype.c
+++ b/Modules/_testcapi/heaptype.c
@@ -1008,6 +1008,89 @@ static PyType_Spec HeapCTypeSetattr_spec = {
     HeapCTypeSetattr_slots
 };
 
+/*
+ * The code below is for a test that uses PyType_FromSpec API to create a heap
+ * type that simultaneously exposes
+ *
+ * - A regular __new__ / __init__ constructor pair
+ * - A vector call handler in the type object
+ *
+ * A general requirement of vector call implementations is that they should
+ * behave identically (except being potentially faster). The example below
+ * deviates from this rule by initializing the instance with a different value.
+ * This is done here only so that we can see which path was taken and is
+ * strongly discouraged in other cases.
+ */
+
+typedef struct {
+    PyObject_HEAD
+    long value;  /* path marker: the vectorcall handler stores 1, tp_init stores 2 */
+} HeapCTypeVectorcallObject;
+
+static PyObject *heapctype_vectorcall_vectorcall(PyObject *self,  /* the type object being called */
+                                                 PyObject *const *args_in,
+                                                 size_t nargsf,
+                                                 PyObject *kwargs_in)
+{
+    if (kwargs_in || PyVectorcall_NARGS(nargsf)) {  /* any positional or keyword argument is an error */
+        return PyErr_Format(PyExc_IndexError, "HeapCTypeVectorcall() takes no arguments!");
+    }
+    /* Vectorcall path: allocate the instance directly; tp_new and tp_init are not called here. */
+    HeapCTypeVectorcallObject *r =
+        PyObject_New(HeapCTypeVectorcallObject, (PyTypeObject *) self);
+
+    if (!r) {
+        return NULL;  /* allocation failed */
+    }
+
+    r->value = 1;  /* marks the instance as built by the vectorcall handler */
+
+    return (PyObject *) r;
+}
+
+static PyObject *
+heapctype_vectorcall_new(PyTypeObject* type, PyObject* args, PyObject *kwargs)
+{
+    if (PyTuple_GET_SIZE(args) || kwargs) {  /* tp_new accepts no arguments */
+        return PyErr_Format(PyExc_IndexError, "HeapCTypeVectorcall() takes no arguments!");
+    }
+    /* Zero-filled allocation: 'value' is exposed via tp_members and may be read before tp_init. */
+    return PyType_GenericAlloc(type, 0);
+}
+
+static int
+heapctype_vectorcall_init(PyObject *self, PyObject *args, PyObject *kwargs) {
+    if (PyTuple_GET_SIZE(args) || kwargs) {  /* tp_init accepts no arguments */
+        PyErr_Format(PyExc_IndexError, "HeapCTypeVectorcall() takes no arguments!");
+        return -1;
+    }
+    /* tp_init path: store 2 so it can be told apart from the vectorcall handler, which stores 1. */
+    HeapCTypeVectorcallObject *o = (HeapCTypeVectorcallObject *) self;
+    o->value = 2;
+    return 0;
+}
+
+static struct PyMemberDef heapctype_vectorcall_members[] = {
+    {"value", Py_T_LONG, offsetof(HeapCTypeVectorcallObject, value), 0, NULL},  /* exposes the C field as attribute "value" */
+    {NULL}  /* sentinel */
+};
+
+static PyType_Slot HeapCTypeVectorcall_slots[] = {
+    {Py_tp_new, heapctype_vectorcall_new},  /* regular constructor pair: __new__ ... */
+    {Py_tp_init, heapctype_vectorcall_init},  /* ... and __init__ */
+    {Py_tp_vectorcall, heapctype_vectorcall_vectorcall},  /* the slot this test type exists to exercise */
+    {Py_tp_members, heapctype_vectorcall_members},
+    {0, 0},  /* end of slot list */
+};
+
+static PyType_Spec HeapCTypeVectorcall_spec = {
+    "_testcapi.HeapCTypeVectorcall",  /* name */
+    sizeof(HeapCTypeVectorcallObject),  /* basicsize */
+    0,  /* itemsize */
+    Py_TPFLAGS_DEFAULT,  /* flags */
+    HeapCTypeVectorcall_slots  /* slots */
+};
+
 PyDoc_STRVAR(HeapCCollection_doc,
 "Tuple-like heap type that uses PyObject_GetItemData for items.");
 
@@ -1180,6 +1263,9 @@ _PyTestCapi_Init_Heaptype(PyObject *m) {
     PyObject *HeapCTypeSetattr = PyType_FromSpec(&HeapCTypeSetattr_spec);
     ADD("HeapCTypeSetattr", HeapCTypeSetattr);
 
+    PyObject *HeapCTypeVectorcall = PyType_FromSpec(&HeapCTypeVectorcall_spec);
+    ADD("HeapCTypeVectorcall", HeapCTypeVectorcall);
+
     PyObject *subclass_with_finalizer_bases = PyTuple_Pack(1, HeapCTypeSubclass);
     if (subclass_with_finalizer_bases == NULL) {
         return -1;
diff --git a/Objects/typeslots.inc b/Objects/typeslots.inc
index 896daa7..ffb85ff 100644
--- a/Objects/typeslots.inc
+++ b/Objects/typeslots.inc
@@ -80,3 +80,4 @@
 {offsetof(PyAsyncMethods, am_anext), offsetof(PyTypeObject, tp_as_async)},
 {-1, offsetof(PyTypeObject, tp_finalize)},
 {offsetof(PyAsyncMethods, am_send), offsetof(PyTypeObject, tp_as_async)},
+{-1, offsetof(PyTypeObject, tp_vectorcall)},
-- 
cgit v0.12