summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMark Shannon <mark@hotpy.org>2024-12-11 14:02:59 (GMT)
committerGitHub <noreply@github.com>2024-12-11 14:02:59 (GMT)
commit5a23994a3dbee43a0b08f5920032f60f38b63071 (patch)
treee2fad9c3a8a3204f2e155d3fff61d733231c4003
parent359389ed51aecc107681e600b71852c0a97304e1 (diff)
downloadcpython-5a23994a3dbee43a0b08f5920032f60f38b63071.zip
cpython-5a23994a3dbee43a0b08f5920032f60f38b63071.tar.gz
cpython-5a23994a3dbee43a0b08f5920032f60f38b63071.tar.bz2
GH-127058: Make `PySequence_Tuple` safer and probably faster. (#127758)
* Use a small buffer, then list when constructing a tuple from an arbitrary sequence.
-rw-r--r--Include/internal/pycore_list.h2
-rw-r--r--Lib/test/test_capi/test_tuple.py25
-rw-r--r--Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst3
-rw-r--r--Objects/abstract.c87
-rw-r--r--Objects/listobject.c19
5 files changed, 88 insertions, 48 deletions
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index f03e484..836ff30 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -62,7 +62,7 @@ typedef struct {
union _PyStackRef;
PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n);
-
+PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v);
#ifdef __cplusplus
}
diff --git a/Lib/test/test_capi/test_tuple.py b/Lib/test/test_capi/test_tuple.py
index e6b49ca..6349467 100644
--- a/Lib/test/test_capi/test_tuple.py
+++ b/Lib/test/test_capi/test_tuple.py
@@ -1,5 +1,6 @@
import unittest
import sys
+import gc
from collections import namedtuple
from test.support import import_helper
@@ -257,5 +258,29 @@ class CAPITest(unittest.TestCase):
self.assertRaises(SystemError, resize, [1, 2, 3], 0, False)
self.assertRaises(SystemError, resize, NULL, 0, False)
+ def test_bug_59313(self):
+ # Before 3.14, the C-API function PySequence_Tuple
+ # would create incomplete tuples which were visible to
+ # the cycle GC, and this test would crash the interpeter.
+ TAG = object()
+ tuples = []
+
+ def referrer_tuples():
+ return [x for x in gc.get_referrers(TAG)
+ if isinstance(x, tuple)]
+
+ def my_iter():
+ nonlocal tuples
+ yield TAG # 'tag' gets stored in the result tuple
+ tuples += referrer_tuples()
+ for x in range(10):
+ tuples += referrer_tuples()
+ # Prior to 3.13 would raise a SystemError when the tuple needs to be resized
+ yield x
+
+ self.assertEqual(tuple(my_iter()), (TAG, *range(10)))
+ self.assertEqual(tuples, [])
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst
new file mode 100644
index 0000000..248e1b48
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-09-11-29-10.gh-issue-127058.pqtBcZ.rst
@@ -0,0 +1,3 @@
+``PySequence_Tuple`` now creates the resulting tuple atomically, preventing
+partially created tuples being visible to the garbage collector or through
+``gc.get_referrers()``
diff --git a/Objects/abstract.c b/Objects/abstract.c
index f664787..c92ef10 100644
--- a/Objects/abstract.c
+++ b/Objects/abstract.c
@@ -1993,9 +1993,6 @@ PyObject *
PySequence_Tuple(PyObject *v)
{
PyObject *it; /* iter(v) */
- Py_ssize_t n; /* guess for result tuple size */
- PyObject *result = NULL;
- Py_ssize_t j;
if (v == NULL) {
return null_error();
@@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v)
if (it == NULL)
return NULL;
- /* Guess result size and allocate space. */
- n = PyObject_LengthHint(v, 10);
- if (n == -1)
- goto Fail;
- result = PyTuple_New(n);
- if (result == NULL)
- goto Fail;
-
- /* Fill the tuple. */
- for (j = 0; ; ++j) {
+ Py_ssize_t n;
+ PyObject *buffer[8];
+ for (n = 0; n < 8; n++) {
PyObject *item = PyIter_Next(it);
if (item == NULL) {
- if (PyErr_Occurred())
- goto Fail;
- break;
- }
- if (j >= n) {
- size_t newn = (size_t)n;
- /* The over-allocation strategy can grow a bit faster
- than for lists because unlike lists the
- over-allocation isn't permanent -- we reclaim
- the excess before the end of this routine.
- So, grow by ten and then add 25%.
- */
- newn += 10u;
- newn += newn >> 2;
- if (newn > PY_SSIZE_T_MAX) {
- /* Check for overflow */
- PyErr_NoMemory();
- Py_DECREF(item);
- goto Fail;
+ if (PyErr_Occurred()) {
+ goto fail;
}
- n = (Py_ssize_t)newn;
- if (_PyTuple_Resize(&result, n) != 0) {
- Py_DECREF(item);
- goto Fail;
+ Py_DECREF(it);
+ return _PyTuple_FromArraySteal(buffer, n);
+ }
+ buffer[n] = item;
+ }
+ PyListObject *list = (PyListObject *)PyList_New(16);
+ if (list == NULL) {
+ goto fail;
+ }
+ assert(n == 8);
+ Py_SET_SIZE(list, n);
+ for (Py_ssize_t j = 0; j < n; j++) {
+ PyList_SET_ITEM(list, j, buffer[j]);
+ }
+ for (;;) {
+ PyObject *item = PyIter_Next(it);
+ if (item == NULL) {
+ if (PyErr_Occurred()) {
+ Py_DECREF(list);
+ Py_DECREF(it);
+ return NULL;
}
+ break;
+ }
+ if (_PyList_AppendTakeRef(list, item) < 0) {
+ Py_DECREF(list);
+ Py_DECREF(it);
+ return NULL;
}
- PyTuple_SET_ITEM(result, j, item);
}
-
- /* Cut tuple back if guess was too large. */
- if (j < n &&
- _PyTuple_Resize(&result, j) != 0)
- goto Fail;
-
Py_DECREF(it);
- return result;
-
-Fail:
- Py_XDECREF(result);
+ PyObject *res = _PyList_AsTupleAndClear(list);
+ Py_DECREF(list);
+ return res;
+fail:
Py_DECREF(it);
+ while (n > 0) {
+ n--;
+ Py_DECREF(buffer[n]);
+ }
return NULL;
}
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 3832295..a877bad 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -3175,6 +3175,25 @@ PyList_AsTuple(PyObject *v)
}
PyObject *
+_PyList_AsTupleAndClear(PyListObject *self)
+{
+ assert(self != NULL);
+ PyObject *ret;
+ if (self->ob_item == NULL) {
+ return PyTuple_New(0);
+ }
+ Py_BEGIN_CRITICAL_SECTION(self);
+ PyObject **items = self->ob_item;
+ Py_ssize_t size = Py_SIZE(self);
+ self->ob_item = NULL;
+ Py_SET_SIZE(self, 0);
+ ret = _PyTuple_FromArraySteal(items, size);
+ free_list_items(items, false);
+ Py_END_CRITICAL_SECTION();
+ return ret;
+}
+
+PyObject *
_PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n)
{
if (n == 0) {