summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ionite <dev@ionite.io> 2023-02-25 03:50:53 (GMT)
committer: GitHub <noreply@github.com> 2023-02-25 03:50:53 (GMT)
commit: 9f472f81bc2636eda212c12796b8d6581783bcef (patch)
tree: 55e89ad5142c112bc8ff0c63af7c5a62b5f6f1df
parent: 3e80d21b7673edf70753e14d88907c60bc6970c3 (diff)
download: cpython-9f472f81bc2636eda212c12796b8d6581783bcef.zip
cpython-9f472f81bc2636eda212c12796b8d6581783bcef.tar.gz
cpython-9f472f81bc2636eda212c12796b8d6581783bcef.tar.bz2
[3.10] gh-101765: Fix SystemError / segmentation fault in iter `__reduce__` when internal access of `builtins.__dict__` exhausts the iterator (GH-101769) (#102229)
(cherry picked from commit 54dfa14c5a94b893b67a4d9e9e403ff538ce9023)
-rw-r--r-- Lib/test/test_iter.py 80
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst 1
-rw-r--r-- Objects/bytearrayobject.c 11
-rw-r--r-- Objects/bytesobject.c 11
-rw-r--r-- Objects/iterobject.c 22
-rw-r--r-- Objects/listobject.c 18
-rw-r--r-- Objects/tupleobject.c 11
-rw-r--r-- Objects/unicodeobject.c 11
8 files changed, 141 insertions, 24 deletions
diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py
index 554f602..3130ec5 100644
--- a/Lib/test/test_iter.py
+++ b/Lib/test/test_iter.py
@@ -7,6 +7,9 @@ from test.support.os_helper import TESTFN, unlink
from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ
import pickle
import collections.abc
+import functools
+import contextlib
+import builtins
# Test result of triple loop (too big to inline)
TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
@@ -81,6 +84,12 @@ class BadIterableClass:
def __iter__(self):
raise ZeroDivisionError
+class EmptyIterClass:
+ def __len__(self):
+ return 0
+ def __getitem__(self, i):
+ raise StopIteration
+
# Main test suite
class TestCase(unittest.TestCase):
@@ -228,6 +237,77 @@ class TestCase(unittest.TestCase):
self.assertEqual(list(empit), [5, 6])
self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6])
+ def test_reduce_mutating_builtins_iter(self):
+ # This is a reproducer of issue #101765
+ # where iter `__reduce__` calls could lead to a segfault or SystemError
+ # depending on the order of C argument evaluation, which is undefined
+
+ # Backup builtins
+ builtins_dict = builtins.__dict__
+ orig = {"iter": iter, "reversed": reversed}
+
+ def run(builtin_name, item, sentinel=None):
+ it = iter(item) if sentinel is None else iter(item, sentinel)
+
+ class CustomStr:
+ def __init__(self, name, iterator):
+ self.name = name
+ self.iterator = iterator
+ def __hash__(self):
+ return hash(self.name)
+ def __eq__(self, other):
+ # Here we exhaust our iterator, possibly changing
+ # its `it_seq` pointer to NULL
+ # The `__reduce__` call should correctly get
+ # the pointers after this call
+ list(self.iterator)
+ return other == self.name
+
+ # del is required here
+ # to not prematurely call __eq__ from
+ # the hash collision with the old key
+ del builtins_dict[builtin_name]
+ builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name]
+
+ return it.__reduce__()
+
+ types = [
+ (EmptyIterClass(),),
+ (bytes(8),),
+ (bytearray(8),),
+ ((1, 2, 3),),
+ (lambda: 0, 0),
+ ]
+
+ try:
+ run_iter = functools.partial(run, "iter")
+ # The returned value of `__reduce__` should not only be valid
+ # but also *empty*, as `it` was exhausted during `__eq__`
+ # i.e "xyz" returns (iter, ("",))
+ self.assertEqual(run_iter("xyz"), (orig["iter"], ("",)))
+ self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],)))
+
+ # _PyEval_GetBuiltin is also called for `reversed` in a branch of
+ # listiter_reduce_general
+ self.assertEqual(
+ run("reversed", orig["reversed"](list(range(8)))),
+ (iter, ([],))
+ )
+
+ for case in types:
+ self.assertEqual(run_iter(*case), (orig["iter"], ((),)))
+ finally:
+ # Restore original builtins
+ for key, func in orig.items():
+ # need to suppress KeyErrors in case
+ # a failed test deletes the key without setting anything
+ with contextlib.suppress(KeyError):
+ # del is required here
+ # to not invoke our custom __eq__ from
+ # the hash collision with the old key
+ del builtins_dict[key]
+ builtins_dict[key] = func
+
# Test a new_style class with __iter__ but no next() method
def test_new_style_iter_class(self):
class IterClass(object):
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst
new file mode 100644
index 0000000..cc99779
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst
@@ -0,0 +1 @@
+Fix SystemError / segmentation fault in iter ``__reduce__`` when internal access of ``builtins.__dict__`` keys mutates the iter object.
diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c
index 2a675d5..c7fe8b2 100644
--- a/Objects/bytearrayobject.c
+++ b/Objects/bytearrayobject.c
@@ -2442,11 +2442,16 @@ static PyObject *
bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_seq != NULL) {
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
- return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
+ return Py_BuildValue("N(())", iter);
}
}
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 4bcb2eb..acbc26a 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3146,11 +3146,16 @@ static PyObject *
striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_seq != NULL) {
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
- return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
+ return Py_BuildValue("N(())", iter);
}
}
diff --git a/Objects/iterobject.c b/Objects/iterobject.c
index e493e41..980c04b 100644
--- a/Objects/iterobject.c
+++ b/Objects/iterobject.c
@@ -104,11 +104,16 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(
static PyObject *
iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored))
{
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_seq != NULL)
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
- return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
+ return Py_BuildValue("N(())", iter);
}
PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
@@ -243,11 +248,16 @@ calliter_iternext(calliterobject *it)
static PyObject *
calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored))
{
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_callable != NULL && it->it_sentinel != NULL)
- return Py_BuildValue("N(OO)", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_callable, it->it_sentinel);
+ return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel);
else
- return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
+ return Py_BuildValue("N(())", iter);
}
static PyMethodDef calliter_methods[] = {
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 484d374..f46d5e4 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -3405,17 +3405,23 @@ listiter_reduce_general(void *_it, int forward)
_Py_IDENTIFIER(reversed);
PyObject *list;
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
/* the objects are not the same, index is of different types! */
if (forward) {
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
listiterobject *it = (listiterobject *)_it;
- if (it->it_seq)
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ if (it->it_seq) {
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
+ }
} else {
+ PyObject *reversed = _PyEval_GetBuiltinId(&PyId_reversed);
listreviterobject *it = (listreviterobject *)_it;
- if (it->it_seq)
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_reversed),
- it->it_seq, it->it_index);
+ if (it->it_seq) {
+ return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index);
+ }
}
/* empty iterator, create an empty list */
list = PyList_New(0);
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index 6b1ab74..a401bf9 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -1115,11 +1115,16 @@ static PyObject *
tupleiter_reduce(tupleiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_seq)
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
else
- return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter));
+ return Py_BuildValue("N(())", iter);
}
static PyObject *
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index b7ec1f2..5a1865f 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -16070,14 +16070,19 @@ static PyObject *
unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored))
{
_Py_IDENTIFIER(iter);
+ PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter);
+
+ /* _PyEval_GetBuiltinId can invoke arbitrary code,
+ * call must be before access of iterator pointers.
+ * see issue #101765 */
+
if (it->it_seq != NULL) {
- return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter),
- it->it_seq, it->it_index);
+ return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
} else {
PyObject *u = (PyObject *)_PyUnicode_New(0);
if (u == NULL)
return NULL;
- return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u);
+ return Py_BuildValue("N(N)", iter, u);
}
}