diff options
author | Ionite <dev@ionite.io> | 2023-02-25 03:50:53 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-25 03:50:53 (GMT) |
commit | 9f472f81bc2636eda212c12796b8d6581783bcef (patch) | |
tree | 55e89ad5142c112bc8ff0c63af7c5a62b5f6f1df | |
parent | 3e80d21b7673edf70753e14d88907c60bc6970c3 (diff) | |
download | cpython-9f472f81bc2636eda212c12796b8d6581783bcef.zip cpython-9f472f81bc2636eda212c12796b8d6581783bcef.tar.gz cpython-9f472f81bc2636eda212c12796b8d6581783bcef.tar.bz2 |
[3.10] gh-101765: Fix SystemError / segmentation fault in iter `__reduce__` when internal access of `builtins.__dict__` exhausts the iterator (GH-101769) (#102229)
(cherry picked from commit 54dfa14c5a94b893b67a4d9e9e403ff538ce9023)
-rw-r--r-- | Lib/test/test_iter.py | 80 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst | 1 | ||||
-rw-r--r-- | Objects/bytearrayobject.c | 11 | ||||
-rw-r--r-- | Objects/bytesobject.c | 11 | ||||
-rw-r--r-- | Objects/iterobject.c | 22 | ||||
-rw-r--r-- | Objects/listobject.c | 18 | ||||
-rw-r--r-- | Objects/tupleobject.c | 11 | ||||
-rw-r--r-- | Objects/unicodeobject.c | 11 |
8 files changed, 141 insertions, 24 deletions
diff --git a/Lib/test/test_iter.py b/Lib/test/test_iter.py index 554f602..3130ec5 100644 --- a/Lib/test/test_iter.py +++ b/Lib/test/test_iter.py @@ -7,6 +7,9 @@ from test.support.os_helper import TESTFN, unlink from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ import pickle import collections.abc +import functools +import contextlib +import builtins # Test result of triple loop (too big to inline) TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2), @@ -81,6 +84,12 @@ class BadIterableClass: def __iter__(self): raise ZeroDivisionError +class EmptyIterClass: + def __len__(self): + return 0 + def __getitem__(self, i): + raise StopIteration + # Main test suite class TestCase(unittest.TestCase): @@ -228,6 +237,77 @@ class TestCase(unittest.TestCase): self.assertEqual(list(empit), [5, 6]) self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6]) + def test_reduce_mutating_builtins_iter(self): + # This is a reproducer of issue #101765 + # where iter `__reduce__` calls could lead to a segfault or SystemError + # depending on the order of C argument evaluation, which is undefined + + # Backup builtins + builtins_dict = builtins.__dict__ + orig = {"iter": iter, "reversed": reversed} + + def run(builtin_name, item, sentinel=None): + it = iter(item) if sentinel is None else iter(item, sentinel) + + class CustomStr: + def __init__(self, name, iterator): + self.name = name + self.iterator = iterator + def __hash__(self): + return hash(self.name) + def __eq__(self, other): + # Here we exhaust our iterator, possibly changing + # its `it_seq` pointer to NULL + # The `__reduce__` call should correctly get + # the pointers after this call + list(self.iterator) + return other == self.name + + # del is required here + # to not prematurely call __eq__ from + # the hash collision with the old key + del builtins_dict[builtin_name] + builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name] + + return it.__reduce__() + + types = [ + (EmptyIterClass(),), + (bytes(8),), + (bytearray(8),), + ((1, 2, 3),), + (lambda: 0, 0), + ] + + try: + run_iter = functools.partial(run, "iter") + # The returned value of `__reduce__` should not only be valid + # but also *empty*, as `it` was exhausted during `__eq__` + # i.e "xyz" returns (iter, ("",)) + self.assertEqual(run_iter("xyz"), (orig["iter"], ("",))) + self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],))) + + # _PyEval_GetBuiltin is also called for `reversed` in a branch of + # listiter_reduce_general + self.assertEqual( + run("reversed", orig["reversed"](list(range(8)))), + (iter, ([],)) + ) + + for case in types: + self.assertEqual(run_iter(*case), (orig["iter"], ((),))) + finally: + # Restore original builtins + for key, func in orig.items(): + # need to suppress KeyErrors in case + # a failed test deletes the key without setting anything + with contextlib.suppress(KeyError): + # del is required here + # to not invoke our custom __eq__ from + # the hash collision with the old key + del builtins_dict[key] + builtins_dict[key] = func + # Test a new_style class with __iter__ but no next() method def test_new_style_iter_class(self): class IterClass(object): diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst new file mode 100644 index 0000000..cc99779 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-02-10-07-21-47.gh-issue-101765.MO5LlC.rst @@ -0,0 +1 @@ +Fix SystemError / segmentation fault in iter ``__reduce__`` when internal access of ``builtins.__dict__`` keys mutates the iter object. diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 2a675d5..c7fe8b2 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -2442,11 +2442,16 @@ static PyObject * bytearrayiter_reduce(bytesiterobject *it, PyObject *Py_UNUSED(ignored)) { _Py_IDENTIFIER(iter); + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { - return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter)); + return Py_BuildValue("N(())", iter); } } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 4bcb2eb..acbc26a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -3146,11 +3146,16 @@ static PyObject * striter_reduce(striterobject *it, PyObject *Py_UNUSED(ignored)) { _Py_IDENTIFIER(iter); + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { - return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter)); + return Py_BuildValue("N(())", iter); } } diff --git a/Objects/iterobject.c b/Objects/iterobject.c index e493e41..980c04b 100644 --- a/Objects/iterobject.c +++ b/Objects/iterobject.c @@ -104,11 +104,16 @@ PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list( static PyObject * iter_reduce(seqiterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_seq != NULL) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); else - return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter)); + return Py_BuildValue("N(())", iter); } PyDoc_STRVAR(reduce_doc, "Return state information for pickling."); @@ -243,11 +248,16 @@ calliter_iternext(calliterobject *it) static PyObject * calliter_reduce(calliterobject *it, PyObject *Py_UNUSED(ignored)) { + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_callable != NULL && it->it_sentinel != NULL) - return Py_BuildValue("N(OO)", _PyEval_GetBuiltinId(&PyId_iter), - it->it_callable, it->it_sentinel); + return Py_BuildValue("N(OO)", iter, it->it_callable, it->it_sentinel); else - return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter)); + return Py_BuildValue("N(())", iter); } static PyMethodDef calliter_methods[] = { diff --git a/Objects/listobject.c b/Objects/listobject.c index 484d374..f46d5e4 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3405,17 +3405,23 @@ listiter_reduce_general(void *_it, int forward) _Py_IDENTIFIER(reversed); PyObject *list; + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + /* the objects are not the same, index is of different types! */ if (forward) { + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); listiterobject *it = (listiterobject *)_it; - if (it->it_seq) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + if (it->it_seq) { + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); + } } else { + PyObject *reversed = _PyEval_GetBuiltinId(&PyId_reversed); listreviterobject *it = (listreviterobject *)_it; - if (it->it_seq) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_reversed), - it->it_seq, it->it_index); + if (it->it_seq) { + return Py_BuildValue("N(O)n", reversed, it->it_seq, it->it_index); + } } /* empty iterator, create an empty list */ list = PyList_New(0); diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 6b1ab74..a401bf9 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1115,11 +1115,16 @@ static PyObject * tupleiter_reduce(tupleiterobject *it, PyObject *Py_UNUSED(ignored)) { _Py_IDENTIFIER(iter); + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_seq) - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); else - return Py_BuildValue("N(())", _PyEval_GetBuiltinId(&PyId_iter)); + return Py_BuildValue("N(())", iter); } static PyObject * diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b7ec1f2..5a1865f 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16070,14 +16070,19 @@ static PyObject * unicodeiter_reduce(unicodeiterobject *it, PyObject *Py_UNUSED(ignored)) { _Py_IDENTIFIER(iter); + PyObject *iter = _PyEval_GetBuiltinId(&PyId_iter); + + /* _PyEval_GetBuiltinId can invoke arbitrary code, + * call must be before access of iterator pointers. + * see issue #101765 */ + if (it->it_seq != NULL) { - return Py_BuildValue("N(O)n", _PyEval_GetBuiltinId(&PyId_iter), - it->it_seq, it->it_index); + return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index); } else { PyObject *u = (PyObject *)_PyUnicode_New(0); if (u == NULL) return NULL; - return Py_BuildValue("N(N)", _PyEval_GetBuiltinId(&PyId_iter), u); + return Py_BuildValue("N(N)", iter, u); } } |