bpo-42536: GC track recycled tuples (GH-23623)

Several built-in and standard library types now ensure that their internal result tuples are always tracked by the garbage collector: - collections.OrderedDict.items - dict.items - enumerate - functools.reduce - itertools.combinations - itertools.combinations_with_replacement - itertools.permutations - itertools.product - itertools.zip_longest - zip Previously, they could have become untracked by a prior garbage collection.
author: Brandt Bucher <brandtbucher@gmail.com> 2020-12-05 03:45:57 (GMT)
committer: GitHub <noreply@github.com> 2020-12-05 03:45:57 (GMT)
commit: 226a012d1cd61f42ecd3056c554922f359a1a35d (patch)
tree: 86407049a5d2c22b0ce8626407ca106eb3c26afd /Lib
parent: 2de5097ba4c50eba90df55696a7b2e74c93834d4 (diff)
download: cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.zip
cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.tar.gz
cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.tar.bz2
5 files changed, 103 insertions, 0 deletions
diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py
index edb4ec0..8c95737 100644
--- a/Lib/test/test_builtin.py
+++ b/Lib/test/test_builtin.py
@@ -6,6 +6,7 @@ import builtins
 import collections
 import decimal
 import fractions
+import gc
 import io
 import locale
 import os
@@ -1756,6 +1757,18 @@ class BuiltinTest(unittest.TestCase):
         l8 = self.iter_error(zip(Iter(3), "AB", strict=True), ValueError)
         self.assertEqual(l8, [(2, "A"), (1, "B")])
 
+    @support.cpython_only
+    def test_zip_result_gc(self):
+        # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions
+        # about what can be untracked. Make sure we re-track result tuples
+        # whenever we reuse them.
+        it = zip([[]])
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None,). Make sure it's re-tracked when
+        # it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
     def test_format(self):
         # Test the basic machinery of the format() builtin.  Don't test
         #  the specifics of the various formatters
diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py
index 9ff8b7d..4b31cdc 100644
--- a/Lib/test/test_dict.py
+++ b/Lib/test/test_dict.py
@@ -1452,6 +1452,25 @@ class DictTest(unittest.TestCase):
         d = CustomReversedDict(pairs)
         self.assertEqual(pairs[::-1], list(dict(d).items()))
 
+    @support.cpython_only
+    def test_dict_items_result_gc(self):
+        # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = iter({None: []}.items())
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_dict_items_result_gc(self):
+        # Same as test_dict_items_result_gc above, but reversed.
+        it = reversed({None: []}.items())
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
 
 class CAPITest(unittest.TestCase):
 
diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py
index 5785cb4..906bfc2 100644
--- a/Lib/test/test_enumerate.py
+++ b/Lib/test/test_enumerate.py
@@ -2,6 +2,7 @@ import unittest
 import operator
 import sys
 import pickle
+import gc
 
 from test import support
 
@@ -134,6 +135,18 @@ class EnumerateTestCase(unittest.TestCase, PickleTest):
         self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq))
         self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq)))
 
+    @support.cpython_only
+    def test_enumerate_result_gc(self):
+        # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = self.enum([[]])
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
 class MyEnum(enumerate):
     pass
 
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index df2997e..a99b5e2 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -12,6 +12,8 @@ from functools import reduce
 import sys
 import struct
 import threading
+import gc
+
 maxsize = support.MAX_Py_ssize_t
 minsize = -maxsize-1
 
@@ -1573,6 +1575,51 @@ class TestBasicOps(unittest.TestCase):
             self.assertRaises(StopIteration, next, f(lambda x:x, []))
             self.assertRaises(StopIteration, next, f(lambda x:x, StopNow()))
 
+    @support.cpython_only
+    def test_combinations_result_gc(self):
+        # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = combinations([None, []], 1)
+        next(it)
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which has the value (None,). Make sure it's re-tracked when
+        # it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_combinations_with_replacement_result_gc(self):
+        # Ditto for combinations_with_replacement.
+        it = combinations_with_replacement([None, []], 1)
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_permutations_result_gc(self):
+        # Ditto for permutations.
+        it = permutations([None, []], 1)
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_product_result_gc(self):
+        # Ditto for product.
+        it = product([None, []])
+        next(it)
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+    @support.cpython_only
+    def test_zip_longest_result_gc(self):
+        # Ditto for zip_longest.
+        it = zip_longest([[]])
+        gc.collect()
+        self.assertTrue(gc.is_tracked(next(it)))
+
+
 class TestExamples(unittest.TestCase):
 
     def test_accumulate(self):
diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py
index 31759f2..eb40446 100644
--- a/Lib/test/test_ordered_dict.py
+++ b/Lib/test/test_ordered_dict.py
@@ -700,6 +700,17 @@ class OrderedDictTests:
         with self.assertRaises(ValueError):
             a |= "BAD"
 
+    @support.cpython_only
+    def test_ordered_dict_items_result_gc(self):
+        # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's
+        # assumptions about what can be untracked. Make sure we re-track result
+        # tuples whenever we reuse them.
+        it = iter(self.OrderedDict({None: []}).items())
+        gc.collect()
+        # That GC collection probably untracked the recycled internal result
+        # tuple, which is initialized to (None, None). Make sure it's re-tracked
+        # when it's mutated and returned from __next__:
+        self.assertTrue(gc.is_tracked(next(it)))
 
 class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
author	Brandt Bucher <brandtbucher@gmail.com>	2020-12-05 03:45:57 (GMT)
committer	GitHub <noreply@github.com>	2020-12-05 03:45:57 (GMT)
commit	226a012d1cd61f42ecd3056c554922f359a1a35d (patch)
tree	86407049a5d2c22b0ce8626407ca106eb3c26afd /Lib
parent	2de5097ba4c50eba90df55696a7b2e74c93834d4 (diff)
download	cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.zip cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.tar.gz cpython-226a012d1cd61f42ecd3056c554922f359a1a35d.tar.bz2