From 142502ea8d26b17732009b6e981e630c342054f7 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Tue, 20 Feb 2024 12:24:35 -0800 Subject: Tier 2 cleanups and tweaks (#115534) * Rename `_testinternalcapi.get_{uop,counter}_optimizer` to `new_*_optimizer` * Use `_PyUOpName()` instead of` _PyOpcode_uop_name[]` * Add `target` to executor iterator items -- `list(ex)` now returns `(opcode, oparg, target, operand)` quadruples * Add executor methods `get_opcode()` and `get_oparg()` to get `vmdata.opcode`, `vmdata.oparg` * Define a helper for printing uops, and unify various places where they are printed * Add a hack to summarize_stats.py to fix legacy uop names (e.g. `POP_TOP` -> `_POP_TOP`) * Define helpers in `test_opt.py` for accessing the set or list of opnames of an executor --- Lib/test/test_capi/test_opt.py | 157 +++++++++++++++++++++------------------ Lib/test/test_monitoring.py | 2 +- Modules/_testinternalcapi.c | 8 +- Python/ceval.c | 61 +++++++++------ Python/optimizer.c | 77 +++++++++++++++---- Python/optimizer_analysis.c | 3 +- Python/specialize.c | 12 +-- Tools/scripts/summarize_stats.py | 4 + 8 files changed, 203 insertions(+), 121 deletions(-) diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index d53d6a0..38c6fa4 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -32,16 +32,16 @@ def clear_executors(func): class TestOptimizerAPI(unittest.TestCase): - def test_get_counter_optimizer_dealloc(self): + def test_new_counter_optimizer_dealloc(self): # See gh-108727 def f(): - _testinternalcapi.get_counter_optimizer() + _testinternalcapi.new_counter_optimizer() f() def test_get_set_optimizer(self): old = _testinternalcapi.get_optimizer() - opt = _testinternalcapi.get_counter_optimizer() + opt = _testinternalcapi.new_counter_optimizer() try: _testinternalcapi.set_optimizer(opt) self.assertEqual(_testinternalcapi.get_optimizer(), opt) @@ -62,7 +62,7 @@ class TestOptimizerAPI(unittest.TestCase): loop = ns['loop'] for repeat in range(5): - opt = _testinternalcapi.get_counter_optimizer() + opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): self.assertEqual(opt.get_count(), 0) with clear_executors(loop): @@ -90,7 +90,7 @@ class TestOptimizerAPI(unittest.TestCase): """), ns, ns) long_loop = ns['long_loop'] - opt = _testinternalcapi.get_counter_optimizer() + opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): self.assertEqual(opt.get_count(), 0) long_loop() @@ -102,7 +102,7 @@ class TestOptimizerAPI(unittest.TestCase): while i < x: i += 1 - opt = _testinternalcapi.get_counter_optimizer() + opt = _testinternalcapi.new_counter_optimizer() with temporary_optimizer(opt): testfunc(1000) code, replace_code = testfunc.__code__, testfunc.__code__.replace() @@ -123,11 +123,20 @@ def get_first_executor(func): return None +def iter_opnames(ex): + for item in ex: + yield item[0] + + +def get_opnames(ex): + return set(iter_opnames(ex)) + + class TestExecutorInvalidation(unittest.TestCase): def setUp(self): self.old = _testinternalcapi.get_optimizer() - self.opt = _testinternalcapi.get_counter_optimizer() + self.opt = _testinternalcapi.new_counter_optimizer() _testinternalcapi.set_optimizer(self.opt) def tearDown(self): @@ -176,7 +185,7 @@ class TestExecutorInvalidation(unittest.TestCase): pass """), ns, ns) f = ns['f'] - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() exe = get_first_executor(f) @@ -189,7 +198,7 @@ class TestExecutorInvalidation(unittest.TestCase): def f(): for _ in range(1000): pass - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): f() exe = get_first_executor(f) @@ -208,13 +217,13 @@ class TestUops(unittest.TestCase): while i < x: i += 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(1000) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_SET_IP", uops) self.assertIn("_LOAD_FAST_0", uops) @@ -255,7 +264,7 @@ class TestUops(unittest.TestCase): """), ns, ns) many_vars = ns["many_vars"] - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): ex = get_first_executor(many_vars) self.assertIsNone(ex) @@ -263,7 +272,8 @@ class TestUops(unittest.TestCase): ex = get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertIn(("_LOAD_FAST", 259, 0), list(ex)) + self.assertTrue(any((opcode, oparg, operand) == ("_LOAD_FAST", 259, 0) + for opcode, oparg, _, operand in list(ex))) def test_unspecialized_unpack(self): # An example of an unspecialized opcode @@ -277,14 +287,14 @@ class TestUops(unittest.TestCase): while i < x: i += 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_UNPACK_SEQUENCE", uops) def test_pop_jump_if_false(self): @@ -293,13 +303,13 @@ class TestUops(unittest.TestCase): while i < n: i += 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_IS_TRUE_POP", uops) def test_pop_jump_if_none(self): @@ -308,13 +318,13 @@ class TestUops(unittest.TestCase): if x is None: x = 0 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(range(20)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_IS_NOT_NONE_POP", uops) def test_pop_jump_if_not_none(self): @@ -324,13 +334,13 @@ class TestUops(unittest.TestCase): if x is not None: x = 0 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(range(20)) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_IS_NONE_POP", uops) def test_pop_jump_if_true(self): @@ -339,13 +349,13 @@ class TestUops(unittest.TestCase): while not i >= n: i += 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_IS_FALSE_POP", uops) def test_jump_backward(self): @@ -354,13 +364,13 @@ class TestUops(unittest.TestCase): while i < n: i += 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_JUMP_TO_TOP", uops) def test_jump_forward(self): @@ -374,13 +384,13 @@ class TestUops(unittest.TestCase): a += 1 return a - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) # Since there is no JUMP_FORWARD instruction, # look for indirect evidence: the += operator self.assertIn("_BINARY_OP_ADD_INT", uops) @@ -392,7 +402,7 @@ class TestUops(unittest.TestCase): total += i return total - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): total = testfunc(20) self.assertEqual(total, 190) @@ -401,7 +411,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_NOT_EXHAUSTED_RANGE", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output @@ -413,7 +423,7 @@ class TestUops(unittest.TestCase): total += i return total - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): a = list(range(20)) total = testfunc(a) @@ -423,7 +433,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_NOT_EXHAUSTED_LIST", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output @@ -435,7 +445,7 @@ class TestUops(unittest.TestCase): total += i return total - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): a = tuple(range(20)) total = testfunc(a) @@ -445,7 +455,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_NOT_EXHAUSTED_TUPLE", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output @@ -455,7 +465,7 @@ class TestUops(unittest.TestCase): for x in it: pass - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): a = [1, 2, 3] it = iter(a) @@ -471,13 +481,13 @@ class TestUops(unittest.TestCase): for i in range(n): dummy(i) - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_PUSH_FRAME", uops) self.assertIn("_BINARY_OP_ADD_INT", uops) @@ -489,13 +499,13 @@ class TestUops(unittest.TestCase): else: i = 1 - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): testfunc(20) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_IS_FALSE_POP", uops) def test_for_iter_tier_two(self): @@ -517,7 +527,7 @@ class TestUops(unittest.TestCase): x += 1000*i + j return x - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): x = testfunc(10, 10) @@ -525,7 +535,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_FOR_ITER_TIER_TWO", uops) def test_confidence_score(self): @@ -546,14 +556,14 @@ class TestUops(unittest.TestCase): bits += 1 return bits - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): x = testfunc(20) self.assertEqual(x, 40) ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - ops = [opname for opname, _, _ in ex] + ops = list(iter_opnames(ex)) #Since branch is 50/50 the trace could go either way. count = ops.count("_GUARD_IS_TRUE_POP") + ops.count("_GUARD_IS_FALSE_POP") self.assertLessEqual(count, 2) @@ -562,7 +572,7 @@ class TestUopsOptimization(unittest.TestCase): def _run_with_optimizer(self, testfunc, arg): res = None - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() with temporary_optimizer(opt): res = testfunc(arg) @@ -582,8 +592,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) self.assertEqual(res, 63) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -598,7 +608,7 @@ class TestUopsOptimization(unittest.TestCase): num += 1 return a - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() res = None with temporary_optimizer(opt): res = testfunc(32) @@ -606,8 +616,8 @@ class TestUopsOptimization(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -622,7 +632,7 @@ class TestUopsOptimization(unittest.TestCase): num += 1 return x - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() res = None with temporary_optimizer(opt): res = testfunc(32) @@ -630,8 +640,8 @@ class TestUopsOptimization(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) self.assertEqual(res, 124) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] - guard_both_int_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertGreaterEqual(len(binop_count), 3) self.assertLessEqual(len(guard_both_int_count), 1) @@ -648,7 +658,7 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 64) self.assertIsNotNone(ex) - binop_count = [opname for opname, _, _ in ex if opname == "_BINARY_OP_ADD_INT"] + binop_count = [opname for opname in iter_opnames(ex) if opname == "_BINARY_OP_ADD_INT"] self.assertGreaterEqual(len(binop_count), 3) def test_call_py_exact_args(self): @@ -660,7 +670,7 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_PUSH_FRAME", uops) self.assertIn("_BINARY_OP_ADD_INT", uops) self.assertNotIn("_CHECK_PEP_523", uops) @@ -675,7 +685,7 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertEqual(res, 62) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertNotIn("_GUARD_BOTH_INT", uops) def test_int_value_numbering(self): @@ -693,9 +703,9 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertEqual(res, 4) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertIn("_GUARD_BOTH_INT", uops) - guard_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + guard_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertEqual(len(guard_count), 1) def test_comprehension(self): @@ -706,7 +716,7 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertEqual(res, list(range(32))) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) self.assertNotIn("_BINARY_OP_ADD_INT", uops) def test_call_py_exact_args_disappearing(self): @@ -717,7 +727,7 @@ class TestUopsOptimization(unittest.TestCase): for i in range(n): dummy(i) - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() # Trigger specialization testfunc(8) with temporary_optimizer(opt): @@ -752,18 +762,21 @@ class TestUopsOptimization(unittest.TestCase): pass return None + def get_opnames(ex): + return {item[0] for item in ex} + def testfunc(n): for i in range(n): x = range(i) return x - opt = _testinternalcapi.get_uop_optimizer() + opt = _testinternalcapi.new_uop_optimizer() _testinternalcapi.set_optimizer(opt) testfunc(64) ex = get_first_executor(testfunc) assert ex is not None - uops = {opname for opname, _, _ in ex} + uops = get_opnames(ex) assert "_LOAD_GLOBAL_BUILTINS" not in uops assert "_LOAD_CONST_INLINE_BORROW_WITH_NULL" in uops """)) @@ -779,8 +792,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertAlmostEqual(res, 4.2) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_FLOAT"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -796,8 +809,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertAlmostEqual(res, -2.2) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_FLOAT"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -813,8 +826,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertAlmostEqual(res, 2 ** 32) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_FLOAT"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) # TODO gh-115506: this assertion may change after propagating constants. # We'll also need to verify that propagation actually occurs. @@ -833,8 +846,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertTrue(res) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_FLOAT"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_FLOAT", uops) @@ -851,8 +864,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertTrue(res) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_INT"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_INT", uops) @@ -869,8 +882,8 @@ class TestUopsOptimization(unittest.TestCase): res, ex = self._run_with_optimizer(testfunc, 32) self.assertTrue(res) self.assertIsNotNone(ex) - uops = {opname for opname, _, _ in ex} - guard_both_float_count = [opname for opname, _, _ in ex if opname == "_GUARD_BOTH_UNICODE"] + uops = get_opnames(ex) + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_UNICODE"] self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_STR", uops) diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index 60b6326..58aa4bc 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -1799,7 +1799,7 @@ class TestOptimizer(MonitoringTestBase, unittest.TestCase): def setUp(self): import _testinternalcapi self.old_opt = _testinternalcapi.get_optimizer() - opt = _testinternalcapi.get_counter_optimizer() + opt = _testinternalcapi.new_counter_optimizer() _testinternalcapi.set_optimizer(opt) super(TestOptimizer, self).setUp() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index bcc431a..0d23b18 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -960,13 +960,13 @@ iframe_getlasti(PyObject *self, PyObject *frame) } static PyObject * -get_counter_optimizer(PyObject *self, PyObject *arg) +new_counter_optimizer(PyObject *self, PyObject *arg) { return PyUnstable_Optimizer_NewCounter(); } static PyObject * -get_uop_optimizer(PyObject *self, PyObject *arg) +new_uop_optimizer(PyObject *self, PyObject *arg) { return PyUnstable_Optimizer_NewUOpOptimizer(); } @@ -1711,8 +1711,8 @@ static PyMethodDef module_functions[] = { {"get_optimizer", get_optimizer, METH_NOARGS, NULL}, {"set_optimizer", set_optimizer, METH_O, NULL}, {"get_executor", _PyCFunction_CAST(get_executor), METH_FASTCALL, NULL}, - {"get_counter_optimizer", get_counter_optimizer, METH_NOARGS, NULL}, - {"get_uop_optimizer", get_uop_optimizer, METH_NOARGS, NULL}, + {"new_counter_optimizer", new_counter_optimizer, METH_NOARGS, NULL}, + {"new_uop_optimizer", new_uop_optimizer, METH_NOARGS, NULL}, {"add_executor_dependency", add_executor_dependency, METH_VARARGS, NULL}, {"invalidate_executors", invalidate_executors, METH_O, NULL}, {"pending_threadfunc", _PyCFunction_CAST(pending_threadfunc), diff --git a/Python/ceval.c b/Python/ceval.c index b7a5d62..06c136a 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -649,7 +649,10 @@ static const _Py_CODEUNIT _Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS[] = { extern const struct _PyCode_DEF(8) _Py_InitCleanup; -extern const char *_PyUOpName(int index); +#ifdef Py_DEBUG +extern void _PyUOpPrint(const _PyUOpInstruction *uop); +#endif + /* Disable unused label warnings. They are handy for debugging, even if computed gotos aren't used. */ @@ -1006,14 +1009,14 @@ enter_tier_two: assert(next_uop->opcode == _START_EXECUTOR || next_uop->opcode == _COLD_EXIT); for (;;) { uopcode = next_uop->opcode; - DPRINTF(3, - "%4d: uop %s, oparg %d, operand %" PRIu64 ", target %d, stack_level %d\n", - (int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace)), - _PyUOpName(uopcode), - next_uop->oparg, - next_uop->operand, - next_uop->target, +#ifdef Py_DEBUG + if (lltrace >= 3) { + printf("%4d uop: ", (int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace))); + _PyUOpPrint(next_uop); + printf(" stack_level=%d\n", (int)(stack_pointer - _PyFrame_Stackbase(frame))); + } +#endif next_uop++; OPT_STAT_INC(uops_executed); UOP_STAT_INC(uopcode, execution_count); @@ -1028,9 +1031,9 @@ enter_tier_two: default: #ifdef Py_DEBUG { - fprintf(stderr, "Unknown uop %d, oparg %d, operand %" PRIu64 " @ %d\n", - next_uop[-1].opcode, next_uop[-1].oparg, next_uop[-1].operand, - (int)(next_uop - (current_executor == NULL ? next_uop : current_executor->trace) - 1)); + printf("Unknown uop: "); + _PyUOpPrint(&next_uop[-1]); + printf(" @ %d\n", (int)(next_uop - current_executor->trace - 1)); Py_FatalError("Unknown uop"); } #else @@ -1058,10 +1061,15 @@ pop_2_error_tier_two: pop_1_error_tier_two: STACK_SHRINK(1); error_tier_two: - DPRINTF(2, "Error: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d @ %d -> %s]\n", - uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, - (int)(next_uop - current_executor->trace - 1), - _PyOpcode_OpName[frame->instr_ptr->op.code]); +#ifdef Py_DEBUG + if (lltrace >= 2) { + printf("Error: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(" @ %d -> %s]\n", + (int)(next_uop - current_executor->trace - 1), + _PyOpcode_OpName[frame->instr_ptr->op.code]); + } +#endif OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); frame->return_offset = 0; // Don't leave this random _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1072,9 +1080,14 @@ error_tier_two: // Jump here from DEOPT_IF() deoptimize: next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); - DPRINTF(2, "DEOPT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", target %d -> %s]\n", - uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, next_uop[-1].target, - _PyOpcode_OpName[next_instr->op.code]); +#ifdef Py_DEBUG + if (lltrace >= 2) { + printf("DEOPT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(" -> %s]\n", + _PyOpcode_OpName[frame->instr_ptr->op.code]); + } +#endif OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); UOP_STAT_INC(uopcode, miss); Py_DECREF(current_executor); @@ -1088,9 +1101,15 @@ side_exit: uint32_t exit_index = next_uop[-1].exit_index; assert(exit_index < current_executor->exit_count); _PyExitData *exit = ¤t_executor->exits[exit_index]; - DPRINTF(2, "SIDE EXIT: [UOp %d (%s), oparg %d, operand %" PRIu64 ", exit %u, temp %d, target %d -> %s]\n", - uopcode, _PyUOpName(uopcode), next_uop[-1].oparg, next_uop[-1].operand, exit_index, exit->temperature, - exit->target, _PyOpcode_OpName[_PyCode_CODE(_PyFrame_GetCode(frame))[exit->target].op.code]); +#ifdef Py_DEBUG + if (lltrace >= 2) { + printf("SIDE EXIT: [UOp "); + _PyUOpPrint(&next_uop[-1]); + printf(", exit %u, temp %d, target %d -> %s]\n", + exit_index, exit->temperature, exit->target, + _PyOpcode_OpName[frame->instr_ptr->op.code]); + } +#endif Py_INCREF(exit->executor); tstate->previous_executor = (PyObject *)current_executor; GOTO_TIER_TWO(exit->executor); diff --git a/Python/optimizer.c b/Python/optimizer.c index df8f0ed..74708be 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -262,8 +262,22 @@ is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyBool_FromLong(((_PyExecutorObject *)self)->vm_data.valid); } +static PyObject * +get_opcode(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.opcode); +} + +static PyObject * +get_oparg(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromUnsignedLong(((_PyExecutorObject *)self)->vm_data.oparg); +} + static PyMethodDef executor_methods[] = { { "is_valid", is_valid, METH_NOARGS, NULL }, + { "get_opcode", get_opcode, METH_NOARGS, NULL }, + { "get_oparg", get_oparg, METH_NOARGS, NULL }, { NULL, NULL }, }; @@ -282,9 +296,30 @@ uop_dealloc(_PyExecutorObject *self) { const char * _PyUOpName(int index) { + if (index < 0 || index > MAX_UOP_ID) { + return NULL; + } return _PyOpcode_uop_name[index]; } +#ifdef Py_DEBUG +void +_PyUOpPrint(const _PyUOpInstruction *uop) +{ + const char *name = _PyUOpName(uop->opcode); + if (name == NULL) { + printf("", uop->opcode); + } + else { + printf("%s", name); + } + printf(" (%d, target=%d, operand=%" PRIx64 ")", + uop->oparg, + uop->target, + (uint64_t)uop->operand); +} +#endif + static Py_ssize_t uop_len(_PyExecutorObject *self) { @@ -312,14 +347,21 @@ uop_item(_PyExecutorObject *self, Py_ssize_t index) Py_DECREF(oname); return NULL; } + PyObject *target = PyLong_FromUnsignedLong(self->trace[index].target); + if (oparg == NULL) { + Py_DECREF(oparg); + Py_DECREF(oname); + return NULL; + } PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand); if (operand == NULL) { + Py_DECREF(target); Py_DECREF(oparg); Py_DECREF(oname); return NULL; } - PyObject *args[3] = { oname, oparg, operand }; - return _PyTuple_FromArraySteal(args, 3); + PyObject *args[4] = { oname, oparg, target, operand }; + return _PyTuple_FromArraySteal(args, 4); } PySequenceMethods uop_as_sequence = { @@ -390,19 +432,29 @@ BRANCH_TO_GUARD[4][2] = { #endif +// Beware: Macro arg order differs from struct member order +#ifdef Py_DEBUG #define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ - DPRINTF(2, \ - " ADD_TO_TRACE(%s, %d, %" PRIu64 ", %d)\n", \ - _PyUOpName(OPCODE), \ - (OPARG), \ - (uint64_t)(OPERAND), \ - TARGET); \ assert(trace_length < max_length); \ trace[trace_length].opcode = (OPCODE); \ trace[trace_length].oparg = (OPARG); \ + trace[trace_length].target = (TARGET); \ trace[trace_length].operand = (OPERAND); \ + if (lltrace >= 2) { \ + printf("%4d ADD_TO_TRACE: ", trace_length); \ + _PyUOpPrint(&trace[trace_length]); \ + printf("\n"); \ + } \ + trace_length++; +#else +#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND, TARGET) \ + assert(trace_length < max_length); \ + trace[trace_length].opcode = (OPCODE); \ + trace[trace_length].oparg = (OPARG); \ trace[trace_length].target = (TARGET); \ + trace[trace_length].operand = (OPERAND); \ trace_length++; +#endif #define INSTR_IP(INSTR, CODE) \ ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) @@ -890,12 +942,9 @@ make_executor_from_uops(_PyUOpInstruction *buffer, const _PyBloomFilter *depende if (lltrace >= 2) { printf("Optimized executor (length %d):\n", length); for (int i = 0; i < length; i++) { - printf("%4d %s(%d, %d, %" PRIu64 ")\n", - i, - _PyUOpName(executor->trace[i].opcode), - executor->trace[i].oparg, - executor->trace[i].target, - executor->trace[i].operand); + printf("%4d OPTIMIZED: ", i); + _PyUOpPrint(&executor->trace[i]); + printf("\n"); } } #endif diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index b104d2f..e7fb1e3 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -44,6 +44,7 @@ #define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) #ifdef Py_DEBUG + extern const char *_PyUOpName(int index); static const char *const DEBUG_ENV = "PYTHON_OPT_DEBUG"; static inline int get_lltrace(void) { char *uop_debug = Py_GETENV(DEBUG_ENV); @@ -632,7 +633,7 @@ uop_redundancy_eliminator( _Py_UOpsSymType **stack_pointer = ctx->frame->stack_pointer; DPRINTF(3, "Abstract interpreting %s:%d ", - _PyOpcode_uop_name[opcode], + _PyUOpName(opcode), oparg); switch (opcode) { #include "tier2_redundancy_eliminator_cases.c.h" diff --git a/Python/specialize.c b/Python/specialize.c index 2256d79..871979d 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -17,6 +17,7 @@ #include // rand() +extern const char *_PyUOpName(int index); /* For guidance on adding or extending families of instructions see * ./adaptive.md @@ -246,17 +247,12 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) stats->optimizer_failure_reason_no_memory); const char* const* names; - for (int i = 0; i < 512; i++) { - if (i < 256) { - names = _PyOpcode_OpName; - } else { - names = _PyOpcode_uop_name; - } + for (int i = 0; i <= MAX_UOP_ID; i++) { if (stats->opcode[i].execution_count) { - fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", names[i], stats->opcode[i].execution_count); + fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count); } if (stats->opcode[i].miss) { - fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", names[i], stats->opcode[i].miss); + fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss); } } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 5bc39fc..6b60b59 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -102,6 +102,10 @@ def load_raw_data(input: Path) -> RawData: file=sys.stderr, ) continue + # Hack to handle older data files where some uops + # are missing an underscore prefix in their name + if key.startswith("uops[") and key[5:6] != "_": + key = "uops[_" + key[5:] stats[key.strip()] += int(value) stats["__nfiles__"] += 1 -- cgit v0.12