From 3409bc29c9f06051c28ae0791155e3aebd76ff2d Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 28 Feb 2024 14:38:01 -0800 Subject: gh-115859: Re-enable T2 optimizer pass by default (#116062) This undoes the *temporary* default disabling of the T2 optimizer pass in gh-115860. - Add a new test that reproduces Brandt's example from gh-115859; it indeed crashes before gh-116028 with PYTHONUOPSOPTIMIZE=1 - Re-enable the optimizer pass in T2, stop checking PYTHONUOPSOPTIMIZE - Rename the env var to disable T2 entirely to PYTHON_UOPS_OPTIMIZE (must be explicitly set to 0 to disable) - Fix skipIf conditions on tests in test_opt.py accordingly - Export sym_is_bottom() (for debugging) - Fix various things in the `_BINARY_OP_` specializations in the abstract interpreter: - DECREF(temp) - out-of-space check after sym_new_const() - add sym_matches_type() checks, so even if we somehow reach a binary op with symbolic constants of the wrong type on the stack we won't trigger the type assert --- Include/internal/pycore_optimizer.h | 2 ++ Lib/test/test_capi/test_opt.py | 21 +++++++++++++++++- Python/optimizer.c | 4 ++-- Python/optimizer_analysis.c | 10 ++++----- Python/optimizer_bytecodes.c | 43 +++++++++++++++++++++++++++++-------- Python/optimizer_cases.c.h | 42 ++++++++++++++++++++++++++++-------- Python/optimizer_symbols.c | 2 +- 7 files changed, 96 insertions(+), 28 deletions(-) diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 265eae4..6148504 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -95,6 +95,8 @@ extern void _Py_uop_sym_set_null(_Py_UopsSymbol *sym); extern void _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym); extern void _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ); extern void _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val); +extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); + extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx); extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx); diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 25fc36d..e1aef21 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -210,6 +210,8 @@ class TestExecutorInvalidation(unittest.TestCase): exe = get_first_executor(f) self.assertIsNone(exe) + +@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUops(unittest.TestCase): def test_basic_loop(self): @@ -570,7 +572,7 @@ class TestUops(unittest.TestCase): self.assertLessEqual(count, 2) -@unittest.skipIf(os.getenv("PYTHONUOPSOPTIMIZE", default=0) == 0, "Needs uop optimizer to run.") +@unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") class TestUopsOptimization(unittest.TestCase): def _run_with_optimizer(self, testfunc, arg): @@ -890,5 +892,22 @@ class TestUopsOptimization(unittest.TestCase): self.assertLessEqual(len(guard_both_float_count), 1) self.assertIn("_COMPARE_OP_STR", uops) + def test_type_inconsistency(self): + def testfunc(n): + for i in range(n): + x = _test_global + _test_global + # Must be a real global else it won't be optimized to _LOAD_CONST_INLINE + global _test_global + _test_global = 0 + _, ex = self._run_with_optimizer(testfunc, 16) + self.assertIsNone(ex) + _test_global = 1.2 + _, ex = self._run_with_optimizer(testfunc, 16) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + self.assertIn("_GUARD_BOTH_INT", uops) + self.assertIn("_BINARY_OP_ADD_INT", uops) + + if __name__ == "__main__": unittest.main() diff --git a/Python/optimizer.c b/Python/optimizer.c index c04ee17..acd6d52 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1008,8 +1008,8 @@ uop_optimize( return err; } OPT_STAT_INC(traces_created); - char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize == NULL || *uop_optimize > '0') { + char *env_var = Py_GETENV("PYTHON_UOPS_OPTIMIZE"); + if (env_var == NULL || *env_var == '\0' || *env_var > '0') { err = _Py_uop_analyze_and_optimize(frame, buffer, UOP_MAX_TRACE_LENGTH, curr_stackentries, &dependencies); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8e408ff..2a7ef4e 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -297,6 +297,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_set_non_null _Py_uop_sym_set_non_null #define sym_set_type _Py_uop_sym_set_type #define sym_set_const _Py_uop_sym_set_const +#define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new #define frame_pop _Py_uop_frame_pop @@ -510,12 +511,9 @@ _Py_uop_analyze_and_optimize( peephole_opt(frame, buffer, buffer_size); - char *uop_optimize = Py_GETENV("PYTHONUOPSOPTIMIZE"); - if (uop_optimize != NULL && *uop_optimize > '0') { - err = optimize_uops( - (PyCodeObject *)frame->f_executable, buffer, - buffer_size, curr_stacklen, dependencies); - } + err = optimize_uops( + (PyCodeObject *)frame->f_executable, buffer, + buffer_size, curr_stacklen, dependencies); if (err == 0) { goto not_ready; diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index b65e90b..928c22d 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -25,6 +25,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_set_non_null _Py_uop_sym_set_non_null #define sym_set_type _Py_uop_sym_set_type #define sym_set_const _Py_uop_sym_set_const +#define sym_is_bottom _Py_uop_sym_is_bottom #define frame_new _Py_uop_frame_new #define frame_pop _Py_uop_frame_pop @@ -107,7 +108,9 @@ dummy_func(void) { } op(_BINARY_OP_ADD_INT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), @@ -115,7 +118,9 @@ dummy_func(void) { if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -125,7 +130,9 @@ dummy_func(void) { } op(_BINARY_OP_SUBTRACT_INT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(left), @@ -133,7 +140,9 @@ dummy_func(void) { if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -143,7 +152,9 @@ dummy_func(void) { } op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(left), @@ -151,7 +162,9 @@ dummy_func(void) { if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -161,7 +174,9 @@ dummy_func(void) { } op(_BINARY_OP_ADD_FLOAT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -171,6 +186,8 @@ dummy_func(void) { goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } @@ -180,7 +197,9 @@ dummy_func(void) { } op(_BINARY_OP_SUBTRACT_FLOAT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -190,6 +209,8 @@ dummy_func(void) { goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } @@ -199,7 +220,9 @@ dummy_func(void) { } op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -209,6 +232,8 @@ dummy_func(void) { goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 9e99c83..9b387c0 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -183,7 +183,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Multiply((PyLongObject *)sym_get_const(left), @@ -191,7 +193,9 @@ if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -209,7 +213,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Add((PyLongObject *)sym_get_const(left), @@ -217,7 +223,9 @@ if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -235,7 +243,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) + { assert(PyLong_CheckExact(sym_get_const(left))); assert(PyLong_CheckExact(sym_get_const(right))); PyObject *temp = _PyLong_Subtract((PyLongObject *)sym_get_const(left), @@ -243,7 +253,9 @@ if (temp == NULL) { goto error; } - OUT_OF_SPACE_IF_NULL(res = sym_new_const(ctx, temp)); + res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and add tests! } @@ -275,7 +287,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -285,6 +299,8 @@ goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } @@ -302,7 +318,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -312,6 +330,8 @@ goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } @@ -329,7 +349,9 @@ _Py_UopsSymbol *res; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_is_const(left) && sym_is_const(right)) { + if (sym_is_const(left) && sym_is_const(right) && + sym_matches_type(left, &PyFloat_Type) && sym_matches_type(right, &PyFloat_Type)) + { assert(PyFloat_CheckExact(sym_get_const(left))); assert(PyFloat_CheckExact(sym_get_const(right))); PyObject *temp = PyFloat_FromDouble( @@ -339,6 +361,8 @@ goto error; } res = sym_new_const(ctx, temp); + Py_DECREF(temp); + OUT_OF_SPACE_IF_NULL(res); // TODO gh-115506: // replace opcode with constant propagated one and update tests! } diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 158ee67..a529cc2 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -77,7 +77,7 @@ sym_set_bottom(_Py_UopsSymbol *sym) Py_CLEAR(sym->const_val); } -static inline bool +bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym) { if ((sym->flags & IS_NULL) && (sym->flags & NOT_NULL)) { -- cgit v0.12