diff options
| | | |
|---|---|---|
| author | Mark Shannon <mark@hotpy.org> | 2024-04-22 12:34:06 (GMT) |
| committer | GitHub <noreply@github.com> | 2024-04-22 12:34:06 (GMT) |
| commit | a6647d16abf4dd65997865e857371673238e60bf (patch) | |
| tree | b4ebfbfb86f0962d766f09d3d54513a9b38b0669 | |
| parent | ceb6038b053c403bed3ca3a8bd17b7e3fc9aab7d (diff) | |
| download | cpython-a6647d16abf4dd65997865e857371673238e60bf.zip cpython-a6647d16abf4dd65997865e857371673238e60bf.tar.gz cpython-a6647d16abf4dd65997865e857371673238e60bf.tar.bz2 | |

GH-115480: Reduce guard strength for binary ops when type of one operand is known already (GH-118050)

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Include/internal/pycore_optimizer.h | 1 |
| -rw-r--r-- | Include/internal/pycore_uop_ids.h | 168 |
| -rw-r--r-- | Include/internal/pycore_uop_metadata.h | 16 |
| -rw-r--r-- | Lib/test/test_capi/test_opt.py | 44 |
| -rw-r--r-- | Python/bytecodes.c | 16 |
| -rw-r--r-- | Python/executor_cases.c.h | 40 |
| -rw-r--r-- | Python/optimizer_analysis.c | 1 |
| -rw-r--r-- | Python/optimizer_bytecodes.c | 51 |
| -rw-r--r-- | Python/optimizer_cases.c.h | 68 |
| -rw-r--r-- | Python/optimizer_symbols.c | 14 |

10 files changed, 316 insertions, 103 deletions
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h index 44cafe6..c0a76e8 100644 --- a/Include/internal/pycore_optimizer.h +++ b/Include/internal/pycore_optimizer.h @@ -98,6 +98,7 @@ extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ); extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val); extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); +extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym); extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx); diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 3e4dd8b..f055874 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -118,17 +118,21 @@ extern "C" { #define _GUARD_IS_NOT_NONE_POP 356 #define _GUARD_IS_TRUE_POP 357 #define _GUARD_KEYS_VERSION 358 -#define _GUARD_NOT_EXHAUSTED_LIST 359 -#define _GUARD_NOT_EXHAUSTED_RANGE 360 -#define _GUARD_NOT_EXHAUSTED_TUPLE 361 -#define _GUARD_TYPE_VERSION 362 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 363 -#define _INIT_CALL_PY_EXACT_ARGS 364 -#define _INIT_CALL_PY_EXACT_ARGS_0 365 -#define _INIT_CALL_PY_EXACT_ARGS_1 366 -#define _INIT_CALL_PY_EXACT_ARGS_2 367 -#define _INIT_CALL_PY_EXACT_ARGS_3 368 -#define _INIT_CALL_PY_EXACT_ARGS_4 369 +#define _GUARD_NOS_FLOAT 359 +#define _GUARD_NOS_INT 360 +#define _GUARD_NOT_EXHAUSTED_LIST 361 +#define _GUARD_NOT_EXHAUSTED_RANGE 362 +#define _GUARD_NOT_EXHAUSTED_TUPLE 363 +#define _GUARD_TOS_FLOAT 364 +#define _GUARD_TOS_INT 365 +#define _GUARD_TYPE_VERSION 366 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 367 +#define _INIT_CALL_PY_EXACT_ARGS 368 +#define _INIT_CALL_PY_EXACT_ARGS_0 369 +#define _INIT_CALL_PY_EXACT_ARGS_1 370 +#define _INIT_CALL_PY_EXACT_ARGS_2 371 +#define _INIT_CALL_PY_EXACT_ARGS_3 372 +#define _INIT_CALL_PY_EXACT_ARGS_4 373 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define 
_INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -145,65 +149,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 370 -#define _IS_NONE 371 +#define _INTERNAL_INCREMENT_OPT_COUNTER 374 +#define _IS_NONE 375 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 372 -#define _ITER_CHECK_RANGE 373 -#define _ITER_CHECK_TUPLE 374 -#define _ITER_JUMP_LIST 375 -#define _ITER_JUMP_RANGE 376 -#define _ITER_JUMP_TUPLE 377 -#define _ITER_NEXT_LIST 378 -#define _ITER_NEXT_RANGE 379 -#define _ITER_NEXT_TUPLE 380 -#define _JUMP_TO_TOP 381 +#define _ITER_CHECK_LIST 376 +#define _ITER_CHECK_RANGE 377 +#define _ITER_CHECK_TUPLE 378 +#define _ITER_JUMP_LIST 379 +#define _ITER_JUMP_RANGE 380 +#define _ITER_JUMP_TUPLE 381 +#define _ITER_NEXT_LIST 382 +#define _ITER_NEXT_RANGE 383 +#define _ITER_NEXT_TUPLE 384 +#define _JUMP_TO_TOP 385 #define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR -#define _LOAD_ATTR 382 -#define _LOAD_ATTR_CLASS 383 -#define _LOAD_ATTR_CLASS_0 384 -#define _LOAD_ATTR_CLASS_1 385 +#define _LOAD_ATTR 386 +#define _LOAD_ATTR_CLASS 387 +#define _LOAD_ATTR_CLASS_0 388 +#define _LOAD_ATTR_CLASS_1 389 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 386 -#define _LOAD_ATTR_INSTANCE_VALUE_0 387 -#define _LOAD_ATTR_INSTANCE_VALUE_1 388 -#define _LOAD_ATTR_METHOD_LAZY_DICT 389 -#define _LOAD_ATTR_METHOD_NO_DICT 390 -#define _LOAD_ATTR_METHOD_WITH_VALUES 391 -#define _LOAD_ATTR_MODULE 392 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 393 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 394 +#define _LOAD_ATTR_INSTANCE_VALUE 390 +#define _LOAD_ATTR_INSTANCE_VALUE_0 391 +#define _LOAD_ATTR_INSTANCE_VALUE_1 392 
+#define _LOAD_ATTR_METHOD_LAZY_DICT 393 +#define _LOAD_ATTR_METHOD_NO_DICT 394 +#define _LOAD_ATTR_METHOD_WITH_VALUES 395 +#define _LOAD_ATTR_MODULE 396 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 397 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 398 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 395 -#define _LOAD_ATTR_SLOT_0 396 -#define _LOAD_ATTR_SLOT_1 397 -#define _LOAD_ATTR_WITH_HINT 398 +#define _LOAD_ATTR_SLOT 399 +#define _LOAD_ATTR_SLOT_0 400 +#define _LOAD_ATTR_SLOT_1 401 +#define _LOAD_ATTR_WITH_HINT 402 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 399 -#define _LOAD_CONST_INLINE_BORROW 400 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 401 -#define _LOAD_CONST_INLINE_WITH_NULL 402 +#define _LOAD_CONST_INLINE 403 +#define _LOAD_CONST_INLINE_BORROW 404 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 405 +#define _LOAD_CONST_INLINE_WITH_NULL 406 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 403 -#define _LOAD_FAST_0 404 -#define _LOAD_FAST_1 405 -#define _LOAD_FAST_2 406 -#define _LOAD_FAST_3 407 -#define _LOAD_FAST_4 408 -#define _LOAD_FAST_5 409 -#define _LOAD_FAST_6 410 -#define _LOAD_FAST_7 411 +#define _LOAD_FAST 407 +#define _LOAD_FAST_0 408 +#define _LOAD_FAST_1 409 +#define _LOAD_FAST_2 410 +#define _LOAD_FAST_3 411 +#define _LOAD_FAST_4 412 +#define _LOAD_FAST_5 413 +#define _LOAD_FAST_6 414 +#define _LOAD_FAST_7 415 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 412 -#define _LOAD_GLOBAL_BUILTINS 413 -#define _LOAD_GLOBAL_MODULE 414 +#define _LOAD_GLOBAL 416 +#define _LOAD_GLOBAL_BUILTINS 417 +#define _LOAD_GLOBAL_MODULE 418 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR 
@@ -217,49 +221,49 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 415 -#define _POP_JUMP_IF_FALSE 416 -#define _POP_JUMP_IF_TRUE 417 +#define _POP_FRAME 419 +#define _POP_JUMP_IF_FALSE 420 +#define _POP_JUMP_IF_TRUE 421 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 418 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 422 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 419 +#define _PUSH_FRAME 423 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 420 +#define _REPLACE_WITH_TRUE 424 #define _RESUME_CHECK RESUME_CHECK -#define _SAVE_RETURN_OFFSET 421 -#define _SEND 422 +#define _SAVE_RETURN_OFFSET 425 +#define _SEND 426 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 423 -#define _START_EXECUTOR 424 -#define _STORE_ATTR 425 -#define _STORE_ATTR_INSTANCE_VALUE 426 -#define _STORE_ATTR_SLOT 427 +#define _SIDE_EXIT 427 +#define _START_EXECUTOR 428 +#define _STORE_ATTR 429 +#define _STORE_ATTR_INSTANCE_VALUE 430 +#define _STORE_ATTR_SLOT 431 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 428 -#define _STORE_FAST_0 429 -#define _STORE_FAST_1 430 -#define _STORE_FAST_2 431 -#define _STORE_FAST_3 432 -#define _STORE_FAST_4 433 -#define _STORE_FAST_5 434 -#define _STORE_FAST_6 435 -#define _STORE_FAST_7 436 +#define _STORE_FAST 432 +#define _STORE_FAST_0 433 +#define _STORE_FAST_1 434 +#define _STORE_FAST_2 435 +#define _STORE_FAST_3 436 +#define _STORE_FAST_4 437 +#define _STORE_FAST_5 438 +#define _STORE_FAST_6 439 +#define _STORE_FAST_7 440 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define 
_STORE_SUBSCR 437 +#define _STORE_SUBSCR 441 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 438 +#define _TO_BOOL 442 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -269,12 +273,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 439 +#define _UNPACK_SEQUENCE 443 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 439 +#define MAX_UOP_ID 443 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 44ede3e..4d15be6 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -59,10 +59,14 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_REPLACE_WITH_TRUE] = 0, [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_GUARD_BOTH_INT] = HAS_EXIT_FLAG, + [_GUARD_NOS_INT] = HAS_EXIT_FLAG, + [_GUARD_TOS_INT] = HAS_EXIT_FLAG, [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, + [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, + [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, @@ -352,9 +356,13 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", [_GUARD_IS_TRUE_POP] = "_GUARD_IS_TRUE_POP", [_GUARD_KEYS_VERSION] = "_GUARD_KEYS_VERSION", + [_GUARD_NOS_FLOAT] = "_GUARD_NOS_FLOAT", + [_GUARD_NOS_INT] = "_GUARD_NOS_INT", [_GUARD_NOT_EXHAUSTED_LIST] = 
"_GUARD_NOT_EXHAUSTED_LIST", [_GUARD_NOT_EXHAUSTED_RANGE] = "_GUARD_NOT_EXHAUSTED_RANGE", [_GUARD_NOT_EXHAUSTED_TUPLE] = "_GUARD_NOT_EXHAUSTED_TUPLE", + [_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT", + [_GUARD_TOS_INT] = "_GUARD_TOS_INT", [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", @@ -566,6 +574,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _GUARD_BOTH_INT: return 2; + case _GUARD_NOS_INT: + return 2; + case _GUARD_TOS_INT: + return 1; case _BINARY_OP_MULTIPLY_INT: return 2; case _BINARY_OP_ADD_INT: @@ -574,6 +586,10 @@ int _PyUop_num_popped(int opcode, int oparg) return 2; case _GUARD_BOTH_FLOAT: return 2; + case _GUARD_NOS_FLOAT: + return 2; + case _GUARD_TOS_FLOAT: + return 1; case _BINARY_OP_MULTIPLY_FLOAT: return 2; case _BINARY_OP_ADD_FLOAT: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index 28d1873..ae23ead 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -903,10 +903,50 @@ class TestUopsOptimization(unittest.TestCase): self.assertTrue(res) self.assertIsNotNone(ex) uops = get_opnames(ex) - guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] - self.assertLessEqual(len(guard_both_float_count), 1) + guard_both_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + self.assertLessEqual(len(guard_both_int_count), 1) self.assertIn("_COMPARE_OP_INT", uops) + def test_compare_op_type_propagation_int_partial(self): + def testfunc(n): + a = 1 + for _ in range(n): + if a > 2: + x = 0 + if a < 2: + x = 1 + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 1) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + guard_left_int_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_INT"] + guard_both_int_count = [opname for opname 
in iter_opnames(ex) if opname == "_GUARD_BOTH_INT"] + self.assertLessEqual(len(guard_left_int_count), 1) + self.assertEqual(len(guard_both_int_count), 0) + self.assertIn("_COMPARE_OP_INT", uops) + + def test_compare_op_type_propagation_float_partial(self): + def testfunc(n): + a = 1.0 + for _ in range(n): + if a > 2.0: + x = 0 + if a < 2.0: + x = 1 + return x + + res, ex = self._run_with_optimizer(testfunc, 32) + self.assertEqual(res, 1) + self.assertIsNotNone(ex) + uops = get_opnames(ex) + guard_left_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_NOS_FLOAT"] + guard_both_float_count = [opname for opname in iter_opnames(ex) if opname == "_GUARD_BOTH_FLOAT"] + self.assertLessEqual(len(guard_left_float_count), 1) + self.assertEqual(len(guard_both_float_count), 0) + self.assertIn("_COMPARE_OP_FLOAT", uops) + def test_compare_op_type_propagation_unicode(self): def testfunc(n): a = "" diff --git a/Python/bytecodes.c b/Python/bytecodes.c index b7511b9..4541eb6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -426,6 +426,14 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(right)); } + op(_GUARD_NOS_INT, (left, unused -- left, unused)) { + EXIT_IF(!PyLong_CheckExact(left)); + } + + op(_GUARD_TOS_INT, (value -- value)) { + EXIT_IF(!PyLong_CheckExact(value)); + } + pure op(_BINARY_OP_MULTIPLY_INT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); res = _PyLong_Multiply((PyLongObject *)left, (PyLongObject *)right); @@ -462,6 +470,14 @@ dummy_func( EXIT_IF(!PyFloat_CheckExact(right)); } + op(_GUARD_NOS_FLOAT, (left, unused -- left, unused)) { + EXIT_IF(!PyFloat_CheckExact(left)); + } + + op(_GUARD_TOS_FLOAT, (value -- value)) { + EXIT_IF(!PyFloat_CheckExact(value)); + } + pure op(_BINARY_OP_MULTIPLY_FLOAT, (left, right -- res)) { STAT_INC(BINARY_OP, hit); double dres = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 841ce8c..43b0221 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -447,6 +447,26 
@@ break; } + case _GUARD_NOS_INT: { + PyObject *left; + left = stack_pointer[-2]; + if (!PyLong_CheckExact(left)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_TOS_INT: { + PyObject *value; + value = stack_pointer[-1]; + if (!PyLong_CheckExact(value)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _BINARY_OP_MULTIPLY_INT: { PyObject *right; PyObject *left; @@ -511,6 +531,26 @@ break; } + case _GUARD_NOS_FLOAT: { + PyObject *left; + left = stack_pointer[-2]; + if (!PyFloat_CheckExact(left)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _GUARD_TOS_FLOAT: { + PyObject *value; + value = stack_pointer[-1]; + if (!PyFloat_CheckExact(value)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + case _BINARY_OP_MULTIPLY_FLOAT: { PyObject *right; PyObject *left; diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 155f702..76de6e5 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -320,6 +320,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_has_type _Py_uop_sym_has_type +#define sym_get_type _Py_uop_sym_get_type #define sym_matches_type _Py_uop_sym_matches_type #define sym_set_null _Py_uop_sym_set_null #define sym_set_non_null _Py_uop_sym_set_non_null diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index f119b8e..481fb83 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -21,6 +21,7 @@ typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; #define sym_new_const _Py_uop_sym_new_const #define sym_new_null _Py_uop_sym_new_null #define sym_matches_type _Py_uop_sym_matches_type +#define sym_get_type _Py_uop_sym_get_type #define sym_has_type _Py_uop_sym_has_type #define sym_set_null _Py_uop_sym_set_null #define 
sym_set_non_null _Py_uop_sym_set_non_null @@ -99,9 +100,18 @@ dummy_func(void) { } op(_GUARD_BOTH_INT, (left, right -- left, right)) { - if (sym_matches_type(left, &PyLong_Type) && - sym_matches_type(right, &PyLong_Type)) { - REPLACE_OP(this_instr, _NOP, 0, 0); + if (sym_matches_type(left, &PyLong_Type)) { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_INT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_INT, 0, 0); + } } if (!sym_set_type(left, &PyLong_Type)) { goto hit_bottom; @@ -112,9 +122,18 @@ dummy_func(void) { } op(_GUARD_BOTH_FLOAT, (left, right -- left, right)) { - if (sym_matches_type(left, &PyFloat_Type) && - sym_matches_type(right, &PyFloat_Type)) { - REPLACE_OP(this_instr, _NOP, 0 ,0); + if (sym_matches_type(left, &PyFloat_Type)) { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_FLOAT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_FLOAT, 0, 0); + } } if (!sym_set_type(left, &PyFloat_Type)) { goto hit_bottom; @@ -137,6 +156,25 @@ dummy_func(void) { } } + op(_BINARY_OP, (left, right -- res)) { + PyTypeObject *ltype = sym_get_type(left); + PyTypeObject *rtype = sym_get_type(right); + if (ltype != NULL && (ltype == &PyLong_Type || ltype == &PyFloat_Type) && + rtype != NULL && (rtype == &PyLong_Type || rtype == &PyFloat_Type)) + { + if (oparg != NB_TRUE_DIVIDE && oparg != NB_INPLACE_TRUE_DIVIDE && + ltype == &PyLong_Type && rtype == &PyLong_Type) { + /* If both inputs are ints and the op is not division the result is an int */ + OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); + } + else { + /* For any other op combining ints/floats the result is a float */ + OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); + } + } + OUT_OF_SPACE_IF_NULL(res 
= sym_new_unknown(ctx)); + } + op(_BINARY_OP_ADD_INT, (left, right -- res)) { if (sym_is_const(left) && sym_is_const(right) && sym_matches_type(left, &PyLong_Type) && sym_matches_type(right, &PyLong_Type)) @@ -424,7 +462,6 @@ dummy_func(void) { OUT_OF_SPACE_IF_NULL(null = sym_new_null(ctx)); } - op(_COPY, (bottom, unused[oparg-1] -- bottom, unused[oparg-1], top)) { assert(oparg > 0); top = bottom; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 50f335e..0a7d96d 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -225,9 +225,18 @@ _Py_UopsSymbol *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_matches_type(left, &PyLong_Type) && - sym_matches_type(right, &PyLong_Type)) { - REPLACE_OP(this_instr, _NOP, 0, 0); + if (sym_matches_type(left, &PyLong_Type)) { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_INT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyLong_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_INT, 0, 0); + } } if (!sym_set_type(left, &PyLong_Type)) { goto hit_bottom; @@ -238,6 +247,14 @@ break; } + case _GUARD_NOS_INT: { + break; + } + + case _GUARD_TOS_INT: { + break; + } + case _BINARY_OP_MULTIPLY_INT: { _Py_UopsSymbol *right; _Py_UopsSymbol *left; @@ -333,9 +350,18 @@ _Py_UopsSymbol *left; right = stack_pointer[-1]; left = stack_pointer[-2]; - if (sym_matches_type(left, &PyFloat_Type) && - sym_matches_type(right, &PyFloat_Type)) { - REPLACE_OP(this_instr, _NOP, 0 ,0); + if (sym_matches_type(left, &PyFloat_Type)) { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _NOP, 0, 0); + } + else { + REPLACE_OP(this_instr, _GUARD_TOS_FLOAT, 0, 0); + } + } + else { + if (sym_matches_type(right, &PyFloat_Type)) { + REPLACE_OP(this_instr, _GUARD_NOS_FLOAT, 0, 0); + } } if (!sym_set_type(left, &PyFloat_Type)) { goto hit_bottom; @@ -346,6 +372,14 @@ break; } + case _GUARD_NOS_FLOAT: 
{ + break; + } + + case _GUARD_TOS_FLOAT: { + break; + } + case _BINARY_OP_MULTIPLY_FLOAT: { _Py_UopsSymbol *right; _Py_UopsSymbol *left; @@ -1852,9 +1886,27 @@ } case _BINARY_OP: { + _Py_UopsSymbol *right; + _Py_UopsSymbol *left; _Py_UopsSymbol *res; - res = sym_new_not_null(ctx); - if (res == NULL) goto out_of_space; + right = stack_pointer[-1]; + left = stack_pointer[-2]; + PyTypeObject *ltype = sym_get_type(left); + PyTypeObject *rtype = sym_get_type(right); + if (ltype != NULL && (ltype == &PyLong_Type || ltype == &PyFloat_Type) && + rtype != NULL && (rtype == &PyLong_Type || rtype == &PyFloat_Type)) + { + if (oparg != NB_TRUE_DIVIDE && oparg != NB_INPLACE_TRUE_DIVIDE && + ltype == &PyLong_Type && rtype == &PyLong_Type) { + /* If both inputs are ints and the op is not division the result is an int */ + OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyLong_Type)); + } + else { + /* For any other op combining ints/floats the result is a float */ + OUT_OF_SPACE_IF_NULL(res = sym_new_type(ctx, &PyFloat_Type)); + } + } + OUT_OF_SPACE_IF_NULL(res = sym_new_unknown(ctx)); stack_pointer[-2] = res; stack_pointer += -1; break; diff --git a/Python/optimizer_symbols.c b/Python/optimizer_symbols.c index 86b0d4d..204599b 100644 --- a/Python/optimizer_symbols.c +++ b/Python/optimizer_symbols.c @@ -231,6 +231,15 @@ _Py_uop_sym_new_null(_Py_UOpsContext *ctx) return null_sym; } +PyTypeObject * +_Py_uop_sym_get_type(_Py_UopsSymbol *sym) +{ + if (_Py_uop_sym_is_bottom(sym)) { + return NULL; + } + return sym->typ; +} + bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym) { @@ -244,10 +253,7 @@ bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ) { assert(typ != NULL && PyType_Check(typ)); - if (_Py_uop_sym_is_bottom(sym)) { - return false; - } - return sym->typ == typ; + return _Py_uop_sym_get_type(sym) == typ; } int |