From 8e9a1a032233f06ce0f1acdf5f983d614c8745a5 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 17 Jul 2023 12:12:33 -0700 Subject: gh-106603: Make uop struct a triple (opcode, oparg, operand) (#106794) --- Include/internal/pycore_opcode_metadata.h | 42 ++++++++------ Include/internal/pycore_uops.h | 5 +- Lib/test/test_capi/test_misc.py | 24 ++++---- Python/bytecodes.c | 16 ++---- Python/ceval.c | 7 ++- Python/executor_cases.c.h | 85 +++++++++++++++++++++++++++-- Python/generated_cases.c.h | 14 ++--- Python/optimizer.c | 91 +++++++++++++++++-------------- Tools/cases_generator/generate_cases.py | 15 ++--- 9 files changed, 190 insertions(+), 109 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 028736e..c3a0dbb 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -38,21 +38,24 @@ #define _SKIP_CACHE 314 #define _GUARD_GLOBALS_VERSION 315 #define _GUARD_BUILTINS_VERSION 316 -#define _GUARD_TYPE_VERSION 317 -#define _CHECK_MANAGED_OBJECT_HAS_VALUES 318 -#define IS_NONE 319 -#define _ITER_CHECK_LIST 320 -#define _IS_ITER_EXHAUSTED_LIST 321 -#define _ITER_NEXT_LIST 322 -#define _ITER_CHECK_TUPLE 323 -#define _IS_ITER_EXHAUSTED_TUPLE 324 -#define _ITER_NEXT_TUPLE 325 -#define _ITER_CHECK_RANGE 326 -#define _IS_ITER_EXHAUSTED_RANGE 327 -#define _ITER_NEXT_RANGE 328 -#define _POP_JUMP_IF_FALSE 329 -#define _POP_JUMP_IF_TRUE 330 -#define JUMP_TO_TOP 331 +#define _LOAD_GLOBAL_MODULE 317 +#define _LOAD_GLOBAL_BUILTINS 318 +#define _GUARD_TYPE_VERSION 319 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 320 +#define _LOAD_ATTR_INSTANCE_VALUE 321 +#define IS_NONE 322 +#define _ITER_CHECK_LIST 323 +#define _IS_ITER_EXHAUSTED_LIST 324 +#define _ITER_NEXT_LIST 325 +#define _ITER_CHECK_TUPLE 326 +#define _IS_ITER_EXHAUSTED_TUPLE 327 +#define _ITER_NEXT_TUPLE 328 +#define _ITER_CHECK_RANGE 329 +#define _IS_ITER_EXHAUSTED_RANGE 330 +#define _ITER_NEXT_RANGE 331 +#define _POP_JUMP_IF_FALSE 332 +#define _POP_JUMP_IF_TRUE 333 +#define JUMP_TO_TOP 334 #ifndef NEED_OPCODE_METADATA extern int _PyOpcode_num_popped(int opcode, int oparg, bool jump); @@ -1245,7 +1248,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { BINARY_SUBSCR_DICT, 0, 0 } } }, [LIST_APPEND] = { .nuops = 1, .uops = { { LIST_APPEND, 0, 0 } } }, [SET_ADD] = { .nuops = 1, .uops = { { SET_ADD, 0, 0 } } }, - [STORE_SUBSCR] = { .nuops = 1, .uops = { { STORE_SUBSCR, 1, 0 } } }, + [STORE_SUBSCR] = { .nuops = 1, .uops = { { STORE_SUBSCR, 0, 0 } } }, [STORE_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { STORE_SUBSCR_LIST_INT, 0, 0 } } }, [STORE_SUBSCR_DICT] = { .nuops = 1, .uops = { { STORE_SUBSCR_DICT, 0, 0 } } }, [DELETE_SUBSCR] = { .nuops = 1, .uops = { { DELETE_SUBSCR, 0, 0 } } }, @@ -1264,6 +1267,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [UNPACK_SEQUENCE_TUPLE] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_TUPLE, 0, 0 } } }, [UNPACK_SEQUENCE_LIST] = { .nuops = 1, .uops = { { UNPACK_SEQUENCE_LIST, 0, 0 } } }, [UNPACK_EX] = { .nuops = 1, .uops = { { UNPACK_EX, 0, 0 } } }, + [STORE_ATTR] = { .nuops = 1, .uops = { { STORE_ATTR, 0, 0 } } }, [DELETE_ATTR] = { .nuops = 1, .uops = { { DELETE_ATTR, 0, 0 } } }, [STORE_GLOBAL] = { .nuops = 1, .uops = { { STORE_GLOBAL, 0, 0 } } }, [DELETE_GLOBAL] = { .nuops = 1, .uops = { { DELETE_GLOBAL, 0, 0 } } }, @@ -1271,6 +1275,8 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [LOAD_NAME] = { .nuops = 2, .uops = { { _LOAD_LOCALS, 0, 0 }, { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } }, [LOAD_FROM_DICT_OR_GLOBALS] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_GLOBALS, 0, 0 } } }, [LOAD_GLOBAL] = { .nuops = 1, .uops = { { LOAD_GLOBAL, 0, 0 } } }, + [LOAD_GLOBAL_MODULE] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_GLOBALS_VERSION, 1, 1 }, { _SKIP_CACHE, 0, 0 }, { _LOAD_GLOBAL_MODULE, 1, 3 } } }, + [LOAD_GLOBAL_BUILTIN] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } }, [DELETE_FAST] = { .nuops = 1, .uops = { { DELETE_FAST, 0, 0 } } }, [DELETE_DEREF] = { .nuops = 1, .uops = { { DELETE_DEREF, 0, 0 } } }, [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, @@ -1292,6 +1298,7 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[OPCODE_MACRO_EXPAN [LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_ATTR, 0, 0 } } }, [LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { LOAD_SUPER_ATTR_METHOD, 0, 0 } } }, [LOAD_ATTR] = { .nuops = 1, .uops = { { LOAD_ATTR, 0, 0 } } }, + [LOAD_ATTR_INSTANCE_VALUE] = { .nuops = 4, .uops = { { _SKIP_CACHE, 0, 0 }, { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_MANAGED_OBJECT_HAS_VALUES, 0, 0 }, { _LOAD_ATTR_INSTANCE_VALUE, 1, 3 } } }, [COMPARE_OP] = { .nuops = 1, .uops = { { COMPARE_OP, 0, 0 } } }, [COMPARE_OP_FLOAT] = { .nuops = 1, .uops = { { COMPARE_OP_FLOAT, 0, 0 } } }, [COMPARE_OP_INT] = { .nuops = 1, .uops = { { COMPARE_OP_INT, 0, 0 } } }, @@ -1348,8 +1355,11 @@ const char * const _PyOpcode_uop_name[OPCODE_UOP_NAME_SIZE] = { [_SKIP_CACHE] = "_SKIP_CACHE", [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", + [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES", + [_LOAD_ATTR_INSTANCE_VALUE] = "_LOAD_ATTR_INSTANCE_VALUE", [IS_NONE] = "IS_NONE", [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", [_IS_ITER_EXHAUSTED_LIST] = "_IS_ITER_EXHAUSTED_LIST", diff --git a/Include/internal/pycore_uops.h b/Include/internal/pycore_uops.h index 5ed275f..edb141c 100644 --- a/Include/internal/pycore_uops.h +++ b/Include/internal/pycore_uops.h @@ -11,8 +11,9 @@ extern "C" { #define _Py_UOP_MAX_TRACE_LENGTH 32 typedef struct { - int opcode; - uint64_t operand; // Sometimes oparg, sometimes a cache entry + uint32_t opcode; + uint32_t oparg; + uint64_t operand; // A cache entry } _PyUOpInstruction; typedef struct { diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index c0dcff8..4e519fa 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2448,7 +2448,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("SAVE_IP", uops) self.assertIn("LOAD_FAST", uops) @@ -2493,7 +2493,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(many_vars) self.assertIsNotNone(ex) - self.assertIn(("LOAD_FAST", 259), list(ex)) + self.assertIn(("LOAD_FAST", 259, 0), list(ex)) def test_unspecialized_unpack(self): # An example of an unspecialized opcode @@ -2514,7 +2514,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("UNPACK_SEQUENCE", uops) def test_pop_jump_if_false(self): @@ -2529,7 +2529,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_POP_JUMP_IF_FALSE", uops) def test_pop_jump_if_none(self): @@ -2544,7 +2544,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_POP_JUMP_IF_TRUE", uops) def test_pop_jump_if_not_none(self): @@ -2559,7 +2559,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_POP_JUMP_IF_FALSE", uops) def test_pop_jump_if_true(self): @@ -2574,7 +2574,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_POP_JUMP_IF_TRUE", uops) def test_jump_backward(self): @@ -2589,7 +2589,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("JUMP_TO_TOP", uops) def test_jump_forward(self): @@ -2609,7 +2609,7 @@ class TestUops(unittest.TestCase): ex = get_first_executor(testfunc) self.assertIsNotNone(ex) - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} # Since there is no JUMP_FORWARD instruction, # look for indirect evidence: the += operator self.assertIn("_BINARY_OP_ADD_INT", uops) @@ -2630,7 +2630,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_IS_ITER_EXHAUSTED_RANGE", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output @@ -2652,7 +2652,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_IS_ITER_EXHAUSTED_LIST", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output @@ -2674,7 +2674,7 @@ class TestUops(unittest.TestCase): self.assertIsNotNone(ex) # for i, (opname, oparg) in enumerate(ex): # print(f"{i:4d}: {opname:<20s} {oparg:3d}") - uops = {opname for opname, _ in ex} + uops = {opname for opname, _, _ in ex} self.assertIn("_IS_ITER_EXHAUSTED_TUPLE", uops) # Verification that the jump goes past END_FOR # is done by manual inspection of the output diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 652372c..19fb138 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -645,18 +645,16 @@ dummy_func( STORE_SUBSCR_LIST_INT, }; - inst(STORE_SUBSCR, (counter/1, v, container, sub -- )) { + inst(STORE_SUBSCR, (unused/1, v, container, sub -- )) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); - #else - (void)counter; // Unused. #endif /* ENABLE_SPECIALIZATION */ /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); @@ -1198,19 +1196,17 @@ dummy_func( STORE_ATTR_WITH_HINT, }; - inst(STORE_ATTR, (counter/1, unused/3, v, owner --)) { + inst(STORE_ATTR, (unused/1, unused/3, v, owner --)) { #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); next_instr--; _Py_Specialize_StoreAttr(owner, next_instr, name); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); - #else - (void)counter; // Unused. #endif /* ENABLE_SPECIALIZATION */ PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); int err = PyObject_SetAttr(owner, name, v); diff --git a/Python/ceval.c b/Python/ceval.c index f13ba98..b56ddfb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2747,17 +2747,18 @@ _PyUopExecute(_PyExecutorObject *executor, _PyInterpreterFrame *frame, PyObject _Py_CODEUNIT *ip_offset = (_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive; int pc = 0; int opcode; - uint64_t operand; int oparg; + uint64_t operand; for (;;) { opcode = self->trace[pc].opcode; + oparg = self->trace[pc].oparg; operand = self->trace[pc].operand; - oparg = (int)operand; DPRINTF(3, - "%4d: uop %s, operand %" PRIu64 ", stack_level %d\n", + "%4d: uop %s, oparg %d, operand %" PRIu64 ", stack_level %d\n", pc, opcode < 256 ? _PyOpcode_OpName[opcode] : _PyOpcode_uop_name[opcode], + oparg, operand, (int)(stack_pointer - _PyFrame_Stackbase(frame))); pc++; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d85e23b..f492c1f 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -485,18 +485,15 @@ PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; PyObject *v = stack_pointer[-3]; - uint16_t counter = (uint16_t)operand; #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); - #else - (void)counter; // Unused. #endif /* ENABLE_SPECIALIZATION */ /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); @@ -849,6 +846,30 @@ break; } + case STORE_ATTR: { + static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); + PyObject *owner = stack_pointer[-1]; + PyObject *v = stack_pointer[-2]; + #if ENABLE_SPECIALIZATION + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { + PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); + next_instr--; + _Py_Specialize_StoreAttr(owner, next_instr, name); + DISPATCH_SAME_OPARG(); + } + STAT_INC(STORE_ATTR, deferred); + DECREMENT_ADAPTIVE_COUNTER(cache->counter); + #endif /* ENABLE_SPECIALIZATION */ + PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); + int err = PyObject_SetAttr(owner, name, v); + Py_DECREF(v); + Py_DECREF(owner); + if (err) goto pop_2_error; + STACK_SHRINK(2); + break; + } + case DELETE_ATTR: { PyObject *owner = stack_pointer[-1]; PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); @@ -1010,6 +1031,42 @@ break; } + case _LOAD_GLOBAL_MODULE: { + PyObject *null = NULL; + PyObject *res; + uint16_t index = (uint16_t)operand; + PyDictObject *dict = (PyDictObject *)GLOBALS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dict->ma_keys); + res = entries[index].me_value; + DEOPT_IF(res == NULL, LOAD_GLOBAL); + Py_INCREF(res); + STAT_INC(LOAD_GLOBAL, hit); + null = NULL; + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = res; + if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = null; } + break; + } + + case _LOAD_GLOBAL_BUILTINS: { + PyObject *null = NULL; + PyObject *res; + uint16_t index = (uint16_t)operand; + PyDictObject *bdict = (PyDictObject *)BUILTINS(); + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(bdict->ma_keys); + res = entries[index].me_value; + DEOPT_IF(res == NULL, LOAD_GLOBAL); + Py_INCREF(res); + STAT_INC(LOAD_GLOBAL, hit); + null = NULL; + STACK_GROW(1); + STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = res; + if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = null; } + break; + } + case DELETE_FAST: { PyObject *v = GETLOCAL(oparg); if (v == NULL) goto unbound_local_error; @@ -1443,6 +1500,24 @@ break; } + case _LOAD_ATTR_INSTANCE_VALUE: { + PyObject *owner = stack_pointer[-1]; + PyObject *res2 = NULL; + PyObject *res; + uint16_t index = (uint16_t)operand; + PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); + res = _PyDictOrValues_GetValues(dorv)->values[index]; + DEOPT_IF(res == NULL, LOAD_ATTR); + STAT_INC(LOAD_ATTR, hit); + Py_INCREF(res); + res2 = NULL; + Py_DECREF(owner); + STACK_GROW(((oparg & 1) ? 1 : 0)); + stack_pointer[-1] = res; + if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } + break; + } + case COMPARE_OP: { static_assert(INLINE_CACHE_ENTRIES_COMPARE_OP == 1, "incorrect cache size"); PyObject *right = stack_pointer[-1]; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1fd7671..0148078 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -773,18 +773,15 @@ PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; PyObject *v = stack_pointer[-3]; - uint16_t counter = read_u16(&next_instr[0].cache); #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_StoreSubscr(container, sub, next_instr); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_SUBSCR, deferred); - _PyStoreSubscrCache *cache = (_PyStoreSubscrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); - #else - (void)counter; // Unused. #endif /* ENABLE_SPECIALIZATION */ /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); @@ -1437,19 +1434,16 @@ static_assert(INLINE_CACHE_ENTRIES_STORE_ATTR == 4, "incorrect cache size"); PyObject *owner = stack_pointer[-1]; PyObject *v = stack_pointer[-2]; - uint16_t counter = read_u16(&next_instr[0].cache); #if ENABLE_SPECIALIZATION - if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { + _PyAttrCache *cache = (_PyAttrCache *)next_instr; + if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); next_instr--; _Py_Specialize_StoreAttr(owner, next_instr, name); DISPATCH_SAME_OPARG(); } STAT_INC(STORE_ATTR, deferred); - _PyAttrCache *cache = (_PyAttrCache *)next_instr; DECREMENT_ADAPTIVE_COUNTER(cache->counter); - #else - (void)counter; // Unused. #endif /* ENABLE_SPECIALIZATION */ PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); int err = PyObject_SetAttr(owner, name, v); diff --git a/Python/optimizer.c b/Python/optimizer.c index 693ba37..3d385a1 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -344,13 +344,19 @@ uop_item(_PyUOpExecutorObject *self, Py_ssize_t index) if (oname == NULL) { return NULL; } + PyObject *oparg = PyLong_FromUnsignedLong(self->trace[index].oparg); + if (oparg == NULL) { + Py_DECREF(oname); + return NULL; + } PyObject *operand = PyLong_FromUnsignedLongLong(self->trace[index].operand); if (operand == NULL) { + Py_DECREF(oparg); Py_DECREF(oname); return NULL; } - PyObject *args[2] = { oname, operand }; - return _PyTuple_FromArraySteal(args, 2); + PyObject *args[3] = { oname, oparg, operand }; + return _PyTuple_FromArraySteal(args, 3); } PySequenceMethods uop_as_sequence = { @@ -395,29 +401,33 @@ translate_bytecode_to_trace( #define DPRINTF(level, ...) #endif -#define ADD_TO_TRACE(OPCODE, OPERAND) \ +#define ADD_TO_TRACE(OPCODE, OPARG, OPERAND) \ DPRINTF(2, \ - " ADD_TO_TRACE(%s, %" PRIu64 ")\n", \ + " ADD_TO_TRACE(%s, %d, %" PRIu64 ")\n", \ uop_name(OPCODE), \ + (OPARG), \ (uint64_t)(OPERAND)); \ assert(trace_length < max_length); \ assert(reserved > 0); \ reserved--; \ trace[trace_length].opcode = (OPCODE); \ + trace[trace_length].oparg = (OPARG); \ trace[trace_length].operand = (OPERAND); \ trace_length++; #define INSTR_IP(INSTR, CODE) \ - ((long)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) + ((uint32_t)((INSTR) - ((_Py_CODEUNIT *)(CODE)->co_code_adaptive))) -#define ADD_TO_STUB(INDEX, OPCODE, OPERAND) \ - DPRINTF(2, " ADD_TO_STUB(%d, %s, %" PRIu64 ")\n", \ +#define ADD_TO_STUB(INDEX, OPCODE, OPARG, OPERAND) \ + DPRINTF(2, " ADD_TO_STUB(%d, %s, %d, %" PRIu64 ")\n", \ (INDEX), \ uop_name(OPCODE), \ + (OPARG), \ (uint64_t)(OPERAND)); \ assert(reserved > 0); \ reserved--; \ trace[(INDEX)].opcode = (OPCODE); \ + trace[(INDEX)].oparg = (OPARG); \ trace[(INDEX)].operand = (OPERAND); // Reserve space for n uops @@ -433,7 +443,7 @@ translate_bytecode_to_trace( #define RESERVE(main, stub) RESERVE_RAW((main) + (stub) + 2, uop_name(opcode)) DPRINTF(4, - "Optimizing %s (%s:%d) at byte offset %ld\n", + "Optimizing %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, @@ -441,11 +451,11 @@ translate_bytecode_to_trace( for (;;) { RESERVE_RAW(2, "epilogue"); // Always need space for SAVE_IP and EXIT_TRACE - ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code)); + ADD_TO_TRACE(SAVE_IP, INSTR_IP(instr, code), 0); - int opcode = instr->op.code; - int oparg = instr->op.arg; - int extras = 0; + uint32_t opcode = instr->op.code; + uint32_t oparg = instr->op.arg; + uint32_t extras = 0; while (opcode == EXTENDED_ARG) { instr++; @@ -467,7 +477,7 @@ translate_bytecode_to_trace( case POP_JUMP_IF_NONE: { RESERVE(2, 2); - ADD_TO_TRACE(IS_NONE, 0); + ADD_TO_TRACE(IS_NONE, 0, 0); opcode = POP_JUMP_IF_TRUE; goto pop_jump_if_bool; } @@ -475,7 +485,7 @@ translate_bytecode_to_trace( case POP_JUMP_IF_NOT_NONE: { RESERVE(2, 2); - ADD_TO_TRACE(IS_NONE, 0); + ADD_TO_TRACE(IS_NONE, 0, 0); opcode = POP_JUMP_IF_FALSE; goto pop_jump_if_bool; } @@ -489,11 +499,11 @@ pop_jump_if_bool: _Py_CODEUNIT *target_instr = instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg; max_length -= 2; // Really the start of the stubs - int uopcode = opcode == POP_JUMP_IF_TRUE ? + uint32_t uopcode = opcode == POP_JUMP_IF_TRUE ? _POP_JUMP_IF_TRUE : _POP_JUMP_IF_FALSE; - ADD_TO_TRACE(uopcode, max_length); - ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(target_instr, code)); - ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0); + ADD_TO_TRACE(uopcode, max_length, 0); + ADD_TO_STUB(max_length, SAVE_IP, INSTR_IP(target_instr, code), 0); + ADD_TO_STUB(max_length + 1, EXIT_TRACE, 0, 0); break; } @@ -501,7 +511,7 @@ pop_jump_if_bool: { if (instr + 2 - oparg == initial_instr) { RESERVE(1, 0); - ADD_TO_TRACE(JUMP_TO_TOP, 0); + ADD_TO_TRACE(JUMP_TO_TOP, 0, 0); } else { DPRINTF(2, "JUMP_BACKWARD not to top ends trace\n"); @@ -546,14 +556,14 @@ pop_jump_if_bool: _Py_CODEUNIT *target_instr = // +1 at the end skips over END_FOR instr + 1 + _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + oparg + 1; max_length -= 3; // Really the start of the stubs - ADD_TO_TRACE(check_op, 0); - ADD_TO_TRACE(exhausted_op, 0); - ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length); - ADD_TO_TRACE(next_op, 0); - - ADD_TO_STUB(max_length + 0, POP_TOP, 0); - ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code)); - ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0); + ADD_TO_TRACE(check_op, 0, 0); + ADD_TO_TRACE(exhausted_op, 0, 0); + ADD_TO_TRACE(_POP_JUMP_IF_TRUE, max_length, 0); + ADD_TO_TRACE(next_op, 0, 0); + + ADD_TO_STUB(max_length + 0, POP_TOP, 0, 0); + ADD_TO_STUB(max_length + 1, SAVE_IP, INSTR_IP(target_instr, code), 0); + ADD_TO_STUB(max_length + 2, EXIT_TRACE, 0, 0); break; } @@ -564,19 +574,20 @@ pop_jump_if_bool: // Reserve space for nuops (+ SAVE_IP + EXIT_TRACE) int nuops = expansion->nuops; RESERVE(nuops, 0); + uint32_t orig_oparg = oparg; // For OPARG_TOP/BOTTOM for (int i = 0; i < nuops; i++) { - uint64_t operand; + oparg = orig_oparg; + uint64_t operand = 0; int offset = expansion->uops[i].offset; switch (expansion->uops[i].size) { case OPARG_FULL: - operand = oparg; if (extras && OPCODE_HAS_JUMP(opcode)) { if (opcode == JUMP_BACKWARD_NO_INTERRUPT) { - operand -= extras; + oparg -= extras; } else { assert(opcode != JUMP_BACKWARD); - operand += extras; + oparg += extras; } } break; @@ -590,10 +601,10 @@ pop_jump_if_bool: operand = read_u64(&instr[offset].cache); break; case OPARG_TOP: // First half of super-instr - operand = oparg >> 4; + oparg = orig_oparg >> 4; break; case OPARG_BOTTOM: // Second half of super-instr - operand = oparg & 0xF; + oparg = orig_oparg & 0xF; break; default: fprintf(stderr, @@ -603,7 +614,7 @@ pop_jump_if_bool: expansion->uops[i].offset); Py_FatalError("garbled expansion"); } - ADD_TO_TRACE(expansion->uops[i].uop, operand); + ADD_TO_TRACE(expansion->uops[i].uop, oparg, operand); } break; } @@ -621,9 +632,9 @@ pop_jump_if_bool: done: // Skip short traces like SAVE_IP, LOAD_FAST, SAVE_IP, EXIT_TRACE if (trace_length > 3) { - ADD_TO_TRACE(EXIT_TRACE, 0); + ADD_TO_TRACE(EXIT_TRACE, 0, 0); DPRINTF(1, - "Created a trace for %s (%s:%d) at byte offset %ld -- length %d\n", + "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n", PyUnicode_AsUTF8(code->co_qualname), PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, @@ -644,10 +655,10 @@ done: if (trace[i].opcode == _POP_JUMP_IF_FALSE || trace[i].opcode == _POP_JUMP_IF_TRUE) { - uint64_t target = trace[i].operand; - if (target >= (uint64_t)max_length) { + int target = trace[i].oparg; + if (target >= max_length) { target += trace_length - max_length; - trace[i].operand = target; + trace[i].oparg = target; } } } @@ -657,7 +668,7 @@ done: } else { DPRINTF(4, - "No trace for %s (%s:%d) at byte offset %ld\n", + "No trace for %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), PyUnicode_AsUTF8(code->co_filename), code->co_firstlineno, diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 112f29a..037bee1 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -417,16 +417,9 @@ class Instruction: if self.always_exits: dprint(f"Skipping {self.name} because it always exits") return False - if self.instr_flags.HAS_ARG_FLAG: - # If the instruction uses oparg, it cannot use any caches - if self.active_caches: - dprint(f"Skipping {self.name} because it uses oparg and caches") - return False - else: - # If it doesn't use oparg, it can have one cache entry - if len(self.active_caches) > 1: - dprint(f"Skipping {self.name} because it has >1 cache entries") - return False + if len(self.active_caches) > 1: + # print(f"Skipping {self.name} because it has >1 cache entries") + return False res = True for forbidden in FORBIDDEN_NAMES_IN_UOPS: # NOTE: To disallow unspecialized uops, use @@ -1374,7 +1367,7 @@ class Analyzer: if not part.instr.is_viable_uop(): print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop") return - if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches: + if not part.active_caches: size, offset = OPARG_SIZES["OPARG_FULL"], 0 else: # If this assert triggers, is_viable_uops() lied -- cgit v0.12