diff options
author | Mark Shannon <mark@hotpy.org> | 2024-08-01 23:19:05 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-01 23:19:05 (GMT) |
commit | df13a1821a90fcfb75eca59aad6af1f0893b1e77 (patch) | |
tree | e917574fea945573980717b4823202049568893e /Python | |
parent | fda6bd842a2b93a501526f1b830eb900d935ac73 (diff) | |
download | cpython-df13a1821a90fcfb75eca59aad6af1f0893b1e77.zip cpython-df13a1821a90fcfb75eca59aad6af1f0893b1e77.tar.gz cpython-df13a1821a90fcfb75eca59aad6af1f0893b1e77.tar.bz2 |
GH-118095: Add tier two support for BINARY_SUBSCR_GETITEM (GH-120793)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 42 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 52 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 80 | ||||
-rw-r--r-- | Python/optimizer.c | 12 | ||||
-rw-r--r-- | Python/optimizer_analysis.c | 11 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 13 |
6 files changed, 150 insertions, 60 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index abfd803..4147255 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -765,32 +765,40 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } - inst(BINARY_SUBSCR_GETITEM, (unused/1, container_st, sub_st -- unused)) { - PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); - - DEOPT_IF(tstate->interp->eval_frame); - PyTypeObject *tp = Py_TYPE(container); + op(_BINARY_SUBSCR_CHECK_FUNC, (container, unused -- container, unused)) { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)); PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; + PyObject *getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL); + assert(PyFunction_Check(getitem)); uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); assert(code->co_argcount == 2); DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize)); STAT_INC(BINARY_SUBSCR, hit); Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); - new_frame->localsplus[0] = container_st; - new_frame->localsplus[1] = sub_st; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); } + op(_BINARY_SUBSCR_INIT_CALL, (container, sub -- new_frame: _PyInterpreterFrame* )) { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + SYNC_SP(); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + } + + macro(BINARY_SUBSCR_GETITEM) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _BINARY_SUBSCR_CHECK_FUNC + + _BINARY_SUBSCR_INIT_CALL + + _PUSH_FRAME; + inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ERROR_IF(_PyList_AppendTakeRef((PyListObject *)PyStackRef_AsPyObjectBorrow(list), PyStackRef_AsPyObjectSteal(v)) < 0, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index f0acc3b..61e1c5c 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -966,7 +966,57 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _BINARY_SUBSCR_CHECK_FUNC: { + _PyStackRef container; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + if (!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + if (getitem == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + if (((PyFunctionObject *)getitem)->func_version != cached_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyStackRef sub; + _PyStackRef container; + _PyInterpreterFrame *new_frame; + sub = stack_pointer[-1]; + container = stack_pointer[-2]; + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } case _LIST_APPEND: { _PyStackRef v; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ff8c4ea..4efaf89 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -469,37 +469,63 @@ } TARGET(BINARY_SUBSCR_GETITEM) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(BINARY_SUBSCR_GETITEM); static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); - _PyStackRef container_st; - _PyStackRef sub_st; + _PyStackRef container; + _PyStackRef sub; + _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ - sub_st = stack_pointer[-1]; - container_st = stack_pointer[-2]; - PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); - DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); - PyTypeObject *tp = Py_TYPE(container); - DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); - PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; - PyObject *cached = ht->_spec_cache.getitem; - DEOPT_IF(cached == NULL, BINARY_SUBSCR); - assert(PyFunction_Check(cached)); - PyFunctionObject *getitem = (PyFunctionObject *)cached; - uint32_t cached_version = ht->_spec_cache.getitem_version; - DEOPT_IF(getitem->func_version != cached_version, BINARY_SUBSCR); - PyCodeObject *code = (PyCodeObject *)getitem->func_code; - assert(code->co_argcount == 2); - DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); - STAT_INC(BINARY_SUBSCR, hit); - Py_INCREF(getitem); - _PyInterpreterFrame *new_frame = _PyFrame_PushUnchecked(tstate, getitem, 2); - STACK_SHRINK(2); - new_frame->localsplus[0] = container_st; - new_frame->localsplus[1] = sub_st; - frame->return_offset = (uint16_t)(next_instr - this_instr); - DISPATCH_INLINED(new_frame); + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); + } + // _BINARY_SUBSCR_CHECK_FUNC + container = stack_pointer[-2]; + { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + DEOPT_IF(getitem == NULL, BINARY_SUBSCR); + assert(PyFunction_Check(getitem)); + uint32_t cached_version = ht->_spec_cache.getitem_version; + DEOPT_IF(((PyFunctionObject *)getitem)->func_version != cached_version, BINARY_SUBSCR); + PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(getitem); + assert(code->co_argcount == 2); + DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize), BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + Py_INCREF(getitem); + } + // _BINARY_SUBSCR_INIT_CALL + sub = stack_pointer[-1]; + { + PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(container)); + PyHeapTypeObject *ht = (PyHeapTypeObject *)tp; + PyObject *getitem = ht->_spec_cache.getitem; + new_frame = _PyFrame_PushUnchecked(tstate, (PyFunctionObject *)getitem, 2); + stack_pointer += -2; + assert(WITHIN_STACK_BOUNDS()); + new_frame->localsplus[0] = container; + new_frame->localsplus[1] = sub; + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_BINARY_SUBSCR); + } + // _PUSH_FRAME + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); } TARGET(BINARY_SUBSCR_LIST_INT) { diff --git a/Python/optimizer.c b/Python/optimizer.c index 9d03813..e9cbfc5 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -795,6 +795,7 @@ translate_bytecode_to_trace( assert(i + 1 == nuops); if (opcode == FOR_ITER_GEN || opcode == LOAD_ATTR_PROPERTY || + opcode == BINARY_SUBSCR_GETITEM || opcode == SEND_GEN) { DPRINTF(2, "Bailing due to dynamic target\n"); @@ -921,7 +922,9 @@ done: 2 * INSTR_IP(initial_instr, code)); return 0; } - if (trace[trace_length-1].opcode != _JUMP_TO_TOP) { + if (!is_terminator(&trace[trace_length-1])) { + /* Allow space for _EXIT_TRACE */ + max_length += 2; ADD_TO_TRACE(_EXIT_TRACE, 0, 0, target); } DPRINTF(1, @@ -1102,7 +1105,7 @@ sanity_check(_PyExecutorObject *executor) CHECK(inst->format == UOP_FORMAT_JUMP); CHECK(inst->error_target < executor->code_size); } - if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + if (is_terminator(inst)) { ended = true; i++; break; @@ -1207,8 +1210,7 @@ int effective_trace_length(_PyUOpInstruction *buffer, int length) if (opcode == _NOP) { nop_count++; } - if (opcode == _EXIT_TRACE || - opcode == _JUMP_TO_TOP) { + if (is_terminator(&buffer[i])) { return i+1-nop_count; } } @@ -1257,7 +1259,7 @@ uop_optimize( else if (oparg < _PyUop_Replication[opcode]) { buffer[pc].opcode = opcode + oparg + 1; } - else if (opcode == _JUMP_TO_TOP || opcode == _EXIT_TRACE) { + else if (is_terminator(&buffer[pc])) { break; } assert(_PyOpcode_uop_name[buffer[pc].opcode]); diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 8c86641..f7adb44 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -52,14 +52,6 @@ #define DPRINTF(level, ...) #endif - - -static inline bool -op_is_end(uint32_t opcode) -{ - return opcode == _EXIT_TRACE || opcode == _JUMP_TO_TOP; -} - static int get_mutations(PyObject* dict) { assert(PyDict_CheckExact(dict)); @@ -288,7 +280,7 @@ remove_globals(_PyInterpreterFrame *frame, _PyUOpInstruction *buffer, prechecked_function_version = (uint32_t)buffer[pc].operand; break; default: - if (op_is_end(opcode)) { + if (is_terminator(inst)) { return 1; } break; @@ -552,6 +544,7 @@ remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) } case _JUMP_TO_TOP: case _EXIT_TRACE: + case _DYNAMIC_EXIT: return pc + 1; default: { diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index b704c9e..50aa972 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -539,7 +539,18 @@ break; } - /* _BINARY_SUBSCR_GETITEM is not a viable micro-op for tier 2 */ + case _BINARY_SUBSCR_CHECK_FUNC: { + break; + } + + case _BINARY_SUBSCR_INIT_CALL: { + _PyInterpreterFrame *new_frame; + new_frame = sym_new_not_null(ctx); + stack_pointer[-2] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + break; + } case _LIST_APPEND: { stack_pointer += -1; |