diff options
author | Mark Shannon <mark@hotpy.org> | 2024-08-16 16:11:24 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-16 16:11:24 (GMT) |
commit | c13e7d98fb8581014a225b900b1b88ccbfc28097 (patch) | |
tree | 2b27e6bbae922f421fbbae0fb761492031752b8a /Python | |
parent | e2f2dc708eae89f41e328501b5ea7c97b8e39907 (diff) | |
download | cpython-c13e7d98fb8581014a225b900b1b88ccbfc28097.zip cpython-c13e7d98fb8581014a225b900b1b88ccbfc28097.tar.gz cpython-c13e7d98fb8581014a225b900b1b88ccbfc28097.tar.bz2 |
GH-118093: Specialize `CALL_KW` (GH-123006)
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 152 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 180 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 292 | ||||
-rw-r--r-- | Python/opcode_targets.h | 6 | ||||
-rw-r--r-- | Python/optimizer.c | 2 | ||||
-rw-r--r-- | Python/optimizer_bytecodes.c | 9 | ||||
-rw-r--r-- | Python/optimizer_cases.c.h | 56 | ||||
-rw-r--r-- | Python/specialize.c | 67 |
8 files changed, 749 insertions, 15 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 97e37bc..ec57c07 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4007,7 +4007,14 @@ dummy_func( _CALL_METHOD_DESCRIPTOR_FAST + _CHECK_PERIODIC; - inst(INSTRUMENTED_CALL_KW, ( -- )) { + // Cache layout: counter/1, func_version/2 + family(CALL_KW, INLINE_CACHE_ENTRIES_CALL_KW) = { + CALL_KW_BOUND_METHOD, + CALL_KW_PY, + CALL_KW_NON_PY, + }; + + inst(INSTRUMENTED_CALL_KW, (counter/1, version/2 -- )) { int is_meth = !PyStackRef_IsNull(PEEK(oparg + 2)); int total_args = oparg + is_meth; PyObject *function = PyStackRef_AsPyObjectBorrow(PEEK(oparg + 3)); @@ -4017,6 +4024,7 @@ dummy_func( tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); ERROR_IF(err, error); + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(CALL_KW); } @@ -4062,8 +4070,8 @@ dummy_func( if (new_frame == NULL) { ERROR_NO_POP(); } - assert(next_instr - this_instr == 1); - frame->return_offset = 1; + assert(next_instr - this_instr == 1 + INLINE_CACHE_ENTRIES_CALL_KW); + frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL_KW; DISPATCH_INLINED(new_frame); } /* Callable is not a normal Python function */ @@ -4104,8 +4112,144 @@ dummy_func( res = PyStackRef_FromPyObjectSteal(res_o); } + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _PyInterpreterFrame*)) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + + // oparg counts all of the args, but *not* self: + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + assert(Py_TYPE(callable_o) == &PyFunction_Type); + int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); + new_frame = _PyEvalFramePushAndInit( + tstate, (PyFunctionObject *)PyStackRef_AsPyObjectSteal(callable), locals, + args, positional_args, kwnames_o + ); + PyStackRef_CLOSE(kwnames); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + SYNC_SP(); + if (new_frame == NULL) { + ERROR_NO_POP(); + } + } + + op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable, self_or_null, unused[oparg], kwnames -- callable, self_or_null, unused[oparg], kwnames)) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + EXIT_IF(!PyFunction_Check(callable_o)); + PyFunctionObject *func = (PyFunctionObject *)callable_o; + EXIT_IF(func->func_version != func_version); + } + + macro(CALL_KW_PY) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _CHECK_FUNCTION_VERSION_KW + + _PY_FRAME_KW + + _SAVE_RETURN_OFFSET + + _PUSH_FRAME; + + op(_CHECK_METHOD_VERSION_KW, (func_version/2, callable, null, unused[oparg], kwnames -- callable, null, unused[oparg], kwnames)) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + + EXIT_IF(Py_TYPE(callable_o) != &PyMethod_Type); + PyObject *func = ((PyMethodObject *)callable_o)->im_func; + EXIT_IF(!PyFunction_Check(func)); + EXIT_IF(((PyFunctionObject *)func)->func_version != func_version); + EXIT_IF(!PyStackRef_IsNull(null)); + } + + op(_EXPAND_METHOD_KW, (callable, null, unused[oparg], kwnames -- method, self, unused[oparg], kwnames)) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + + assert(PyStackRef_IsNull(null)); + assert(Py_TYPE(callable_o) == &PyMethod_Type); + self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self); + method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func); + assert(PyStackRef_FunctionCheck(method)); + PyStackRef_CLOSE(callable); + } + + macro(CALL_KW_BOUND_METHOD) = + unused/1 + // Skip over the counter + _CHECK_PEP_523 + + _CHECK_METHOD_VERSION_KW + + _EXPAND_METHOD_KW + + flush + // so that self is in the argument array + _PY_FRAME_KW + + _SAVE_RETURN_OFFSET + + _PUSH_FRAME; + + specializing op(_SPECIALIZE_CALL_KW, (counter/1, callable, self_or_null, args[oparg], kwnames -- callable, self_or_null, args[oparg], kwnames)) { + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { + next_instr = this_instr; + _Py_Specialize_CallKw(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null)); + DISPATCH_SAME_OPARG(); + } + STAT_INC(CALL, deferred); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); + #endif /* ENABLE_SPECIALIZATION */ + } + macro(CALL_KW) = - _DO_CALL_KW + + _SPECIALIZE_CALL_KW + + unused/2 + + _DO_CALL_KW; + + op(_CHECK_IS_NOT_PY_CALLABLE_KW, (callable, unused, unused[oparg], kwnames -- callable, unused, unused[oparg], kwnames)) { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + EXIT_IF(PyFunction_Check(callable_o)); + EXIT_IF(Py_TYPE(callable_o) == &PyMethod_Type); + } + + + op(_CALL_KW_NON_PY, (callable, self_or_null, args[oparg], kwnames -- res)) { +#if TIER_ONE + assert(opcode != INSTRUMENTED_CALL); +#endif + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + /* Callable is not a normal Python function */ + STACKREFS_TO_PYOBJECTS(args, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + DECREF_INPUTS(); + ERROR_IF(true, error); + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + PyObject *res_o = PyObject_Vectorcall( + callable_o, args_o, + positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, + kwnames_o); + PyStackRef_CLOSE(kwnames); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + PyStackRef_CLOSE(callable); + for (int i = 0; i < total_args; i++) { + PyStackRef_CLOSE(args[i]); + } + ERROR_IF(res_o == NULL, error); + res = PyStackRef_FromPyObjectSteal(res_o); + } + + macro(CALL_KW_NON_PY) = + unused/1 + // Skip over the counter + unused/2 + + _CHECK_IS_NOT_PY_CALLABLE_KW + + _CALL_KW_NON_PY + _CHECK_PERIODIC; inst(INSTRUMENTED_CALL_FUNCTION_EX, ( -- )) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index b878538..d699e8d 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4679,6 +4679,186 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _PY_FRAME_KW: { + _PyStackRef kwnames; + _PyStackRef *args; + _PyStackRef self_or_null; + _PyStackRef callable; + _PyInterpreterFrame *new_frame; + oparg = CURRENT_OPARG(); + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + // oparg counts all of the args, but *not* self: + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + assert(Py_TYPE(callable_o) == &PyFunction_Type); + int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); + new_frame = _PyEvalFramePushAndInit( + tstate, (PyFunctionObject *)PyStackRef_AsPyObjectSteal(callable), locals, + args, positional_args, kwnames_o + ); + PyStackRef_CLOSE(kwnames); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + if (new_frame == NULL) { + JUMP_TO_ERROR(); + } + stack_pointer[0].bits = (uintptr_t)new_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_FUNCTION_VERSION_KW: { + _PyStackRef callable; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-3 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + if (!PyFunction_Check(callable_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyFunctionObject *func = (PyFunctionObject *)callable_o; + if (func->func_version != func_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _CHECK_METHOD_VERSION_KW: { + _PyStackRef null; + _PyStackRef callable; + oparg = CURRENT_OPARG(); + null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + uint32_t func_version = (uint32_t)CURRENT_OPERAND(); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + if (Py_TYPE(callable_o) != &PyMethod_Type) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + PyObject *func = ((PyMethodObject *)callable_o)->im_func; + if (!PyFunction_Check(func)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (((PyFunctionObject *)func)->func_version != func_version) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (!PyStackRef_IsNull(null)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _EXPAND_METHOD_KW: { + _PyStackRef kwnames; + _PyStackRef null; + _PyStackRef callable; + _PyStackRef method; + _PyStackRef self; + oparg = CURRENT_OPARG(); + kwnames = stack_pointer[-1]; + null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + assert(PyStackRef_IsNull(null)); + assert(Py_TYPE(callable_o) == &PyMethod_Type); + self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self); + stack_pointer[-2 - oparg] = self; + method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func); + stack_pointer[-3 - oparg] = method; + assert(PyStackRef_FunctionCheck(method)); + PyStackRef_CLOSE(callable); + stack_pointer[-1] = kwnames; + break; + } + + case _CHECK_IS_NOT_PY_CALLABLE_KW: { + _PyStackRef callable; + oparg = CURRENT_OPARG(); + callable = stack_pointer[-3 - oparg]; + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + if (PyFunction_Check(callable_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (Py_TYPE(callable_o) == &PyMethod_Type) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + break; + } + + case _CALL_KW_NON_PY: { + _PyStackRef kwnames; + _PyStackRef *args; + _PyStackRef self_or_null; + _PyStackRef callable; + _PyStackRef res; + oparg = CURRENT_OPARG(); + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + #if TIER_ONE + assert(opcode != INSTRUMENTED_CALL); + #endif + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + /* Callable is not a normal Python function */ + STACKREFS_TO_PYOBJECTS(args, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + PyStackRef_CLOSE(callable); + PyStackRef_CLOSE(self_or_null); + for (int _i = oparg; --_i >= 0;) { + PyStackRef_CLOSE(args[_i]); + } + PyStackRef_CLOSE(kwnames); + if (true) JUMP_TO_ERROR(); + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + PyObject *res_o = PyObject_Vectorcall( + callable_o, args_o, + positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, + kwnames_o); + PyStackRef_CLOSE(kwnames); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + PyStackRef_CLOSE(callable); + for (int i = 0; i < total_args; i++) { + PyStackRef_CLOSE(args[i]); + } + if (res_o == NULL) JUMP_TO_ERROR(); + res = PyStackRef_FromPyObjectSteal(res_o); + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 because it is instrumented */ /* __DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7582b06..486d356 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1720,21 +1720,36 @@ TARGET(CALL_KW) { frame->instr_ptr = next_instr; - next_instr += 1; + next_instr += 4; INSTRUCTION_STATS(CALL_KW); PREDICTED(CALL_KW); - _Py_CODEUNIT *this_instr = next_instr - 1; + _Py_CODEUNIT *this_instr = next_instr - 4; (void)this_instr; _PyStackRef callable; _PyStackRef self_or_null; _PyStackRef *args; _PyStackRef kwnames; _PyStackRef res; + // _SPECIALIZE_CALL_KW + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + { + uint16_t counter = read_u16(&this_instr[1].cache); + (void)counter; + #if ENABLE_SPECIALIZATION + if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { + next_instr = this_instr; + _Py_Specialize_CallKw(callable, next_instr, oparg + !PyStackRef_IsNull(self_or_null)); + DISPATCH_SAME_OPARG(); + } + STAT_INC(CALL, deferred); + ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); + #endif /* ENABLE_SPECIALIZATION */ + } + /* Skip 2 cache entries */ // _DO_CALL_KW kwnames = stack_pointer[-1]; args = &stack_pointer[-1 - oparg]; - self_or_null = stack_pointer[-2 - oparg]; - callable = stack_pointer[-3 - oparg]; { PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); @@ -1776,8 +1791,8 @@ if (new_frame == NULL) { goto error; } - assert(next_instr - this_instr == 1); - frame->return_offset = 1; + assert(next_instr - this_instr == 1 + INLINE_CACHE_ENTRIES_CALL_KW); + frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL_KW; DISPATCH_INLINED(new_frame); } /* Callable is not a normal Python function */ @@ -1830,6 +1845,183 @@ } res = PyStackRef_FromPyObjectSteal(res_o); } + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + DISPATCH(); + } + + TARGET(CALL_KW_BOUND_METHOD) { + _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + next_instr += 4; + INSTRUCTION_STATS(CALL_KW_BOUND_METHOD); + static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); + _PyStackRef callable; + _PyStackRef null; + _PyStackRef kwnames; + _PyStackRef method; + _PyStackRef self; + _PyStackRef self_or_null; + _PyStackRef *args; + _PyInterpreterFrame *new_frame; + /* Skip 1 cache entry */ + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, CALL_KW); + } + // _CHECK_METHOD_VERSION_KW + null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + { + uint32_t func_version = read_u32(&this_instr[2].cache); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + DEOPT_IF(Py_TYPE(callable_o) != &PyMethod_Type, CALL_KW); + PyObject *func = ((PyMethodObject *)callable_o)->im_func; + DEOPT_IF(!PyFunction_Check(func), CALL_KW); + DEOPT_IF(((PyFunctionObject *)func)->func_version != func_version, CALL_KW); + DEOPT_IF(!PyStackRef_IsNull(null), CALL_KW); + } + // _EXPAND_METHOD_KW + kwnames = stack_pointer[-1]; + { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + assert(PyStackRef_IsNull(null)); + assert(Py_TYPE(callable_o) == &PyMethod_Type); + self = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_self); + stack_pointer[-2 - oparg] = self; + method = PyStackRef_FromPyObjectNew(((PyMethodObject *)callable_o)->im_func); + stack_pointer[-3 - oparg] = method; + assert(PyStackRef_FunctionCheck(method)); + PyStackRef_CLOSE(callable); + } + // flush + // _PY_FRAME_KW + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + // oparg counts all of the args, but *not* self: + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + assert(Py_TYPE(callable_o) == &PyFunction_Type); + int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); + new_frame = _PyEvalFramePushAndInit( + tstate, (PyFunctionObject *)PyStackRef_AsPyObjectSteal(callable), locals, + args, positional_args, kwnames_o + ); + PyStackRef_CLOSE(kwnames); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + if (new_frame == NULL) { + goto error; + } + } + // _SAVE_RETURN_OFFSET + { + #if TIER_ONE + frame->return_offset = (uint16_t)(next_instr - this_instr); + #endif + #if TIER_TWO + frame->return_offset = oparg; + #endif + } + // _PUSH_FRAME + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); + } + + TARGET(CALL_KW_NON_PY) { + frame->instr_ptr = next_instr; + next_instr += 4; + INSTRUCTION_STATS(CALL_KW_NON_PY); + static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); + _PyStackRef callable; + _PyStackRef kwnames; + _PyStackRef self_or_null; + _PyStackRef *args; + _PyStackRef res; + /* Skip 1 cache entry */ + /* Skip 2 cache entries */ + // _CHECK_IS_NOT_PY_CALLABLE_KW + callable = stack_pointer[-3 - oparg]; + { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + DEOPT_IF(PyFunction_Check(callable_o), CALL_KW); + DEOPT_IF(Py_TYPE(callable_o) == &PyMethod_Type, CALL_KW); + } + // _CALL_KW_NON_PY + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + { + #if TIER_ONE + assert(opcode != INSTRUMENTED_CALL); + #endif + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + /* Callable is not a normal Python function */ + STACKREFS_TO_PYOBJECTS(args, total_args, args_o); + if (CONVERSION_FAILED(args_o)) { + PyStackRef_CLOSE(callable); + PyStackRef_CLOSE(self_or_null); + for (int _i = oparg; --_i >= 0;) { + PyStackRef_CLOSE(args[_i]); + } + PyStackRef_CLOSE(kwnames); + if (true) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + PyObject *res_o = PyObject_Vectorcall( + callable_o, args_o, + positional_args | PY_VECTORCALL_ARGUMENTS_OFFSET, + kwnames_o); + PyStackRef_CLOSE(kwnames); + STACKREFS_TO_PYOBJECTS_CLEANUP(args_o); + assert((res_o != NULL) ^ (_PyErr_Occurred(tstate) != NULL)); + PyStackRef_CLOSE(callable); + for (int i = 0; i < total_args; i++) { + PyStackRef_CLOSE(args[i]); + } + if (res_o == NULL) { + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + goto error; + } + res = PyStackRef_FromPyObjectSteal(res_o); + } // _CHECK_PERIODIC { _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); @@ -1850,6 +2042,87 @@ DISPATCH(); } + TARGET(CALL_KW_PY) { + _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + next_instr += 4; + INSTRUCTION_STATS(CALL_KW_PY); + static_assert(INLINE_CACHE_ENTRIES_CALL_KW == 3, "incorrect cache size"); + _PyStackRef callable; + _PyStackRef self_or_null; + _PyStackRef kwnames; + _PyStackRef *args; + _PyInterpreterFrame *new_frame; + /* Skip 1 cache entry */ + // _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, CALL_KW); + } + // _CHECK_FUNCTION_VERSION_KW + callable = stack_pointer[-3 - oparg]; + { + uint32_t func_version = read_u32(&this_instr[2].cache); + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + DEOPT_IF(!PyFunction_Check(callable_o), CALL_KW); + PyFunctionObject *func = (PyFunctionObject *)callable_o; + DEOPT_IF(func->func_version != func_version, CALL_KW); + } + // _PY_FRAME_KW + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + { + PyObject *callable_o = PyStackRef_AsPyObjectBorrow(callable); + PyObject *self_or_null_o = PyStackRef_AsPyObjectBorrow(self_or_null); + // oparg counts all of the args, but *not* self: + int total_args = oparg; + if (self_or_null_o != NULL) { + args--; + total_args++; + } + PyObject *kwnames_o = PyStackRef_AsPyObjectBorrow(kwnames); + int positional_args = total_args - (int)PyTuple_GET_SIZE(kwnames_o); + assert(Py_TYPE(callable_o) == &PyFunction_Type); + int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags; + PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o)); + new_frame = _PyEvalFramePushAndInit( + tstate, (PyFunctionObject *)PyStackRef_AsPyObjectSteal(callable), locals, + args, positional_args, kwnames_o + ); + PyStackRef_CLOSE(kwnames); + // The frame has stolen all the arguments from the stack, + // so there is no need to clean them up. + stack_pointer += -3 - oparg; + assert(WITHIN_STACK_BOUNDS()); + if (new_frame == NULL) { + goto error; + } + } + // _SAVE_RETURN_OFFSET + { + #if TIER_ONE + frame->return_offset = (uint16_t)(next_instr - this_instr); + #endif + #if TIER_TWO + frame->return_offset = oparg; + #endif + } + // _PUSH_FRAME + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. + assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); + } + TARGET(CALL_LEN) { frame->instr_ptr = next_instr; next_instr += 4; @@ -3906,8 +4179,12 @@ TARGET(INSTRUMENTED_CALL_KW) { _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; (void)this_instr; - next_instr += 1; + next_instr += 4; INSTRUCTION_STATS(INSTRUMENTED_CALL_KW); + uint16_t counter = read_u16(&this_instr[1].cache); + (void)counter; + uint32_t version = read_u32(&this_instr[2].cache); + (void)version; int is_meth = !PyStackRef_IsNull(PEEK(oparg + 2)); int total_args = oparg + is_meth; PyObject *function = PyStackRef_AsPyObjectBorrow(PEEK(oparg + 3)); @@ -3917,6 +4194,7 @@ tstate, PY_MONITORING_EVENT_CALL, frame, this_instr, function, arg); if (err) goto error; + PAUSE_ADAPTIVE_COUNTER(this_instr[1].counter); GO_TO_INSTRUCTION(CALL_KW); } diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 9ea01e2..0418105 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -169,6 +169,9 @@ static void *opcode_targets[256] = { &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS, &&TARGET_CALL_BUILTIN_O, &&TARGET_CALL_ISINSTANCE, + &&TARGET_CALL_KW_BOUND_METHOD, + &&TARGET_CALL_KW_NON_PY, + &&TARGET_CALL_KW_PY, &&TARGET_CALL_LEN, &&TARGET_CALL_LIST_APPEND, &&TARGET_CALL_METHOD_DESCRIPTOR_FAST, @@ -232,9 +235,6 @@ static void *opcode_targets[256] = { &&_unknown_opcode, &&_unknown_opcode, &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_INSTRUMENTED_END_FOR, &&TARGET_INSTRUMENTED_END_SEND, &&TARGET_INSTRUMENTED_LOAD_SUPER_ATTR, diff --git a/Python/optimizer.c b/Python/optimizer.c index dbd5467..9198e41 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -807,7 +807,7 @@ translate_bytecode_to_trace( ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); goto done; } - assert(_PyOpcode_Deopt[opcode] == CALL); + assert(_PyOpcode_Deopt[opcode] == CALL || _PyOpcode_Deopt[opcode] == CALL_KW); int func_version_offset = offsetof(_PyCallCache, func_version)/sizeof(_Py_CODEUNIT) // Add one to account for the actual opcode/oparg pair: diff --git a/Python/optimizer_bytecodes.c b/Python/optimizer_bytecodes.c index 010733e..97e4c64 100644 --- a/Python/optimizer_bytecodes.c +++ b/Python/optimizer_bytecodes.c @@ -627,6 +627,15 @@ dummy_func(void) { ctx->done = true; } + op(_PY_FRAME_KW, (callable, self_or_null, args[oparg], kwnames -- new_frame: _Py_UOpsAbstractFrame*)) { + (void)callable; + (void)self_or_null; + (void)args; + (void)kwnames; + new_frame = NULL; + ctx->done = true; + } + op(_RETURN_VALUE, (retval -- res)) { SYNC_SP(); ctx->frame->stack_pointer = stack_pointer; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 866d7d9..3ec2e69 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1974,6 +1974,62 @@ /* _DO_CALL_KW is not a viable micro-op for tier 2 */ + case _PY_FRAME_KW: { + _Py_UopsSymbol *kwnames; + _Py_UopsSymbol **args; + _Py_UopsSymbol *self_or_null; + _Py_UopsSymbol *callable; + _Py_UOpsAbstractFrame *new_frame; + kwnames = stack_pointer[-1]; + args = &stack_pointer[-1 - oparg]; + self_or_null = stack_pointer[-2 - oparg]; + callable = stack_pointer[-3 - oparg]; + (void)callable; + (void)self_or_null; + (void)args; + (void)kwnames; + new_frame = NULL; + ctx->done = true; + stack_pointer[-3 - oparg] = (_Py_UopsSymbol *)new_frame; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + + case _CHECK_FUNCTION_VERSION_KW: { + break; + } + + case _CHECK_METHOD_VERSION_KW: { + break; + } + + case _EXPAND_METHOD_KW: { + _Py_UopsSymbol *method; + _Py_UopsSymbol *self; + _Py_UopsSymbol *kwnames; + method = sym_new_not_null(ctx); + self = sym_new_not_null(ctx); + kwnames = sym_new_not_null(ctx); + stack_pointer[-3 - oparg] = method; + stack_pointer[-2 - oparg] = self; + stack_pointer[-1] = kwnames; + break; + } + + case _CHECK_IS_NOT_PY_CALLABLE_KW: { + break; + } + + case _CALL_KW_NON_PY: { + _Py_UopsSymbol *res; + res = sym_new_not_null(ctx); + stack_pointer[-3 - oparg] = res; + stack_pointer += -2 - oparg; + assert(WITHIN_STACK_BOUNDS()); + break; + } + /* _INSTRUMENTED_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ /* __DO_CALL_FUNCTION_EX is not a viable micro-op for tier 2 */ diff --git a/Python/specialize.c b/Python/specialize.c index 4a22738..4fec5a3 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1904,6 +1904,33 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return 0; } + +static int +specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, + bool bound_method) +{ + _PyCallCache *cache = (_PyCallCache *)(instr + 1); + PyCodeObject *code = (PyCodeObject *)func->func_code; + int kind = function_kind(code); + /* Don't specialize if PEP 523 is active */ + if (_PyInterpreterState_GET()->eval_frame) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_PEP_523); + return -1; + } + if (kind == SPEC_FAIL_CODE_NOT_OPTIMIZED) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CODE_NOT_OPTIMIZED); + return -1; + } + int version = _PyFunction_GetVersionForCurrentState(func); + if (version == 0) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); + return -1; + } + write_u32(cache->func_version, version); + instr->op.code = bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY; + return 0; +} + static int specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) { @@ -1999,6 +2026,46 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } +void +_Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) +{ + PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); + + assert(ENABLE_SPECIALIZATION); + assert(_PyOpcode_Caches[CALL_KW] == INLINE_CACHE_ENTRIES_CALL_KW); + assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL_KW); + _PyCallCache *cache = (_PyCallCache *)(instr + 1); + int fail; + if (PyFunction_Check(callable)) { + fail = specialize_py_call_kw((PyFunctionObject *)callable, instr, nargs, false); + } + else if (PyMethod_Check(callable)) { + PyObject *func = ((PyMethodObject *)callable)->im_func; + if (PyFunction_Check(func)) { + fail = specialize_py_call_kw((PyFunctionObject *)func, instr, nargs, true); + } + else { + SPECIALIZATION_FAIL(CALL_KW, SPEC_FAIL_CALL_BOUND_METHOD); + fail = -1; + } + } + else { + instr->op.code = CALL_KW_NON_PY; + fail = 0; + } + if (fail) { + STAT_INC(CALL, failure); + assert(!PyErr_Occurred()); + instr->op.code = CALL_KW; + cache->counter = adaptive_counter_backoff(cache->counter); + } + else { + STAT_INC(CALL, success); + assert(!PyErr_Occurred()); + cache->counter = adaptive_counter_cooldown(); + } +} + #ifdef Py_STATS static int binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) |