summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authormpage <mpage@meta.com>2024-12-11 23:18:22 (GMT)
committerGitHub <noreply@github.com>2024-12-11 23:18:22 (GMT)
commitc84928ed6de105696be24859e03f3ab27e11daf6 (patch)
treec533074edffdb8ae55dfa197ce8d63417f28949a /Python
parente8f4e272cc828f2b79fa17fc6b9786bdddab7ce4 (diff)
downloadcpython-c84928ed6de105696be24859e03f3ab27e11daf6.zip
cpython-c84928ed6de105696be24859e03f3ab27e11daf6.tar.gz
cpython-c84928ed6de105696be24859e03f3ab27e11daf6.tar.bz2
gh-115999: Specialize `CALL_KW` in free-threaded builds (#127713)
* Enable specialization of CALL_KW * Fix bug pushing frame in _PY_FRAME_KW `_PY_FRAME_KW` pushes a pointer to the new frame onto the stack for consumption by the next uop. When pushing the frame fails, we do not want to push the result, `NULL`, to the stack because it is not a valid stackref. This works in the default build because `PyStackRef_NULL` and `NULL` are the same value, so the `PyStackRef_XCLOSE()` in the error handler ignores it. In the free-threaded build the values are not the same; `PyStackRef_XCLOSE()` will attempt to decref a null pointer.
Diffstat (limited to 'Python')
-rw-r--r--Python/bytecodes.c9
-rw-r--r--Python/executor_cases.c.h11
-rw-r--r--Python/generated_cases.c.h24
-rw-r--r--Python/specialize.c17
4 files changed, 26 insertions, 35 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index 3d28094..d0e4c2b 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -4311,7 +4311,7 @@ dummy_func(
assert(Py_TYPE(callable_o) == &PyFunction_Type);
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
- new_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, callable[0], locals,
args, positional_args, kwnames_o, frame
);
@@ -4319,9 +4319,10 @@ dummy_func(
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
SYNC_SP();
- if (new_frame == NULL) {
+ if (temp == NULL) {
ERROR_NO_POP();
}
+ new_frame = temp;
}
op(_CHECK_FUNCTION_VERSION_KW, (func_version/2, callable[1], self_or_null[1], unused[oparg], kwnames -- callable[1], self_or_null[1], unused[oparg], kwnames)) {
@@ -4372,7 +4373,7 @@ dummy_func(
_PUSH_FRAME;
specializing op(_SPECIALIZE_CALL_KW, (counter/1, callable[1], self_or_null[1], args[oparg], kwnames -- callable[1], self_or_null[1], args[oparg], kwnames)) {
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_CallKw(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
@@ -4380,7 +4381,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(CALL_KW);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
macro(CALL_KW) =
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 987ff2e..18f1977 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -5235,7 +5235,7 @@
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
_PyFrame_SetStackPointer(frame, stack_pointer);
- new_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, callable[0], locals,
args, positional_args, kwnames_o, frame
);
@@ -5243,12 +5243,15 @@
PyStackRef_CLOSE(kwnames);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
- stack_pointer += -2 - oparg;
+ stack_pointer += -3 - oparg;
assert(WITHIN_STACK_BOUNDS());
- if (new_frame == NULL) {
+ if (temp == NULL) {
JUMP_TO_ERROR();
}
+ new_frame = temp;
+ stack_pointer[0].bits = (uintptr_t)new_frame;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
break;
}
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index 33f32ab..fc0f555 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -1895,7 +1895,7 @@
callable = &stack_pointer[-3 - oparg];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@@ -1905,7 +1905,7 @@
}
OPCODE_DEFERRED_INC(CALL_KW);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
/* Skip 2 cache entries */
// _MAYBE_EXPAND_METHOD_KW
@@ -2093,7 +2093,7 @@
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
_PyFrame_SetStackPointer(frame, stack_pointer);
- new_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, callable[0], locals,
args, positional_args, kwnames_o, frame
);
@@ -2101,12 +2101,12 @@
PyStackRef_CLOSE(kwnames);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
- stack_pointer += -2 - oparg;
+ stack_pointer += -3 - oparg;
assert(WITHIN_STACK_BOUNDS());
- if (new_frame == NULL) {
+ if (temp == NULL) {
goto error;
}
+ new_frame = temp;
}
// _SAVE_RETURN_OFFSET
{
@@ -2123,8 +2123,6 @@
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyInterpreterFrame *temp = new_frame;
- stack_pointer += -1;
- assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
assert(new_frame->previous == frame || new_frame->previous->previous == frame);
CALL_STAT_INC(inlined_py_calls);
@@ -2271,7 +2269,7 @@
int code_flags = ((PyCodeObject*)PyFunction_GET_CODE(callable_o))->co_flags;
PyObject *locals = code_flags & CO_OPTIMIZED ? NULL : Py_NewRef(PyFunction_GET_GLOBALS(callable_o));
_PyFrame_SetStackPointer(frame, stack_pointer);
- new_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, callable[0], locals,
args, positional_args, kwnames_o, frame
);
@@ -2279,12 +2277,12 @@
PyStackRef_CLOSE(kwnames);
// The frame has stolen all the arguments from the stack,
// so there is no need to clean them up.
- stack_pointer[-3 - oparg].bits = (uintptr_t)new_frame;
- stack_pointer += -2 - oparg;
+ stack_pointer += -3 - oparg;
assert(WITHIN_STACK_BOUNDS());
- if (new_frame == NULL) {
+ if (temp == NULL) {
goto error;
}
+ new_frame = temp;
}
// _SAVE_RETURN_OFFSET
{
@@ -2301,8 +2299,6 @@
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyInterpreterFrame *temp = new_frame;
- stack_pointer += -1;
- assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
assert(new_frame->previous == frame || new_frame->previous->previous == frame);
CALL_STAT_INC(inlined_py_calls);
diff --git a/Python/specialize.c b/Python/specialize.c
index d3fea71..fd182e7 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -2107,7 +2107,7 @@ specialize_py_call_kw(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,
return -1;
}
write_u32(cache->func_version, version);
- instr->op.code = bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY;
+ specialize(instr, bound_method ? CALL_KW_BOUND_METHOD : CALL_KW_PY);
return 0;
}
@@ -2202,10 +2202,9 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
{
PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st);
- assert(ENABLE_SPECIALIZATION);
+ assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[CALL_KW] == INLINE_CACHE_ENTRIES_CALL_KW);
assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL_KW);
- _PyCallCache *cache = (_PyCallCache *)(instr + 1);
int fail;
if (PyFunction_Check(callable)) {
fail = specialize_py_call_kw((PyFunctionObject *)callable, instr, nargs, false);
@@ -2221,19 +2220,11 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
}
}
else {
- instr->op.code = CALL_KW_NON_PY;
+ specialize(instr, CALL_KW_NON_PY);
fail = 0;
}
if (fail) {
- STAT_INC(CALL, failure);
- assert(!PyErr_Occurred());
- instr->op.code = CALL_KW;
- cache->counter = adaptive_counter_backoff(cache->counter);
- }
- else {
- STAT_INC(CALL, success);
- assert(!PyErr_Occurred());
- cache->counter = adaptive_counter_cooldown();
+ unspecialize(instr);
}
}