summaryrefslogtreecommitdiffstats
path: root/Python
diff options
context:
space:
mode:
authormpage <mpage@meta.com>2024-12-03 19:20:20 (GMT)
committerGitHub <noreply@github.com>2024-12-03 19:20:20 (GMT)
commitdabcecfd6dadb9430733105ba36925b290343d31 (patch)
tree56bd2af5b31dbc182c6cd7c81a33745112871391 /Python
parentfc5a0dc22483a35068888e828c65796d7a792c14 (diff)
downloadcpython-dabcecfd6dadb9430733105ba36925b290343d31.zip
cpython-dabcecfd6dadb9430733105ba36925b290343d31.tar.gz
cpython-dabcecfd6dadb9430733105ba36925b290343d31.tar.bz2
gh-115999: Enable specialization of `CALL` instructions in free-threaded builds (#127123)
The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below.

A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:
- Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref.
- Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__.
- Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes.
- Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure.

A few other miscellaneous changes were also needed:
- Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it.
- Add missing co_tlbc for _Py_InitCleanup.
- Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
Diffstat (limited to 'Python')
-rw-r--r--Python/bytecodes.c16
-rw-r--r--Python/executor_cases.c.h21
-rw-r--r--Python/generated_cases.c.h21
-rw-r--r--Python/perf_trampoline.c6
-rw-r--r--Python/pystate.c2
-rw-r--r--Python/specialize.c112
6 files changed, 107 insertions, 71 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index d6be3ce..3d28094 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -3329,7 +3329,7 @@ dummy_func(
};
specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) {
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0]));
@@ -3337,7 +3337,7 @@ dummy_func(
}
OPCODE_DEFERRED_INC(CALL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) {
@@ -3722,10 +3722,10 @@ dummy_func(
DEOPT_IF(!PyStackRef_IsNull(null[0]));
DEOPT_IF(!PyType_Check(callable_o));
PyTypeObject *tp = (PyTypeObject *)callable_o;
- DEOPT_IF(tp->tp_version_tag != type_version);
+ DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version);
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
- PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+ PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize));
STAT_INC(CALL, hit);
@@ -3743,17 +3743,19 @@ dummy_func(
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+ assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
DEAD(init);
DEAD(self);
- init_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
SYNC_SP();
- if (init_frame == NULL) {
+ if (temp == NULL) {
_PyEval_FrameClearAndPop(tstate, shim);
ERROR_NO_POP();
}
+ init_frame = temp;
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
/* Account for pushing the extra frame.
* We don't check recursion depth here,
@@ -4000,8 +4002,10 @@ dummy_func(
DEOPT_IF(callable_o != interp->callable_cache.list_append);
assert(self_o != NULL);
DEOPT_IF(!PyList_Check(self_o));
+ DEOPT_IF(!LOCK_OBJECT(self_o));
STAT_INC(CALL, hit);
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+ UNLOCK_OBJECT(self_o);
PyStackRef_CLOSE(self);
PyStackRef_CLOSE(callable);
ERROR_IF(err, error);
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 5808146..987ff2e 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -4500,13 +4500,13 @@
JUMP_TO_JUMP_TARGET();
}
PyTypeObject *tp = (PyTypeObject *)callable_o;
- if (tp->tp_version_tag != type_version) {
+ if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) {
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
- PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+ PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) {
UOP_STAT_INC(uopcode, miss);
@@ -4537,25 +4537,29 @@
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+ assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
stack_pointer = _PyFrame_GetStackPointer(frame);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
_PyFrame_SetStackPointer(frame, stack_pointer);
- init_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
stack_pointer = _PyFrame_GetStackPointer(frame);
- stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame;
- stack_pointer += -1 - oparg;
+ stack_pointer += -2 - oparg;
assert(WITHIN_STACK_BOUNDS());
- if (init_frame == NULL) {
+ if (temp == NULL) {
_PyEval_FrameClearAndPop(tstate, shim);
JUMP_TO_ERROR();
}
+ init_frame = temp;
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
/* Account for pushing the extra frame.
* We don't check recursion depth here,
* as it will be checked after start_frame */
tstate->py_recursion_remaining--;
+ stack_pointer[0].bits = (uintptr_t)init_frame;
+ stack_pointer += 1;
+ assert(WITHIN_STACK_BOUNDS());
break;
}
@@ -4908,8 +4912,13 @@
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
+ if (!LOCK_OBJECT(self_o)) {
+ UOP_STAT_INC(uopcode, miss);
+ JUMP_TO_JUMP_TARGET();
+ }
STAT_INC(CALL, hit);
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+ UNLOCK_OBJECT(self_o);
PyStackRef_CLOSE(self);
PyStackRef_CLOSE(callable);
if (err) JUMP_TO_ERROR();
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ef191f6..33f32ab 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -880,7 +880,7 @@
callable = &stack_pointer[-2 - oparg];
uint16_t counter = read_u16(&this_instr[1].cache);
(void)counter;
- #if ENABLE_SPECIALIZATION
+ #if ENABLE_SPECIALIZATION_FT
if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
next_instr = this_instr;
_PyFrame_SetStackPointer(frame, stack_pointer);
@@ -890,7 +890,7 @@
}
OPCODE_DEFERRED_INC(CALL);
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
- #endif /* ENABLE_SPECIALIZATION */
+ #endif /* ENABLE_SPECIALIZATION_FT */
}
/* Skip 2 cache entries */
// _MAYBE_EXPAND_METHOD
@@ -1048,10 +1048,10 @@
DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL);
DEOPT_IF(!PyType_Check(callable_o), CALL);
PyTypeObject *tp = (PyTypeObject *)callable_o;
- DEOPT_IF(tp->tp_version_tag != type_version, CALL);
+ DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL);
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES);
PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o;
- PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init;
+ PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init);
PyCodeObject *code = (PyCodeObject *)init_func->func_code;
DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL);
STAT_INC(CALL, hit);
@@ -1073,20 +1073,21 @@
_PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked(
tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame);
assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK);
+ assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE);
stack_pointer = _PyFrame_GetStackPointer(frame);
/* Push self onto stack of shim */
shim->localsplus[0] = PyStackRef_DUP(self[0]);
_PyFrame_SetStackPointer(frame, stack_pointer);
- init_frame = _PyEvalFramePushAndInit(
+ _PyInterpreterFrame *temp = _PyEvalFramePushAndInit(
tstate, init[0], NULL, args-1, oparg+1, NULL, shim);
stack_pointer = _PyFrame_GetStackPointer(frame);
- stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame;
- stack_pointer += -1 - oparg;
+ stack_pointer += -2 - oparg;
assert(WITHIN_STACK_BOUNDS());
- if (init_frame == NULL) {
+ if (temp == NULL) {
_PyEval_FrameClearAndPop(tstate, shim);
goto error;
}
+ init_frame = temp;
frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL;
/* Account for pushing the extra frame.
* We don't check recursion depth here,
@@ -1100,8 +1101,6 @@
// Eventually this should be the only occurrence of this code.
assert(tstate->interp->eval_frame == NULL);
_PyInterpreterFrame *temp = new_frame;
- stack_pointer += -1;
- assert(WITHIN_STACK_BOUNDS());
_PyFrame_SetStackPointer(frame, stack_pointer);
assert(new_frame->previous == frame || new_frame->previous->previous == frame);
CALL_STAT_INC(inlined_py_calls);
@@ -2383,8 +2382,10 @@
DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL);
assert(self_o != NULL);
DEOPT_IF(!PyList_Check(self_o), CALL);
+ DEOPT_IF(!LOCK_OBJECT(self_o), CALL);
STAT_INC(CALL, hit);
int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg));
+ UNLOCK_OBJECT(self_o);
PyStackRef_CLOSE(self);
PyStackRef_CLOSE(callable);
if (err) goto pop_3_error;
diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c
index f144f7d..ad077dc 100644
--- a/Python/perf_trampoline.c
+++ b/Python/perf_trampoline.c
@@ -484,11 +484,11 @@ _PyPerfTrampoline_Init(int activate)
return -1;
}
if (!activate) {
- tstate->interp->eval_frame = NULL;
+ _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
perf_status = PERF_STATUS_NO_INIT;
}
else {
- tstate->interp->eval_frame = py_trampoline_evaluator;
+ _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator);
if (new_code_arena() < 0) {
return -1;
}
@@ -514,7 +514,7 @@ _PyPerfTrampoline_Fini(void)
}
PyThreadState *tstate = _PyThreadState_GET();
if (tstate->interp->eval_frame == py_trampoline_evaluator) {
- tstate->interp->eval_frame = NULL;
+ _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL);
}
if (perf_status == PERF_STATUS_OK) {
trampoline_api.free_state(trampoline_api.state);
diff --git a/Python/pystate.c b/Python/pystate.c
index 3ceae22..839413a 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -2838,7 +2838,9 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp,
}
#endif
RARE_EVENT_INC(set_eval_frame_func);
+ _PyEval_StopTheWorld(interp);
interp->eval_frame = eval_frame;
+ _PyEval_StartTheWorld(interp);
}
diff --git a/Python/specialize.c b/Python/specialize.c
index 8b2d1a1..ec2cd70 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -1911,38 +1911,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD
unspecialize(instr);
}
-/* Returns a borrowed reference.
- * The reference is only valid if guarded by a type version check.
- */
-static PyFunctionObject *
-get_init_for_simple_managed_python_class(PyTypeObject *tp)
+/* Returns a strong reference. */
+static PyObject *
+get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version)
{
assert(tp->tp_new == PyBaseObject_Type.tp_new);
if (tp->tp_alloc != PyType_GenericAlloc) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN);
return NULL;
}
- if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
+ unsigned long tp_flags = PyType_GetFlags(tp);
+ if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES);
return NULL;
}
- if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
+ if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) {
/* Is this possible? */
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR);
return NULL;
}
- PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__));
+ PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version);
if (init == NULL || !PyFunction_Check(init)) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON);
+ Py_XDECREF(init);
return NULL;
}
int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init));
if (kind != SIMPLE_FUNCTION) {
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE);
+ Py_DECREF(init);
return NULL;
}
- ((PyHeapTypeObject *)tp)->_spec_cache.init = init;
- return (PyFunctionObject *)init;
+ return init;
}
static int
@@ -1954,20 +1954,20 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
int oparg = instr->op.arg;
if (nargs == 1 && oparg == 1) {
if (tp == &PyUnicode_Type) {
- instr->op.code = CALL_STR_1;
+ specialize(instr, CALL_STR_1);
return 0;
}
else if (tp == &PyType_Type) {
- instr->op.code = CALL_TYPE_1;
+ specialize(instr, CALL_TYPE_1);
return 0;
}
else if (tp == &PyTuple_Type) {
- instr->op.code = CALL_TUPLE_1;
+ specialize(instr, CALL_TUPLE_1);
return 0;
}
}
if (tp->tp_vectorcall != NULL) {
- instr->op.code = CALL_BUILTIN_CLASS;
+ specialize(instr, CALL_BUILTIN_CLASS);
return 0;
}
goto generic;
@@ -1976,19 +1976,25 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
goto generic;
}
if (tp->tp_new == PyBaseObject_Type.tp_new) {
- PyFunctionObject *init = get_init_for_simple_managed_python_class(tp);
- if (type_get_version(tp, CALL) == 0) {
+ unsigned int tp_version = 0;
+ PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version);
+ if (!tp_version) {
+ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS);
+ Py_XDECREF(init);
return -1;
}
- if (init != NULL) {
+ if (init != NULL && _PyType_CacheInitForSpecialization(
+ (PyHeapTypeObject *)tp, init, tp_version)) {
_PyCallCache *cache = (_PyCallCache *)(instr + 1);
- write_u32(cache->func_version, tp->tp_version_tag);
- _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT);
+ write_u32(cache->func_version, tp_version);
+ specialize(instr, CALL_ALLOC_AND_ENTER_INIT);
+ Py_DECREF(init);
return 0;
}
+ Py_XDECREF(init);
}
generic:
- instr->op.code = CALL_NON_PY_GENERAL;
+ specialize(instr, CALL_NON_PY_GENERAL);
return 0;
}
@@ -2004,7 +2010,7 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS);
return -1;
}
- instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS;
+ specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS);
return 0;
}
case METH_O: {
@@ -2018,22 +2024,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr,
bool pop = (next.op.code == POP_TOP);
int oparg = instr->op.arg;
if ((PyObject *)descr == list_append && oparg == 1 && pop) {
- instr->op.code = CALL_LIST_APPEND;
+ specialize(instr, CALL_LIST_APPEND);
return 0;
}
- instr->op.code = CALL_METHOD_DESCRIPTOR_O;
+ specialize(instr, CALL_METHOD_DESCRIPTOR_O);
return 0;
}
case METH_FASTCALL: {
- instr->op.code = CALL_METHOD_DESCRIPTOR_FAST;
+ specialize(instr, CALL_METHOD_DESCRIPTOR_FAST);
return 0;
}
case METH_FASTCALL | METH_KEYWORDS: {
- instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS;
+ specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS);
return 0;
}
}
- instr->op.code = CALL_NON_PY_GENERAL;
+ specialize(instr, CALL_NON_PY_GENERAL);
return 0;
}
@@ -2063,12 +2069,15 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs,
return -1;
}
write_u32(cache->func_version, version);
+ uint8_t opcode;
if (argcount == nargs + bound_method) {
- instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS;
+ opcode =
+ bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS;
}
else {
- instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL;
+ opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL;
}
+ specialize(instr, opcode);
return 0;
}
@@ -2117,10 +2126,10 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
/* len(o) */
PyInterpreterState *interp = _PyInterpreterState_GET();
if (callable == interp->callable_cache.len) {
- instr->op.code = CALL_LEN;
+ specialize(instr, CALL_LEN);
return 0;
}
- instr->op.code = CALL_BUILTIN_O;
+ specialize(instr, CALL_BUILTIN_O);
return 0;
}
case METH_FASTCALL: {
@@ -2128,19 +2137,19 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs)
/* isinstance(o1, o2) */
PyInterpreterState *interp = _PyInterpreterState_GET();
if (callable == interp->callable_cache.isinstance) {
- instr->op.code = CALL_ISINSTANCE;
+ specialize(instr, CALL_ISINSTANCE);
return 0;
}
}
- instr->op.code = CALL_BUILTIN_FAST;
+ specialize(instr, CALL_BUILTIN_FAST);
return 0;
}
case METH_FASTCALL | METH_KEYWORDS: {
- instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS;
+ specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS);
return 0;
}
default:
- instr->op.code = CALL_NON_PY_GENERAL;
+ specialize(instr, CALL_NON_PY_GENERAL);
return 0;
}
}
@@ -2150,10 +2159,9 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
{
PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st);
- assert(ENABLE_SPECIALIZATION);
+ assert(ENABLE_SPECIALIZATION_FT);
assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL);
assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL);
- _PyCallCache *cache = (_PyCallCache *)(instr + 1);
int fail;
if (PyCFunction_CheckExact(callable)) {
fail = specialize_c_call(callable, instr, nargs);
@@ -2178,19 +2186,11 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs)
}
}
else {
- instr->op.code = CALL_NON_PY_GENERAL;
+ specialize(instr, CALL_NON_PY_GENERAL);
fail = 0;
}
if (fail) {
- STAT_INC(CALL, failure);
- assert(!PyErr_Occurred());
- instr->op.code = CALL;
- cache->counter = adaptive_counter_backoff(cache->counter);
- }
- else {
- STAT_INC(CALL, success);
- assert(!PyErr_Occurred());
- cache->counter = adaptive_counter_cooldown();
+ unspecialize(instr);
}
}
@@ -2793,6 +2793,16 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr)
* Ends with a RESUME so that it is not traced.
* This is used as a plain code object, not a function,
* so must not access globals or builtins.
+ * There are a few other constraints imposed on the code
+ * by the free-threaded build:
+ *
+ * 1. The RESUME instruction must not be executed. Otherwise we may attempt to
+ * free the statically allocated TLBC array.
+ * 2. It must contain no specializable instructions. Specializing multiple
+ * copies of the same bytecode is not thread-safe in free-threaded builds.
+ *
+ * This should be dynamically allocated if either of those restrictions need to
+ * be lifted.
*/
#define NO_LOC_4 (128 | (PY_CODE_LOCATION_INFO_NONE << 3) | 3)
@@ -2802,6 +2812,13 @@ static const PyBytesObject no_location = {
.ob_sval = { NO_LOC_4 }
};
+#ifdef Py_GIL_DISABLED
+static _PyCodeArray init_cleanup_tlbc = {
+ .size = 1,
+ .entries = {(char*) &_Py_InitCleanup.co_code_adaptive},
+};
+#endif
+
const struct _PyCode8 _Py_InitCleanup = {
_PyVarObject_HEAD_INIT(&PyCode_Type, 3),
.co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty),
@@ -2817,6 +2834,9 @@ const struct _PyCode8 _Py_InitCleanup = {
._co_firsttraceable = 4,
.co_stacksize = 2,
.co_framesize = 2 + FRAME_SPECIALS_SIZE,
+#ifdef Py_GIL_DISABLED
+ .co_tlbc = &init_cleanup_tlbc,
+#endif
.co_code_adaptive = {
EXIT_INIT_CHECK, 0,
RETURN_VALUE, 0,