diff options
author | mpage <mpage@meta.com> | 2024-12-03 19:20:20 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-12-03 19:20:20 (GMT) |
commit | dabcecfd6dadb9430733105ba36925b290343d31 (patch) | |
tree | 56bd2af5b31dbc182c6cd7c81a33745112871391 /Python | |
parent | fc5a0dc22483a35068888e828c65796d7a792c14 (diff) | |
download | cpython-dabcecfd6dadb9430733105ba36925b290343d31.zip cpython-dabcecfd6dadb9430733105ba36925b290343d31.tar.gz cpython-dabcecfd6dadb9430733105ba36925b290343d31.tar.bz2 |
gh-115999: Enable specialization of `CALL` instructions in free-threaded builds (#127123)
The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below.
A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe:
Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref.
Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__.
Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes.
Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure.
A few other miscellaneous changes were also needed:
Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it.
Add missing co_tlbc for _Py_InitCleanup.
Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 16 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 21 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 21 | ||||
-rw-r--r-- | Python/perf_trampoline.c | 6 | ||||
-rw-r--r-- | Python/pystate.c | 2 | ||||
-rw-r--r-- | Python/specialize.c | 112 |
6 files changed, 107 insertions, 71 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d6be3ce..3d28094 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3329,7 +3329,7 @@ dummy_func( }; specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0])); @@ -3337,7 +3337,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) { @@ -3722,10 +3722,10 @@ dummy_func( DEOPT_IF(!PyStackRef_IsNull(null[0])); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); @@ -3743,17 +3743,19 @@ dummy_func( _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); DEAD(init); DEAD(self); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); SYNC_SP(); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); ERROR_NO_POP(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, @@ -4000,8 +4002,10 @@ dummy_func( DEOPT_IF(callable_o != interp->callable_cache.list_append); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o)); + DEOPT_IF(!LOCK_OBJECT(self_o)); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); ERROR_IF(err, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5808146..987ff2e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4500,13 +4500,13 @@ JUMP_TO_JUMP_TARGET(); } PyTypeObject *tp = (PyTypeObject *)callable_o; - if (tp->tp_version_tag != type_version) { + if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) { UOP_STAT_INC(uopcode, miss); @@ -4537,25 +4537,29 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); JUMP_TO_ERROR(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + stack_pointer[0].bits = (uintptr_t)init_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -4908,8 +4912,13 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + if (!LOCK_OBJECT(self_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ef191f6..33f32ab 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -880,7 +880,7 @@ callable = &stack_pointer[-2 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -890,7 +890,7 @@ } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD @@ -1048,10 +1048,10 @@ DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL); DEOPT_IF(!PyType_Check(callable_o), CALL); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version, CALL); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit); @@ -1073,20 +1073,21 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); goto error; } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, @@ -1100,8 +1101,6 @@ // Eventually this should be the only occurrence of this code. assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2383,8 +2382,10 @@ DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); + DEOPT_IF(!LOCK_OBJECT(self_o), CALL); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error; diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index f144f7d..ad077dc 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -484,11 +484,11 @@ _PyPerfTrampoline_Init(int activate) return -1; } if (!activate) { - tstate->interp->eval_frame = NULL; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); perf_status = PERF_STATUS_NO_INIT; } else { - tstate->interp->eval_frame = py_trampoline_evaluator; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator); if (new_code_arena() < 0) { return -1; } @@ -514,7 +514,7 @@ _PyPerfTrampoline_Fini(void) } PyThreadState *tstate = _PyThreadState_GET(); if (tstate->interp->eval_frame == py_trampoline_evaluator) { - tstate->interp->eval_frame = NULL; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); } if (perf_status == PERF_STATUS_OK) { trampoline_api.free_state(trampoline_api.state); diff --git a/Python/pystate.c b/Python/pystate.c index 3ceae22..839413a 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2838,7 +2838,9 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, } #endif RARE_EVENT_INC(set_eval_frame_func); + _PyEval_StopTheWorld(interp); interp->eval_frame = eval_frame; + _PyEval_StartTheWorld(interp); } diff --git a/Python/specialize.c b/Python/specialize.c index 8b2d1a1..ec2cd70 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1911,38 +1911,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD unspecialize(instr); } -/* Returns a borrowed reference. - * The reference is only valid if guarded by a type version check. - */ -static PyFunctionObject * -get_init_for_simple_managed_python_class(PyTypeObject *tp) +/* Returns a strong reference. */ +static PyObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version) { assert(tp->tp_new == PyBaseObject_Type.tp_new); if (tp->tp_alloc != PyType_GenericAlloc) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN); return NULL; } - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { + unsigned long tp_flags = PyType_GetFlags(tp); + if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES); return NULL; } - if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) { /* Is this possible? */ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR); return NULL; } - PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__)); + PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version); if (init == NULL || !PyFunction_Check(init)) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON); + Py_XDECREF(init); return NULL; } int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE); + Py_DECREF(init); return NULL; } - ((PyHeapTypeObject *)tp)->_spec_cache.init = init; - return (PyFunctionObject *)init; + return init; } static int @@ -1954,20 +1954,20 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) int oparg = instr->op.arg; if (nargs == 1 && oparg == 1) { if (tp == &PyUnicode_Type) { - instr->op.code = CALL_STR_1; + specialize(instr, CALL_STR_1); return 0; } else if (tp == &PyType_Type) { - instr->op.code = CALL_TYPE_1; + specialize(instr, CALL_TYPE_1); return 0; } else if (tp == &PyTuple_Type) { - instr->op.code = CALL_TUPLE_1; + specialize(instr, CALL_TUPLE_1); return 0; } } if (tp->tp_vectorcall != NULL) { - instr->op.code = CALL_BUILTIN_CLASS; + specialize(instr, CALL_BUILTIN_CLASS); return 0; } goto generic; @@ -1976,19 +1976,25 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) goto generic; } if (tp->tp_new == PyBaseObject_Type.tp_new) { - PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); - if (type_get_version(tp, CALL) == 0) { + unsigned int tp_version = 0; + PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); + if (!tp_version) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); + Py_XDECREF(init); return -1; } - if (init != NULL) { + if (init != NULL && _PyType_CacheInitForSpecialization( + (PyHeapTypeObject *)tp, init, tp_version)) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); - write_u32(cache->func_version, tp->tp_version_tag); - _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT); + write_u32(cache->func_version, tp_version); + specialize(instr, CALL_ALLOC_AND_ENTER_INIT); + Py_DECREF(init); return 0; } + Py_XDECREF(init); } generic: - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } @@ -2004,7 +2010,7 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); return -1; } - instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS; + specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS); return 0; } case METH_O: { @@ -2018,22 +2024,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, bool pop = (next.op.code == POP_TOP); int oparg = instr->op.arg; if ((PyObject *)descr == list_append && oparg == 1 && pop) { - instr->op.code = CALL_LIST_APPEND; + specialize(instr, CALL_LIST_APPEND); return 0; } - instr->op.code = CALL_METHOD_DESCRIPTOR_O; + specialize(instr, CALL_METHOD_DESCRIPTOR_O); return 0; } case METH_FASTCALL: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST); return 0; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS); return 0; } } - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } @@ -2063,12 +2069,15 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return -1; } write_u32(cache->func_version, version); + uint8_t opcode; if (argcount == nargs + bound_method) { - instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; + opcode = + bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; } else { - instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; + opcode = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; } + specialize(instr, opcode); return 0; } @@ -2117,10 +2126,10 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) /* len(o) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.len) { - instr->op.code = CALL_LEN; + specialize(instr, CALL_LEN); return 0; } - instr->op.code = CALL_BUILTIN_O; + specialize(instr, CALL_BUILTIN_O); return 0; } case METH_FASTCALL: { @@ -2128,19 +2137,19 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) /* isinstance(o1, o2) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.isinstance) { - instr->op.code = CALL_ISINSTANCE; + specialize(instr, CALL_ISINSTANCE); return 0; } } - instr->op.code = CALL_BUILTIN_FAST; + specialize(instr, CALL_BUILTIN_FAST); return 0; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS; + specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS); return 0; } default: - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } } @@ -2150,10 +2159,9 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) { PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); - _PyCallCache *cache = (_PyCallCache *)(instr + 1); int fail; if (PyCFunction_CheckExact(callable)) { fail = specialize_c_call(callable, instr, nargs); @@ -2178,19 +2186,11 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } else { - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); fail = 0; } if (fail) { - STAT_INC(CALL, failure); - assert(!PyErr_Occurred()); - instr->op.code = CALL; - cache->counter = adaptive_counter_backoff(cache->counter); - } - else { - STAT_INC(CALL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } } @@ -2793,6 +2793,16 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) * Ends with a RESUME so that it is not traced. * This is used as a plain code object, not a function, * so must not access globals or builtins. + * There are a few other constraints imposed on the code + * by the free-threaded build: + * + * 1. The RESUME instruction must not be executed. Otherwise we may attempt to + * free the statically allocated TLBC array. + * 2. It must contain no specializable instructions. Specializing multiple + * copies of the same bytecode is not thread-safe in free-threaded builds. + * + * This should be dynamically allocated if either of those restrictions need to + * be lifted. */ #define NO_LOC_4 (128 | (PY_CODE_LOCATION_INFO_NONE << 3) | 3) @@ -2802,6 +2812,13 @@ static const PyBytesObject no_location = { .ob_sval = { NO_LOC_4 } }; +#ifdef Py_GIL_DISABLED +static _PyCodeArray init_cleanup_tlbc = { + .size = 1, + .entries = {(char*) &_Py_InitCleanup.co_code_adaptive}, +}; +#endif + const struct _PyCode8 _Py_InitCleanup = { _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), @@ -2817,6 +2834,9 @@ const struct _PyCode8 _Py_InitCleanup = { ._co_firsttraceable = 4, .co_stacksize = 2, .co_framesize = 2 + FRAME_SPECIALS_SIZE, +#ifdef Py_GIL_DISABLED + .co_tlbc = &init_cleanup_tlbc, +#endif .co_code_adaptive = { EXIT_INIT_CHECK, 0, RETURN_VALUE, 0, |