diff options
author | mpage <mpage@meta.com> | 2024-11-21 19:22:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-21 19:22:21 (GMT) |
commit | 09c240f20c47db126ad7e162df41e5c2596962d4 (patch) | |
tree | b9d2c0c6fec8392210040e554bb6569278430df3 /Python | |
parent | 9dabace39d118ec7a204b6970f8a3f475a11522c (diff) | |
download | cpython-09c240f20c47db126ad7e162df41e5c2596962d4.zip cpython-09c240f20c47db126ad7e162df41e5c2596962d4.tar.gz cpython-09c240f20c47db126ad7e162df41e5c2596962d4.tar.bz2 |
gh-115999: Specialize `LOAD_GLOBAL` in free-threaded builds (#126607)
Enable specialization of LOAD_GLOBAL in free-threaded builds.
Thread-safety of specialization in free-threaded builds is provided by the following:
A critical section is held on both the globals and builtins objects during specialization. This ensures we get an atomic view of both builtins and globals during specialization.
Generation of new keys versions is made atomic in free-threaded builds.
Existing helpers are used to atomically modify the opcode.
Thread-safety of specialized instructions in free-threaded builds is provided by the following:
Relaxed atomics are used when loading and storing dict keys versions. This avoids potential data races as the dict keys versions are read without holding the dictionary's per-object lock in version guards.
Dicts keys objects are passed from keys version guards to the downstream uops. This ensures that we are loading from the correct offset in the keys object. Once a unicode key has been stored in a keys object for a combined dictionary in free-threaded builds, the offset that it is stored in will never be reused for a different key. Once the version guard passes, we know that we are reading from the correct offset.
The dictionary read fast-path is used to read values from the dictionary once we know the correct offset.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/bytecodes.c | 37 | ||||
-rw-r--r-- | Python/executor_cases.c.h | 39 | ||||
-rw-r--r-- | Python/generated_cases.c.h | 37 | ||||
-rw-r--r-- | Python/optimizer.c | 14 | ||||
-rw-r--r-- | Python/specialize.c | 46 |
5 files changed, 115 insertions, 58 deletions
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ffe2f5..71b1dc0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1569,7 +1569,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -1578,7 +1578,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // res[1] because we need a pointer to res to pass it to _PyEval_LoadGlobalStackRef @@ -1599,16 +1599,18 @@ dummy_func( op(_GUARD_GLOBALS_VERSION, (version/1 --)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + assert(DK_IS_UNICODE(keys)); } op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } @@ -1616,33 +1618,44 @@ dummy_func( { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(globals_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(builtins_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } macro(LOAD_GLOBAL_MODULE) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 976a342..8acf7a4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1870,11 +1870,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + assert(DK_IS_UNICODE(keys)); break; } @@ -1886,11 +1887,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - globals_keys = dict->ma_keys; + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); stack_pointer[0].bits = (uintptr_t)globals_keys; stack_pointer += 1; @@ -1906,11 +1908,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - builtins_keys = dict->ma_keys; + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); stack_pointer[0].bits = (uintptr_t)builtins_keys; stack_pointer += 1; @@ -1926,17 +1929,25 @@ globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); @@ -1952,17 +1963,25 @@ builtins_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f3db2f9..8896229 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6098,7 +6098,7 @@ { uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -6109,7 +6109,7 @@ } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 1 cache entry */ /* Skip 1 cache entry */ @@ -6144,28 +6144,35 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + assert(DK_IS_UNICODE(keys)); } // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -6188,8 +6195,9 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ @@ -6197,12 +6205,17 @@ { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; diff --git a/Python/optimizer.c b/Python/optimizer.c index bc2ecc0..6a23221 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -205,8 +205,8 @@ _PyOptimizer_Optimize( return 1; } -_PyExecutorObject * -_Py_GetExecutor(PyCodeObject *code, int offset) +static _PyExecutorObject * +get_executor_lock_held(PyCodeObject *code, int offset) { int code_len = (int)Py_SIZE(code); for (int i = 0 ; i < code_len;) { @@ -222,6 +222,16 @@ _Py_GetExecutor(PyCodeObject *code, int offset) return NULL; } +_PyExecutorObject * +_Py_GetExecutor(PyCodeObject *code, int offset) +{ + _PyExecutorObject *executor; + Py_BEGIN_CRITICAL_SECTION(code); + executor = get_executor_lock_held(code, offset); + Py_END_CRITICAL_SECTION(); + return executor; +} + static PyObject * is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) { diff --git a/Python/specialize.c b/Python/specialize.c index ad41dfc..af37e24 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1519,12 +1519,12 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } -void -_Py_Specialize_LoadGlobal( +static void +specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_GLOBAL] == INLINE_CACHE_ENTRIES_LOAD_GLOBAL); /* Use inline cache */ _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1); @@ -1549,8 +1549,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1561,8 +1561,8 @@ _Py_Specialize_LoadGlobal( } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)keys_version; - instr->op.code = LOAD_GLOBAL_MODULE; - goto success; + specialize(instr, LOAD_GLOBAL_MODULE); + return; } if (!PyDict_CheckExact(builtins)) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); @@ -1582,8 +1582,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t globals_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (globals_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1592,8 +1592,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState( - interp, builtin_keys); + uint32_t builtins_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) builtins); if (builtins_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1605,18 +1605,20 @@ _Py_Specialize_LoadGlobal( cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)globals_version; cache->builtin_keys_version = (uint16_t)builtins_version; - instr->op.code = LOAD_GLOBAL_BUILTIN; - goto success; -fail: - STAT_INC(LOAD_GLOBAL, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_GLOBAL; - cache->counter = adaptive_counter_backoff(cache->counter); + specialize(instr, LOAD_GLOBAL_BUILTIN); return; -success: - STAT_INC(LOAD_GLOBAL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); +fail: + unspecialize(instr); +} + +void +_Py_Specialize_LoadGlobal( + PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name) +{ + Py_BEGIN_CRITICAL_SECTION2(globals, builtins); + specialize_load_global_lock_held(globals, builtins, instr, name); + Py_END_CRITICAL_SECTION2(); } #ifdef Py_STATS |