From 09c240f20c47db126ad7e162df41e5c2596962d4 Mon Sep 17 00:00:00 2001 From: mpage Date: Thu, 21 Nov 2024 11:22:21 -0800 Subject: gh-115999: Specialize `LOAD_GLOBAL` in free-threaded builds (#126607) Enable specialization of LOAD_GLOBAL in free-threaded builds. Thread-safety of specialization in free-threaded builds is provided by the following: A critical section is held on both the globals and builtins objects during specialization. This ensures we get an atomic view of both builtins and globals during specialization. Generation of new keys versions is made atomic in free-threaded builds. Existing helpers are used to atomically modify the opcode. Thread-safety of specialized instructions in free-threaded builds is provided by the following: Relaxed atomics are used when loading and storing dict keys versions. This avoids potential data races as the dict keys versions are read without holding the dictionary's per-object lock in version guards. Dicts keys objects are passed from keys version guards to the downstream uops. This ensures that we are loading from the correct offset in the keys object. Once a unicode key has been stored in a keys object for a combined dictionary in free-threaded builds, the offset that it is stored in will never be reused for a different key. Once the version guard passes, we know that we are reading from the correct offset. The dictionary read fast-path is used to read values from the dictionary once we know the correct offset. --- Include/internal/pycore_dict.h | 11 +++++++ Include/internal/pycore_object.h | 15 +++++++++ Lib/test/test_opcache.py | 19 ++++++++++- Objects/dictobject.c | 68 +++++++++++++++++++++++++++++++++------ Objects/funcobject.c | 2 ++ Python/bytecodes.c | 37 ++++++++++++++------- Python/executor_cases.c.h | 39 ++++++++++++++++------ Python/generated_cases.c.h | 37 ++++++++++++++------- Python/optimizer.c | 14 ++++++-- Python/specialize.c | 46 +++++++++++++------------- Tools/cases_generator/analyzer.py | 3 ++ 11 files changed, 222 insertions(+), 69 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index f7d747c..6e4a308 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -90,6 +90,17 @@ extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); extern uint32_t _PyDictKeys_GetVersionForCurrentState( PyInterpreterState *interp, PyDictKeysObject *dictkeys); +/* Gets a version number unique to the current state of the keys of dict, if possible. + * + * In free-threaded builds ensures that the dict can be used for lock-free + * reads if a version was assigned. + * + * The caller must hold the per-object lock on dict. + * + * Returns the version number, or zero if it was not possible to get a version number. */ +extern uint32_t _PyDict_GetKeysVersionForCurrentState( + PyInterpreterState *interp, PyDictObject *dict); + extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); extern void _PyDictKeys_DecRef(PyDictKeysObject *keys); diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index cafc02f..783d88c 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -14,6 +14,7 @@ extern "C" { #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_STORE_PTR_RELAXED #include "pycore_pystate.h" // _PyInterpreterState_GET() +#include "pycore_stackref.h" #include "pycore_uniqueid.h" // _PyObject_ThreadIncrefSlow() // This value is added to `ob_ref_shared` for objects that use deferred @@ -595,6 +596,20 @@ _Py_TryIncrefCompare(PyObject **src, PyObject *op) return 1; } +static inline int +_Py_TryIncrefCompareStackRef(PyObject **src, PyObject *op, _PyStackRef *out) +{ + if (_Py_IsImmortal(op) || _PyObject_HasDeferredRefcount(op)) { + *out = (_PyStackRef){ .bits = (intptr_t)op | Py_TAG_DEFERRED }; + return 1; + } + if (_Py_TryIncrefCompare(src, op)) { + *out = PyStackRef_FromPyObjectSteal(op); + return 1; + } + return 0; +} + /* Loads and increfs an object from ptr, which may contain a NULL value. Safe with concurrent (atomic) updates to ptr. NOTE: The writer must set maybe-weakref on the stored object! */ diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 78e4bf4..ee7fd17 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -546,7 +546,6 @@ class TestCallCache(TestBase): @threading_helper.requires_working_threading() -@requires_specialization class TestRacesDoNotCrash(TestBase): # Careful with these. Bigger numbers have a higher chance of catching bugs, # but you can also burn through a *ton* of type/dict/function versions: @@ -588,6 +587,7 @@ class TestRacesDoNotCrash(TestBase): for writer in writers: writer.join() + @requires_specialization def test_binary_subscr_getitem(self): def get_items(): class C: @@ -617,6 +617,7 @@ class TestRacesDoNotCrash(TestBase): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -640,6 +641,7 @@ class TestRacesDoNotCrash(TestBase): opname = "BINARY_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_gen(self): def get_items(): def g(): @@ -671,6 +673,7 @@ class TestRacesDoNotCrash(TestBase): opname = "FOR_ITER_GEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_for_iter_list(self): def get_items(): items = [] @@ -692,6 +695,7 @@ class TestRacesDoNotCrash(TestBase): opname = "FOR_ITER_LIST" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_class(self): def get_items(): class C: @@ -721,6 +725,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_CLASS" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_getattribute_overridden(self): def get_items(): class C: @@ -750,6 +755,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_instance_value(self): def get_items(): class C: @@ -773,6 +779,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_lazy_dict(self): def get_items(): class C(Exception): @@ -802,6 +809,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_METHOD_LAZY_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_no_dict(self): def get_items(): class C: @@ -832,6 +840,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_METHOD_NO_DICT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_method_with_values(self): def get_items(): class C: @@ -861,6 +870,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_METHOD_WITH_VALUES" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_module(self): def get_items(): items = [] @@ -885,6 +895,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_MODULE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_property(self): def get_items(): class C: @@ -914,6 +925,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_PROPERTY" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_load_attr_with_hint(self): def get_items(): class C: @@ -940,6 +952,7 @@ class TestRacesDoNotCrash(TestBase): opname = "LOAD_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization_ft def test_load_global_module(self): def get_items(): items = [] @@ -961,6 +974,7 @@ class TestRacesDoNotCrash(TestBase): opname, get_items, read, write, check_items=True ) + @requires_specialization def test_store_attr_instance_value(self): def get_items(): class C: @@ -983,6 +997,7 @@ class TestRacesDoNotCrash(TestBase): opname = "STORE_ATTR_INSTANCE_VALUE" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_attr_with_hint(self): def get_items(): class C: @@ -1008,6 +1023,7 @@ class TestRacesDoNotCrash(TestBase): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1031,6 +1047,7 @@ class TestRacesDoNotCrash(TestBase): opname = "STORE_SUBSCR_LIST_INT" self.assert_races_do_not_crash(opname, get_items, read, write) + @requires_specialization def test_unpack_sequence_list(self): def get_items(): items = [] diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 393e9f9..49b213e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1285,6 +1285,20 @@ ensure_shared_on_resize(PyDictObject *mp) #endif } +static inline void +ensure_shared_on_keys_version_assignment(PyDictObject *mp) +{ + ASSERT_DICT_LOCKED((PyObject *) mp); + #ifdef Py_GIL_DISABLED + if (!IS_DICT_SHARED(mp)) { + // This ensures that a concurrent resize operation will delay + // freeing the old keys or values using QSBR, which is necessary to + // safely allow concurrent reads without locking. + SET_DICT_SHARED(mp); + } + #endif +} + #ifdef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE int @@ -1644,7 +1658,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, } _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); @@ -1686,7 +1700,7 @@ insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) ix = unicodekeys_lookup_unicode(keys, key, hash); if (ix == DKIX_EMPTY && keys->dk_usable > 0) { // Insert into new slot - keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(keys, hash); ix = keys->dk_nentries; dictkeys_set_index(keys, hashpos, ix); @@ -2617,7 +2631,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, ASSERT_CONSISTENT(mp); } else { - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); if (DK_IS_UNICODE(mp->ma_keys)) { PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -4429,7 +4443,7 @@ dict_popitem_impl(PyDictObject *self) return NULL; } } - self->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(self->ma_keys->dk_version, 0); /* Pop last item */ PyObject *key, *value; @@ -7417,20 +7431,54 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) dictkeys_decref(interp, keys, false); } -uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, - PyDictKeysObject *dictkeys) +static inline uint32_t +get_next_dict_keys_version(PyInterpreterState *interp) { - if (dictkeys->dk_version != 0) { - return dictkeys->dk_version; - } +#ifdef Py_GIL_DISABLED + uint32_t v; + do { + v = _Py_atomic_load_uint32_relaxed( + &interp->dict_state.next_keys_version); + if (v == 0) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint32( + &interp->dict_state.next_keys_version, &v, v + 1)); +#else if (interp->dict_state.next_keys_version == 0) { return 0; } uint32_t v = interp->dict_state.next_keys_version++; - dictkeys->dk_version = v; +#endif return v; } +// In free-threaded builds the caller must ensure that the keys object is not +// being mutated concurrently by another thread. +uint32_t +_PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, + PyDictKeysObject *dictkeys) +{ + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return dk_version; + } + dk_version = get_next_dict_keys_version(interp); + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); + return dk_version; +} + +uint32_t +_PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp, + PyDictObject *dict) +{ + ASSERT_DICT_LOCKED((PyObject *) dict); + uint32_t dk_version = + _PyDictKeys_GetVersionForCurrentState(interp, dict->ma_keys); + ensure_shared_on_keys_version_assignment(dict); + return dk_version; +} + static inline int validate_watcher_id(PyInterpreterState *interp, int watcher_id) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 1f2387f..4ba4728 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -289,12 +289,14 @@ functions is running. */ +#ifndef Py_GIL_DISABLED static inline struct _func_version_cache_item * get_cache_item(PyInterpreterState *interp, uint32_t version) { return interp->func_state.func_version_cache + (version % FUNC_VERSION_CACHE_SIZE); } +#endif void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7ffe2f5..71b1dc0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1569,7 +1569,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_GLOBAL, (counter/1 -- )) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -1578,7 +1578,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // res[1] because we need a pointer to res to pass it to _PyEval_LoadGlobalStackRef @@ -1599,16 +1599,18 @@ dummy_func( op(_GUARD_GLOBALS_VERSION, (version/1 --)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + assert(DK_IS_UNICODE(keys)); } op(_GUARD_GLOBALS_VERSION_PUSH_KEYS, (version / 1 -- globals_keys: PyDictKeysObject *)) { PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } @@ -1616,33 +1618,44 @@ dummy_func( { PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict)); - DEOPT_IF(dict->ma_keys->dk_version != version); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } op(_LOAD_GLOBAL_MODULE_FROM_KEYS, (index/1, globals_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(globals_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } op(_LOAD_GLOBAL_BUILTINS_FROM_KEYS, (index/1, builtins_keys: PyDictKeysObject* -- res, null if (oparg & 1))) { PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEAD(builtins_keys); SYNC_SP(); DEOPT_IF(res_o == NULL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } macro(LOAD_GLOBAL_MODULE) = diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 976a342..8acf7a4 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1870,11 +1870,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - assert(DK_IS_UNICODE(dict->ma_keys)); + assert(DK_IS_UNICODE(keys)); break; } @@ -1886,11 +1887,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - globals_keys = dict->ma_keys; + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); stack_pointer[0].bits = (uintptr_t)globals_keys; stack_pointer += 1; @@ -1906,11 +1908,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - if (dict->ma_keys->dk_version != version) { + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + if (FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - builtins_keys = dict->ma_keys; + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); stack_pointer[0].bits = (uintptr_t)builtins_keys; stack_pointer += 1; @@ -1926,17 +1929,25 @@ globals_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); @@ -1952,17 +1963,25 @@ builtins_keys = (PyDictKeysObject *)stack_pointer[-1].bits; uint16_t index = (uint16_t)CURRENT_OPERAND0(); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); if (res_o == NULL) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + if (!increfed) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; stack_pointer += 1 + (oparg & 1); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index f3db2f9..8896229 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6098,7 +6098,7 @@ { uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { PyObject *name = GETITEM(FRAME_CO_NAMES, oparg>>1); next_instr = this_instr; @@ -6109,7 +6109,7 @@ } OPCODE_DEFERRED_INC(LOAD_GLOBAL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 1 cache entry */ /* Skip 1 cache entry */ @@ -6144,28 +6144,35 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - assert(DK_IS_UNICODE(dict->ma_keys)); + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + assert(DK_IS_UNICODE(keys)); } // _GUARD_BUILTINS_VERSION_PUSH_KEYS { uint16_t version = read_u16(&this_instr[3].cache); PyDictObject *dict = (PyDictObject *)BUILTINS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - builtins_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + builtins_keys = keys; assert(DK_IS_UNICODE(builtins_keys)); } // _LOAD_GLOBAL_BUILTINS_FROM_KEYS { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(builtins_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; @@ -6188,8 +6195,9 @@ uint16_t version = read_u16(&this_instr[2].cache); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(!PyDict_CheckExact(dict), LOAD_GLOBAL); - DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); - globals_keys = dict->ma_keys; + PyDictKeysObject *keys = FT_ATOMIC_LOAD_PTR_ACQUIRE(dict->ma_keys); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(keys->dk_version) != version, LOAD_GLOBAL); + globals_keys = keys; assert(DK_IS_UNICODE(globals_keys)); } /* Skip 1 cache entry */ @@ -6197,12 +6205,17 @@ { uint16_t index = read_u16(&this_instr[4].cache); PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(globals_keys); - PyObject *res_o = entries[index].me_value; + PyObject *res_o = FT_ATOMIC_LOAD_PTR_RELAXED(entries[index].me_value); DEOPT_IF(res_o == NULL, LOAD_GLOBAL); + #if Py_GIL_DISABLED + int increfed = _Py_TryIncrefCompareStackRef(&entries[index].me_value, res_o, &res); + DEOPT_IF(!increfed, LOAD_GLOBAL); + #else Py_INCREF(res_o); + res = PyStackRef_FromPyObjectSteal(res_o); + #endif STAT_INC(LOAD_GLOBAL, hit); null = PyStackRef_NULL; - res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[0] = res; if (oparg & 1) stack_pointer[1] = null; diff --git a/Python/optimizer.c b/Python/optimizer.c index bc2ecc0..6a23221 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -205,8 +205,8 @@ _PyOptimizer_Optimize( return 1; } -_PyExecutorObject * -_Py_GetExecutor(PyCodeObject *code, int offset) +static _PyExecutorObject * +get_executor_lock_held(PyCodeObject *code, int offset) { int code_len = (int)Py_SIZE(code); for (int i = 0 ; i < code_len;) { @@ -222,6 +222,16 @@ _Py_GetExecutor(PyCodeObject *code, int offset) return NULL; } +_PyExecutorObject * +_Py_GetExecutor(PyCodeObject *code, int offset) +{ + _PyExecutorObject *executor; + Py_BEGIN_CRITICAL_SECTION(code); + executor = get_executor_lock_held(code, offset); + Py_END_CRITICAL_SECTION(); + return executor; +} + static PyObject * is_valid(PyObject *self, PyObject *Py_UNUSED(ignored)) { diff --git a/Python/specialize.c b/Python/specialize.c index ad41dfc..af37e24 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1519,12 +1519,12 @@ PyObject *descr, DescriptorClassification kind, bool is_method) return 1; } -void -_Py_Specialize_LoadGlobal( +static void +specialize_load_global_lock_held( PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name) { - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_GLOBAL] == INLINE_CACHE_ENTRIES_LOAD_GLOBAL); /* Use inline cache */ _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)(instr + 1); @@ -1549,8 +1549,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t keys_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (keys_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1561,8 +1561,8 @@ _Py_Specialize_LoadGlobal( } cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)keys_version; - instr->op.code = LOAD_GLOBAL_MODULE; - goto success; + specialize(instr, LOAD_GLOBAL_MODULE); + return; } if (!PyDict_CheckExact(builtins)) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_LOAD_GLOBAL_NON_DICT); @@ -1582,8 +1582,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t globals_version = _PyDictKeys_GetVersionForCurrentState( - interp, globals_keys); + uint32_t globals_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) globals); if (globals_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1592,8 +1592,8 @@ _Py_Specialize_LoadGlobal( SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_RANGE); goto fail; } - uint32_t builtins_version = _PyDictKeys_GetVersionForCurrentState( - interp, builtin_keys); + uint32_t builtins_version = _PyDict_GetKeysVersionForCurrentState( + interp, (PyDictObject*) builtins); if (builtins_version == 0) { SPECIALIZATION_FAIL(LOAD_GLOBAL, SPEC_FAIL_OUT_OF_VERSIONS); goto fail; @@ -1605,18 +1605,20 @@ _Py_Specialize_LoadGlobal( cache->index = (uint16_t)index; cache->module_keys_version = (uint16_t)globals_version; cache->builtin_keys_version = (uint16_t)builtins_version; - instr->op.code = LOAD_GLOBAL_BUILTIN; - goto success; -fail: - STAT_INC(LOAD_GLOBAL, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_GLOBAL; - cache->counter = adaptive_counter_backoff(cache->counter); + specialize(instr, LOAD_GLOBAL_BUILTIN); return; -success: - STAT_INC(LOAD_GLOBAL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); +fail: + unspecialize(instr); +} + +void +_Py_Specialize_LoadGlobal( + PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name) +{ + Py_BEGIN_CRITICAL_SECTION2(globals, builtins); + specialize_load_global_lock_held(globals, builtins, instr, name); + Py_END_CRITICAL_SECTION2(); } #ifdef Py_STATS diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 96b3244..e02e07e 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -623,6 +623,9 @@ NON_ESCAPING_FUNCTIONS = ( "_Py_NewRef", "_Py_SINGLETON", "_Py_STR", + "_Py_TryIncrefCompare", + "_Py_TryIncrefCompareStackRef", + "_Py_atomic_load_ptr_acquire", "_Py_atomic_load_uintptr_relaxed", "_Py_set_eval_breaker_bit", "advance_backoff_counter", -- cgit v0.12