diff options
author | mpage <mpage@meta.com> | 2024-11-21 19:22:21 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-21 19:22:21 (GMT) |
commit | 09c240f20c47db126ad7e162df41e5c2596962d4 (patch) | |
tree | b9d2c0c6fec8392210040e554bb6569278430df3 /Objects | |
parent | 9dabace39d118ec7a204b6970f8a3f475a11522c (diff) | |
download | cpython-09c240f20c47db126ad7e162df41e5c2596962d4.zip cpython-09c240f20c47db126ad7e162df41e5c2596962d4.tar.gz cpython-09c240f20c47db126ad7e162df41e5c2596962d4.tar.bz2 |
gh-115999: Specialize `LOAD_GLOBAL` in free-threaded builds (#126607)
Enable specialization of LOAD_GLOBAL in free-threaded builds.
Thread-safety of specialization in free-threaded builds is provided by the following:
A critical section is held on both the globals and builtins objects during specialization. This ensures we get an atomic view of both builtins and globals during specialization.
Generation of new dict keys versions is made atomic in free-threaded builds.
Existing helpers are used to atomically modify the opcode.
Thread-safety of specialized instructions in free-threaded builds is provided by the following:
Relaxed atomics are used when loading and storing dict keys versions. This avoids potential data races as the dict keys versions are read without holding the dictionary's per-object lock in version guards.
Dict keys objects are passed from keys version guards to the downstream uops. This ensures that we are loading from the correct offset in the keys object. Once a unicode key has been stored in a keys object for a combined dictionary in free-threaded builds, the offset at which it is stored will never be reused for a different key. Therefore, once the version guard passes, we know that we are reading from the correct offset.
The dictionary read fast-path is used to read values from the dictionary once we know the correct offset.
Diffstat (limited to 'Objects')
-rw-r--r-- | Objects/dictobject.c | 68 | ||||
-rw-r--r-- | Objects/funcobject.c | 2 |
2 files changed, 60 insertions, 10 deletions
diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 393e9f9..49b213e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1285,6 +1285,20 @@ ensure_shared_on_resize(PyDictObject *mp) #endif } +static inline void +ensure_shared_on_keys_version_assignment(PyDictObject *mp) +{ + ASSERT_DICT_LOCKED((PyObject *) mp); + #ifdef Py_GIL_DISABLED + if (!IS_DICT_SHARED(mp)) { + // This ensures that a concurrent resize operation will delay + // freeing the old keys or values using QSBR, which is necessary to + // safely allow concurrent reads without locking. + SET_DICT_SHARED(mp); + } + #endif +} + #ifdef Py_GIL_DISABLED static inline Py_ALWAYS_INLINE int @@ -1644,7 +1658,7 @@ insert_combined_dict(PyInterpreterState *interp, PyDictObject *mp, } _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(mp->ma_keys, hash); dictkeys_set_index(mp->ma_keys, hashpos, mp->ma_keys->dk_nentries); @@ -1686,7 +1700,7 @@ insert_split_key(PyDictKeysObject *keys, PyObject *key, Py_hash_t hash) ix = unicodekeys_lookup_unicode(keys, key, hash); if (ix == DKIX_EMPTY && keys->dk_usable > 0) { // Insert into new slot - keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(keys->dk_version, 0); Py_ssize_t hashpos = find_empty_slot(keys, hash); ix = keys->dk_nentries; dictkeys_set_index(keys, hashpos, ix); @@ -2617,7 +2631,7 @@ delitem_common(PyDictObject *mp, Py_hash_t hash, Py_ssize_t ix, ASSERT_CONSISTENT(mp); } else { - mp->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(mp->ma_keys->dk_version, 0); dictkeys_set_index(mp->ma_keys, hashpos, DKIX_DUMMY); if (DK_IS_UNICODE(mp->ma_keys)) { PyDictUnicodeEntry *ep = &DK_UNICODE_ENTRIES(mp->ma_keys)[ix]; @@ -4429,7 +4443,7 @@ dict_popitem_impl(PyDictObject *self) return NULL; } } - self->ma_keys->dk_version = 0; + FT_ATOMIC_STORE_UINT32_RELAXED(self->ma_keys->dk_version, 0); /* Pop 
last item */ PyObject *key, *value; @@ -7417,20 +7431,54 @@ _PyDictKeys_DecRef(PyDictKeysObject *keys) dictkeys_decref(interp, keys, false); } -uint32_t _PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, - PyDictKeysObject *dictkeys) +static inline uint32_t +get_next_dict_keys_version(PyInterpreterState *interp) { - if (dictkeys->dk_version != 0) { - return dictkeys->dk_version; - } +#ifdef Py_GIL_DISABLED + uint32_t v; + do { + v = _Py_atomic_load_uint32_relaxed( + &interp->dict_state.next_keys_version); + if (v == 0) { + return 0; + } + } while (!_Py_atomic_compare_exchange_uint32( + &interp->dict_state.next_keys_version, &v, v + 1)); +#else if (interp->dict_state.next_keys_version == 0) { return 0; } uint32_t v = interp->dict_state.next_keys_version++; - dictkeys->dk_version = v; +#endif return v; } +// In free-threaded builds the caller must ensure that the keys object is not +// being mutated concurrently by another thread. +uint32_t +_PyDictKeys_GetVersionForCurrentState(PyInterpreterState *interp, + PyDictKeysObject *dictkeys) +{ + uint32_t dk_version = FT_ATOMIC_LOAD_UINT32_RELAXED(dictkeys->dk_version); + if (dk_version != 0) { + return dk_version; + } + dk_version = get_next_dict_keys_version(interp); + FT_ATOMIC_STORE_UINT32_RELAXED(dictkeys->dk_version, dk_version); + return dk_version; +} + +uint32_t +_PyDict_GetKeysVersionForCurrentState(PyInterpreterState *interp, + PyDictObject *dict) +{ + ASSERT_DICT_LOCKED((PyObject *) dict); + uint32_t dk_version = + _PyDictKeys_GetVersionForCurrentState(interp, dict->ma_keys); + ensure_shared_on_keys_version_assignment(dict); + return dk_version; +} + static inline int validate_watcher_id(PyInterpreterState *interp, int watcher_id) { diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 1f2387f..4ba4728 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -289,12 +289,14 @@ functions is running. 
*/ +#ifndef Py_GIL_DISABLED static inline struct _func_version_cache_item * get_cache_item(PyInterpreterState *interp, uint32_t version) { return interp->func_state.func_version_cache + (version % FUNC_VERSION_CACHE_SIZE); } +#endif void _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version) |