From 1b15c89a17ca3de6b05de5379b8717e9738c51ef Mon Sep 17 00:00:00 2001
From: Neil Schemenauer <nas-github@arctrix.com>
Date: Thu, 19 Dec 2024 10:21:17 -0800
Subject: gh-115999: Specialize `STORE_ATTR` in free-threaded builds.
 (gh-127838)

* Add `_PyDictKeys_StringLookupSplit` which does locking on dict keys and
  use in place of `_PyDictKeys_StringLookup`.

* Change `_PyObject_TryGetInstanceAttribute` to use that function
  in the case of split keys.

* Add `unicodekeys_lookup_split` helper which allows code sharing
  between `_Py_dict_lookup` and `_PyDictKeys_StringLookupSplit`.

* Fix locking for `STORE_ATTR_INSTANCE_VALUE`.  Create
  `_GUARD_TYPE_VERSION_AND_LOCK` uop so that object stays locked and
  `tp_version_tag` cannot change.

* Pass `tp_version_tag` to `specialize_dict_access()`, ensuring
  the version we store on the cache is the correct one (in case of
  it changing during the specalize analysis).

* Split `analyze_descriptor` into `analyze_descriptor_load` and
  `analyze_descriptor_store` since those don't share much logic.
  Add `descriptor_is_class` helper function.

* In `specialize_dict_access`, double check `_PyObject_GetManagedDict()`
  in case we race and dict was materialized before the lock.

* Avoid borrowed references in `_Py_Specialize_StoreAttr()`.

* Use `specialize()` and `unspecialize()` helpers.

* Add unit tests to ensure specializing happens as expected in FT builds.

* Add unit tests to attempt to trigger data races (useful for running under TSAN).

* Add `has_split_table` function to `_testinternalcapi`.
---
 Include/internal/pycore_dict.h             |   1 +
 Include/internal/pycore_opcode_metadata.h  |   4 +-
 Include/internal/pycore_uop_ids.h          | 201 ++++++++++-----------
 Include/internal/pycore_uop_metadata.h     |   6 +-
 Lib/test/test_free_threading/test_races.py | 141 +++++++++++++++
 Lib/test/test_opcache.py                   |  66 +++++++
 Modules/_testinternalcapi.c                |   9 +
 Objects/dictobject.c                       |  66 +++++--
 Python/bytecodes.c                         |  65 +++++--
 Python/executor_cases.c.h                  |  93 +++++++---
 Python/generated_cases.c.h                 |  86 +++++----
 Python/optimizer_cases.c.h                 |   4 +
 Python/specialize.c                        | 271 +++++++++++++++++------------
 13 files changed, 716 insertions(+), 297 deletions(-)

diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
index 6e4a308..7192700 100644
--- a/Include/internal/pycore_dict.h
+++ b/Include/internal/pycore_dict.h
@@ -114,6 +114,7 @@ extern Py_ssize_t _Py_dict_lookup_threadsafe_stackref(PyDictObject *mp, PyObject
 
 extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *);
 extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key);
+extern Py_ssize_t _PyDictKeys_StringLookupSplit(PyDictKeysObject* dictkeys, PyObject *key);
 PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
 PyAPI_FUNC(void) _PyDict_LoadGlobalStackRef(PyDictObject *, PyDictObject *, PyObject *, _PyStackRef *);
 
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
index e09fff0..5fb2368 100644
--- a/Include/internal/pycore_opcode_metadata.h
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -2141,7 +2141,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = {
     [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG },
     [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG },
-    [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG },
+    [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG },
     [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ESCAPES_FLAG },
     [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG },
     [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG },
@@ -2340,7 +2340,7 @@ _PyOpcode_macro_expansion[256] = {
     [SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { _SET_FUNCTION_ATTRIBUTE, 0, 0 } } },
     [SET_UPDATE] = { .nuops = 1, .uops = { { _SET_UPDATE, 0, 0 } } },
     [STORE_ATTR] = { .nuops = 1, .uops = { { _STORE_ATTR, 0, 0 } } },
-    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_NO_DICT, 0, 0 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 } } },
+    [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION_AND_LOCK, 2, 1 }, { _GUARD_DORV_NO_DICT, 0, 0 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 } } },
     [STORE_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 } } },
     [STORE_ATTR_WITH_HINT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_WITH_HINT, 1, 3 } } },
     [STORE_DEREF] = { .nuops = 1, .uops = { { _STORE_DEREF, 0, 0 } } },
diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h
index 5fc57e4..92515b4 100644
--- a/Include/internal/pycore_uop_ids.h
+++ b/Include/internal/pycore_uop_ids.h
@@ -139,15 +139,16 @@ extern "C" {
 #define _GUARD_TOS_FLOAT 386
 #define _GUARD_TOS_INT 387
 #define _GUARD_TYPE_VERSION 388
+#define _GUARD_TYPE_VERSION_AND_LOCK 389
 #define _IMPORT_FROM IMPORT_FROM
 #define _IMPORT_NAME IMPORT_NAME
-#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 389
-#define _INIT_CALL_PY_EXACT_ARGS 390
-#define _INIT_CALL_PY_EXACT_ARGS_0 391
-#define _INIT_CALL_PY_EXACT_ARGS_1 392
-#define _INIT_CALL_PY_EXACT_ARGS_2 393
-#define _INIT_CALL_PY_EXACT_ARGS_3 394
-#define _INIT_CALL_PY_EXACT_ARGS_4 395
+#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 390
+#define _INIT_CALL_PY_EXACT_ARGS 391
+#define _INIT_CALL_PY_EXACT_ARGS_0 392
+#define _INIT_CALL_PY_EXACT_ARGS_1 393
+#define _INIT_CALL_PY_EXACT_ARGS_2 394
+#define _INIT_CALL_PY_EXACT_ARGS_3 395
+#define _INIT_CALL_PY_EXACT_ARGS_4 396
 #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX
 #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW
 #define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER
@@ -160,143 +161,143 @@ extern "C" {
 #define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE
 #define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE
-#define _INTERNAL_INCREMENT_OPT_COUNTER 396
-#define _IS_NONE 397
+#define _INTERNAL_INCREMENT_OPT_COUNTER 397
+#define _IS_NONE 398
 #define _IS_OP IS_OP
-#define _ITER_CHECK_LIST 398
-#define _ITER_CHECK_RANGE 399
-#define _ITER_CHECK_TUPLE 400
-#define _ITER_JUMP_LIST 401
-#define _ITER_JUMP_RANGE 402
-#define _ITER_JUMP_TUPLE 403
-#define _ITER_NEXT_LIST 404
-#define _ITER_NEXT_RANGE 405
-#define _ITER_NEXT_TUPLE 406
-#define _JUMP_TO_TOP 407
+#define _ITER_CHECK_LIST 399
+#define _ITER_CHECK_RANGE 400
+#define _ITER_CHECK_TUPLE 401
+#define _ITER_JUMP_LIST 402
+#define _ITER_JUMP_RANGE 403
+#define _ITER_JUMP_TUPLE 404
+#define _ITER_NEXT_LIST 405
+#define _ITER_NEXT_RANGE 406
+#define _ITER_NEXT_TUPLE 407
+#define _JUMP_TO_TOP 408
 #define _LIST_APPEND LIST_APPEND
 #define _LIST_EXTEND LIST_EXTEND
-#define _LOAD_ATTR 408
-#define _LOAD_ATTR_CLASS 409
-#define _LOAD_ATTR_CLASS_0 410
-#define _LOAD_ATTR_CLASS_1 411
+#define _LOAD_ATTR 409
+#define _LOAD_ATTR_CLASS 410
+#define _LOAD_ATTR_CLASS_0 411
+#define _LOAD_ATTR_CLASS_1 412
 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN
-#define _LOAD_ATTR_INSTANCE_VALUE 412
-#define _LOAD_ATTR_INSTANCE_VALUE_0 413
-#define _LOAD_ATTR_INSTANCE_VALUE_1 414
-#define _LOAD_ATTR_METHOD_LAZY_DICT 415
-#define _LOAD_ATTR_METHOD_NO_DICT 416
-#define _LOAD_ATTR_METHOD_WITH_VALUES 417
-#define _LOAD_ATTR_MODULE 418
-#define _LOAD_ATTR_MODULE_FROM_KEYS 419
-#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 420
-#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 421
-#define _LOAD_ATTR_PROPERTY_FRAME 422
-#define _LOAD_ATTR_SLOT 423
-#define _LOAD_ATTR_SLOT_0 424
-#define _LOAD_ATTR_SLOT_1 425
-#define _LOAD_ATTR_WITH_HINT 426
+#define _LOAD_ATTR_INSTANCE_VALUE 413
+#define _LOAD_ATTR_INSTANCE_VALUE_0 414
+#define _LOAD_ATTR_INSTANCE_VALUE_1 415
+#define _LOAD_ATTR_METHOD_LAZY_DICT 416
+#define _LOAD_ATTR_METHOD_NO_DICT 417
+#define _LOAD_ATTR_METHOD_WITH_VALUES 418
+#define _LOAD_ATTR_MODULE 419
+#define _LOAD_ATTR_MODULE_FROM_KEYS 420
+#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 421
+#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 422
+#define _LOAD_ATTR_PROPERTY_FRAME 423
+#define _LOAD_ATTR_SLOT 424
+#define _LOAD_ATTR_SLOT_0 425
+#define _LOAD_ATTR_SLOT_1 426
+#define _LOAD_ATTR_WITH_HINT 427
 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS
-#define _LOAD_BYTECODE 427
+#define _LOAD_BYTECODE 428
 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT
 #define _LOAD_CONST LOAD_CONST
 #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL
-#define _LOAD_CONST_INLINE 428
-#define _LOAD_CONST_INLINE_BORROW 429
-#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 430
-#define _LOAD_CONST_INLINE_WITH_NULL 431
+#define _LOAD_CONST_INLINE 429
+#define _LOAD_CONST_INLINE_BORROW 430
+#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 431
+#define _LOAD_CONST_INLINE_WITH_NULL 432
 #define _LOAD_DEREF LOAD_DEREF
-#define _LOAD_FAST 432
-#define _LOAD_FAST_0 433
-#define _LOAD_FAST_1 434
-#define _LOAD_FAST_2 435
-#define _LOAD_FAST_3 436
-#define _LOAD_FAST_4 437
-#define _LOAD_FAST_5 438
-#define _LOAD_FAST_6 439
-#define _LOAD_FAST_7 440
+#define _LOAD_FAST 433
+#define _LOAD_FAST_0 434
+#define _LOAD_FAST_1 435
+#define _LOAD_FAST_2 436
+#define _LOAD_FAST_3 437
+#define _LOAD_FAST_4 438
+#define _LOAD_FAST_5 439
+#define _LOAD_FAST_6 440
+#define _LOAD_FAST_7 441
 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR
 #define _LOAD_FAST_CHECK LOAD_FAST_CHECK
 #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST
 #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF
 #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS
-#define _LOAD_GLOBAL 441
-#define _LOAD_GLOBAL_BUILTINS 442
-#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 443
-#define _LOAD_GLOBAL_MODULE 444
-#define _LOAD_GLOBAL_MODULE_FROM_KEYS 445
+#define _LOAD_GLOBAL 442
+#define _LOAD_GLOBAL_BUILTINS 443
+#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 444
+#define _LOAD_GLOBAL_MODULE 445
+#define _LOAD_GLOBAL_MODULE_FROM_KEYS 446
 #define _LOAD_LOCALS LOAD_LOCALS
 #define _LOAD_NAME LOAD_NAME
-#define _LOAD_SMALL_INT 446
-#define _LOAD_SMALL_INT_0 447
-#define _LOAD_SMALL_INT_1 448
-#define _LOAD_SMALL_INT_2 449
-#define _LOAD_SMALL_INT_3 450
+#define _LOAD_SMALL_INT 447
+#define _LOAD_SMALL_INT_0 448
+#define _LOAD_SMALL_INT_1 449
+#define _LOAD_SMALL_INT_2 450
+#define _LOAD_SMALL_INT_3 451
 #define _LOAD_SPECIAL LOAD_SPECIAL
 #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR
 #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD
-#define _MAKE_CALLARGS_A_TUPLE 451
+#define _MAKE_CALLARGS_A_TUPLE 452
 #define _MAKE_CELL MAKE_CELL
 #define _MAKE_FUNCTION MAKE_FUNCTION
-#define _MAKE_WARM 452
+#define _MAKE_WARM 453
 #define _MAP_ADD MAP_ADD
 #define _MATCH_CLASS MATCH_CLASS
 #define _MATCH_KEYS MATCH_KEYS
 #define _MATCH_MAPPING MATCH_MAPPING
 #define _MATCH_SEQUENCE MATCH_SEQUENCE
-#define _MAYBE_EXPAND_METHOD 453
-#define _MAYBE_EXPAND_METHOD_KW 454
-#define _MONITOR_CALL 455
-#define _MONITOR_JUMP_BACKWARD 456
-#define _MONITOR_RESUME 457
+#define _MAYBE_EXPAND_METHOD 454
+#define _MAYBE_EXPAND_METHOD_KW 455
+#define _MONITOR_CALL 456
+#define _MONITOR_JUMP_BACKWARD 457
+#define _MONITOR_RESUME 458
 #define _NOP NOP
 #define _POP_EXCEPT POP_EXCEPT
-#define _POP_JUMP_IF_FALSE 458
-#define _POP_JUMP_IF_TRUE 459
+#define _POP_JUMP_IF_FALSE 459
+#define _POP_JUMP_IF_TRUE 460
 #define _POP_TOP POP_TOP
-#define _POP_TOP_LOAD_CONST_INLINE_BORROW 460
+#define _POP_TOP_LOAD_CONST_INLINE_BORROW 461
 #define _PUSH_EXC_INFO PUSH_EXC_INFO
-#define _PUSH_FRAME 461
+#define _PUSH_FRAME 462
 #define _PUSH_NULL PUSH_NULL
-#define _PY_FRAME_GENERAL 462
-#define _PY_FRAME_KW 463
-#define _QUICKEN_RESUME 464
-#define _REPLACE_WITH_TRUE 465
+#define _PY_FRAME_GENERAL 463
+#define _PY_FRAME_KW 464
+#define _QUICKEN_RESUME 465
+#define _REPLACE_WITH_TRUE 466
 #define _RESUME_CHECK RESUME_CHECK
 #define _RETURN_GENERATOR RETURN_GENERATOR
 #define _RETURN_VALUE RETURN_VALUE
-#define _SAVE_RETURN_OFFSET 466
-#define _SEND 467
-#define _SEND_GEN_FRAME 468
+#define _SAVE_RETURN_OFFSET 467
+#define _SEND 468
+#define _SEND_GEN_FRAME 469
 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS
 #define _SET_ADD SET_ADD
 #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE
 #define _SET_UPDATE SET_UPDATE
-#define _START_EXECUTOR 469
-#define _STORE_ATTR 470
-#define _STORE_ATTR_INSTANCE_VALUE 471
-#define _STORE_ATTR_SLOT 472
-#define _STORE_ATTR_WITH_HINT 473
+#define _START_EXECUTOR 470
+#define _STORE_ATTR 471
+#define _STORE_ATTR_INSTANCE_VALUE 472
+#define _STORE_ATTR_SLOT 473
+#define _STORE_ATTR_WITH_HINT 474
 #define _STORE_DEREF STORE_DEREF
-#define _STORE_FAST 474
-#define _STORE_FAST_0 475
-#define _STORE_FAST_1 476
-#define _STORE_FAST_2 477
-#define _STORE_FAST_3 478
-#define _STORE_FAST_4 479
-#define _STORE_FAST_5 480
-#define _STORE_FAST_6 481
-#define _STORE_FAST_7 482
+#define _STORE_FAST 475
+#define _STORE_FAST_0 476
+#define _STORE_FAST_1 477
+#define _STORE_FAST_2 478
+#define _STORE_FAST_3 479
+#define _STORE_FAST_4 480
+#define _STORE_FAST_5 481
+#define _STORE_FAST_6 482
+#define _STORE_FAST_7 483
 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST
 #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST
 #define _STORE_GLOBAL STORE_GLOBAL
 #define _STORE_NAME STORE_NAME
-#define _STORE_SLICE 483
-#define _STORE_SUBSCR 484
+#define _STORE_SLICE 484
+#define _STORE_SUBSCR 485
 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT
 #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT
 #define _SWAP SWAP
-#define _TIER2_RESUME_CHECK 485
-#define _TO_BOOL 486
+#define _TIER2_RESUME_CHECK 486
+#define _TO_BOOL 487
 #define _TO_BOOL_BOOL TO_BOOL_BOOL
 #define _TO_BOOL_INT TO_BOOL_INT
 #define _TO_BOOL_LIST TO_BOOL_LIST
@@ -306,13 +307,13 @@ extern "C" {
 #define _UNARY_NEGATIVE UNARY_NEGATIVE
 #define _UNARY_NOT UNARY_NOT
 #define _UNPACK_EX UNPACK_EX
-#define _UNPACK_SEQUENCE 487
+#define _UNPACK_SEQUENCE 488
 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST
 #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE
 #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE
 #define _WITH_EXCEPT_START WITH_EXCEPT_START
 #define _YIELD_VALUE YIELD_VALUE
-#define MAX_UOP_ID 487
+#define MAX_UOP_ID 488
 
 #ifdef __cplusplus
 }
diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h
index eadfda4..e71194b 100644
--- a/Include/internal/pycore_uop_metadata.h
+++ b/Include/internal/pycore_uop_metadata.h
@@ -148,6 +148,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_GUARD_TYPE_VERSION] = HAS_EXIT_FLAG,
+    [_GUARD_TYPE_VERSION_AND_LOCK] = HAS_EXIT_FLAG,
     [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG,
     [_LOAD_ATTR_INSTANCE_VALUE_0] = HAS_DEOPT_FLAG,
     [_LOAD_ATTR_INSTANCE_VALUE_1] = HAS_DEOPT_FLAG,
@@ -167,7 +168,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = {
     [_GUARD_DORV_NO_DICT] = HAS_EXIT_FLAG,
     [_STORE_ATTR_INSTANCE_VALUE] = 0,
     [_STORE_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG,
-    [_STORE_ATTR_SLOT] = 0,
+    [_STORE_ATTR_SLOT] = HAS_DEOPT_FLAG,
     [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG,
     [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG,
     [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG,
@@ -428,6 +429,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = {
     [_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT",
     [_GUARD_TOS_INT] = "_GUARD_TOS_INT",
     [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION",
+    [_GUARD_TYPE_VERSION_AND_LOCK] = "_GUARD_TYPE_VERSION_AND_LOCK",
     [_IMPORT_FROM] = "_IMPORT_FROM",
     [_IMPORT_NAME] = "_IMPORT_NAME",
     [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS",
@@ -839,6 +841,8 @@ int _PyUop_num_popped(int opcode, int oparg)
             return 1;
         case _GUARD_TYPE_VERSION:
             return 0;
+        case _GUARD_TYPE_VERSION_AND_LOCK:
+            return 0;
         case _CHECK_MANAGED_OBJECT_HAS_VALUES:
             return 0;
         case _LOAD_ATTR_INSTANCE_VALUE_0:
diff --git a/Lib/test/test_free_threading/test_races.py b/Lib/test/test_free_threading/test_races.py
index 09e1d52..6998255 100644
--- a/Lib/test/test_free_threading/test_races.py
+++ b/Lib/test/test_free_threading/test_races.py
@@ -4,6 +4,7 @@ import functools
 import threading
 import time
 import unittest
+import _testinternalcapi
 
 from test.support import threading_helper
 
@@ -129,6 +130,146 @@ class TestRaces(TestBase):
         # with the cell binding being changed).
         do_race(access, mutate)
 
+    def test_racing_to_bool(self):
+
+        seq = [1]
+
+        class C:
+            def __bool__(self):
+                return False
+
+        def access():
+            if seq:
+                return 1
+            else:
+                return 2
+
+        def mutate():
+            nonlocal seq
+            seq = [1]
+            time.sleep(0)
+            seq = C()
+            time.sleep(0)
+
+        do_race(access, mutate)
+
+    def test_racing_store_attr_slot(self):
+        class C:
+            __slots__ = ['x', '__dict__']
+
+        c = C()
+
+        def set_slot():
+            for i in range(10):
+                c.x = i
+            time.sleep(0)
+
+        def change_type():
+            def set_x(self, x):
+                pass
+
+            def get_x(self):
+                pass
+
+            C.x = property(get_x, set_x)
+            time.sleep(0)
+            del C.x
+            time.sleep(0)
+
+        do_race(set_slot, change_type)
+
+        def set_getattribute():
+            C.__getattribute__ = lambda self, x: x
+            time.sleep(0)
+            del C.__getattribute__
+            time.sleep(0)
+
+        do_race(set_slot, set_getattribute)
+
+    def test_racing_store_attr_instance_value(self):
+        class C:
+            pass
+
+        c = C()
+
+        def set_value():
+            for i in range(100):
+                c.x = i
+
+        set_value()
+
+        def read():
+            x = c.x
+
+        def mutate():
+            # Adding a property for 'x' should unspecialize it.
+            C.x = property(lambda self: None, lambda self, x: None)
+            time.sleep(0)
+            del C.x
+            time.sleep(0)
+
+        do_race(read, mutate)
+
+    def test_racing_store_attr_with_hint(self):
+        class C:
+            pass
+
+        c = C()
+        for i in range(29):
+            setattr(c, f"_{i}", None)
+
+        def set_value():
+            for i in range(100):
+                c.x = i
+
+        set_value()
+
+        def read():
+            x = c.x
+
+        def mutate():
+            # Adding a property for 'x' should unspecialize it.
+            C.x = property(lambda self: None, lambda self, x: None)
+            time.sleep(0)
+            del C.x
+            time.sleep(0)
+
+        do_race(read, mutate)
+
+    def make_shared_key_dict(self):
+        class C:
+            pass
+
+        a = C()
+        a.x = 1
+        return a.__dict__
+
+    def test_racing_store_attr_dict(self):
+        """Test STORE_ATTR with various dictionary types."""
+        class C:
+            pass
+
+        c = C()
+
+        def set_value():
+            for i in range(20):
+                c.x = i
+
+        def mutate():
+            nonlocal c
+            c.x = 1
+            self.assertTrue(_testinternalcapi.has_inline_values(c))
+            for i in range(30):
+                setattr(c, f"_{i}", None)
+            self.assertFalse(_testinternalcapi.has_inline_values(c.__dict__))
+            c.__dict__ = self.make_shared_key_dict()
+            self.assertTrue(_testinternalcapi.has_split_table(c.__dict__))
+            c.__dict__[1] = None
+            self.assertFalse(_testinternalcapi.has_split_table(c.__dict__))
+            c = C()
+
+        do_race(set_value, mutate)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py
index 94709e2..ad0b0c4 100644
--- a/Lib/test/test_opcache.py
+++ b/Lib/test/test_opcache.py
@@ -1385,6 +1385,72 @@ class TestSpecializer(TestBase):
 
     @cpython_only
     @requires_specialization_ft
+    def test_store_attr_slot(self):
+        class C:
+            __slots__ = ['x']
+
+        def set_slot():
+            c = C()
+            for i in range(100):
+                c.x = i
+
+        set_slot()
+
+        self.assert_specialized(set_slot, "STORE_ATTR_SLOT")
+        self.assert_no_opcode(set_slot, "STORE_ATTR")
+
+        # Adding a property for 'x' should unspecialize it.
+        C.x = property(lambda self: None, lambda self, x: None)
+        set_slot()
+        self.assert_no_opcode(set_slot, "STORE_ATTR_SLOT")
+
+    @cpython_only
+    @requires_specialization_ft
+    def test_store_attr_instance_value(self):
+        class C:
+            pass
+
+        def set_value():
+            c = C()
+            for i in range(100):
+                c.x = i
+
+        set_value()
+
+        self.assert_specialized(set_value, "STORE_ATTR_INSTANCE_VALUE")
+        self.assert_no_opcode(set_value, "STORE_ATTR")
+
+        # Adding a property for 'x' should unspecialize it.
+        C.x = property(lambda self: None, lambda self, x: None)
+        set_value()
+        self.assert_no_opcode(set_value, "STORE_ATTR_INSTANCE_VALUE")
+
+    @cpython_only
+    @requires_specialization_ft
+    def test_store_attr_with_hint(self):
+        class C:
+            pass
+
+        c = C()
+        for i in range(29):
+            setattr(c, f"_{i}", None)
+
+        def set_value():
+            for i in range(100):
+                c.x = i
+
+        set_value()
+
+        self.assert_specialized(set_value, "STORE_ATTR_WITH_HINT")
+        self.assert_no_opcode(set_value, "STORE_ATTR")
+
+        # Adding a property for 'x' should unspecialize it.
+        C.x = property(lambda self: None, lambda self, x: None)
+        set_value()
+        self.assert_no_opcode(set_value, "STORE_ATTR_WITH_HINT")
+
+    @cpython_only
+    @requires_specialization_ft
     def test_to_bool(self):
         def to_bool_bool():
             true_cnt, false_cnt = 0, 0
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c
index 014f899..150d34d 100644
--- a/Modules/_testinternalcapi.c
+++ b/Modules/_testinternalcapi.c
@@ -1989,6 +1989,14 @@ has_inline_values(PyObject *self, PyObject *obj)
     Py_RETURN_FALSE;
 }
 
+static PyObject *
+has_split_table(PyObject *self, PyObject *obj)
+{
+    if (PyDict_Check(obj) && _PyDict_HasSplitTable((PyDictObject *)obj)) {
+        Py_RETURN_TRUE;
+    }
+    Py_RETURN_FALSE;
+}
 
 // Circumvents standard version assignment machinery - use with caution and only on
 // short-lived heap types
@@ -2139,6 +2147,7 @@ static PyMethodDef module_functions[] = {
     {"get_rare_event_counters", get_rare_event_counters, METH_NOARGS},
     {"reset_rare_event_counters", reset_rare_event_counters, METH_NOARGS},
     {"has_inline_values", has_inline_values, METH_O},
+    {"has_split_table", has_split_table, METH_O},
     {"type_assign_specific_version_unsafe", type_assign_specific_version_unsafe, METH_VARARGS,
      PyDoc_STR("forcefully assign type->tp_version_tag")},
 
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 05c93a3..2a054c3 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1129,6 +1129,35 @@ dictkeys_generic_lookup(PyDictObject *mp, PyDictKeysObject* dk, PyObject *key, P
     return do_lookup(mp, dk, key, hash, compare_generic);
 }
 
+#ifdef Py_GIL_DISABLED
+static Py_ssize_t
+unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key,
+                                      Py_hash_t hash);
+#endif
+
+static Py_ssize_t
+unicodekeys_lookup_split(PyDictKeysObject* dk, PyObject *key, Py_hash_t hash)
+{
+    Py_ssize_t ix;
+    assert(dk->dk_kind == DICT_KEYS_SPLIT);
+    assert(PyUnicode_CheckExact(key));
+
+#ifdef Py_GIL_DISABLED
+    // A split dictionaries keys can be mutated by other dictionaries
+    // but if we have a unicode key we can avoid locking the shared
+    // keys.
+    ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
+    if (ix == DKIX_KEY_CHANGED) {
+        LOCK_KEYS(dk);
+        ix = unicodekeys_lookup_unicode(dk, key, hash);
+        UNLOCK_KEYS(dk);
+    }
+#else
+    ix = unicodekeys_lookup_unicode(dk, key, hash);
+#endif
+    return ix;
+}
+
 /* Lookup a string in a (all unicode) dict keys.
  * Returns DKIX_ERROR if key is not a string,
  * or if the dict keys is not all strings.
@@ -1153,13 +1182,24 @@ _PyDictKeys_StringLookup(PyDictKeysObject* dk, PyObject *key)
     return unicodekeys_lookup_unicode(dk, key, hash);
 }
 
-#ifdef Py_GIL_DISABLED
-
-static Py_ssize_t
-unicodekeys_lookup_unicode_threadsafe(PyDictKeysObject* dk, PyObject *key,
-                                      Py_hash_t hash);
-
-#endif
+/* Like _PyDictKeys_StringLookup() but only works on split keys.  Note
+ * that in free-threaded builds this locks the keys object as required.
+ */
+Py_ssize_t
+_PyDictKeys_StringLookupSplit(PyDictKeysObject* dk, PyObject *key)
+{
+    assert(dk->dk_kind == DICT_KEYS_SPLIT);
+    assert(PyUnicode_CheckExact(key));
+    Py_hash_t hash = unicode_get_hash(key);
+    if (hash == -1) {
+        hash = PyUnicode_Type.tp_hash(key);
+        if (hash == -1) {
+            PyErr_Clear();
+            return DKIX_ERROR;
+        }
+    }
+    return unicodekeys_lookup_split(dk, key, hash);
+}
 
 /*
 The basic lookup function used by all operations.
@@ -1192,15 +1232,7 @@ start:
         if (PyUnicode_CheckExact(key)) {
 #ifdef Py_GIL_DISABLED
             if (kind == DICT_KEYS_SPLIT) {
-                // A split dictionaries keys can be mutated by other
-                // dictionaries but if we have a unicode key we can avoid
-                // locking the shared keys.
-                ix = unicodekeys_lookup_unicode_threadsafe(dk, key, hash);
-                if (ix == DKIX_KEY_CHANGED) {
-                    LOCK_KEYS(dk);
-                    ix = unicodekeys_lookup_unicode(dk, key, hash);
-                    UNLOCK_KEYS(dk);
-                }
+                ix = unicodekeys_lookup_split(dk, key, hash);
             }
             else {
                 ix = unicodekeys_lookup_unicode(dk, key, hash);
@@ -6967,7 +6999,7 @@ _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr
 
     PyDictKeysObject *keys = CACHED_KEYS(Py_TYPE(obj));
     assert(keys != NULL);
-    Py_ssize_t ix = _PyDictKeys_StringLookup(keys, name);
+    Py_ssize_t ix = _PyDictKeys_StringLookupSplit(keys, name);
     if (ix == DKIX_EMPTY) {
         *attr = NULL;
         return true;
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index cf089c3..30c12dd 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -1467,7 +1467,7 @@ dummy_func(
         };
 
         specializing op(_SPECIALIZE_STORE_ATTR, (counter/1, owner -- owner)) {
-            #if ENABLE_SPECIALIZATION
+            #if ENABLE_SPECIALIZATION_FT
             if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                 next_instr = this_instr;
@@ -1476,7 +1476,7 @@ dummy_func(
             }
             OPCODE_DEFERRED_INC(STORE_ATTR);
             ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-            #endif  /* ENABLE_SPECIALIZATION */
+            #endif  /* ENABLE_SPECIALIZATION_FT */
         }
 
         op(_STORE_ATTR, (v, owner --)) {
@@ -2129,7 +2129,18 @@ dummy_func(
         op(_GUARD_TYPE_VERSION, (type_version/2, owner -- owner)) {
             PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
             assert(type_version != 0);
-            EXIT_IF(tp->tp_version_tag != type_version);
+            EXIT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version);
+        }
+
+        op(_GUARD_TYPE_VERSION_AND_LOCK, (type_version/2, owner -- owner)) {
+            PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
+            assert(type_version != 0);
+            EXIT_IF(!LOCK_OBJECT(owner_o));
+            PyTypeObject *tp = Py_TYPE(owner_o);
+            if (FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version) {
+                UNLOCK_OBJECT(owner_o);
+                EXIT_IF(true);
+            }
         }
 
         op(_CHECK_MANAGED_OBJECT_HAS_VALUES, (owner -- owner)) {
@@ -2336,8 +2347,11 @@ dummy_func(
 
             assert(Py_TYPE(owner_o)->tp_dictoffset < 0);
             assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_INLINE_VALUES);
-            EXIT_IF(_PyObject_GetManagedDict(owner_o));
-            EXIT_IF(_PyObject_InlineValues(owner_o)->valid == 0);
+            if (_PyObject_GetManagedDict(owner_o) ||
+                    !FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(owner_o)->valid)) {
+                UNLOCK_OBJECT(owner_o);
+                EXIT_IF(true);
+            }
         }
 
         op(_STORE_ATTR_INSTANCE_VALUE, (offset/1, value, owner --)) {
@@ -2347,21 +2361,20 @@ dummy_func(
             assert(_PyObject_GetManagedDict(owner_o) == NULL);
             PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset);
             PyObject *old_value = *value_ptr;
-            *value_ptr = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value));
             if (old_value == NULL) {
                 PyDictValues *values = _PyObject_InlineValues(owner_o);
                 Py_ssize_t index = value_ptr - values->values;
                 _PyDictValues_AddToInsertionOrder(values, index);
             }
-            else {
-                Py_DECREF(old_value);
-            }
+            UNLOCK_OBJECT(owner_o);
+            Py_XDECREF(old_value);
             PyStackRef_CLOSE(owner);
         }
 
         macro(STORE_ATTR_INSTANCE_VALUE) =
             unused/1 +
-            _GUARD_TYPE_VERSION +
+            _GUARD_TYPE_VERSION_AND_LOCK +
             _GUARD_DORV_NO_DICT +
             _STORE_ATTR_INSTANCE_VALUE;
 
@@ -2370,16 +2383,34 @@ dummy_func(
             assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
             PyDictObject *dict = _PyObject_GetManagedDict(owner_o);
             DEOPT_IF(dict == NULL);
+            DEOPT_IF(!LOCK_OBJECT(dict));
+            #ifdef Py_GIL_DISABLED
+            if (dict != _PyObject_GetManagedDict(owner_o)) {
+                UNLOCK_OBJECT(dict);
+                DEOPT_IF(true);
+            }
+            #endif
             assert(PyDict_CheckExact((PyObject *)dict));
             PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
-            DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries);
-            DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys));
+            if (hint >= (size_t)dict->ma_keys->dk_nentries ||
+                    !DK_IS_UNICODE(dict->ma_keys)) {
+                UNLOCK_OBJECT(dict);
+                DEOPT_IF(true);
+            }
             PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
-            DEOPT_IF(ep->me_key != name);
+            if (ep->me_key != name) {
+                UNLOCK_OBJECT(dict);
+                DEOPT_IF(true);
+            }
             PyObject *old_value = ep->me_value;
-            DEOPT_IF(old_value == NULL);
+            if (old_value == NULL) {
+                UNLOCK_OBJECT(dict);
+                DEOPT_IF(true);
+            }
             _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
-            ep->me_value = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(ep->me_value, PyStackRef_AsPyObjectSteal(value));
+            UNLOCK_OBJECT(dict);
+
             // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,
             // when dict only holds the strong reference to value in ep->me_value.
             Py_XDECREF(old_value);
@@ -2395,10 +2426,12 @@ dummy_func(
         op(_STORE_ATTR_SLOT, (index/1, value, owner --)) {
             PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
 
+            DEOPT_IF(!LOCK_OBJECT(owner_o));
             char *addr = (char *)owner_o + index;
             STAT_INC(STORE_ATTR, hit);
             PyObject *old_value = *(PyObject **)addr;
-            *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value));
+            UNLOCK_OBJECT(owner_o);
             Py_XDECREF(old_value);
             PyStackRef_CLOSE(owner);
         }
diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h
index 9bfc3e7..6e752c5 100644
--- a/Python/executor_cases.c.h
+++ b/Python/executor_cases.c.h
@@ -2574,13 +2574,34 @@
             uint32_t type_version = (uint32_t)CURRENT_OPERAND0();
             PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
             assert(type_version != 0);
-            if (tp->tp_version_tag != type_version) {
+            if (FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
             break;
         }
 
+        case _GUARD_TYPE_VERSION_AND_LOCK: {
+            _PyStackRef owner;
+            owner = stack_pointer[-1];
+            uint32_t type_version = (uint32_t)CURRENT_OPERAND0();
+            PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
+            assert(type_version != 0);
+            if (!LOCK_OBJECT(owner_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
+            PyTypeObject *tp = Py_TYPE(owner_o);
+            if (FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version) {
+                UNLOCK_OBJECT(owner_o);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
+            }
+            break;
+        }
+
         case _CHECK_MANAGED_OBJECT_HAS_VALUES: {
             _PyStackRef owner;
             owner = stack_pointer[-1];
@@ -2910,13 +2931,13 @@
             PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
             assert(Py_TYPE(owner_o)->tp_dictoffset < 0);
             assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_INLINE_VALUES);
-            if (_PyObject_GetManagedDict(owner_o)) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
-            }
-            if (_PyObject_InlineValues(owner_o)->valid == 0) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
+            if (_PyObject_GetManagedDict(owner_o) ||
+                !FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(owner_o)->valid)) {
+                UNLOCK_OBJECT(owner_o);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
             }
             break;
         }
@@ -2932,15 +2953,14 @@
             assert(_PyObject_GetManagedDict(owner_o) == NULL);
             PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset);
             PyObject *old_value = *value_ptr;
-            *value_ptr = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value));
             if (old_value == NULL) {
                 PyDictValues *values = _PyObject_InlineValues(owner_o);
                 Py_ssize_t index = value_ptr - values->values;
                 _PyDictValues_AddToInsertionOrder(values, index);
             }
-            else {
-                Py_DECREF(old_value);
-            }
+            UNLOCK_OBJECT(owner_o);
+            Py_XDECREF(old_value);
             PyStackRef_CLOSE(owner);
             stack_pointer += -2;
             assert(WITHIN_STACK_BOUNDS());
@@ -2961,30 +2981,50 @@
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            assert(PyDict_CheckExact((PyObject *)dict));
-            PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
-            if (hint >= (size_t)dict->ma_keys->dk_nentries) {
+            if (!LOCK_OBJECT(dict)) {
                 UOP_STAT_INC(uopcode, miss);
                 JUMP_TO_JUMP_TARGET();
             }
-            if (!DK_IS_UNICODE(dict->ma_keys)) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
+            #ifdef Py_GIL_DISABLED
+            if (dict != _PyObject_GetManagedDict(owner_o)) {
+                UNLOCK_OBJECT(dict);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
+            }
+            #endif
+            assert(PyDict_CheckExact((PyObject *)dict));
+            PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
+            if (hint >= (size_t)dict->ma_keys->dk_nentries ||
+                !DK_IS_UNICODE(dict->ma_keys)) {
+                UNLOCK_OBJECT(dict);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
             }
             PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
             if (ep->me_key != name) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
+                UNLOCK_OBJECT(dict);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
             }
             PyObject *old_value = ep->me_value;
             if (old_value == NULL) {
-                UOP_STAT_INC(uopcode, miss);
-                JUMP_TO_JUMP_TARGET();
+                UNLOCK_OBJECT(dict);
+                if (true) {
+                    UOP_STAT_INC(uopcode, miss);
+                    JUMP_TO_JUMP_TARGET();
+                }
             }
             _PyFrame_SetStackPointer(frame, stack_pointer);
             _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
             stack_pointer = _PyFrame_GetStackPointer(frame);
-            ep->me_value = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(ep->me_value, PyStackRef_AsPyObjectSteal(value));
+            UNLOCK_OBJECT(dict);
             // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,
             // when dict only holds the strong reference to value in ep->me_value.
             Py_XDECREF(old_value);
@@ -3002,10 +3042,15 @@
             value = stack_pointer[-2];
             uint16_t index = (uint16_t)CURRENT_OPERAND0();
             PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
+            if (!LOCK_OBJECT(owner_o)) {
+                UOP_STAT_INC(uopcode, miss);
+                JUMP_TO_JUMP_TARGET();
+            }
             char *addr = (char *)owner_o + index;
             STAT_INC(STORE_ATTR, hit);
             PyObject *old_value = *(PyObject **)addr;
-            *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value);
+            FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value));
+            UNLOCK_OBJECT(owner_o);
             Py_XDECREF(old_value);
             PyStackRef_CLOSE(owner);
             stack_pointer += -2;
diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h
index ac89891..ee5c55a 100644
--- a/Python/generated_cases.c.h
+++ b/Python/generated_cases.c.h
@@ -5319,7 +5319,7 @@
                 uint32_t type_version = read_u32(&this_instr[4].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _LOAD_ATTR_CLASS
             {
@@ -5388,7 +5388,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _CHECK_MANAGED_OBJECT_HAS_VALUES
             {
@@ -5433,7 +5433,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _CHECK_ATTR_METHOD_LAZY_DICT
             {
@@ -5476,7 +5476,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             /* Skip 2 cache entries */
             // _LOAD_ATTR_METHOD_NO_DICT
@@ -5512,7 +5512,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT
             {
@@ -5611,7 +5611,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             /* Skip 2 cache entries */
             // _LOAD_ATTR_NONDESCRIPTOR_NO_DICT
@@ -5642,7 +5642,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT
             {
@@ -5688,7 +5688,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             /* Skip 2 cache entries */
             // _LOAD_ATTR_PROPERTY_FRAME
@@ -5750,7 +5750,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _LOAD_ATTR_SLOT
             {
@@ -5787,7 +5787,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, LOAD_ATTR);
             }
             // _CHECK_ATTR_WITH_HINT
             {
@@ -7314,7 +7314,7 @@
                 owner = stack_pointer[-1];
                 uint16_t counter = read_u16(&this_instr[1].cache);
                 (void)counter;
-                #if ENABLE_SPECIALIZATION
+                #if ENABLE_SPECIALIZATION_FT
                 if (ADAPTIVE_COUNTER_TRIGGERS(counter)) {
                     PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
                     next_instr = this_instr;
@@ -7325,7 +7325,7 @@
                 }
                 OPCODE_DEFERRED_INC(STORE_ATTR);
                 ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter);
-                #endif  /* ENABLE_SPECIALIZATION */
+                #endif  /* ENABLE_SPECIALIZATION_FT */
             }
             /* Skip 3 cache entries */
             // _STORE_ATTR
@@ -7353,21 +7353,29 @@
             _PyStackRef owner;
             _PyStackRef value;
             /* Skip 1 cache entry */
-            // _GUARD_TYPE_VERSION
+            // _GUARD_TYPE_VERSION_AND_LOCK
             {
                 owner = stack_pointer[-1];
                 uint32_t type_version = read_u32(&this_instr[2].cache);
-                PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
+                PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
+                DEOPT_IF(!LOCK_OBJECT(owner_o), STORE_ATTR);
+                PyTypeObject *tp = Py_TYPE(owner_o);
+                if (FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version) {
+                    UNLOCK_OBJECT(owner_o);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
             }
             // _GUARD_DORV_NO_DICT
             {
                 PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
                 assert(Py_TYPE(owner_o)->tp_dictoffset < 0);
                 assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_INLINE_VALUES);
-                DEOPT_IF(_PyObject_GetManagedDict(owner_o), STORE_ATTR);
-                DEOPT_IF(_PyObject_InlineValues(owner_o)->valid == 0, STORE_ATTR);
+                if (_PyObject_GetManagedDict(owner_o) ||
+                    !FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(owner_o)->valid)) {
+                    UNLOCK_OBJECT(owner_o);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
             }
             // _STORE_ATTR_INSTANCE_VALUE
             {
@@ -7378,15 +7386,14 @@
                 assert(_PyObject_GetManagedDict(owner_o) == NULL);
                 PyObject **value_ptr = (PyObject**)(((char *)owner_o) + offset);
                 PyObject *old_value = *value_ptr;
-                *value_ptr = PyStackRef_AsPyObjectSteal(value);
+                FT_ATOMIC_STORE_PTR_RELEASE(*value_ptr, PyStackRef_AsPyObjectSteal(value));
                 if (old_value == NULL) {
                     PyDictValues *values = _PyObject_InlineValues(owner_o);
                     Py_ssize_t index = value_ptr - values->values;
                     _PyDictValues_AddToInsertionOrder(values, index);
                 }
-                else {
-                    Py_DECREF(old_value);
-                }
+                UNLOCK_OBJECT(owner_o);
+                Py_XDECREF(old_value);
                 PyStackRef_CLOSE(owner);
             }
             stack_pointer += -2;
@@ -7408,17 +7415,19 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, STORE_ATTR);
             }
             // _STORE_ATTR_SLOT
             {
                 value = stack_pointer[-2];
                 uint16_t index = read_u16(&this_instr[4].cache);
                 PyObject *owner_o = PyStackRef_AsPyObjectBorrow(owner);
+                DEOPT_IF(!LOCK_OBJECT(owner_o), STORE_ATTR);
                 char *addr = (char *)owner_o + index;
                 STAT_INC(STORE_ATTR, hit);
                 PyObject *old_value = *(PyObject **)addr;
-                *(PyObject **)addr = PyStackRef_AsPyObjectSteal(value);
+                FT_ATOMIC_STORE_PTR_RELEASE(*(PyObject **)addr, PyStackRef_AsPyObjectSteal(value));
+                UNLOCK_OBJECT(owner_o);
                 Py_XDECREF(old_value);
                 PyStackRef_CLOSE(owner);
             }
@@ -7441,7 +7450,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, STORE_ATTR);
             }
             // _STORE_ATTR_WITH_HINT
             {
@@ -7451,18 +7460,35 @@
                 assert(Py_TYPE(owner_o)->tp_flags & Py_TPFLAGS_MANAGED_DICT);
                 PyDictObject *dict = _PyObject_GetManagedDict(owner_o);
                 DEOPT_IF(dict == NULL, STORE_ATTR);
+                DEOPT_IF(!LOCK_OBJECT(dict), STORE_ATTR);
+                #ifdef Py_GIL_DISABLED
+                if (dict != _PyObject_GetManagedDict(owner_o)) {
+                    UNLOCK_OBJECT(dict);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
+                #endif
                 assert(PyDict_CheckExact((PyObject *)dict));
                 PyObject *name = GETITEM(FRAME_CO_NAMES, oparg);
-                DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR);
-                DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys), STORE_ATTR);
+                if (hint >= (size_t)dict->ma_keys->dk_nentries ||
+                    !DK_IS_UNICODE(dict->ma_keys)) {
+                    UNLOCK_OBJECT(dict);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
                 PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint;
-                DEOPT_IF(ep->me_key != name, STORE_ATTR);
+                if (ep->me_key != name) {
+                    UNLOCK_OBJECT(dict);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
                 PyObject *old_value = ep->me_value;
-                DEOPT_IF(old_value == NULL, STORE_ATTR);
+                if (old_value == NULL) {
+                    UNLOCK_OBJECT(dict);
+                    DEOPT_IF(true, STORE_ATTR);
+                }
                 _PyFrame_SetStackPointer(frame, stack_pointer);
                 _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
                 stack_pointer = _PyFrame_GetStackPointer(frame);
-                ep->me_value = PyStackRef_AsPyObjectSteal(value);
+                FT_ATOMIC_STORE_PTR_RELEASE(ep->me_value, PyStackRef_AsPyObjectSteal(value));
+                UNLOCK_OBJECT(dict);
                 // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,
                 // when dict only holds the strong reference to value in ep->me_value.
                 Py_XDECREF(old_value);
@@ -7815,7 +7841,7 @@
                 uint32_t type_version = read_u32(&this_instr[2].cache);
                 PyTypeObject *tp = Py_TYPE(PyStackRef_AsPyObjectBorrow(owner));
                 assert(type_version != 0);
-                DEOPT_IF(tp->tp_version_tag != type_version, TO_BOOL);
+                DEOPT_IF(FT_ATOMIC_LOAD_UINT_RELAXED(tp->tp_version_tag) != type_version, TO_BOOL);
             }
             // _REPLACE_WITH_TRUE
             {
diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h
index 2c3133d..b46079e 100644
--- a/Python/optimizer_cases.c.h
+++ b/Python/optimizer_cases.c.h
@@ -1121,6 +1121,10 @@
             break;
         }
 
+        case _GUARD_TYPE_VERSION_AND_LOCK: {
+            break;
+        }
+
         case _CHECK_MANAGED_OBJECT_HAS_VALUES: {
             break;
         }
diff --git a/Python/specialize.c b/Python/specialize.c
index 6c45320..349ed47 100644
--- a/Python/specialize.c
+++ b/Python/specialize.c
@@ -741,8 +741,8 @@ static int function_kind(PyCodeObject *code);
 #ifndef Py_GIL_DISABLED
 static bool function_check_args(PyObject *o, int expected_argcount, int opcode);
 static uint32_t function_get_version(PyObject *o, int opcode);
-#endif
 static uint32_t type_get_version(PyTypeObject *t, int opcode);
+#endif
 
 static int
 specialize_module_load_attr_lock_held(PyDictObject *dict, _Py_CODEUNIT *instr, PyObject *name)
@@ -881,71 +881,142 @@ classify_descriptor(PyObject *descriptor, bool has_getattr)
     return NON_DESCRIPTOR;
 }
 
-static DescriptorClassification
-analyze_descriptor(PyTypeObject *type, PyObject *name, PyObject **descr, int store)
+static bool
+descriptor_is_class(PyObject *descriptor, PyObject *name)
 {
+    return ((PyUnicode_CompareWithASCIIString(name, "__class__") == 0) &&
+            (descriptor == _PyType_Lookup(&PyBaseObject_Type, name)));
+}
+
+#ifndef Py_GIL_DISABLED
+static DescriptorClassification
+analyze_descriptor_load(PyTypeObject *type, PyObject *name, PyObject **descr) {
     bool has_getattr = false;
-    if (store) {
-        if (type->tp_setattro != PyObject_GenericSetAttr) {
+    getattrofunc getattro_slot = type->tp_getattro;
+    if (getattro_slot == PyObject_GenericGetAttr) {
+        /* Normal attribute lookup; */
+        has_getattr = false;
+    }
+    else if (getattro_slot == _Py_slot_tp_getattr_hook ||
+        getattro_slot == _Py_slot_tp_getattro) {
+        /* One or both of __getattribute__ or __getattr__ may have been
+         overridden See typeobject.c for why these functions are special. */
+        PyObject *getattribute = _PyType_LookupRef(type, &_Py_ID(__getattribute__));
+        PyInterpreterState *interp = _PyInterpreterState_GET();
+        bool has_custom_getattribute = getattribute != NULL &&
+            getattribute != interp->callable_cache.object__getattribute__;
+        PyObject *getattr = _PyType_LookupRef(type, &_Py_ID(__getattr__));
+        has_getattr = getattr != NULL;
+        Py_XDECREF(getattr);
+        if (has_custom_getattribute) {
+            if (getattro_slot == _Py_slot_tp_getattro &&
+                !has_getattr &&
+                Py_IS_TYPE(getattribute, &PyFunction_Type)) {
+                *descr = getattribute;
+                return GETATTRIBUTE_IS_PYTHON_FUNCTION;
+            }
+            /* Potentially both __getattr__ and __getattribute__ are set.
+               Too complicated */
+            Py_DECREF(getattribute);
             *descr = NULL;
             return GETSET_OVERRIDDEN;
         }
+        /* Potentially has __getattr__ but no custom __getattribute__.
+           Fall through to usual descriptor analysis.
+           Usual attribute lookup should only be allowed at runtime
+           if we can guarantee that there is no way an exception can be
+           raised. This means some specializations, e.g. specializing
+           for property() isn't safe.
+        */
+        Py_XDECREF(getattribute);
     }
     else {
-        getattrofunc getattro_slot = type->tp_getattro;
-        if (getattro_slot == PyObject_GenericGetAttr) {
-            /* Normal attribute lookup; */
-            has_getattr = false;
-        }
-        else if (getattro_slot == _Py_slot_tp_getattr_hook ||
-            getattro_slot == _Py_slot_tp_getattro) {
-            /* One or both of __getattribute__ or __getattr__ may have been
-             overridden See typeobject.c for why these functions are special. */
-            PyObject *getattribute = _PyType_Lookup(type,
-                &_Py_ID(__getattribute__));
-            PyInterpreterState *interp = _PyInterpreterState_GET();
-            bool has_custom_getattribute = getattribute != NULL &&
-                getattribute != interp->callable_cache.object__getattribute__;
-            has_getattr = _PyType_Lookup(type, &_Py_ID(__getattr__)) != NULL;
-            if (has_custom_getattribute) {
-                if (getattro_slot == _Py_slot_tp_getattro &&
-                    !has_getattr &&
-                    Py_IS_TYPE(getattribute, &PyFunction_Type)) {
-                    *descr = getattribute;
-                    return GETATTRIBUTE_IS_PYTHON_FUNCTION;
-                }
-                /* Potentially both __getattr__ and __getattribute__ are set.
-                   Too complicated */
-                *descr = NULL;
-                return GETSET_OVERRIDDEN;
-            }
-            /* Potentially has __getattr__ but no custom __getattribute__.
-               Fall through to usual descriptor analysis.
-               Usual attribute lookup should only be allowed at runtime
-               if we can guarantee that there is no way an exception can be
-               raised. This means some specializations, e.g. specializing
-               for property() isn't safe.
-            */
-        }
-        else {
-            *descr = NULL;
-            return GETSET_OVERRIDDEN;
-        }
+        *descr = NULL;
+        return GETSET_OVERRIDDEN;
     }
-    PyObject *descriptor = _PyType_Lookup(type, name);
+    PyObject *descriptor = _PyType_LookupRef(type, name);
     *descr = descriptor;
-    if (PyUnicode_CompareWithASCIIString(name, "__class__") == 0) {
-        if (descriptor == _PyType_Lookup(&PyBaseObject_Type, name)) {
-            return DUNDER_CLASS;
-        }
+    if (descriptor_is_class(descriptor, name)) {
+        return DUNDER_CLASS;
     }
     return classify_descriptor(descriptor, has_getattr);
 }
+#endif //!Py_GIL_DISABLED
+
+static DescriptorClassification
+analyze_descriptor_store(PyTypeObject *type, PyObject *name, PyObject **descr, unsigned int *tp_version)
+{
+    if (type->tp_setattro != PyObject_GenericSetAttr) {
+        *descr = NULL;
+        return GETSET_OVERRIDDEN;
+    }
+    PyObject *descriptor = _PyType_LookupRefAndVersion(type, name, tp_version);
+    *descr = descriptor;
+    if (descriptor_is_class(descriptor, name)) {
+        return DUNDER_CLASS;
+    }
+    return classify_descriptor(descriptor, false);
+}
+
+static int
+specialize_dict_access_inline(
+    PyObject *owner, _Py_CODEUNIT *instr, PyTypeObject *type,
+    DescriptorClassification kind, PyObject *name, unsigned int tp_version,
+    int base_op, int values_op)
+{
+    _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
+    PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys;
+    assert(PyUnicode_CheckExact(name));
+    Py_ssize_t index = _PyDictKeys_StringLookupSplit(keys, name);
+    assert (index != DKIX_ERROR);
+    if (index == DKIX_EMPTY) {
+        SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_IN_KEYS);
+        return 0;
+    }
+    assert(index >= 0);
+    char *value_addr = (char *)&_PyObject_InlineValues(owner)->values[index];
+    Py_ssize_t offset = value_addr - (char *)owner;
+    if (offset != (uint16_t)offset) {
+        SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
+        return 0;
+    }
+    cache->index = (uint16_t)offset;
+    write_u32(cache->version, tp_version);
+    specialize(instr, values_op);
+    return 1;
+}
+
+static int
+specialize_dict_access_hint(
+    PyDictObject *dict, _Py_CODEUNIT *instr, PyTypeObject *type,
+    DescriptorClassification kind, PyObject *name, unsigned int tp_version,
+    int base_op, int hint_op)
+{
+    _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
+    // We found an instance with a __dict__.
+    if (_PyDict_HasSplitTable(dict)) {
+        SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_SPLIT_DICT);
+        return 0;
+    }
+    Py_ssize_t index = _PyDict_LookupIndex(dict, name);
+    if (index != (uint16_t)index) {
+        SPECIALIZATION_FAIL(base_op,
+                            index == DKIX_EMPTY ?
+                            SPEC_FAIL_ATTR_NOT_IN_DICT :
+                            SPEC_FAIL_OUT_OF_RANGE);
+        return 0;
+    }
+    cache->index = (uint16_t)index;
+    write_u32(cache->version, tp_version);
+    specialize(instr, hint_op);
+    return 1;
+}
+
 
 static int
 specialize_dict_access(
     PyObject *owner, _Py_CODEUNIT *instr, PyTypeObject *type,
-    DescriptorClassification kind, PyObject *name,
+    DescriptorClassification kind, PyObject *name, unsigned int tp_version,
     int base_op, int values_op, int hint_op)
 {
     assert(kind == NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT ||
@@ -956,29 +1027,25 @@ specialize_dict_access(
         SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_MANAGED_DICT);
         return 0;
     }
-    _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
     if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES &&
-        _PyObject_InlineValues(owner)->valid &&
+        FT_ATOMIC_LOAD_UINT8(_PyObject_InlineValues(owner)->valid) &&
         !(base_op == STORE_ATTR && _PyObject_GetManagedDict(owner) != NULL))
     {
-        PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys;
-        assert(PyUnicode_CheckExact(name));
-        Py_ssize_t index = _PyDictKeys_StringLookup(keys, name);
-        assert (index != DKIX_ERROR);
-        if (index == DKIX_EMPTY) {
-            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_NOT_IN_KEYS);
-            return 0;
+        int res;
+        Py_BEGIN_CRITICAL_SECTION(owner);
+        PyDictObject *dict = _PyObject_GetManagedDict(owner);
+        if (dict == NULL) {
+            // managed dict, not materialized, inline values valid
+            res = specialize_dict_access_inline(owner, instr, type, kind, name,
+                                                tp_version, base_op, values_op);
         }
-        assert(index >= 0);
-        char *value_addr = (char *)&_PyObject_InlineValues(owner)->values[index];
-        Py_ssize_t offset = value_addr - (char *)owner;
-        if (offset != (uint16_t)offset) {
-            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE);
-            return 0;
+        else {
+            // lost race and dict was created, fail specialization
+            SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OTHER);
+            res = 0;
         }
-        write_u32(cache->version, type->tp_version_tag);
-        cache->index = (uint16_t)offset;
-        specialize(instr, values_op);
+        Py_END_CRITICAL_SECTION();
+        return res;
     }
     else {
         PyDictObject *dict = _PyObject_GetManagedDict(owner);
@@ -986,25 +1053,14 @@ specialize_dict_access(
             SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NO_DICT);
             return 0;
         }
-        // We found an instance with a __dict__.
-        if (dict->ma_values) {
-            SPECIALIZATION_FAIL(base_op, SPEC_FAIL_ATTR_SPLIT_DICT);
-            return 0;
-        }
-        Py_ssize_t index =
-            _PyDict_LookupIndex(dict, name);
-        if (index != (uint16_t)index) {
-            SPECIALIZATION_FAIL(base_op,
-                                index == DKIX_EMPTY ?
-                                SPEC_FAIL_ATTR_NOT_IN_DICT :
-                                SPEC_FAIL_OUT_OF_RANGE);
-            return 0;
-        }
-        cache->index = (uint16_t)index;
-        write_u32(cache->version, type->tp_version_tag);
-        specialize(instr, hint_op);
+        int res;
+        Py_BEGIN_CRITICAL_SECTION(dict);
+        // materialized managed dict
+        res = specialize_dict_access_hint(dict, instr, type, kind, name,
+                                          tp_version, base_op, hint_op);
+        Py_END_CRITICAL_SECTION();
+        return res;
     }
-    return 1;
 }
 
 #ifndef Py_GIL_DISABLED
@@ -1050,7 +1106,8 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
     PyTypeObject *type = Py_TYPE(owner);
     bool shadow = instance_has_key(owner, name);
     PyObject *descr = NULL;
-    DescriptorClassification kind = analyze_descriptor(type, name, &descr, 0);
+    DescriptorClassification kind = analyze_descriptor_load(type, name, &descr);
+    Py_XDECREF(descr); // turn strong ref into a borrowed ref
     assert(descr != NULL || kind == ABSENT || kind == GETSET_OVERRIDDEN);
     if (type_get_version(type, LOAD_ATTR) == 0) {
         return -1;
@@ -1204,8 +1261,8 @@ specialize_instance_load_attr(PyObject* owner, _Py_CODEUNIT* instr, PyObject* na
     }
     Py_UNREACHABLE();
 try_instance:
-    if (specialize_dict_access(owner, instr, type, kind, name, LOAD_ATTR,
-                                    LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_WITH_HINT))
+    if (specialize_dict_access(owner, instr, type, kind, name, type->tp_version_tag,
+                               LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_ATTR_WITH_HINT))
     {
         return 0;
     }
@@ -1259,8 +1316,9 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na
 {
     PyObject *owner = PyStackRef_AsPyObjectBorrow(owner_st);
 
-    assert(ENABLE_SPECIALIZATION);
+    assert(ENABLE_SPECIALIZATION_FT);
     assert(_PyOpcode_Caches[STORE_ATTR] == INLINE_CACHE_ENTRIES_STORE_ATTR);
+    PyObject *descr = NULL;
     _PyAttrCache *cache = (_PyAttrCache *)(instr + 1);
     PyTypeObject *type = Py_TYPE(owner);
     if (!_PyType_IsReady(type)) {
@@ -1274,11 +1332,12 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na
         SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN);
         goto fail;
     }
-    PyObject *descr;
-    DescriptorClassification kind = analyze_descriptor(type, name, &descr, 1);
-    if (type_get_version(type, STORE_ATTR) == 0) {
+    unsigned int tp_version = 0;
+    DescriptorClassification kind = analyze_descriptor_store(type, name, &descr, &tp_version);
+    if (tp_version == 0) {
         goto fail;
     }
+    assert(descr != NULL || kind == ABSENT || kind == GETSET_OVERRIDDEN);
     switch(kind) {
         case OVERRIDING:
             SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_ATTR_OVERRIDING_DESCRIPTOR);
@@ -1309,8 +1368,8 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na
             assert(dmem->type == Py_T_OBJECT_EX || dmem->type == _Py_T_OBJECT);
             assert(offset > 0);
             cache->index = (uint16_t)offset;
-            write_u32(cache->version, type->tp_version_tag);
-            instr->op.code = STORE_ATTR_SLOT;
+            write_u32(cache->version, tp_version);
+            specialize(instr, STORE_ATTR_SLOT);
             goto success;
         }
         case DUNDER_CLASS:
@@ -1337,22 +1396,19 @@ _Py_Specialize_StoreAttr(_PyStackRef owner_st, _Py_CODEUNIT *instr, PyObject *na
             SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_ATTR_CLASS_ATTR_SIMPLE);
             goto fail;
         case ABSENT:
-            if (specialize_dict_access(owner, instr, type, kind, name, STORE_ATTR,
-                                    STORE_ATTR_INSTANCE_VALUE, STORE_ATTR_WITH_HINT))
-            {
+            if (specialize_dict_access(owner, instr, type, kind, name, tp_version,
+                                       STORE_ATTR, STORE_ATTR_INSTANCE_VALUE,
+                                       STORE_ATTR_WITH_HINT)) {
                 goto success;
             }
     }
 fail:
-    STAT_INC(STORE_ATTR, failure);
-    assert(!PyErr_Occurred());
-    instr->op.code = STORE_ATTR;
-    cache->counter = adaptive_counter_backoff(cache->counter);
+    Py_XDECREF(descr);
+    unspecialize(instr);
     return;
 success:
-    STAT_INC(STORE_ATTR, success);
-    assert(!PyErr_Occurred());
-    cache->counter = adaptive_counter_cooldown();
+    Py_XDECREF(descr);
+    return;
 }
 
 #ifndef Py_GIL_DISABLED
@@ -1421,7 +1477,8 @@ specialize_class_load_attr(PyObject *owner, _Py_CODEUNIT *instr,
     }
     PyObject *descr = NULL;
     DescriptorClassification kind = 0;
-    kind = analyze_descriptor(cls, name, &descr, 0);
+    kind = analyze_descriptor_load(cls, name, &descr);
+    Py_XDECREF(descr); // turn strong ref into a borrowed ref
     if (type_get_version(cls, LOAD_ATTR) == 0) {
         return -1;
     }
@@ -1714,7 +1771,6 @@ function_get_version(PyObject *o, int opcode)
     }
     return version;
 }
-#endif  // Py_GIL_DISABLED
 
 /* Returning 0 indicates a failure. */
 static uint32_t
@@ -1727,6 +1783,7 @@ type_get_version(PyTypeObject *t, int opcode)
     }
     return version;
 }
+#endif  // Py_GIL_DISABLED
 
 void
 _Py_Specialize_BinarySubscr(
-- 
cgit v0.12