From 27486c3365a1f628e2df9a0e88c7c51706a1282e Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Fri, 22 Nov 2024 19:00:35 +0200 Subject: gh-115999: Add free-threaded specialization for `UNPACK_SEQUENCE` (#126600) Add free-threaded specialization for `UNPACK_SEQUENCE` opcode. `UNPACK_SEQUENCE_TUPLE/UNPACK_SEQUENCE_TWO_TUPLE` are already thread safe since tuples are immutable. `UNPACK_SEQUENCE_LIST` is not thread safe because of nature of lists (there is nothing preventing another thread from adding items to or removing them the list while the instruction is executing). To achieve thread safety we add a critical section to the implementation of `UNPACK_SEQUENCE_LIST`, especially around the parts where we check the size of the list and push items onto the stack. --------- Co-authored-by: Matt Page Co-authored-by: mpage --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_opcache.py | 31 +++++++++++++++++++++++++++++++ Python/bytecodes.c | 20 ++++++++++++++++---- Python/executor_cases.c.h | 22 ++++++++++++++++++++-- Python/generated_cases.c.h | 26 ++++++++++++++++++++++---- Python/specialize.c | 30 ++++++++++++------------------ 7 files changed, 103 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 58e583e..5328087 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1223,7 +1223,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1b2880c..1c1f478 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index a0292b3..1a6eac2 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1339,6 +1339,37 @@ class TestSpecializer(TestBase): self.assert_specialized(to_bool_str, "TO_BOOL_STR") self.assert_no_opcode(to_bool_str, "TO_BOOL") + @cpython_only + @requires_specialization_ft + def test_unpack_sequence(self): + def f(): + for _ in range(100): + a, b = 1, 2 + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + f() + self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(f, "UNPACK_SEQUENCE") + + def g(): + for _ in range(100): + a, = 1, + self.assertEqual(a, 1) + + g() + self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(g, "UNPACK_SEQUENCE") + + def x(): + for _ in range(100): + a, b = [1, 2] + self.assertEqual(a, 1) + self.assertEqual(b, 2) + + x() + self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(x, "UNPACK_SEQUENCE") if __name__ == "__main__": unittest.main() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 6ee886c..83240d5 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1381,7 +1381,7 @@ dummy_func( }; specializing op(_SPECIALIZE_UNPACK_SEQUENCE, (counter/1, seq -- seq)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_UnpackSequence(seq, next_instr, oparg); @@ -1389,7 +1389,7 @@ dummy_func( } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -1429,12 +1429,24 @@ dummy_func( inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) { PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o)); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + PyCriticalSection_Begin(&cs, seq_o); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif + DEOPT_IF(true); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + PyCriticalSection_End(&cs); + #endif DECREF_INPUTS(); } @@ -2525,7 +2537,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } macro(CONTAINS_OP) = _SPECIALIZE_CONTAINS_OP + _CONTAINS_OP; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5c7138a..22de7ca 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1711,15 +1711,33 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif if (PyList_GET_SIZE(seq_o) != oparg) { - UOP_STAT_INC(uopcode, miss); - JUMP_TO_JUMP_TARGET(); + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (true) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 13947849..182c9cf 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3405,7 +3405,7 @@ } OPCODE_DEFERRED_INC(CONTAINS_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _CONTAINS_OP { @@ -7994,7 +7994,7 @@ seq = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -8004,7 +8004,7 @@ } OPCODE_DEFERRED_INC(UNPACK_SEQUENCE); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ (void)seq; (void)counter; } @@ -8035,12 +8035,30 @@ values = &stack_pointer[-1]; PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE); - DEOPT_IF(PyList_GET_SIZE(seq_o) != oparg, UNPACK_SEQUENCE); + #ifdef Py_GIL_DISABLED + PyCriticalSection cs; + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_Begin(&cs, seq_o); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + if (PyList_GET_SIZE(seq_o) != oparg) { + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif + DEOPT_IF(true, UNPACK_SEQUENCE); + } STAT_INC(UNPACK_SEQUENCE, hit); PyObject **items = _PyList_ITEMS(seq_o); for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyCriticalSection_End(&cs); + stack_pointer = _PyFrame_GetStackPointer(frame); + #endif PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/specialize.c b/Python/specialize.c index c69f61c..716d53a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2487,39 +2487,33 @@ _Py_Specialize_UnpackSequence(_PyStackRef seq_st, _Py_CODEUNIT *instr, int oparg { PyObject *seq = PyStackRef_AsPyObjectBorrow(seq_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[UNPACK_SEQUENCE] == INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE); - _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)(instr + 1); if (PyTuple_CheckExact(seq)) { if (PyTuple_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } if (PyTuple_GET_SIZE(seq) == 2) { - instr->op.code = UNPACK_SEQUENCE_TWO_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TWO_TUPLE); + return; } - instr->op.code = UNPACK_SEQUENCE_TUPLE; - goto success; + specialize(instr, UNPACK_SEQUENCE_TUPLE); + return; } if (PyList_CheckExact(seq)) { if (PyList_GET_SIZE(seq) != oparg) { SPECIALIZATION_FAIL(UNPACK_SEQUENCE, SPEC_FAIL_EXPECTED_ERROR); - goto failure; + unspecialize(instr); + return; } - instr->op.code = UNPACK_SEQUENCE_LIST; - goto success; + specialize(instr, UNPACK_SEQUENCE_LIST); + return; } SPECIALIZATION_FAIL(UNPACK_SEQUENCE, unpack_sequence_fail_kind(seq)); -failure: - STAT_INC(UNPACK_SEQUENCE, failure); - instr->op.code = UNPACK_SEQUENCE; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(UNPACK_SEQUENCE, success); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } #ifdef Py_STATS -- cgit v0.12