From 4dc27bc0b76aa5985ccef2901f7a4f5d36b97995 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 21 Jun 2024 16:20:41 -0400 Subject: [3.13] gh-119344: Make critical section API public (GH-119353) (#120856) This makes the following macros public as part of the non-limited C-API for locking a single object or two objects at once. * `Py_BEGIN_CRITICAL_SECTION(op)` / `Py_END_CRITICAL_SECTION()` * `Py_BEGIN_CRITICAL_SECTION2(a, b)` / `Py_END_CRITICAL_SECTION2()` The supporting functions and structs used by the macros are also exposed for cases where C macros are not available. (cherry picked from commit 8f17d69b7bc906e8407095317842cc0fd52cd84a) --- Doc/c-api/init.rst | 104 ++++++++++ Include/Python.h | 1 + Include/cpython/critical_section.h | 134 +++++++++++++ Include/critical_section.h | 16 ++ Include/internal/pycore_critical_section.h | 218 +++++++-------------- Makefile.pre.in | 2 + .../2024-05-21-19-41-41.gh-issue-119344.QKvzQb.rst | 1 + Modules/_sre/sre.c | 2 +- Modules/_testcapimodule.c | 13 ++ Objects/dictobject.c | 2 +- Objects/listobject.c | 2 +- Objects/typeobject.c | 71 +++---- PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/critical_section.c | 106 +++++++--- 15 files changed, 457 insertions(+), 223 deletions(-) create mode 100644 Include/cpython/critical_section.h create mode 100644 Include/critical_section.h create mode 100644 Misc/NEWS.d/next/C API/2024-05-21-19-41-41.gh-issue-119344.QKvzQb.rst diff --git a/Doc/c-api/init.rst b/Doc/c-api/init.rst index 6b45413..1fab3f5 100644 --- a/Doc/c-api/init.rst +++ b/Doc/c-api/init.rst @@ -2202,3 +2202,107 @@ The C-API provides a basic mutual exclusion lock. issue a fatal error. .. versionadded:: 3.13 + +.. _python-critical-section-api: + +Python Critical Section API +--------------------------- + +The critical section API provides a deadlock avoidance layer on top of +per-object locks for :term:`free-threaded <free threading>` CPython. 
Critical sections are +intended to replace reliance on the :term:`global interpreter lock`, and are +no-ops in versions of Python with the global interpreter lock. + +Critical sections avoid deadlocks by implicitly suspending active critical +sections and releasing the locks during calls to :c:func:`PyEval_SaveThread`. +When :c:func:`PyEval_RestoreThread` is called, the most recent critical section +is resumed, and its locks reacquired. This means the critical section API +provides weaker guarantees than traditional locks -- they are useful because +their behavior is similar to the :term:`GIL`. + +The functions and structs used by the macros are exposed for cases +where C macros are not available. They should only be used as in the +given macro expansions. Note that the sizes and contents of the structures may +change in future Python versions. + +.. note:: + + Operations that need to lock two objects at once must use + :c:macro:`Py_BEGIN_CRITICAL_SECTION2`. You *cannot* use nested critical + sections to lock more than one object at once, because the inner critical + section may suspend the outer critical sections. This API does not provide + a way to lock more than two objects at once. + +Example usage:: + + static PyObject * + set_field(MyObject *self, PyObject *value) + { + Py_BEGIN_CRITICAL_SECTION(self); + Py_SETREF(self->field, Py_XNewRef(value)); + Py_END_CRITICAL_SECTION(); + Py_RETURN_NONE; + } + +In the above example, :c:macro:`Py_SETREF` calls :c:macro:`Py_DECREF`, which +can call arbitrary code through an object's deallocation function. The critical +section API avoids potential deadlocks due to reentrancy and lock ordering +by allowing the runtime to temporarily suspend the critical section if the +code triggered by the finalizer blocks and calls :c:func:`PyEval_SaveThread`. + +.. c:macro:: Py_BEGIN_CRITICAL_SECTION(op) + + Acquires the per-object lock for the object *op* and begins a + critical section. 
+ + In the free-threaded build, this macro expands to:: + + { + PyCriticalSection _py_cs; + PyCriticalSection_Begin(&_py_cs, (PyObject*)(op)) + + In the default build, this macro expands to ``{``. + + .. versionadded:: 3.13 + +.. c:macro:: Py_END_CRITICAL_SECTION() + + Ends the critical section and releases the per-object lock. + + In the free-threaded build, this macro expands to:: + + PyCriticalSection_End(&_py_cs); + } + + In the default build, this macro expands to ``}``. + + .. versionadded:: 3.13 + +.. c:macro:: Py_BEGIN_CRITICAL_SECTION2(a, b) + + Acquires the per-object locks for the objects *a* and *b* and begins a + critical section. The locks are acquired in a consistent order (lowest + address first) to avoid lock ordering deadlocks. + + In the free-threaded build, this macro expands to:: + + { + PyCriticalSection2 _py_cs2; + PyCriticalSection2_Begin(&_py_cs2, (PyObject*)(a), (PyObject*)(b)) + + In the default build, this macro expands to ``{``. + + .. versionadded:: 3.13 + +.. c:macro:: Py_END_CRITICAL_SECTION2() + + Ends the critical section and releases the per-object locks. + + In the free-threaded build, this macro expands to:: + + PyCriticalSection2_End(&_py_cs2); + } + + In the default build, this macro expands to ``}``. + + .. 
versionadded:: 3.13 diff --git a/Include/Python.h b/Include/Python.h index ba2724c..95bc631 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -124,6 +124,7 @@ #include "import.h" #include "abstract.h" #include "bltinmodule.h" +#include "critical_section.h" #include "cpython/pyctype.h" #include "pystrtod.h" #include "pystrcmp.h" diff --git a/Include/cpython/critical_section.h b/Include/cpython/critical_section.h new file mode 100644 index 0000000..35db3fb --- /dev/null +++ b/Include/cpython/critical_section.h @@ -0,0 +1,134 @@ +#ifndef Py_CPYTHON_CRITICAL_SECTION_H +# error "this header file must not be included directly" +#endif + +// Python critical sections +// +// Conceptually, critical sections are a deadlock avoidance layer on top of +// per-object locks. These helpers, in combination with those locks, replace +// our usage of the global interpreter lock to provide thread-safety for +// otherwise thread-unsafe objects, such as dict. +// +// NOTE: These APIs are no-ops in non-free-threaded builds. +// +// Straightforward per-object locking could introduce deadlocks that were not +// present when running with the GIL. Threads may hold locks for multiple +// objects simultaneously because Python operations can nest. If threads were +// to acquire the same locks in different orders, they would deadlock. +// +// One way to avoid deadlocks is to allow threads to hold only the lock (or +// locks) for a single operation at a time (typically a single lock, but some +// operations involve two locks). When a thread begins a nested operation it +// could suspend the locks for any outer operation: before beginning the nested +// operation, the locks for the outer operation are released and when the +// nested operation completes, the locks for the outer operation are +// reacquired. +// +// To improve performance, this API uses a variation of the above scheme. 
+// Instead of immediately suspending locks any time a nested operation begins, +// locks are only suspended if the thread would block. This reduces the number +// of lock acquisitions and releases for nested operations, while still +// avoiding deadlocks. +// +// Additionally, the locks for any active operation are suspended around +// other potentially blocking operations, such as I/O. This is because the +// interaction between locks and blocking operations can lead to deadlocks in +// the same way as the interaction between multiple locks. +// +// Each thread's critical sections and their corresponding locks are tracked in +// a stack in `PyThreadState.critical_section`. When a thread calls +// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when +// waiting to acquire a lock, the thread suspends all of its active critical +// sections, temporarily releasing the associated locks. When the thread calls +// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent) +// critical section by reacquiring the associated lock or locks. See +// `_PyCriticalSection_Resume()`. +// +// NOTE: Only the top-most critical section is guaranteed to be active. +// Operations that need to lock two objects at once must use +// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections +// to lock more than one object at once, because the inner critical section +// may suspend the outer critical sections. This API does not provide a way +// to lock more than two objects at once (though it could be added later +// if actually needed). +// +// NOTE: Critical sections implicitly behave like reentrant locks because +// attempting to acquire the same lock will suspend any outer (earlier) +// critical sections. However, they are less efficient for this use case than +// purposefully designed reentrant locks. +// +// Example usage: +// Py_BEGIN_CRITICAL_SECTION(op); +// ... 
+// Py_END_CRITICAL_SECTION(); +// +// To lock two objects at once: +// Py_BEGIN_CRITICAL_SECTION2(op1, op2); +// ... +// Py_END_CRITICAL_SECTION2(); + +typedef struct PyCriticalSection PyCriticalSection; +typedef struct PyCriticalSection2 PyCriticalSection2; + +PyAPI_FUNC(void) +PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op); + +PyAPI_FUNC(void) +PyCriticalSection_End(PyCriticalSection *c); + +PyAPI_FUNC(void) +PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b); + +PyAPI_FUNC(void) +PyCriticalSection2_End(PyCriticalSection2 *c); + +#ifndef Py_GIL_DISABLED +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { +# define Py_END_CRITICAL_SECTION() \ + } +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { +# define Py_END_CRITICAL_SECTION2() \ + } +#else /* !Py_GIL_DISABLED */ + +// NOTE: the contents of this struct are private and may change between +// Python releases without a deprecation period. +struct PyCriticalSection { + // Tagged pointer to an outer active critical section (or 0). + uintptr_t _cs_prev; + + // Mutex used to protect critical section + PyMutex *_cs_mutex; +}; + +// A critical section protected by two mutexes. Use +// Py_BEGIN_CRITICAL_SECTION2 and Py_END_CRITICAL_SECTION2. +// NOTE: the contents of this struct are private and may change between +// Python releases without a deprecation period. 
+struct PyCriticalSection2 { + PyCriticalSection _cs_base; + + PyMutex *_cs_mutex2; +}; + +# define Py_BEGIN_CRITICAL_SECTION(op) \ + { \ + PyCriticalSection _py_cs; \ + PyCriticalSection_Begin(&_py_cs, _PyObject_CAST(op)) + +# define Py_END_CRITICAL_SECTION() \ + PyCriticalSection_End(&_py_cs); \ + } + +# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ + { \ + PyCriticalSection2 _py_cs2; \ + PyCriticalSection2_Begin(&_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b)) + +# define Py_END_CRITICAL_SECTION2() \ + PyCriticalSection2_End(&_py_cs2); \ + } + +#endif diff --git a/Include/critical_section.h b/Include/critical_section.h new file mode 100644 index 0000000..3b37615 --- /dev/null +++ b/Include/critical_section.h @@ -0,0 +1,16 @@ +#ifndef Py_CRITICAL_SECTION_H +#define Py_CRITICAL_SECTION_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_CRITICAL_SECTION_H +# include "cpython/critical_section.h" +# undef Py_CPYTHON_CRITICAL_SECTION_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CRITICAL_SECTION_H */ diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h index 3e15c3a..78cd0d5 100644 --- a/Include/internal/pycore_critical_section.h +++ b/Include/internal/pycore_critical_section.h @@ -13,75 +13,9 @@ extern "C" { #endif -// Implementation of Python critical sections -// -// Conceptually, critical sections are a deadlock avoidance layer on top of -// per-object locks. These helpers, in combination with those locks, replace -// our usage of the global interpreter lock to provide thread-safety for -// otherwise thread-unsafe objects, such as dict. -// -// NOTE: These APIs are no-ops in non-free-threaded builds. -// -// Straightforward per-object locking could introduce deadlocks that were not -// present when running with the GIL. Threads may hold locks for multiple -// objects simultaneously because Python operations can nest. 
If threads were -// to acquire the same locks in different orders, they would deadlock. -// -// One way to avoid deadlocks is to allow threads to hold only the lock (or -// locks) for a single operation at a time (typically a single lock, but some -// operations involve two locks). When a thread begins a nested operation it -// could suspend the locks for any outer operation: before beginning the nested -// operation, the locks for the outer operation are released and when the -// nested operation completes, the locks for the outer operation are -// reacquired. -// -// To improve performance, this API uses a variation of the above scheme. -// Instead of immediately suspending locks any time a nested operation begins, -// locks are only suspended if the thread would block. This reduces the number -// of lock acquisitions and releases for nested operations, while still -// avoiding deadlocks. -// -// Additionally, the locks for any active operation are suspended around -// other potentially blocking operations, such as I/O. This is because the -// interaction between locks and blocking operations can lead to deadlocks in -// the same way as the interaction between multiple locks. -// -// Each thread's critical sections and their corresponding locks are tracked in -// a stack in `PyThreadState.critical_section`. When a thread calls -// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when -// waiting to acquire a lock, the thread suspends all of its active critical -// sections, temporarily releasing the associated locks. When the thread calls -// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent) -// critical section by reacquiring the associated lock or locks. See -// `_PyCriticalSection_Resume()`. -// -// NOTE: Only the top-most critical section is guaranteed to be active. -// Operations that need to lock two objects at once must use -// `Py_BEGIN_CRITICAL_SECTION2()`. 
You *CANNOT* use nested critical sections -// to lock more than one object at once, because the inner critical section -// may suspend the outer critical sections. This API does not provide a way -// to lock more than two objects at once (though it could be added later -// if actually needed). -// -// NOTE: Critical sections implicitly behave like reentrant locks because -// attempting to acquire the same lock will suspend any outer (earlier) -// critical sections. However, they are less efficient for this use case than -// purposefully designed reentrant locks. -// -// Example usage: -// Py_BEGIN_CRITICAL_SECTION(op); -// ... -// Py_END_CRITICAL_SECTION(); -// -// To lock two objects at once: -// Py_BEGIN_CRITICAL_SECTION2(op1, op2); -// ... -// Py_END_CRITICAL_SECTION2(); - - // Tagged pointers to critical sections use the two least significant bits to // mark if the pointed-to critical section is inactive and whether it is a -// _PyCriticalSection2 object. +// PyCriticalSection2 object. 
#define _Py_CRITICAL_SECTION_INACTIVE 0x1 #define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2 #define _Py_CRITICAL_SECTION_MASK 0x3 @@ -89,24 +23,13 @@ extern "C" { #ifdef Py_GIL_DISABLED # define Py_BEGIN_CRITICAL_SECTION_MUT(mutex) \ { \ - _PyCriticalSection _cs; \ - _PyCriticalSection_Begin(&_cs, mutex) - -# define Py_BEGIN_CRITICAL_SECTION(op) \ - Py_BEGIN_CRITICAL_SECTION_MUT(&_PyObject_CAST(op)->ob_mutex) + PyCriticalSection _py_cs; \ + _PyCriticalSection_BeginMutex(&_py_cs, mutex) -# define Py_END_CRITICAL_SECTION() \ - _PyCriticalSection_End(&_cs); \ - } - -# define Py_BEGIN_CRITICAL_SECTION2(a, b) \ +# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) \ { \ - _PyCriticalSection2 _cs2; \ - _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex) - -# define Py_END_CRITICAL_SECTION2() \ - _PyCriticalSection2_End(&_cs2); \ - } + PyCriticalSection2 _py_cs2; \ + _PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2) // Specialized version of critical section locking to safely use // PySequence_Fast APIs without the GIL. For performance, the argument *to* @@ -117,21 +40,21 @@ extern "C" { { \ PyObject *_orig_seq = _PyObject_CAST(original); \ const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \ - _PyCriticalSection _cs; \ + PyCriticalSection _cs; \ if (_should_lock_cs) { \ - _PyCriticalSection_Begin(&_cs, &_orig_seq->ob_mutex); \ + _PyCriticalSection_Begin(&_cs, _orig_seq); \ } # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \ if (_should_lock_cs) { \ - _PyCriticalSection_End(&_cs); \ + PyCriticalSection_End(&_cs); \ } \ } // Asserts that the mutex is locked. The mutex must be held by the // top-most critical section otherwise there's the possibility // that the mutex would be swalled out in some code paths. -#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) \ +#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) \ _PyCriticalSection_AssertHeld(mutex) // Asserts that the mutex for the given object is locked. 
The mutex must @@ -139,73 +62,57 @@ extern "C" { // possibility that the mutex would be swalled out in some code paths. #ifdef Py_DEBUG -#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \ +# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \ if (Py_REFCNT(op) != 1) { \ _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \ } #else /* Py_DEBUG */ -#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) +# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) #endif /* Py_DEBUG */ #else /* !Py_GIL_DISABLED */ // The critical section APIs are no-ops with the GIL. -# define Py_BEGIN_CRITICAL_SECTION_MUT(mut) -# define Py_BEGIN_CRITICAL_SECTION(op) -# define Py_END_CRITICAL_SECTION() -# define Py_BEGIN_CRITICAL_SECTION2(a, b) -# define Py_END_CRITICAL_SECTION2() -# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) -# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() +# define Py_BEGIN_CRITICAL_SECTION_MUT(mut) { +# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) { +# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) { +# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() } # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) #endif /* !Py_GIL_DISABLED */ -typedef struct { - // Tagged pointer to an outer active critical section (or 0). - // The two least-significant-bits indicate whether the pointed-to critical - // section is inactive and whether it is a _PyCriticalSection2 object. - uintptr_t prev; - - // Mutex used to protect critical section - PyMutex *mutex; -} _PyCriticalSection; - -// A critical section protected by two mutexes. Use -// _PyCriticalSection2_Begin and _PyCriticalSection2_End. -typedef struct { - _PyCriticalSection base; - - PyMutex *mutex2; -} _PyCriticalSection2; - -static inline int -_PyCriticalSection_IsActive(uintptr_t tag) -{ - return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0; -} - // Resumes the top-most critical section. 
PyAPI_FUNC(void) _PyCriticalSection_Resume(PyThreadState *tstate); // (private) slow path for locking the mutex PyAPI_FUNC(void) -_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m); +_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m); PyAPI_FUNC(void) -_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, +_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked); +PyAPI_FUNC(void) +_PyCriticalSection_SuspendAll(PyThreadState *tstate); + +#ifdef Py_GIL_DISABLED + +static inline int +_PyCriticalSection_IsActive(uintptr_t tag) +{ + return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0; +} + static inline void -_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m) { if (PyMutex_LockFast(&m->_bits)) { PyThreadState *tstate = _PyThreadState_GET(); - c->mutex = m; - c->prev = tstate->critical_section; + c->_cs_mutex = m; + c->_cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c; } else { @@ -213,14 +120,21 @@ _PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m) } } +static inline void +_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +{ + _PyCriticalSection_BeginMutex(c, &op->ob_mutex); +} +#define PyCriticalSection_Begin _PyCriticalSection_Begin + // Removes the top-most critical section from the thread's stack of critical // sections. If the new top-most critical section is inactive, then it is // resumed. 
static inline void -_PyCriticalSection_Pop(_PyCriticalSection *c) +_PyCriticalSection_Pop(PyCriticalSection *c) { PyThreadState *tstate = _PyThreadState_GET(); - uintptr_t prev = c->prev; + uintptr_t prev = c->_cs_prev; tstate->critical_section = prev; if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) { @@ -229,20 +143,21 @@ _PyCriticalSection_Pop(_PyCriticalSection *c) } static inline void -_PyCriticalSection_End(_PyCriticalSection *c) +_PyCriticalSection_End(PyCriticalSection *c) { - PyMutex_Unlock(c->mutex); + PyMutex_Unlock(c->_cs_mutex); _PyCriticalSection_Pop(c); } +#define PyCriticalSection_End _PyCriticalSection_End static inline void -_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) { if (m1 == m2) { // If the two mutex arguments are the same, treat this as a critical // section with a single mutex. - c->mutex2 = NULL; - _PyCriticalSection_Begin(&c->base, m1); + c->_cs_mutex2 = NULL; + _PyCriticalSection_BeginMutex(&c->_cs_base, m1); return; } @@ -258,9 +173,9 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) if (PyMutex_LockFast(&m1->_bits)) { if (PyMutex_LockFast(&m2->_bits)) { PyThreadState *tstate = _PyThreadState_GET(); - c->base.mutex = m1; - c->mutex2 = m2; - c->base.prev = tstate->critical_section; + c->_cs_base._cs_mutex = m1; + c->_cs_mutex2 = m2; + c->_cs_base._cs_prev = tstate->critical_section; uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES; tstate->critical_section = p; @@ -275,19 +190,22 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) } static inline void -_PyCriticalSection2_End(_PyCriticalSection2 *c) +_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) { - if (c->mutex2) { - PyMutex_Unlock(c->mutex2); - } - PyMutex_Unlock(c->base.mutex); - _PyCriticalSection_Pop(&c->base); + _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); 
} +#define PyCriticalSection2_Begin _PyCriticalSection2_Begin -PyAPI_FUNC(void) -_PyCriticalSection_SuspendAll(PyThreadState *tstate); - -#ifdef Py_GIL_DISABLED +static inline void +_PyCriticalSection2_End(PyCriticalSection2 *c) +{ + if (c->_cs_mutex2) { + PyMutex_Unlock(c->_cs_mutex2); + } + PyMutex_Unlock(c->_cs_base._cs_mutex); + _PyCriticalSection_Pop(&c->_cs_base); +} +#define PyCriticalSection2_End _PyCriticalSection2_End static inline void _PyCriticalSection_AssertHeld(PyMutex *mutex) @@ -296,18 +214,18 @@ _PyCriticalSection_AssertHeld(PyMutex *mutex) PyThreadState *tstate = _PyThreadState_GET(); uintptr_t prev = tstate->critical_section; if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) { - _PyCriticalSection2 *cs = (_PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK); - assert(cs != NULL && (cs->base.mutex == mutex || cs->mutex2 == mutex)); + PyCriticalSection2 *cs = (PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && (cs->_cs_base._cs_mutex == mutex || cs->_cs_mutex2 == mutex)); } else { - _PyCriticalSection *cs = (_PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK); - assert(cs != NULL && cs->mutex == mutex); + PyCriticalSection *cs = (PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && cs->_cs_mutex == mutex); } #endif } -#endif +#endif /* Py_GIL_DISABLED */ #ifdef __cplusplus } diff --git a/Makefile.pre.in b/Makefile.pre.in index c587b0d..6a7b21f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1004,6 +1004,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/codecs.h \ $(srcdir)/Include/compile.h \ $(srcdir)/Include/complexobject.h \ + $(srcdir)/Include/critical_section.h \ $(srcdir)/Include/descrobject.h \ $(srcdir)/Include/dictobject.h \ $(srcdir)/Include/dynamic_annotations.h \ @@ -1081,6 +1082,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/cpython/compile.h \ $(srcdir)/Include/cpython/complexobject.h \ $(srcdir)/Include/cpython/context.h \ + 
$(srcdir)/Include/cpython/critical_section.h \ $(srcdir)/Include/cpython/descrobject.h \ $(srcdir)/Include/cpython/dictobject.h \ $(srcdir)/Include/cpython/fileobject.h \ diff --git a/Misc/NEWS.d/next/C API/2024-05-21-19-41-41.gh-issue-119344.QKvzQb.rst b/Misc/NEWS.d/next/C API/2024-05-21-19-41-41.gh-issue-119344.QKvzQb.rst new file mode 100644 index 0000000..5a2e4d9 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-21-19-41-41.gh-issue-119344.QKvzQb.rst @@ -0,0 +1 @@ +The critical section API is now public as part of the non-limited C API. diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 0c656b4..0a888af 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -2371,7 +2371,7 @@ _sre_SRE_Match_groupdict_impl(MatchObject *self, PyObject *default_value) goto exit; } } -exit: +exit:; Py_END_CRITICAL_SECTION(); return result; diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 8b40821..1fa7c37 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -3320,6 +3320,18 @@ function_set_warning(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) Py_RETURN_NONE; } +static PyObject * +test_critical_sections(PyObject *module, PyObject *Py_UNUSED(args)) +{ + Py_BEGIN_CRITICAL_SECTION(module); + Py_END_CRITICAL_SECTION(); + + Py_BEGIN_CRITICAL_SECTION2(module, module); + Py_END_CRITICAL_SECTION2(); + + Py_RETURN_NONE; +} + static PyMethodDef TestMethods[] = { {"set_errno", set_errno, METH_VARARGS}, {"test_config", test_config, METH_NOARGS}, @@ -3463,6 +3475,7 @@ static PyMethodDef TestMethods[] = { {"check_pyimport_addmodule", check_pyimport_addmodule, METH_VARARGS}, {"test_weakref_capi", test_weakref_capi, METH_NOARGS}, {"function_set_warning", function_set_warning, METH_NOARGS}, + {"test_critical_sections", test_critical_sections, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 6e1c3b9..c7ea6bf 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ 
-3109,7 +3109,7 @@ _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value) goto dict_iter_exit; } } -dict_iter_exit: +dict_iter_exit:; Py_END_CRITICAL_SECTION(); } else { while ((key = PyIter_Next(it)) != NULL) { diff --git a/Objects/listobject.c b/Objects/listobject.c index a05ddea..dc9df3c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -452,7 +452,7 @@ PyList_SetItem(PyObject *op, Py_ssize_t i, p = self->ob_item + i; Py_XSETREF(*p, newitem); ret = 0; -end: +end:; Py_END_CRITICAL_SECTION(); return ret; } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 7c11d87..79085b6 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -62,24 +62,13 @@ class object "PyObject *" "&PyBaseObject_Type" // be released and reacquired during a subclass update if there's contention // on the subclass lock. #define TYPE_LOCK &PyInterpreterState_Get()->types.mutex -#define BEGIN_TYPE_LOCK() \ - { \ - _PyCriticalSection _cs; \ - _PyCriticalSection_Begin(&_cs, TYPE_LOCK); \ +#define BEGIN_TYPE_LOCK() Py_BEGIN_CRITICAL_SECTION_MUT(TYPE_LOCK) +#define END_TYPE_LOCK() Py_END_CRITICAL_SECTION() -#define END_TYPE_LOCK() \ - _PyCriticalSection_End(&_cs); \ - } - -#define BEGIN_TYPE_DICT_LOCK(d) \ - { \ - _PyCriticalSection2 _cs; \ - _PyCriticalSection2_Begin(&_cs, TYPE_LOCK, \ - &_PyObject_CAST(d)->ob_mutex); \ +#define BEGIN_TYPE_DICT_LOCK(d) \ + Py_BEGIN_CRITICAL_SECTION2_MUT(TYPE_LOCK, &_PyObject_CAST(d)->ob_mutex) -#define END_TYPE_DICT_LOCK() \ - _PyCriticalSection2_End(&_cs); \ - } +#define END_TYPE_DICT_LOCK() Py_END_CRITICAL_SECTION2() #define ASSERT_TYPE_LOCK_HELD() \ _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(TYPE_LOCK) @@ -442,7 +431,7 @@ _PyType_GetBases(PyTypeObject *self) BEGIN_TYPE_LOCK(); res = lookup_tp_bases(self); Py_INCREF(res); - END_TYPE_LOCK() + END_TYPE_LOCK(); return res; } @@ -513,7 +502,7 @@ _PyType_GetMRO(PyTypeObject *self) BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(self); Py_XINCREF(mro); - END_TYPE_LOCK() + 
END_TYPE_LOCK(); return mro; #else return Py_XNewRef(lookup_tp_mro(self)); @@ -950,10 +939,10 @@ PyType_Watch(int watcher_id, PyObject* obj) return -1; } // ensure we will get a callback on the next modification - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); assign_version_tag(interp, type); type->tp_watched |= (1 << watcher_id); - END_TYPE_LOCK() + END_TYPE_LOCK(); return 0; } @@ -1063,9 +1052,9 @@ PyType_Modified(PyTypeObject *type) return; } - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); type_modified_unlocked(type); - END_TYPE_LOCK() + END_TYPE_LOCK(); } static int @@ -1185,9 +1174,9 @@ int PyUnstable_Type_AssignVersionTag(PyTypeObject *type) { PyInterpreterState *interp = _PyInterpreterState_GET(); int assigned; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); assigned = assign_version_tag(interp, type); - END_TYPE_LOCK() + END_TYPE_LOCK(); return assigned; } @@ -1470,7 +1459,7 @@ type_get_mro(PyTypeObject *type, void *context) { PyObject *mro; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); mro = lookup_tp_mro(type); if (mro == NULL) { mro = Py_None; @@ -1478,7 +1467,7 @@ type_get_mro(PyTypeObject *type, void *context) Py_INCREF(mro); } - END_TYPE_LOCK() + END_TYPE_LOCK(); return mro; } @@ -2924,9 +2913,9 @@ static PyObject * mro_implementation(PyTypeObject *type) { PyObject *mro; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); mro = mro_implementation_unlocked(type); - END_TYPE_LOCK() + END_TYPE_LOCK(); return mro; } @@ -3113,9 +3102,9 @@ static int mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); res = mro_internal_unlocked(type, 0, p_old_mro); - END_TYPE_LOCK() + END_TYPE_LOCK(); return res; } @@ -4980,7 +4969,7 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) } PyObject *res = NULL; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); PyObject *mro = lookup_tp_mro(type); // The type must be ready @@ -5007,7 +4996,7 @@ get_module_by_def(PyTypeObject *type, PyModuleDef *def) break; } } - END_TYPE_LOCK() + END_TYPE_LOCK(); return res; } 
@@ -5265,13 +5254,13 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) int has_version = 0; int version = 0; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); res = find_name_in_mro(type, name, &error); if (MCACHE_CACHEABLE_NAME(name)) { has_version = assign_version_tag(interp, type); version = type->tp_version_tag; } - END_TYPE_LOCK() + END_TYPE_LOCK(); /* Only put NULL results into cache if there was no error. */ if (error) { @@ -8262,14 +8251,14 @@ PyType_Ready(PyTypeObject *type) } int res; - BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); if (!(type->tp_flags & Py_TPFLAGS_READY)) { res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); } - END_TYPE_LOCK() + END_TYPE_LOCK(); return res; } @@ -8303,7 +8292,7 @@ init_static_type(PyInterpreterState *interp, PyTypeObject *self, int res; BEGIN_TYPE_LOCK(); res = type_ready(self, initial); - END_TYPE_LOCK() + END_TYPE_LOCK(); if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); managed_static_type_state_clear(interp, self, isbuiltin, initial); @@ -8775,7 +8764,7 @@ hackcheck(PyObject *self, setattrofunc func, const char *what) int res; BEGIN_TYPE_LOCK(); res = hackcheck_unlocked(self, func, what); - END_TYPE_LOCK() + END_TYPE_LOCK(); return res; } @@ -10704,14 +10693,14 @@ fixup_slot_dispatchers(PyTypeObject *type) // This lock isn't strictly necessary because the type has not been // exposed to anyone else yet, but update_one_slot calls find_name_in_mro // where we'd like to assert that the type is locked. 
- BEGIN_TYPE_LOCK() + BEGIN_TYPE_LOCK(); assert(!PyErr_Occurred()); for (pytype_slotdef *p = slotdefs; p->name; ) { p = update_one_slot(type, p); } - END_TYPE_LOCK() + END_TYPE_LOCK(); } static void @@ -11000,7 +10989,7 @@ _super_lookup_descr(PyTypeObject *su_type, PyTypeObject *su_obj_type, PyObject * another thread can modify it after we end the critical section below */ Py_XINCREF(mro); - END_TYPE_LOCK() + END_TYPE_LOCK(); if (mro == NULL) return NULL; diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index a415d45..6edcef7 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -135,6 +135,7 @@ + @@ -145,6 +146,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 2048d9a..824648e 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -72,6 +72,9 @@ Include + + Include + Include @@ -375,6 +378,9 @@ Include\cpython + + Include\cpython + Include\cpython diff --git a/Python/critical_section.c b/Python/critical_section.c index ac679ac..62ed255 100644 --- a/Python/critical_section.c +++ b/Python/critical_section.c @@ -3,85 +3,96 @@ #include "pycore_lock.h" #include "pycore_critical_section.h" -static_assert(_Alignof(_PyCriticalSection) >= 4, +#ifdef Py_GIL_DISABLED +static_assert(_Alignof(PyCriticalSection) >= 4, "critical section must be aligned to at least 4 bytes"); +#endif void -_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m) +_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m) { +#ifdef Py_GIL_DISABLED PyThreadState *tstate = _PyThreadState_GET(); - c->mutex = NULL; - c->prev = (uintptr_t)tstate->critical_section; + c->_cs_mutex = NULL; + c->_cs_prev = (uintptr_t)tstate->critical_section; tstate->critical_section = (uintptr_t)c; PyMutex_Lock(m); - c->mutex = m; + c->_cs_mutex = m; +#endif } void -_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, 
+_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2, int is_m1_locked) { +#ifdef Py_GIL_DISABLED PyThreadState *tstate = _PyThreadState_GET(); - c->base.mutex = NULL; - c->mutex2 = NULL; - c->base.prev = tstate->critical_section; + c->_cs_base._cs_mutex = NULL; + c->_cs_mutex2 = NULL; + c->_cs_base._cs_prev = tstate->critical_section; tstate->critical_section = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES; if (!is_m1_locked) { PyMutex_Lock(m1); } PyMutex_Lock(m2); - c->base.mutex = m1; - c->mutex2 = m2; + c->_cs_base._cs_mutex = m1; + c->_cs_mutex2 = m2; +#endif } -static _PyCriticalSection * +#ifdef Py_GIL_DISABLED +static PyCriticalSection * untag_critical_section(uintptr_t tag) { - return (_PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK); + return (PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK); } +#endif // Release all locks held by critical sections. This is called by // _PyThreadState_Detach. void _PyCriticalSection_SuspendAll(PyThreadState *tstate) { +#ifdef Py_GIL_DISABLED uintptr_t *tagptr = &tstate->critical_section; while (_PyCriticalSection_IsActive(*tagptr)) { - _PyCriticalSection *c = untag_critical_section(*tagptr); + PyCriticalSection *c = untag_critical_section(*tagptr); - if (c->mutex) { - PyMutex_Unlock(c->mutex); + if (c->_cs_mutex) { + PyMutex_Unlock(c->_cs_mutex); if ((*tagptr & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { - _PyCriticalSection2 *c2 = (_PyCriticalSection2 *)c; - if (c2->mutex2) { - PyMutex_Unlock(c2->mutex2); + PyCriticalSection2 *c2 = (PyCriticalSection2 *)c; + if (c2->_cs_mutex2) { + PyMutex_Unlock(c2->_cs_mutex2); } } } *tagptr |= _Py_CRITICAL_SECTION_INACTIVE; - tagptr = &c->prev; + tagptr = &c->_cs_prev; } +#endif } void _PyCriticalSection_Resume(PyThreadState *tstate) { +#ifdef Py_GIL_DISABLED uintptr_t p = tstate->critical_section; - _PyCriticalSection *c = untag_critical_section(p); + PyCriticalSection *c = untag_critical_section(p); assert(!_PyCriticalSection_IsActive(p)); - PyMutex 
*m1 = c->mutex; - c->mutex = NULL; + PyMutex *m1 = c->_cs_mutex; + c->_cs_mutex = NULL; PyMutex *m2 = NULL; - _PyCriticalSection2 *c2 = NULL; + PyCriticalSection2 *c2 = NULL; if ((p & _Py_CRITICAL_SECTION_TWO_MUTEXES)) { - c2 = (_PyCriticalSection2 *)c; - m2 = c2->mutex2; - c2->mutex2 = NULL; + c2 = (PyCriticalSection2 *)c; + m2 = c2->_cs_mutex2; + c2->_cs_mutex2 = NULL; } if (m1) { @@ -91,10 +102,47 @@ _PyCriticalSection_Resume(PyThreadState *tstate) PyMutex_Lock(m2); } - c->mutex = m1; + c->_cs_mutex = m1; if (m2) { - c2->mutex2 = m2; + c2->_cs_mutex2 = m2; } tstate->critical_section &= ~_Py_CRITICAL_SECTION_INACTIVE; +#endif +} + +#undef PyCriticalSection_Begin +void +PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection_Begin(c, op); +#endif +} + +#undef PyCriticalSection_End +void +PyCriticalSection_End(PyCriticalSection *c) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection_End(c); +#endif +} + +#undef PyCriticalSection2_Begin +void +PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection2_Begin(c, a, b); +#endif +} + +#undef PyCriticalSection2_End +void +PyCriticalSection2_End(PyCriticalSection2 *c) +{ +#ifdef Py_GIL_DISABLED + _PyCriticalSection2_End(c); +#endif } -- cgit v0.12