author | Sam Gross <colesbury@gmail.com> | 2024-06-21 20:20:41 (GMT)
committer | GitHub <noreply@github.com> | 2024-06-21 20:20:41 (GMT)
commit | 4dc27bc0b76aa5985ccef2901f7a4f5d36b97995 (patch)
tree | 3219748fd1c47dec959096aed7457cf1fad5d43a /Include
parent | e748805f2a0f783723ef02809f86cd3d8fb8bf2e (diff)
[3.13] gh-119344: Make critical section API public (GH-119353) (#120856)
This makes the following macros public, as part of the non-limited C API, for
locking a single object or two objects at once:
* `Py_BEGIN_CRITICAL_SECTION(op)` / `Py_END_CRITICAL_SECTION()`
* `Py_BEGIN_CRITICAL_SECTION2(a, b)` / `Py_END_CRITICAL_SECTION2()`
The supporting functions and structs used by the macros are also exposed for
cases where C macros are not available.
(cherry picked from commit 8f17d69b7bc906e8407095317842cc0fd52cd84a)
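As a usage illustration (not part of the commit), here is a minimal sketch of an
extension method that mutates per-object state under the new public macros; the
`CounterObject` type and `counter_increment` function are hypothetical:

    #include <Python.h>  /* pulls in Include/critical_section.h in the non-limited API */

    /* Hypothetical extension type whose counter field is guarded by the
     * object's per-object lock on free-threaded builds. */
    typedef struct {
        PyObject_HEAD
        Py_ssize_t counter;
    } CounterObject;

    static PyObject *
    counter_increment(PyObject *op, PyObject *Py_UNUSED(args))
    {
        CounterObject *self = (CounterObject *)op;
        /* Locks op's per-object mutex on Py_GIL_DISABLED builds;
         * expands to a plain no-op block on default (GIL) builds. */
        Py_BEGIN_CRITICAL_SECTION(op);
        self->counter++;
        Py_END_CRITICAL_SECTION();
        Py_RETURN_NONE;
    }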
Diffstat (limited to 'Include')
-rw-r--r-- | Include/Python.h | 1
-rw-r--r-- | Include/cpython/critical_section.h | 134
-rw-r--r-- | Include/critical_section.h | 16
-rw-r--r-- | Include/internal/pycore_critical_section.h | 218
4 files changed, 219 insertions, 150 deletions
diff --git a/Include/Python.h b/Include/Python.h
index ba2724c..95bc631 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -124,6 +124,7 @@
 #include "import.h"
 #include "abstract.h"
 #include "bltinmodule.h"
+#include "critical_section.h"
 #include "cpython/pyctype.h"
 #include "pystrtod.h"
 #include "pystrcmp.h"
diff --git a/Include/cpython/critical_section.h b/Include/cpython/critical_section.h
new file mode 100644
index 0000000..35db3fb
--- /dev/null
+++ b/Include/cpython/critical_section.h
@@ -0,0 +1,134 @@
+#ifndef Py_CPYTHON_CRITICAL_SECTION_H
+# error "this header file must not be included directly"
+#endif
+
+// Python critical sections
+//
+// Conceptually, critical sections are a deadlock avoidance layer on top of
+// per-object locks. These helpers, in combination with those locks, replace
+// our usage of the global interpreter lock to provide thread-safety for
+// otherwise thread-unsafe objects, such as dict.
+//
+// NOTE: These APIs are no-ops in non-free-threaded builds.
+//
+// Straightforward per-object locking could introduce deadlocks that were not
+// present when running with the GIL. Threads may hold locks for multiple
+// objects simultaneously because Python operations can nest. If threads were
+// to acquire the same locks in different orders, they would deadlock.
+//
+// One way to avoid deadlocks is to allow threads to hold only the lock (or
+// locks) for a single operation at a time (typically a single lock, but some
+// operations involve two locks). When a thread begins a nested operation it
+// could suspend the locks for any outer operation: before beginning the nested
+// operation, the locks for the outer operation are released and when the
+// nested operation completes, the locks for the outer operation are
+// reacquired.
+//
+// To improve performance, this API uses a variation of the above scheme.
+// Instead of immediately suspending locks any time a nested operation begins,
+// locks are only suspended if the thread would block. This reduces the number
+// of lock acquisitions and releases for nested operations, while still
+// avoiding deadlocks.
+//
+// Additionally, the locks for any active operation are suspended around
+// other potentially blocking operations, such as I/O. This is because the
+// interaction between locks and blocking operations can lead to deadlocks in
+// the same way as the interaction between multiple locks.
+//
+// Each thread's critical sections and their corresponding locks are tracked in
+// a stack in `PyThreadState.critical_section`. When a thread calls
+// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
+// waiting to acquire a lock, the thread suspends all of its active critical
+// sections, temporarily releasing the associated locks. When the thread calls
+// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
+// critical section by reacquiring the associated lock or locks. See
+// `_PyCriticalSection_Resume()`.
+//
+// NOTE: Only the top-most critical section is guaranteed to be active.
+// Operations that need to lock two objects at once must use
+// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
+// to lock more than one object at once, because the inner critical section
+// may suspend the outer critical sections. This API does not provide a way
+// to lock more than two objects at once (though it could be added later
+// if actually needed).
+//
+// NOTE: Critical sections implicitly behave like reentrant locks because
+// attempting to acquire the same lock will suspend any outer (earlier)
+// critical sections. However, they are less efficient for this use case than
+// purposefully designed reentrant locks.
+//
+// Example usage:
+//      Py_BEGIN_CRITICAL_SECTION(op);
+//      ...
+//      Py_END_CRITICAL_SECTION();
+//
+// To lock two objects at once:
+//      Py_BEGIN_CRITICAL_SECTION2(op1, op2);
+//      ...
+//      Py_END_CRITICAL_SECTION2();
+
+typedef struct PyCriticalSection PyCriticalSection;
+typedef struct PyCriticalSection2 PyCriticalSection2;
+
+PyAPI_FUNC(void)
+PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op);
+
+PyAPI_FUNC(void)
+PyCriticalSection_End(PyCriticalSection *c);
+
+PyAPI_FUNC(void)
+PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b);
+
+PyAPI_FUNC(void)
+PyCriticalSection2_End(PyCriticalSection2 *c);
+
+#ifndef Py_GIL_DISABLED
+# define Py_BEGIN_CRITICAL_SECTION(op)          \
+    {
+# define Py_END_CRITICAL_SECTION()              \
+    }
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)       \
+    {
+# define Py_END_CRITICAL_SECTION2()             \
+    }
+#else /* !Py_GIL_DISABLED */
+
+// NOTE: the contents of this struct are private and may change between
+// Python releases without a deprecation period.
+struct PyCriticalSection {
+    // Tagged pointer to an outer active critical section (or 0).
+    uintptr_t _cs_prev;
+
+    // Mutex used to protect critical section
+    PyMutex *_cs_mutex;
+};
+
+// A critical section protected by two mutexes. Use
+// Py_BEGIN_CRITICAL_SECTION2 and Py_END_CRITICAL_SECTION2.
+// NOTE: the contents of this struct are private and may change between
+// Python releases without a deprecation period.
+struct PyCriticalSection2 {
+    PyCriticalSection _cs_base;
+
+    PyMutex *_cs_mutex2;
+};
+
+# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
+    {                                                                   \
+        PyCriticalSection _py_cs;                                       \
+        PyCriticalSection_Begin(&_py_cs, _PyObject_CAST(op))
+
+# define Py_END_CRITICAL_SECTION()                                      \
+        PyCriticalSection_End(&_py_cs);                                 \
+    }
+
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
+    {                                                                   \
+        PyCriticalSection2 _py_cs2;                                     \
+        PyCriticalSection2_Begin(&_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b))
+
+# define Py_END_CRITICAL_SECTION2()                                     \
+        PyCriticalSection2_End(&_py_cs2);                               \
+    }
+
+#endif
diff --git a/Include/critical_section.h b/Include/critical_section.h
new file mode 100644
index 0000000..3b37615
--- /dev/null
+++ b/Include/critical_section.h
@@ -0,0 +1,16 @@
+#ifndef Py_CRITICAL_SECTION_H
+#define Py_CRITICAL_SECTION_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_LIMITED_API
+# define Py_CPYTHON_CRITICAL_SECTION_H
+# include "cpython/critical_section.h"
+# undef Py_CPYTHON_CRITICAL_SECTION_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CRITICAL_SECTION_H */
diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
index 3e15c3a..78cd0d5 100644
--- a/Include/internal/pycore_critical_section.h
+++ b/Include/internal/pycore_critical_section.h
@@ -13,75 +13,9 @@ extern "C" {
 #endif
 
-// Implementation of Python critical sections
-//
-// Conceptually, critical sections are a deadlock avoidance layer on top of
-// per-object locks. These helpers, in combination with those locks, replace
-// our usage of the global interpreter lock to provide thread-safety for
-// otherwise thread-unsafe objects, such as dict.
-//
-// NOTE: These APIs are no-ops in non-free-threaded builds.
-//
-// Straightforward per-object locking could introduce deadlocks that were not
-// present when running with the GIL. Threads may hold locks for multiple
-// objects simultaneously because Python operations can nest. If threads were
-// to acquire the same locks in different orders, they would deadlock.
-//
-// One way to avoid deadlocks is to allow threads to hold only the lock (or
-// locks) for a single operation at a time (typically a single lock, but some
-// operations involve two locks). When a thread begins a nested operation it
-// could suspend the locks for any outer operation: before beginning the nested
-// operation, the locks for the outer operation are released and when the
-// nested operation completes, the locks for the outer operation are
-// reacquired.
-//
-// To improve performance, this API uses a variation of the above scheme.
-// Instead of immediately suspending locks any time a nested operation begins,
-// locks are only suspended if the thread would block. This reduces the number
-// of lock acquisitions and releases for nested operations, while still
-// avoiding deadlocks.
-//
-// Additionally, the locks for any active operation are suspended around
-// other potentially blocking operations, such as I/O. This is because the
-// interaction between locks and blocking operations can lead to deadlocks in
-// the same way as the interaction between multiple locks.
-//
-// Each thread's critical sections and their corresponding locks are tracked in
-// a stack in `PyThreadState.critical_section`. When a thread calls
-// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
-// waiting to acquire a lock, the thread suspends all of its active critical
-// sections, temporarily releasing the associated locks. When the thread calls
-// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
-// critical section by reacquiring the associated lock or locks. See
-// `_PyCriticalSection_Resume()`.
-//
-// NOTE: Only the top-most critical section is guaranteed to be active.
-// Operations that need to lock two objects at once must use
-// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
-// to lock more than one object at once, because the inner critical section
-// may suspend the outer critical sections. This API does not provide a way
-// to lock more than two objects at once (though it could be added later
-// if actually needed).
-//
-// NOTE: Critical sections implicitly behave like reentrant locks because
-// attempting to acquire the same lock will suspend any outer (earlier)
-// critical sections. However, they are less efficient for this use case than
-// purposefully designed reentrant locks.
-//
-// Example usage:
-//      Py_BEGIN_CRITICAL_SECTION(op);
-//      ...
-//      Py_END_CRITICAL_SECTION();
-//
-// To lock two objects at once:
-//      Py_BEGIN_CRITICAL_SECTION2(op1, op2);
-//      ...
-//      Py_END_CRITICAL_SECTION2();
-
-
 // Tagged pointers to critical sections use the two least significant bits to
 // mark if the pointed-to critical section is inactive and whether it is a
-// _PyCriticalSection2 object.
+// PyCriticalSection2 object.
 #define _Py_CRITICAL_SECTION_INACTIVE 0x1
 #define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2
 #define _Py_CRITICAL_SECTION_MASK 0x3
@@ -89,24 +23,13 @@ extern "C" {
 #ifdef Py_GIL_DISABLED
 # define Py_BEGIN_CRITICAL_SECTION_MUT(mutex)                           \
     {                                                                   \
-        _PyCriticalSection _cs;                                         \
-        _PyCriticalSection_Begin(&_cs, mutex)
-
-# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
-    Py_BEGIN_CRITICAL_SECTION_MUT(&_PyObject_CAST(op)->ob_mutex)
+        PyCriticalSection _py_cs;                                       \
+        _PyCriticalSection_BeginMutex(&_py_cs, mutex)
 
-# define Py_END_CRITICAL_SECTION()                                      \
-    _PyCriticalSection_End(&_cs);                                       \
-    }
-
-# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
+# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2)                         \
     {                                                                   \
-        _PyCriticalSection2 _cs2;                                       \
-        _PyCriticalSection2_Begin(&_cs2, &_PyObject_CAST(a)->ob_mutex, &_PyObject_CAST(b)->ob_mutex)
-
-# define Py_END_CRITICAL_SECTION2()                                     \
-    _PyCriticalSection2_End(&_cs2);                                     \
-    }
+        PyCriticalSection2 _py_cs2;                                     \
+        _PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2)
 
 // Specialized version of critical section locking to safely use
 // PySequence_Fast APIs without the GIL. For performance, the argument *to*
@@ -117,21 +40,21 @@ extern "C" {
     {                                                                   \
         PyObject *_orig_seq = _PyObject_CAST(original);                 \
         const bool _should_lock_cs = PyList_CheckExact(_orig_seq);      \
-        _PyCriticalSection _cs;                                         \
+        PyCriticalSection _cs;                                          \
         if (_should_lock_cs) {                                          \
-            _PyCriticalSection_Begin(&_cs, &_orig_seq->ob_mutex);       \
+            _PyCriticalSection_Begin(&_cs, _orig_seq);                  \
         }
 
 # define Py_END_CRITICAL_SECTION_SEQUENCE_FAST()                        \
         if (_should_lock_cs) {                                          \
-            _PyCriticalSection_End(&_cs);                               \
+            PyCriticalSection_End(&_cs);                                \
        }                                                                \
    }
 
 // Asserts that the mutex is locked. The mutex must be held by the
 // top-most critical section, otherwise there's the possibility
 // that the mutex would be swapped out in some code paths.
-#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) \
+#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex)                 \
     _PyCriticalSection_AssertHeld(mutex)
 
 // Asserts that the mutex for the given object is locked. The mutex must
@@ -139,73 +62,57 @@ extern "C" {
 // possibility that the mutex would be swapped out in some code paths.
 #ifdef Py_DEBUG
-#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \
+# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)                  \
     if (Py_REFCNT(op) != 1) {                                           \
         _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \
     }
 #else /* Py_DEBUG */
-#define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
+# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
 #endif /* Py_DEBUG */
 
 #else /* !Py_GIL_DISABLED */
 
 // The critical section APIs are no-ops with the GIL.
-# define Py_BEGIN_CRITICAL_SECTION_MUT(mut)
-# define Py_BEGIN_CRITICAL_SECTION(op)
-# define Py_END_CRITICAL_SECTION()
-# define Py_BEGIN_CRITICAL_SECTION2(a, b)
-# define Py_END_CRITICAL_SECTION2()
-# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original)
-# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST()
+# define Py_BEGIN_CRITICAL_SECTION_MUT(mut) {
+# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) {
+# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) {
+# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() }
 # define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex)
 # define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
 #endif /* !Py_GIL_DISABLED */
 
-typedef struct {
-    // Tagged pointer to an outer active critical section (or 0).
-    // The two least-significant-bits indicate whether the pointed-to critical
-    // section is inactive and whether it is a _PyCriticalSection2 object.
-    uintptr_t prev;
-
-    // Mutex used to protect critical section
-    PyMutex *mutex;
-} _PyCriticalSection;
-
-// A critical section protected by two mutexes. Use
-// _PyCriticalSection2_Begin and _PyCriticalSection2_End.
-typedef struct {
-    _PyCriticalSection base;
-
-    PyMutex *mutex2;
-} _PyCriticalSection2;
-
-static inline int
-_PyCriticalSection_IsActive(uintptr_t tag)
-{
-    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
-}
-
 // Resumes the top-most critical section.
 PyAPI_FUNC(void)
 _PyCriticalSection_Resume(PyThreadState *tstate);
 
 // (private) slow path for locking the mutex
 PyAPI_FUNC(void)
-_PyCriticalSection_BeginSlow(_PyCriticalSection *c, PyMutex *m);
+_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m);
 
 PyAPI_FUNC(void)
-_PyCriticalSection2_BeginSlow(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
                               int is_m1_locked);
 
+PyAPI_FUNC(void)
+_PyCriticalSection_SuspendAll(PyThreadState *tstate);
+
+#ifdef Py_GIL_DISABLED
+
+static inline int
+_PyCriticalSection_IsActive(uintptr_t tag)
+{
+    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
+}
+
 static inline void
-_PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
+_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m)
 {
     if (PyMutex_LockFast(&m->_bits)) {
         PyThreadState *tstate = _PyThreadState_GET();
-        c->mutex = m;
-        c->prev = tstate->critical_section;
+        c->_cs_mutex = m;
+        c->_cs_prev = tstate->critical_section;
         tstate->critical_section = (uintptr_t)c;
     }
    else {
@@ -213,14 +120,21 @@ _PyCriticalSection_Begin(_PyCriticalSection *c, PyMutex *m)
     }
 }
 
+static inline void
+_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op)
+{
+    _PyCriticalSection_BeginMutex(c, &op->ob_mutex);
+}
+#define PyCriticalSection_Begin _PyCriticalSection_Begin
+
 // Removes the top-most critical section from the thread's stack of critical
 // sections. If the new top-most critical section is inactive, then it is
 // resumed.
 static inline void
-_PyCriticalSection_Pop(_PyCriticalSection *c)
+_PyCriticalSection_Pop(PyCriticalSection *c)
 {
     PyThreadState *tstate = _PyThreadState_GET();
-    uintptr_t prev = c->prev;
+    uintptr_t prev = c->_cs_prev;
     tstate->critical_section = prev;
 
    if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) {
@@ -229,20 +143,21 @@
 }
 
 static inline void
-_PyCriticalSection_End(_PyCriticalSection *c)
+_PyCriticalSection_End(PyCriticalSection *c)
 {
-    PyMutex_Unlock(c->mutex);
+    PyMutex_Unlock(c->_cs_mutex);
     _PyCriticalSection_Pop(c);
 }
+#define PyCriticalSection_End _PyCriticalSection_End
 
 static inline void
-_PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
+_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
 {
     if (m1 == m2) {
         // If the two mutex arguments are the same, treat this as a critical
         // section with a single mutex.
-        c->mutex2 = NULL;
-        _PyCriticalSection_Begin(&c->base, m1);
+        c->_cs_mutex2 = NULL;
+        _PyCriticalSection_BeginMutex(&c->_cs_base, m1);
         return;
     }
 
@@ -258,9 +173,9 @@ _PyCriticalSection2_Begin(_PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
     if (PyMutex_LockFast(&m1->_bits)) {
         if (PyMutex_LockFast(&m2->_bits)) {
             PyThreadState *tstate = _PyThreadState_GET();
-            c->base.mutex = m1;
-            c->mutex2 = m2;
-            c->base.prev = tstate->critical_section;
+            c->_cs_base._cs_mutex = m1;
+            c->_cs_mutex2 = m2;
+            c->_cs_base._cs_prev = tstate->critical_section;
 
             uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES;
             tstate->critical_section = p;
@@ -275,19 +190,22 @@
 }
 
 static inline void
-_PyCriticalSection2_End(_PyCriticalSection2 *c)
+_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b)
 {
-    if (c->mutex2) {
-        PyMutex_Unlock(c->mutex2);
-    }
-    PyMutex_Unlock(c->base.mutex);
-    _PyCriticalSection_Pop(&c->base);
+    _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex);
 }
+#define PyCriticalSection2_Begin _PyCriticalSection2_Begin
 
-PyAPI_FUNC(void)
-_PyCriticalSection_SuspendAll(PyThreadState *tstate);
-
-#ifdef Py_GIL_DISABLED
+static inline void
+_PyCriticalSection2_End(PyCriticalSection2 *c)
+{
+    if (c->_cs_mutex2) {
+        PyMutex_Unlock(c->_cs_mutex2);
+    }
+    PyMutex_Unlock(c->_cs_base._cs_mutex);
+    _PyCriticalSection_Pop(&c->_cs_base);
+}
+#define PyCriticalSection2_End _PyCriticalSection2_End
 
 static inline void
 _PyCriticalSection_AssertHeld(PyMutex *mutex)
@@ -296,18 +214,18 @@
     PyThreadState *tstate = _PyThreadState_GET();
     uintptr_t prev = tstate->critical_section;
     if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) {
-        _PyCriticalSection2 *cs = (_PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK);
-        assert(cs != NULL && (cs->base.mutex == mutex || cs->mutex2 == mutex));
+        PyCriticalSection2 *cs = (PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK);
+        assert(cs != NULL && (cs->_cs_base._cs_mutex == mutex || cs->_cs_mutex2 == mutex));
     }
     else {
-        _PyCriticalSection *cs = (_PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK);
-        assert(cs != NULL && cs->mutex == mutex);
+        PyCriticalSection *cs = (PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK);
+        assert(cs != NULL && cs->_cs_mutex == mutex);
     }
 #endif
 }
 
-#endif
+#endif /* Py_GIL_DISABLED */
 
 #ifdef __cplusplus
 }
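For the function-based form mentioned in the commit message (for cases where the
C macros cannot be used), here is a minimal sketch, assuming `a` and `b` are valid
object pointers; note that in this 3.13 header the struct layout is only defined
on free-threaded builds, so the stack allocation is guarded accordingly:

    #include <Python.h>

    static void
    update_pair(PyObject *a, PyObject *b)
    {
    #ifdef Py_GIL_DISABLED
        PyCriticalSection2 cs;
        PyCriticalSection2_Begin(&cs, a, b);  /* locks both per-object mutexes, deadlock-free */
        /* ... mutate a and b while both locks are held ... */
        PyCriticalSection2_End(&cs);          /* unlocks both and pops the section */
    #else
        /* With the GIL, no per-object locking is needed. */
        /* ... mutate a and b directly ... */
    #endif
    }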