summaryrefslogtreecommitdiffstats
path: root/Include/internal/pycore_lock.h
diff options
context:
space:
mode:
authorSam Gross <colesbury@gmail.com>2023-09-19 15:54:29 (GMT)
committerGitHub <noreply@github.com>2023-09-19 15:54:29 (GMT)
commit0c89056fe59ac42f09978582479d40e58a236856 (patch)
tree06cd5a790da2a6dd3862567419c25572f96ae373 /Include/internal/pycore_lock.h
parent0a31ff0050eec5079fd4c9cafd33b4e3e9afd9ab (diff)
downloadcpython-0c89056fe59ac42f09978582479d40e58a236856.zip
cpython-0c89056fe59ac42f09978582479d40e58a236856.tar.gz
cpython-0c89056fe59ac42f09978582479d40e58a236856.tar.bz2
gh-108724: Add PyMutex and _PyParkingLot APIs (gh-109344)
PyMutex is a one byte lock with fast, inlineable lock and unlock functions for the common uncontended case. The design is based on WebKit's WTF::Lock. PyMutex is built using the _PyParkingLot APIs, which provides a cross-platform futex-like API (based on WebKit's WTF::ParkingLot). This internal API will be used for building other synchronization primitives used to implement PEP 703, such as one-time initialization and events. This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock. Uncontended acquisition + release: * Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns * macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns * Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns PR Overview: The primary purpose of this PR is to implement PyMutex, but there are a number of support pieces (described below). * PyMutex: A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock. The API is internal only for now. * _PyParking_Lot: A futex-like API based on the API of the same name in WebKit. Used to implement PyMutex. * _PyRawMutex: A word sized lock used to implement _PyParking_Lot. * PyEvent: A one time event. This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR. * pycore_llist.h: Defines common operations on doubly-linked list. Not strictly necessary (could do the list operations manually), but they come up frequently in the "nogil" fork. ( Similar to https://man.freebsd.org/cgi/man.cgi?queue) --------- Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
Diffstat (limited to 'Include/internal/pycore_lock.h')
-rw-r--r--Include/internal/pycore_lock.h158
1 files changed, 158 insertions, 0 deletions
diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h
new file mode 100644
index 0000000..c4bb76a
--- /dev/null
+++ b/Include/internal/pycore_lock.h
@@ -0,0 +1,158 @@
+// Lightweight locks and other synchronization mechanisms.
+//
+// These implementations are based on WebKit's WTF::Lock. See
+// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
+// design.
+#ifndef Py_INTERNAL_LOCK_H
+#define Py_INTERNAL_LOCK_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_time.h" // _PyTime_t
+
+
+// A mutex that occupies one byte. The lock can be zero initialized.
+//
+// Only the two least significant bits are used. The remaining bits should be
+// zero:
+// 0b00: unlocked
+// 0b01: locked
+// 0b10: unlocked and has parked threads
+// 0b11: locked and has parked threads
+//
+// Typical initialization:
+// PyMutex m = (PyMutex){0};
+//
+// Typical usage:
+// PyMutex_Lock(&m);
+// ...
+// PyMutex_Unlock(&m);
+typedef struct _PyMutex {
+ uint8_t v;
+} PyMutex;
+
+#define _Py_UNLOCKED 0
+#define _Py_LOCKED 1
+#define _Py_HAS_PARKED 2
+
+// (private) slow path for locking the mutex
+PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);
+
+// (private) slow path for unlocking the mutex
+PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);
+
+// Locks the mutex.
+//
+// If the mutex is currently locked, the calling thread will be parked until
+// the mutex is unlocked. If the current thread holds the GIL, then the GIL
+// will be released while the thread is parked.
+static inline void
+PyMutex_Lock(PyMutex *m)
+{
+ uint8_t expected = _Py_UNLOCKED;
+ if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
+ _PyMutex_LockSlow(m);
+ }
+}
+
+// Unlocks the mutex.
+static inline void
+PyMutex_Unlock(PyMutex *m)
+{
+ uint8_t expected = _Py_LOCKED;
+ if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
+ _PyMutex_UnlockSlow(m);
+ }
+}
+
+// Checks if the mutex is currently locked.
+static inline int
+PyMutex_IsLocked(PyMutex *m)
+{
+ return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
+}
+
+typedef enum _PyLockFlags {
+ // Do not detach/release the GIL when waiting on the lock.
+ _Py_LOCK_DONT_DETACH = 0,
+
+ // Detach/release the GIL while waiting on the lock.
+ _PY_LOCK_DETACH = 1,
+
+ // Handle signals if interrupted while waiting on the lock.
+ _PY_LOCK_HANDLE_SIGNALS = 2,
+} _PyLockFlags;
+
+// Lock a mutex with an optional timeout and additional options. See
+// _PyLockFlags for details.
+extern PyLockStatus
+_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags);
+
+// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
+// error messages).
+extern int _PyMutex_TryUnlock(PyMutex *m);
+
+
+// PyEvent is a one-time event notification
+typedef struct {
+ uint8_t v;
+} PyEvent;
+
+// Set the event and notify any waiting threads.
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);
+
+// Wait for the event to be set. If the event is already set, then this returns
+// immediately.
+PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
+
+// Wait for the event to be set, or until the timeout expires. If the event is
+// already set, then this returns immediately. Returns 1 if the event was set,
+// and 0 if the timeout expired or thread was interrupted.
+PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns);
+
+
+// _PyRawMutex implements a word-sized mutex that that does not depend on the
+// parking lot API, and therefore can be used in the parking lot
+// implementation.
+//
+// The mutex uses a packed representation: the least significant bit is used to
+// indicate whether the mutex is locked or not. The remaining bits are either
+// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
+typedef struct {
+ uintptr_t v;
+} _PyRawMutex;
+
+// Slow paths for lock/unlock
+extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
+extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);
+
+static inline void
+_PyRawMutex_Lock(_PyRawMutex *m)
+{
+ uintptr_t unlocked = _Py_UNLOCKED;
+ if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
+ return;
+ }
+ _PyRawMutex_LockSlow(m);
+}
+
+static inline void
+_PyRawMutex_Unlock(_PyRawMutex *m)
+{
+ uintptr_t locked = _Py_LOCKED;
+ if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
+ return;
+ }
+ _PyRawMutex_UnlockSlow(m);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_LOCK_H */