Include/internal/pycore_lock.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

// Lightweight locks and other synchronization mechanisms.
//
// These implementations are based on WebKit's WTF::Lock. See
// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
// design.
#ifndef Py_INTERNAL_LOCK_H
#define Py_INTERNAL_LOCK_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h"          // PyTime_t


// A mutex that occupies one byte. The lock can be zero initialized.
//
// Only the two least significant bits are used. The remaining bits should be
// zero:
// 0b00: unlocked
// 0b01: locked
// 0b10: unlocked and has parked threads
// 0b11: locked and has parked threads
//
// Typical initialization:
//   PyMutex m = (PyMutex){0};
//
// Typical usage:
//   PyMutex_Lock(&m);
//   ...
//   PyMutex_Unlock(&m);

// NOTE: In Py_GIL_DISABLED builds, `struct _PyMutex` is defined in Include/object.h.
// The Py_GIL_DISABLED builds need the definition in Include/object.h for the
// `ob_mutex` field in PyObject. For the default (non-free-threaded) build,
// we define the struct here to avoid exposing it in the public API.
#ifndef Py_GIL_DISABLED
struct _PyMutex { uint8_t v; };
#endif

typedef struct _PyMutex PyMutex;

#define _Py_UNLOCKED    0
#define _Py_LOCKED      1
#define _Py_HAS_PARKED  2
#define _Py_ONCE_INITIALIZED 4

// (private) slow path for locking the mutex
PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);

// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);

static inline int
PyMutex_LockFast(uint8_t *lock_bits)
{
    uint8_t expected = _Py_UNLOCKED;
    return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
}

// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
// the mutex is unlocked. If the current thread holds the GIL, then the GIL
// will be released while the thread is parked.
static inline void
PyMutex_Lock(PyMutex *m)
{
    uint8_t expected = _Py_UNLOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
        _PyMutex_LockSlow(m);
    }
}

// Unlocks the mutex.
static inline void
PyMutex_Unlock(PyMutex *m)
{
    uint8_t expected = _Py_LOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
        _PyMutex_UnlockSlow(m);
    }
}

// Checks if the mutex is currently locked.
static inline int
PyMutex_IsLocked(PyMutex *m)
{
    return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
}

// Re-initializes the mutex after a fork to the unlocked state.
static inline void
_PyMutex_at_fork_reinit(PyMutex *m)
{
    memset(m, 0, sizeof(*m));
}

typedef enum _PyLockFlags {
    // Do not detach/release the GIL when waiting on the lock.
    _Py_LOCK_DONT_DETACH = 0,

    // Detach/release the GIL while waiting on the lock.
    _PY_LOCK_DETACH = 1,

    // Handle signals if interrupted while waiting on the lock.
    _PY_LOCK_HANDLE_SIGNALS = 2,
} _PyLockFlags;

// Lock a mutex with an optional timeout and additional options. See
// _PyLockFlags for details.
extern PyLockStatus
_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout_ns, _PyLockFlags flags);

// Lock a mutex with aditional options. See _PyLockFlags for details.
static inline void
PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags)
{
    uint8_t expected = _Py_UNLOCKED;
    if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
        _PyMutex_LockTimed(m, -1, flags);
    }
}

// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
// error messages).
extern int _PyMutex_TryUnlock(PyMutex *m);


// PyEvent is a one-time event notification
typedef struct {
    uint8_t v;
} PyEvent;

// Set the event and notify any waiting threads.
// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);

// Wait for the event to be set. If the event is already set, then this returns
// immediately.
PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);

// Wait for the event to be set, or until the timeout expires. If the event is
// already set, then this returns immediately. Returns 1 if the event was set,
// and 0 if the timeout expired or thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns);


// _PyRawMutex implements a word-sized mutex that that does not depend on the
// parking lot API, and therefore can be used in the parking lot
// implementation.
//
// The mutex uses a packed representation: the least significant bit is used to
// indicate whether the mutex is locked or not. The remaining bits are either
// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
typedef struct {
    uintptr_t v;
} _PyRawMutex;

// Slow paths for lock/unlock
extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);

static inline void
_PyRawMutex_Lock(_PyRawMutex *m)
{
    uintptr_t unlocked = _Py_UNLOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
        return;
    }
    _PyRawMutex_LockSlow(m);
}

static inline void
_PyRawMutex_Unlock(_PyRawMutex *m)
{
    uintptr_t locked = _Py_LOCKED;
    if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
        return;
    }
    _PyRawMutex_UnlockSlow(m);
}

// A data structure that can be used to run initialization code once in a
// thread-safe manner. The C++11 equivalent is std::call_once.
typedef struct {
    uint8_t v;
} _PyOnceFlag;

// Type signature for one-time initialization functions. The function should
// return 0 on success and -1 on failure.
typedef int _Py_once_fn_t(void *arg);

// (private) slow path for one time initialization
PyAPI_FUNC(int)
_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg);

// Calls `fn` once using `flag`. The `arg` is passed to the call to `fn`.
//
// Returns 0 on success and -1 on failure.
//
// If `fn` returns 0 (success), then subsequent calls immediately return 0.
// If `fn` returns -1 (failure), then subsequent calls will retry the call.
static inline int
_PyOnceFlag_CallOnce(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg)
{
    if (_Py_atomic_load_uint8(&flag->v) == _Py_ONCE_INITIALIZED) {
        return 0;
    }
    return _PyOnceFlag_CallOnceSlow(flag, fn, arg);
}

// A readers-writer (RW) lock. The lock supports multiple concurrent readers or
// a single writer. The lock is write-preferring: if a writer is waiting while
// the lock is read-locked then, new readers will be blocked. This avoids
// starvation of writers.
//
// In C++, the equivalent synchronization primitive is std::shared_mutex
// with shared ("read") and exclusive ("write") locking.
//
// The two least significant bits are used to indicate if the lock is
// write-locked and if there are parked threads (either readers or writers)
// waiting to acquire the lock. The remaining bits are used to indicate the
// number of readers holding the lock.
//
// 0b000..00000: unlocked
// 0bnnn..nnn00: nnn..nnn readers holding the lock
// 0bnnn..nnn10: nnn..nnn readers holding the lock and a writer is waiting
// 0b00000..010: unlocked with awoken writer about to acquire lock
// 0b00000..001: write-locked
// 0b00000..011: write-locked and readers or other writers are waiting
//
// Note that reader_count must be zero if the lock is held by a writer, and
// vice versa. The lock can only be held by readers or a writer, but not both.
//
// The design is optimized for simplicity of the implementation. The lock is
// not fair: if fairness is desired, use an additional PyMutex to serialize
// writers. The lock is also not reentrant.
typedef struct {
    uintptr_t bits;
} _PyRWMutex;

// Read lock (i.e., shared lock)
PyAPI_FUNC(void) _PyRWMutex_RLock(_PyRWMutex *rwmutex);
PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex);

// Write lock (i.e., exclusive lock)
PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex);
PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex);

// Similar to linux seqlock: https://en.wikipedia.org/wiki/Seqlock
// We use a sequence number to lock the writer, an even sequence means we're unlocked, an odd
// sequence means we're locked.  Readers will read the sequence before attempting to read the
// underlying data and then read the sequence number again after reading the data.  If the
// sequence has not changed the data is valid.
//
// Differs a little bit in that we use CAS on sequence as the lock, instead of a seperate spin lock.
// The writer can also detect that the undelering data has not changed and abandon the write
// and restore the previous sequence.
typedef struct {
    uint32_t sequence;
} _PySeqLock;

// Lock the sequence lock for the writer
PyAPI_FUNC(void) _PySeqLock_LockWrite(_PySeqLock *seqlock);

// Unlock the sequence lock and move to the next sequence number.
PyAPI_FUNC(void) _PySeqLock_UnlockWrite(_PySeqLock *seqlock);

// Abandon the current update indicating that no mutations have occured
// and restore the previous sequence value.
PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock);

// Begin a read operation and return the current sequence number.
PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);

// End the read operation and confirm that the sequence number has not changed.
// Returns 1 if the read was successful or 0 if the read should be re-tried.
PyAPI_FUNC(uint32_t) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);

// Check if the lock was held during a fork and clear the lock.  Returns 1
// if the lock was held and any associated datat should be cleared.
PyAPI_FUNC(uint32_t) _PySeqLock_AfterFork(_PySeqLock *seqlock);

#ifdef __cplusplus
}
#endif
#endif   /* !Py_INTERNAL_LOCK_H */