1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
|
/*
* Implementation of the Global Interpreter Lock (GIL).
*/
#include <stdlib.h>
#include <errno.h>
#include "pycore_atomic.h"
/*
Notes about the implementation:
- The GIL is just a boolean variable (locked) whose access is protected
by a mutex (gil_mutex), and whose changes are signalled by a condition
variable (gil_cond). gil_mutex is taken for short periods of time,
and therefore mostly uncontended.
- In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be
able to release the GIL on demand by another thread. A volatile boolean
variable (gil_drop_request) is used for that purpose, which is checked
at every turn of the eval loop. That variable is set after a wait of
`interval` microseconds on `gil_cond` has timed out.
[Actually, another volatile boolean variable (eval_breaker) is used
which ORs several conditions into one. Volatile booleans are
sufficient as inter-thread signalling means since Python is run
on cache-coherent architectures only.]
- A thread wanting to take the GIL will first let pass a given amount of
time (`interval` microseconds) before setting gil_drop_request. This
encourages a defined switching period, but doesn't enforce it since
opcodes can take an arbitrary time to execute.
The `interval` value is available for the user to read and modify
using the Python API `sys.{get,set}switchinterval()`.
- When a thread releases the GIL and gil_drop_request is set, that thread
ensures that another GIL-awaiting thread gets scheduled.
It does so by waiting on a condition variable (switch_cond) until
the value of last_holder is changed to something else than its
own thread state pointer, indicating that another thread was able to
take the GIL.
This is meant to prohibit the latency-adverse behaviour on multi-core
machines where one thread would speculatively release the GIL, but still
run and end up being the first to re-acquire it, making the "timeslices"
much longer than expected.
(Note: this mechanism is enabled with FORCE_SWITCHING above)
*/
#include "condvar.h"
#define MUTEX_INIT(mut) \
if (PyMUTEX_INIT(&(mut))) { \
Py_FatalError("PyMUTEX_INIT(" #mut ") failed"); };
#define MUTEX_FINI(mut) \
if (PyMUTEX_FINI(&(mut))) { \
Py_FatalError("PyMUTEX_FINI(" #mut ") failed"); };
#define MUTEX_LOCK(mut) \
if (PyMUTEX_LOCK(&(mut))) { \
Py_FatalError("PyMUTEX_LOCK(" #mut ") failed"); };
#define MUTEX_UNLOCK(mut) \
if (PyMUTEX_UNLOCK(&(mut))) { \
Py_FatalError("PyMUTEX_UNLOCK(" #mut ") failed"); };
#define COND_INIT(cond) \
if (PyCOND_INIT(&(cond))) { \
Py_FatalError("PyCOND_INIT(" #cond ") failed"); };
#define COND_FINI(cond) \
if (PyCOND_FINI(&(cond))) { \
Py_FatalError("PyCOND_FINI(" #cond ") failed"); };
#define COND_SIGNAL(cond) \
if (PyCOND_SIGNAL(&(cond))) { \
Py_FatalError("PyCOND_SIGNAL(" #cond ") failed"); };
#define COND_WAIT(cond, mut) \
if (PyCOND_WAIT(&(cond), &(mut))) { \
Py_FatalError("PyCOND_WAIT(" #cond ") failed"); };
#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
{ \
int r = PyCOND_TIMEDWAIT(&(cond), &(mut), (microseconds)); \
if (r < 0) \
Py_FatalError("PyCOND_WAIT(" #cond ") failed"); \
if (r) /* 1 == timeout, 2 == impl. can't say, so assume timeout */ \
timeout_result = 1; \
else \
timeout_result = 0; \
} \
#define DEFAULT_INTERVAL 5000
static void _gil_initialize(struct _gil_runtime_state *gil)
{
_Py_atomic_int uninitialized = {-1};
gil->locked = uninitialized;
gil->interval = DEFAULT_INTERVAL;
}
static int gil_created(struct _gil_runtime_state *gil)
{
return (_Py_atomic_load_explicit(&gil->locked, _Py_memory_order_acquire) >= 0);
}
static void create_gil(struct _gil_runtime_state *gil)
{
MUTEX_INIT(gil->mutex);
#ifdef FORCE_SWITCHING
MUTEX_INIT(gil->switch_mutex);
#endif
COND_INIT(gil->cond);
#ifdef FORCE_SWITCHING
COND_INIT(gil->switch_cond);
#endif
_Py_atomic_store_relaxed(&gil->last_holder, 0);
_Py_ANNOTATE_RWLOCK_CREATE(&gil->locked);
_Py_atomic_store_explicit(&gil->locked, 0, _Py_memory_order_release);
}
static void destroy_gil(struct _gil_runtime_state *gil)
{
/* some pthread-like implementations tie the mutex to the cond
* and must have the cond destroyed first.
*/
COND_FINI(gil->cond);
MUTEX_FINI(gil->mutex);
#ifdef FORCE_SWITCHING
COND_FINI(gil->switch_cond);
MUTEX_FINI(gil->switch_mutex);
#endif
_Py_atomic_store_explicit(&gil->locked, -1,
_Py_memory_order_release);
_Py_ANNOTATE_RWLOCK_DESTROY(&gil->locked);
}
static void recreate_gil(struct _gil_runtime_state *gil)
{
_Py_ANNOTATE_RWLOCK_DESTROY(&gil->locked);
/* XXX should we destroy the old OS resources here? */
create_gil(gil);
}
static void
drop_gil(struct _ceval_runtime_state *ceval, struct _ceval_state *ceval2,
PyThreadState *tstate)
{
struct _gil_runtime_state *gil = &ceval->gil;
if (!_Py_atomic_load_relaxed(&gil->locked)) {
Py_FatalError("drop_gil: GIL is not locked");
}
/* tstate is allowed to be NULL (early interpreter init) */
if (tstate != NULL) {
/* Sub-interpreter support: threads might have been switched
under our feet using PyThreadState_Swap(). Fix the GIL last
holder variable so that our heuristics work. */
_Py_atomic_store_relaxed(&gil->last_holder, (uintptr_t)tstate);
}
MUTEX_LOCK(gil->mutex);
_Py_ANNOTATE_RWLOCK_RELEASED(&gil->locked, /*is_write=*/1);
_Py_atomic_store_relaxed(&gil->locked, 0);
COND_SIGNAL(gil->cond);
MUTEX_UNLOCK(gil->mutex);
#ifdef FORCE_SWITCHING
if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request) && tstate != NULL) {
MUTEX_LOCK(gil->switch_mutex);
/* Not switched yet => wait */
if (((PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) == tstate)
{
assert(is_tstate_valid(tstate));
RESET_GIL_DROP_REQUEST(tstate->interp);
/* NOTE: if COND_WAIT does not atomically start waiting when
releasing the mutex, another thread can run through, take
the GIL and drop it again, and reset the condition
before we even had a chance to wait for it. */
COND_WAIT(gil->switch_cond, gil->switch_mutex);
}
MUTEX_UNLOCK(gil->switch_mutex);
}
#endif
}
/* Check if a Python thread must exit immediately, rather than taking the GIL
if Py_Finalize() has been called.
When this function is called by a daemon thread after Py_Finalize() has been
called, the GIL does no longer exist.
tstate must be non-NULL. */
static inline int
tstate_must_exit(PyThreadState *tstate)
{
/* bpo-39877: Access _PyRuntime directly rather than using
tstate->interp->runtime to support calls from Python daemon threads.
After Py_Finalize() has been called, tstate can be a dangling pointer:
point to PyThreadState freed memory. */
PyThreadState *finalizing = _PyRuntimeState_GetFinalizing(&_PyRuntime);
return (finalizing != NULL && finalizing != tstate);
}
/* Take the GIL.
The function saves errno at entry and restores its value at exit.
tstate must be non-NULL. */
static void
take_gil(PyThreadState *tstate)
{
int err = errno;
assert(tstate != NULL);
if (tstate_must_exit(tstate)) {
/* bpo-39877: If Py_Finalize() has been called and tstate is not the
thread which called Py_Finalize(), exit immediately the thread.
This code path can be reached by a daemon thread after Py_Finalize()
completes. In this case, tstate is a dangling pointer: points to
PyThreadState freed memory. */
PyThread_exit_thread();
}
assert(is_tstate_valid(tstate));
PyInterpreterState *interp = tstate->interp;
struct _ceval_runtime_state *ceval = &interp->runtime->ceval;
struct _ceval_state *ceval2 = &interp->ceval;
struct _gil_runtime_state *gil = &ceval->gil;
/* Check that _PyEval_InitThreads() was called to create the lock */
assert(gil_created(gil));
MUTEX_LOCK(gil->mutex);
if (!_Py_atomic_load_relaxed(&gil->locked)) {
goto _ready;
}
while (_Py_atomic_load_relaxed(&gil->locked)) {
unsigned long saved_switchnum = gil->switch_number;
unsigned long interval = (gil->interval >= 1 ? gil->interval : 1);
int timed_out = 0;
COND_TIMED_WAIT(gil->cond, gil->mutex, interval, timed_out);
/* If we timed out and no switch occurred in the meantime, it is time
to ask the GIL-holding thread to drop it. */
if (timed_out &&
_Py_atomic_load_relaxed(&gil->locked) &&
gil->switch_number == saved_switchnum)
{
if (tstate_must_exit(tstate)) {
MUTEX_UNLOCK(gil->mutex);
PyThread_exit_thread();
}
assert(is_tstate_valid(tstate));
SET_GIL_DROP_REQUEST(interp);
}
}
_ready:
#ifdef FORCE_SWITCHING
/* This mutex must be taken before modifying gil->last_holder:
see drop_gil(). */
MUTEX_LOCK(gil->switch_mutex);
#endif
/* We now hold the GIL */
_Py_atomic_store_relaxed(&gil->locked, 1);
_Py_ANNOTATE_RWLOCK_ACQUIRED(&gil->locked, /*is_write=*/1);
if (tstate != (PyThreadState*)_Py_atomic_load_relaxed(&gil->last_holder)) {
_Py_atomic_store_relaxed(&gil->last_holder, (uintptr_t)tstate);
++gil->switch_number;
}
#ifdef FORCE_SWITCHING
COND_SIGNAL(gil->switch_cond);
MUTEX_UNLOCK(gil->switch_mutex);
#endif
if (tstate_must_exit(tstate)) {
/* bpo-36475: If Py_Finalize() has been called and tstate is not
the thread which called Py_Finalize(), exit immediately the
thread.
This code path can be reached by a daemon thread which was waiting
in take_gil() while the main thread called
wait_for_thread_shutdown() from Py_Finalize(). */
MUTEX_UNLOCK(gil->mutex);
drop_gil(ceval, ceval2, tstate);
PyThread_exit_thread();
}
assert(is_tstate_valid(tstate));
if (_Py_atomic_load_relaxed(&ceval2->gil_drop_request)) {
RESET_GIL_DROP_REQUEST(interp);
}
else {
/* bpo-40010: eval_breaker should be recomputed to be set to 1 if there
is a pending signal: signal received by another thread which cannot
handle signals.
Note: RESET_GIL_DROP_REQUEST() calls COMPUTE_EVAL_BREAKER(). */
COMPUTE_EVAL_BREAKER(interp, ceval, ceval2);
}
/* Don't access tstate if the thread must exit */
if (tstate->async_exc != NULL) {
_PyEval_SignalAsyncExc(tstate->interp);
}
MUTEX_UNLOCK(gil->mutex);
errno = err;
}
void _PyEval_SetSwitchInterval(unsigned long microseconds)
{
struct _gil_runtime_state *gil = &_PyRuntime.ceval.gil;
gil->interval = microseconds;
}
unsigned long _PyEval_GetSwitchInterval()
{
struct _gil_runtime_state *gil = &_PyRuntime.ceval.gil;
return gil->interval;
}
|