Python/ceval_gil.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

/*
 * Implementation of the Global Interpreter Lock (GIL).
 */

#include <stdlib.h>
#include <errno.h>


/* First some general settings */

/* Default switch interval, in microseconds (the Python API uses seconds,
   though). */
#define DEFAULT_INTERVAL 5000
/* Current switch interval in microseconds; read and written through
   _PyEval_GetSwitchInterval() and _PyEval_SetSwitchInterval(). */
static unsigned long gil_interval = DEFAULT_INTERVAL;
/* Effective interval used when waiting: a stored value of 0 is treated
   as 1 microsecond. */
#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)

/* Enable if you want to force the switching of threads at least every
   `gil_interval`. The #undef/#define pair below leaves it enabled. */
#undef FORCE_SWITCHING
#define FORCE_SWITCHING


/*
   Notes about the implementation:

   - The GIL is just a boolean variable (gil_locked) whose access is protected
     by a mutex (gil_mutex), and whose changes are signalled by a condition
     variable (gil_cond). gil_mutex is taken for short periods of time,
     and therefore mostly uncontended.

   - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be
     able to release the GIL on demand by another thread. A volatile boolean
     variable (gil_drop_request) is used for that purpose, which is checked
     at every turn of the eval loop. That variable is set after a wait of
     `interval` microseconds on `gil_cond` has timed out.
      
      [Actually, another volatile boolean variable (eval_breaker) is used
       which ORs several conditions into one. Volatile booleans are
       sufficient as inter-thread signalling means since Python is run
       on cache-coherent architectures only.]

   - A thread wanting to take the GIL will first let pass a given amount of
     time (`interval` microseconds) before setting gil_drop_request. This
     encourages a defined switching period, but doesn't enforce it since
     opcodes can take an arbitrary time to execute.
 
     The `interval` value is available for the user to read and modify
     using the Python API `sys.{get,set}switchinterval()`.

   - When a thread releases the GIL and gil_drop_request is set, that thread
     ensures that another GIL-awaiting thread gets scheduled.
     It does so by waiting on a condition variable (switch_cond) until
     the value of gil_last_holder is changed to something other than its
     own thread state pointer, indicating that another thread was able to
     take the GIL.
 
     This is meant to prohibit the latency-adverse behaviour on multi-core
     machines where one thread would speculatively release the GIL, but still
     run and end up being the first to re-acquire it, making the "timeslices"
     much longer than expected.
     (Note: this mechanism is enabled with FORCE_SWITCHING above)
*/

#ifndef _POSIX_THREADS
/* This means pthreads are not implemented in libc headers, hence the macro
   not present in unistd.h. But they still can be implemented as an external
   library (e.g. gnu pth in pthread emulation) */
# ifdef HAVE_PTHREAD_H
#  include <pthread.h> /* _POSIX_THREADS */
# endif
#endif


#ifdef _POSIX_THREADS

/*
 * POSIX support
 */

#include <pthread.h>

/* Add `interval` microseconds to the struct timeval lvalue `tv`.
   `tv` is expected to be normalized on entry (0 <= tv_usec < 1000000) and
   is normalized again on exit.
   `interval` is evaluated exactly once and handled as an unsigned long.
   Whole seconds are added to tv_sec directly, so only the sub-second
   remainder ever reaches tv_usec; this keeps tv_usec from overflowing on
   platforms where long is 32 bits wide. */
#define ADD_MICROSECONDS(tv, interval) \
do { \
    const unsigned long add_usec_ = (unsigned long) (interval); \
    (tv).tv_sec += (long) (add_usec_ / 1000000); \
    (tv).tv_usec += (long) (add_usec_ % 1000000); \
    (tv).tv_sec += (tv).tv_usec / 1000000; \
    (tv).tv_usec %= 1000000; \
} while (0)

/* gettimeofday() is assumed to exist on every modern POSIX system.
   Platforms that define GETTIMEOFDAY_NO_TZ get the one-argument call; all
   others get the two-argument form with a NULL timezone pointer. */
#if !defined(GETTIMEOFDAY_NO_TZ)
#  define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
#else
#  define GETTIMEOFDAY(ptv) gettimeofday(ptv)
#endif

/* Thin wrappers around pthread mutexes.  `mut` is the mutex object itself
   (not a pointer to it).  A non-zero return from the pthread call is
   reported through Py_FatalError().
   Each macro expands to exactly one statement (do { ... } while (0)), so
   it must be followed by `;` and is safe as the body of an unbraced
   if/else. */
#define MUTEX_T pthread_mutex_t
#define MUTEX_INIT(mut) \
    do { \
        if (pthread_mutex_init(&(mut), NULL)) { \
            Py_FatalError("pthread_mutex_init(" #mut ") failed"); \
        } \
    } while (0)
#define MUTEX_LOCK(mut) \
    do { \
        if (pthread_mutex_lock(&(mut))) { \
            Py_FatalError("pthread_mutex_lock(" #mut ") failed"); \
        } \
    } while (0)
#define MUTEX_UNLOCK(mut) \
    do { \
        if (pthread_mutex_unlock(&(mut))) { \
            Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); \
        } \
    } while (0)

/* Thin wrappers around pthread condition variables.  `cond` and `mut` are
   the objects themselves (not pointers).  Unexpected error codes from the
   pthread calls are reported through Py_FatalError().
   Every non-empty macro expands to exactly one statement
   (do { ... } while (0)), so it must be followed by `;` and is safe as the
   body of an unbraced if/else. */
#define COND_T pthread_cond_t
#define COND_INIT(cond) \
    do { \
        if (pthread_cond_init(&(cond), NULL)) { \
            Py_FatalError("pthread_cond_init(" #cond ") failed"); \
        } \
    } while (0)
/* Nothing to do here for pthread condition variables. */
#define COND_RESET(cond)
#define COND_SIGNAL(cond) \
    do { \
        if (pthread_cond_signal(&(cond))) { \
            Py_FatalError("pthread_cond_signal(" #cond ") failed"); \
        } \
    } while (0)
#define COND_WAIT(cond, mut) \
    do { \
        if (pthread_cond_wait(&(cond), &(mut))) { \
            Py_FatalError("pthread_cond_wait(" #cond ") failed"); \
        } \
    } while (0)
/* Wait on `cond` (with `mut` held by the caller) for at most
   `microseconds`.  The absolute deadline is computed from the current
   gettimeofday() time.  On return `timeout_result` is 1 if the wait timed
   out (ETIMEDOUT) and 0 if pthread_cond_timedwait() returned normally. */
#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
    do { \
        int r; \
        struct timespec ts; \
        struct timeval deadline; \
        \
        GETTIMEOFDAY(&deadline); \
        ADD_MICROSECONDS(deadline, microseconds); \
        ts.tv_sec = deadline.tv_sec; \
        ts.tv_nsec = deadline.tv_usec * 1000; \
        \
        r = pthread_cond_timedwait(&(cond), &(mut), &ts); \
        if (r == ETIMEDOUT) \
            (timeout_result) = 1; \
        else if (r) \
            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
        else \
            (timeout_result) = 0; \
    } while (0)

#elif defined(NT_THREADS)

/*
 * Windows (2000 and later, as well as (hopefully) CE) support
 */

#include <windows.h>

/* Mutex wrappers built on Win32 mutex objects.  `mut` is the HANDLE
   variable itself; MUTEX_INIT assigns to it.  A failing Win32 call is
   reported through Py_FatalError().
   Each macro expands to exactly one statement (do { ... } while (0)), so
   it must be followed by `;` and is safe as the body of an unbraced
   if/else. */
#define MUTEX_T HANDLE
#define MUTEX_INIT(mut) \
    do { \
        if (!((mut) = CreateMutex(NULL, FALSE, NULL))) { \
            Py_FatalError("CreateMutex(" #mut ") failed"); \
        } \
    } while (0)
#define MUTEX_LOCK(mut) \
    do { \
        if (WaitForSingleObject(mut, INFINITE) != WAIT_OBJECT_0) { \
            Py_FatalError("WaitForSingleObject(" #mut ") failed"); \
        } \
    } while (0)
#define MUTEX_UNLOCK(mut) \
    do { \
        if (!ReleaseMutex(mut)) { \
            Py_FatalError("ReleaseMutex(" #mut ") failed"); \
        } \
    } while (0)

/* We emulate condition variables with events. It is sufficient here.
   WaitForMultipleObjects() allows the event to be caught and the mutex
   to be taken atomically.
   As for SignalObjectAndWait(), its semantics are unfortunately a bit
   more foggy. Many sources on the Web define it as atomically releasing
   the first object while starting to wait on the second, but MSDN states
   it is *not* atomic...

   In any case, the emulation here is tailored for our particular use case.
   For example, we don't care how many threads are woken up when a condition
   gets signalled. Generic emulations of the pthread_cond_* API using
   Win32 functions can be found on the Web.
   The following read can be edifying (or not):
   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
/* Condition variable emulation on top of a Win32 auto-reset event.
   `cond` and `mut` are the HANDLE variables themselves; COND_INIT assigns
   to `cond`.  A failing Win32 call is reported through Py_FatalError().
   Each macro expands to exactly one statement (do { ... } while (0)), so
   it must be followed by `;` and is safe as the body of an unbraced
   if/else. */
#define COND_T HANDLE
#define COND_INIT(cond) \
    do { \
        /* auto-reset, non-signalled */ \
        if (!((cond) = CreateEvent(NULL, FALSE, FALSE, NULL))) { \
            Py_FatalError("CreateEvent(" #cond ") failed"); \
        } \
    } while (0)
#define COND_RESET(cond) \
    do { \
        if (!ResetEvent(cond)) { \
            Py_FatalError("ResetEvent(" #cond ") failed"); \
        } \
    } while (0)
#define COND_SIGNAL(cond) \
    do { \
        if (!SetEvent(cond)) { \
            Py_FatalError("SetEvent(" #cond ") failed"); \
        } \
    } while (0)
/* Release `mut`, wait for `cond` to be signalled, then take `mut` again
   before returning. */
#define COND_WAIT(cond, mut) \
    do { \
        if (SignalObjectAndWait(mut, cond, INFINITE, FALSE) != WAIT_OBJECT_0) \
            Py_FatalError("SignalObjectAndWait(" #mut ", " #cond") failed"); \
        MUTEX_LOCK(mut); \
    } while (0)
/* Release `mut` and wait at most `microseconds` for both `cond` to be
   signalled and `mut` to become available again (wait-for-all on the two
   handles).  On return `mut` is held in either case: it is acquired by the
   wait itself on success and re-locked explicitly after a timeout.
   `timeout_result` is set to 1 on timeout and 0 otherwise.
   The timeout is rounded UP to whole milliseconds: truncating a
   sub-millisecond interval to 0 would make WaitForMultipleObjects()
   return immediately and turn the caller's wait loop into a busy poll.
   Expands to one statement (do { ... } while (0)); follow it with `;`. */
#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
    do { \
        DWORD r; \
        HANDLE objects[2] = { cond, mut }; \
        const unsigned long wait_us_ = (unsigned long) (microseconds); \
        const DWORD wait_ms_ = \
            (DWORD) (wait_us_ / 1000 + (wait_us_ % 1000 != 0)); \
        MUTEX_UNLOCK(mut); \
        r = WaitForMultipleObjects(2, objects, TRUE, wait_ms_); \
        if (r == WAIT_TIMEOUT) { \
            MUTEX_LOCK(mut); \
            (timeout_result) = 1; \
        } \
        else if (r != WAIT_OBJECT_0) \
            Py_FatalError("WaitForMultipleObjects(" #cond ") failed"); \
        else \
            (timeout_result) = 0; \
    } while (0)

#else

#error You need either a POSIX-compatible or a Windows system!

#endif /* _POSIX_THREADS, NT_THREADS */


/* Whether the GIL is already taken (-1 if uninitialized). This is volatile
   because it can be read without any lock taken in ceval.c.
   create_gil() sets it to 0, take_gil() sets it to 1 and drop_gil() sets
   it back to 0. */
static volatile int gil_locked = -1;
/* Number of GIL switches since the beginning. Incremented by take_gil()
   each time the acquiring thread differs from gil_last_holder. */
static unsigned long gil_switch_number = 0;
/* Last thread holding / having held the GIL. This helps us know whether
   anyone else was scheduled after we dropped the GIL. */
static PyThreadState *gil_last_holder = NULL;

/* This condition variable allows one or several threads to wait until
   the GIL is released. In addition, the mutex also protects the above
   variables. */
static COND_T gil_cond;
static MUTEX_T gil_mutex;

#ifdef FORCE_SWITCHING
/* This condition variable helps the GIL-releasing thread wait for
   a GIL-awaiting thread to be scheduled and take the GIL.
   switch_mutex is held by take_gil() while it updates gil_last_holder
   and by drop_gil() while it checks it. */
static COND_T switch_cond;
static MUTEX_T switch_mutex;
#endif


/* Return 1 once the GIL has been set up, 0 while gil_locked is still
   negative (its "uninitialized" marker). */
static int
gil_created(void)
{
    if (gil_locked < 0)
        return 0;
    return 1;
}

/* Initialize the GIL's mutexes and condition variables, then mark the GIL
   as existing but not held (gil_locked == 0, no last holder). */
static void
create_gil(void)
{
    /* Primitives guarding the lock state itself. */
    MUTEX_INIT(gil_mutex);
    COND_INIT(gil_cond);
#ifdef FORCE_SWITCHING
    /* Primitives used for the forced hand-over between threads. */
    MUTEX_INIT(switch_mutex);
    COND_INIT(switch_cond);
#endif
    gil_locked = 0;
    gil_last_holder = NULL;
}

/* Set the GIL up again from scratch.  At present this is nothing more
   than a call to create_gil(). */
static void
recreate_gil(void)
{
    create_gil();
}

/* Release the GIL on behalf of `tstate`.

   The GIL must currently be locked, and a non-NULL `tstate` must be the
   recorded last holder; otherwise Py_FatalError() is called.
   The lock is released by clearing gil_locked under gil_mutex and
   signalling gil_cond.

   With FORCE_SWITCHING, if a drop request is pending and `tstate` is not
   NULL, the function then waits on switch_cond (once) when gil_last_holder
   still equals `tstate`, i.e. when no other thread has taken the GIL yet.
   take_gil() signals switch_cond after updating gil_last_holder. */
static void drop_gil(PyThreadState *tstate)
{
    /* NOTE: tstate is allowed to be NULL. */
    if (!gil_locked)
        Py_FatalError("drop_gil: GIL is not locked");
    if (tstate != NULL && tstate != gil_last_holder)
        Py_FatalError("drop_gil: wrong thread state");

    /* Mark the GIL as free and wake up a thread waiting in take_gil(). */
    MUTEX_LOCK(gil_mutex);
    gil_locked = 0;
    COND_SIGNAL(gil_cond);
    MUTEX_UNLOCK(gil_mutex);
    
#ifdef FORCE_SWITCHING
    /* gil_drop_request and RESET_GIL_DROP_REQUEST() are defined outside
       this file (presumably in ceval.c -- TODO confirm). */
    if (gil_drop_request && tstate != NULL) {
        MUTEX_LOCK(switch_mutex);
        /* Not switched yet => wait */
        if (gil_last_holder == tstate) {
	    RESET_GIL_DROP_REQUEST();
            /* NOTE: if COND_WAIT does not atomically start waiting when
               releasing the mutex, another thread can run through, take
               the GIL and drop it again, and reset the condition
               before we even had a chance to wait for it. */
            COND_WAIT(switch_cond, switch_mutex);
            COND_RESET(switch_cond);
	}
        MUTEX_UNLOCK(switch_mutex);
    }
#endif
}

/* Acquire the GIL on behalf of `tstate`, which must not be NULL
   (Py_FatalError() is called otherwise).

   While the GIL is held by someone else, the caller waits on gil_cond in
   slices of INTERVAL microseconds.  Whenever a slice times out with the
   GIL still locked and no switch recorded since the slice began, a drop
   request is raised via SET_GIL_DROP_REQUEST().

   Once the GIL is free it is marked as locked, gil_last_holder and
   gil_switch_number are updated if the holder changed, and (with
   FORCE_SWITCHING) switch_cond is signalled for a thread waiting in
   drop_gil().

   errno is saved on entry and restored on exit, so the call leaves it
   unchanged for the caller. */
static void take_gil(PyThreadState *tstate)
{
    int err;
    if (tstate == NULL)
        Py_FatalError("take_gil: NULL tstate");

    err = errno;
    MUTEX_LOCK(gil_mutex);

    /* Fast path: nobody holds the GIL. */
    if (!gil_locked)
        goto _ready;
    
    COND_RESET(gil_cond);
    while (gil_locked) {
        int timed_out = 0;
        unsigned long saved_switchnum;

        /* Remember the switch count so that a switch happening during the
           wait can be detected afterwards. */
        saved_switchnum = gil_switch_number;
        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
        /* If we timed out and no switch occurred in the meantime, it is time
           to ask the GIL-holding thread to drop it. */
        if (timed_out && gil_locked && gil_switch_number == saved_switchnum) {
            SET_GIL_DROP_REQUEST();
        }
    }
_ready:
#ifdef FORCE_SWITCHING
    /* This mutex must be taken before modifying gil_last_holder (see drop_gil()). */
    MUTEX_LOCK(switch_mutex);
#endif
    /* We now hold the GIL */
    gil_locked = 1;

    /* Only count a switch when the GIL actually changes hands. */
    if (tstate != gil_last_holder) {
        gil_last_holder = tstate;
        ++gil_switch_number;
    }
#ifdef FORCE_SWITCHING
    /* Wake up a previous holder possibly waiting in drop_gil(). */
    COND_SIGNAL(switch_cond);
    MUTEX_UNLOCK(switch_mutex);
#endif
    /* Clear any pending drop request now that the GIL is ours. */
    if (gil_drop_request) {
        RESET_GIL_DROP_REQUEST();
    }
    /* Notify the eval loop if this thread has a pending asynchronous
       exception. */
    if (tstate->async_exc != NULL) {
        _PyEval_SignalAsyncExc();
    }
    
    MUTEX_UNLOCK(gil_mutex);
    errno = err;
}

/* Set the GIL switch interval, in microseconds.  The value is stored as
   given, with no validation or clamping at this point. */
void
_PyEval_SetSwitchInterval(unsigned long microseconds)
{
    gil_interval = microseconds;
}

/* Return the GIL switch interval, in microseconds, exactly as it was last
   stored (the default is DEFAULT_INTERVAL).
   Declared with an explicit (void) parameter list so that the definition
   is a real prototype; an empty `()` list leaves the arguments unchecked
   in C. */
unsigned long _PyEval_GetSwitchInterval(void)
{
    return gil_interval;
}