Diffstat (limited to 'Python/ceval_gil.h')
-rw-r--r-- | Python/ceval_gil.h | 433
1 file changed, 433 insertions, 0 deletions
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
new file mode 100644
index 0000000..bf7a350
--- /dev/null
+++ b/Python/ceval_gil.h
@@ -0,0 +1,433 @@
/*
 * Implementation of the Global Interpreter Lock (GIL).
 */

#include <stdlib.h>
#include <errno.h>


/* First some general settings */

/* microseconds (the Python API uses seconds, though) */
#define DEFAULT_INTERVAL 5000
static unsigned long gil_interval = DEFAULT_INTERVAL;
#define INTERVAL (gil_interval >= 1 ? gil_interval : 1)

/* Enable if you want to force the switching of threads at least every
   `gil_interval` microseconds */
#undef FORCE_SWITCHING
#define FORCE_SWITCHING


/*
   Notes about the implementation:

   - The GIL is just a boolean variable (gil_locked) whose access is protected
     by a mutex (gil_mutex), and whose changes are signalled by a condition
     variable (gil_cond). gil_mutex is taken for short periods of time,
     and is therefore mostly uncontended.

   - In the GIL-holding thread, the main loop (PyEval_EvalFrameEx) must be
     able to release the GIL on demand from another thread. A volatile boolean
     variable (gil_drop_request) is used for that purpose, and is checked
     at every turn of the eval loop. That variable is set after a wait of
     `interval` microseconds on `gil_cond` has timed out.

     [Actually, another volatile boolean variable (eval_breaker) is used,
      which ORs several conditions into one. Volatile booleans are
      sufficient as an inter-thread signalling means since Python is run
      on cache-coherent architectures only.]

   - A thread wanting to take the GIL will first let a given amount of time
     pass (`interval` microseconds) before setting gil_drop_request. This
     encourages a defined switching period, but doesn't enforce it since
     opcodes can take an arbitrary time to execute.

     The `interval` value is available for the user to read and modify
     using the Python API `sys.{get,set}switchinterval()`.

   - When a thread releases the GIL and gil_drop_request is set, that thread
     ensures that another GIL-awaiting thread gets scheduled.
     It does so by waiting on a condition variable (switch_cond) until
     the value of gil_last_holder is changed to something other than its
     own thread state pointer, indicating that another thread was able to
     take the GIL.

     This is meant to prohibit the latency-adverse behaviour on multi-core
     machines where one thread would speculatively release the GIL, but
     still run and end up being the first to re-acquire it, making the
     "timeslices" much longer than expected.
     (Note: this mechanism is enabled with FORCE_SWITCHING above)
*/
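/* Illustrative sketch (kept out of compilation): how the eval loop is
   expected to consume the drop request described above.  gil_drop_request
   and eval_breaker are defined in ceval.c, which textually includes this
   header; the loop shape below is a simplification for illustration, not
   the actual PyEval_EvalFrameEx code. */
#if 0
for (;;) {
    if (_Py_atomic_load_relaxed(&eval_breaker)) {
        if (_Py_atomic_load_relaxed(&gil_drop_request)) {
            /* Another thread asked for the GIL: release it, let the
               waiter run, then compete to re-acquire it. */
            drop_gil(tstate);
            take_gil(tstate);
        }
    }
    /* ... fetch and dispatch the next opcode ... */
}
#endif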
#ifndef _POSIX_THREADS
/* This means pthreads are not implemented in the libc headers, hence the
   macro is not present in unistd.h. But they can still be implemented as an
   external library (e.g. gnu pth in pthread emulation) */
# ifdef HAVE_PTHREAD_H
#  include <pthread.h> /* _POSIX_THREADS */
# endif
#endif


#ifdef _POSIX_THREADS

/*
 * POSIX support
 */

#include <pthread.h>

#define ADD_MICROSECONDS(tv, interval) \
do { \
    tv.tv_usec += (long) interval; \
    tv.tv_sec += tv.tv_usec / 1000000; \
    tv.tv_usec %= 1000000; \
} while (0)

/* We assume all modern POSIX systems have gettimeofday() */
#ifdef GETTIMEOFDAY_NO_TZ
#define GETTIMEOFDAY(ptv) gettimeofday(ptv)
#else
#define GETTIMEOFDAY(ptv) gettimeofday(ptv, (struct timezone *)NULL)
#endif

#define MUTEX_T pthread_mutex_t
#define MUTEX_INIT(mut) \
    if (pthread_mutex_init(&mut, NULL)) { \
        Py_FatalError("pthread_mutex_init(" #mut ") failed"); };
#define MUTEX_FINI(mut) \
    if (pthread_mutex_destroy(&mut)) { \
        Py_FatalError("pthread_mutex_destroy(" #mut ") failed"); };
#define MUTEX_LOCK(mut) \
    if (pthread_mutex_lock(&mut)) { \
        Py_FatalError("pthread_mutex_lock(" #mut ") failed"); };
#define MUTEX_UNLOCK(mut) \
    if (pthread_mutex_unlock(&mut)) { \
        Py_FatalError("pthread_mutex_unlock(" #mut ") failed"); };

#define COND_T pthread_cond_t
#define COND_INIT(cond) \
    if (pthread_cond_init(&cond, NULL)) { \
        Py_FatalError("pthread_cond_init(" #cond ") failed"); };
#define COND_FINI(cond) \
    if (pthread_cond_destroy(&cond)) { \
        Py_FatalError("pthread_cond_destroy(" #cond ") failed"); };
#define COND_SIGNAL(cond) \
    if (pthread_cond_signal(&cond)) { \
        Py_FatalError("pthread_cond_signal(" #cond ") failed"); };
#define COND_WAIT(cond, mut) \
    if (pthread_cond_wait(&cond, &mut)) { \
        Py_FatalError("pthread_cond_wait(" #cond ") failed"); };
#define COND_TIMED_WAIT(cond, mut, microseconds, timeout_result) \
    { \
        int r; \
        struct timespec ts; \
        struct timeval deadline; \
        \
        GETTIMEOFDAY(&deadline); \
        ADD_MICROSECONDS(deadline, microseconds); \
        ts.tv_sec = deadline.tv_sec; \
        ts.tv_nsec = deadline.tv_usec * 1000; \
        \
        r = pthread_cond_timedwait(&cond, &mut, &ts); \
        if (r == ETIMEDOUT) \
            timeout_result = 1; \
        else if (r) \
            Py_FatalError("pthread_cond_timedwait(" #cond ") failed"); \
        else \
            timeout_result = 0; \
    }
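/* Illustrative sketch (kept out of compilation): COND_TIMED_WAIT is meant
   to be used in the classic predicate loop, as take_gil() does further
   below.  A simplified rendition: */
#if 0
int timed_out = 0;
MUTEX_LOCK(gil_mutex);
while (_Py_atomic_load_relaxed(&gil_locked)) {
    COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
    if (timed_out) {
        /* The holder did not let go within INTERVAL microseconds:
           ask it to drop the GIL at its next eval-loop check. */
        SET_GIL_DROP_REQUEST();
    }
}
MUTEX_UNLOCK(gil_mutex);
#endif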
#elif defined(NT_THREADS)

/*
 * Windows (2000 and later, as well as (hopefully) CE) support
 */

#include <windows.h>

#define MUTEX_T CRITICAL_SECTION
#define MUTEX_INIT(mut) do { \
    if (!(InitializeCriticalSectionAndSpinCount(&(mut), 4000))) \
        Py_FatalError("InitializeCriticalSectionAndSpinCount(" #mut ") failed"); \
} while (0)
#define MUTEX_FINI(mut) \
    DeleteCriticalSection(&(mut))
#define MUTEX_LOCK(mut) \
    EnterCriticalSection(&(mut))
#define MUTEX_UNLOCK(mut) \
    LeaveCriticalSection(&(mut))

/* We emulate condition variables with a semaphore.
   We use a semaphore rather than an auto-reset event, because although
   an auto-reset event might appear to solve the lost-wakeup bug (the race
   condition between releasing the outer lock and waiting) by maintaining
   state even though a wait hasn't happened yet, there is still a lost
   wakeup problem if more than one thread is interrupted in the critical
   place. A semaphore solves that.
   Because it is ok to signal a condition variable with no one waiting,
   we need to keep track of the number of waiting threads. Otherwise,
   the semaphore's state could rise without bound.

   Generic emulations of the pthread_cond_* API using Win32 functions
   can be found on the Web.
   The following read can be edifying (or not):
   http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
*/
typedef struct COND_T
{
    HANDLE sem;    /* the semaphore */
    int n_waiting; /* how many waiters are unreleased */
} COND_T;

__inline static void _cond_init(COND_T *cond)
{
    /* A semaphore with a large max value. The positive value
     * is only needed to catch those "lost wakeup" events and
     * race conditions when a timed wait elapses.
     */
    if (!(cond->sem = CreateSemaphore(NULL, 0, 1000, NULL)))
        Py_FatalError("CreateSemaphore() failed");
    cond->n_waiting = 0;
}

__inline static void _cond_fini(COND_T *cond)
{
    BOOL ok = CloseHandle(cond->sem);
    if (!ok)
        Py_FatalError("CloseHandle() failed");
}

__inline static void _cond_wait(COND_T *cond, MUTEX_T *mut)
{
    ++cond->n_waiting;
    MUTEX_UNLOCK(*mut);
    /* The "lost wakeup bug" would occur if the caller were interrupted here,
     * but we are safe because we are using a semaphore which has an internal
     * count.
     */
    if (WaitForSingleObject(cond->sem, INFINITE) == WAIT_FAILED)
        Py_FatalError("WaitForSingleObject() failed");
    MUTEX_LOCK(*mut);
}

__inline static int _cond_timed_wait(COND_T *cond, MUTEX_T *mut,
                                     int us)
{
    DWORD r;
    ++cond->n_waiting;
    MUTEX_UNLOCK(*mut);
    r = WaitForSingleObject(cond->sem, us / 1000);
    if (r == WAIT_FAILED)
        Py_FatalError("WaitForSingleObject() failed");
    MUTEX_LOCK(*mut);
    if (r == WAIT_TIMEOUT)
        --cond->n_waiting;
    /* Here we have a benign race condition with _cond_signal. If the
     * wait operation has timed out, but before we can acquire the
     * mutex again to decrement n_waiting, a thread holding the mutex
     * still sees a positive n_waiting value and may call
     * ReleaseSemaphore and decrement n_waiting.
     * This will cause n_waiting to be decremented twice.
     * This is benign, though, because ReleaseSemaphore will also have
     * been called, leaving the semaphore state positive. We may
     * thus end up with the semaphore in state 1 and n_waiting == -1, and
     * the next time someone calls _cond_wait(), that thread will
     * pass right through, decrementing the semaphore state and
     * incrementing n_waiting, thus correcting the extra _cond_signal.
     */
    return r == WAIT_TIMEOUT;
}

__inline static void _cond_signal(COND_T *cond)
{
    /* NOTE: This must be called with the mutex held */
    if (cond->n_waiting > 0) {
        if (!ReleaseSemaphore(cond->sem, 1, NULL))
            Py_FatalError("ReleaseSemaphore() failed");
        --cond->n_waiting;
    }
}

#define COND_INIT(cond) \
    _cond_init(&(cond))
#define COND_FINI(cond) \
    _cond_fini(&(cond))
#define COND_SIGNAL(cond) \
    _cond_signal(&(cond))
#define COND_WAIT(cond, mut) \
    _cond_wait(&(cond), &(mut))
#define COND_TIMED_WAIT(cond, mut, us, timeout_result) do { \
    (timeout_result) = _cond_timed_wait(&(cond), &(mut), us); \
} while (0)

#else

#error You need either a POSIX-compatible or a Windows system!

#endif /* _POSIX_THREADS, NT_THREADS */
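/* Illustrative sketch (kept out of compilation): whichever branch above
   was selected, the macros compose into the usual mutex/condvar idiom.
   `m', `c' and `flag' are hypothetical names for this example only. */
#if 0
static MUTEX_T m;
static COND_T c;
static int flag;

/* Waiting side: */
MUTEX_LOCK(m);
while (!flag)
    COND_WAIT(c, m);
MUTEX_UNLOCK(m);

/* Signalling side (note that the NT emulation requires the mutex to be
   held around COND_SIGNAL, see _cond_signal() above): */
MUTEX_LOCK(m);
flag = 1;
COND_SIGNAL(c);
MUTEX_UNLOCK(m);
#endif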
/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
   because it can be read without any lock taken in ceval.c. */
static _Py_atomic_int gil_locked = {-1};
/* Number of GIL switches since the beginning. */
static unsigned long gil_switch_number = 0;
/* Last PyThreadState holding / having held the GIL. This helps us know
   whether anyone else was scheduled after we dropped the GIL. */
static _Py_atomic_address gil_last_holder = {NULL};

/* This condition variable allows one or several threads to wait until
   the GIL is released. In addition, the mutex also protects the above
   variables. */
static COND_T gil_cond;
static MUTEX_T gil_mutex;

#ifdef FORCE_SWITCHING
/* This condition variable helps the GIL-releasing thread wait for
   a GIL-awaiting thread to be scheduled and take the GIL. */
static COND_T switch_cond;
static MUTEX_T switch_mutex;
#endif


static int gil_created(void)
{
    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
}

static void create_gil(void)
{
    MUTEX_INIT(gil_mutex);
#ifdef FORCE_SWITCHING
    MUTEX_INIT(switch_mutex);
#endif
    COND_INIT(gil_cond);
#ifdef FORCE_SWITCHING
    COND_INIT(switch_cond);
#endif
    _Py_atomic_store_relaxed(&gil_last_holder, NULL);
    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
}

static void destroy_gil(void)
{
    MUTEX_FINI(gil_mutex);
#ifdef FORCE_SWITCHING
    MUTEX_FINI(switch_mutex);
#endif
    COND_FINI(gil_cond);
#ifdef FORCE_SWITCHING
    COND_FINI(switch_cond);
#endif
    _Py_atomic_store_explicit(&gil_locked, -1, _Py_memory_order_release);
    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
}

static void recreate_gil(void)
{
    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
    /* XXX should we destroy the old OS resources here? */
    create_gil();
}

static void drop_gil(PyThreadState *tstate)
{
    if (!_Py_atomic_load_relaxed(&gil_locked))
        Py_FatalError("drop_gil: GIL is not locked");
    /* tstate is allowed to be NULL (early interpreter init) */
    if (tstate != NULL) {
        /* Sub-interpreter support: threads might have been switched
           under our feet using PyThreadState_Swap(). Fix the GIL last
           holder variable so that our heuristics work. */
        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
    }

    MUTEX_LOCK(gil_mutex);
    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
    _Py_atomic_store_relaxed(&gil_locked, 0);
    COND_SIGNAL(gil_cond);
    MUTEX_UNLOCK(gil_mutex);

#ifdef FORCE_SWITCHING
    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
        MUTEX_LOCK(switch_mutex);
        /* Not switched yet => wait */
        if (_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
            RESET_GIL_DROP_REQUEST();
            /* NOTE: if COND_WAIT does not atomically start waiting when
               releasing the mutex, another thread can run through, take
               the GIL and drop it again, and reset the condition
               before we even had a chance to wait for it. */
            COND_WAIT(switch_cond, switch_mutex);
        }
        MUTEX_UNLOCK(switch_mutex);
    }
#endif
}
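/* Illustrative sketch (kept out of compilation): drop_gil() and take_gil()
   bracket blocking operations.  In the real code this is reached through
   PyEval_SaveThread() / PyEval_RestoreThread(), i.e. the
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS macros: */
#if 0
PyThreadState *tstate = PyEval_SaveThread();   /* ends up in drop_gil() */
/* ... perform a blocking system call without holding the GIL ... */
PyEval_RestoreThread(tstate);                  /* ends up in take_gil() */
#endif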
static void take_gil(PyThreadState *tstate)
{
    int err;
    if (tstate == NULL)
        Py_FatalError("take_gil: NULL tstate");

    err = errno;
    MUTEX_LOCK(gil_mutex);

    if (!_Py_atomic_load_relaxed(&gil_locked))
        goto _ready;

    while (_Py_atomic_load_relaxed(&gil_locked)) {
        int timed_out = 0;
        unsigned long saved_switchnum;

        saved_switchnum = gil_switch_number;
        COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
        /* If we timed out and no switch occurred in the meantime, it is time
           to ask the GIL-holding thread to drop it. */
        if (timed_out &&
            _Py_atomic_load_relaxed(&gil_locked) &&
            gil_switch_number == saved_switchnum) {
            SET_GIL_DROP_REQUEST();
        }
    }
_ready:
#ifdef FORCE_SWITCHING
    /* This mutex must be taken before modifying gil_last_holder
       (see drop_gil()). */
    MUTEX_LOCK(switch_mutex);
#endif
    /* We now hold the GIL */
    _Py_atomic_store_relaxed(&gil_locked, 1);
    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);

    if (tstate != _Py_atomic_load_relaxed(&gil_last_holder)) {
        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
        ++gil_switch_number;
    }

#ifdef FORCE_SWITCHING
    COND_SIGNAL(switch_cond);
    MUTEX_UNLOCK(switch_mutex);
#endif
    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
        RESET_GIL_DROP_REQUEST();
    }
    if (tstate->async_exc != NULL) {
        _PyEval_SignalAsyncExc();
    }

    MUTEX_UNLOCK(gil_mutex);
    errno = err;
}

void _PyEval_SetSwitchInterval(unsigned long microseconds)
{
    gil_interval = microseconds;
}

unsigned long _PyEval_GetSwitchInterval()
{
    return gil_interval;
}
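/* Illustrative sketch (kept out of compilation): sys.setswitchinterval()
   exposes the interval in seconds, while this file stores microseconds;
   the conversion in sysmodule.c works roughly along these lines
   (simplified, argument checking omitted): */
#if 0
double seconds = 0.005;                               /* from Python code */
_PyEval_SetSwitchInterval((unsigned long)(seconds * 1e6));
/* ... and the getter converts back: */
double current = 1e-6 * (double)_PyEval_GetSwitchInterval();
#endif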