Diffstat (limited to 'Python')
-rw-r--r--   Python/ceval.c               |  63
-rw-r--r--   Python/ceval_gil.h           |  48
-rw-r--r--   Python/dynamic_annotations.c | 154
-rw-r--r--   Python/pystate.c             |  37
-rw-r--r--   Python/thread_pthread.h      |   6

5 files changed, 253 insertions(+), 55 deletions(-)
diff --git a/Python/ceval.c b/Python/ceval.c
index 0c14eb0..09f939e 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -216,23 +216,46 @@ PyEval_GetCallStats(PyObject *self)
 #endif

+/* This can set eval_breaker to 0 even though gil_drop_request became
+   1. We believe this is all right because the eval loop will release
+   the GIL eventually anyway. */
 #define COMPUTE_EVAL_BREAKER() \
-    (eval_breaker = gil_drop_request | pendingcalls_to_do | pending_async_exc)
+    _Py_atomic_store_relaxed( \
+        &eval_breaker, \
+        _Py_atomic_load_relaxed(&gil_drop_request) | \
+        _Py_atomic_load_relaxed(&pendingcalls_to_do) | \
+        pending_async_exc)

 #define SET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 1; eval_breaker = 1; } while (0)
+    do { \
+        _Py_atomic_store_relaxed(&gil_drop_request, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+    } while (0)

 #define RESET_GIL_DROP_REQUEST() \
-    do { gil_drop_request = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    do { \
+        _Py_atomic_store_relaxed(&gil_drop_request, 0); \
+        COMPUTE_EVAL_BREAKER(); \
+    } while (0)

+/* Pending calls are only modified under pending_lock */
 #define SIGNAL_PENDING_CALLS() \
-    do { pendingcalls_to_do = 1; eval_breaker = 1; } while (0)
+    do { \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 1); \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+    } while (0)

 #define UNSIGNAL_PENDING_CALLS() \
-    do { pendingcalls_to_do = 0; COMPUTE_EVAL_BREAKER(); } while (0)
+    do { \
+        _Py_atomic_store_relaxed(&pendingcalls_to_do, 0); \
+        COMPUTE_EVAL_BREAKER(); \
+    } while (0)

 #define SIGNAL_ASYNC_EXC() \
-    do { pending_async_exc = 1; eval_breaker = 1; } while (0)
+    do { \
+        pending_async_exc = 1; \
+        _Py_atomic_store_relaxed(&eval_breaker, 1); \
+    } while (0)

 #define UNSIGNAL_ASYNC_EXC() \
     do { pending_async_exc = 0; COMPUTE_EVAL_BREAKER(); } while (0)

@@ -249,13 +272,14 @@ static PyThread_type_lock pending_lock = 0; /* for pending calls */
 static long main_thread = 0;
 /* This single variable consolidates all requests to break out of the fast path
    in the eval loop. */
-static volatile int eval_breaker = 0;
-/* Request for droppping the GIL */
-static volatile int gil_drop_request = 0;
-/* Request for running pending calls */
-static volatile int pendingcalls_to_do = 0;
-/* Request for looking at the `async_exc` field of the current thread state */
-static volatile int pending_async_exc = 0;
+static _Py_atomic_int eval_breaker = {0};
+/* Request for dropping the GIL */
+static _Py_atomic_int gil_drop_request = {0};
+/* Request for running pending calls. */
+static _Py_atomic_int pendingcalls_to_do = {0};
+/* Request for looking at the `async_exc` field of the current thread state.
+   Guarded by the GIL. */
+static int pending_async_exc = 0;

 #include "ceval_gil.h"

@@ -293,7 +317,8 @@ PyEval_ReleaseLock(void)
        We therefore avoid PyThreadState_GET()
        which dumps a fatal error in debug mode.
     */
-    drop_gil(_PyThreadState_Current);
+    drop_gil((PyThreadState*)_Py_atomic_load_relaxed(
+        &_PyThreadState_Current));
 }

 void
@@ -360,8 +385,8 @@ PyEval_ReInitThreads(void)
 }

 #else
-static int eval_breaker = 0;
-static int gil_drop_request = 0;
+static _Py_atomic_int eval_breaker = {0};
+static _Py_atomic_int gil_drop_request = {0};
 static int pending_async_exc = 0;
 #endif /* WITH_THREAD */

@@ -1217,7 +1242,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
            async I/O handler); see Py_AddPendingCall() and
            Py_MakePendingCalls() above.
        */
-        if (eval_breaker) {
+        if (_Py_atomic_load_relaxed(&eval_breaker)) {
            if (*next_instr == SETUP_FINALLY) {
                /* Make the last opcode before
                   a try: finally: block uninterruptable. */
@@ -1227,13 +1252,13 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
 #ifdef WITH_TSC
            ticked = 1;
 #endif
-            if (pendingcalls_to_do) {
+            if (_Py_atomic_load_relaxed(&pendingcalls_to_do)) {
                if (Py_MakePendingCalls() < 0) {
                    why = WHY_EXCEPTION;
                    goto on_error;
                }
            }
-            if (gil_drop_request) {
+            if (_Py_atomic_load_relaxed(&gil_drop_request)) {
 #ifdef WITH_THREAD
                /* Give another thread a chance */
                if (PyThreadState_Swap(NULL) != tstate)
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index d4d6fdd..a284c5d 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -207,14 +207,14 @@ do { \
 #endif   /* _POSIX_THREADS, NT_THREADS */

-/* Whether the GIL is already taken (-1 if uninitialized). This is volatile
+/* Whether the GIL is already taken (-1 if uninitialized). This is atomic
    because it can be read without any lock taken in ceval.c. */
-static volatile int gil_locked = -1;
+static _Py_atomic_int gil_locked = {-1};
 /* Number of GIL switches since the beginning. */
 static unsigned long gil_switch_number = 0;
-/* Last thread holding / having held the GIL. This helps us know whether
-   anyone else was scheduled after we dropped the GIL. */
-static PyThreadState *gil_last_holder = NULL;
+/* Last PyThreadState holding / having held the GIL. This helps us know
+   whether anyone else was scheduled after we dropped the GIL. */
+static _Py_atomic_address gil_last_holder = {NULL};

 /* This condition variable allows one or several threads to wait until
    the GIL is released. In addition, the mutex also protects the above
@@ -232,7 +232,7 @@ static MUTEX_T switch_mutex;

 static int gil_created(void)
 {
-    return gil_locked >= 0;
+    return _Py_atomic_load_explicit(&gil_locked, _Py_memory_order_acquire) >= 0;
 }

 static void create_gil(void)
@@ -245,33 +245,37 @@ static void create_gil(void)
 #ifdef FORCE_SWITCHING
     COND_INIT(switch_cond);
 #endif
-    gil_locked = 0;
-    gil_last_holder = NULL;
+    _Py_atomic_store_relaxed(&gil_last_holder, NULL);
+    _Py_ANNOTATE_RWLOCK_CREATE(&gil_locked);
+    _Py_atomic_store_explicit(&gil_locked, 0, _Py_memory_order_release);
 }

 static void recreate_gil(void)
 {
+    _Py_ANNOTATE_RWLOCK_DESTROY(&gil_locked);
     create_gil();
 }

 static void drop_gil(PyThreadState *tstate)
 {
     /* NOTE: tstate is allowed to be NULL. */
-    if (!gil_locked)
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         Py_FatalError("drop_gil: GIL is not locked");
-    if (tstate != NULL && tstate != gil_last_holder)
+    if (tstate != NULL &&
+        tstate != _Py_atomic_load_relaxed(&gil_last_holder))
         Py_FatalError("drop_gil: wrong thread state");

     MUTEX_LOCK(gil_mutex);
-    gil_locked = 0;
+    _Py_ANNOTATE_RWLOCK_RELEASED(&gil_locked, /*is_write=*/1);
+    _Py_atomic_store_relaxed(&gil_locked, 0);
     COND_SIGNAL(gil_cond);
     MUTEX_UNLOCK(gil_mutex);

 #ifdef FORCE_SWITCHING
-    if (gil_drop_request && tstate != NULL) {
+    if (_Py_atomic_load_relaxed(&gil_drop_request) && tstate != NULL) {
         MUTEX_LOCK(switch_mutex);
         /* Not switched yet => wait */
-        if (gil_last_holder == tstate) {
+        if (_Py_atomic_load_relaxed(&gil_last_holder) == tstate) {
             RESET_GIL_DROP_REQUEST();
             /* NOTE: if COND_WAIT does not atomically start waiting when
                releasing the mutex, another thread can run through, take
@@ -294,11 +298,11 @@ static void take_gil(PyThreadState *tstate)
     err = errno;
     MUTEX_LOCK(gil_mutex);

-    if (!gil_locked)
+    if (!_Py_atomic_load_relaxed(&gil_locked))
         goto _ready;

     COND_RESET(gil_cond);
-    while (gil_locked) {
+    while (_Py_atomic_load_relaxed(&gil_locked)) {
         int timed_out = 0;
         unsigned long saved_switchnum;

@@ -306,7 +310,9 @@ static void take_gil(PyThreadState *tstate)
         COND_TIMED_WAIT(gil_cond, gil_mutex, INTERVAL, timed_out);
         /* If we timed out and no switch occurred in the meantime, it is time
            to ask the GIL-holding thread to drop it. */
-        if (timed_out && gil_locked && gil_switch_number == saved_switchnum) {
+        if (timed_out &&
+            _Py_atomic_load_relaxed(&gil_locked) &&
+            gil_switch_number == saved_switchnum) {
             SET_GIL_DROP_REQUEST();
         }
     }
@@ -316,17 +322,19 @@ _ready:
     MUTEX_LOCK(switch_mutex);
 #endif
     /* We now hold the GIL */
-    gil_locked = 1;
+    _Py_atomic_store_relaxed(&gil_locked, 1);
+    _Py_ANNOTATE_RWLOCK_ACQUIRED(&gil_locked, /*is_write=*/1);

-    if (tstate != gil_last_holder) {
-        gil_last_holder = tstate;
+    if (tstate != _Py_atomic_load_relaxed(&gil_last_holder)) {
+        _Py_atomic_store_relaxed(&gil_last_holder, tstate);
         ++gil_switch_number;
     }
+
 #ifdef FORCE_SWITCHING
     COND_SIGNAL(switch_cond);
     MUTEX_UNLOCK(switch_mutex);
 #endif
-    if (gil_drop_request) {
+    if (_Py_atomic_load_relaxed(&gil_drop_request)) {
         RESET_GIL_DROP_REQUEST();
     }
     if (tstate->async_exc != NULL) {
diff --git a/Python/dynamic_annotations.c b/Python/dynamic_annotations.c
new file mode 100644
index 0000000..10511da
--- /dev/null
+++ b/Python/dynamic_annotations.c
@@ -0,0 +1,154 @@
+/* Copyright (c) 2008-2009, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Neither the name of Google Inc. nor the names of its
+ *       contributors may be used to endorse or promote products derived from
+ *       this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ---
+ * Author: Kostya Serebryany
+ */
+
+#ifdef _MSC_VER
+# include <windows.h>
+#endif
+
+#ifdef __cplusplus
+# error "This file should be built as pure C to avoid name mangling"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "dynamic_annotations.h"
+
+/* Each function is empty and called (via a macro) only in debug mode.
+   The arguments are captured by dynamic tools at runtime. */
+
+#if DYNAMIC_ANNOTATIONS_ENABLED == 1
+
+void AnnotateRWLockCreate(const char *file, int line,
+                          const volatile void *lock){}
+void AnnotateRWLockDestroy(const char *file, int line,
+                           const volatile void *lock){}
+void AnnotateRWLockAcquired(const char *file, int line,
+                            const volatile void *lock, long is_w){}
+void AnnotateRWLockReleased(const char *file, int line,
+                            const volatile void *lock, long is_w){}
+void AnnotateBarrierInit(const char *file, int line,
+                         const volatile void *barrier, long count,
+                         long reinitialization_allowed) {}
+void AnnotateBarrierWaitBefore(const char *file, int line,
+                               const volatile void *barrier) {}
+void AnnotateBarrierWaitAfter(const char *file, int line,
+                              const volatile void *barrier) {}
+void AnnotateBarrierDestroy(const char *file, int line,
+                            const volatile void *barrier) {}
+
+void AnnotateCondVarWait(const char *file, int line,
+                         const volatile void *cv,
+                         const volatile void *lock){}
+void AnnotateCondVarSignal(const char *file, int line,
+                           const volatile void *cv){}
+void AnnotateCondVarSignalAll(const char *file, int line,
+                              const volatile void *cv){}
+void AnnotatePublishMemoryRange(const char *file, int line,
+                                const volatile void *address,
+                                long size){}
+void AnnotateUnpublishMemoryRange(const char *file, int line,
+                                  const volatile void *address,
+                                  long size){}
+void AnnotatePCQCreate(const char *file, int line,
+                       const volatile void *pcq){}
+void AnnotatePCQDestroy(const char *file, int line,
+                        const volatile void *pcq){}
+void AnnotatePCQPut(const char *file, int line,
+                    const volatile void *pcq){}
+void AnnotatePCQGet(const char *file, int line,
+                    const volatile void *pcq){}
+void AnnotateNewMemory(const char *file, int line,
+                       const volatile void *mem,
+                       long size){}
+void AnnotateExpectRace(const char *file, int line,
+                        const volatile void *mem,
+                        const char *description){}
+void AnnotateBenignRace(const char *file, int line,
+                        const volatile void *mem,
+                        const char *description){}
+void AnnotateBenignRaceSized(const char *file, int line,
+                             const volatile void *mem,
+                             long size,
+                             const char *description) {}
+void AnnotateMutexIsUsedAsCondVar(const char *file, int line,
+                                  const volatile void *mu){}
+void AnnotateTraceMemory(const char *file, int line,
+                         const volatile void *arg){}
+void AnnotateThreadName(const char *file, int line,
+                        const char *name){}
+void AnnotateIgnoreReadsBegin(const char *file, int line){}
+void AnnotateIgnoreReadsEnd(const char *file, int line){}
+void AnnotateIgnoreWritesBegin(const char *file, int line){}
+void AnnotateIgnoreWritesEnd(const char *file, int line){}
+void AnnotateIgnoreSyncBegin(const char *file, int line){}
+void AnnotateIgnoreSyncEnd(const char *file, int line){}
+void AnnotateEnableRaceDetection(const char *file, int line, int enable){}
+void AnnotateNoOp(const char *file, int line,
+                  const volatile void *arg){}
+void AnnotateFlushState(const char *file, int line){}
+
+static int GetRunningOnValgrind(void) {
+#ifdef RUNNING_ON_VALGRIND
+  if (RUNNING_ON_VALGRIND) return 1;
+#endif
+
+#ifndef _MSC_VER
+  char *running_on_valgrind_str = getenv("RUNNING_ON_VALGRIND");
+  if (running_on_valgrind_str) {
+    return strcmp(running_on_valgrind_str, "0") != 0;
+  }
+#else
+  /* Visual Studio issues warnings if we use getenv,
+   * so we use GetEnvironmentVariableA instead.
+   */
+  char value[100] = "1";
+  int res = GetEnvironmentVariableA("RUNNING_ON_VALGRIND",
+                                    value, sizeof(value));
+  /* value will remain "1" if res == 0 or res >= sizeof(value). The latter
+   * can happen only if the given value is long, in this case it can't be "0".
+   */
+  if (res > 0 && strcmp(value, "0") != 0)
+    return 1;
+#endif
+  return 0;
+}
+
+/* See the comments in dynamic_annotations.h */
+int RunningOnValgrind(void) {
+  static volatile int running_on_valgrind = -1;
+  /* C doesn't have thread-safe initialization of statics, and we
+     don't want to depend on pthread_once here, so hack it. */
+  int local_running_on_valgrind = running_on_valgrind;
+  if (local_running_on_valgrind == -1)
+    running_on_valgrind = local_running_on_valgrind = GetRunningOnValgrind();
+  return local_running_on_valgrind;
+}
+
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED == 1 */
diff --git a/Python/pystate.c b/Python/pystate.c
index eb2dfa6..7154aea 100644
--- a/Python/pystate.c
+++ b/Python/pystate.c
@@ -47,7 +47,9 @@ static int autoTLSkey = 0;

 static PyInterpreterState *interp_head = NULL;

-PyThreadState *_PyThreadState_Current = NULL;
+/* Assuming the current thread holds the GIL, this is the
+   PyThreadState for the current thread. */
+_Py_atomic_address _PyThreadState_Current = {NULL};
 PyThreadFrameGetter _PyThreadState_GetFrame = NULL;

 #ifdef WITH_THREAD
@@ -334,7 +336,7 @@ tstate_delete_common(PyThreadState *tstate)
 void
 PyThreadState_Delete(PyThreadState *tstate)
 {
-    if (tstate == _PyThreadState_Current)
+    if (tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current))
         Py_FatalError("PyThreadState_Delete: tstate is still current");
     tstate_delete_common(tstate);
 #ifdef WITH_THREAD
@@ -348,11 +350,12 @@ PyThreadState_Delete(PyThreadState *tstate)
 void
 PyThreadState_DeleteCurrent()
 {
-    PyThreadState *tstate = _PyThreadState_Current;
+    PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed(
+        &_PyThreadState_Current);
     if (tstate == NULL)
         Py_FatalError(
             "PyThreadState_DeleteCurrent: no current tstate");
-    _PyThreadState_Current = NULL;
+    _Py_atomic_store_relaxed(&_PyThreadState_Current, NULL);
     tstate_delete_common(tstate);
     if (autoTLSkey && PyThread_get_key_value(autoTLSkey) == tstate)
         PyThread_delete_key_value(autoTLSkey);
@@ -364,19 +367,22 @@ PyThreadState_DeleteCurrent()

 PyThreadState *
 PyThreadState_Get(void)
 {
-    if (_PyThreadState_Current == NULL)
+    PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed(
+        &_PyThreadState_Current);
+    if (tstate == NULL)
         Py_FatalError("PyThreadState_Get: no current thread");
-    return _PyThreadState_Current;
+    return tstate;
 }


 PyThreadState *
 PyThreadState_Swap(PyThreadState *newts)
 {
-    PyThreadState *oldts = _PyThreadState_Current;
+    PyThreadState *oldts = (PyThreadState*)_Py_atomic_load_relaxed(
+        &_PyThreadState_Current);

-    _PyThreadState_Current = newts;
+    _Py_atomic_store_relaxed(&_PyThreadState_Current, newts);
     /* It should not be possible for more than one thread state
        to be used for a thread.  Check this the best we can in debug
        builds.
@@ -405,16 +411,18 @@ PyThreadState_Swap(PyThreadState *newts)
 PyObject *
 PyThreadState_GetDict(void)
 {
-    if (_PyThreadState_Current == NULL)
+    PyThreadState *tstate = (PyThreadState*)_Py_atomic_load_relaxed(
+        &_PyThreadState_Current);
+    if (tstate == NULL)
         return NULL;

-    if (_PyThreadState_Current->dict == NULL) {
+    if (tstate->dict == NULL) {
         PyObject *d;
-        _PyThreadState_Current->dict = d = PyDict_New();
+        tstate->dict = d = PyDict_New();
         if (d == NULL)
             PyErr_Clear();
     }
-    return _PyThreadState_Current->dict;
+    return tstate->dict;
 }

@@ -550,10 +558,7 @@ PyThreadState_IsCurrent(PyThreadState *tstate)
 {
     /* Must be the tstate for this thread */
     assert(PyGILState_GetThisThreadState()==tstate);
-    /* On Windows at least, simple reads and writes to 32 bit values
-       are atomic.
-    */
-    return tstate == _PyThreadState_Current;
+    return tstate == _Py_atomic_load_relaxed(&_PyThreadState_Current);
 }

 /* Internal initialization/finalization functions called by
diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h
index 6088c71..f60f36d 100644
--- a/Python/thread_pthread.h
+++ b/Python/thread_pthread.h
@@ -397,6 +397,12 @@ PyThread_allocate_lock(void)
         status = pthread_mutex_init(&lock->mut,
                                     pthread_mutexattr_default);
         CHECK_STATUS("pthread_mutex_init");
+        /* Mark the pthread mutex underlying a Python mutex as
+           pure happens-before.  We can't simply mark the
+           Python-level mutex as a mutex because it can be
+           acquired and released in different threads, which
+           will cause errors. */
+        _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(&lock->mut);

         status = pthread_cond_init(&lock->lock_released,
                                    pthread_condattr_default);
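The ceval.c hunks above fold three volatile request flags into a single atomically updated eval_breaker flag that the eval loop polls on its fast path. Below is a minimal standalone sketch of that pattern, using C11 <stdatomic.h> in place of CPython's _Py_atomic_* wrappers (which predate C11); the flag names mirror ceval.c, while compute_eval_breaker() and the main() driver are invented here for illustration.

/* Sketch, not CPython code: consolidated-flag pattern from ceval.c. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int eval_breaker;        /* fast-path flag polled by the loop */
static atomic_int gil_drop_request;
static atomic_int pendingcalls_to_do;
static int pending_async_exc;          /* guarded by the GIL */

/* May briefly store 0 even though gil_drop_request just became 1; that is
   tolerated because the eval loop releases the GIL eventually anyway. */
static void compute_eval_breaker(void)
{
    atomic_store_explicit(&eval_breaker,
        atomic_load_explicit(&gil_drop_request, memory_order_relaxed) |
        atomic_load_explicit(&pendingcalls_to_do, memory_order_relaxed) |
        pending_async_exc,
        memory_order_relaxed);
}

static void set_gil_drop_request(void)
{
    atomic_store_explicit(&gil_drop_request, 1, memory_order_relaxed);
    atomic_store_explicit(&eval_breaker, 1, memory_order_relaxed);
}

int main(void)
{
    set_gil_drop_request();
    /* The fast path tests a single flag instead of three. */
    if (atomic_load_explicit(&eval_breaker, memory_order_relaxed))
        printf("slow path: service the individual requests\n");
    atomic_store_explicit(&gil_drop_request, 0, memory_order_relaxed);
    compute_eval_breaker();
    return 0;
}

Relaxed ordering suffices because the flags are only hints: the slow path re-checks each request under the appropriate lock, and a transiently stale eval_breaker is corrected on the next recomputation, as the comment in the patch notes.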
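take_gil()/drop_gil() in ceval_gil.h implement forced switching: a waiter sleeps on a condition variable with a timeout, and if no GIL switch happened during the interval, it raises gil_drop_request so the holder yields. Here is a self-contained sketch of that handshake over plain pthreads, assuming an invented 5 ms interval; take_lock, drop_lock, worker, and the busy-wait in main are illustrative stand-ins, not CPython's code.

/* Sketch: timed-wait drop-request handshake, pthreads version. */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t lock_cond = PTHREAD_COND_INITIALIZER;
static int lock_held;                   /* protected by lock_mutex */
static unsigned long switch_number;     /* protected by lock_mutex */
static atomic_int drop_request;

static void take_lock(void)
{
    pthread_mutex_lock(&lock_mutex);
    while (lock_held) {
        unsigned long saved = switch_number;
        struct timespec deadline;
        clock_gettime(CLOCK_REALTIME, &deadline);
        deadline.tv_nsec += 5 * 1000 * 1000;        /* 5 ms interval */
        if (deadline.tv_nsec >= 1000000000L) {
            deadline.tv_sec += 1;
            deadline.tv_nsec -= 1000000000L;
        }
        int rc = pthread_cond_timedwait(&lock_cond, &lock_mutex, &deadline);
        /* Timed out and nobody switched in the meantime: ask for a drop. */
        if (rc == ETIMEDOUT && lock_held && switch_number == saved)
            atomic_store(&drop_request, 1);
    }
    lock_held = 1;
    ++switch_number;
    pthread_mutex_unlock(&lock_mutex);
}

static void drop_lock(void)
{
    pthread_mutex_lock(&lock_mutex);
    lock_held = 0;
    atomic_store(&drop_request, 0);
    pthread_cond_signal(&lock_cond);
    pthread_mutex_unlock(&lock_mutex);
}

static void *worker(void *arg)
{
    (void)arg;
    take_lock();
    drop_lock();
    return NULL;
}

int main(void)
{
    pthread_t t;
    take_lock();
    pthread_create(&t, NULL, worker, NULL);
    /* The holder polls the request flag, as the eval loop polls
       eval_breaker; a busy-wait is used here only to keep the demo short. */
    while (!atomic_load(&drop_request))
        ;
    drop_lock();
    pthread_join(t, NULL);
    puts("switched");
    return 0;
}

The switch_number counter plays the same role as gil_switch_number in the patch: it distinguishes "timed out because the holder is hogging the lock" from "timed out but someone else already got a turn", so the drop request is raised only in the first case.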
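dynamic_annotations.c consists entirely of empty functions because race detectors such as Valgrind/Helgrind or ThreadSanitizer intercept them by name at runtime; in normal builds each call costs one function call, or nothing when the macros expand away. A reduced sketch of the mechanism follows, reusing the AnnotateBenignRace signature from the new file; the ANNOTATE_BENIGN_RACE macro and the demo counter are simplified stand-ins for the real dynamic_annotations.h machinery.

/* Sketch: how annotation macros bottom out in interceptable no-ops. */
#include <stdio.h>

#define DYNAMIC_ANNOTATIONS_ENABLED 1

#if DYNAMIC_ANNOTATIONS_ENABLED == 1
/* Empty body; a dynamic tool hooks this symbol and records the call site. */
void AnnotateBenignRace(const char *file, int line,
                        const volatile void *mem,
                        const char *description) {}
# define ANNOTATE_BENIGN_RACE(mem, desc) \
    AnnotateBenignRace(__FILE__, __LINE__, (mem), (desc))
#else
# define ANNOTATE_BENIGN_RACE(mem, desc)  /* expands to nothing */
#endif

static int counter;   /* a variable we promise races harmlessly on */

int main(void)
{
    /* Tell the detector not to report races on `counter`. */
    ANNOTATE_BENIGN_RACE(&counter, "demo counter, races are tolerated");
    counter++;
    printf("%d\n", counter);
    return 0;
}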
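RunningOnValgrind() caches its answer in a static with a deliberately racy initialization: two threads may both compute the value, but they compute the same value, so the race is benign. A sketch of that idiom; expensive_probe() is an invented stand-in for GetRunningOnValgrind().

/* Sketch: benign racy lazy initialization, as in RunningOnValgrind(). */
#include <stdlib.h>
#include <string.h>

static int expensive_probe(void)
{
    /* Same environment check as the non-MSVC branch above. */
    const char *s = getenv("RUNNING_ON_VALGRIND");
    return s != NULL && strcmp(s, "0") != 0;
}

int running_on_valgrind(void)
{
    static volatile int cached = -1;
    /* Read once into a local so we test and return the same snapshot,
       even if another thread writes the cache concurrently. */
    int local = cached;
    if (local == -1)
        cached = local = expensive_probe();
    return local;
}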
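The pystate.c hunks convert _PyThreadState_Current from a plain pointer into an _Py_atomic_address, and every reader now loads it once into a local before testing and dereferencing it (see PyThreadState_GetDict above), instead of reading the global several times. A sketch of that load-once pattern with C11 atomics; ThreadState, current_tstate, and swap_tstate are invented names standing in for the CPython equivalents.

/* Sketch: atomic global pointer with a single load per reader. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { int id; } ThreadState;

static _Atomic(void *) current_tstate = NULL;

static ThreadState *swap_tstate(ThreadState *newts)
{
    /* One load, then one store; callers never reread the global. */
    ThreadState *oldts = (ThreadState *)
        atomic_load_explicit(&current_tstate, memory_order_relaxed);
    atomic_store_explicit(&current_tstate, newts, memory_order_relaxed);
    return oldts;
}

int main(void)
{
    ThreadState ts = { 1 };
    ThreadState *old = swap_tstate(&ts);
    printf("old=%p new id=%d\n", (void *)old, ts.id);
    return 0;
}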