From 31368a4f0e531c19affe2a1becd25fc316bc7501 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 30 Oct 2018 15:14:25 +0100
Subject: bpo-35081: Move Include/pyatomic.c to Include/internal/ (GH-10239)

Add pyatomic.h to the VS project (it wasn't referenced).
---
 Include/Python.h                   |   2 -
 Include/internal/ceval.h           |   2 +-
 Include/internal/gil.h             |   6 +-
 Include/internal/pyatomic.h        | 544 +++++++++++++++++++++++++++++++++++++
 Include/internal/pystate.h         |   1 -
 Include/pyatomic.h                 | 535 ------------------------------------
 Makefile.pre.in                    |   2 +-
 Modules/signalmodule.c             |   2 +
 PCbuild/pythoncore.vcxproj         |   1 +
 PCbuild/pythoncore.vcxproj.filters |   3 +
 Python/ceval_gil.h                 |   1 +
 11 files changed, 556 insertions(+), 543 deletions(-)
 create mode 100644 Include/internal/pyatomic.h
 delete mode 100644 Include/pyatomic.h

diff --git a/Include/Python.h b/Include/Python.h
index 80200fe..cf87a5c 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -53,8 +53,6 @@
 #include "pyport.h"
 #include "pymacro.h"
 
-#include "pyatomic.h"
-
 /* Debug-mode build with pymalloc implies PYMALLOC_DEBUG.
  * PYMALLOC_DEBUG is in error if pymalloc is not in use.
  */
diff --git a/Include/internal/ceval.h b/Include/internal/ceval.h
index cdabb95..4297b5a 100644
--- a/Include/internal/ceval.h
+++ b/Include/internal/ceval.h
@@ -4,7 +4,7 @@
 extern "C" {
 #endif
 
-#include "pyatomic.h"
+#include "internal/pyatomic.h"
 #include "pythread.h"
 
 struct _pending_calls {
diff --git a/Include/internal/gil.h b/Include/internal/gil.h
index 6139bd2..7743b3f 100644
--- a/Include/internal/gil.h
+++ b/Include/internal/gil.h
@@ -4,11 +4,11 @@
 extern "C" {
 #endif
 
-#include "pyatomic.h"
-
 #include "internal/condvar.h"
+#include "internal/pyatomic.h"
+
 #ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
+# error You need either a POSIX-compatible or a Windows system!
 #endif
 
 /* Enable if you want to force the switching of threads at least
diff --git a/Include/internal/pyatomic.h b/Include/internal/pyatomic.h
new file mode 100644
index 0000000..5f349cc
--- /dev/null
+++ b/Include/internal/pyatomic.h
@@ -0,0 +1,544 @@
+#ifndef Py_ATOMIC_H
+#define Py_ATOMIC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "Py_BUILD_CORE must be defined to include this header"
+#endif
+
+#include "dynamic_annotations.h"
+
+#include "pyconfig.h"
+
+#if defined(HAVE_STD_ATOMIC)
+#include <stdatomic.h>
+#endif
+
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <immintrin.h>
+#endif
+
+/* This is modeled after the atomics interface from C1x, according to
+ * the draft at
+ * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
+ * Operations and types are named the same except with a _Py_ prefix
+ * and have the same semantics.
+ *
+ * Beware, the implementations here are deep magic.
+ */ + +#if defined(HAVE_STD_ATOMIC) + +typedef enum _Py_memory_order { + _Py_memory_order_relaxed = memory_order_relaxed, + _Py_memory_order_acquire = memory_order_acquire, + _Py_memory_order_release = memory_order_release, + _Py_memory_order_acq_rel = memory_order_acq_rel, + _Py_memory_order_seq_cst = memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + atomic_uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + atomic_int _value; +} _Py_atomic_int; + +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ + atomic_signal_fence(ORDER) + +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ + atomic_thread_fence(ORDER) + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER) + +/* Use builtin atomic operations in GCC >= 4.7 */ +#elif defined(HAVE_BUILTIN_ATOMIC) + +typedef enum _Py_memory_order { + _Py_memory_order_relaxed = __ATOMIC_RELAXED, + _Py_memory_order_acquire = __ATOMIC_ACQUIRE, + _Py_memory_order_release = __ATOMIC_RELEASE, + _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL, + _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; + +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ + __atomic_signal_fence(ORDER) + +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ + __atomic_thread_fence(ORDER) + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + (assert((ORDER) == __ATOMIC_RELAXED \ + || (ORDER) == __ATOMIC_SEQ_CST \ + || (ORDER) == __ATOMIC_RELEASE), \ + __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + (assert((ORDER) == __ATOMIC_RELAXED \ + || (ORDER) == __ATOMIC_SEQ_CST \ + || (ORDER) == __ATOMIC_ACQUIRE \ + || (ORDER) == __ATOMIC_CONSUME), \ + __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) + +/* Only support GCC (for expression statements) and x86 (for simple + * atomic semantics) and MSVC x86/x64/ARM */ +#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; + + +static __inline__ void +_Py_atomic_signal_fence(_Py_memory_order order) +{ + if (order != _Py_memory_order_relaxed) + __asm__ volatile("":::"memory"); +} + +static __inline__ void +_Py_atomic_thread_fence(_Py_memory_order order) +{ + if (order != _Py_memory_order_relaxed) + __asm__ volatile("mfence":::"memory"); +} + +/* Tell the race checker about this operation's effects. 
*/ +static __inline__ void +_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) +{ + (void)address; /* shut up -Wunused-parameter */ + switch(order) { + case _Py_memory_order_release: + case _Py_memory_order_acq_rel: + case _Py_memory_order_seq_cst: + _Py_ANNOTATE_HAPPENS_BEFORE(address); + break; + case _Py_memory_order_relaxed: + case _Py_memory_order_acquire: + break; + } + switch(order) { + case _Py_memory_order_acquire: + case _Py_memory_order_acq_rel: + case _Py_memory_order_seq_cst: + _Py_ANNOTATE_HAPPENS_AFTER(address); + break; + case _Py_memory_order_relaxed: + case _Py_memory_order_release: + break; + } +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + __extension__ ({ \ + __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ + __typeof__(atomic_val->_value) new_val = NEW_VAL;\ + volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \ + _Py_memory_order order = ORDER; \ + _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ + \ + /* Perform the operation. */ \ + _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \ + switch(order) { \ + case _Py_memory_order_release: \ + _Py_atomic_signal_fence(_Py_memory_order_release); \ + /* fallthrough */ \ + case _Py_memory_order_relaxed: \ + *volatile_data = new_val; \ + break; \ + \ + case _Py_memory_order_acquire: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + __asm__ volatile("xchg %0, %1" \ + : "+r"(new_val) \ + : "m"(atomic_val->_value) \ + : "memory"); \ + break; \ + } \ + _Py_ANNOTATE_IGNORE_WRITES_END(); \ + }) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + __extension__ ({ \ + __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ + __typeof__(atomic_val->_value) result; \ + volatile __typeof__(result) *volatile_data = &atomic_val->_value; \ + _Py_memory_order order = ORDER; \ + _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ + \ + /* Perform the operation. */ \ + _Py_ANNOTATE_IGNORE_READS_BEGIN(); \ + switch(order) { \ + case _Py_memory_order_release: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + /* Loads on x86 are not releases by default, so need a */ \ + /* thread fence. */ \ + _Py_atomic_thread_fence(_Py_memory_order_release); \ + break; \ + default: \ + /* No fence */ \ + break; \ + } \ + result = *volatile_data; \ + switch(order) { \ + case _Py_memory_order_acquire: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + /* Loads on x86 are automatically acquire operations so */ \ + /* can get by with just a compiler fence. */ \ + _Py_atomic_signal_fence(_Py_memory_order_acquire); \ + break; \ + default: \ + /* No fence */ \ + break; \ + } \ + _Py_ANNOTATE_IGNORE_READS_END(); \ + result; \ + }) + +#elif defined(_MSC_VER) +/* _Interlocked* functions provide a full memory barrier and are therefore + enough for acq_rel and seq_cst. If the HLE variants aren't available + in hardware they will fall back to a full memory barrier as well. + + This might affect performance but likely only in some very specific and + hard to meassure scenario. 
+*/ +#if defined(_M_IX86) || defined(_M_X64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_X64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_X64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + __int64 old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + long old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ + _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \ + ) +#elif defined(_M_ARM) || defined(_M_ARM64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_ARM64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_ARM64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + uintptr_t old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64(value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + int old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange(value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ + _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \ + ) +#endif +#else /* !gcc x86 !_msc_ver */ +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; +/* Fall back to other compilers and processors by assuming that simple + volatile accesses are atomic. This is false, so people should port + this. */ +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0) +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0) +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + ((ATOMIC_VAL)->_value = NEW_VAL) +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ((ATOMIC_VAL)->_value) +#endif + +/* Standardized shortcuts. */ +#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \ + _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst) +#define _Py_atomic_load(ATOMIC_VAL) \ + _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst) + +/* Python-local extensions */ + +#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \ + _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed) +#define _Py_atomic_load_relaxed(ATOMIC_VAL) \ + _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed) + +#ifdef __cplusplus +} +#endif +#endif /* Py_ATOMIC_H */ diff --git a/Include/internal/pystate.h b/Include/internal/pystate.h index c93dda2..38845d3 100644 --- a/Include/internal/pystate.h +++ b/Include/internal/pystate.h @@ -5,7 +5,6 @@ extern "C" { #endif #include "pystate.h" -#include "pyatomic.h" #include "pythread.h" #include "internal/mem.h" diff --git a/Include/pyatomic.h b/Include/pyatomic.h deleted file mode 100644 index 9a497a6..0000000 --- a/Include/pyatomic.h +++ /dev/null @@ -1,535 +0,0 @@ -#ifndef Py_ATOMIC_H -#define Py_ATOMIC_H -#ifdef Py_BUILD_CORE - -#include "dynamic_annotations.h" - -#include "pyconfig.h" - -#if defined(HAVE_STD_ATOMIC) -#include -#endif - - -#if defined(_MSC_VER) -#include -#include -#endif - -/* This is modeled after the atomics interface from C1x, according to - * the draft at - * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf. - * Operations and types are named the same except with a _Py_ prefix - * and have the same semantics. - * - * Beware, the implementations here are deep magic. 
- */ - -#if defined(HAVE_STD_ATOMIC) - -typedef enum _Py_memory_order { - _Py_memory_order_relaxed = memory_order_relaxed, - _Py_memory_order_acquire = memory_order_acquire, - _Py_memory_order_release = memory_order_release, - _Py_memory_order_acq_rel = memory_order_acq_rel, - _Py_memory_order_seq_cst = memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - atomic_uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - atomic_int _value; -} _Py_atomic_int; - -#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ - atomic_signal_fence(ORDER) - -#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ - atomic_thread_fence(ORDER) - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER) - -/* Use builtin atomic operations in GCC >= 4.7 */ -#elif defined(HAVE_BUILTIN_ATOMIC) - -typedef enum _Py_memory_order { - _Py_memory_order_relaxed = __ATOMIC_RELAXED, - _Py_memory_order_acquire = __ATOMIC_ACQUIRE, - _Py_memory_order_release = __ATOMIC_RELEASE, - _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL, - _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST -} _Py_memory_order; - -typedef struct _Py_atomic_address { - uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - int _value; -} _Py_atomic_int; - -#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ - __atomic_signal_fence(ORDER) - -#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ - __atomic_thread_fence(ORDER) - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - (assert((ORDER) == __ATOMIC_RELAXED \ - || (ORDER) == __ATOMIC_SEQ_CST \ - || (ORDER) == __ATOMIC_RELEASE), \ - __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - (assert((ORDER) == __ATOMIC_RELAXED \ - || (ORDER) == __ATOMIC_SEQ_CST \ - || (ORDER) == __ATOMIC_ACQUIRE \ - || (ORDER) == __ATOMIC_CONSUME), \ - __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) - -/* Only support GCC (for expression statements) and x86 (for simple - * atomic semantics) and MSVC x86/x64/ARM */ -#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - int _value; -} _Py_atomic_int; - - -static __inline__ void -_Py_atomic_signal_fence(_Py_memory_order order) -{ - if (order != _Py_memory_order_relaxed) - __asm__ volatile("":::"memory"); -} - -static __inline__ void -_Py_atomic_thread_fence(_Py_memory_order order) -{ - if (order != _Py_memory_order_relaxed) - __asm__ volatile("mfence":::"memory"); -} - -/* Tell the race checker about this operation's effects. 
*/ -static __inline__ void -_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) -{ - (void)address; /* shut up -Wunused-parameter */ - switch(order) { - case _Py_memory_order_release: - case _Py_memory_order_acq_rel: - case _Py_memory_order_seq_cst: - _Py_ANNOTATE_HAPPENS_BEFORE(address); - break; - case _Py_memory_order_relaxed: - case _Py_memory_order_acquire: - break; - } - switch(order) { - case _Py_memory_order_acquire: - case _Py_memory_order_acq_rel: - case _Py_memory_order_seq_cst: - _Py_ANNOTATE_HAPPENS_AFTER(address); - break; - case _Py_memory_order_relaxed: - case _Py_memory_order_release: - break; - } -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - __extension__ ({ \ - __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ - __typeof__(atomic_val->_value) new_val = NEW_VAL;\ - volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \ - _Py_memory_order order = ORDER; \ - _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ - \ - /* Perform the operation. */ \ - _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \ - switch(order) { \ - case _Py_memory_order_release: \ - _Py_atomic_signal_fence(_Py_memory_order_release); \ - /* fallthrough */ \ - case _Py_memory_order_relaxed: \ - *volatile_data = new_val; \ - break; \ - \ - case _Py_memory_order_acquire: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - __asm__ volatile("xchg %0, %1" \ - : "+r"(new_val) \ - : "m"(atomic_val->_value) \ - : "memory"); \ - break; \ - } \ - _Py_ANNOTATE_IGNORE_WRITES_END(); \ - }) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - __extension__ ({ \ - __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ - __typeof__(atomic_val->_value) result; \ - volatile __typeof__(result) *volatile_data = &atomic_val->_value; \ - _Py_memory_order order = ORDER; \ - _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ - \ - /* Perform the operation. */ \ - _Py_ANNOTATE_IGNORE_READS_BEGIN(); \ - switch(order) { \ - case _Py_memory_order_release: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - /* Loads on x86 are not releases by default, so need a */ \ - /* thread fence. */ \ - _Py_atomic_thread_fence(_Py_memory_order_release); \ - break; \ - default: \ - /* No fence */ \ - break; \ - } \ - result = *volatile_data; \ - switch(order) { \ - case _Py_memory_order_acquire: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - /* Loads on x86 are automatically acquire operations so */ \ - /* can get by with just a compiler fence. */ \ - _Py_atomic_signal_fence(_Py_memory_order_acquire); \ - break; \ - default: \ - /* No fence */ \ - break; \ - } \ - _Py_ANNOTATE_IGNORE_READS_END(); \ - result; \ - }) - -#elif defined(_MSC_VER) -/* _Interlocked* functions provide a full memory barrier and are therefore - enough for acq_rel and seq_cst. If the HLE variants aren't available - in hardware they will fall back to a full memory barrier as well. - - This might affect performance but likely only in some very specific and - hard to meassure scenario. 
-*/ -#if defined(_M_IX86) || defined(_M_X64) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - volatile uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - volatile int _value; -} _Py_atomic_int; - - -#if defined(_M_X64) -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - } -#else -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); -#endif - -#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - } - -#if defined(_M_X64) -/* This has to be an intptr_t for now. - gil_created() uses -1 as a sentinel value, if this returns - a uintptr_t it will do an unsigned compare and crash -*/ -inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { - __int64 old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old); - break; - } - } - return old; -} - -#else -#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL -#endif - -inline int _Py_atomic_load_32bit(volatile int* value, int order) { - long old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old); - break; - } - } - return old; -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - if (sizeof(*ATOMIC_VAL._value) == 8) { \ - _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ - _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - ( \ - sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ - _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \ - _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \ - ) -#elif defined(_M_ARM) || defined(_M_ARM64) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - volatile uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - volatile int _value; -} _Py_atomic_int; - - -#if defined(_M_ARM64) -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - } -#else -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); -#endif - -#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - } - -#if defined(_M_ARM64) -/* This has to be an intptr_t for now. - gil_created() uses -1 as a sentinel value, if this returns - a uintptr_t it will do an unsigned compare and crash -*/ -inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { - uintptr_t old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_acq(value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_rel(value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange64(value, old, old) != old); - break; - } - } - return old; -} - -#else -#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL -#endif - -inline int _Py_atomic_load_32bit(volatile int* value, int order) { - int old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange_acq(value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange_rel(value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange(value, old, old) != old); - break; - } - } - return old; -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - if (sizeof(*ATOMIC_VAL._value) == 8) { \ - _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ - _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - ( \ - sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\
-    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
-    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
-  )
-#endif
-#else /* !gcc x86 !_msc_ver */
-typedef enum _Py_memory_order {
-    _Py_memory_order_relaxed,
-    _Py_memory_order_acquire,
-    _Py_memory_order_release,
-    _Py_memory_order_acq_rel,
-    _Py_memory_order_seq_cst
-} _Py_memory_order;
-
-typedef struct _Py_atomic_address {
-    uintptr_t _value;
-} _Py_atomic_address;
-
-typedef struct _Py_atomic_int {
-    int _value;
-} _Py_atomic_int;
-/* Fall back to other compilers and processors by assuming that simple
-   volatile accesses are atomic. This is false, so people should port
-   this. */
-#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
-#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
-#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
-    ((ATOMIC_VAL)->_value = NEW_VAL)
-#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
-    ((ATOMIC_VAL)->_value)
-#endif
-
-/* Standardized shortcuts. */
-#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
-    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
-#define _Py_atomic_load(ATOMIC_VAL) \
-    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)
-
-/* Python-local extensions */
-
-#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
-    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
-#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
-    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
-#endif /* Py_BUILD_CORE */
-#endif /* Py_ATOMIC_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 61b469d..232025f 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -988,7 +988,6 @@ PYTHON_HEADERS= \
         $(srcdir)/Include/pgen.h \
         $(srcdir)/Include/pgenheaders.h \
         $(srcdir)/Include/pyarena.h \
-        $(srcdir)/Include/pyatomic.h \
         $(srcdir)/Include/pycapsule.h \
         $(srcdir)/Include/pyctype.h \
         $(srcdir)/Include/pydebug.h \
@@ -1029,6 +1028,7 @@ PYTHON_HEADERS= \
         $(srcdir)/Include/internal/ceval.h \
         $(srcdir)/Include/internal/gil.h \
         $(srcdir)/Include/internal/mem.h \
+        $(srcdir)/Include/internal/pyatomic.h \
         $(srcdir)/Include/internal/pygetopt.h \
         $(srcdir)/Include/internal/pystate.h \
         $(srcdir)/Include/internal/context.h \
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index d120948..a81de6a 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -4,6 +4,8 @@
 /* XXX Signals should be recorded per thread, now we have thread state. */
 
 #include "Python.h"
+#include "internal/pyatomic.h"
+
 #ifndef MS_WINDOWS
 #include "posixmodule.h"
 #endif
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index cb83539..f65bb5b 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -118,6 +118,7 @@
+    <ClInclude Include="..\Include\internal\pyatomic.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 510a9c2..7fdadc8 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -153,6 +153,9 @@
       Include
 
+    <ClInclude Include="..\Include\internal\pyatomic.h">
+      <Filter>Include</Filter>
+    </ClInclude>
 
       Include
 
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index ef51890..4a054a9 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -5,6 +5,7 @@
 
 #include <stdlib.h>
 #include <errno.h>
+#include "internal/pyatomic.h"
 
 
 /* First some general settings */
-- 
cgit v0.12
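
For context, here is a minimal sketch (not part of the commit) of how a core-only source file consumes the relocated header after this change. The demo_pending flag and the two helper functions are hypothetical names used only for illustration; _Py_atomic_int, _Py_atomic_store() and _Py_atomic_load_relaxed() are the macros defined in Include/internal/pyatomic.h above, and the file must be compiled with Py_BUILD_CORE defined (as Python's own build does), otherwise the new #error guard fires.

/* Hypothetical example, not part of the patch: polling a flag that may be
 * set from a signal handler, in the spirit of ceval's eval_breaker and
 * signalmodule's is_tripped.  Requires -DPy_BUILD_CORE. */
#include "Python.h"
#include "internal/pyatomic.h"   /* was "pyatomic.h" before this commit */

static _Py_atomic_int demo_pending;   /* hypothetical flag */

/* Writer side (e.g. a signal handler): sequentially consistent store. */
static void
demo_set_pending(void)
{
    _Py_atomic_store(&demo_pending, 1);
}

/* Reader side (e.g. a hot interpreter loop): cheap relaxed load; callers
 * that need full seq_cst ordering would use _Py_atomic_load() instead. */
static int
demo_check_pending(void)
{
    return _Py_atomic_load_relaxed(&demo_pending);
}

The seq_cst store paired with a relaxed load on the fast path loosely mirrors how existing callers such as Modules/signalmodule.c (which gains the new include in this patch) and the ceval loop use these macros.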