From e664d7f89d2b9960d9049237136396e824795cac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A4r=20Bj=C3=B6rklund?= Date: Sat, 12 Aug 2017 11:19:30 +0200 Subject: bpo-30747: Attempt to fix atomic load/store (#2383) _Py_atomic_* are currently not implemented as atomic operations when building with MSVC. This patch attempts to implement parts of the functionality required. --- Include/pyatomic.h | 305 ++++++++++++++++++++- .../2017-08-08-12-00-29.bpo-30747.g2kZRT.rst | 2 + 2 files changed, 297 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2017-08-08-12-00-29.bpo-30747.g2kZRT.rst diff --git a/Include/pyatomic.h b/Include/pyatomic.h index 893d30d..832d951 100644 --- a/Include/pyatomic.h +++ b/Include/pyatomic.h @@ -10,6 +10,12 @@ #include #endif + +#if defined(_MSC_VER) +#include +#include +#endif + /* This is modeled after the atomics interface from C1x, according to * the draft at * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf. @@ -87,8 +93,9 @@ typedef struct _Py_atomic_int { || (ORDER) == __ATOMIC_CONSUME), \ __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) -#else - +/* Only support GCC (for expression statements) and x86 (for simple + * atomic semantics) and MSVC x86/x64/ARM */ +#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) typedef enum _Py_memory_order { _Py_memory_order_relaxed, _Py_memory_order_acquire, @@ -105,9 +112,6 @@ typedef struct _Py_atomic_int { int _value; } _Py_atomic_int; -/* Only support GCC (for expression statements) and x86 (for simple - * atomic semantics) for now */ -#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) static __inline__ void _Py_atomic_signal_fence(_Py_memory_order order) @@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order) static __inline__ void _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) { - (void)address; /* shut up -Wunused-parameter */ + (void)address; /* shut up -Wunused-parameter */ switch(order) { case _Py_memory_order_release: case _Py_memory_order_acq_rel: @@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) result; \ }) -#else /* !gcc x86 */ +#elif defined(_MSC_VER) +/* _Interlocked* functions provide a full memory barrier and are therefore + enough for acq_rel and seq_cst. If the HLE variants aren't available + in hardware they will fall back to a full memory barrier as well. + + This might affect performance but likely only in some very specific and + hard to meassure scenario. +*/ +#if defined(_M_IX86) || defined(_M_X64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_X64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_X64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + uintptr_t old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLERelease(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64(value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + int old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLEAcquire(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLERelease(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange(value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? \ + _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \ + ) +#elif defined(_M_ARM) || defined(_M_ARM64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_ARM64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_ARM64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + uintptr_t old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64(value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + int old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange(value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? \ + _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \ + ) +#endif +#else /* !gcc x86 !_msc_ver */ +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; /* Fall back to other compilers and processors by assuming that simple volatile accesses are atomic. This is false, so people should port this. */ @@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) ((ATOMIC_VAL)->_value = NEW_VAL) #define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ ((ATOMIC_VAL)->_value) - -#endif /* !gcc x86 */ #endif /* Standardized shortcuts. */ @@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed) #define _Py_atomic_load_relaxed(ATOMIC_VAL) \ _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed) - #endif /* Py_BUILD_CORE */ #endif /* Py_ATOMIC_H */ diff --git a/Misc/NEWS.d/next/Core and Builtins/2017-08-08-12-00-29.bpo-30747.g2kZRT.rst b/Misc/NEWS.d/next/Core and Builtins/2017-08-08-12-00-29.bpo-30747.g2kZRT.rst new file mode 100644 index 0000000..04a726a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2017-08-08-12-00-29.bpo-30747.g2kZRT.rst @@ -0,0 +1,2 @@ +Add a non-dummy implementation of _Py_atomic_store and _Py_atomic_load on +MSVC. -- cgit v0.12