From 31368a4f0e531c19affe2a1becd25fc316bc7501 Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Tue, 30 Oct 2018 15:14:25 +0100
Subject: bpo-35081: Move Include/pyatomic.c to Include/internal/ (GH-10239)

Add pyatomic.h to the VS project (it wasn't referenced).
---
 Include/Python.h                   |   2 -
 Include/internal/ceval.h           |   2 +-
 Include/internal/gil.h             |   6 +-
 Include/internal/pyatomic.h        | 544 +++++++++++++++++++++++++++++++++++++
 Include/internal/pystate.h         |   1 -
 Include/pyatomic.h                 | 535 ------------------------------------
 Makefile.pre.in                    |   2 +-
 Modules/signalmodule.c             |   2 +
 PCbuild/pythoncore.vcxproj         |   1 +
 PCbuild/pythoncore.vcxproj.filters |   3 +
 Python/ceval_gil.h                 |   1 +
 11 files changed, 556 insertions(+), 543 deletions(-)
 create mode 100644 Include/internal/pyatomic.h
 delete mode 100644 Include/pyatomic.h

diff --git a/Include/Python.h b/Include/Python.h
index 80200fe..cf87a5c 100644
--- a/Include/Python.h
+++ b/Include/Python.h
@@ -53,8 +53,6 @@
 #include "pyport.h"
 #include "pymacro.h"
 
-#include "pyatomic.h"
-
 /* Debug-mode build with pymalloc implies PYMALLOC_DEBUG.
  * PYMALLOC_DEBUG is in error if pymalloc is not in use.
  */
diff --git a/Include/internal/ceval.h b/Include/internal/ceval.h
index cdabb95..4297b5a 100644
--- a/Include/internal/ceval.h
+++ b/Include/internal/ceval.h
@@ -4,7 +4,7 @@
 extern "C" {
 #endif
 
-#include "pyatomic.h"
+#include "internal/pyatomic.h"
 #include "pythread.h"
 
 struct _pending_calls {
diff --git a/Include/internal/gil.h b/Include/internal/gil.h
index 6139bd2..7743b3f 100644
--- a/Include/internal/gil.h
+++ b/Include/internal/gil.h
@@ -4,11 +4,11 @@
 extern "C" {
 #endif
 
-#include "pyatomic.h"
-
 #include "internal/condvar.h"
+#include "internal/pyatomic.h"
+
 #ifndef Py_HAVE_CONDVAR
-#error You need either a POSIX-compatible or a Windows system!
+# error You need either a POSIX-compatible or a Windows system!
 #endif
 
 /* Enable if you want to force the switching of threads at least
diff --git a/Include/internal/pyatomic.h b/Include/internal/pyatomic.h
new file mode 100644
index 0000000..5f349cc
--- /dev/null
+++ b/Include/internal/pyatomic.h
@@ -0,0 +1,544 @@
+#ifndef Py_ATOMIC_H
+#define Py_ATOMIC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "Py_BUILD_CORE must be defined to include this header"
+#endif
+
+#include "dynamic_annotations.h"
+
+#include "pyconfig.h"
+
+#if defined(HAVE_STD_ATOMIC)
+#include <stdatomic.h>
+#endif
+
+
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <immintrin.h>
+#endif
+
+/* This is modeled after the atomics interface from C1x, according to
+ * the draft at
+ * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
+ * Operations and types are named the same except with a _Py_ prefix
+ * and have the same semantics.
+ *
+ * Beware, the implementations here are deep magic.
+ */ + +#if defined(HAVE_STD_ATOMIC) + +typedef enum _Py_memory_order { + _Py_memory_order_relaxed = memory_order_relaxed, + _Py_memory_order_acquire = memory_order_acquire, + _Py_memory_order_release = memory_order_release, + _Py_memory_order_acq_rel = memory_order_acq_rel, + _Py_memory_order_seq_cst = memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + atomic_uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + atomic_int _value; +} _Py_atomic_int; + +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ + atomic_signal_fence(ORDER) + +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ + atomic_thread_fence(ORDER) + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER) + +/* Use builtin atomic operations in GCC >= 4.7 */ +#elif defined(HAVE_BUILTIN_ATOMIC) + +typedef enum _Py_memory_order { + _Py_memory_order_relaxed = __ATOMIC_RELAXED, + _Py_memory_order_acquire = __ATOMIC_ACQUIRE, + _Py_memory_order_release = __ATOMIC_RELEASE, + _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL, + _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; + +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ + __atomic_signal_fence(ORDER) + +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ + __atomic_thread_fence(ORDER) + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + (assert((ORDER) == __ATOMIC_RELAXED \ + || (ORDER) == __ATOMIC_SEQ_CST \ + || (ORDER) == __ATOMIC_RELEASE), \ + __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + (assert((ORDER) == __ATOMIC_RELAXED \ + || (ORDER) == __ATOMIC_SEQ_CST \ + || (ORDER) == __ATOMIC_ACQUIRE \ + || (ORDER) == __ATOMIC_CONSUME), \ + __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) + +/* Only support GCC (for expression statements) and x86 (for simple + * atomic semantics) and MSVC x86/x64/ARM */ +#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; + + +static __inline__ void +_Py_atomic_signal_fence(_Py_memory_order order) +{ + if (order != _Py_memory_order_relaxed) + __asm__ volatile("":::"memory"); +} + +static __inline__ void +_Py_atomic_thread_fence(_Py_memory_order order) +{ + if (order != _Py_memory_order_relaxed) + __asm__ volatile("mfence":::"memory"); +} + +/* Tell the race checker about this operation's effects. 
*/ +static __inline__ void +_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) +{ + (void)address; /* shut up -Wunused-parameter */ + switch(order) { + case _Py_memory_order_release: + case _Py_memory_order_acq_rel: + case _Py_memory_order_seq_cst: + _Py_ANNOTATE_HAPPENS_BEFORE(address); + break; + case _Py_memory_order_relaxed: + case _Py_memory_order_acquire: + break; + } + switch(order) { + case _Py_memory_order_acquire: + case _Py_memory_order_acq_rel: + case _Py_memory_order_seq_cst: + _Py_ANNOTATE_HAPPENS_AFTER(address); + break; + case _Py_memory_order_relaxed: + case _Py_memory_order_release: + break; + } +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + __extension__ ({ \ + __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ + __typeof__(atomic_val->_value) new_val = NEW_VAL;\ + volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \ + _Py_memory_order order = ORDER; \ + _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ + \ + /* Perform the operation. */ \ + _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \ + switch(order) { \ + case _Py_memory_order_release: \ + _Py_atomic_signal_fence(_Py_memory_order_release); \ + /* fallthrough */ \ + case _Py_memory_order_relaxed: \ + *volatile_data = new_val; \ + break; \ + \ + case _Py_memory_order_acquire: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + __asm__ volatile("xchg %0, %1" \ + : "+r"(new_val) \ + : "m"(atomic_val->_value) \ + : "memory"); \ + break; \ + } \ + _Py_ANNOTATE_IGNORE_WRITES_END(); \ + }) + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + __extension__ ({ \ + __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ + __typeof__(atomic_val->_value) result; \ + volatile __typeof__(result) *volatile_data = &atomic_val->_value; \ + _Py_memory_order order = ORDER; \ + _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ + \ + /* Perform the operation. */ \ + _Py_ANNOTATE_IGNORE_READS_BEGIN(); \ + switch(order) { \ + case _Py_memory_order_release: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + /* Loads on x86 are not releases by default, so need a */ \ + /* thread fence. */ \ + _Py_atomic_thread_fence(_Py_memory_order_release); \ + break; \ + default: \ + /* No fence */ \ + break; \ + } \ + result = *volatile_data; \ + switch(order) { \ + case _Py_memory_order_acquire: \ + case _Py_memory_order_acq_rel: \ + case _Py_memory_order_seq_cst: \ + /* Loads on x86 are automatically acquire operations so */ \ + /* can get by with just a compiler fence. */ \ + _Py_atomic_signal_fence(_Py_memory_order_acquire); \ + break; \ + default: \ + /* No fence */ \ + break; \ + } \ + _Py_ANNOTATE_IGNORE_READS_END(); \ + result; \ + }) + +#elif defined(_MSC_VER) +/* _Interlocked* functions provide a full memory barrier and are therefore + enough for acq_rel and seq_cst. If the HLE variants aren't available + in hardware they will fall back to a full memory barrier as well. + + This might affect performance but likely only in some very specific and + hard to meassure scenario. 
+*/ +#if defined(_M_IX86) || defined(_M_X64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_X64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_X64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + __int64 old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + long old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ + _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \ + ) +#elif defined(_M_ARM) || defined(_M_ARM64) +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + volatile uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + volatile int _value; +} _Py_atomic_int; + + +#if defined(_M_ARM64) +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ + break; \ + } +#else +#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); +#endif + +#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ + switch (ORDER) { \ + case _Py_memory_order_acquire: \ + _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + case _Py_memory_order_release: \ + _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + default: \ + _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ + break; \ + } + +#if defined(_M_ARM64) +/* This has to be an intptr_t for now. + gil_created() uses -1 as a sentinel value, if this returns + a uintptr_t it will do an unsigned compare and crash +*/ +inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { + uintptr_t old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange64_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange64(value, old, old) != old); + break; + } + } + return old; +} + +#else +#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL +#endif + +inline int _Py_atomic_load_32bit(volatile int* value, int order) { + int old; + switch (order) { + case _Py_memory_order_acquire: + { + do { + old = *value; + } while(_InterlockedCompareExchange_acq(value, old, old) != old); + break; + } + case _Py_memory_order_release: + { + do { + old = *value; + } while(_InterlockedCompareExchange_rel(value, old, old) != old); + break; + } + case _Py_memory_order_relaxed: + old = *value; + break; + default: + { + do { + old = *value; + } while(_InterlockedCompareExchange(value, old, old) != old); + break; + } + } + return old; +} + +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + if (sizeof(*ATOMIC_VAL._value) == 8) { \ + _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ + _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } + +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ( \ + sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ + _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \ + _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \ + ) +#endif +#else /* !gcc x86 !_msc_ver */ +typedef enum _Py_memory_order { + _Py_memory_order_relaxed, + _Py_memory_order_acquire, + _Py_memory_order_release, + _Py_memory_order_acq_rel, + _Py_memory_order_seq_cst +} _Py_memory_order; + +typedef struct _Py_atomic_address { + uintptr_t _value; +} _Py_atomic_address; + +typedef struct _Py_atomic_int { + int _value; +} _Py_atomic_int; +/* Fall back to other compilers and processors by assuming that simple + volatile accesses are atomic. This is false, so people should port + this. */ +#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0) +#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0) +#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ + ((ATOMIC_VAL)->_value = NEW_VAL) +#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ + ((ATOMIC_VAL)->_value) +#endif + +/* Standardized shortcuts. */ +#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \ + _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst) +#define _Py_atomic_load(ATOMIC_VAL) \ + _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst) + +/* Python-local extensions */ + +#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \ + _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed) +#define _Py_atomic_load_relaxed(ATOMIC_VAL) \ + _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed) + +#ifdef __cplusplus +} +#endif +#endif /* Py_ATOMIC_H */ diff --git a/Include/internal/pystate.h b/Include/internal/pystate.h index c93dda2..38845d3 100644 --- a/Include/internal/pystate.h +++ b/Include/internal/pystate.h @@ -5,7 +5,6 @@ extern "C" { #endif #include "pystate.h" -#include "pyatomic.h" #include "pythread.h" #include "internal/mem.h" diff --git a/Include/pyatomic.h b/Include/pyatomic.h deleted file mode 100644 index 9a497a6..0000000 --- a/Include/pyatomic.h +++ /dev/null @@ -1,535 +0,0 @@ -#ifndef Py_ATOMIC_H -#define Py_ATOMIC_H -#ifdef Py_BUILD_CORE - -#include "dynamic_annotations.h" - -#include "pyconfig.h" - -#if defined(HAVE_STD_ATOMIC) -#include -#endif - - -#if defined(_MSC_VER) -#include -#include -#endif - -/* This is modeled after the atomics interface from C1x, according to - * the draft at - * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf. - * Operations and types are named the same except with a _Py_ prefix - * and have the same semantics. - * - * Beware, the implementations here are deep magic. 
- */ - -#if defined(HAVE_STD_ATOMIC) - -typedef enum _Py_memory_order { - _Py_memory_order_relaxed = memory_order_relaxed, - _Py_memory_order_acquire = memory_order_acquire, - _Py_memory_order_release = memory_order_release, - _Py_memory_order_acq_rel = memory_order_acq_rel, - _Py_memory_order_seq_cst = memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - atomic_uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - atomic_int _value; -} _Py_atomic_int; - -#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ - atomic_signal_fence(ORDER) - -#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ - atomic_thread_fence(ORDER) - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER) - -/* Use builtin atomic operations in GCC >= 4.7 */ -#elif defined(HAVE_BUILTIN_ATOMIC) - -typedef enum _Py_memory_order { - _Py_memory_order_relaxed = __ATOMIC_RELAXED, - _Py_memory_order_acquire = __ATOMIC_ACQUIRE, - _Py_memory_order_release = __ATOMIC_RELEASE, - _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL, - _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST -} _Py_memory_order; - -typedef struct _Py_atomic_address { - uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - int _value; -} _Py_atomic_int; - -#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \ - __atomic_signal_fence(ORDER) - -#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \ - __atomic_thread_fence(ORDER) - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - (assert((ORDER) == __ATOMIC_RELAXED \ - || (ORDER) == __ATOMIC_SEQ_CST \ - || (ORDER) == __ATOMIC_RELEASE), \ - __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - (assert((ORDER) == __ATOMIC_RELAXED \ - || (ORDER) == __ATOMIC_SEQ_CST \ - || (ORDER) == __ATOMIC_ACQUIRE \ - || (ORDER) == __ATOMIC_CONSUME), \ - __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER)) - -/* Only support GCC (for expression statements) and x86 (for simple - * atomic semantics) and MSVC x86/x64/ARM */ -#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64)) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - int _value; -} _Py_atomic_int; - - -static __inline__ void -_Py_atomic_signal_fence(_Py_memory_order order) -{ - if (order != _Py_memory_order_relaxed) - __asm__ volatile("":::"memory"); -} - -static __inline__ void -_Py_atomic_thread_fence(_Py_memory_order order) -{ - if (order != _Py_memory_order_relaxed) - __asm__ volatile("mfence":::"memory"); -} - -/* Tell the race checker about this operation's effects. 
*/ -static __inline__ void -_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order) -{ - (void)address; /* shut up -Wunused-parameter */ - switch(order) { - case _Py_memory_order_release: - case _Py_memory_order_acq_rel: - case _Py_memory_order_seq_cst: - _Py_ANNOTATE_HAPPENS_BEFORE(address); - break; - case _Py_memory_order_relaxed: - case _Py_memory_order_acquire: - break; - } - switch(order) { - case _Py_memory_order_acquire: - case _Py_memory_order_acq_rel: - case _Py_memory_order_seq_cst: - _Py_ANNOTATE_HAPPENS_AFTER(address); - break; - case _Py_memory_order_relaxed: - case _Py_memory_order_release: - break; - } -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - __extension__ ({ \ - __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ - __typeof__(atomic_val->_value) new_val = NEW_VAL;\ - volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \ - _Py_memory_order order = ORDER; \ - _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ - \ - /* Perform the operation. */ \ - _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \ - switch(order) { \ - case _Py_memory_order_release: \ - _Py_atomic_signal_fence(_Py_memory_order_release); \ - /* fallthrough */ \ - case _Py_memory_order_relaxed: \ - *volatile_data = new_val; \ - break; \ - \ - case _Py_memory_order_acquire: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - __asm__ volatile("xchg %0, %1" \ - : "+r"(new_val) \ - : "m"(atomic_val->_value) \ - : "memory"); \ - break; \ - } \ - _Py_ANNOTATE_IGNORE_WRITES_END(); \ - }) - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - __extension__ ({ \ - __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \ - __typeof__(atomic_val->_value) result; \ - volatile __typeof__(result) *volatile_data = &atomic_val->_value; \ - _Py_memory_order order = ORDER; \ - _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \ - \ - /* Perform the operation. */ \ - _Py_ANNOTATE_IGNORE_READS_BEGIN(); \ - switch(order) { \ - case _Py_memory_order_release: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - /* Loads on x86 are not releases by default, so need a */ \ - /* thread fence. */ \ - _Py_atomic_thread_fence(_Py_memory_order_release); \ - break; \ - default: \ - /* No fence */ \ - break; \ - } \ - result = *volatile_data; \ - switch(order) { \ - case _Py_memory_order_acquire: \ - case _Py_memory_order_acq_rel: \ - case _Py_memory_order_seq_cst: \ - /* Loads on x86 are automatically acquire operations so */ \ - /* can get by with just a compiler fence. */ \ - _Py_atomic_signal_fence(_Py_memory_order_acquire); \ - break; \ - default: \ - /* No fence */ \ - break; \ - } \ - _Py_ANNOTATE_IGNORE_READS_END(); \ - result; \ - }) - -#elif defined(_MSC_VER) -/* _Interlocked* functions provide a full memory barrier and are therefore - enough for acq_rel and seq_cst. If the HLE variants aren't available - in hardware they will fall back to a full memory barrier as well. - - This might affect performance but likely only in some very specific and - hard to meassure scenario. 
-*/ -#if defined(_M_IX86) || defined(_M_X64) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - volatile uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - volatile int _value; -} _Py_atomic_int; - - -#if defined(_M_X64) -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - } -#else -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); -#endif - -#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - } - -#if defined(_M_X64) -/* This has to be an intptr_t for now. - gil_created() uses -1 as a sentinel value, if this returns - a uintptr_t it will do an unsigned compare and crash -*/ -inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { - __int64 old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old); - break; - } - } - return old; -} - -#else -#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL -#endif - -inline int _Py_atomic_load_32bit(volatile int* value, int order) { - long old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old); - break; - } - } - return old; -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - if (sizeof(*ATOMIC_VAL._value) == 8) { \ - _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ - _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - ( \ - sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\ - _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \ - _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \ - ) -#elif defined(_M_ARM) || defined(_M_ARM64) -typedef enum _Py_memory_order { - _Py_memory_order_relaxed, - _Py_memory_order_acquire, - _Py_memory_order_release, - _Py_memory_order_acq_rel, - _Py_memory_order_seq_cst -} _Py_memory_order; - -typedef struct _Py_atomic_address { - volatile uintptr_t _value; -} _Py_atomic_address; - -typedef struct _Py_atomic_int { - volatile int _value; -} _Py_atomic_int; - - -#if defined(_M_ARM64) -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \ - break; \ - } -#else -#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0); -#endif - -#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \ - switch (ORDER) { \ - case _Py_memory_order_acquire: \ - _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - case _Py_memory_order_release: \ - _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - default: \ - _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \ - break; \ - } - -#if defined(_M_ARM64) -/* This has to be an intptr_t for now. - gil_created() uses -1 as a sentinel value, if this returns - a uintptr_t it will do an unsigned compare and crash -*/ -inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) { - uintptr_t old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_acq(value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange64_rel(value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange64(value, old, old) != old); - break; - } - } - return old; -} - -#else -#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL -#endif - -inline int _Py_atomic_load_32bit(volatile int* value, int order) { - int old; - switch (order) { - case _Py_memory_order_acquire: - { - do { - old = *value; - } while(_InterlockedCompareExchange_acq(value, old, old) != old); - break; - } - case _Py_memory_order_release: - { - do { - old = *value; - } while(_InterlockedCompareExchange_rel(value, old, old) != old); - break; - } - case _Py_memory_order_relaxed: - old = *value; - break; - default: - { - do { - old = *value; - } while(_InterlockedCompareExchange(value, old, old) != old); - break; - } - } - return old; -} - -#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \ - if (sizeof(*ATOMIC_VAL._value) == 8) { \ - _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \ - _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } - -#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \ - ( \ - sizeof(*(ATOMIC_VAL._value)) == 8 ? 
\
-    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
-    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
-  )
-#endif
-#else /* !gcc x86 !_msc_ver */
-typedef enum _Py_memory_order {
-    _Py_memory_order_relaxed,
-    _Py_memory_order_acquire,
-    _Py_memory_order_release,
-    _Py_memory_order_acq_rel,
-    _Py_memory_order_seq_cst
-} _Py_memory_order;
-
-typedef struct _Py_atomic_address {
-    uintptr_t _value;
-} _Py_atomic_address;
-
-typedef struct _Py_atomic_int {
-    int _value;
-} _Py_atomic_int;
-/* Fall back to other compilers and processors by assuming that simple
-   volatile accesses are atomic. This is false, so people should port
-   this. */
-#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
-#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
-#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
-    ((ATOMIC_VAL)->_value = NEW_VAL)
-#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
-    ((ATOMIC_VAL)->_value)
-#endif
-
-/* Standardized shortcuts. */
-#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
-    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
-#define _Py_atomic_load(ATOMIC_VAL) \
-    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)
-
-/* Python-local extensions */
-
-#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
-    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
-#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
-    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
-#endif /* Py_BUILD_CORE */
-#endif /* Py_ATOMIC_H */
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 61b469d..232025f 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -988,7 +988,6 @@ PYTHON_HEADERS= \
         $(srcdir)/Include/pgen.h \
         $(srcdir)/Include/pgenheaders.h \
         $(srcdir)/Include/pyarena.h \
-        $(srcdir)/Include/pyatomic.h \
         $(srcdir)/Include/pycapsule.h \
         $(srcdir)/Include/pyctype.h \
         $(srcdir)/Include/pydebug.h \
@@ -1029,6 +1028,7 @@ PYTHON_HEADERS= \
         $(srcdir)/Include/internal/ceval.h \
         $(srcdir)/Include/internal/gil.h \
         $(srcdir)/Include/internal/mem.h \
+        $(srcdir)/Include/internal/pyatomic.h \
         $(srcdir)/Include/internal/pygetopt.h \
         $(srcdir)/Include/internal/pystate.h \
         $(srcdir)/Include/internal/context.h \
diff --git a/Modules/signalmodule.c b/Modules/signalmodule.c
index d120948..a81de6a 100644
--- a/Modules/signalmodule.c
+++ b/Modules/signalmodule.c
@@ -4,6 +4,8 @@
 /* XXX Signals should be recorded per thread, now we have thread state. */
 
 #include "Python.h"
+#include "internal/pyatomic.h"
+
 #ifndef MS_WINDOWS
 #include "posixmodule.h"
 #endif
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index cb83539..f65bb5b 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -118,6 +118,7 @@
+    <ClInclude Include="..\Include\internal\pyatomic.h" />
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 510a9c2..7fdadc8 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -153,6 +153,9 @@
       Include
 
+    <ClInclude Include="..\Include\internal\pyatomic.h">
+      <Filter>Include</Filter>
+    </ClInclude>
 
       Include
 
diff --git a/Python/ceval_gil.h b/Python/ceval_gil.h
index ef51890..4a054a9 100644
--- a/Python/ceval_gil.h
+++ b/Python/ceval_gil.h
@@ -5,6 +5,7 @@
 
 #include <stdlib.h>
 #include <errno.h>
+#include "internal/pyatomic.h"
 
 
 /* First some general settings */
-- 
cgit v0.12
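
For context, here is a minimal sketch (not part of the commit) of how a core-only source file consumes the relocated header after this change. The demo_pending flag and the two helper functions are hypothetical names used only for illustration; _Py_atomic_int, _Py_atomic_store() and _Py_atomic_load_relaxed() are the macros defined in Include/internal/pyatomic.h above, and the file must be compiled with Py_BUILD_CORE defined (as Python's own build does), otherwise the new #error guard fires.

/* Hypothetical example, not part of the patch: polling a flag that may be
 * set from a signal handler, in the spirit of ceval's eval_breaker and
 * signalmodule's is_tripped.  Requires -DPy_BUILD_CORE. */
#include "Python.h"
#include "internal/pyatomic.h"   /* was "pyatomic.h" before this commit */

static _Py_atomic_int demo_pending;   /* hypothetical flag */

/* Writer side (e.g. a signal handler): sequentially consistent store. */
static void
demo_set_pending(void)
{
    _Py_atomic_store(&demo_pending, 1);
}

/* Reader side (e.g. a hot interpreter loop): cheap relaxed load; callers
 * that need full seq_cst ordering would use _Py_atomic_load() instead. */
static int
demo_check_pending(void)
{
    return _Py_atomic_load_relaxed(&demo_pending);
}

The seq_cst store paired with a relaxed load on the fast path loosely mirrors how existing callers such as Modules/signalmodule.c (which gains the new include in this patch) and the ceval loop use these macros.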