summaryrefslogtreecommitdiffstats
path: root/include/jemalloc
diff options
context:
space:
mode:
authorDavid Goldblatt <davidgoldblatt@fb.com>2017-01-25 17:54:27 (GMT)
committerDavid Goldblatt <davidtgoldblatt@gmail.com>2017-03-03 21:40:59 (GMT)
commitd4ac7582f32f506d5203bea2f0115076202add38 (patch)
treec7a84707cc1a41c9ef6a7d4e692e48ff6fa8ee77 /include/jemalloc
parent957b8c5f2171f54f66689875144830e682be8e64 (diff)
downloadjemalloc-d4ac7582f32f506d5203bea2f0115076202add38.zip
jemalloc-d4ac7582f32f506d5203bea2f0115076202add38.tar.gz
jemalloc-d4ac7582f32f506d5203bea2f0115076202add38.tar.bz2
Introduce a backport of C11 atomics
This introduces a backport of C11 atomics. It has four implementations; ranked in order of preference, they are: - GCC/Clang __atomic builtins - GCC/Clang __sync builtins - MSVC _Interlocked builtins - C11 atomics, from <stdatomic.h> The primary advantages are: - Close adherence to the standard API gives us a defined memory model. - Type safety: atomic objects are now separate types from non-atomic ones, so that it's impossible to mix up atomic and non-atomic updates (which is undefined behavior that compilers are starting to take advantage of). - Efficiency: we can specify ordering for operations, avoiding fences and atomic operations on strongly ordered architectures (example: `atomic_write_u32(ptr, val);` involves a CAS loop, whereas `atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store). This diff leaves in the current atomics API (implementing them in terms of the backport). This lets us transition uses over piecemeal. Testing: This is by nature hard to test. I've manually tested the first three options on Linux on gcc by futzing with the #defines manually, on FreeBSD with gcc and clang, on MSVC, and on OS X with clang. All of these were x86 machines though, and we don't have any test infrastructure set up for non-x86 platforms.
Diffstat (limited to 'include/jemalloc')
-rw-r--r--include/jemalloc/internal/atomic.h111
-rw-r--r--include/jemalloc/internal/atomic_c11.h97
-rw-r--r--include/jemalloc/internal/atomic_externs.h12
-rw-r--r--include/jemalloc/internal/atomic_gcc_atomic.h125
-rw-r--r--include/jemalloc/internal/atomic_gcc_sync.h191
-rw-r--r--include/jemalloc/internal/atomic_inlines.h525
-rw-r--r--include/jemalloc/internal/atomic_msvc.h158
-rw-r--r--include/jemalloc/internal/atomic_types.h8
-rw-r--r--include/jemalloc/internal/jemalloc_internal.h.in22
-rw-r--r--include/jemalloc/internal/jemalloc_internal_defs.h.in13
-rw-r--r--include/jemalloc/internal/private_symbols.txt20
11 files changed, 698 insertions, 584 deletions
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
new file mode 100644
index 0000000..84fbbdf
--- /dev/null
+++ b/include/jemalloc/internal/atomic.h
@@ -0,0 +1,111 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_H
+#define JEMALLOC_INTERNAL_ATOMIC_H
+
+#define ATOMIC_INLINE static inline
+
+#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS)
+# include "jemalloc/internal/atomic_gcc_atomic.h"
+#elif defined(JEMALLOC_GCC_SYNC_ATOMICS)
+# include "jemalloc/internal/atomic_gcc_sync.h"
+#elif defined(_MSC_VER)
+# include "jemalloc/internal/atomic_msvc.h"
+#elif defined(JEMALLOC_C11_ATOMICS)
+# include "jemalloc/internal/atomic_c11.h"
+#else
+# error "Don't have atomics implemented on this platform."
+#endif
+
+/*
+ * This header gives more or less a backport of C11 atomics. The user can write
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate
+ * counterparts of the C11 atomic functions for type, as so:
+ * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3);
+ * and then write things like:
+ * int *some_ptr;
+ * atomic_pi_t atomic_ptr_to_int;
+ * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED);
+ * int *prev_value = atomic_exchange_pi(&atomic_ptr_to_int, NULL, ATOMIC_ACQ_REL);
+ * assert(some_ptr == prev_value);
+ * and expect things to work in the obvious way.
+ *
+ * Also included (with naming differences to avoid conflicts with the standard
+ * library):
+ * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence).
+ * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT).
+ */
+
+/*
+ * Pure convenience, so that we don't have to type "atomic_memory_order_"
+ * quite so often.
+ */
+#define ATOMIC_RELAXED atomic_memory_order_relaxed
+#define ATOMIC_ACQUIRE atomic_memory_order_acquire
+#define ATOMIC_RELEASE atomic_memory_order_release
+#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel
+#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst
+
+/*
+ * In order to let us transition atomics usage piecemeal (and reason locally
+ * about memory orders), we'll support the previous API for a while.
+ */
+#define JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \
+ATOMIC_INLINE type \
+atomic_read_##short_type(type *p) { \
+ return atomic_load_##short_type ((atomic_##short_type##_t *)p, \
+ ATOMIC_SEQ_CST); \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_write_##short_type(type *p, const type val) { \
+ atomic_store_##short_type((atomic_##short_type##_t *)p, \
+ (type)val, ATOMIC_SEQ_CST); \
+} \
+ATOMIC_INLINE bool \
+atomic_cas_##short_type(type *p, type c, type s) { \
+ /* Note the '!' -- atomic_cas inverts the usual semantics. */ \
+ return !atomic_compare_exchange_strong_##short_type( \
+ (atomic_##short_type##_t *)p, &c, s, ATOMIC_SEQ_CST, \
+ ATOMIC_SEQ_CST); \
+}
+
+#define JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(type, short_type) \
+JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \
+ \
+ATOMIC_INLINE type \
+atomic_add_##short_type(type *p, type x) { \
+ return atomic_fetch_add_##short_type( \
+ (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) + x; \
+} \
+ATOMIC_INLINE type \
+atomic_sub_##short_type(type *p, type x) { \
+ return atomic_fetch_sub_##short_type( \
+ (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) - x; \
+}
+
+JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(void *, p)
+
+/*
+ * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only
+ * platform that actually needs to know the size, MSVC.
+ */
+JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
+JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(bool, b)
+
+JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
+JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u)
+
+JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu)
+
+JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
+JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32)
+
+# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
+JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
+JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64)
+# endif
+
+#undef ATOMIC_INLINE
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_H */
diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h
new file mode 100644
index 0000000..a5f9313
--- /dev/null
+++ b/include/jemalloc/internal/atomic_c11.h
@@ -0,0 +1,97 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H
+#define JEMALLOC_INTERNAL_ATOMIC_C11_H
+
+#include <stdatomic.h>
+
+#define ATOMIC_INIT(...) ATOMIC_VAR_INIT(__VA_ARGS__)
+
+#define atomic_memory_order_t memory_order
+#define atomic_memory_order_relaxed memory_order_relaxed
+#define atomic_memory_order_acquire memory_order_acquire
+#define atomic_memory_order_release memory_order_release
+#define atomic_memory_order_acq_rel memory_order_acq_rel
+#define atomic_memory_order_seq_cst memory_order_seq_cst
+
+#define atomic_fence atomic_thread_fence
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef _Atomic(type) atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ /* \
+ * A strict interpretation of the C standard prevents \
+ * atomic_load from taking a const argument, but it's \
+ * convenient for our purposes. This cast is a workaround. \
+ */ \
+ atomic_##short_type##_t* a_nonconst = \
+ (atomic_##short_type##_t*)a; \
+ return atomic_load_explicit(a_nonconst, mo); \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ atomic_store_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return atomic_exchange_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return atomic_compare_exchange_weak_explicit(a, expected, \
+ desired, success_mo, failure_mo); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return atomic_compare_exchange_strong_explicit(a, expected, \
+ desired, success_mo, failure_mo); \
+}
+
+/*
+ * Integral types have some special operations available that non-integral ones
+ * lack.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_add_explicit(a, val, mo); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_sub_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_and_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_or_explicit(a, val, mo); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return atomic_fetch_xor_explicit(a, val, mo); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */
diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h
deleted file mode 100644
index 09f0640..0000000
--- a/include/jemalloc/internal/atomic_externs.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H
-#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H
-
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-#define atomic_read_u64(p) atomic_add_u64(p, 0)
-#endif
-#define atomic_read_u32(p) atomic_add_u32(p, 0)
-#define atomic_read_p(p) atomic_add_p(p, NULL)
-#define atomic_read_zu(p) atomic_add_zu(p, 0)
-#define atomic_read_u(p) atomic_add_u(p, 0)
-
-#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */
diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h
new file mode 100644
index 0000000..3d13b4a
--- /dev/null
+++ b/include/jemalloc/internal/atomic_gcc_atomic.h
@@ -0,0 +1,125 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+typedef enum {
+ atomic_memory_order_relaxed,
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+ATOMIC_INLINE int
+atomic_enum_to_builtin(atomic_memory_order_t mo) {
+ switch (mo) {
+ case atomic_memory_order_relaxed:
+ return __ATOMIC_RELAXED;
+ case atomic_memory_order_acquire:
+ return __ATOMIC_ACQUIRE;
+ case atomic_memory_order_release:
+ return __ATOMIC_RELEASE;
+ case atomic_memory_order_acq_rel:
+ return __ATOMIC_ACQ_REL;
+ case atomic_memory_order_seq_cst:
+ return __ATOMIC_SEQ_CST;
+ }
+ /* Can't actually happen; the switch is exhaustive. */
+ return __ATOMIC_SEQ_CST;
+}
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+ __atomic_thread_fence(atomic_enum_to_builtin(mo));
+}
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef struct { \
+ type repr; \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ type result; \
+ __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \
+ return result; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ type result; \
+ __atomic_exchange(&a->repr, &val, &result, \
+ atomic_enum_to_builtin(mo)); \
+ return result; \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return __atomic_compare_exchange(&a->repr, expected, &desired, \
+ true, atomic_enum_to_builtin(success_mo), \
+ atomic_enum_to_builtin(failure_mo)); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ return __atomic_compare_exchange(&a->repr, expected, &desired, \
+ false, \
+ atomic_enum_to_builtin(success_mo), \
+ atomic_enum_to_builtin(failure_mo)); \
+}
+
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_add(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_sub(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_and(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_or(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __atomic_fetch_xor(&a->repr, val, \
+ atomic_enum_to_builtin(mo)); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */
diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h
new file mode 100644
index 0000000..30846e4
--- /dev/null
+++ b/include/jemalloc/internal/atomic_gcc_sync.h
@@ -0,0 +1,191 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+typedef enum {
+ atomic_memory_order_relaxed,
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+ /* Easy cases first: no barrier, and full barrier. */
+ if (mo == atomic_memory_order_relaxed) {
+ asm volatile("" ::: "memory");
+ return;
+ }
+ if (mo == atomic_memory_order_seq_cst) {
+ asm volatile("" ::: "memory");
+ __sync_synchronize();
+ asm volatile("" ::: "memory");
+ return;
+ }
+ asm volatile("" ::: "memory");
+# if defined(__i386__) || defined(__x86_64__)
+ /* This is implicit on x86. */
+# elif defined(__ppc__)
+ asm volatile("lwsync");
+# elif defined(__sparc__) && defined(__arch64__)
+ if (mo == atomic_memory_order_acquire) {
+ asm volatile("membar #LoadLoad | #LoadStore");
+ } else if (mo == atomic_memory_order_release) {
+ asm volatile("membar #LoadStore | #StoreStore");
+ } else {
+ asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
+ }
+# else
+ __sync_synchronize();
+# endif
+ asm volatile("" ::: "memory");
+}
+
+/*
+ * A correct implementation of seq_cst loads and stores on weakly ordered
+ * architectures could do either of the following:
+ * 1. store() is weak-fence -> store -> strong fence, load() is load ->
+ * strong-fence.
+ * 2. store() is strong-fence -> store, load() is strong-fence -> load ->
+ * weak-fence.
+ * The tricky thing is, load() and store() above can be the load or store
+ * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
+ * means going with strategy 2.
+ * On strongly ordered architectures, the natural strategy is to stick a strong
+ * fence after seq_cst stores, and have naked loads. So we want the strong
+ * fences in different places on different architectures.
+ * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
+ * accomplish this.
+ */
+
+ATOMIC_INLINE void
+atomic_pre_sc_load_fence(void) {
+# if defined(__i386__) || defined(__x86_64__) || \
+ (defined(__sparc__) && defined(__arch64__))
+ atomic_fence(atomic_memory_order_relaxed); /* Compiler barrier only. */
+# else /* Elsewhere the strong fence goes before seq_cst loads. */
+ atomic_fence(atomic_memory_order_seq_cst);
+# endif
+}
+
+ATOMIC_INLINE void
+atomic_post_sc_store_fence(void) {
+# if defined(__i386__) || defined(__x86_64__) || \
+ (defined(__sparc__) && defined(__arch64__))
+ atomic_fence(atomic_memory_order_seq_cst); /* Strong fence after the store. */
+# else /* Elsewhere only a compiler barrier follows seq_cst stores. */
+ atomic_fence(atomic_memory_order_relaxed);
+# endif
+
+}
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+typedef struct { \
+ type volatile repr; \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ if (mo == atomic_memory_order_seq_cst) { \
+ atomic_pre_sc_load_fence(); \
+ } \
+ type result = a->repr; \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_acquire); \
+ } \
+ return result; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ if (mo != atomic_memory_order_relaxed) { \
+ atomic_fence(atomic_memory_order_release); \
+ } \
+ a->repr = val; \
+ if (mo == atomic_memory_order_seq_cst) { \
+ atomic_post_sc_store_fence(); \
+ } \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ /* \
+ * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
+ * an atomic exchange builtin. We fake it with a CAS loop. \
+ */ \
+ while (true) { \
+ type old = a->repr; \
+ if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
+ return old; \
+ } \
+ } \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
+ desired); \
+ if (prev == *expected) { \
+ return true; \
+ } else { \
+ *expected = prev; \
+ return false; \
+ } \
+} \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \
+ desired); \
+ if (prev == *expected) { \
+ return true; \
+ } else { \
+ *expected = prev; \
+ return false; \
+ } \
+}
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \
+ /* unused */ lg_size) \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_add(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_sub(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_and(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_or(&a->repr, val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { \
+ return __sync_fetch_and_xor(&a->repr, val); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */
diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h
deleted file mode 100644
index de66d57..0000000
--- a/include/jemalloc/internal/atomic_inlines.h
+++ /dev/null
@@ -1,525 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H
-#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H
-
-/*
- * All arithmetic functions return the arithmetic result of the atomic
- * operation. Some atomic operation APIs return the value prior to mutation, in
- * which case the following functions must redundantly compute the result so
- * that it can be returned. These functions are normally inlined, so the extra
- * operations can be optimized away if the return values aren't used by the
- * callers.
- *
- * <t> atomic_read_<t>(<t> *p) { return *p; }
- * <t> atomic_add_<t>(<t> *p, <t> x) { return *p += x; }
- * <t> atomic_sub_<t>(<t> *p, <t> x) { return *p -= x; }
- * bool atomic_cas_<t>(<t> *p, <t> c, <t> s)
- * {
- * if (*p != c)
- * return true;
- * *p = s;
- * return false;
- * }
- * void atomic_write_<t>(<t> *p, <t> x) { *p = x; }
- */
-
-#ifndef JEMALLOC_ENABLE_INLINE
-# ifdef JEMALLOC_ATOMIC_U64
-uint64_t atomic_add_u64(uint64_t *p, uint64_t x);
-uint64_t atomic_sub_u64(uint64_t *p, uint64_t x);
-bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s);
-void atomic_write_u64(uint64_t *p, uint64_t x);
-# endif
-uint32_t atomic_add_u32(uint32_t *p, uint32_t x);
-uint32_t atomic_sub_u32(uint32_t *p, uint32_t x);
-bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s);
-void atomic_write_u32(uint32_t *p, uint32_t x);
-void *atomic_add_p(void **p, void *x);
-void *atomic_sub_p(void **p, void *x);
-bool atomic_cas_p(void **p, void *c, void *s);
-void atomic_write_p(void **p, const void *x);
-size_t atomic_add_zu(size_t *p, size_t x);
-size_t atomic_sub_zu(size_t *p, size_t x);
-bool atomic_cas_zu(size_t *p, size_t c, size_t s);
-void atomic_write_zu(size_t *p, size_t x);
-unsigned atomic_add_u(unsigned *p, unsigned x);
-unsigned atomic_sub_u(unsigned *p, unsigned x);
-bool atomic_cas_u(unsigned *p, unsigned c, unsigned s);
-void atomic_write_u(unsigned *p, unsigned x);
-#endif
-
-#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_))
-/******************************************************************************/
-/* 64-bit operations. */
-#ifdef JEMALLOC_ATOMIC_U64
-# if (defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- uint64_t t = x;
-
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (t), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return t + x;
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- uint64_t t;
-
- x = (uint64_t)(-(int64_t)x);
- t = x;
- asm volatile (
- "lock; xaddq %0, %1;"
- : "+r" (t), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return t + x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- uint8_t success;
-
- asm volatile (
- "lock; cmpxchgq %4, %0;"
- "sete %1;"
- : "=m" (*p), "=a" (success) /* Outputs. */
- : "m" (*p), "a" (c), "r" (s) /* Inputs. */
- : "memory" /* Clobbers. */
- );
-
- return !(bool)success;
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- asm volatile (
- "xchgq %1, %0;" /* Lock is implied by xchgq. */
- : "=m" (*p), "+r" (x) /* Outputs. */
- : "m" (*p) /* Inputs. */
- : "memory" /* Clobbers. */
- );
-}
-# elif (defined(JEMALLOC_C11ATOMICS))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
- return atomic_fetch_add(a, x) + x;
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
- return atomic_fetch_sub(a, x) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
- return !atomic_compare_exchange_strong(a, &c, s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p;
- atomic_store(a, x);
-}
-# elif (defined(JEMALLOC_ATOMIC9))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- /*
- * atomic_fetchadd_64() doesn't exist, but we only ever use this
- * function on LP64 systems, so atomic_fetchadd_long() will do.
- */
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return atomic_fetchadd_long(p, (unsigned long)x) + x;
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- return !atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- assert(sizeof(uint64_t) == sizeof(unsigned long));
-
- atomic_store_rel_long(p, x);
-}
-# elif (defined(JEMALLOC_OSATOMIC))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- return OSAtomicAdd64((int64_t)x, (int64_t *)p);
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- return OSAtomicAdd64(-((int64_t)x), (int64_t *)p);
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- return !OSAtomicCompareAndSwap64(c, s, (int64_t *)p);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- uint64_t o;
-
- /*The documented OSAtomic*() API does not expose an atomic exchange. */
- do {
- o = atomic_read_u64(p);
- } while (atomic_cas_u64(p, o, x));
-}
-# elif (defined(_MSC_VER))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- return InterlockedExchangeAdd64(p, x) + x;
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- return InterlockedExchangeAdd64(p, -((int64_t)x)) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- uint64_t o;
-
- o = InterlockedCompareExchange64(p, s, c);
- return o != c;
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- InterlockedExchange64(p, x);
-}
-# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \
- defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8))
-JEMALLOC_INLINE uint64_t
-atomic_add_u64(uint64_t *p, uint64_t x) {
- return __sync_add_and_fetch(p, x);
-}
-
-JEMALLOC_INLINE uint64_t
-atomic_sub_u64(uint64_t *p, uint64_t x) {
- return __sync_sub_and_fetch(p, x);
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) {
- return !__sync_bool_compare_and_swap(p, c, s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u64(uint64_t *p, uint64_t x) {
- __sync_lock_test_and_set(p, x);
-}
-# else
-# error "Missing implementation for 64-bit atomic operations"
-# endif
-#endif
-
-/******************************************************************************/
-/* 32-bit operations. */
-#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- uint32_t t = x;
-
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (t), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return t + x;
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- uint32_t t;
-
- x = (uint32_t)(-(int32_t)x);
- t = x;
- asm volatile (
- "lock; xaddl %0, %1;"
- : "+r" (t), "=m" (*p) /* Outputs. */
- : "m" (*p) /* Inputs. */
- );
-
- return t + x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- uint8_t success;
-
- asm volatile (
- "lock; cmpxchgl %4, %0;"
- "sete %1;"
- : "=m" (*p), "=a" (success) /* Outputs. */
- : "m" (*p), "a" (c), "r" (s) /* Inputs. */
- : "memory"
- );
-
- return !(bool)success;
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- asm volatile (
- "xchgl %1, %0;" /* Lock is implied by xchgl. */
- : "=m" (*p), "+r" (x) /* Outputs. */
- : "m" (*p) /* Inputs. */
- : "memory" /* Clobbers. */
- );
-}
-# elif (defined(JEMALLOC_C11ATOMICS))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
- return atomic_fetch_add(a, x) + x;
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
- return atomic_fetch_sub(a, x) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
- return !atomic_compare_exchange_strong(a, &c, s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p;
- atomic_store(a, x);
-}
-#elif (defined(JEMALLOC_ATOMIC9))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- return atomic_fetchadd_32(p, x) + x;
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- return !atomic_cmpset_32(p, c, s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- atomic_store_rel_32(p, x);
-}
-#elif (defined(JEMALLOC_OSATOMIC))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- return OSAtomicAdd32((int32_t)x, (int32_t *)p);
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- return OSAtomicAdd32(-((int32_t)x), (int32_t *)p);
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- return !OSAtomicCompareAndSwap32(c, s, (int32_t *)p);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- uint32_t o;
-
- /*The documented OSAtomic*() API does not expose an atomic exchange. */
- do {
- o = atomic_read_u32(p);
- } while (atomic_cas_u32(p, o, x));
-}
-#elif (defined(_MSC_VER))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- return InterlockedExchangeAdd(p, x) + x;
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- return InterlockedExchangeAdd(p, -((int32_t)x)) - x;
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- uint32_t o;
-
- o = InterlockedCompareExchange(p, s, c);
- return o != c;
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- InterlockedExchange(p, x);
-}
-#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \
- defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4))
-JEMALLOC_INLINE uint32_t
-atomic_add_u32(uint32_t *p, uint32_t x) {
- return __sync_add_and_fetch(p, x);
-}
-
-JEMALLOC_INLINE uint32_t
-atomic_sub_u32(uint32_t *p, uint32_t x) {
- return __sync_sub_and_fetch(p, x);
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) {
- return !__sync_bool_compare_and_swap(p, c, s);
-}
-
-JEMALLOC_INLINE void
-atomic_write_u32(uint32_t *p, uint32_t x) {
- __sync_lock_test_and_set(p, x);
-}
-#else
-# error "Missing implementation for 32-bit atomic operations"
-#endif
-
-/******************************************************************************/
-/* Pointer operations. */
-JEMALLOC_INLINE void *
-atomic_add_p(void **p, void *x) {
-#if (LG_SIZEOF_PTR == 3)
- return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_PTR == 2)
- return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-JEMALLOC_INLINE void *
-atomic_sub_p(void **p, void *x) {
-#if (LG_SIZEOF_PTR == 3)
- return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
-#elif (LG_SIZEOF_PTR == 2)
- return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
-#endif
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_p(void **p, void *c, void *s) {
-#if (LG_SIZEOF_PTR == 3)
- return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
-#elif (LG_SIZEOF_PTR == 2)
- return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
-#endif
-}
-
-JEMALLOC_INLINE void
-atomic_write_p(void **p, const void *x) {
-#if (LG_SIZEOF_PTR == 3)
- atomic_write_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_PTR == 2)
- atomic_write_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-/******************************************************************************/
-/* size_t operations. */
-JEMALLOC_INLINE size_t
-atomic_add_zu(size_t *p, size_t x) {
-#if (LG_SIZEOF_PTR == 3)
- return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_PTR == 2)
- return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-JEMALLOC_INLINE size_t
-atomic_sub_zu(size_t *p, size_t x) {
-#if (LG_SIZEOF_PTR == 3)
- return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
-#elif (LG_SIZEOF_PTR == 2)
- return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
-#endif
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_zu(size_t *p, size_t c, size_t s) {
-#if (LG_SIZEOF_PTR == 3)
- return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
-#elif (LG_SIZEOF_PTR == 2)
- return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
-#endif
-}
-
-JEMALLOC_INLINE void
-atomic_write_zu(size_t *p, size_t x) {
-#if (LG_SIZEOF_PTR == 3)
- atomic_write_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_PTR == 2)
- atomic_write_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-/******************************************************************************/
-/* unsigned operations. */
-JEMALLOC_INLINE unsigned
-atomic_add_u(unsigned *p, unsigned x) {
-#if (LG_SIZEOF_INT == 3)
- return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_INT == 2)
- return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-JEMALLOC_INLINE unsigned
-atomic_sub_u(unsigned *p, unsigned x) {
-#if (LG_SIZEOF_INT == 3)
- return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x));
-#elif (LG_SIZEOF_INT == 2)
- return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x));
-#endif
-}
-
-JEMALLOC_INLINE bool
-atomic_cas_u(unsigned *p, unsigned c, unsigned s) {
-#if (LG_SIZEOF_INT == 3)
- return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s);
-#elif (LG_SIZEOF_INT == 2)
- return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s);
-#endif
-}
-
-JEMALLOC_INLINE void
-atomic_write_u(unsigned *p, unsigned x) {
-#if (LG_SIZEOF_INT == 3)
- atomic_write_u64((uint64_t *)p, (uint64_t)x);
-#elif (LG_SIZEOF_INT == 2)
- atomic_write_u32((uint32_t *)p, (uint32_t)x);
-#endif
-}
-
-/******************************************************************************/
-#endif
-#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */
diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h
new file mode 100644
index 0000000..67057ce
--- /dev/null
+++ b/include/jemalloc/internal/atomic_msvc.h
@@ -0,0 +1,158 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H
+
+#define ATOMIC_INIT(...) {__VA_ARGS__} /* Expands to its arguments wrapped in braces (a brace initializer). */
+
+typedef enum { /* Memory-order argument taken by the atomic_* functions in this header. */
+ atomic_memory_order_relaxed, /* atomic_fence() never issues MemoryBarrier() for this order */
+ atomic_memory_order_acquire,
+ atomic_memory_order_release,
+ atomic_memory_order_acq_rel,
+ atomic_memory_order_seq_cst /* the only order for which atomic_fence() issues MemoryBarrier() on x86/x64 */
+} atomic_memory_order_t;
+
+typedef char atomic_repr_0_t; /* lg_size 0: used with the _Interlocked*8 intrinsics */
+typedef short atomic_repr_1_t; /* lg_size 1: used with the _Interlocked*16 intrinsics */
+typedef long atomic_repr_2_t; /* lg_size 2: used with the unsuffixed _Interlocked* intrinsics */
+typedef __int64 atomic_repr_3_t; /* lg_size 3: used with the _Interlocked*64 intrinsics */
+
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) { /* Fence for order mo; MemoryBarrier() is issued only where the target needs it. */
+ _ReadWriteBarrier(); /* Issued unconditionally, here and again just before returning. */
+# if defined(_M_ARM) || defined(_M_ARM64)
+ /* ARM needs a MemoryBarrier() for every order except relaxed. */
+ if (mo != atomic_memory_order_relaxed) {
+ MemoryBarrier();
+ }
+# elif defined(_M_IX86) || defined (_M_X64)
+ /* x86/x64 needs a MemoryBarrier() only for seq_cst; weaker orders get just the _ReadWriteBarrier() calls. */
+ if (mo == atomic_memory_order_seq_cst) {
+ MemoryBarrier();
+ }
+# else
+# error "Don't know how to create atomics for this platform for MSVC."
+# endif
+ _ReadWriteBarrier();
+}
+
+#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t /* e.g. 3 -> atomic_repr_3_t */
+
+#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) /* Extra level so a and b are macro-expanded before being pasted. */
+#define ATOMIC_RAW_CONCAT(a, b) a ## b
+
+#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \
+ base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) /* e.g. (_InterlockedOr, 3) -> _InterlockedOr64 */
+
+#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \
+ ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) /* Selects one of the ATOMIC_INTERLOCKED_SUFFIX_<n> macros below. */
+
+#define ATOMIC_INTERLOCKED_SUFFIX_0 8
+#define ATOMIC_INTERLOCKED_SUFFIX_1 16
+#define ATOMIC_INTERLOCKED_SUFFIX_2 /* Deliberately empty: lg_size 2 maps to the unsuffixed intrinsic name. */
+#define ATOMIC_INTERLOCKED_SUFFIX_3 64
+
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) /* Defines atomic_<short_type>_t plus load/store/exchange/compare-exchange on it. */ \
+typedef struct { \
+ ATOMIC_INTERLOCKED_REPR(lg_size) repr; /* value held as the atomic_repr_<lg_size>_t integer type */ \
+} atomic_##short_type##_t; \
+ \
+ATOMIC_INLINE type \
+atomic_load_##short_type(const atomic_##short_type##_t *a, \
+ atomic_memory_order_t mo) { \
+ ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; /* plain read of the representation */ \
+ if (mo != atomic_memory_order_relaxed) { /* every non-relaxed order is handled as an acquire fence after the read */ \
+ atomic_fence(atomic_memory_order_acquire); \
+ } \
+ return (type) ret; \
+} \
+ \
+ATOMIC_INLINE void \
+atomic_store_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ if (mo != atomic_memory_order_relaxed) { /* release fence precedes the write for every non-relaxed order */ \
+ atomic_fence(atomic_memory_order_release); \
+ } \
+ a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \
+ if (mo == atomic_memory_order_seq_cst) { /* seq_cst additionally fences after the write */ \
+ atomic_fence(atomic_memory_order_seq_cst); \
+ } \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+ atomic_memory_order_t mo) { /* mo is not consulted: the same intrinsic is called for every order */ \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \
+ lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { /* success_mo and failure_mo are not consulted */ \
+ ATOMIC_INTERLOCKED_REPR(lg_size) e = \
+ (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \
+ ATOMIC_INTERLOCKED_REPR(lg_size) d = \
+ (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \
+ ATOMIC_INTERLOCKED_REPR(lg_size) old = \
+ ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \
+ lg_size)(&a->repr, d, e); /* argument order: target, desired value, expected value */ \
+ if (old == e) { /* the intrinsic returned the expected value: report success */ \
+ return true; \
+ } else { \
+ *expected = (type)old; /* hand the value returned by the intrinsic back to the caller */ \
+ return false; \
+ } \
+} \
+ \
+ATOMIC_INLINE bool \
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
+ type *expected, type desired, atomic_memory_order_t success_mo, \
+ atomic_memory_order_t failure_mo) { \
+ /* The weak version above is implemented with strong semantics, so simply forward to it. */ \
+ return atomic_compare_exchange_weak_##short_type(a, expected, \
+ desired, success_mo, failure_mo); \
+}
+
+
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) /* Everything JEMALLOC_GENERATE_ATOMICS defines, plus fetch_add/sub/and/or/xor. */ \
+JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { /* mo is not consulted here or in the and/or/xor variants below */ \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \
+ lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ \
+ATOMIC_INLINE type \
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ /* \
+ * Subtraction is done by adding the negated operand. MSVC warns \
+ * (C4146) on negation of unsigned operands, but for us it gives \
+ * exactly the right semantics (MAX_TYPE + 1 - operand). \
+ */ \
+ __pragma(warning(push)) \
+ __pragma(warning(disable: 4146)) \
+ return atomic_fetch_add_##short_type(a, -val, mo); \
+ __pragma(warning(pop)) \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+} \
+ATOMIC_INLINE type \
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \
+ type val, atomic_memory_order_t mo) { \
+ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \
+ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */
diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h
deleted file mode 100644
index 0fd5e5b..0000000
--- a/include/jemalloc/internal/atomic_types.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H
-#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H
-
-#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3)
-# define JEMALLOC_ATOMIC_U64
-#endif
-
-#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 0d0440b..f18acab 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -146,14 +146,6 @@ static const bool have_thp =
#endif
;
-#if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus)
-#include <stdatomic.h>
-#endif
-
-#ifdef JEMALLOC_ATOMIC9
-#include <machine/atomic.h>
-#endif
-
#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN))
#include <libkern/OSAtomic.h>
#endif
@@ -199,11 +191,22 @@ static const bool have_thp =
* its translation unit). Each component is now broken up into multiple header
* files, corresponding to the sections above (e.g. instead of "tsd.h", we now
* have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h").
+ *
+ * Those files which have been converted to explicitly include their
+ * inter-component dependencies are now in the initial HERMETIC HEADERS
+ * section. These headers may still rely on this file for system headers and
+ * global jemalloc headers, however.
*/
#include "jemalloc/internal/jemalloc_internal_macros.h"
/******************************************************************************/
+/* HERMETIC HEADERS */
+/******************************************************************************/
+
+#include "jemalloc/internal/atomic.h"
+
+/******************************************************************************/
/* TYPES */
/******************************************************************************/
@@ -380,7 +383,6 @@ typedef unsigned szind_t;
#include "jemalloc/internal/nstime_types.h"
#include "jemalloc/internal/util_types.h"
-#include "jemalloc/internal/atomic_types.h"
#include "jemalloc/internal/spin_types.h"
#include "jemalloc/internal/prng_types.h"
#include "jemalloc/internal/ticker_types.h"
@@ -489,7 +491,6 @@ void jemalloc_postfork_child(void);
#include "jemalloc/internal/nstime_externs.h"
#include "jemalloc/internal/util_externs.h"
-#include "jemalloc/internal/atomic_externs.h"
#include "jemalloc/internal/ckh_externs.h"
#include "jemalloc/internal/stats_externs.h"
#include "jemalloc/internal/ctl_externs.h"
@@ -513,7 +514,6 @@ void jemalloc_postfork_child(void);
/******************************************************************************/
#include "jemalloc/internal/util_inlines.h"
-#include "jemalloc/internal/atomic_inlines.h"
#include "jemalloc/internal/spin_inlines.h"
#include "jemalloc/internal/prng_inlines.h"
#include "jemalloc/internal/ticker_inlines.h"
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index 6c70e16..b2e0077 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -30,16 +30,13 @@
#undef LG_VADDR
/* Defined if C11 atomics are available. */
-#undef JEMALLOC_C11ATOMICS
+#undef JEMALLOC_C11_ATOMICS
-/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. */
-#undef JEMALLOC_ATOMIC9
+/* Defined if GCC __atomic atomics are available. */
+#undef JEMALLOC_GCC_ATOMIC_ATOMICS
-/*
- * Defined if OSAtomic*() functions are available, as provided by Darwin, and
- * documented in the atomic(3) manual page.
- */
-#undef JEMALLOC_OSATOMIC
+/* Defined if GCC __sync atomics are available. */
+#undef JEMALLOC_GCC_SYNC_ATOMICS
/*
* Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt
index 0234181..b122dae 100644
--- a/include/jemalloc/internal/private_symbols.txt
+++ b/include/jemalloc/internal/private_symbols.txt
@@ -72,26 +72,6 @@ arena_tdata_get
arena_tdata_get_hard
arenas
arenas_tdata_cleanup
-atomic_add_p
-atomic_add_u
-atomic_add_u32
-atomic_add_u64
-atomic_add_zu
-atomic_cas_p
-atomic_cas_u
-atomic_cas_u32
-atomic_cas_u64
-atomic_cas_zu
-atomic_sub_p
-atomic_sub_u
-atomic_sub_u32
-atomic_sub_u64
-atomic_sub_zu
-atomic_write_p
-atomic_write_u
-atomic_write_u32
-atomic_write_u64
-atomic_write_zu
b0get
base_alloc
base_boot