author     Jason Evans <jasone@canonware.com>    2010-09-05 17:35:13 (GMT)
committer  Jason Evans <jasone@canonware.com>    2010-09-12 01:20:16 (GMT)
commit     2dbecf1f6267fae7a161b9c39cfd4d04ce168a29 (patch)
tree       246a2c4d549d4b007184c518e69deacbad4a0aaa /jemalloc/src
parent     b267d0f86aff15a0edb2929f09060c118ed98ec4 (diff)
Port to Mac OS X.
Add Mac OS X support, based in large part on the OS X support in Mozilla's version of jemalloc.
Diffstat (limited to 'jemalloc/src')
-rw-r--r--  jemalloc/src/arena.c        52
-rw-r--r--  jemalloc/src/base.c          2
-rw-r--r--  jemalloc/src/chunk.c        27
-rw-r--r--  jemalloc/src/chunk_mmap.c   46
-rw-r--r--  jemalloc/src/ctl.c          14
-rw-r--r--  jemalloc/src/huge.c          4
-rw-r--r--  jemalloc/src/jemalloc.c     88
-rw-r--r--  jemalloc/src/mutex.c         4
-rw-r--r--  jemalloc/src/prof.c        123
-rw-r--r--  jemalloc/src/rtree.c        42
-rw-r--r--  jemalloc/src/tcache.c       26
-rw-r--r--  jemalloc/src/zone.c        354
12 files changed, 653 insertions(+), 129 deletions(-)
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index ee859fc..db3d401 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -181,9 +181,6 @@ static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
void *ptr, size_t size, size_t oldsize);
static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
-#ifdef JEMALLOC_TINY
-static size_t pow2_ceil(size_t x);
-#endif
static bool small_size2bin_init(void);
#ifdef JEMALLOC_DEBUG
static void small_size2bin_validate(void);
@@ -426,7 +423,7 @@ arena_chunk_alloc(arena_t *arena)
zero = false;
malloc_mutex_unlock(&arena->lock);
- chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
+ chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero);
malloc_mutex_lock(&arena->lock);
if (chunk == NULL)
return (NULL);
@@ -606,10 +603,18 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
ql_new(&mapelms);
flag_zeroed =
-#ifdef JEMALLOC_SWAP
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
+ /*
+ * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous
+ * mappings, but not for file-backed mappings.
+ */
+# ifdef JEMALLOC_SWAP
swap_enabled ? 0 :
-#endif
+# endif
CHUNK_MAP_ZEROED;
+#else
+ 0;
+#endif
/*
* If chunk is the spare, temporarily re-allocate it, 1) so that its
@@ -649,9 +654,6 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
/*
* Update internal elements in the page map, so
* that CHUNK_MAP_ZEROED is properly set.
- * madvise(..., MADV_DONTNEED) results in
- * zero-filled pages for anonymous mappings,
- * but not for file-backed mappings.
*/
mapelm->bits = (npages << PAGE_SHIFT) |
CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED |
@@ -715,8 +717,20 @@ arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
assert(ndirty >= npages);
ndirty -= npages;
#endif
+
+#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
(npages << PAGE_SHIFT), MADV_DONTNEED);
+#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
+ madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+ (npages << PAGE_SHIFT), MADV_FREE);
+#elif defined(JEMALLOC_PURGE_MSYNC_KILLPAGES)
+ msync((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)),
+ (npages << PAGE_SHIFT), MS_KILLPAGES);
+#else
+# error "No method defined for purging unused dirty pages."
+#endif
+
#ifdef JEMALLOC_STATS
nmadvise++;
#endif
@@ -2239,26 +2253,6 @@ arena_new(arena_t *arena, unsigned ind)
return (false);
}
-#ifdef JEMALLOC_TINY
-/* Compute the smallest power of 2 that is >= x. */
-static size_t
-pow2_ceil(size_t x)
-{
-
- x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
-#if (SIZEOF_PTR == 8)
- x |= x >> 32;
-#endif
- x++;
- return (x);
-}
-#endif
-
#ifdef JEMALLOC_DEBUG
static void
small_size2bin_validate(void)
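
The arena_chunk_purge() hunks above turn the hard-wired madvise(MADV_DONTNEED) call into a compile-time choice among three purge mechanisms, so that platforms such as Darwin can discard dirty pages via madvise(MADV_FREE) or msync(MS_KILLPAGES) instead. A minimal standalone sketch of that dispatch; the JEMALLOC_PURGE_* macro is picked by hand here purely for illustration, whereas in the real build it comes from configure:

    #include <stddef.h>
    #include <sys/mman.h>

    #define JEMALLOC_PURGE_MADVISE_DONTNEED 1   /* stand-in for the configure result */

    static void
    pages_purge(void *addr, size_t length)
    {
    #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED
        /* Anonymous pages are dropped and refault zero-filled. */
        madvise(addr, length, MADV_DONTNEED);
    #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
        /* Pages may be reclaimed lazily; their contents become undefined. */
        madvise(addr, length, MADV_FREE);
    #elif defined(JEMALLOC_PURGE_MSYNC_KILLPAGES)
        /* Older Darwin: msync() with MS_KILLPAGES discards dirty pages. */
        msync(addr, length, MS_KILLPAGES);
    #else
    #  error "No method defined for purging unused dirty pages."
    #endif
    }
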
diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c
index 605197e..cc85e84 100644
--- a/jemalloc/src/base.c
+++ b/jemalloc/src/base.c
@@ -32,7 +32,7 @@ base_pages_alloc(size_t minsize)
assert(minsize != 0);
csize = CHUNK_CEILING(minsize);
zero = false;
- base_pages = chunk_alloc(csize, &zero);
+ base_pages = chunk_alloc(csize, true, &zero);
if (base_pages == NULL)
return (true);
base_next_addr = base_pages;
diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c
index e6e3bcd..5cb9961 100644
--- a/jemalloc/src/chunk.c
+++ b/jemalloc/src/chunk.c
@@ -14,6 +14,10 @@ malloc_mutex_t chunks_mtx;
chunk_stats_t stats_chunks;
#endif
+#ifdef JEMALLOC_IVSALLOC
+rtree_t *chunks_rtree;
+#endif
+
/* Various chunk-related settings. */
size_t chunksize;
size_t chunksize_mask; /* (chunksize - 1). */
@@ -30,7 +34,7 @@ size_t arena_maxclass; /* Max size class for arenas. */
* advantage of them if they are returned.
*/
void *
-chunk_alloc(size_t size, bool *zero)
+chunk_alloc(size_t size, bool base, bool *zero)
{
void *ret;
@@ -63,6 +67,14 @@ chunk_alloc(size_t size, bool *zero)
/* All strategies for allocation failed. */
ret = NULL;
RETURN:
+#ifdef JEMALLOC_IVSALLOC
+ if (base == false && ret != NULL) {
+ if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) {
+ chunk_dealloc(ret, size);
+ return (NULL);
+ }
+ }
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (ret != NULL) {
# ifdef JEMALLOC_PROF
@@ -104,6 +116,9 @@ chunk_dealloc(void *chunk, size_t size)
assert(size != 0);
assert((size & chunksize_mask) == 0);
+#ifdef JEMALLOC_IVSALLOC
+ rtree_set(chunks_rtree, (uintptr_t)chunk, NULL);
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
malloc_mutex_lock(&chunks_mtx);
stats_chunks.curchunks -= (size / chunksize);
@@ -126,21 +141,27 @@ chunk_boot(void)
{
/* Set variables according to the value of opt_lg_chunk. */
- chunksize = (1LU << opt_lg_chunk);
+ chunksize = (ZU(1) << opt_lg_chunk);
assert(chunksize >= PAGE_SIZE);
chunksize_mask = chunksize - 1;
chunk_npages = (chunksize >> PAGE_SHIFT);
+#ifdef JEMALLOC_IVSALLOC
+ chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk);
+ if (chunks_rtree == NULL)
+ return (true);
+#endif
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
if (malloc_mutex_init(&chunks_mtx))
return (true);
memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif
-
#ifdef JEMALLOC_SWAP
if (chunk_swap_boot())
return (true);
#endif
+ if (chunk_mmap_boot())
+ return (true);
#ifdef JEMALLOC_DSS
if (chunk_dss_boot())
return (true);
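
With JEMALLOC_IVSALLOC, chunk_alloc() above records every non-base chunk in chunks_rtree and chunk_dealloc() removes it again; that registry is what lets ivsalloc() decide whether an arbitrary pointer belongs to jemalloc at all, which the Darwin zone hooks need. A toy, self-contained illustration of the idea (not jemalloc code; a flat array stands in for the radix tree): round the pointer down to its chunk base and look that base up.

    #include <stddef.h>
    #include <stdint.h>

    #define LG_CHUNK  22                        /* 4 MiB chunks, for example */
    #define CHUNKSIZE ((size_t)1 << LG_CHUNK)

    static void *chunk_registry[64];            /* toy stand-in for chunks_rtree */

    /* Record a chunk's base address at allocation time. */
    static void
    chunk_record(void *chunk)
    {
        for (size_t i = 0; i < 64; i++) {
            if (chunk_registry[i] == NULL) {
                chunk_registry[i] = chunk;
                return;
            }
        }
    }

    /* A pointer is "ours" only if the chunk containing it was recorded. */
    static int
    ptr_is_ours(const void *ptr)
    {
        void *base = (void *)((uintptr_t)ptr & ~((uintptr_t)CHUNKSIZE - 1));

        for (size_t i = 0; i < 64; i++) {
            if (chunk_registry[i] == base)
                return 1;
        }
        return 0;
    }
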
diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c
index d9f9e86..a3d09e9 100644
--- a/jemalloc/src/chunk_mmap.c
+++ b/jemalloc/src/chunk_mmap.c
@@ -6,19 +6,22 @@
/*
* Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
- * potentially avoid some system calls. We can get away without TLS here,
- * since the state of mmap_unaligned only affects performance, rather than
- * correct function.
+ * potentially avoid some system calls.
*/
-static
#ifndef NO_TLS
- __thread
+static __thread bool mmap_unaligned_tls
+ JEMALLOC_ATTR(tls_model("initial-exec"));
+#define MMAP_UNALIGNED_GET() mmap_unaligned_tls
+#define MMAP_UNALIGNED_SET(v) do { \
+ mmap_unaligned_tls = (v); \
+} while (0)
+#else
+static pthread_key_t mmap_unaligned_tsd;
+#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd))
+#define MMAP_UNALIGNED_SET(v) do { \
+ pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \
+} while (0)
#endif
- bool mmap_unaligned
-#ifndef NO_TLS
- JEMALLOC_ATTR(tls_model("initial-exec"))
-#endif
- ;
/******************************************************************************/
/* Function prototypes for non-inline static functions. */
@@ -128,7 +131,7 @@ chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve)
* method.
*/
if (unaligned == false)
- mmap_unaligned = false;
+ MMAP_UNALIGNED_SET(false);
return (ret);
}
@@ -166,7 +169,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve)
* fast method next time.
*/
- if (mmap_unaligned == false) {
+ if (MMAP_UNALIGNED_GET() == false) {
size_t offset;
ret = pages_map(NULL, size, noreserve);
@@ -175,7 +178,7 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve)
offset = CHUNK_ADDR2OFFSET(ret);
if (offset != 0) {
- mmap_unaligned = true;
+ MMAP_UNALIGNED_SET(true);
/* Try to extend chunk boundary. */
if (pages_map((void *)((uintptr_t)ret + size),
chunksize - offset, noreserve) == NULL) {
@@ -184,7 +187,8 @@ chunk_alloc_mmap_internal(size_t size, bool noreserve)
* the reliable-but-expensive method.
*/
pages_unmap(ret, size);
- ret = chunk_alloc_mmap_slow(size, true, noreserve);
+ ret = chunk_alloc_mmap_slow(size, true,
+ noreserve);
} else {
/* Clean up unneeded leading space. */
pages_unmap(ret, chunksize - offset);
@@ -216,3 +220,17 @@ chunk_dealloc_mmap(void *chunk, size_t size)
pages_unmap(chunk, size);
}
+
+bool
+chunk_mmap_boot(void)
+{
+
+#ifdef NO_TLS
+ if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) {
+ malloc_write("<jemalloc>: Error in pthread_key_create()\n");
+ return (true);
+ }
+#endif
+
+ return (false);
+}
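
chunk_mmap.c now hides the per-thread mmap_unaligned flag behind MMAP_UNALIGNED_GET()/MMAP_UNALIGNED_SET(): a __thread variable where real TLS exists, and pthread thread-specific data (created in chunk_mmap_boot()) where it does not, as on the OS X toolchains this port targets. The same pattern in a generic, standalone sketch; the names are illustrative and NO_TLS stands in for the configure result:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdint.h>

    #ifndef NO_TLS
    static __thread bool flag_tls;
    #  define FLAG_GET()  flag_tls
    #  define FLAG_SET(v) do { flag_tls = (v); } while (0)
    #else
    static pthread_key_t flag_tsd;              /* created once, at boot */
    #  define FLAG_GET()  ((bool)(uintptr_t)pthread_getspecific(flag_tsd))
    #  define FLAG_SET(v) do { pthread_setspecific(flag_tsd, (void *)(uintptr_t)(v)); } while (0)
    #endif

    /* Mirrors chunk_mmap_boot(): the key is only needed in the TSD case. */
    static bool
    flag_boot(void)
    {
    #ifdef NO_TLS
        if (pthread_key_create(&flag_tsd, NULL) != 0)
            return (true);
    #endif
        return (false);
    }
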
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 128883f..6491306 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -41,9 +41,7 @@ CTL_PROTO(epoch)
#ifdef JEMALLOC_TCACHE
CTL_PROTO(tcache_flush)
#endif
-#ifndef NO_TLS
CTL_PROTO(thread_arena)
-#endif
CTL_PROTO(config_debug)
CTL_PROTO(config_dss)
CTL_PROTO(config_dynamic_page_shift)
@@ -213,11 +211,9 @@ static const ctl_node_t tcache_node[] = {
};
#endif
-#ifndef NO_TLS
static const ctl_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)}
};
-#endif
static const ctl_node_t config_node[] = {
{NAME("debug"), CTL(config_debug)},
@@ -457,9 +453,7 @@ static const ctl_node_t root_node[] = {
#ifdef JEMALLOC_TCACHE
{NAME("tcache"), CHILD(tcache)},
#endif
-#ifndef NO_TLS
{NAME("thread"), CHILD(thread)},
-#endif
{NAME("config"), CHILD(config)},
{NAME("opt"), CHILD(opt)},
{NAME("arenas"), CHILD(arenas)},
@@ -1040,13 +1034,13 @@ tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
VOID();
- tcache = tcache_tls;
+ tcache = TCACHE_GET();
if (tcache == NULL) {
ret = 0;
goto RETURN;
}
tcache_destroy(tcache);
- tcache_tls = NULL;
+ TCACHE_SET(NULL);
ret = 0;
RETURN:
@@ -1054,7 +1048,6 @@ RETURN:
}
#endif
-#ifndef NO_TLS
static int
thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
void *newp, size_t newlen)
@@ -1085,14 +1078,13 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
}
/* Set new arena association. */
- arenas_map = arena;
+ ARENA_SET(arena);
}
ret = 0;
RETURN:
return (ret);
}
-#endif
/******************************************************************************/
diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c
index 49962ea..be35d16 100644
--- a/jemalloc/src/huge.c
+++ b/jemalloc/src/huge.c
@@ -37,7 +37,7 @@ huge_malloc(size_t size, bool zero)
if (node == NULL)
return (NULL);
- ret = chunk_alloc(csize, &zero);
+ ret = chunk_alloc(csize, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
@@ -99,7 +99,7 @@ huge_palloc(size_t alignment, size_t size)
return (NULL);
zero = false;
- ret = chunk_alloc(alloc_size, &zero);
+ ret = chunk_alloc(alloc_size, false, &zero);
if (ret == NULL) {
base_node_dealloc(node);
return (NULL);
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index b36590d..ebce3ca 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -89,12 +89,12 @@
malloc_mutex_t arenas_lock;
arena_t **arenas;
unsigned narenas;
-#ifndef NO_TLS
static unsigned next_arena;
-#endif
#ifndef NO_TLS
-__thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec"));
+__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#else
+pthread_key_t arenas_tsd;
#endif
/* Set to true once the allocator has been initialized. */
@@ -104,7 +104,7 @@ static bool malloc_initialized = false;
static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */
-static malloc_mutex_t init_lock = PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize;
@@ -146,8 +146,6 @@ static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
static bool malloc_init_hard(void);
-static void jemalloc_prefork(void);
-static void jemalloc_postfork(void);
/******************************************************************************/
/* malloc_message() setup. */
@@ -200,7 +198,6 @@ arenas_extend(unsigned ind)
return (arenas[0]);
}
-#ifndef NO_TLS
/*
* Choose an arena based on a per-thread value (slow-path code only, called
* only by choose_arena()).
@@ -219,11 +216,10 @@ choose_arena_hard(void)
} else
ret = arenas[0];
- arenas_map = ret;
+ ARENA_SET(ret);
return (ret);
}
-#endif
static void
stats_print_atexit(void)
@@ -697,14 +693,12 @@ MALLOC_OUT:
return (true);
}
-#ifndef NO_TLS
/*
* Assign the initial arena to the initial thread, in order to avoid
* spurious creation of an extra arena if the application switches to
* threaded mode.
*/
- arenas_map = arenas[0];
-#endif
+ ARENA_SET(arenas[0]);
malloc_mutex_init(&arenas_lock);
@@ -748,37 +742,15 @@ MALLOC_OUT:
narenas = 1;
}
-#ifdef NO_TLS
- if (narenas > 1) {
- static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
- 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
- 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
- 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
- 223, 227, 229, 233, 239, 241, 251, 257, 263};
- unsigned nprimes, parenas;
+ next_arena = (narenas > 0) ? 1 : 0;
- /*
- * Pick a prime number of hash arenas that is more than narenas
- * so that direct hashing of pthread_self() pointers tends to
- * spread allocations evenly among the arenas.
- */
- assert((narenas & 1) == 0); /* narenas must be even. */
- nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
- parenas = primes[nprimes - 1]; /* In case not enough primes. */
- for (i = 1; i < nprimes; i++) {
- if (primes[i] > narenas) {
- parenas = primes[i];
- break;
- }
- }
- narenas = parenas;
+#ifdef NO_TLS
+ if (pthread_key_create(&arenas_tsd, NULL) != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
}
#endif
-#ifndef NO_TLS
- next_arena = (narenas > 0) ? 1 : 0;
-#endif
-
/* Allocate and initialize arenas. */
arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
if (arenas == NULL) {
@@ -793,11 +765,35 @@ MALLOC_OUT:
/* Copy the pointer to the one arena that was already initialized. */
arenas[0] = init_arenas[0];
+#ifdef JEMALLOC_ZONE
+ /* Register the custom zone. */
+ malloc_zone_register(create_zone());
+
+ /*
+ * Convert the default szone to an "overlay zone" that is capable of
+ * deallocating szone-allocated objects, but allocating new objects
+ * from jemalloc.
+ */
+ szone2ozone(malloc_default_zone());
+#endif
+
malloc_initialized = true;
malloc_mutex_unlock(&init_lock);
return (false);
}
+
+#ifdef JEMALLOC_ZONE
+JEMALLOC_ATTR(constructor)
+void
+jemalloc_darwin_init(void)
+{
+
+ if (malloc_init_hard())
+ abort();
+}
+#endif
+
/*
* End initialization functions.
*/
@@ -1219,8 +1215,12 @@ JEMALLOC_P(malloc_usable_size)(const void *ptr)
{
size_t ret;
+#ifdef JEMALLOC_IVSALLOC
+ ret = ivsalloc(ptr);
+#else
assert(ptr != NULL);
ret = isalloc(ptr);
+#endif
return (ret);
}
@@ -1298,11 +1298,13 @@ JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp,
* is threaded here.
*/
-static void
+void
jemalloc_prefork(void)
{
unsigned i;
+ assert(isthreaded);
+
/* Acquire all mutexes in a safe order. */
malloc_mutex_lock(&arenas_lock);
@@ -1324,11 +1326,13 @@ jemalloc_prefork(void)
#endif
}
-static void
+void
jemalloc_postfork(void)
{
unsigned i;
+ assert(isthreaded);
+
/* Release all mutexes, now that fork() has completed. */
#ifdef JEMALLOC_SWAP
@@ -1349,3 +1353,5 @@ jemalloc_postfork(void)
}
malloc_mutex_unlock(&arenas_lock);
}
+
+/******************************************************************************/
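
The Darwin port initializes eagerly: jemalloc_darwin_init(), declared with JEMALLOC_ATTR(constructor), runs malloc_init_hard() when the library is loaded, and initialization now also registers the custom zone and converts the default szone into an overlay zone. A tiny sketch of the constructor mechanism itself, assuming the GCC/Clang attribute syntax that JEMALLOC_ATTR(constructor) expands to:

    #include <stdio.h>
    #include <stdlib.h>

    static int initialized;

    /* Runs when the object is loaded, before main(). */
    __attribute__((constructor))
    static void
    init_early(void)
    {
        initialized = 1;
    }

    int
    main(void)
    {
        if (!initialized)
            abort();
        puts("constructor ran before main()");
        return (0);
    }
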
diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c
index 3b6081a..337312b 100644
--- a/jemalloc/src/mutex.c
+++ b/jemalloc/src/mutex.c
@@ -59,7 +59,11 @@ malloc_mutex_init(malloc_mutex_t *mutex)
if (pthread_mutexattr_init(&attr) != 0)
return (true);
+#ifdef PTHREAD_MUTEX_ADAPTIVE_NP
pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+#else
+ pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT);
+#endif
if (pthread_mutex_init(mutex, &attr) != 0) {
pthread_mutexattr_destroy(&attr);
return (true);
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index 6d6910e..e70b132 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -45,7 +45,19 @@ static malloc_mutex_t bt2ctx_mtx;
* into the associated prof_ctx_t objects, and unlink/free the prof_thr_cnt_t
* objects.
*/
+#ifndef NO_TLS
static __thread ckh_t *bt2cnt_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+# define BT2CNT_GET() bt2cnt_tls
+# define BT2CNT_SET(v) do { \
+ bt2cnt_tls = (v); \
+ pthread_setspecific(bt2cnt_tsd, (void *)(v)); \
+} while (0)
+#else
+# define BT2CNT_GET() ((ckh_t *)pthread_getspecific(bt2cnt_tsd))
+# define BT2CNT_SET(v) do { \
+ pthread_setspecific(bt2cnt_tsd, (void *)(v)); \
+} while (0)
+#endif
/*
* Same contents as b2cnt_tls, but initialized such that the TSD destructor is
@@ -57,12 +69,45 @@ static pthread_key_t bt2cnt_tsd;
/* (1U << opt_lg_prof_bt_max). */
static unsigned prof_bt_max;
-static __thread uint64_t prof_sample_prn_state
- JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_threshold
- JEMALLOC_ATTR(tls_model("initial-exec"));
-static __thread uint64_t prof_sample_accum
+typedef struct prof_sample_state_s prof_sample_state_t;
+struct prof_sample_state_s {
+ uint64_t prn_state;
+ uint64_t threshold;
+ uint64_t accum;
+};
+
+#ifndef NO_TLS
+static __thread prof_sample_state_t prof_sample_state_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
+# define PROF_SAMPLE_STATE_GET(r) do { \
+ r = &prof_sample_state_tls; \
+} while (0)
+#else
+static pthread_key_t prof_sample_state_tsd;
+/* Used only if an OOM error occurs in PROF_SAMPLE_STATE_GET(). */
+prof_sample_state_t prof_sample_state_oom;
+# define PROF_SAMPLE_STATE_GET(r) do { \
+ r = (prof_sample_state_t *)pthread_getspecific( \
+ prof_sample_state_tsd); \
+ if (r == NULL) { \
+ r = ipalloc(CACHELINE, sizeof(prof_sample_state_t)); \
+ if (r == NULL) { \
+ malloc_write("<jemalloc>: Error in heap " \
+ "profiler: out of memory; subsequent heap " \
+ "profiles may be inaccurate\n"); \
+ if (opt_abort) \
+ abort(); \
+ /* Failure is not an option... */ \
+ r = &prof_sample_state_oom; \
+ } \
+ pthread_setspecific(prof_sample_state_tsd, (void *)r); \
+ } \
+} while (0)
+# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd))
+# define ARENA_SET(v) do { \
+ pthread_setspecific(arenas_tsd, (void *)(v)); \
+} while (0)
+#endif
static malloc_mutex_t prof_dump_seq_mtx;
static uint64_t prof_dump_seq;
@@ -116,6 +161,9 @@ static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1,
size_t *hash2);
static bool prof_bt_keycomp(const void *k1, const void *k2);
static void bt2cnt_thread_cleanup(void *arg);
+#ifdef NO_TLS
+static void prof_sample_state_thread_cleanup(void *arg);
+#endif
/******************************************************************************/
@@ -436,7 +484,7 @@ static prof_thr_cnt_t *
prof_lookup(prof_bt_t *bt)
{
prof_thr_cnt_t *ret;
- ckh_t *bt2cnt = bt2cnt_tls;
+ ckh_t *bt2cnt = BT2CNT_GET();
if (bt2cnt == NULL) {
/* Initialize an empty cache for this thread. */
@@ -448,8 +496,8 @@ prof_lookup(prof_bt_t *bt)
idalloc(bt2cnt);
return (NULL);
}
- bt2cnt_tls = bt2cnt;
- pthread_setspecific(bt2cnt_tsd, bt2cnt);
+
+ BT2CNT_SET(bt2cnt);
}
if (ckh_search(bt2cnt, bt, NULL, (void **)&ret)) {
@@ -519,15 +567,17 @@ prof_sample_threshold_update(void)
{
uint64_t r;
double u;
+ prof_sample_state_t *prof_sample_state;
/*
* Compute prof_sample_threshold as a geometrically distributed random
* variable with mean (2^opt_lg_prof_sample).
*/
- prn64(r, 53, prof_sample_prn_state, (uint64_t)1125899906842625LLU,
- 1058392653243283975);
+ PROF_SAMPLE_STATE_GET(prof_sample_state);
+ prn64(r, 53, prof_sample_state->prn_state,
+ (uint64_t)1125899906842625LLU, 1058392653243283975);
u = (double)r * (1.0/9007199254740992.0L);
- prof_sample_threshold = (uint64_t)(log(u) /
+ prof_sample_state->threshold = (uint64_t)(log(u) /
log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample))))
+ (uint64_t)1U;
}
@@ -551,26 +601,31 @@ prof_alloc_prep(size_t size)
prof_backtrace(&bt, 2, prof_bt_max);
ret = prof_lookup(&bt);
} else {
- if (prof_sample_threshold == 0) {
+ prof_sample_state_t *prof_sample_state;
+
+ PROF_SAMPLE_STATE_GET(prof_sample_state);
+ if (prof_sample_state->threshold == 0) {
/*
* Initialize. Seed the prng differently for each
* thread.
*/
- prof_sample_prn_state = (uint64_t)(uintptr_t)&size;
+ prof_sample_state->prn_state =
+ (uint64_t)(uintptr_t)&size;
prof_sample_threshold_update();
}
/*
* Determine whether to capture a backtrace based on whether
* size is enough for prof_accum to reach
- * prof_sample_threshold. However, delay updating these
+ * prof_sample_state->threshold. However, delay updating these
* variables until prof_{m,re}alloc(), because we don't know
* for sure that the allocation will succeed.
*
* Use subtraction rather than addition to avoid potential
* integer overflow.
*/
- if (size >= prof_sample_threshold - prof_sample_accum) {
+ if (size >= prof_sample_state->threshold -
+ prof_sample_state->accum) {
bt_init(&bt, vec);
prof_backtrace(&bt, 2, prof_bt_max);
ret = prof_lookup(&bt);
@@ -621,21 +676,26 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
static inline void
prof_sample_accum_update(size_t size)
{
+ prof_sample_state_t *prof_sample_state;
/* Sampling logic is unnecessary if the interval is 1. */
assert(opt_lg_prof_sample != 0);
/* Take care to avoid integer overflow. */
- if (size >= prof_sample_threshold - prof_sample_accum) {
- prof_sample_accum -= (prof_sample_threshold - size);
+ PROF_SAMPLE_STATE_GET(prof_sample_state);
+ if (size >= prof_sample_state->threshold - prof_sample_state->accum) {
+ prof_sample_state->accum -= (prof_sample_state->threshold -
+ size);
/* Compute new prof_sample_threshold. */
prof_sample_threshold_update();
- while (prof_sample_accum >= prof_sample_threshold) {
- prof_sample_accum -= prof_sample_threshold;
+ while (prof_sample_state->accum >=
+ prof_sample_state->threshold) {
+ prof_sample_state->accum -=
+ prof_sample_state->threshold;
prof_sample_threshold_update();
}
} else
- prof_sample_accum += size;
+ prof_sample_state->accum += size;
}
void
@@ -1244,7 +1304,7 @@ bt2cnt_thread_cleanup(void *arg)
{
ckh_t *bt2cnt;
- bt2cnt = bt2cnt_tls;
+ bt2cnt = BT2CNT_GET();
if (bt2cnt != NULL) {
ql_head(prof_thr_cnt_t) cnts_ql;
size_t tabind;
@@ -1278,7 +1338,7 @@ bt2cnt_thread_cleanup(void *arg)
*/
ckh_delete(bt2cnt);
idalloc(bt2cnt);
- bt2cnt_tls = NULL;
+ BT2CNT_SET(NULL);
/* Delete cnt's. */
while ((cnt = ql_last(&cnts_ql, link)) != NULL) {
@@ -1288,6 +1348,17 @@ bt2cnt_thread_cleanup(void *arg)
}
}
+#ifdef NO_TLS
+static void
+prof_sample_state_thread_cleanup(void *arg)
+{
+ prof_sample_state_t *prof_sample_state = (prof_sample_state_t *)arg;
+
+ if (prof_sample_state != &prof_sample_state_oom)
+ idalloc(prof_sample_state);
+}
+#endif
+
void
prof_boot0(void)
{
@@ -1332,6 +1403,14 @@ prof_boot1(void)
"<jemalloc>: Error in pthread_key_create()\n");
abort();
}
+#ifdef NO_TLS
+ if (pthread_key_create(&prof_sample_state_tsd,
+ prof_sample_state_thread_cleanup) != 0) {
+ malloc_write(
+ "<jemalloc>: Error in pthread_key_create()\n");
+ abort();
+ }
+#endif
prof_bt_max = (1U << opt_lg_prof_bt_max);
if (malloc_mutex_init(&prof_dump_seq_mtx))
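
prof.c keeps its sampling logic intact and only moves the per-thread PRNG state, threshold, and accumulator into prof_sample_state_t so that they can live in pthread TSD when __thread is unavailable. The threshold itself is still drawn as a geometric random variable with mean 2^opt_lg_prof_sample bytes. A standalone sketch of that draw, with rand() standing in for jemalloc's prn64() purely for illustration (link with -lm):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Draw a sampling threshold with mean 2^lg_sample bytes. */
    static uint64_t
    sample_threshold(unsigned lg_sample)
    {
        double u = ((double)rand() + 1.0) / ((double)RAND_MAX + 2.0); /* u in (0,1) */

        return ((uint64_t)(log(u) /
            log(1.0 - (1.0 / (double)((uint64_t)1U << lg_sample)))) + 1);
    }

    int
    main(void)
    {
        /* With lg_sample == 19 the thresholds average about 512 KiB. */
        for (int i = 0; i < 3; i++)
            printf("%llu\n", (unsigned long long)sample_threshold(19));
        return (0);
    }
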
diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c
new file mode 100644
index 0000000..a583751
--- /dev/null
+++ b/jemalloc/src/rtree.c
@@ -0,0 +1,42 @@
+#define RTREE_C_
+#include "jemalloc/internal/jemalloc_internal.h"
+
+rtree_t *
+rtree_new(unsigned bits)
+{
+ rtree_t *ret;
+ unsigned bits_per_level, height, i;
+
+ bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
+ height = bits / bits_per_level;
+ if (height * bits_per_level != bits)
+ height++;
+ assert(height * bits_per_level >= bits);
+
+ ret = (rtree_t*)base_alloc(sizeof(rtree_t) + (sizeof(unsigned) *
+ (height - 1)));
+ if (ret == NULL)
+ return (NULL);
+ memset(ret, 0, sizeof(rtree_t) + (sizeof(unsigned) * (height - 1)));
+
+ malloc_mutex_init(&ret->mutex);
+ ret->height = height;
+ if (bits_per_level * height > bits)
+ ret->level2bits[0] = bits % bits_per_level;
+ else
+ ret->level2bits[0] = bits_per_level;
+ for (i = 1; i < height; i++)
+ ret->level2bits[i] = bits_per_level;
+
+ ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]);
+ if (ret->root == NULL) {
+ /*
+ * We leak the rtree here, since there's no generic base
+ * deallocation.
+ */
+ return (NULL);
+ }
+ memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);
+
+ return (ret);
+}
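
rtree_new() splits a key of the requested width into as few fixed-width levels as the node size allows, putting any remainder in the top level. As a worked example under assumed numbers (64-bit pointers, 4 MiB chunks, and a hypothetical RTREE_NODESIZE of 4096 bytes; the real constant lives in rtree.h): the key is 64 - 22 = 42 bits, each node holds 4096/8 = 512 pointers so bits_per_level = 9 (pow2_ceil() is a no-op since 512 is already a power of two), and the tree gets 5 levels, the topmost covering the remaining 6 bits. The same arithmetic as a checkable snippet:

    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    int
    main(void)
    {
        unsigned bits = 64 - 22;                     /* key bits for 4 MiB chunks */
        unsigned bits_per_level = ffs(4096 / 8) - 1; /* ffs(512) - 1 == 9 */
        unsigned height = bits / bits_per_level;

        if (height * bits_per_level != bits)
            height++;                                /* 42/9 rounds up to 5 levels */
        printf("%u key bits -> %u levels of %u bits, top level %u bits\n",
            bits, height, bits_per_level, bits % bits_per_level);
        return (0);
    }
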
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index ace24ce..8634383 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -9,13 +9,15 @@ ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
/* Map of thread-specific caches. */
+#ifndef NO_TLS
__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
+#endif
/*
* Same contents as tcache, but initialized such that the TSD destructor is
* called when a thread exits, so that the cache can be cleaned up.
*/
-static pthread_key_t tcache_tsd;
+pthread_key_t tcache_tsd;
size_t nhbins;
size_t tcache_maxclass;
@@ -239,8 +241,7 @@ tcache_create(arena_t *arena)
for (; i < nhbins; i++)
tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
- tcache_tls = tcache;
- pthread_setspecific(tcache_tsd, tcache);
+ TCACHE_SET(tcache);
return (tcache);
}
@@ -328,11 +329,24 @@ tcache_thread_cleanup(void *arg)
{
tcache_t *tcache = (tcache_t *)arg;
- assert(tcache == tcache_tls);
- if (tcache != NULL) {
+ if (tcache == (void *)(uintptr_t)1) {
+ /*
+ * The previous time this destructor was called, we set the key
+ * to 1 so that other destructors wouldn't cause re-creation of
+ * the tcache. This time, do nothing, so that the destructor
+ * will not be called again.
+ */
+ } else if (tcache == (void *)(uintptr_t)2) {
+ /*
+ * Another destructor called an allocator function after this
+ * destructor was called. Reset tcache to 1 in order to
+ * receive another callback.
+ */
+ TCACHE_SET((uintptr_t)1);
+ } else if (tcache != NULL) {
assert(tcache != (void *)(uintptr_t)1);
tcache_destroy(tcache);
- tcache_tls = (void *)(uintptr_t)1;
+ TCACHE_SET((uintptr_t)1);
}
}
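
The rewritten tcache_thread_cleanup() copes with pthreads calling TSD destructors in an unspecified order by parking small sentinel values in the slot: 1 means "cache already destroyed", 2 means "another destructor allocated after our cleanup, call us once more". A sketch of that state machine with the actual teardown elided (the names and cache type are illustrative):

    #include <pthread.h>
    #include <stdint.h>

    static pthread_key_t cache_tsd;

    static void
    cache_cleanup(void *arg)
    {
        uintptr_t state = (uintptr_t)arg;

        if (state == 1) {
            /* Already cleaned up on an earlier pass; pthreads nulled the
             * slot before calling us, and leaving it NULL means we will
             * not be invoked again. */
        } else if (state == 2) {
            /* Another destructor used the allocator after our cleanup;
             * re-arm the sentinel so we get one more callback. */
            pthread_setspecific(cache_tsd, (void *)(uintptr_t)1);
        } else if (arg != NULL) {
            /* A live cache: tear it down, then park the sentinel. */
            /* cache_destroy(arg); -- teardown elided in this sketch */
            pthread_setspecific(cache_tsd, (void *)(uintptr_t)1);
        }
    }
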
diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c
new file mode 100644
index 0000000..2c1b231
--- /dev/null
+++ b/jemalloc/src/zone.c
@@ -0,0 +1,354 @@
+#include "jemalloc/internal/jemalloc_internal.h"
+#ifndef JEMALLOC_ZONE
+# error "This source file is for zones on Darwin (OS X)."
+#endif
+
+/******************************************************************************/
+/* Data. */
+
+static malloc_zone_t zone, szone;
+static struct malloc_introspection_t zone_introspect, ozone_introspect;
+
+/******************************************************************************/
+/* Function prototypes for non-inline static functions. */
+
+static size_t zone_size(malloc_zone_t *zone, void *ptr);
+static void *zone_malloc(malloc_zone_t *zone, size_t size);
+static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size);
+static void *zone_valloc(malloc_zone_t *zone, size_t size);
+static void zone_free(malloc_zone_t *zone, void *ptr);
+static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *zone_memalign(malloc_zone_t *zone, size_t alignment,
+ size_t size);
+static void zone_free_definite_size(malloc_zone_t *zone, void *ptr,
+ size_t size);
+#endif
+static void *zone_destroy(malloc_zone_t *zone);
+static size_t zone_good_size(malloc_zone_t *zone, size_t size);
+static void zone_force_lock(malloc_zone_t *zone);
+static void zone_force_unlock(malloc_zone_t *zone);
+static size_t ozone_size(malloc_zone_t *zone, void *ptr);
+static void ozone_free(malloc_zone_t *zone, void *ptr);
+static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size);
+static unsigned ozone_batch_malloc(malloc_zone_t *zone, size_t size,
+ void **results, unsigned num_requested);
+static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+ unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+ size_t size);
+#endif
+static void ozone_force_lock(malloc_zone_t *zone);
+static void ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+ /*
+ * There appear to be places within Darwin (such as setenv(3)) that
+ * cause calls to this function with pointers that *no* zone owns. If
+ * we knew that all pointers were owned by *some* zone, we could split
+ * our zone into two parts, and use one as the default allocator and
+ * the other as the default deallocator/reallocator. Since that will
+ * not work in practice, we must check all pointers to assure that they
+ * reside within a mapped chunk before determining size.
+ */
+ return (ivsalloc(ptr));
+}
+
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+ return (JEMALLOC_P(malloc)(size));
+}
+
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+ return (JEMALLOC_P(calloc)(num, size));
+}
+
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+ void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+ JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+ return (ret);
+}
+
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+ JEMALLOC_P(free)(ptr);
+}
+
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+ void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+ JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+ return (ret);
+}
+
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ assert(ivsalloc(ptr) == size);
+ JEMALLOC_P(free)(ptr);
+}
+#endif
+
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+ /* This function should never be called. */
+ assert(false);
+ return (NULL);
+}
+
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+ size_t ret;
+ void *p;
+
+ /*
+ * Actually create an object of the appropriate size, then find out
+ * how large it could have been without moving up to the next size
+ * class.
+ */
+ p = JEMALLOC_P(malloc)(size);
+ if (p != NULL) {
+ ret = isalloc(p);
+ JEMALLOC_P(free)(p);
+ } else
+ ret = size;
+
+ return (ret);
+}
+
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+ if (isthreaded)
+ jemalloc_prefork();
+}
+
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+ if (isthreaded)
+ jemalloc_postfork();
+}
+
+malloc_zone_t *
+create_zone(void)
+{
+
+ zone.size = (void *)zone_size;
+ zone.malloc = (void *)zone_malloc;
+ zone.calloc = (void *)zone_calloc;
+ zone.valloc = (void *)zone_valloc;
+ zone.free = (void *)zone_free;
+ zone.realloc = (void *)zone_realloc;
+ zone.destroy = (void *)zone_destroy;
+ zone.zone_name = "jemalloc_zone";
+ zone.batch_malloc = NULL;
+ zone.batch_free = NULL;
+ zone.introspect = &zone_introspect;
+ zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone.memalign = zone_memalign;
+ zone.free_definite_size = zone_free_definite_size;
+#endif
+
+ zone_introspect.enumerator = NULL;
+ zone_introspect.good_size = (void *)zone_good_size;
+ zone_introspect.check = NULL;
+ zone_introspect.print = NULL;
+ zone_introspect.log = NULL;
+ zone_introspect.force_lock = (void *)zone_force_lock;
+ zone_introspect.force_unlock = (void *)zone_force_unlock;
+ zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone_introspect.zone_locked = NULL;
+#endif
+
+ return (&zone);
+}
+
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+ size_t ret;
+
+ ret = ivsalloc(ptr);
+ if (ret == 0)
+ ret = szone.size(zone, ptr);
+
+ return (ret);
+}
+
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+ if (ivsalloc(ptr) != 0)
+ JEMALLOC_P(free)(ptr);
+ else {
+ size_t size = szone.size(zone, ptr);
+ if (size != 0)
+ (szone.free)(zone, ptr);
+ }
+}
+
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+ size_t oldsize;
+
+ if (ptr == NULL)
+ return (JEMALLOC_P(malloc)(size));
+
+ oldsize = ivsalloc(ptr);
+ if (oldsize != 0)
+ return (JEMALLOC_P(realloc)(ptr, size));
+ else {
+ oldsize = szone.size(zone, ptr);
+ if (oldsize == 0)
+ return (JEMALLOC_P(malloc)(size));
+ else {
+ void *ret = JEMALLOC_P(malloc)(size);
+ if (ret != NULL) {
+ memcpy(ret, ptr, (oldsize < size) ? oldsize :
+ size);
+ (szone.free)(zone, ptr);
+ }
+ return (ret);
+ }
+ }
+}
+
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+ unsigned num_requested)
+{
+
+ /* Don't bother implementing this interface, since it isn't required. */
+ return (0);
+}
+
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+ unsigned i;
+
+ for (i = 0; i < num; i++)
+ ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+ if (ivsalloc(ptr) != 0) {
+ assert(ivsalloc(ptr) == size);
+ JEMALLOC_P(free)(ptr);
+ } else {
+ assert(size == szone.size(zone, ptr));
+ szone.free_definite_size(zone, ptr, size);
+ }
+}
+#endif
+
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+ /* jemalloc locking is taken care of by the normal jemalloc zone. */
+ szone.introspect->force_lock(zone);
+}
+
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+ /* jemalloc locking is taken care of by the normal jemalloc zone. */
+ szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc. This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+ /*
+ * Stash a copy of the original szone so that we can call its
+ * functions as needed. Note that internally, the szone stores its
+ * bookkeeping data structures immediately following the malloc_zone_t
+ * header, so when calling szone functions, we need to pass a pointer
+ * to the original zone structure.
+ */
+ memcpy(&szone, zone, sizeof(malloc_zone_t));
+
+ zone->size = (void *)ozone_size;
+ zone->malloc = (void *)zone_malloc;
+ zone->calloc = (void *)zone_calloc;
+ zone->valloc = (void *)zone_valloc;
+ zone->free = (void *)ozone_free;
+ zone->realloc = (void *)ozone_realloc;
+ zone->destroy = (void *)zone_destroy;
+ zone->zone_name = "jemalloc_ozone";
+ zone->batch_malloc = ozone_batch_malloc;
+ zone->batch_free = ozone_batch_free;
+ zone->introspect = &ozone_introspect;
+ zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ zone->memalign = zone_memalign;
+ zone->free_definite_size = ozone_free_definite_size;
+#endif
+
+ ozone_introspect.enumerator = NULL;
+ ozone_introspect.good_size = (void *)zone_good_size;
+ ozone_introspect.check = NULL;
+ ozone_introspect.print = NULL;
+ ozone_introspect.log = NULL;
+ ozone_introspect.force_lock = (void *)ozone_force_lock;
+ ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+ ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+ ozone_introspect.zone_locked = NULL;
+#endif
+}
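
ozone_size(), ozone_free(), and ozone_realloc() above all lean on a Darwin zone convention: a zone's size() callback returns 0 for pointers it does not own, so a pointer for which ivsalloc() returns 0 but szone.size() does not must still belong to the original scalable zone and is handed back to it. A tiny Darwin-only probe of that convention through the public zone API (OS X only; illustration, not part of the patch):

    #include <malloc/malloc.h>
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
        malloc_zone_t *dz = malloc_default_zone();
        void *p = malloc(64);

        /* size() is nonzero only for pointers the zone owns; the overlay
         * zone's ownership checks depend on exactly this convention. */
        printf("zone \"%s\": size(p) = %zu, size(&dz) = %zu\n",
            malloc_get_zone_name(dz), malloc_zone_size(dz, p),
            malloc_zone_size(dz, (void *)&dz));
        free(p);
        return (0);
    }
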