Diffstat (limited to 'jemalloc/include')
| -rw-r--r-- | jemalloc/include/jemalloc/internal/arena.h | 188 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/chunk.h | 8 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/chunk_mmap.h | 2 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/ckh.h | 2 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/ctl.h | 10 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/huge.h | 7 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/jemalloc_internal.h.in | 478 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/mutex.h | 7 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/prof.h | 443 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/rtree.h | 161 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/stats.h | 2 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/tcache.h | 46 |
| -rw-r--r-- | jemalloc/include/jemalloc/internal/zone.h | 23 |
| -rw-r--r-- | jemalloc/include/jemalloc/jemalloc.h.in | 26 |
| -rw-r--r-- | jemalloc/include/jemalloc/jemalloc_defs.h.in | 36 |
15 files changed, 1207 insertions, 232 deletions
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h index c1955f1..9556c2c 100644 --- a/jemalloc/include/jemalloc/internal/arena.h +++ b/jemalloc/include/jemalloc/internal/arena.h @@ -121,17 +121,17 @@ struct arena_chunk_map_s { * * p : run page offset * s : run size - * c : size class (used only if prof_promote is true) + * c : (binind+1) for size class (used only if prof_promote is true) * x : don't care * - : 0 * + : 1 - * [DZLA] : bit set - * [dzla] : bit unset + * [DULA] : bit set + * [dula] : bit unset * * Unallocated (clean): - * ssssssss ssssssss ssss---- ----dz-- - * xxxxxxxx xxxxxxxx xxxx---- -----Zxx - * ssssssss ssssssss ssss---- ----dZ-- + * ssssssss ssssssss ssss---- ----du-- + * xxxxxxxx xxxxxxxx xxxx---- -----Uxx + * ssssssss ssssssss ssss---- ----dU-- * * Unallocated (dirty): * ssssssss ssssssss ssss---- ----D--- @@ -144,7 +144,7 @@ struct arena_chunk_map_s { * pppppppp pppppppp pppp---- ----d--a * * Large: - * ssssssss ssssssss ssss++++ ++++D-la + * ssssssss ssssssss ssss---- ----D-la * xxxxxxxx xxxxxxxx xxxx---- ----xxxx * -------- -------- -------- ----D-la * @@ -152,7 +152,7 @@ struct arena_chunk_map_s { * ssssssss ssssssss sssscccc ccccD-la * * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss++++ ++++D-la + * ssssssss ssssssss ssss---- ----D-la */ size_t bits; #ifdef JEMALLOC_PROF @@ -161,7 +161,7 @@ struct arena_chunk_map_s { #endif #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) #define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_ZEROED ((size_t)0x4U) +#define CHUNK_MAP_UNZEROED ((size_t)0x4U) #define CHUNK_MAP_LARGE ((size_t)0x2U) #define CHUNK_MAP_ALLOCATED ((size_t)0x1U) #define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED @@ -187,7 +187,12 @@ struct arena_chunk_s { /* Number of dirty pages. */ size_t ndirty; - /* Map of pages within chunk that keeps track of free/large/small. */ + /* + * Map of pages within chunk that keeps track of free/large/small. The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ arena_chunk_map_t map[1]; /* Dynamically sized. 
*/ }; typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; @@ -416,8 +421,12 @@ extern size_t sspace_min; extern size_t sspace_max; #define small_maxclass sspace_max -#define nlclasses (chunk_npages - arena_chunk_header_npages) +#define nlclasses (chunk_npages - map_bias) +void arena_purge_all(arena_t *arena); +#ifdef JEMALLOC_PROF +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +#endif #ifdef JEMALLOC_TCACHE void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind @@ -426,20 +435,15 @@ void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, # endif ); #endif -#ifdef JEMALLOC_PROF -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); void *arena_malloc(size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t alignment, size_t size, - size_t alloc_size); +void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, + size_t alignment, bool zero); size_t arena_salloc(const void *ptr); #ifdef JEMALLOC_PROF void arena_prof_promoted(const void *ptr, size_t size); size_t arena_salloc_demote(const void *ptr); -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); #endif void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_t *mapelm); @@ -449,7 +453,10 @@ void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); #endif -void *arena_ralloc(void *ptr, size_t size, size_t oldsize); +void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); bool arena_boot(void); @@ -458,10 +465,149 @@ bool arena_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +unsigned arena_run_regind(arena_run_t *run, arena_bin_t *bin, + const void *ptr, size_t size); +# ifdef JEMALLOC_PROF +prof_ctx_t *arena_prof_ctx_get(const void *ptr); +void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +# endif void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE unsigned +arena_run_regind(arena_run_t *run, arena_bin_t *bin, const void *ptr, + size_t size) +{ + unsigned shift, diff, regind; + + assert(run->magic == ARENA_RUN_MAGIC); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + shift = ffs(size) - 1; + diff >>= shift; + size >>= shift; + + if (size == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. 
+ */ +#define SIZE_INV_SHIFT 21 +#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) + static const unsigned size_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) + regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; + else + regind = diff / size; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * size); + assert(regind < bin->nregs); + + return (regind); +} + +#ifdef JEMALLOC_PROF +JEMALLOC_INLINE prof_ctx_t * +arena_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote) + ret = (prof_ctx_t *)(uintptr_t)1U; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + arena_bin_t *bin = run->bin; + unsigned regind; + + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); + ret = *(prof_ctx_t **)((uintptr_t)run + + bin->ctx0_offset + (regind * + sizeof(prof_ctx_t *))); + } + } else + ret = chunk->map[pageind-map_bias].prof_ctx; + + return (ret); +} + +JEMALLOC_INLINE void +arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + if (prof_promote == false) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + arena_bin_t *bin = run->bin; + unsigned regind; + + assert(run->magic == ARENA_RUN_MAGIC); + regind = arena_run_regind(run, bin, ptr, bin->reg_size); + + *((prof_ctx_t **)((uintptr_t)run + bin->ctx0_offset + + (regind * sizeof(prof_ctx_t *)))) = ctx; + } else + assert((uintptr_t)ctx == (uintptr_t)1U); + } else + chunk->map[pageind-map_bias].prof_ctx = ctx; +} +#endif + JEMALLOC_INLINE void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) { @@ -474,8 +620,8 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapelm = &chunk->map[pageind-map_bias]; assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { /* Small allocation. 
*/ diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/jemalloc/include/jemalloc/internal/chunk.h index 1f6abf7..a60f0ad 100644 --- a/jemalloc/include/jemalloc/internal/chunk.h +++ b/jemalloc/include/jemalloc/internal/chunk.h @@ -39,13 +39,17 @@ extern malloc_mutex_t chunks_mtx; extern chunk_stats_t stats_chunks; #endif +#ifdef JEMALLOC_IVSALLOC +extern rtree_t *chunks_rtree; +#endif + extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern size_t arena_chunk_header_npages; +extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t arena_maxclass; /* Max size class for arenas. */ -void *chunk_alloc(size_t size, bool *zero); +void *chunk_alloc(size_t size, bool base, bool *zero); void chunk_dealloc(void *chunk, size_t size); bool chunk_boot(void); diff --git a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/jemalloc/include/jemalloc/internal/chunk_mmap.h index dc52448..07b50a4 100644 --- a/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ b/jemalloc/include/jemalloc/internal/chunk_mmap.h @@ -13,6 +13,8 @@ void *chunk_alloc_mmap(size_t size); void *chunk_alloc_mmap_noreserve(size_t size); void chunk_dealloc_mmap(void *chunk, size_t size); +bool chunk_mmap_boot(void); + #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES diff --git a/jemalloc/include/jemalloc/internal/ckh.h b/jemalloc/include/jemalloc/internal/ckh.h index c39ea5c..d4e391b 100644 --- a/jemalloc/include/jemalloc/internal/ckh.h +++ b/jemalloc/include/jemalloc/internal/ckh.h @@ -45,7 +45,7 @@ struct ckh_s { #endif /* Used for pseudo-random number generation. */ -#define CKH_A 12345 +#define CKH_A 1103515241 #define CKH_C 12347 uint32_t prn_state; diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h index 7bbf21e..8776ad1 100644 --- a/jemalloc/include/jemalloc/internal/ctl.h +++ b/jemalloc/include/jemalloc/internal/ctl.h @@ -82,9 +82,9 @@ bool ctl_boot(void); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ != 0) { \ - malloc_write("<jemalloc>: Invalid xmallctl(\""); \ + malloc_write("<jemalloc>: Failure in xmallctl(\""); \ malloc_write(name); \ - malloc_write("\", ...) call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -92,9 +92,9 @@ bool ctl_boot(void); #define xmallctlnametomib(name, mibp, miblenp) do { \ if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlnametomib(\""); \ + "<jemalloc>: Failure in xmallctlnametomib(\""); \ malloc_write(name); \ - malloc_write("\", ...) 
call\n"); \ + malloc_write("\", ...)\n"); \ abort(); \ } \ } while (0) @@ -103,7 +103,7 @@ bool ctl_boot(void); if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ - "<jemalloc>: Invalid xmallctlbymib() call\n"); \ + "<jemalloc>: Failure in xmallctlbymib()\n"); \ abort(); \ } \ } while (0) diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h index 0c0582f..bf23127 100644 --- a/jemalloc/include/jemalloc/internal/huge.h +++ b/jemalloc/include/jemalloc/internal/huge.h @@ -20,8 +20,11 @@ extern size_t huge_allocated; extern malloc_mutex_t huge_mtx; void *huge_malloc(size_t size, bool zero); -void *huge_palloc(size_t alignment, size_t size); -void *huge_ralloc(void *ptr, size_t size, size_t oldsize); +void *huge_palloc(size_t size, size_t alignment, bool zero); +void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra); +void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); void huge_dalloc(void *ptr); size_t huge_salloc(const void *ptr); #ifdef JEMALLOC_PROF diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in index 2c3f32f..3d25300 100644 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in @@ -17,16 +17,29 @@ #include <stdio.h> #include <stdlib.h> #include <stdint.h> +#include <stddef.h> +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif #include <inttypes.h> #include <string.h> #include <strings.h> +#include <ctype.h> #include <unistd.h> #include <fcntl.h> #include <pthread.h> +#include <math.h> #define JEMALLOC_MANGLE #include "../jemalloc@install_suffix@.h" +#ifdef JEMALLOC_ZONE +#include <mach/mach_error.h> +#include <mach/mach_init.h> +#include <mach/vm_map.h> +#include <malloc/malloc.h> +#endif + #ifdef JEMALLOC_LAZY_LOCK #include <dlfcn.h> #endif @@ -49,7 +62,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); malloc_write("<jemalloc>: "); \ malloc_write(__FILE__); \ malloc_write(":"); \ - malloc_write(umax2s(__LINE__, 10, line_buf)); \ + malloc_write(u2s(__LINE__, 10, line_buf)); \ malloc_write(": Failed assertion: "); \ malloc_write("\""); \ malloc_write(#e); \ @@ -77,6 +90,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /******************************************************************************/ #define JEMALLOC_H_TYPES +#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) + #define ZU(z) ((size_t)z) #ifndef __DECONST @@ -92,8 +107,8 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); # define JEMALLOC_INLINE static inline #endif -/* Size of stack-allocated buffer passed to strerror_r(). */ -#define STRERROR_BUF 64 +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 /* Minimum alignment of allocations is 2^LG_QUANTUM bytes. 
*/ #ifdef __i386__ @@ -159,6 +174,16 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) +#ifdef PAGE_SHIFT +# undef PAGE_SHIFT +#endif +#ifdef PAGE_SIZE +# undef PAGE_SIZE +#endif +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif + #ifdef DYNAMIC_PAGE_SHIFT # define PAGE_SHIFT lg_pagesize # define PAGE_SIZE pagesize @@ -184,8 +209,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -203,8 +232,12 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_STRUCTS @@ -224,6 +257,7 @@ extern bool opt_xmalloc; #ifdef JEMALLOC_FILL extern bool opt_zero; #endif +extern size_t opt_narenas; #ifdef DYNAMIC_PAGE_SHIFT extern size_t pagesize; @@ -240,8 +274,19 @@ extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ * Map of pthread_self() --> arenas[???], used for selecting an arena to use * for allocations. */ -extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); +extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define ARENA_GET() arenas_tls +# define ARENA_SET(v) do { \ + arenas_tls = (v); \ +} while (0) +#else +extern pthread_key_t arenas_tsd; +# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) +# define ARENA_SET(v) do { \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) #endif + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -249,10 +294,55 @@ extern __thread arena_t *arenas_map JEMALLOC_ATTR(tls_model("initial-exec")); extern arena_t **arenas; extern unsigned narenas; +#ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +# ifndef NO_TLS +extern __thread thread_allocated_t thread_allocated_tls; +# define ALLOCATED_GET() thread_allocated_tls.allocated +# define DEALLOCATED_GET() thread_allocated_tls.deallocated +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_tls.allocated += a; \ + thread_allocated_tls.deallocated += d; \ +} while (0) +# else +extern pthread_key_t thread_allocated_tsd; +# define ALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? ((thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd))->allocated : 0) +# define DEALLOCATED_GET() \ + (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \ + ? 
((thread_allocated_t \ + *)pthread_getspecific(thread_allocated_tsd))->deallocated : \ + 0) +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_t *thread_allocated = (thread_allocated_t *) \ + pthread_getspecific(thread_allocated_tsd); \ + if (thread_allocated != NULL) { \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ + } else { \ + thread_allocated = (thread_allocated_t *) \ + imalloc(sizeof(thread_allocated_t)); \ + if (thread_allocated != NULL) { \ + pthread_setspecific(thread_allocated_tsd, \ + thread_allocated); \ + thread_allocated->allocated = (a); \ + thread_allocated->deallocated = (d); \ + } \ + } \ +} while (0) +# endif +#endif + arena_t *arenas_extend(unsigned ind); -#ifndef NO_TLS arena_t *choose_arena_hard(void); -#endif +int buferror(int errnum, char *buf, size_t buflen); +void jemalloc_prefork(void); +void jemalloc_postfork(void); #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" @@ -265,8 +355,12 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/base.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_EXTERNS @@ -285,133 +379,55 @@ arena_t *choose_arena_hard(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); void malloc_write(const char *s); arena_t *choose_arena(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* - * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) { - JEMALLOC_P(malloc_message)(NULL, s); + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); } /* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). + * Compute usable size that would result from allocating an object with the + * specified size. */ -JEMALLOC_INLINE arena_t * -choose_arena(void) -{ - arena_t *ret; - - /* - * We can only use TLS if this is a PIC library, since for the static - * library version, libc's malloc is used by TLS allocation, which - * introduces a bootstrapping issue. - */ -#ifndef NO_TLS - ret = arenas_map; - if (ret == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } -#else - if (isthreaded && narenas > 1) { - unsigned long ind; - - /* - * Hash pthread_self() to one of the arenas. There is a prime - * number of arenas, so this has a reasonable chance of - * working. Even so, the hashing can be easily thwarted by - * inconvenient pthread_self() values. Without specific - * knowledge of how pthread_self() calculates values, we can't - * easily do much better than this. - */ - ind = (unsigned long) pthread_self() % narenas; - - /* - * Optimistially assume that arenas[ind] has been initialized. - * At worst, we find out that some other thread has already - * done so, after acquiring the lock in preparation. 
Note that - * this lazy locking also has the effect of lazily forcing - * cache coherency; without the lock acquisition, there's no - * guarantee that modification of arenas[ind] by another thread - * would be seen on this CPU for an arbitrary amount of time. - * - * In general, this approach to modifying a synchronized value - * isn't a good idea, but in this case we only ever modify the - * value once, so things work out well. - */ - ret = arenas[ind]; - if (ret == NULL) { - /* - * Avoid races with another thread that may have already - * initialized arenas[ind]. - */ - malloc_mutex_lock(&arenas_lock); - if (arenas[ind] == NULL) - ret = arenas_extend((unsigned)ind); - else - ret = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - } - } else - ret = arenas[0]; -#endif - - assert(ret != NULL); - return (ret); -} -#endif - -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#ifndef JEMALLOC_ENABLE_INLINE -void *imalloc(size_t size); -void *icalloc(size_t size); -void *ipalloc(size_t alignment, size_t size); -size_t isalloc(const void *ptr); -void *iralloc(void *ptr, size_t size); -void idalloc(void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * -imalloc(size_t size) -{ - - assert(size != 0); - - if (size <= arena_maxclass) - return (arena_malloc(size, false)); - else - return (huge_malloc(size, false)); -} - -JEMALLOC_INLINE void * -icalloc(size_t size) +JEMALLOC_INLINE size_t +s2u(size_t size) { + if (size <= small_maxclass) + return arenas[0]->bins[small_size2bin[size]].reg_size; if (size <= arena_maxclass) - return (arena_malloc(size, true)); - else - return (huge_malloc(size, true)); + return PAGE_CEILING(size); + return CHUNK_CEILING(size); } -JEMALLOC_INLINE void * -ipalloc(size_t alignment, size_t size) +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_INLINE size_t +sa2u(size_t size, size_t alignment, size_t *run_size_p) { - void *ret; - size_t ceil_size; + size_t usize; /* * Round size up to the nearest multiple of alignment. @@ -431,20 +447,23 @@ ipalloc(size_t alignment, size_t size) * will further round up to a power of two, but that never causes * correctness issues. */ - ceil_size = (size + (alignment - 1)) & (-alignment); + usize = (size + (alignment - 1)) & (-alignment); /* - * (ceil_size < size) protects against the combination of maximal + * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. */ - if (ceil_size < size) { + if (usize < size) { /* size_t overflow. */ - return (NULL); + return (0); } - if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE - && ceil_size <= arena_maxclass)) - ret = arena_malloc(ceil_size, false); - else { + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= small_maxclass) { + return + (arenas[0]->bins[small_size2bin[usize]].reg_size); + } + return (PAGE_CEILING(usize)); + } else { size_t run_size; /* @@ -452,30 +471,30 @@ ipalloc(size_t alignment, size_t size) * permanently; it makes later calculations simpler. */ alignment = PAGE_CEILING(alignment); - ceil_size = PAGE_CEILING(size); + usize = PAGE_CEILING(size); /* - * (ceil_size < size) protects against very large sizes within + * (usize < size) protects against very large sizes within * PAGE_SIZE of SIZE_T_MAX. 
* - * (ceil_size + alignment < ceil_size) protects against the - * combination of maximal alignment and ceil_size large enough + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough * to cause overflow. This is similar to the first overflow * check above, but it needs to be repeated due to the new - * ceil_size value, which may now be *equal* to maximal + * usize value, which may now be *equal* to maximal * alignment, whereas before we only detected overflow if the * original size was *greater* than maximal alignment. */ - if (ceil_size < size || ceil_size + alignment < ceil_size) { + if (usize < size || usize + alignment < usize) { /* size_t overflow. */ - return (NULL); + return (0); } /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. */ - if (ceil_size >= alignment) - run_size = ceil_size + alignment - PAGE_SIZE; + if (usize >= alignment) + run_size = usize + alignment - PAGE_SIZE; else { /* * It is possible that (alignment << 1) will cause @@ -488,15 +507,112 @@ ipalloc(size_t alignment, size_t size) */ run_size = (alignment << 1) - PAGE_SIZE; } + if (run_size_p != NULL) + *run_size_p = run_size; - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), alignment, ceil_size, - run_size); - } else if (alignment <= chunksize) - ret = huge_malloc(ceil_size, false); - else - ret = huge_palloc(alignment, ceil_size); + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); } +} + +/* + * Wrapper around malloc_message() that avoids the need for + * JEMALLOC_P(malloc_message)(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + JEMALLOC_P(malloc_message)(NULL, s); +} + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary). 
+ */ +JEMALLOC_INLINE arena_t * +choose_arena(void) +{ + arena_t *ret; + + ret = ARENA_GET(); + if (ret == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } + + return (ret); +} +#endif + +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t size, size_t alignment, bool zero); +size_t isalloc(const void *ptr); +# ifdef JEMALLOC_IVSALLOC +size_t ivsalloc(const void *ptr); +# endif +void idalloc(void *ptr); +void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool no_move); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(size, false)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(size, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t usize; + size_t run_size +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; + + usize = sa2u(size, alignment, &run_size); + if (usize == 0) + return (NULL); + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + ret = arena_malloc(usize, zero); + else if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, alignment, + zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); assert(((uintptr_t)ret & (alignment - 1)) == 0); return (ret); @@ -526,21 +642,18 @@ isalloc(const void *ptr) return (ret); } -JEMALLOC_INLINE void * -iralloc(void *ptr, size_t size) +#ifdef JEMALLOC_IVSALLOC +JEMALLOC_INLINE size_t +ivsalloc(const void *ptr) { - size_t oldsize; - assert(ptr != NULL); - assert(size != 0); + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + return (0); - oldsize = isalloc(ptr); - - if (size <= arena_maxclass) - return (arena_ralloc(ptr, size, oldsize)); - else - return (huge_ralloc(ptr, size, oldsize)); + return (isalloc(ptr)); } +#endif JEMALLOC_INLINE void idalloc(void *ptr) @@ -555,7 +668,70 @@ idalloc(void *ptr) else huge_dalloc(ptr); } + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move) +{ + void *ret; + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + size_t copysize; + + /* + * Existing object alignment is inadquate; allocate new space + * and copy. + */ + if (no_move) + return (NULL); + ret = ipalloc(size + extra, alignment, zero); + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + ret = ipalloc(size, alignment, zero); + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller + * has no expectation that the extra bytes will be reliably + * preserved. + */ + copysize = (size < oldsize) ? 
size : oldsize; + memcpy(ret, ptr, copysize); + idalloc(ptr); + return (ret); + } + + if (no_move) { + if (size <= arena_maxclass) { + return (arena_ralloc_no_move(ptr, oldsize, size, + extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, size, + extra)); + } + } else { + if (size + extra <= arena_maxclass) { + return (arena_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } else { + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } + } +} #endif +#include "jemalloc/internal/prof.h" + #undef JEMALLOC_H_INLINES /******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/mutex.h b/jemalloc/include/jemalloc/internal/mutex.h index 108bfa8..dcca01e 100644 --- a/jemalloc/include/jemalloc/internal/mutex.h +++ b/jemalloc/include/jemalloc/internal/mutex.h @@ -3,6 +3,12 @@ typedef pthread_mutex_t malloc_mutex_t; +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#endif + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -18,6 +24,7 @@ extern bool isthreaded; #endif bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_destroy(malloc_mutex_t *mutex); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h index fb55fb9..7864000 100644 --- a/jemalloc/include/jemalloc/internal/prof.h +++ b/jemalloc/include/jemalloc/internal/prof.h @@ -6,20 +6,25 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_thr_cnt_s prof_thr_cnt_t; typedef struct prof_ctx_s prof_ctx_t; -typedef struct prof_s prof_t; +typedef struct prof_tdata_s prof_tdata_t; /* Option defaults. */ -#define LG_PROF_BT_MAX_DEFAULT 2 +#define PROF_PREFIX_DEFAULT "jeprof" +#define LG_PROF_BT_MAX_DEFAULT 7 #define LG_PROF_SAMPLE_DEFAULT 0 -#define LG_PROF_INTERVAL_DEFAULT 30 +#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_TCMAX_DEFAULT -1 /* * Hard limit on stack backtrace depth. Note that the version of * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers, so increasing - * LG_PROF_BT_MAX requires changing prof_backtrace(). + * a hard-coded number of backtrace frame handlers. */ -#define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ +#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) +# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) +#else +# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ +#endif #define PROF_BT_MAX (1U << LG_PROF_BT_MAX) /* Initial hash table size. */ @@ -34,16 +39,16 @@ typedef struct prof_s prof_t; struct prof_bt_s { /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; + void **vec; + unsigned len; }; #ifdef JEMALLOC_PROF_LIBGCC /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ typedef struct { - prof_bt_t *bt; - unsigned nignore; - unsigned max; + prof_bt_t *bt; + unsigned nignore; + unsigned max; } prof_unwind_data_t; #endif @@ -51,11 +56,11 @@ struct prof_cnt_s { /* * Profiling counters. 
An allocation/deallocation pair can operate on * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t sets_ql, so it is possible for the cur* counters to go + * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require some form - * of 128-bit counter solution; this implementation doesn't bother to - * solve that problem. + * overflow/underflow, but a general solution would require something + * like 128-bit counters; this implementation doesn't bother to solve + * that problem. */ int64_t curobjs; int64_t curbytes; @@ -64,15 +69,18 @@ struct prof_cnt_s { }; struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's sets_ql. */ - ql_elm(prof_thr_cnt_t) link; + /* Linkage into prof_ctx_t's cnts_ql. */ + ql_elm(prof_thr_cnt_t) cnts_link; + + /* Linkage into thread's LRU. */ + ql_elm(prof_thr_cnt_t) lru_link; /* * Associated context. If a thread frees an object that it did not * allocate, it is possible that the context is not cached in the * thread's hash table, in which case it must be able to look up the * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's sets_ql. + * and link it into the prof_ctx_t's cnts_ql. */ prof_ctx_t *ctx; @@ -101,11 +109,11 @@ struct prof_ctx_s { /* Associated backtrace. */ prof_bt_t *bt; - /* Protects cnt_merged and sets_ql. */ + /* Protects cnt_merged and cnts_ql. */ malloc_mutex_t lock; - /* Temporary storage for aggregation during dump. */ - prof_cnt_t cnt_dump; + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; /* When threads exit, they merge their stats into cnt_merged. */ prof_cnt_t cnt_merged; @@ -117,6 +125,31 @@ struct prof_ctx_s { ql_head(prof_thr_cnt_t) cnts_ql; }; +struct prof_tdata_s { + /* + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a + * cache of backtraces, with associated thread-specific prof_thr_cnt_t + * objects. Other threads may read the prof_thr_cnt_t contents, but no + * others will ever write them. + * + * Upon thread exit, the thread must merge all the prof_thr_cnt_t + * counter data into the associated prof_ctx_t objects, and unlink/free + * the prof_thr_cnt_t objects. + */ + ckh_t bt2cnt; + + /* LRU for contents of bt2cnt. */ + ql_head(prof_thr_cnt_t) lru_ql; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; + + /* Sampling state. */ + uint64_t prn_state; + uint64_t threshold; + uint64_t accum; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS @@ -129,11 +162,14 @@ extern bool opt_prof; * to notice state changes. */ extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_udump; /* High-water memory dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. 
*/ +extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ +extern char opt_prof_prefix[PATH_MAX + 1]; /* * Profile dump interval, measured in bytes allocated. Each arena triggers a @@ -150,25 +186,362 @@ extern uint64_t prof_interval; */ extern bool prof_promote; -bool prof_init(prof_t *prof, bool master); -void prof_destroy(prof_t *prof); +/* (1U << opt_lg_prof_bt_max). */ +extern unsigned prof_bt_max; -prof_thr_cnt_t *prof_alloc_prep(size_t size); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr, - size_t old_size, prof_ctx_t *old_ctx); -void prof_free(const void *ptr); +/* Thread-specific backtrace cache, used to reduce bt2ctx contention. */ +#ifndef NO_TLS +extern __thread prof_tdata_t *prof_tdata_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +# define PROF_TCACHE_GET() prof_tdata_tls +# define PROF_TCACHE_SET(v) do { \ + prof_tdata_tls = (v); \ + pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ +} while (0) +#else +# define PROF_TCACHE_GET() \ + ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) +# define PROF_TCACHE_SET(v) do { \ + pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ +} while (0) +#endif +/* + * Same contents as b2cnt_tls, but initialized such that the TSD destructor is + * called when a thread exits, so that prof_tdata_tls contents can be merged, + * unlinked, and deallocated. + */ +extern pthread_key_t prof_tdata_tsd; + +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); void prof_idump(void); bool prof_mdump(const char *filename); -void prof_udump(void); +void prof_gdump(void); +prof_tdata_t *prof_tdata_init(void); void prof_boot0(void); -bool prof_boot1(void); +void prof_boot1(void); +bool prof_boot2(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_thr_cnt_t *prof_alloc_prep(size_t size); +prof_ctx_t *prof_ctx_get(const void *ptr); +void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool prof_sample_accum_update(size_t size); +void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx); +void prof_free(const void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + uint64_t r; + double u; + + /* + * Compute prof_sample_threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). + */ + prn64(r, 53, prof_tdata->prn_state, + (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->threshold = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +} + +JEMALLOC_INLINE prof_thr_cnt_t * +prof_alloc_prep(size_t size) +{ +#ifdef JEMALLOC_ENABLE_INLINE + /* This function does not have its own stack frame, because it is inlined. 
*/ +# define NIGNORE 1 +#else +# define NIGNORE 2 +#endif + prof_thr_cnt_t *ret; + prof_tdata_t *prof_tdata; + prof_bt_t bt; + + assert(size == s2u(size)); + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (opt_prof_active == false) { + /* Sampling is currently inactive, so avoid sampling. */ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } else if (opt_lg_prof_sample == 0) { + /* + * Don't bother with sampling logic, since sampling interval is + * 1. + */ + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else { + if (prof_tdata->threshold == 0) { + /* + * Initialize. Seed the prng differently for each + * thread. + */ + prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_threshold_update(prof_tdata); + } + + /* + * Determine whether to capture a backtrace based on whether + * size is enough for prof_accum to reach + * prof_tdata->threshold. However, delay updating these + * variables until prof_{m,re}alloc(), because we don't know + * for sure that the allocation will succeed. + * + * Use subtraction rather than addition to avoid potential + * integer overflow. + */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } + + return (ret); +#undef NIGNORE +} + +JEMALLOC_INLINE prof_ctx_t * +prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + + ret = arena_prof_ctx_get(ptr); + } else + ret = huge_prof_ctx_get(ptr); + + return (ret); +} + +JEMALLOC_INLINE void +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + assert(chunk->arena->magic == ARENA_MAGIC); + + arena_prof_ctx_set(ptr, ctx); + } else + huge_prof_ctx_set(ptr, ctx); +} + +JEMALLOC_INLINE bool +prof_sample_accum_update(size_t size) +{ + prof_tdata_t *prof_tdata; + + /* Sampling logic is unnecessary if the interval is 1. */ + assert(opt_lg_prof_sample != 0); + + prof_tdata = PROF_TCACHE_GET(); + assert(prof_tdata != NULL); + + /* Take care to avoid integer overflow. */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + prof_tdata->accum -= (prof_tdata->threshold - size); + /* Compute new prof_sample_threshold. */ + prof_sample_threshold_update(prof_tdata); + while (prof_tdata->accum >= prof_tdata->threshold) { + prof_tdata->accum -= prof_tdata->threshold; + prof_sample_threshold_update(prof_tdata); + } + return (false); + } else { + prof_tdata->accum += size; + return (true); + } +} + +JEMALLOC_INLINE void +prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +{ + + assert(ptr != NULL); + assert(size == isalloc(ptr)); + + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the size passed to + * prof_alloc_prep() and prof_malloc(). 
+ */ + assert((uintptr_t)cnt == (uintptr_t)1U); + } + } + + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx) +{ + prof_thr_cnt_t *told_cnt; + + assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + + if (ptr != NULL) { + assert(size == isalloc(ptr)); + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. The size passed to + * prof_alloc_prep() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual size was insufficient to cross + * the sample threshold. + */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } + } + + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(&old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_size; + malloc_mutex_unlock(&old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + cnt->epoch++; + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_size; + } + if ((uintptr_t)cnt > (uintptr_t)1U) { + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + } + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) + cnt->epoch++; + /*********/ + mb_write(); /* Not strictly necessary. */ +} + +JEMALLOC_INLINE void +prof_free(const void *ptr, size_t size) +{ + prof_ctx_t *ctx = prof_ctx_get(ptr); + + if ((uintptr_t)ctx > (uintptr_t)1) { + assert(size == isalloc(ptr)); + prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. 
+ */ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(&ctx->lock); + } + } +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ #endif /* JEMALLOC_PROF */ diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/jemalloc/include/jemalloc/internal/rtree.h new file mode 100644 index 0000000..9d58eba --- /dev/null +++ b/jemalloc/include/jemalloc/internal/rtree.h @@ -0,0 +1,161 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. + */ +#if (LG_SIZEOF_PTR == 2) +# define RTREE_NODESIZE (1U << 14) +#else +# define RTREE_NODESIZE CACHELINE +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifndef JEMALLOC_DEBUG +void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. */ \ +JEMALLOC_INLINE void * \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + void *ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (NULL); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. 
\ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + ret = node[subkey]; \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. + */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + child = (void**)base_alloc(sizeof(void *) << + rtree->level2bits[i+1]); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, sizeof(void *) << + rtree->level2bits[i+1]); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. */ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + node[subkey] = val; + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h index cbf035f..3fc2080 100644 --- a/jemalloc/include/jemalloc/internal/stats.h +++ b/jemalloc/include/jemalloc/internal/stats.h @@ -154,7 +154,7 @@ struct chunk_stats_s { extern bool opt_stats_print; -char *umax2s(uintmax_t x, unsigned base, char *s); +char *u2s(uint64_t x, unsigned base, char *s); #ifdef JEMALLOC_STATS void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h index a8be436..1ad91a9 100644 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ b/jemalloc/include/jemalloc/internal/tcache.h @@ -17,7 +17,7 @@ typedef struct tcache_s tcache_t; /* Number of cache slots for large size classes. */ #define TCACHE_NSLOTS_LARGE 20 -/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */ +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. 
*/ #define LG_TCACHE_MAXCLASS_DEFAULT 15 /* @@ -61,12 +61,25 @@ struct tcache_s { #ifdef JEMALLOC_H_EXTERNS extern bool opt_tcache; -extern ssize_t opt_lg_tcache_maxclass; +extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_gc_sweep; /* Map of thread-specific caches. */ +#ifndef NO_TLS extern __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define TCACHE_GET() tcache_tls +# define TCACHE_SET(v) do { \ + tcache_tls = (tcache_t *)(v); \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#else +# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) +# define TCACHE_SET(v) do { \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#endif +extern pthread_key_t tcache_tsd; /* * Number of tcache bins. There are nbins small-object bins, plus 0 or more @@ -122,14 +135,23 @@ tcache_get(void) if ((isthreaded & opt_tcache) == false) return (NULL); - tcache = tcache_tls; - if ((uintptr_t)tcache <= (uintptr_t)1) { + tcache = TCACHE_GET(); + if ((uintptr_t)tcache <= (uintptr_t)2) { if (tcache == NULL) { tcache = tcache_create(choose_arena()); if (tcache == NULL) return (NULL); - } else + } else { + if (tcache == (void *)(uintptr_t)1) { + /* + * Make a note that an allocator function was + * called after the tcache_thread_cleanup() was + * called. + */ + TCACHE_SET((uintptr_t)2); + } return (NULL); + } } return (tcache); @@ -258,9 +280,9 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) } else { #ifdef JEMALLOC_PROF arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) - >> PAGE_SHIFT); - chunk->map[pageind].bits |= CHUNK_MAP_CLASS_MASK; + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + PAGE_SHIFT); + chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; #endif if (zero == false) { #ifdef JEMALLOC_FILL @@ -299,8 +321,8 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapelm = &chunk->map[pageind-map_bias]; run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); assert(run->magic == ARENA_RUN_MAGIC); @@ -339,7 +361,6 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) arena_chunk_t *chunk; size_t pageind, binind; tcache_bin_t *tbin; - arena_chunk_map_t *mapelm; assert((size & PAGE_MASK) == 0); assert(arena_salloc(ptr) > small_maxclass); @@ -347,8 +368,7 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; - pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT); - mapelm = &chunk->map[pageind]; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = nbins + (size >> PAGE_SHIFT) - 1; #ifdef JEMALLOC_FILL diff --git a/jemalloc/include/jemalloc/internal/zone.h b/jemalloc/include/jemalloc/internal/zone.h new file mode 100644 index 0000000..859b529 --- /dev/null +++ b/jemalloc/include/jemalloc/internal/zone.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." 
+#endif +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +malloc_zone_t *create_zone(void); +void szone2ozone(malloc_zone_t *zone); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in index 8ef8183..4dd3981 100644 --- a/jemalloc/include/jemalloc/jemalloc.h.in +++ b/jemalloc/include/jemalloc/jemalloc.h.in @@ -4,6 +4,9 @@ extern "C" { #endif +#include <limits.h> +#include <strings.h> + #define JEMALLOC_VERSION "@jemalloc_version@" #define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ #define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ @@ -16,7 +19,20 @@ extern "C" { # define JEMALLOC_P(s) s #endif -extern const char *JEMALLOC_P(malloc_options); +#define ALLOCM_LG_ALIGN ((int)0x3f) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) +#endif +#define ALLOCM_ZERO ((int)0x40) +#define ALLOCM_NO_MOVE ((int)0x80) + +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 + +extern const char *JEMALLOC_P(malloc_conf); extern void (*JEMALLOC_P(malloc_message))(void *, const char *); void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); @@ -36,6 +52,14 @@ int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); + #ifdef __cplusplus }; #endif diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in index 8b98d67..b8f3f36 100644 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in @@ -13,6 +13,7 @@ * the API prefixing. */ #undef JEMALLOC_PREFIX +#undef JEMALLOC_CPREFIX #if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) #undef JEMALLOC_P #endif @@ -31,6 +32,9 @@ # define JEMALLOC_ATTR(s) #endif +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +#undef JEMALLOC_CC_SILENCE + /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. @@ -92,6 +96,38 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef NO_TLS +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. 
+ */ +#undef JEMALLOC_OVERRIDE_MEMALIGN +#undef JEMALLOC_OVERRIDE_VALLOC + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#undef JEMALLOC_ZONE +#undef JEMALLOC_ZONE_VERSION + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. + * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. + */ +#undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_FREE + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR |
