author     Jason Evans <je@facebook.com>    2010-10-21 00:39:18 (GMT)
committer  Jason Evans <je@facebook.com>    2010-10-21 00:39:18 (GMT)
commit     93443689a4367cc6fe3de1c9e918adc13d8f9100 (patch)
tree       eac41ca1b9c5b1ce50d43e9a296ecbd407266751
parent     21fb95bba6ea922e0523f269c0d9a32640047a29 (diff)
Add per thread allocation counters, and enhance heap sampling.
Add the "thread.allocated" and "thread.deallocated" mallctls, which can be used to query the total number of bytes ever allocated/deallocated by the calling thread. Add s2u() and sa2u(), which can be used to compute the usable size that will result from an allocation request of a particular size/alignment. Re-factor ipalloc() to use sa2u(). Enhance the heap profiler to trigger samples based on usable size, rather than request size. This has a subtle, but important, impact on the accuracy of heap sampling. For example, previous to this change, 16- and 17-byte objects were sampled at nearly the same rate, but 17-byte objects actually consume 32 bytes each. Therefore it was possible for the sample to be somewhat skewed compared to actual memory usage of the allocated objects.
-rw-r--r--  jemalloc/Makefile.in                                       |   3
-rw-r--r--  jemalloc/doc/jemalloc.3.in                                 |  15
-rw-r--r--  jemalloc/include/jemalloc/internal/jemalloc_internal.h.in  | 258
-rw-r--r--  jemalloc/include/jemalloc/internal/prof.h                  |   6
-rw-r--r--  jemalloc/src/arena.c                                       |   3
-rw-r--r--  jemalloc/src/ctl.c                                         |  14
-rw-r--r--  jemalloc/src/jemalloc.c                                    | 250
-rw-r--r--  jemalloc/src/prof.c                                        |  62
-rw-r--r--  jemalloc/test/allocated.c                                  | 105
-rw-r--r--  jemalloc/test/allocated.exp                                |   2
10 files changed, 563 insertions, 155 deletions
diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in
index 7863c1b..ca807fd 100644
--- a/jemalloc/Makefile.in
+++ b/jemalloc/Makefile.in
@@ -58,7 +58,8 @@ DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \
@objroot@lib/libjemalloc@install_suffix@.$(SO) \
@objroot@lib/libjemalloc@install_suffix@_pic.a
MAN3 := @objroot@doc/jemalloc@install_suffix@.3
-CTESTS := @srcroot@test/allocm.c @srcroot@test/posix_memalign.c \
+CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \
+ @srcroot@test/posix_memalign.c \
@srcroot@test/rallocm.c @srcroot@test/thread_arena.c
.PHONY: all dist install check clean distclean relclean
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index c8d2f29..5202a2b 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -888,6 +888,21 @@ mallctl), it will be automatically initialized as a side effect of calling this
interface.
.Ed
.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.allocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever allocated by the calling thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "thread.deallocated (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Get the total number of bytes ever deallocated by the calling
+@roff_stats@thread.
+@roff_stats@This counter has the potential to wrap around; it is up to the
+@roff_stats@application to appropriately interpret the counter in such cases.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
.It Sy "config.debug (bool) r-"
.Bd -ragged -offset indent -compact
--enable-debug was specified during build configuration.
diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
index 6ad7b06..eb60962 100644
--- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in
@@ -291,6 +291,50 @@ extern pthread_key_t arenas_tsd;
extern arena_t **arenas;
extern unsigned narenas;
+#ifdef JEMALLOC_STATS
+typedef struct {
+ uint64_t allocated;
+ uint64_t deallocated;
+} thread_allocated_t;
+# ifndef NO_TLS
+extern __thread thread_allocated_t thread_allocated_tls;
+# define ALLOCATED_GET() thread_allocated_tls.allocated
+# define DEALLOCATED_GET() thread_allocated_tls.deallocated
+# define ALLOCATED_ADD(a, d) do { \
+ thread_allocated_tls.allocated += a; \
+ thread_allocated_tls.deallocated += d; \
+} while (0)
+# else
+extern pthread_key_t thread_allocated_tsd;
+# define ALLOCATED_GET() \
+ (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
+ ? ((thread_allocated_t *) \
+ pthread_getspecific(thread_allocated_tsd))->allocated : 0)
+# define DEALLOCATED_GET() \
+ (uint64_t)((pthread_getspecific(thread_allocated_tsd) != NULL) \
+ ? ((thread_allocated_t \
+ *)pthread_getspecific(thread_allocated_tsd))->deallocated : \
+ 0)
+# define ALLOCATED_ADD(a, d) do { \
+ thread_allocated_t *thread_allocated = (thread_allocated_t *) \
+ pthread_getspecific(thread_allocated_tsd); \
+ if (thread_allocated != NULL) { \
+ thread_allocated->allocated += (a); \
+ thread_allocated->deallocated += (d); \
+ } else { \
+ thread_allocated = (thread_allocated_t *) \
+ imalloc(sizeof(thread_allocated_t)); \
+ if (thread_allocated != NULL) { \
+ pthread_setspecific(thread_allocated_tsd, \
+ thread_allocated); \
+ thread_allocated->allocated = (a); \
+ thread_allocated->deallocated = (d); \
+ } \
+ } \
+} while (0)
+# endif
+#endif
+
arena_t *arenas_extend(unsigned ind);
arena_t *choose_arena_hard(void);
int buferror(int errnum, char *buf, size_t buflen);
@@ -333,6 +377,8 @@ void jemalloc_postfork(void);
#ifndef JEMALLOC_ENABLE_INLINE
size_t pow2_ceil(size_t x);
+size_t s2u(size_t size);
+size_t sa2u(size_t size, size_t alignment, size_t *run_size_p);
void malloc_write(const char *s);
arena_t *choose_arena(void);
#endif
@@ -357,6 +403,117 @@ pow2_ceil(size_t x)
}
/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size.
+ */
+JEMALLOC_INLINE size_t
+s2u(size_t size)
+{
+
+ if (size <= small_maxclass)
+ return arenas[0]->bins[small_size2bin[size]].reg_size;
+ if (size <= arena_maxclass)
+ return PAGE_CEILING(size);
+ return CHUNK_CEILING(size);
+}
+
+/*
+ * Compute usable size that would result from allocating an object with the
+ * specified size and alignment.
+ */
+JEMALLOC_INLINE size_t
+sa2u(size_t size, size_t alignment, size_t *run_size_p)
+{
+ size_t usize;
+
+ /*
+ * Round size up to the nearest multiple of alignment.
+ *
+ * This done, we can take advantage of the fact that for each small
+ * size class, every object is aligned at the smallest power of two
+ * that is non-zero in the base two representation of the size. For
+ * example:
+ *
+ * Size | Base 2 | Minimum alignment
+ * -----+----------+------------------
+ * 96 | 1100000 | 32
+ * 144 | 10100000 | 32
+ * 192 | 11000000 | 64
+ *
+ * Depending on runtime settings, it is possible that arena_malloc()
+ * will further round up to a power of two, but that never causes
+ * correctness issues.
+ */
+ usize = (size + (alignment - 1)) & (-alignment);
+ /*
+ * (usize < size) protects against the combination of maximal
+ * alignment and size greater than maximal alignment.
+ */
+ if (usize < size) {
+ /* size_t overflow. */
+ return (0);
+ }
+
+ if (usize <= arena_maxclass && alignment <= PAGE_SIZE) {
+ if (usize <= small_maxclass) {
+ return
+ (arenas[0]->bins[small_size2bin[usize]].reg_size);
+ }
+ return (PAGE_CEILING(usize));
+ } else {
+ size_t run_size;
+
+ /*
+ * We can't achieve subpage alignment, so round up alignment
+ * permanently; it makes later calculations simpler.
+ */
+ alignment = PAGE_CEILING(alignment);
+ usize = PAGE_CEILING(size);
+ /*
+ * (usize < size) protects against very large sizes within
+ * PAGE_SIZE of SIZE_T_MAX.
+ *
+ * (usize + alignment < usize) protects against the
+ * combination of maximal alignment and usize large enough
+ * to cause overflow. This is similar to the first overflow
+ * check above, but it needs to be repeated due to the new
+ * usize value, which may now be *equal* to maximal
+ * alignment, whereas before we only detected overflow if the
+ * original size was *greater* than maximal alignment.
+ */
+ if (usize < size || usize + alignment < usize) {
+ /* size_t overflow. */
+ return (0);
+ }
+
+ /*
+ * Calculate the size of the over-size run that arena_palloc()
+ * would need to allocate in order to guarantee the alignment.
+ */
+ if (usize >= alignment)
+ run_size = usize + alignment - PAGE_SIZE;
+ else {
+ /*
+ * It is possible that (alignment << 1) will cause
+ * overflow, but it doesn't matter because we also
+ * subtract PAGE_SIZE, which in the case of overflow
+ * leaves us with a very large run_size. That causes
+ * the first conditional below to fail, which means
+ * that the bogus run_size value never gets used for
+ * anything important.
+ */
+ run_size = (alignment << 1) - PAGE_SIZE;
+ }
+ if (run_size_p != NULL)
+ *run_size_p = run_size;
+
+ if (run_size <= arena_maxclass)
+ return (PAGE_CEILING(usize));
+ return (CHUNK_CEILING(usize));
+ }
+}
+
+/*
* Wrapper around malloc_message() that avoids the need for
* JEMALLOC_P(malloc_message)(...) throughout the code.
*/
@@ -435,92 +592,25 @@ JEMALLOC_INLINE void *
ipalloc(size_t size, size_t alignment, bool zero)
{
void *ret;
- size_t ceil_size;
+ size_t usize;
+ size_t run_size
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
- /*
- * Round size up to the nearest multiple of alignment.
- *
- * This done, we can take advantage of the fact that for each small
- * size class, every object is aligned at the smallest power of two
- * that is non-zero in the base two representation of the size. For
- * example:
- *
- * Size | Base 2 | Minimum alignment
- * -----+----------+------------------
- * 96 | 1100000 | 32
- * 144 | 10100000 | 32
- * 192 | 11000000 | 64
- *
- * Depending on runtime settings, it is possible that arena_malloc()
- * will further round up to a power of two, but that never causes
- * correctness issues.
- */
- ceil_size = (size + (alignment - 1)) & (-alignment);
- /*
- * (ceil_size < size) protects against the combination of maximal
- * alignment and size greater than maximal alignment.
- */
- if (ceil_size < size) {
- /* size_t overflow. */
+ usize = sa2u(size, alignment, &run_size);
+ if (usize == 0)
return (NULL);
- }
-
- if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
- && ceil_size <= arena_maxclass))
- ret = arena_malloc(ceil_size, zero);
- else {
- size_t run_size;
-
- /*
- * We can't achieve subpage alignment, so round up alignment
- * permanently; it makes later calculations simpler.
- */
- alignment = PAGE_CEILING(alignment);
- ceil_size = PAGE_CEILING(size);
- /*
- * (ceil_size < size) protects against very large sizes within
- * PAGE_SIZE of SIZE_T_MAX.
- *
- * (ceil_size + alignment < ceil_size) protects against the
- * combination of maximal alignment and ceil_size large enough
- * to cause overflow. This is similar to the first overflow
- * check above, but it needs to be repeated due to the new
- * ceil_size value, which may now be *equal* to maximal
- * alignment, whereas before we only detected overflow if the
- * original size was *greater* than maximal alignment.
- */
- if (ceil_size < size || ceil_size + alignment < ceil_size) {
- /* size_t overflow. */
- return (NULL);
- }
-
- /*
- * Calculate the size of the over-size run that arena_palloc()
- * would need to allocate in order to guarantee the alignment.
- */
- if (ceil_size >= alignment)
- run_size = ceil_size + alignment - PAGE_SIZE;
- else {
- /*
- * It is possible that (alignment << 1) will cause
- * overflow, but it doesn't matter because we also
- * subtract PAGE_SIZE, which in the case of overflow
- * leaves us with a very large run_size. That causes
- * the first conditional below to fail, which means
- * that the bogus run_size value never gets used for
- * anything important.
- */
- run_size = (alignment << 1) - PAGE_SIZE;
- }
-
- if (run_size <= arena_maxclass) {
- ret = arena_palloc(choose_arena(), ceil_size, run_size,
- alignment, zero);
- } else if (alignment <= chunksize)
- ret = huge_malloc(ceil_size, zero);
- else
- ret = huge_palloc(ceil_size, alignment, zero);
- }
+ if (usize <= arena_maxclass && alignment <= PAGE_SIZE)
+ ret = arena_malloc(usize, zero);
+ else if (run_size <= arena_maxclass) {
+ ret = arena_palloc(choose_arena(), usize, run_size, alignment,
+ zero);
+ } else if (alignment <= chunksize)
+ ret = huge_malloc(usize, zero);
+ else
+ ret = huge_palloc(usize, alignment, zero);
assert(((uintptr_t)ret & (alignment - 1)) == 0);
return (ret);
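The comment block above sa2u() explains its alignment handling with a small size table. Below is a standalone sketch (not part of the patch; align_up() is a hypothetical helper name) of the rounding expression sa2u() opens with, plus the overflow case that the "usize < size" check reports by returning 0:

#include <assert.h>
#include <stddef.h>

/*
 * Round size up to the next multiple of alignment; alignment must be a
 * power of two.  This is the expression sa2u() opens with.
 */
static size_t
align_up(size_t size, size_t alignment)
{

    return ((size + (alignment - 1)) & -alignment);
}

int
main(void)
{

    assert(align_up(100, 16) == 112);
    assert(align_up(96, 32) == 96);     /* Already a multiple of 32. */
    assert(align_up(1, 4096) == 4096);
    /* Overflow: the result wraps below size, which sa2u() reports as 0. */
    assert(align_up((size_t)-1, 4096) < (size_t)-1);
    return (0);
}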
diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h
index 1aa85bb..0d139da 100644
--- a/jemalloc/include/jemalloc/internal/prof.h
+++ b/jemalloc/include/jemalloc/internal/prof.h
@@ -179,9 +179,9 @@ extern bool prof_promote;
prof_thr_cnt_t *prof_alloc_prep(size_t size);
prof_ctx_t *prof_ctx_get(const void *ptr);
-void prof_malloc(const void *ptr, prof_thr_cnt_t *cnt);
-void prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_ctx_t *old_ctx);
+void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt);
+void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+ const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx);
void prof_free(const void *ptr);
void prof_idump(void);
bool prof_mdump(const char *filename);
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 52f3d66..a54a090 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -1613,7 +1613,8 @@ arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment,
arena_chunk_t *chunk;
assert((size & PAGE_MASK) == 0);
- assert((alignment & PAGE_MASK) == 0);
+
+ alignment = PAGE_CEILING(alignment);
malloc_mutex_lock(&arena->lock);
ret = (void *)arena_run_alloc(arena, alloc_size, true, zero);
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index edbbb20..dbc5cd4 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -42,6 +42,10 @@ CTL_PROTO(epoch)
CTL_PROTO(tcache_flush)
#endif
CTL_PROTO(thread_arena)
+#ifdef JEMALLOC_STATS
+CTL_PROTO(thread_allocated)
+CTL_PROTO(thread_deallocated)
+#endif
CTL_PROTO(config_debug)
CTL_PROTO(config_dss)
CTL_PROTO(config_dynamic_page_shift)
@@ -216,6 +220,11 @@ static const ctl_node_t tcache_node[] = {
static const ctl_node_t thread_node[] = {
{NAME("arena"), CTL(thread_arena)}
+#ifdef JEMALLOC_STATS
+ ,
+ {NAME("allocated"), CTL(thread_allocated)},
+ {NAME("deallocated"), CTL(thread_deallocated)}
+#endif
};
static const ctl_node_t config_node[] = {
@@ -1092,6 +1101,11 @@ RETURN:
return (ret);
}
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
+CTL_RO_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
+#endif
+
/******************************************************************************/
#ifdef JEMALLOC_DEBUG
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 6f9ec76..f3cba15 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -15,14 +15,22 @@ __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec"));
pthread_key_t arenas_tsd;
#endif
+#ifdef JEMALLOC_STATS
+# ifndef NO_TLS
+__thread thread_allocated_t thread_allocated_tls;
+# else
+pthread_key_t thread_allocated_tsd;
+# endif
+#endif
+
/* Set to true once the allocator has been initialized. */
-static bool malloc_initialized = false;
+static bool malloc_initialized = false;
/* Used to let the initializing thread recursively allocate. */
-static pthread_t malloc_initializer = (unsigned long)0;
+static pthread_t malloc_initializer = (unsigned long)0;
/* Used to avoid initialization races. */
-static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
+static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER;
#ifdef DYNAMIC_PAGE_SHIFT
size_t pagesize;
@@ -63,6 +71,9 @@ static int opt_narenas_lshift = 0;
static void wrtmessage(void *cbopaque, const char *s);
static void stats_print_atexit(void);
static unsigned malloc_ncpus(void);
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+static void thread_allocated_cleanup(void *arg);
+#endif
static bool malloc_init_hard(void);
/******************************************************************************/
@@ -222,6 +233,17 @@ malloc_ncpus(void)
return (ret);
}
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+static void
+thread_allocated_cleanup(void *arg)
+{
+ uint64_t *allocated = (uint64_t *)arg;
+
+ if (allocated != NULL)
+ idalloc(allocated);
+}
+#endif
+
/*
* FreeBSD's pthreads implementation calls malloc(3), so the malloc
* implementation has to take pains to avoid infinite recursion during
@@ -633,6 +655,15 @@ MALLOC_OUT:
return (true);
}
+#if (defined(JEMALLOC_STATS) && defined(NO_TLS))
+ /* Initialize allocation counters before any allocations can occur. */
+ if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup)
+ != 0) {
+ malloc_mutex_unlock(&init_lock);
+ return (true);
+ }
+#endif
+
/*
* Create enough scaffolding to allow recursive allocation in
* malloc_ncpus().
@@ -766,6 +797,13 @@ void *
JEMALLOC_P(malloc)(size_t size)
{
void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
@@ -801,20 +839,26 @@ JEMALLOC_P(malloc)(size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ usize = s2u(size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL)
- arena_prof_promoted(ret, size);
+ arena_prof_promoted(ret, usize);
} else
ret = imalloc(size);
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
ret = imalloc(size);
+ }
OOM:
if (ret == NULL) {
@@ -833,7 +877,13 @@ RETURN:
#endif
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
- prof_malloc(ret, cnt);
+ prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, 0);
+ }
#endif
return (ret);
}
@@ -845,6 +895,13 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
{
int ret;
void *result;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
@@ -896,17 +953,18 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ usize = sa2u(size, alignment, NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
result = NULL;
ret = EINVAL;
} else {
if (prof_promote && (uintptr_t)cnt !=
- (uintptr_t)1U && size <= small_maxclass) {
+ (uintptr_t)1U && usize <= small_maxclass) {
result = ipalloc(small_maxclass+1,
alignment, false);
if (result != NULL) {
arena_prof_promoted(result,
- size);
+ usize);
}
} else {
result = ipalloc(size, alignment,
@@ -915,7 +973,12 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
}
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = sa2u(size, alignment, NULL);
+#endif
result = ipalloc(size, alignment, false);
+ }
}
if (result == NULL) {
@@ -934,9 +997,15 @@ JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size)
ret = 0;
RETURN:
+#ifdef JEMALLOC_STATS
+ if (result != NULL) {
+ assert(usize == isalloc(result));
+ ALLOCATED_ADD(usize, 0);
+ }
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof && result != NULL)
- prof_malloc(result, cnt);
+ prof_malloc(result, usize, cnt);
#endif
return (ret);
}
@@ -948,6 +1017,13 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
{
void *ret;
size_t num_size;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+ = 0
+# endif
+ ;
+#endif
#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
@@ -988,20 +1064,26 @@ JEMALLOC_P(calloc)(size_t num, size_t size)
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(num_size)) == NULL) {
+ usize = s2u(num_size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto RETURN;
}
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && num_size
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
<= small_maxclass) {
ret = icalloc(small_maxclass+1);
if (ret != NULL)
- arena_prof_promoted(ret, num_size);
+ arena_prof_promoted(ret, usize);
} else
ret = icalloc(num_size);
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(num_size);
+#endif
ret = icalloc(num_size);
+ }
RETURN:
if (ret == NULL) {
@@ -1017,7 +1099,13 @@ RETURN:
#ifdef JEMALLOC_PROF
if (opt_prof && ret != NULL)
- prof_malloc(ret, cnt);
+ prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, 0);
+ }
#endif
return (ret);
}
@@ -1027,12 +1115,15 @@ void *
JEMALLOC_P(realloc)(void *ptr, size_t size)
{
void *ret;
-#ifdef JEMALLOC_PROF
- size_t old_size
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t usize
# ifdef JEMALLOC_CC_SILENCE
= 0
# endif
;
+ size_t old_size = 0;
+#endif
+#ifdef JEMALLOC_PROF
prof_thr_cnt_t *cnt
# ifdef JEMALLOC_CC_SILENCE
= NULL
@@ -1053,9 +1144,11 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
#ifdef JEMALLOC_SYSV
else {
if (ptr != NULL) {
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ old_size = isalloc(ptr);
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof) {
- old_size = isalloc(ptr);
old_ctx = prof_ctx_get(ptr);
cnt = NULL;
}
@@ -1064,7 +1157,6 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
}
#ifdef JEMALLOC_PROF
else if (opt_prof) {
- old_size = 0;
old_ctx = NULL;
cnt = NULL;
}
@@ -1079,25 +1171,33 @@ JEMALLOC_P(realloc)(void *ptr, size_t size)
assert(malloc_initialized || malloc_initializer ==
pthread_self());
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ old_size = isalloc(ptr);
+#endif
#ifdef JEMALLOC_PROF
if (opt_prof) {
- old_size = isalloc(ptr);
+ usize = s2u(size);
old_ctx = prof_ctx_get(ptr);
- if ((cnt = prof_alloc_prep(size)) == NULL) {
+ if ((cnt = prof_alloc_prep(usize)) == NULL) {
ret = NULL;
goto OOM;
}
if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U &&
- size <= small_maxclass) {
+ usize <= small_maxclass) {
ret = iralloc(ptr, small_maxclass+1, 0, 0,
false, false);
if (ret != NULL)
- arena_prof_promoted(ret, size);
+ arena_prof_promoted(ret, usize);
} else
ret = iralloc(ptr, size, 0, 0, false, false);
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
ret = iralloc(ptr, size, 0, 0, false, false);
+ }
#ifdef JEMALLOC_PROF
OOM:
@@ -1114,10 +1214,8 @@ OOM:
}
} else {
#ifdef JEMALLOC_PROF
- if (opt_prof) {
- old_size = 0;
+ if (opt_prof)
old_ctx = NULL;
- }
#endif
if (malloc_init()) {
#ifdef JEMALLOC_PROF
@@ -1128,23 +1226,29 @@ OOM:
} else {
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL)
+ usize = s2u(size);
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
ret = NULL;
else {
if (prof_promote && (uintptr_t)cnt !=
- (uintptr_t)1U && size <=
+ (uintptr_t)1U && usize <=
small_maxclass) {
ret = imalloc(small_maxclass+1);
if (ret != NULL) {
arena_prof_promoted(ret,
- size);
+ usize);
}
} else
ret = imalloc(size);
}
} else
#endif
+ {
+#ifdef JEMALLOC_STATS
+ usize = s2u(size);
+#endif
ret = imalloc(size);
+ }
}
if (ret == NULL) {
@@ -1164,7 +1268,13 @@ RETURN:
#endif
#ifdef JEMALLOC_PROF
if (opt_prof)
- prof_realloc(ret, cnt, ptr, old_size, old_ctx);
+ prof_realloc(ret, usize, cnt, ptr, old_size, old_ctx);
+#endif
+#ifdef JEMALLOC_STATS
+ if (ret != NULL) {
+ assert(usize == isalloc(ret));
+ ALLOCATED_ADD(usize, old_size);
+ }
#endif
return (ret);
}
@@ -1182,6 +1292,9 @@ JEMALLOC_P(free)(void *ptr)
if (opt_prof)
prof_free(ptr);
#endif
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(0, isalloc(ptr));
+#endif
idalloc(ptr);
}
}
@@ -1325,6 +1438,7 @@ int
JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
{
void *p;
+ size_t usize;
size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
& (SIZE_T_MAX-1));
bool zero = flags & ALLOCM_ZERO;
@@ -1340,30 +1454,48 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags)
#ifdef JEMALLOC_PROF
if (opt_prof) {
- if ((cnt = prof_alloc_prep(size)) == NULL)
+ usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment,
+ NULL);
+ if ((cnt = prof_alloc_prep(usize)) == NULL)
goto OOM;
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <=
small_maxclass) {
p = iallocm(small_maxclass+1, alignment, zero);
if (p == NULL)
goto OOM;
- arena_prof_promoted(p, size);
+ arena_prof_promoted(p, usize);
} else {
p = iallocm(size, alignment, zero);
if (p == NULL)
goto OOM;
}
+
+ if (rsize != NULL)
+ *rsize = usize;
} else
#endif
{
p = iallocm(size, alignment, zero);
if (p == NULL)
goto OOM;
+#ifndef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ {
+ usize = (alignment == 0) ? s2u(size) : sa2u(size,
+ alignment, NULL);
+#ifdef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ *rsize = usize;
+ }
}
*ptr = p;
- if (rsize != NULL)
- *rsize = isalloc(p);
+#ifdef JEMALLOC_STATS
+ assert(usize == isalloc(p));
+ ALLOCATED_ADD(usize, 0);
+#endif
return (ALLOCM_SUCCESS);
OOM:
#ifdef JEMALLOC_XMALLOC
@@ -1384,12 +1516,15 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
int flags)
{
void *p, *q;
+ size_t usize;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ size_t old_size;
+#endif
size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK)
& (SIZE_T_MAX-1));
bool zero = flags & ALLOCM_ZERO;
bool no_move = flags & ALLOCM_NO_MOVE;
#ifdef JEMALLOC_PROF
- size_t old_size;
prof_thr_cnt_t *cnt;
prof_ctx_t *old_ctx;
#endif
@@ -1403,36 +1538,60 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra,
p = *ptr;
#ifdef JEMALLOC_PROF
if (opt_prof) {
+ /*
+ * usize isn't knowable before iralloc() returns when extra is
+ * non-zero. Therefore, compute its maximum possible value and
+ * use that in prof_alloc_prep() to decide whether to capture a
+ * backtrace. prof_realloc() will use the actual usize to
+ * decide whether to sample.
+ */
+ size_t max_usize = (alignment == 0) ? s2u(size+extra) :
+ sa2u(size+extra, alignment, NULL);
old_size = isalloc(p);
old_ctx = prof_ctx_get(p);
- if ((cnt = prof_alloc_prep(size)) == NULL)
+ if ((cnt = prof_alloc_prep(max_usize)) == NULL)
goto OOM;
- if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && size <=
- small_maxclass) {
+ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize
+ <= small_maxclass) {
q = iralloc(p, small_maxclass+1, (small_maxclass+1 >=
size+extra) ? 0 : size+extra - (small_maxclass+1),
alignment, zero, no_move);
if (q == NULL)
goto ERR;
- arena_prof_promoted(q, size);
+ usize = isalloc(q);
+ arena_prof_promoted(q, usize);
} else {
q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL)
goto ERR;
+ usize = isalloc(q);
}
- prof_realloc(q, cnt, p, old_size, old_ctx);
+ prof_realloc(q, usize, cnt, p, old_size, old_ctx);
} else
#endif
{
+#ifdef JEMALLOC_STATS
+ old_size = isalloc(p);
+#endif
q = iralloc(p, size, extra, alignment, zero, no_move);
if (q == NULL)
goto ERR;
+#ifndef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ {
+ usize = isalloc(q);
+#ifdef JEMALLOC_STATS
+ if (rsize != NULL)
+#endif
+ *rsize = usize;
+ }
}
*ptr = q;
- if (rsize != NULL)
- *rsize = isalloc(q);
-
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(usize, old_size);
+#endif
return (ALLOCM_SUCCESS);
ERR:
if (no_move)
@@ -1484,6 +1643,9 @@ JEMALLOC_P(dallocm)(void *ptr, int flags)
if (opt_prof)
prof_free(ptr);
#endif
+#ifdef JEMALLOC_STATS
+ ALLOCATED_ADD(0, isalloc(ptr));
+#endif
idalloc(ptr);
return (ALLOCM_SUCCESS);
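The jemalloc.c hunks above wire up the NO_TLS fallback for the per-thread counters: pthread_key_create() registers thread_allocated_cleanup() as the key's destructor, and ALLOCATED_ADD() (defined in jemalloc_internal.h.in) lazily allocates the per-thread record on first use. A minimal standalone sketch of the same pattern, with hypothetical names and plain calloc()/free() standing in for imalloc()/idalloc():

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct {
    uint64_t allocated;
    uint64_t deallocated;
} counters_t;

static pthread_key_t counters_key;

static void
counters_cleanup(void *arg)
{

    free(arg);  /* The patch uses idalloc() here. */
}

/* Call once at startup; the patch does the equivalent in malloc_init_hard(). */
static void
counters_init(void)
{

    pthread_key_create(&counters_key, counters_cleanup);
}

static void
counters_add(uint64_t a, uint64_t d)
{
    counters_t *c = (counters_t *)pthread_getspecific(counters_key);

    if (c == NULL) {
        /* First use on this thread: allocate and register the record. */
        c = (counters_t *)calloc(1, sizeof(*c));
        if (c == NULL)
            return;     /* Silently drop the update, as ALLOCATED_ADD() does. */
        pthread_setspecific(counters_key, c);
    }
    c->allocated += a;
    c->deallocated += d;
}

int
main(void)
{

    counters_init();
    counters_add(32, 0);    /* e.g. after a malloc() whose usable size is 32 */
    counters_add(0, 32);    /* e.g. after the matching free() */
    return (0);
}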
diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c
index e715da9..583a6e9 100644
--- a/jemalloc/src/prof.c
+++ b/jemalloc/src/prof.c
@@ -47,7 +47,8 @@ static __thread prof_tcache_t *prof_tcache_tls
pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0)
#else
-# define PROF_TCACHE_GET() ((ckh_t *)pthread_getspecific(prof_tcache_tsd))
+# define PROF_TCACHE_GET() \
+ ((prof_tcache_t *)pthread_getspecific(prof_tcache_tsd))
# define PROF_TCACHE_SET(v) do { \
pthread_setspecific(prof_tcache_tsd, (void *)(v)); \
} while (0)
@@ -69,7 +70,7 @@ static __thread void **vec_tls
pthread_setspecific(vec_tsd, (void *)(v)); \
} while (0)
#else
-# define VEC_GET() ((ckh_t *)pthread_getspecific(vec_tsd))
+# define VEC_GET() ((void **)pthread_getspecific(vec_tsd))
# define VEC_SET(v) do { \
pthread_setspecific(vec_tsd, (void *)(v)); \
} while (0)
@@ -106,7 +107,8 @@ prof_sample_state_t prof_sample_state_oom;
r = (prof_sample_state_t *)pthread_getspecific( \
prof_sample_state_tsd); \
if (r == NULL) { \
- r = ipalloc(sizeof(prof_sample_state_t), CACHELINE); \
+ r = ipalloc(sizeof(prof_sample_state_t), CACHELINE, \
+ false); \
if (r == NULL) { \
malloc_write("<jemalloc>: Error in heap " \
"profiler: out of memory; subsequent heap " \
@@ -658,6 +660,8 @@ prof_alloc_prep(size_t size)
void **vec;
prof_bt_t bt;
+ assert(size == s2u(size));
+
vec = VEC_GET();
if (vec == NULL) {
vec = imalloc(sizeof(void *) * prof_bt_max);
@@ -750,7 +754,7 @@ prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
huge_prof_ctx_set(ptr, ctx);
}
-static inline void
+static inline bool
prof_sample_accum_update(size_t size)
{
prof_sample_state_t *prof_sample_state;
@@ -771,22 +775,33 @@ prof_sample_accum_update(size_t size)
prof_sample_state->threshold;
prof_sample_threshold_update();
}
- } else
+ return (false);
+ } else {
prof_sample_state->accum += size;
+ return (true);
+ }
}
void
-prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
+prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt)
{
- size_t size;
assert(ptr != NULL);
+ assert(size == s2u(size));
if (opt_lg_prof_sample != 0) {
- size = isalloc(ptr);
- prof_sample_accum_update(size);
- } else if ((uintptr_t)cnt > (uintptr_t)1U)
- size = isalloc(ptr);
+ if (prof_sample_accum_update(size)) {
+ /*
+ * Don't sample. For malloc()-like allocation, it is
+ * always possible to tell in advance how large an
+ * object's usable size will be, so there should never
+ * be a difference between the size passed to
+ * prof_alloc_prep() and prof_malloc().
+ */
+ assert(false);
+ return;
+ }
+ }
if ((uintptr_t)cnt > (uintptr_t)1U) {
prof_ctx_set(ptr, cnt->ctx);
@@ -813,24 +828,27 @@ prof_malloc(const void *ptr, prof_thr_cnt_t *cnt)
}
void
-prof_realloc(const void *ptr, prof_thr_cnt_t *cnt, const void *old_ptr,
- size_t old_size, prof_ctx_t *old_ctx)
+prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt,
+ const void *old_ptr, size_t old_size, prof_ctx_t *old_ctx)
{
- size_t size
-#ifdef JEMALLOC_CC_SILENCE
- = 0
-#endif
- ;
prof_thr_cnt_t *told_cnt;
assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U);
if (ptr != NULL) {
if (opt_lg_prof_sample != 0) {
- size = isalloc(ptr);
- prof_sample_accum_update(size);
- } else if ((uintptr_t)cnt > (uintptr_t)1U)
- size = isalloc(ptr);
+ if (prof_sample_accum_update(size)) {
+ /*
+ * Don't sample. The size passed to
+ * prof_alloc_prep() was larger than what
+ * actually got allocated, so a backtrace was
+ * captured for this allocation, even though
+ * its actual size was insufficient to cross
+ * the sample threshold.
+ */
+ return;
+ }
+ }
}
if ((uintptr_t)old_ctx > (uintptr_t)1U) {
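The prof.c changes above make prof_sample_accum_update() report whether an allocation should be skipped, and feed it usable size instead of request size. A rough standalone sketch (not part of the patch, hypothetical names) of that byte-accumulator sampling decision; the real code also re-randomizes the threshold via prof_sample_threshold_update() each time a sample is taken:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef struct {
    uint64_t accum;     /* Usable bytes seen since the last sample. */
    uint64_t threshold; /* Roughly 2^opt_lg_prof_sample bytes. */
} sampler_t;

/* Returns true if this allocation should NOT be sampled. */
static bool
sampler_skip(sampler_t *s, size_t usize)
{

    if (s->accum + usize >= s->threshold) {
        /* Crossed the threshold: charge the excess and sample. */
        s->accum = s->accum + usize - s->threshold;
        return (false);
    }
    s->accum += usize;
    return (true);
}

int
main(void)
{
    sampler_t s = {0, 1 << 19}; /* 512 KiB between samples, for example. */
    size_t i;
    unsigned samples = 0;

    for (i = 0; i < 100000; i++) {
        /* Accumulate usable size (32), not the 17-byte request. */
        if (sampler_skip(&s, 32) == false)
            samples++;
    }
    return (samples > 0 ? 0 : 1);
}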
diff --git a/jemalloc/test/allocated.c b/jemalloc/test/allocated.c
new file mode 100644
index 0000000..64a1735
--- /dev/null
+++ b/jemalloc/test/allocated.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+
+#define JEMALLOC_MANGLE
+#include "jemalloc_test.h"
+
+void *
+thread_start(void *arg)
+{
+ int err;
+ void *p;
+ uint64_t a0, a1, d0, d1;
+ size_t sz, usize;
+
+ sz = sizeof(a0);
+ if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL,
+ 0))) {
+ if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+ assert(false);
+#endif
+ goto RETURN;
+ }
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ exit(1);
+ }
+
+ sz = sizeof(d0);
+ if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL,
+ 0))) {
+ if (err == ENOENT) {
+#ifdef JEMALLOC_STATS
+ assert(false);
+#endif
+ goto RETURN;
+ }
+ fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__,
+ strerror(err));
+ exit(1);
+ }
+
+ p = JEMALLOC_P(malloc)(1);
+ if (p == NULL) {
+ fprintf(stderr, "%s(): Error in malloc()\n", __func__);
+ exit(1);
+ }
+
+ sz = sizeof(a1);
+ JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0);
+
+ usize = JEMALLOC_P(malloc_usable_size)(p);
+ assert(a0 + usize <= a1);
+
+ JEMALLOC_P(free)(p);
+
+ sz = sizeof(d1);
+ JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0);
+
+ assert(d0 + usize <= d1);
+
+RETURN:
+ return (NULL);
+}
+
+int
+main(void)
+{
+ int ret = 0;
+ pthread_t thread;
+
+ fprintf(stderr, "Test begin\n");
+
+ thread_start(NULL);
+
+ if (pthread_create(&thread, NULL, thread_start, NULL)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+ thread_start(NULL);
+
+ if (pthread_create(&thread, NULL, thread_start, NULL)
+ != 0) {
+ fprintf(stderr, "%s(): Error in pthread_create()\n", __func__);
+ ret = 1;
+ goto RETURN;
+ }
+ pthread_join(thread, (void *)&ret);
+
+ thread_start(NULL);
+
+RETURN:
+ fprintf(stderr, "Test end\n");
+ return (ret);
+}
diff --git a/jemalloc/test/allocated.exp b/jemalloc/test/allocated.exp
new file mode 100644
index 0000000..369a88d
--- /dev/null
+++ b/jemalloc/test/allocated.exp
@@ -0,0 +1,2 @@
+Test begin
+Test end