summary | refs | log | tree | commit | diff | stats
path: root/src
diff options (page controls): context, space, mode
authorJason Evans <je@fb.com>2014-09-07 21:40:19 (GMT)
committerJason Evans <je@fb.com>2014-09-07 21:40:19 (GMT)
commitb718cf77e9917f6ae1995c2e2b219ff4219c9f46 (patch)
treed917a0cd167f15fc269ff2f0264a96663af1e280 /src
parentc21b05ea09874222266b3e36ceb18765fcb4a00b (diff)
downloadjemalloc-b718cf77e9917f6ae1995c2e2b219ff4219c9f46.zip
jemalloc-b718cf77e9917f6ae1995c2e2b219ff4219c9f46.tar.gz
jemalloc-b718cf77e9917f6ae1995c2e2b219ff4219c9f46.tar.bz2
Optimize [nmd]alloc() fast paths.
Optimize [nmd]alloc() fast paths such that the (flags == 0) case is streamlined, flags decoding only happens to the minimum degree necessary, and no conditionals are repeated.
Diffstat (limited to 'src')
-rw-r--r--src/arena.c2
-rw-r--r--src/huge.c2
-rw-r--r--src/jemalloc.c239
3 files changed, 141 insertions(+), 102 deletions(-)
diff --git a/src/arena.c b/src/arena.c
index d9dda83..8d34cf6 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -2108,7 +2108,7 @@ arena_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
copysize = (size < oldsize) ? size : oldsize;
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize);
memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
+ iqalloc(ptr, try_tcache_dalloc);
return (ret);
}
diff --git a/src/huge.c b/src/huge.c
index 5f0c698..e773309 100644
--- a/src/huge.c
+++ b/src/huge.c
@@ -129,7 +129,7 @@ huge_ralloc(arena_t *arena, void *ptr, size_t oldsize, size_t size,
*/
copysize = (size < oldsize) ? size : oldsize;
memcpy(ret, ptr, copysize);
- iqalloct(ptr, try_tcache_dalloc);
+ iqalloc(ptr, try_tcache_dalloc);
return (ret);
}
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 9df7001..71e921b 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -870,7 +870,7 @@ imalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = imalloc(SMALL_MAXCLASS+1);
+ p = imalloc(LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
@@ -950,9 +950,8 @@ imemalign_prof_sample(size_t alignment, size_t usize, prof_tctx_t *tctx)
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- assert(sa2u(SMALL_MAXCLASS+1, alignment) != 0);
- p = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment), alignment,
- false);
+ assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS);
+ p = imalloc(LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
@@ -1077,7 +1076,7 @@ icalloc_prof_sample(size_t usize, prof_tctx_t *tctx)
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = icalloc(SMALL_MAXCLASS+1);
+ p = icalloc(LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
@@ -1174,12 +1173,12 @@ irealloc_prof_sample(void *oldptr, size_t usize, prof_tctx_t *tctx)
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = iralloc(oldptr, SMALL_MAXCLASS+1, 0, 0, false);
+ p = iralloc(oldptr, LARGE_MINCLASS, 0, false);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else
- p = iralloc(oldptr, usize, 0, 0, false);
+ p = iralloc(oldptr, usize, 0, false);
return (p);
}
@@ -1194,7 +1193,7 @@ irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
if ((uintptr_t)tctx != (uintptr_t)1U)
p = irealloc_prof_sample(oldptr, usize, tctx);
else
- p = iralloc(oldptr, usize, 0, 0, false);
+ p = iralloc(oldptr, usize, 0, false);
if (p == NULL)
return (NULL);
prof_realloc(p, usize, tctx, old_usize, old_tctx);
@@ -1203,7 +1202,7 @@ irealloc_prof(void *oldptr, size_t old_usize, size_t usize, prof_tctx_t *tctx)
}
JEMALLOC_INLINE_C void
-ifree(void *ptr)
+ifree(void *ptr, bool try_tcache)
{
size_t usize;
UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
@@ -1220,7 +1219,7 @@ ifree(void *ptr)
thread_allocated_tsd_get()->deallocated += usize;
if (config_valgrind && in_valgrind)
rzsize = p2rz(ptr);
- iqalloc(ptr);
+ iqalloc(ptr, try_tcache);
JEMALLOC_VALGRIND_FREE(ptr, rzsize);
}
@@ -1236,7 +1235,7 @@ je_realloc(void *ptr, size_t size)
if (ptr != NULL) {
/* realloc(ptr, 0) is equivalent to free(ptr). */
UTRACE(ptr, 0, 0);
- ifree(ptr);
+ ifree(ptr, true);
return (NULL);
}
size = 1;
@@ -1261,7 +1260,7 @@ je_realloc(void *ptr, size_t size)
} else {
if (config_stats || (config_valgrind && in_valgrind))
usize = s2u(size);
- ret = iralloc(ptr, size, 0, 0, false);
+ ret = iralloc(ptr, size, 0, false);
}
} else {
/* realloc(NULL, size) is equivalent to malloc(size). */
@@ -1295,7 +1294,7 @@ je_free(void *ptr)
UTRACE(ptr, 0, 0);
if (ptr != NULL)
- ifree(ptr);
+ ifree(ptr, true);
}
/*
@@ -1363,99 +1362,153 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) =
* Begin non-standard functions.
*/
+JEMALLOC_ALWAYS_INLINE_C void
+imallocx_flags_decode_hard(size_t size, int flags, size_t *usize,
+ size_t *alignment, bool *zero, bool *try_tcache, arena_t **arena)
+{
+
+ if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) {
+ *alignment = 0;
+ *usize = s2u(size);
+ } else {
+ *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+ *usize = sa2u(size, *alignment);
+ }
+ *zero = MALLOCX_ZERO_GET(flags);
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
+ *try_tcache = false;
+ *arena = arenas[arena_ind];
+ } else {
+ *try_tcache = true;
+ *arena = NULL;
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE_C void
+imallocx_flags_decode(size_t size, int flags, size_t *usize, size_t *alignment,
+ bool *zero, bool *try_tcache, arena_t **arena)
+{
+
+ if (flags == 0) {
+ *usize = s2u(size);
+ assert(usize != 0);
+ *alignment = 0;
+ *zero = false;
+ *try_tcache = true;
+ *arena = NULL;
+ } else {
+ imallocx_flags_decode_hard(size, flags, usize, alignment, zero,
+ try_tcache, arena);
+ }
+}
+
JEMALLOC_ALWAYS_INLINE_C void *
-imallocx(size_t usize, size_t alignment, bool zero, bool try_tcache,
+imallocx_flags(size_t usize, size_t alignment, bool zero, bool try_tcache,
arena_t *arena)
{
- assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize,
- alignment)));
-
if (alignment != 0)
return (ipalloct(usize, alignment, zero, try_tcache, arena));
- else if (zero)
+ if (zero)
return (icalloct(usize, try_tcache, arena));
- else
- return (imalloct(usize, try_tcache, arena));
+ return (imalloct(usize, try_tcache, arena));
+}
+
+
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_maybe_flags(size_t size, int flags, size_t usize, size_t alignment,
+ bool zero, bool try_tcache, arena_t *arena)
+{
+
+ if (flags == 0)
+ return (imalloc(size));
+ return (imallocx_flags(usize, alignment, zero, try_tcache, arena));
}
static void *
-imallocx_prof_sample(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_tctx_t *tctx)
+imallocx_prof_sample(size_t size, int flags, size_t usize, size_t alignment,
+ bool zero, bool try_tcache, arena_t *arena)
{
void *p;
- if (tctx == NULL)
- return (NULL);
if (usize <= SMALL_MAXCLASS) {
- size_t usize_promoted = (alignment == 0) ?
- s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment);
- assert(usize_promoted != 0);
- p = imallocx(usize_promoted, alignment, zero, try_tcache,
- arena);
+ assert(((alignment == 0) ? s2u(LARGE_MINCLASS) :
+ sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS);
+ p = imalloc(LARGE_MINCLASS);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
+ } else {
+ p = imallocx_maybe_flags(size, flags, usize, alignment, zero,
+ try_tcache, arena);
+ }
return (p);
}
JEMALLOC_ALWAYS_INLINE_C void *
-imallocx_prof(size_t usize, size_t alignment, bool zero, bool try_tcache,
- arena_t *arena, prof_tctx_t *tctx)
+imallocx_prof(size_t size, int flags, size_t *usize)
{
void *p;
+ size_t alignment;
+ bool zero;
+ bool try_tcache;
+ arena_t *arena;
+ prof_tctx_t *tctx;
- if ((uintptr_t)tctx != (uintptr_t)1U) {
- p = imallocx_prof_sample(usize, alignment, zero, try_tcache,
- arena, tctx);
+ imallocx_flags_decode(size, flags, usize, &alignment, &zero,
+ &try_tcache, &arena);
+ tctx = prof_alloc_prep(*usize);
+ if ((uintptr_t)tctx == (uintptr_t)1U) {
+ p = imallocx_maybe_flags(size, flags, *usize, alignment, zero,
+ try_tcache, arena);
+ } else if ((uintptr_t)tctx > (uintptr_t)1U) {
+ p = imallocx_prof_sample(size, flags, *usize, alignment, zero,
+ try_tcache, arena);
} else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
+ p = NULL;
if (p == NULL)
return (NULL);
- prof_malloc(p, usize, tctx);
+ prof_malloc(p, *usize, tctx);
return (p);
}
+JEMALLOC_ALWAYS_INLINE_C void *
+imallocx_no_prof(size_t size, int flags, size_t *usize)
+{
+ size_t alignment;
+ bool zero;
+ bool try_tcache;
+ arena_t *arena;
+
+ if (flags == 0) {
+ if (config_stats || (config_valgrind && in_valgrind))
+ *usize = s2u(size);
+ return (imalloc(size));
+ }
+
+ imallocx_flags_decode_hard(size, flags, usize, &alignment, &zero,
+ &try_tcache, &arena);
+ return (imallocx_flags(*usize, alignment, zero, try_tcache, arena));
+}
+
void *
je_mallocx(size_t size, int flags)
{
void *p;
size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
- bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
- arena_t *arena;
- bool try_tcache;
assert(size != 0);
if (malloc_init())
goto label_oom;
- if (arena_ind != UINT_MAX) {
- arena = arenas[arena_ind];
- try_tcache = false;
- } else {
- arena = NULL;
- try_tcache = true;
- }
-
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
- assert(usize != 0);
-
- if (config_prof && opt_prof) {
- prof_tctx_t *tctx;
-
- tctx = prof_alloc_prep(usize);
- p = imallocx_prof(usize, alignment, zero, try_tcache, arena,
- tctx);
- } else
- p = imallocx(usize, alignment, zero, try_tcache, arena);
+ if (config_prof && opt_prof)
+ p = imallocx_prof(size, flags, &usize);
+ else
+ p = imallocx_no_prof(size, flags, &usize);
if (p == NULL)
goto label_oom;
@@ -1464,7 +1517,7 @@ je_mallocx(size_t size, int flags)
thread_allocated_tsd_get()->allocated += usize;
}
UTRACE(0, size, p);
- JEMALLOC_VALGRIND_MALLOC(true, p, usize, zero);
+ JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags));
return (p);
label_oom:
if (config_xmalloc && opt_xmalloc) {
@@ -1485,15 +1538,14 @@ irallocx_prof_sample(void *oldptr, size_t size, size_t alignment, size_t usize,
if (tctx == NULL)
return (NULL);
if (usize <= SMALL_MAXCLASS) {
- p = iralloct(oldptr, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >=
- size) ? 0 : size - (SMALL_MAXCLASS+1), alignment, zero,
+ p = iralloct(oldptr, LARGE_MINCLASS, alignment, zero,
try_tcache_alloc, try_tcache_dalloc, arena);
if (p == NULL)
return (NULL);
arena_prof_promoted(p, usize);
} else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
+ try_tcache_dalloc, arena);
}
return (p);
@@ -1512,8 +1564,8 @@ irallocx_prof(void *oldptr, size_t old_usize, size_t size, size_t alignment,
p = irallocx_prof_sample(oldptr, size, alignment, *usize, zero,
try_tcache_alloc, try_tcache_dalloc, arena, tctx);
else {
- p = iralloct(oldptr, size, 0, alignment, zero,
- try_tcache_alloc, try_tcache_dalloc, arena);
+ p = iralloct(oldptr, size, alignment, zero, try_tcache_alloc,
+ try_tcache_dalloc, arena);
}
if (p == NULL)
return (NULL);
@@ -1540,10 +1592,8 @@ je_rallocx(void *ptr, size_t size, int flags)
void *p;
size_t usize, old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
bool try_tcache_alloc, try_tcache_dalloc;
arena_t *arena;
@@ -1552,7 +1602,8 @@ je_rallocx(void *ptr, size_t size, int flags)
assert(malloc_initialized || IS_INITIALIZER);
malloc_thread_init();
- if (arena_ind != UINT_MAX) {
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
arena_chunk_t *chunk;
try_tcache_alloc = false;
chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
@@ -1582,7 +1633,7 @@ je_rallocx(void *ptr, size_t size, int flags)
if (p == NULL)
goto label_oom;
} else {
- p = iralloct(ptr, size, 0, alignment, zero, try_tcache_alloc,
+ p = iralloct(ptr, size, alignment, zero, try_tcache_alloc,
try_tcache_dalloc, arena);
if (p == NULL)
goto label_oom;
@@ -1677,10 +1728,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
{
size_t usize, old_usize;
UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0);
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
+ size_t alignment = MALLOCX_ALIGN_GET(flags);
bool zero = flags & MALLOCX_ZERO;
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
arena_t *arena;
assert(ptr != NULL);
@@ -1689,9 +1738,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags)
assert(malloc_initialized || IS_INITIALIZER);
malloc_thread_init();
- if (arena_ind != UINT_MAX)
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
arena = arenas[arena_ind];
- else
+ } else
arena = NULL;
old_usize = isalloc(ptr, config_prof);
@@ -1753,15 +1803,13 @@ je_sallocx(const void *ptr, int flags)
void
je_dallocx(void *ptr, int flags)
{
- size_t usize;
- UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0);
- unsigned arena_ind = ((unsigned)(flags >> 8)) - 1;
bool try_tcache;
assert(ptr != NULL);
assert(malloc_initialized || IS_INITIALIZER);
- if (arena_ind != UINT_MAX) {
+ if ((flags & MALLOCX_ARENA_MASK) != 0) {
+ unsigned arena_ind = MALLOCX_ARENA_GET(flags);
arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
try_tcache = (chunk == ptr || chunk->arena !=
arenas[arena_ind]);
@@ -1769,34 +1817,25 @@ je_dallocx(void *ptr, int flags)
try_tcache = true;
UTRACE(ptr, 0, 0);
- if (config_stats || config_valgrind)
- usize = isalloc(ptr, config_prof);
- if (config_prof && opt_prof) {
- if (config_stats == false && config_valgrind == false)
- usize = isalloc(ptr, config_prof);
- prof_free(ptr, usize);
- }
- if (config_stats)
- thread_allocated_tsd_get()->deallocated += usize;
- if (config_valgrind && in_valgrind)
- rzsize = p2rz(ptr);
- iqalloct(ptr, try_tcache);
- JEMALLOC_VALGRIND_FREE(ptr, rzsize);
+ ifree(ptr, try_tcache);
}
size_t
je_nallocx(size_t size, int flags)
{
size_t usize;
- size_t alignment = (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)
- & (SIZE_T_MAX-1));
assert(size != 0);
if (malloc_init())
return (0);
- usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment);
+ if ((flags & MALLOCX_LG_ALIGN_MASK) == 0)
+ usize = s2u(size);
+ else {
+ size_t alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags);
+ usize = sa2u(size, alignment);
+ }
assert(usize != 0);
return (usize);
}