From 5ff709c264e52651de25b788692c62ff1f6f389c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 Apr 2012 18:13:45 -0700 Subject: Normalize aligned allocation algorithms. Normalize arena_palloc(), chunk_alloc_mmap_slow(), and chunk_recycle_dss() to use the same algorithm for trimming over-allocation. Add the ALIGNMENT_ADDR2BASE(), ALIGNMENT_ADDR2OFFSET(), and ALIGNMENT_CEILING() macros, and use them where appropriate. Remove the run_size_p parameter from sa2u(). Fix a potential deadlock in chunk_recycle_dss() that was introduced by eae269036c9f702d9fa9be497a1a2aa1be13a29e (Add alignment support to chunk_alloc()). --- include/jemalloc/internal/arena.h | 3 +- include/jemalloc/internal/jemalloc_internal.h.in | 65 +++++++--------- include/jemalloc/internal/private_namespace.h | 1 - src/arena.c | 52 ++++++------- src/chunk_dss.c | 94 ++++++++++++++---------- src/chunk_mmap.c | 50 +++++-------- src/ckh.c | 6 +- src/jemalloc.c | 22 +++--- 8 files changed, 138 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f52fac4..3790818 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -407,8 +407,7 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void *arena_malloc_small(arena_t *arena, size_t size, bool zero); void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, - size_t alignment, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero); size_t arena_salloc(const void *ptr, bool demote); void arena_prof_promoted(const void *ptr, size_t size); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a16e5e2..57895fb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -293,6 +293,18 @@ static const bool config_ivsalloc = #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & (-(alignment)))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & (-(alignment))) + #ifdef JEMALLOC_VALGRIND /* * The JEMALLOC_VALGRIND_*() macros must be macros rather than functions @@ -499,7 +511,7 @@ void jemalloc_postfork_child(void); malloc_tsd_protos(JEMALLOC_ATTR(unused), arenas, arena_t *) size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); +size_t sa2u(size_t size, size_t alignment); arena_t *choose_arena(arena_t *arena); #endif @@ -531,10 +543,12 @@ s2u(size_t size) * specified size and alignment. */ JEMALLOC_INLINE size_t -sa2u(size_t size, size_t alignment, size_t *run_size_p) +sa2u(size_t size, size_t alignment) { size_t usize; + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + /* * Round size up to the nearest multiple of alignment. * @@ -549,7 +563,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) * 144 | 10100000 | 32 * 192 | 11000000 | 64 */ - usize = (size + (alignment - 1)) & (-alignment); + usize = ALIGNMENT_CEILING(size, alignment); /* * (usize < size) protects against the combination of maximal * alignment and size greater than maximal alignment. @@ -592,24 +606,10 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) /* * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. + * If the run wouldn't fit within a chunk, round up to a huge + * allocation size. */ - if (usize >= alignment) - run_size = usize + alignment - PAGE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE, which in the case of overflow leaves - * us with a very large run_size. That causes the - * first conditional below to fail, which means that - * the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE; - } - if (run_size_p != NULL) - *run_size_p = run_size; - + run_size = usize + alignment - PAGE; if (run_size <= arena_maxclass) return (PAGE_CEILING(usize)); return (CHUNK_CEILING(usize)); @@ -685,32 +685,21 @@ ipalloc(size_t usize, size_t alignment, bool zero) void *ret; assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); + assert(usize == sa2u(usize, alignment)); if (usize <= arena_maxclass && alignment <= PAGE) ret = arena_malloc(NULL, usize, zero, true); else { - size_t run_size JEMALLOC_CC_SILENCE_INIT(0); - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(NULL), usize, run_size, - alignment, zero); + if (usize <= arena_maxclass) { + ret = arena_palloc(choose_arena(NULL), usize, alignment, + zero); } else if (alignment <= chunksize) ret = huge_malloc(usize, zero); else ret = huge_palloc(usize, alignment, zero); } - assert(((uintptr_t)ret & (alignment - 1)) == 0); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); return (ret); } @@ -818,7 +807,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, */ if (no_move) return (NULL); - usize = sa2u(size + extra, alignment, NULL); + usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -826,7 +815,7 @@ iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, if (extra == 0) return (NULL); /* Try again, without extra this time. */ - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/include/jemalloc/internal/private_namespace.h b/include/jemalloc/internal/private_namespace.h index a962192..fca6595 100644 --- a/include/jemalloc/internal/private_namespace.h +++ b/include/jemalloc/internal/private_namespace.h @@ -55,7 +55,6 @@ #define chunk_alloc JEMALLOC_N(chunk_alloc) #define chunk_alloc_dss JEMALLOC_N(chunk_alloc_dss) #define chunk_alloc_mmap JEMALLOC_N(chunk_alloc_mmap) -#define chunk_alloc_mmap_noreserve JEMALLOC_N(chunk_alloc_mmap_noreserve) #define chunk_boot JEMALLOC_N(chunk_boot) #define chunk_dealloc JEMALLOC_N(chunk_dealloc) #define chunk_dealloc_dss JEMALLOC_N(chunk_dealloc_dss) diff --git a/src/arena.c b/src/arena.c index 1d4f61e..1a108db 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1418,48 +1418,38 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) /* Only handles large allocations that require more than page alignment. */ void * -arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, - bool zero) +arena_palloc(arena_t *arena, size_t size, size_t alignment, bool zero) { void *ret; - size_t offset; + size_t alloc_size, leadsize, trailsize; + arena_run_t *run; arena_chunk_t *chunk; assert((size & PAGE_MASK) == 0); alignment = PAGE_CEILING(alignment); + alloc_size = size + alignment - PAGE; malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); - if (ret == NULL) { + run = arena_run_alloc(arena, alloc_size, true, zero); + if (run == NULL) { malloc_mutex_unlock(&arena->lock); return (NULL); } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & PAGE_MASK) == 0); - assert(offset < alloc_size); - if (offset == 0) - arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false); - else { - size_t leadsize, trailsize; - - leadsize = alignment - offset; - if (leadsize > 0) { - arena_run_trim_head(arena, chunk, ret, alloc_size, - alloc_size - leadsize); - ret = (void *)((uintptr_t)ret + leadsize); - } - - trailsize = alloc_size - leadsize - size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - arena_run_trim_tail(arena, chunk, ret, size + trailsize, - size, false); - } + leadsize = ALIGNMENT_CEILING((uintptr_t)run, alignment) - + (uintptr_t)run; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)run + leadsize); + if (leadsize != 0) { + arena_run_trim_head(arena, chunk, run, alloc_size, alloc_size - + leadsize); + } + if (trailsize != 0) { + arena_run_trim_tail(arena, chunk, ret, size + trailsize, size, + false); } if (config_stats) { @@ -1950,7 +1940,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, * copying. */ if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); + size_t usize = sa2u(size + extra, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); @@ -1962,7 +1952,7 @@ arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, return (NULL); /* Try again, this time without extra. */ if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); + size_t usize = sa2u(size, alignment); if (usize == 0) return (NULL); ret = ipalloc(usize, alignment, zero); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index ccd86b9..7c03409 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -36,51 +36,71 @@ static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); static void * chunk_recycle_dss(size_t size, size_t alignment, bool *zero) { - extent_node_t *node, key; + void *ret; + extent_node_t *node; + extent_node_t key; + size_t alloc_size, leadsize, trailsize; cassert(config_dss); + alloc_size = size + alignment - chunksize; + /* Beware size_t wrap-around. */ + if (alloc_size < size) + return (NULL); key.addr = NULL; - key.size = size + alignment - chunksize; + key.size = alloc_size; malloc_mutex_lock(&dss_mtx); node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node != NULL) { - size_t offset = (size_t)((uintptr_t)(node->addr) & (alignment - - 1)); - void *ret; - if (offset > 0) - offset = alignment - offset; - ret = (void *)((uintptr_t)(node->addr) + offset); - - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - if (offset > 0) { - /* Insert the leading space as a smaller chunk. */ - node->size = offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } - if (alignment - chunksize > offset) { - if (offset > 0) - node = base_node_alloc(); - /* Insert the trailing space as a smaller chunk. */ - node->addr = (void *)((uintptr_t)(ret) + size); - node->size = alignment - chunksize - offset; - extent_tree_szad_insert(&dss_chunks_szad, node); - extent_tree_ad_insert(&dss_chunks_ad, node); - } else if (offset == 0) - base_node_dealloc(node); - + if (node == NULL) { malloc_mutex_unlock(&dss_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); + return (NULL); + } + leadsize = ALIGNMENT_CEILING((uintptr_t)node->addr, alignment) - + (uintptr_t)node->addr; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)node->addr + leadsize); + /* Remove node from the tree. */ + extent_tree_szad_remove(&dss_chunks_szad, node); + extent_tree_ad_remove(&dss_chunks_ad, node); + if (leadsize != 0) { + /* Insert the leading space as a smaller chunk. */ + node->size = leadsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; + } + if (trailsize != 0) { + /* Insert the trailing space as a smaller chunk. */ + if (node == NULL) { + /* + * An additional node is required, but + * base_node_alloc() can cause a new base chunk to be + * allocated. Drop dss_mtx in order to avoid deadlock, + * and if node allocation fails, deallocate the result + * before returning an error. + */ + malloc_mutex_unlock(&dss_mtx); + node = base_node_alloc(); + if (node == NULL) { + chunk_dealloc_dss(ret, size); + return (NULL); + } + malloc_mutex_lock(&dss_mtx); + } + node->addr = (void *)((uintptr_t)(ret) + size); + node->size = trailsize; + extent_tree_szad_insert(&dss_chunks_szad, node); + extent_tree_ad_insert(&dss_chunks_ad, node); + node = NULL; } malloc_mutex_unlock(&dss_mtx); - return (NULL); + if (node != NULL) + base_node_dealloc(node); + if (*zero) + memset(ret, 0, size); + return (ret); } void * @@ -129,8 +149,8 @@ chunk_alloc_dss(size_t size, size_t alignment, bool *zero) * recycled for later use. */ cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)(((uintptr_t)dss_max + (alignment - 1)) & - ~(alignment - 1)); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, + alignment); cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 37dad20..6cbf094 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -19,7 +19,6 @@ static void *pages_map(void *addr, size_t size); static void pages_unmap(void *addr, size_t size); static void *chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned); -static void *chunk_alloc_mmap_internal(size_t size, size_t alignment); /******************************************************************************/ @@ -76,34 +75,28 @@ pages_unmap(void *addr, size_t size) static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) { - void *ret; - size_t offset; + void *ret, *pages; + size_t alloc_size, leadsize, trailsize; + alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ - if (size + chunksize <= size) + if (alloc_size < size) return (NULL); - - ret = pages_map(NULL, size + alignment); - if (ret == NULL) + pages = pages_map(NULL, alloc_size); + if (pages == NULL) return (NULL); - - /* Clean up unneeded leading/trailing space. */ - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); - if (offset != 0) { + leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - + (uintptr_t)pages; + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + ret = (void *)((uintptr_t)pages + leadsize); + if (leadsize != 0) { /* Note that mmap() returned an unaligned mapping. */ unaligned = true; - - /* Leading space. */ - pages_unmap(ret, alignment - offset); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), offset); - } else { - /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), alignment); + pages_unmap(pages, leadsize); } + if (trailsize != 0) + pages_unmap((void *)((uintptr_t)ret + size), trailsize); /* * If mmap() returned an aligned mapping, reset mmap_unaligned so that @@ -118,8 +111,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool unaligned) return (ret); } -static void * -chunk_alloc_mmap_internal(size_t size, size_t alignment) +void * +chunk_alloc_mmap(size_t size, size_t alignment) { void *ret; @@ -158,7 +151,7 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) if (ret == NULL) return (NULL); - offset = (size_t)((uintptr_t)(ret) & (alignment - 1)); + offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { bool mu = true; mmap_unaligned_tsd_set(&mu); @@ -185,13 +178,6 @@ chunk_alloc_mmap_internal(size_t size, size_t alignment) return (ret); } -void * -chunk_alloc_mmap(size_t size, size_t alignment) -{ - - return (chunk_alloc_mmap_internal(size, alignment)); -} - void chunk_dealloc_mmap(void *chunk, size_t size) { diff --git a/src/ckh.c b/src/ckh.c index 169fc0d..742a950 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -264,7 +264,7 @@ ckh_grow(ckh_t *ckh) size_t usize; lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) { ret = true; goto label_return; @@ -309,7 +309,7 @@ ckh_shrink(ckh_t *ckh) */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (usize == 0) return; tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); @@ -382,7 +382,7 @@ ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); if (usize == 0) { ret = true; goto label_return; diff --git a/src/jemalloc.c b/src/jemalloc.c index 237dd58..8e10c55 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -862,7 +862,7 @@ imemalign(void **memptr, size_t alignment, size_t size, goto label_return; } - usize = sa2u(size, alignment, NULL); + usize = sa2u(size, alignment); if (usize == 0) { result = NULL; ret = ENOMEM; @@ -878,9 +878,9 @@ imemalign(void **memptr, size_t alignment, size_t size, if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= SMALL_MAXCLASS) { assert(sa2u(SMALL_MAXCLASS+1, - alignment, NULL) != 0); + alignment) != 0); result = ipalloc(sa2u(SMALL_MAXCLASS+1, - alignment, NULL), alignment, false); + alignment), alignment, false); if (result != NULL) { arena_prof_promoted(result, usize); @@ -1343,8 +1343,8 @@ JEMALLOC_INLINE void * iallocm(size_t usize, size_t alignment, bool zero) { - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, + alignment))); if (alignment != 0) return (ipalloc(usize, alignment, zero)); @@ -1372,7 +1372,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) if (malloc_init()) goto label_oom; - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) goto label_oom; @@ -1384,7 +1384,7 @@ je_allocm(void **ptr, size_t *rsize, size_t size, int flags) SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, - alignment, NULL); + alignment); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero); if (p == NULL) @@ -1454,7 +1454,7 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * decide whether to sample. */ size_t max_usize = (alignment == 0) ? s2u(size+extra) : - sa2u(size+extra, alignment, NULL); + sa2u(size+extra, alignment); prof_ctx_t *old_ctx = prof_ctx_get(p); old_size = isalloc(p, true); if (config_valgrind && opt_valgrind) @@ -1466,8 +1466,8 @@ je_rallocm(void **ptr, size_t *rsize, size_t size, size_t extra, int flags) * Use minimum usize to determine whether promotion may happen. */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U - && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= SMALL_MAXCLASS) { + && ((alignment == 0) ? s2u(size) : sa2u(size, alignment)) + <= SMALL_MAXCLASS) { q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); @@ -1596,7 +1596,7 @@ je_nallocm(size_t *rsize, size_t size, int flags) if (malloc_init()) return (ALLOCM_ERR_OOM); - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, NULL); + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (usize == 0) return (ALLOCM_ERR_OOM); -- cgit v0.12