author     Jason Evans <je@facebook.com>    2010-03-17 23:27:39 (GMT)
committer  Jason Evans <je@facebook.com>    2010-03-17 23:27:39 (GMT)
commit     dafde14e08ddfda747aabb2045b350848b601b2e (patch)
tree       abea413fa035c70d61e180585c85074d11be341a
parent     e69bee01de62b56d3e585042d341743239568043 (diff)
download   jemalloc-dafde14e08ddfda747aabb2045b350848b601b2e.zip
           jemalloc-dafde14e08ddfda747aabb2045b350848b601b2e.tar.gz
           jemalloc-dafde14e08ddfda747aabb2045b350848b601b2e.tar.bz2
Remove medium size classes.
Remove medium size classes, because concurrent dirty page purging is no longer capable of purging inactive dirty pages inside active runs (due to recent arena/bin locking changes).

Enhance tcache to support caching large objects, so that the same range of size classes is still cached, despite the removal of medium size class support.
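As a quick orientation before the diff: after this change an allocation request is routed by size alone. Below is a minimal sketch of that routing, with the NULL-tcache fallbacks and JEMALLOC_TCACHE guards omitted; the real logic is arena_malloc() in the diff to src/arena.c, and small_maxclass/tcache_maxclass are the constants it uses.

/*
 * Simplified sketch of post-change request routing (illustration only;
 * tcache-miss fallbacks and the #ifdef JEMALLOC_TCACHE guards are omitted).
 */
static void *
route_alloc(size_t size, bool zero)
{

	if (size <= small_maxclass) {
		/* Small: served from per-thread cache bins. */
		return (tcache_alloc_small(tcache_get(), size, zero));
	} else if (size <= tcache_maxclass) {
		/* Large but cacheable: the new page-multiple tcache bins. */
		return (tcache_alloc_large(tcache_get(), size, zero));
	}
	/* Larger than tcache_maxclass: served directly by an arena. */
	return (arena_malloc_large(choose_arena(), size, zero));
}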
-rw-r--r--jemalloc/INSTALL6
-rw-r--r--jemalloc/doc/jemalloc.3.in131
-rw-r--r--jemalloc/include/jemalloc/internal/arena.h63
-rw-r--r--jemalloc/include/jemalloc/internal/ctl.h7
-rw-r--r--jemalloc/include/jemalloc/internal/stats.h15
-rw-r--r--jemalloc/include/jemalloc/internal/tcache.h181
-rw-r--r--jemalloc/include/jemalloc/jemalloc_defs.h.in6
-rw-r--r--jemalloc/src/arena.c218
-rw-r--r--jemalloc/src/ctl.c89
-rw-r--r--jemalloc/src/jemalloc.c70
-rw-r--r--jemalloc/src/stats.c68
-rw-r--r--jemalloc/src/tcache.c143
12 files changed, 514 insertions, 483 deletions
diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL
index 6157ae5..7ce7e79 100644
--- a/jemalloc/INSTALL
+++ b/jemalloc/INSTALL
@@ -71,9 +71,9 @@ any of the following arguments (not a definitive list) to 'configure':
are 4-byte-aligned.
--disable-tcache
- Disable thread-specific caches for small and medium objects. Objects are
- cached and released in bulk, thus reducing the total number of mutex
- operations. Use the 'H' and 'G' options to control thread-specific caching.
+ Disable thread-specific caches for small objects. Objects are cached and
+ released in bulk, thus reducing the total number of mutex operations. Use
+ the 'H', 'G', and 'M' options to control thread-specific caching.
--enable-swap
Enable mmap()ed swap file support. When this feature is built in, it is
diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in
index 321e95f..1ea93bf 100644
--- a/jemalloc/doc/jemalloc.3.in
+++ b/jemalloc/doc/jemalloc.3.in
@@ -38,7 +38,7 @@
.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93
.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $
.\"
-.Dd March 14, 2010
+.Dd March 17, 2010
.Dt JEMALLOC 3
.Os
.Sh NAME
@@ -378,13 +378,15 @@ will disable dirty page purging.
@roff_tcache@.It H
@roff_tcache@Enable/disable thread-specific caching.
@roff_tcache@When there are multiple threads, each thread uses a
-@roff_tcache@thread-specific cache for small and medium objects.
+@roff_tcache@thread-specific cache for objects up to a certain size.
@roff_tcache@Thread-specific caching allows many allocations to be satisfied
@roff_tcache@without performing any thread synchronization, at the cost of
@roff_tcache@increased memory use.
@roff_tcache@See the
@roff_tcache@.Dq G
-@roff_tcache@option for related tuning information.
+@roff_tcache@and
+@roff_tcache@.Dq M
+@roff_tcache@options for related tuning information.
@roff_tcache@This option is enabled by default.
@roff_prof@.It I
@roff_prof@Double/halve the average interval between memory profile dumps, as
@@ -426,16 +428,15 @@ The default chunk size is 4 MiB.
@roff_prof@See the
@roff_prof@.Dq F option for information on analyzing heap profile output.
@roff_prof@This option is disabled by default.
-.It M
-Double/halve the size of the maximum medium size class.
-The valid range is from one page to one half chunk.
-The default value is 32 KiB.
+@roff_tcache@.It M
+@roff_tcache@Double/halve the maximum size class to cache.
+@roff_tcache@At a minimum, all small size classes are cached, and at a maximum
+@roff_tcache@all large size classes are cached.
+@roff_tcache@The default maximum is 32 KiB.
.It N
Double/halve the number of arenas.
-The default number of arenas is
-@roff_tcache@two
-@roff_no_tcache@four
-times the number of CPUs, or one if there is a single CPU.
+The default number of arenas is four times the number of CPUs, or one if there
+is a single CPU.
@roff_swap@.It O
@roff_swap@Over-commit memory as a side effect of using anonymous
@roff_swap@.Xr mmap 2
@@ -550,9 +551,9 @@ However, it may make sense to reduce the number of arenas if an application
does not make much use of the allocation functions.
.Pp
@roff_tcache@In addition to multiple arenas, this allocator supports
-@roff_tcache@thread-specific caching for small and medium objects, in order to
-@roff_tcache@make it possible to completely avoid synchronization for most small
-@roff_tcache@and medium allocation requests.
+@roff_tcache@thread-specific caching for small objects, in order to make it
+@roff_tcache@possible to completely avoid synchronization for most small
+@roff_tcache@allocation requests.
@roff_tcache@Such caching allows very fast allocation in the common case, but it
@roff_tcache@increases memory usage and fragmentation, since a bounded number of
@roff_tcache@objects can remain allocated in each thread cache.
@@ -563,27 +564,23 @@ Chunks are always aligned to multiples of the chunk size.
This alignment makes it possible to find metadata for user objects very
quickly.
.Pp
-User objects are broken into four categories according to size: small, medium,
-large, and huge.
+User objects are broken into three categories according to size: small, large,
+and huge.
Small objects are smaller than one page.
-Medium objects range from one page to an upper limit determined at run time (see
-the
-.Dq M
-option).
Large objects are smaller than the chunk size.
Huge objects are a multiple of the chunk size.
-Small, medium, and large objects are managed by arenas; huge objects are managed
+Small and large objects are managed by arenas; huge objects are managed
separately in a single data structure that is shared by all threads.
Huge objects are used by applications infrequently enough that this single
data structure is not a scalability issue.
.Pp
Each chunk that is managed by an arena tracks its contents as runs of
-contiguous pages (unused, backing a set of small or medium objects, or backing
-one large object).
+contiguous pages (unused, backing a set of small objects, or backing one large
+object).
The combination of chunk alignment and chunk page maps makes it possible to
determine all metadata regarding small and large allocations in constant time.
.Pp
-Small and medium objects are managed in groups by page runs.
+Small objects are managed in groups by page runs.
Each run maintains a bitmap that tracks which regions are in use.
@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16,
@roff_tiny@depending on architecture) are rounded up to the nearest power of
@@ -603,13 +600,7 @@ Allocation requests that are more than the minimum subpage-multiple size class,
but no more than the maximum subpage-multiple size class are rounded up to the
nearest multiple of the subpage size (256).
Allocation requests that are more than the maximum subpage-multiple size class,
-but no more than the maximum medium size class (see the
-.Dq M
-option) are rounded up to the nearest medium size class; spacing is an
-automatically determined power of two and ranges from the subpage size to the
-page size.
-Allocation requests that are more than the maximum medium size class, but small
-enough to fit in an arena-managed chunk (see the
+but small enough to fit in an arena-managed chunk (see the
.Dq K
option), are rounded up to the nearest run size.
Allocation requests that are too large to fit in an arena-managed chunk are
@@ -838,13 +829,6 @@ See the
option.
.Ed
.\"-----------------------------------------------------------------------------
-.It Sy "opt.lg_medium_max (size_t) r-"
-.Bd -ragged -offset indent -compact
-See the
-.Dq M
-option.
-.Ed
-.\"-----------------------------------------------------------------------------
.It Sy "opt.lg_dirty_mult (ssize_t) r-"
.Bd -ragged -offset indent -compact
See the
@@ -900,11 +884,6 @@ Subpage size class interval.
Page size.
.Ed
.\"-----------------------------------------------------------------------------
-.It Sy "arenas.medium (size_t) r-"
-.Bd -ragged -offset indent -compact
-Medium size class interval.
-.Ed
-.\"-----------------------------------------------------------------------------
.It Sy "arenas.chunksize (size_t) r-"
.Bd -ragged -offset indent -compact
Chunk size.
@@ -952,15 +931,10 @@ Minimum subpage-spaced size class.
Maximum subpage-spaced size class.
.Ed
.\"-----------------------------------------------------------------------------
-.It Sy "arenas.medium_min (size_t) r-"
-.Bd -ragged -offset indent -compact
-Minimum medium-spaced size class.
-.Ed
-.\"-----------------------------------------------------------------------------
-.It Sy "arenas.medium_max (size_t) r-"
-.Bd -ragged -offset indent -compact
-Maximum medium-spaced size class.
-.Ed
+@roff_tcache@.It Sy "arenas.tcache_max (size_t) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Maximum thread-cached size class.
+@roff_tcache@.Ed
.\"-----------------------------------------------------------------------------
.It Sy "arenas.ntbins (unsigned) r-"
.Bd -ragged -offset indent -compact
@@ -982,16 +956,16 @@ Number of cacheline-spaced bin size classes.
Number of subpage-spaced bin size classes.
.Ed
.\"-----------------------------------------------------------------------------
-.It Sy "arenas.nmbins (unsigned) r-"
-.Bd -ragged -offset indent -compact
-Number of medium-spaced bin size classes.
-.Ed
-.\"-----------------------------------------------------------------------------
.It Sy "arenas.nbins (unsigned) r-"
.Bd -ragged -offset indent -compact
Total number of bin size classes.
.Ed
.\"-----------------------------------------------------------------------------
+@roff_tcache@.It Sy "arenas.nhbins (unsigned) r-"
+@roff_tcache@.Bd -ragged -offset indent -compact
+@roff_tcache@Total number of thread cache bin size classes.
+@roff_tcache@.Ed
+.\"-----------------------------------------------------------------------------
.It Sy "arenas.bin.<i>.size (size_t) r-"
.Bd -ragged -offset indent -compact
Maximum size supported by size class.
@@ -1147,26 +1121,6 @@ has not been called.
@roff_stats@Cumulative number of small allocation requests.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "stats.arenas.<i>.medium.allocated (size_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Number of bytes currently allocated by medium objects.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "stats.arenas.<i>.medium.nmalloc (uint64_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Cumulative number of allocation requests served by medium bins.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "stats.arenas.<i>.medium.ndalloc (uint64_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Cumulative number of medium objects returned to bins.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
-@roff_stats@.It Sy "stats.arenas.<i>.medium.nrequests (uint64_t) r-"
-@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Cumulative number of medium allocation requests.
-@roff_stats@.Ed
-.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.arenas.<i>.large.allocated (size_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
@roff_stats@Number of bytes currently allocated by large objects.
@@ -1174,12 +1128,19 @@ has not been called.
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.arenas.<i>.large.nmalloc (uint64_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Cumulative number of large allocation requests.
+@roff_stats@Cumulative number of large allocation requests served directly by
+@roff_stats@the arena.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.arenas.<i>.large.ndalloc (uint64_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
-@roff_stats@Cumulative number of large deallocation requests.
+@roff_stats@Cumulative number of large deallocation requests served directly by
+@roff_stats@the arena.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "stats.arenas.<i>.large.nrequests (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Cumulative number of large allocation requests.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.arenas.<i>.bins.<j>.allocated (size_t) r-"
@@ -1233,6 +1194,18 @@ has not been called.
@roff_stats@Current number of runs.
@roff_stats@.Ed
.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "stats.arenas.<i>.lruns.<j>.nmalloc (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Cumulative number of allocation requests for this size class served
+@roff_stats@directly by the arena.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
+@roff_stats@.It Sy "stats.arenas.<i>.lruns.<j>.ndalloc (uint64_t) r-"
+@roff_stats@.Bd -ragged -offset indent -compact
+@roff_stats@Cumulative number of deallocation requests for this size class
+@roff_stats@served directly by the arena.
+@roff_stats@.Ed
+.\"-----------------------------------------------------------------------------
@roff_stats@.It Sy "stats.arenas.<i>.lruns.<j>.nrequests (uint64_t) r-"
@roff_stats@.Bd -ragged -offset indent -compact
@roff_stats@Cumulative number of allocation requests for this size class.
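The mallctl entries added to the man page above (arenas.tcache_max and arenas.nhbins) can be read at run time when the tcache feature is compiled in. A minimal sketch, assuming a build configured without a function prefix (otherwise the entry point carries the configured prefix, e.g. jemalloc_mallctl):

#include <stdio.h>
#include <stddef.h>
#include "jemalloc/jemalloc.h"

int
main(void)
{
	size_t tcache_max, sz = sizeof(size_t);
	unsigned nhbins;
	size_t usz = sizeof(unsigned);

	/* Maximum size class cached by thread caches (the "M" option). */
	if (mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0) == 0)
		printf("arenas.tcache_max: %zu\n", tcache_max);

	/* Small-object bins plus large-object bins. */
	if (mallctl("arenas.nhbins", &nhbins, &usz, NULL, 0) == 0)
		printf("arenas.nhbins: %u\n", nhbins);

	return (0);
}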
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index c58f536..fc66380 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -36,23 +36,6 @@
#define LG_CSPACE_MAX_DEFAULT 9
/*
- * Maximum medium size class. This must not be more than 1/4 of a chunk
- * (LG_MEDIUM_MAX_DEFAULT <= LG_CHUNK_DEFAULT - 2).
- */
-#define LG_MEDIUM_MAX_DEFAULT 15
-
-/* Return the smallest medium size class that is >= s. */
-#define MEDIUM_CEILING(s) \
- (((s) + mspace_mask) & ~mspace_mask)
-
-/*
- * Soft limit on the number of medium size classes. Spacing between medium
- * size classes never exceeds pagesize, which can force more than NBINS_MAX
- * medium size classes.
- */
-#define NMBINS_MAX 16
-
-/*
* RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
* as small as possible such that this setting is still honored, without
* violating other constraints. The goal is to make runs as small as possible
@@ -126,7 +109,7 @@ struct arena_chunk_map_s {
*
* ? : Unallocated: Run address for first/last pages, unset for internal
* pages.
- * Small/medium: Don't care.
+ * Small: Don't care.
* Large: Run size for first page, unset for trailing pages.
* - : Unused.
* d : dirty?
@@ -147,7 +130,7 @@ struct arena_chunk_map_s {
* xxxxxxxx xxxxxxxx xxxx---- ----d---
* ssssssss ssssssss ssss---- -----z--
*
- * Small/medium:
+ * Small:
* pppppppp pppppppp pppp---- -------a
* pppppppp pppppppp pppp---- -------a
* pppppppp pppppppp pppp---- -------a
@@ -386,7 +369,6 @@ struct arena_s {
extern size_t opt_lg_qspace_max;
extern size_t opt_lg_cspace_max;
-extern size_t opt_lg_medium_max;
extern ssize_t opt_lg_dirty_mult;
extern uint8_t const *small_size2bin;
@@ -399,9 +381,7 @@ extern uint8_t const *small_size2bin;
extern unsigned nqbins; /* Number of quantum-spaced bins. */
extern unsigned ncbins; /* Number of cacheline-spaced bins. */
extern unsigned nsbins; /* Number of subpage-spaced bins. */
-extern unsigned nmbins; /* Number of medium bins. */
extern unsigned nbins;
-extern unsigned mbin0; /* mbin offset (nbins - nmbins). */
#ifdef JEMALLOC_TINY
# define tspace_max ((size_t)(QUANTUM >> 1))
#endif
@@ -412,18 +392,12 @@ extern size_t cspace_max;
extern size_t sspace_min;
extern size_t sspace_max;
#define small_maxclass sspace_max
-#define medium_min PAGE_SIZE
-extern size_t medium_max;
-#define bin_maxclass medium_max
-/* Spacing between medium size classes. */
-extern size_t lg_mspace;
-extern size_t mspace_mask;
-
-#define nlclasses ((chunksize - PAGE_SIZE) >> PAGE_SHIFT)
+#define nlclasses (chunk_npages - arena_chunk_header_npages)
#ifdef JEMALLOC_TCACHE
-void arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
+void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin,
+ size_t binind
# ifdef JEMALLOC_PROF
, uint64_t prof_accumbytes
# endif
@@ -433,7 +407,7 @@ void arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
void arena_prof_accum(arena_t *arena, uint64_t accumbytes);
#endif
void *arena_malloc_small(arena_t *arena, size_t size, bool zero);
-void *arena_malloc_medium(arena_t *arena, size_t size, bool zero);
+void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
void *arena_malloc(size_t size, bool zero);
void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
size_t alloc_size);
@@ -484,7 +458,7 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
tcache_t *tcache;
if ((tcache = tcache_get()) != NULL)
- tcache_dalloc(tcache, ptr);
+ tcache_dalloc_small(tcache, ptr);
else {
#endif
arena_run_t *run;
@@ -506,8 +480,31 @@ arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
}
#endif
} else {
+#ifdef JEMALLOC_TCACHE
+ size_t size = mapelm->bits & ~PAGE_MASK;
+
assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ if (size <= tcache_maxclass) {
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ tcache_dalloc_large(tcache, ptr, size);
+ else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+ } else {
+ malloc_mutex_lock(&arena->lock);
+ arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+ }
+#else
+ assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+ malloc_mutex_lock(&arena->lock);
arena_dalloc_large(arena, chunk, ptr);
+ malloc_mutex_unlock(&arena->lock);
+#endif
}
}
#endif
diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h
index 9a39e14..7bbf21e 100644
--- a/jemalloc/include/jemalloc/internal/ctl.h
+++ b/jemalloc/include/jemalloc/internal/ctl.h
@@ -34,17 +34,12 @@ struct ctl_arena_stats_s {
#ifdef JEMALLOC_STATS
arena_stats_t astats;
- /* Aggregate stats for small/medium size classes, based on bin stats. */
+ /* Aggregate stats for small size classes, based on bin stats. */
size_t allocated_small;
uint64_t nmalloc_small;
uint64_t ndalloc_small;
uint64_t nrequests_small;
- size_t allocated_medium;
- uint64_t nmalloc_medium;
- uint64_t ndalloc_medium;
- uint64_t nrequests_medium;
-
malloc_bin_stats_t *bstats; /* nbins elements. */
malloc_large_stats_t *lstats; /* nlclasses elements. */
#endif
diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h
index 47701e6..cbf035f 100644
--- a/jemalloc/include/jemalloc/internal/stats.h
+++ b/jemalloc/include/jemalloc/internal/stats.h
@@ -35,6 +35,7 @@ struct malloc_bin_stats_s {
* cached by tcache.
*/
size_t allocated;
+
/*
* Total number of allocation/deallocation requests served directly by
* the bin. Note that tcache may allocate an object, then recycle it
@@ -77,7 +78,18 @@ struct malloc_bin_stats_s {
struct malloc_large_stats_s {
/*
- * Number of allocation requests that corresponded to this size class.
+ * Total number of allocation/deallocation requests served directly by
+ * the arena. Note that tcache may allocate an object, then recycle it
+ * many times, resulting many increments to nrequests, but only one
+ * each to nmalloc and ndalloc.
+ */
+ uint64_t nmalloc;
+ uint64_t ndalloc;
+
+ /*
+ * Number of allocation requests that correspond to this size class.
+ * This includes requests served by tcache, though tcache only
+ * periodically merges into this counter.
*/
uint64_t nrequests;
@@ -105,6 +117,7 @@ struct arena_stats_s {
size_t allocated_large;
uint64_t nmalloc_large;
uint64_t ndalloc_large;
+ uint64_t nrequests_large;
/*
* One element for each possible size class, including sizes that
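The comment added above draws a distinction that is easy to miss: for large objects, nmalloc/ndalloc count only operations served directly by the arena, while nrequests also counts allocations satisfied from a thread cache (merged in periodically). A toy model, not jemalloc code, showing the effect for a single object that keeps getting recycled through the tcache:

#include <stdio.h>
#include <inttypes.h>

int
main(void)
{
	uint64_t nmalloc = 0, ndalloc = 0, nrequests = 0;
	int i, cached = 0;

	for (i = 0; i < 100; i++) {
		if (cached == 0)
			nmalloc++;	/* tcache miss: the arena allocates a run. */
		nrequests++;		/* Every allocation request is counted. */
		cached = 1;		/* free() parks the object in the tcache. */
	}
	ndalloc++;			/* A later flush returns the run to the arena. */

	/* Prints nmalloc=1 ndalloc=1 nrequests=100. */
	printf("nmalloc=%"PRIu64" ndalloc=%"PRIu64" nrequests=%"PRIu64"\n",
	    nmalloc, ndalloc, nrequests);
	return (0);
}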
diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h
index afad709..96e9cf1 100644
--- a/jemalloc/include/jemalloc/internal/tcache.h
+++ b/jemalloc/include/jemalloc/internal/tcache.h
@@ -6,20 +6,27 @@ typedef struct tcache_bin_s tcache_bin_t;
typedef struct tcache_s tcache_t;
/*
- * Absolute maximum number of cache slots for each bin in the thread cache.
- * This is an additional constraint beyond that imposed as: twice the number of
- * regions per run for this size class.
+ * Absolute maximum number of cache slots for each small bin in the thread
+ * cache. This is an additional constraint beyond that imposed as: twice the
+ * number of regions per run for this size class.
*
* This constant must be an even number.
*/
-#define TCACHE_NSLOTS_MAX 200
- /*
- * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
- * events between full GC sweeps (-1: disabled). Integer rounding may cause
- * the actual number to be slightly higher, since GC is performed
- * incrementally.
- */
-#define LG_TCACHE_GC_SWEEP_DEFAULT 13
+#define TCACHE_NSLOTS_SMALL_MAX 200
+
+/* Number of cache slots for large size classes. */
+#define TCACHE_NSLOTS_LARGE 20
+
+/* (1U << opt_lg_tcache_maxclass) is used to compute tcache_maxclass. */
+#define LG_TCACHE_MAXCLASS_DEFAULT 15
+
+/*
+ * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation
+ * events between full GC sweeps (-1: disabled). Integer rounding may cause
+ * the actual number to be slightly higher, since GC is performed
+ * incrementally.
+ */
+#define LG_TCACHE_GC_SWEEP_DEFAULT 13
#endif /* JEMALLOC_H_TYPES */
/******************************************************************************/
@@ -54,22 +61,38 @@ struct tcache_s {
#ifdef JEMALLOC_H_EXTERNS
extern bool opt_tcache;
+extern ssize_t opt_lg_tcache_maxclass;
extern ssize_t opt_lg_tcache_gc_sweep;
/* Map of thread-specific caches. */
extern __thread tcache_t *tcache_tls
JEMALLOC_ATTR(tls_model("initial-exec"));
+/*
+ * Number of tcache bins. There are nbins small-object bins, plus 0 or more
+ * large-object bins.
+ */
+extern size_t nhbins;
+
+/* Maximum cached size class. */
+extern size_t tcache_maxclass;
+
/* Number of tcache allocation/deallocation events between incremental GCs. */
extern unsigned tcache_gc_incr;
-void tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
+void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache_t *tcache
+#endif
+ );
+void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
);
tcache_t *tcache_create(arena_t *arena);
-void *tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind);
+void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin,
+ size_t binind);
void tcache_destroy(tcache_t *tcache);
#ifdef JEMALLOC_STATS
void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
@@ -83,9 +106,11 @@ void tcache_boot(void);
#ifndef JEMALLOC_ENABLE_INLINE
void tcache_event(tcache_t *tcache);
tcache_t *tcache_get(void);
-void *tcache_bin_alloc(tcache_bin_t *tbin);
-void *tcache_alloc(tcache_t *tcache, size_t size, bool zero);
-void tcache_dalloc(tcache_t *tcache, void *ptr);
+void *tcache_alloc_easy(tcache_bin_t *tbin);
+void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero);
+void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero);
+void tcache_dalloc_small(tcache_t *tcache, void *ptr);
+void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size);
#endif
#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_))
@@ -128,25 +153,36 @@ tcache_event(tcache_t *tcache)
* Flush (ceiling) 3/4 of the objects below the low
* water mark.
*/
- tcache_bin_flush(tbin, binind, tbin->ncached -
- tbin->low_water + (tbin->low_water >> 2)
+ if (binind < nbins) {
+ tcache_bin_flush_small(tbin, binind,
+ tbin->ncached - tbin->low_water +
+ (tbin->low_water >> 2)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ } else {
+ tcache_bin_flush_large(tbin, binind,
+ tbin->ncached - tbin->low_water +
+ (tbin->low_water >> 2)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
- , tcache
+ , tcache
#endif
- );
+ );
+ }
}
tbin->low_water = tbin->ncached;
tbin->high_water = tbin->ncached;
tcache->next_gc_bin++;
- if (tcache->next_gc_bin == nbins)
+ if (tcache->next_gc_bin == nhbins)
tcache->next_gc_bin = 0;
tcache->ev_cnt = 0;
}
}
JEMALLOC_INLINE void *
-tcache_bin_alloc(tcache_bin_t *tbin)
+tcache_alloc_easy(tcache_bin_t *tbin)
{
void *ret;
@@ -161,23 +197,18 @@ tcache_bin_alloc(tcache_bin_t *tbin)
}
JEMALLOC_INLINE void *
-tcache_alloc(tcache_t *tcache, size_t size, bool zero)
+tcache_alloc_small(tcache_t *tcache, size_t size, bool zero)
{
void *ret;
size_t binind;
tcache_bin_t *tbin;
- if (size <= small_maxclass)
- binind = small_size2bin[size];
- else {
- binind = mbin0 + ((MEDIUM_CEILING(size) - medium_min) >>
- lg_mspace);
- }
+ binind = small_size2bin[size];
assert(binind < nbins);
tbin = &tcache->tbins[binind];
- ret = tcache_bin_alloc(tbin);
+ ret = tcache_alloc_easy(tbin);
if (ret == NULL) {
- ret = tcache_alloc_hard(tcache, tbin, binind);
+ ret = tcache_alloc_small_hard(tcache, tbin, binind);
if (ret == NULL)
return (NULL);
}
@@ -203,8 +234,52 @@ tcache_alloc(tcache_t *tcache, size_t size, bool zero)
return (ret);
}
+JEMALLOC_INLINE void *
+tcache_alloc_large(tcache_t *tcache, size_t size, bool zero)
+{
+ void *ret;
+ size_t binind;
+ tcache_bin_t *tbin;
+
+ size = PAGE_CEILING(size);
+ assert(size <= tcache_maxclass);
+ binind = nbins + (size >> PAGE_SHIFT) - 1;
+ assert(binind < nhbins);
+ tbin = &tcache->tbins[binind];
+ ret = tcache_alloc_easy(tbin);
+ if (ret == NULL) {
+ /*
+ * Only allocate one large object at a time, because it's quite
+ * expensive to create one and not use it.
+ */
+ ret = arena_malloc_large(tcache->arena, size, zero);
+ if (ret == NULL)
+ return (NULL);
+ } else {
+ if (zero == false) {
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ret, 0xa5, size);
+ else if (opt_zero)
+ memset(ret, 0, size);
+#endif
+ } else
+ memset(ret, 0, size);
+
+#ifdef JEMALLOC_STATS
+ tbin->tstats.nrequests++;
+#endif
+#ifdef JEMALLOC_PROF
+ tcache->prof_accumbytes += size;
+#endif
+ }
+
+ tcache_event(tcache);
+ return (ret);
+}
+
JEMALLOC_INLINE void
-tcache_dalloc(tcache_t *tcache, void *ptr)
+tcache_dalloc_small(tcache_t *tcache, void *ptr)
{
arena_t *arena;
arena_chunk_t *chunk;
@@ -234,7 +309,47 @@ tcache_dalloc(tcache_t *tcache, void *ptr)
tbin = &tcache->tbins[binind];
if (tbin->ncached == tbin->ncached_max) {
- tcache_bin_flush(tbin, binind, (tbin->ncached_max >> 1)
+ tcache_bin_flush_small(tbin, binind, (tbin->ncached_max >> 1)
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+ }
+ assert(tbin->ncached < tbin->ncached_max);
+ *(void **)ptr = tbin->avail;
+ tbin->avail = ptr;
+ tbin->ncached++;
+ if (tbin->ncached > tbin->high_water)
+ tbin->high_water = tbin->ncached;
+
+ tcache_event(tcache);
+}
+
+JEMALLOC_INLINE void
+tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size)
+{
+ arena_t *arena;
+ arena_chunk_t *chunk;
+ size_t pageind, binind;
+ tcache_bin_t *tbin;
+ arena_chunk_map_t *mapelm;
+
+ assert((size & PAGE_MASK) == 0);
+
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ arena = chunk->arena;
+ pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
+ mapelm = &chunk->map[pageind];
+ binind = nbins + (size >> PAGE_SHIFT) - 1;
+
+#ifdef JEMALLOC_FILL
+ if (opt_junk)
+ memset(ptr, 0x5a, bin->reg_size);
+#endif
+
+ tbin = &tcache->tbins[binind];
+ if (tbin->ncached == tbin->ncached_max) {
+ tcache_bin_flush_large(tbin, binind, (tbin->ncached_max >> 1)
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
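The large-object cache bins sit directly after the small bins, one per page multiple up to tcache_maxclass, using the index formula binind = nbins + (size >> PAGE_SHIFT) - 1 from tcache_alloc_large()/tcache_dalloc_large() above. A standalone illustration of that mapping, assuming 4 KiB pages, the default LG_TCACHE_MAXCLASS_DEFAULT of 15 (32 KiB), and a hypothetical small-bin count of 28:

#include <stdio.h>

#define PAGE_SHIFT	12		/* Assumed: 4 KiB pages. */
#define TCACHE_MAXCLASS	(1U << 15)	/* Default cap: 32 KiB. */

int
main(void)
{
	unsigned nbins = 28;	/* Hypothetical number of small bins. */
	unsigned size;

	for (size = 1U << PAGE_SHIFT; size <= TCACHE_MAXCLASS;
	    size += 1U << PAGE_SHIFT) {
		/* Same formula as the diff: one bin per page multiple. */
		printf("%2u KiB -> bin %u\n", size >> 10,
		    nbins + (size >> PAGE_SHIFT) - 1);
	}
	/* 4..32 KiB map to bins 28..35, so nhbins == nbins + 8 here. */
	return (0);
}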
diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in
index 4b4ea7d..ddf960d 100644
--- a/jemalloc/include/jemalloc/jemalloc_defs.h.in
+++ b/jemalloc/include/jemalloc/jemalloc_defs.h.in
@@ -61,9 +61,9 @@
#undef JEMALLOC_TINY
/*
- * JEMALLOC_TCACHE enables a thread-specific caching layer for small and medium
- * objects. This makes it possible to allocate/deallocate objects without any
- * locking when the cache is in the steady state.
+ * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
+ * This makes it possible to allocate/deallocate objects without any locking
+ * when the cache is in the steady state.
*/
#undef JEMALLOC_TCACHE
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index a3d6654..435cf69 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -6,7 +6,6 @@
size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
-size_t opt_lg_medium_max = LG_MEDIUM_MAX_DEFAULT;
ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
uint8_t const *small_size2bin;
@@ -14,15 +13,12 @@ uint8_t const *small_size2bin;
unsigned nqbins;
unsigned ncbins;
unsigned nsbins;
-unsigned nmbins;
unsigned nbins;
-unsigned mbin0;
size_t qspace_max;
size_t cspace_min;
size_t cspace_max;
size_t sspace_min;
size_t sspace_max;
-size_t medium_max;
size_t lg_mspace;
size_t mspace_mask;
@@ -178,8 +174,6 @@ static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
-static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
-static bool arena_is_large(const void *ptr);
static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
arena_run_t *run, arena_bin_t *bin);
static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
@@ -1077,7 +1071,7 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes)
#ifdef JEMALLOC_TCACHE
void
-arena_tcache_fill(arena_t *arena, tcache_bin_t *tbin, size_t binind
+arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind
# ifdef JEMALLOC_PROF
, uint64_t prof_accumbytes
# endif
@@ -1239,7 +1233,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
size_t binind;
binind = small_size2bin[size];
- assert(binind < mbin0);
+ assert(binind < nbins);
bin = &arena->bins[binind];
size = bin->reg_size;
@@ -1282,58 +1276,6 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero)
}
void *
-arena_malloc_medium(arena_t *arena, size_t size, bool zero)
-{
- void *ret;
- arena_bin_t *bin;
- arena_run_t *run;
- size_t binind;
-
- size = MEDIUM_CEILING(size);
- binind = mbin0 + ((size - medium_min) >> lg_mspace);
- assert(binind < nbins);
- bin = &arena->bins[binind];
- assert(bin->reg_size == size);
-
- malloc_mutex_lock(&bin->lock);
- if ((run = bin->runcur) != NULL && run->nfree > 0)
- ret = arena_run_reg_alloc(run, bin);
- else
- ret = arena_bin_malloc_hard(arena, bin);
-
- if (ret == NULL) {
- malloc_mutex_unlock(&bin->lock);
- return (NULL);
- }
-
-#ifdef JEMALLOC_STATS
- bin->stats.allocated += size;
- bin->stats.nmalloc++;
- bin->stats.nrequests++;
-#endif
- malloc_mutex_unlock(&bin->lock);
-#ifdef JEMALLOC_PROF
- if (isthreaded == false) {
- malloc_mutex_lock(&arena->lock);
- arena_prof_accum(arena, size);
- malloc_mutex_unlock(&arena->lock);
- }
-#endif
-
- if (zero == false) {
-#ifdef JEMALLOC_FILL
- if (opt_junk)
- memset(ret, 0xa5, size);
- else if (opt_zero)
- memset(ret, 0, size);
-#endif
- } else
- memset(ret, 0, size);
-
- return (ret);
-}
-
-static void *
arena_malloc_large(arena_t *arena, size_t size, bool zero)
{
void *ret;
@@ -1348,7 +1290,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero)
}
#ifdef JEMALLOC_STATS
arena->stats.nmalloc_large++;
+ arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
@@ -1381,21 +1325,31 @@ arena_malloc(size_t size, bool zero)
assert(size != 0);
assert(QUANTUM_CEILING(size) <= arena_maxclass);
- if (size <= bin_maxclass) {
+ if (size <= small_maxclass) {
#ifdef JEMALLOC_TCACHE
tcache_t *tcache;
if ((tcache = tcache_get()) != NULL)
- return (tcache_alloc(tcache, size, zero));
+ return (tcache_alloc_small(tcache, size, zero));
+ else
+
#endif
- if (size <= small_maxclass)
return (arena_malloc_small(choose_arena(), size, zero));
- else {
- return (arena_malloc_medium(choose_arena(), size,
- zero));
- }
- } else
- return (arena_malloc_large(choose_arena(), size, zero));
+ } else {
+#ifdef JEMALLOC_TCACHE
+ if (size <= tcache_maxclass) {
+ tcache_t *tcache;
+
+ if ((tcache = tcache_get()) != NULL)
+ return (tcache_alloc_large(tcache, size, zero));
+ else {
+ return (arena_malloc_large(choose_arena(),
+ size, zero));
+ }
+ } else
+#endif
+ return (arena_malloc_large(choose_arena(), size, zero));
+ }
}
/* Only handles large allocations that require more than page alignment. */
@@ -1444,7 +1398,9 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
#ifdef JEMALLOC_STATS
arena->stats.nmalloc_large++;
+ arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
@@ -1464,22 +1420,6 @@ arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
return (ret);
}
-static bool
-arena_is_large(const void *ptr)
-{
- arena_chunk_t *chunk;
- size_t pageind, mapbits;
-
- assert(ptr != NULL);
- assert(CHUNK_ADDR2BASE(ptr) != ptr);
-
- chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
- pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
- mapbits = chunk->map[pageind].bits;
- assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
- return ((mapbits & CHUNK_MAP_LARGE) != 0);
-}
-
/* Return the size of the allocation pointed to by ptr. */
size_t
arena_salloc(const void *ptr)
@@ -1781,8 +1721,11 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty,
astats->allocated_large += arena->stats.allocated_large;
astats->nmalloc_large += arena->stats.nmalloc_large;
astats->ndalloc_large += arena->stats.ndalloc_large;
+ astats->nrequests_large += arena->stats.nrequests_large;
for (i = 0; i < nlclasses; i++) {
+ lstats[i].nmalloc += arena->stats.lstats[i].nmalloc;
+ lstats[i].ndalloc += arena->stats.lstats[i].ndalloc;
lstats[i].nrequests += arena->stats.lstats[i].nrequests;
lstats[i].highruns += arena->stats.lstats[i].highruns;
lstats[i].curruns += arena->stats.lstats[i].curruns;
@@ -1815,8 +1758,6 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
/* Large allocation. */
- malloc_mutex_lock(&arena->lock);
-
#ifdef JEMALLOC_FILL
# ifndef JEMALLOC_STATS
if (opt_junk)
@@ -1838,12 +1779,12 @@ arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
#ifdef JEMALLOC_STATS
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
#endif
}
arena_run_dalloc(arena, (arena_run_t *)ptr, true);
- malloc_mutex_unlock(&arena->lock);
}
static void
@@ -1863,10 +1804,13 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#ifdef JEMALLOC_STATS
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= oldsize;
+ arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
arena->stats.nmalloc_large++;
+ arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
@@ -1910,10 +1854,13 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
#ifdef JEMALLOC_STATS
arena->stats.ndalloc_large++;
arena->stats.allocated_large -= oldsize;
+ arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++;
arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
arena->stats.nmalloc_large++;
+ arena->stats.nrequests_large++;
arena->stats.allocated_large += size;
+ arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
@@ -1988,30 +1935,15 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize)
void *ret;
size_t copysize;
- /*
- * Try to avoid moving the allocation.
- *
- * posix_memalign() can cause allocation of "large" objects that are
- * smaller than bin_maxclass (in order to meet alignment requirements).
- * Therefore, do not assume that (oldsize <= bin_maxclass) indicates
- * ptr refers to a bin-allocated object.
- */
+ /* Try to avoid moving the allocation. */
if (oldsize <= arena_maxclass) {
- if (arena_is_large(ptr) == false ) {
- if (size <= small_maxclass) {
- if (oldsize <= small_maxclass &&
- small_size2bin[size] ==
- small_size2bin[oldsize])
- goto IN_PLACE;
- } else if (size <= bin_maxclass) {
- if (small_maxclass < oldsize && oldsize <=
- bin_maxclass && MEDIUM_CEILING(size) ==
- MEDIUM_CEILING(oldsize))
- goto IN_PLACE;
- }
+ if (oldsize <= small_maxclass) {
+ if (size <= small_maxclass && small_size2bin[size] ==
+ small_size2bin[oldsize])
+ goto IN_PLACE;
} else {
assert(size <= arena_maxclass);
- if (size > bin_maxclass) {
+ if (size > small_maxclass) {
if (arena_ralloc_large(ptr, size, oldsize) ==
false)
return (ptr);
@@ -2019,23 +1951,6 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize)
}
}
- /* Try to avoid moving the allocation. */
- if (size <= small_maxclass) {
- if (oldsize <= small_maxclass && small_size2bin[size] ==
- small_size2bin[oldsize])
- goto IN_PLACE;
- } else if (size <= bin_maxclass) {
- if (small_maxclass < oldsize && oldsize <= bin_maxclass &&
- MEDIUM_CEILING(size) == MEDIUM_CEILING(oldsize))
- goto IN_PLACE;
- } else {
- if (bin_maxclass < oldsize && oldsize <= arena_maxclass) {
- assert(size > bin_maxclass);
- if (arena_ralloc_large(ptr, size, oldsize) == false)
- return (ptr);
- }
- }
-
/*
* If we get here, then size and oldsize are different enough that we
* need to move the object. In that case, fall back to allocating new
@@ -2074,13 +1989,12 @@ arena_new(arena_t *arena, unsigned ind)
#ifdef JEMALLOC_STATS
memset(&arena->stats, 0, sizeof(arena_stats_t));
- arena->stats.lstats = (malloc_large_stats_t *)base_alloc(
- sizeof(malloc_large_stats_t) * ((chunksize - PAGE_SIZE) >>
- PAGE_SHIFT));
+ arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses *
+ sizeof(malloc_large_stats_t));
if (arena->stats.lstats == NULL)
return (true);
- memset(arena->stats.lstats, 0, sizeof(malloc_large_stats_t) *
- ((chunksize - PAGE_SIZE) >> PAGE_SHIFT));
+ memset(arena->stats.lstats, 0, nlclasses *
+ sizeof(malloc_large_stats_t));
# ifdef JEMALLOC_TCACHE
ql_new(&arena->tcache_ql);
# endif
@@ -2159,7 +2073,7 @@ arena_new(arena_t *arena, unsigned ind)
}
/* Subpage-spaced bins. */
- for (; i < ntbins + nqbins + ncbins + nsbins; i++) {
+ for (; i < nbins; i++) {
bin = &arena->bins[i];
if (malloc_mutex_init(&bin->lock))
return (true);
@@ -2176,24 +2090,6 @@ arena_new(arena_t *arena, unsigned ind)
#endif
}
- /* Medium bins. */
- for (; i < nbins; i++) {
- bin = &arena->bins[i];
- if (malloc_mutex_init(&bin->lock))
- return (true);
- bin->runcur = NULL;
- arena_run_tree_new(&bin->runs);
-
- bin->reg_size = medium_min + ((i - (ntbins + nqbins + ncbins +
- nsbins)) << lg_mspace);
-
- prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
-
-#ifdef JEMALLOC_STATS
- memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
-#endif
- }
-
#ifdef JEMALLOC_DEBUG
arena->magic = ARENA_MAGIC;
#endif
@@ -2355,7 +2251,6 @@ arena_boot(void)
sspace_min += SUBPAGE;
assert(sspace_min < PAGE_SIZE);
sspace_max = PAGE_SIZE - SUBPAGE;
- medium_max = (1U << opt_lg_medium_max);
#ifdef JEMALLOC_TINY
assert(LG_QUANTUM >= LG_TINY_MIN);
@@ -2364,24 +2259,9 @@ arena_boot(void)
nqbins = qspace_max >> LG_QUANTUM;
ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
+ nbins = ntbins + nqbins + ncbins + nsbins;
/*
- * Compute medium size class spacing and the number of medium size
- * classes. Limit spacing to no more than pagesize, but if possible
- * use the smallest spacing that does not exceed NMBINS_MAX medium size
- * classes.
- */
- lg_mspace = LG_SUBPAGE;
- nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
- while (lg_mspace < PAGE_SHIFT && nmbins > NMBINS_MAX) {
- lg_mspace = lg_mspace + 1;
- nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
- }
- mspace_mask = (1U << lg_mspace) - 1U;
-
- mbin0 = ntbins + nqbins + ncbins + nsbins;
- nbins = mbin0 + nmbins;
- /*
* The small_size2bin lookup table uses uint8_t to encode each bin
* index, so we cannot support more than 256 small size classes. This
* limit is difficult to exceed (not even possible with 16B quantum and
@@ -2389,10 +2269,10 @@ arena_boot(void)
* nonetheless we need to protect against this case in order to avoid
* undefined behavior.
*/
- if (mbin0 > 256) {
+ if (nbins > 256) {
char line_buf[UMAX2S_BUFSIZE];
malloc_write("<jemalloc>: Too many small size classes (");
- malloc_write(umax2s(mbin0, 10, line_buf));
+ malloc_write(umax2s(nbins, 10, line_buf));
malloc_write(" > max 256)\n");
abort();
}
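One consequence visible above: arena_dalloc_large() no longer takes arena->lock itself, so callers (including the new tcache flush path in src/tcache.c, not shown in this excerpt) can return a whole batch of large objects under a single lock acquisition. A hypothetical sketch of that batching pattern, simplified by assuming every pointer belongs to the same chunk and arena:

/*
 * Hypothetical batch flush (illustration only; the real loop is
 * tcache_bin_flush_large(), which regroups pointers by arena).
 */
static void
flush_large_batch(arena_t *arena, arena_chunk_t *chunk, void **ptrs,
    unsigned nptrs)
{
	unsigned i;

	malloc_mutex_lock(&arena->lock);	/* One acquisition per batch. */
	for (i = 0; i < nptrs; i++)
		arena_dalloc_large(arena, chunk, ptrs[i]);
	malloc_mutex_unlock(&arena->lock);
}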
diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c
index 1644f73..2249102 100644
--- a/jemalloc/src/ctl.c
+++ b/jemalloc/src/ctl.c
@@ -84,7 +84,6 @@ CTL_PROTO(opt_prof_leak)
CTL_PROTO(opt_stats_print)
CTL_PROTO(opt_lg_qspace_max)
CTL_PROTO(opt_lg_cspace_max)
-CTL_PROTO(opt_lg_medium_max)
CTL_PROTO(opt_lg_dirty_mult)
CTL_PROTO(opt_lg_chunk)
#ifdef JEMALLOC_SWAP
@@ -102,7 +101,6 @@ CTL_PROTO(arenas_quantum)
CTL_PROTO(arenas_cacheline)
CTL_PROTO(arenas_subpage)
CTL_PROTO(arenas_pagesize)
-CTL_PROTO(arenas_medium)
CTL_PROTO(arenas_chunksize)
#ifdef JEMALLOC_TINY
CTL_PROTO(arenas_tspace_min)
@@ -114,14 +112,17 @@ CTL_PROTO(arenas_cspace_min)
CTL_PROTO(arenas_cspace_max)
CTL_PROTO(arenas_sspace_min)
CTL_PROTO(arenas_sspace_max)
-CTL_PROTO(arenas_medium_min)
-CTL_PROTO(arenas_medium_max)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_tcache_max)
+#endif
CTL_PROTO(arenas_ntbins)
CTL_PROTO(arenas_nqbins)
CTL_PROTO(arenas_ncbins)
CTL_PROTO(arenas_nsbins)
-CTL_PROTO(arenas_nmbins)
CTL_PROTO(arenas_nbins)
+#ifdef JEMALLOC_TCACHE
+CTL_PROTO(arenas_nhbins)
+#endif
CTL_PROTO(arenas_nlruns)
#ifdef JEMALLOC_PROF
CTL_PROTO(prof_dump)
@@ -138,13 +139,10 @@ CTL_PROTO(stats_arenas_i_small_allocated)
CTL_PROTO(stats_arenas_i_small_nmalloc)
CTL_PROTO(stats_arenas_i_small_ndalloc)
CTL_PROTO(stats_arenas_i_small_nrequests)
-CTL_PROTO(stats_arenas_i_medium_allocated)
-CTL_PROTO(stats_arenas_i_medium_nmalloc)
-CTL_PROTO(stats_arenas_i_medium_ndalloc)
-CTL_PROTO(stats_arenas_i_medium_nrequests)
CTL_PROTO(stats_arenas_i_large_allocated)
CTL_PROTO(stats_arenas_i_large_nmalloc)
CTL_PROTO(stats_arenas_i_large_ndalloc)
+CTL_PROTO(stats_arenas_i_large_nrequests)
CTL_PROTO(stats_arenas_i_bins_j_allocated)
CTL_PROTO(stats_arenas_i_bins_j_nmalloc)
CTL_PROTO(stats_arenas_i_bins_j_ndalloc)
@@ -158,6 +156,8 @@ CTL_PROTO(stats_arenas_i_bins_j_nreruns)
CTL_PROTO(stats_arenas_i_bins_j_highruns)
CTL_PROTO(stats_arenas_i_bins_j_curruns)
INDEX_PROTO(stats_arenas_i_bins_j)
+CTL_PROTO(stats_arenas_i_lruns_j_nmalloc)
+CTL_PROTO(stats_arenas_i_lruns_j_ndalloc)
CTL_PROTO(stats_arenas_i_lruns_j_nrequests)
CTL_PROTO(stats_arenas_i_lruns_j_highruns)
CTL_PROTO(stats_arenas_i_lruns_j_curruns)
@@ -255,7 +255,6 @@ static const ctl_node_t opt_node[] = {
{NAME("stats_print"), CTL(opt_stats_print)},
{NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)},
{NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)},
- {NAME("lg_medium_max"), CTL(opt_lg_medium_max)},
{NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)},
{NAME("lg_chunk"), CTL(opt_lg_chunk)}
#ifdef JEMALLOC_SWAP
@@ -295,7 +294,6 @@ static const ctl_node_t arenas_node[] = {
{NAME("cacheline"), CTL(arenas_cacheline)},
{NAME("subpage"), CTL(arenas_subpage)},
{NAME("pagesize"), CTL(arenas_pagesize)},
- {NAME("medium"), CTL(arenas_medium)},
{NAME("chunksize"), CTL(arenas_chunksize)},
#ifdef JEMALLOC_TINY
{NAME("tspace_min"), CTL(arenas_tspace_min)},
@@ -307,14 +305,17 @@ static const ctl_node_t arenas_node[] = {
{NAME("cspace_max"), CTL(arenas_cspace_max)},
{NAME("sspace_min"), CTL(arenas_sspace_min)},
{NAME("sspace_max"), CTL(arenas_sspace_max)},
- {NAME("medium_min"), CTL(arenas_medium_min)},
- {NAME("medium_max"), CTL(arenas_medium_max)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("tcache_max"), CTL(arenas_tcache_max)},
+#endif
{NAME("ntbins"), CTL(arenas_ntbins)},
{NAME("nqbins"), CTL(arenas_nqbins)},
{NAME("ncbins"), CTL(arenas_ncbins)},
{NAME("nsbins"), CTL(arenas_nsbins)},
- {NAME("nmbins"), CTL(arenas_nmbins)},
{NAME("nbins"), CTL(arenas_nbins)},
+#ifdef JEMALLOC_TCACHE
+ {NAME("nhbins"), CTL(arenas_nhbins)},
+#endif
{NAME("bin"), CHILD(arenas_bin)},
{NAME("nlruns"), CTL(arenas_nlruns)},
{NAME("lrun"), CHILD(arenas_lrun)}
@@ -347,17 +348,11 @@ static const ctl_node_t stats_arenas_i_small_node[] = {
{NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}
};
-static const ctl_node_t stats_arenas_i_medium_node[] = {
- {NAME("allocated"), CTL(stats_arenas_i_medium_allocated)},
- {NAME("nmalloc"), CTL(stats_arenas_i_medium_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_medium_ndalloc)},
- {NAME("nrequests"), CTL(stats_arenas_i_medium_nrequests)}
-};
-
static const ctl_node_t stats_arenas_i_large_node[] = {
{NAME("allocated"), CTL(stats_arenas_i_large_allocated)},
{NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)},
- {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}
+ {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)},
+ {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}
};
static const ctl_node_t stats_arenas_i_bins_j_node[] = {
@@ -383,6 +378,8 @@ static const ctl_node_t stats_arenas_i_bins_node[] = {
};
static const ctl_node_t stats_arenas_i_lruns_j_node[] = {
+ {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)},
+ {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)},
{NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)},
{NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)},
{NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)}
@@ -406,7 +403,6 @@ static const ctl_node_t stats_arenas_i_node[] = {
{NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)},
{NAME("purged"), CTL(stats_arenas_i_purged)},
{NAME("small"), CHILD(stats_arenas_i_small)},
- {NAME("medium"), CHILD(stats_arenas_i_medium)},
{NAME("large"), CHILD(stats_arenas_i_large)},
{NAME("bins"), CHILD(stats_arenas_i_bins)},
{NAME("lruns"), CHILD(stats_arenas_i_lruns)}
@@ -505,10 +501,6 @@ ctl_arena_clear(ctl_arena_stats_t *astats)
astats->nmalloc_small = 0;
astats->ndalloc_small = 0;
astats->nrequests_small = 0;
- astats->allocated_medium = 0;
- astats->nmalloc_medium = 0;
- astats->ndalloc_medium = 0;
- astats->nrequests_medium = 0;
memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t));
memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t));
#endif
@@ -523,19 +515,12 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena)
arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty,
&cstats->astats, cstats->bstats, cstats->lstats);
- for (i = 0; i < mbin0; i++) {
+ for (i = 0; i < nbins; i++) {
cstats->allocated_small += cstats->bstats[i].allocated;
cstats->nmalloc_small += cstats->bstats[i].nmalloc;
cstats->ndalloc_small += cstats->bstats[i].ndalloc;
cstats->nrequests_small += cstats->bstats[i].nrequests;
}
-
- for (; i < nbins; i++) {
- cstats->allocated_medium += cstats->bstats[i].allocated;
- cstats->nmalloc_medium += cstats->bstats[i].nmalloc;
- cstats->ndalloc_medium += cstats->bstats[i].ndalloc;
- cstats->nrequests_medium += cstats->bstats[i].nrequests;
- }
}
static void
@@ -556,16 +541,14 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats)
sstats->ndalloc_small += astats->ndalloc_small;
sstats->nrequests_small += astats->nrequests_small;
- sstats->allocated_medium += astats->allocated_medium;
- sstats->nmalloc_medium += astats->nmalloc_medium;
- sstats->ndalloc_medium += astats->ndalloc_medium;
- sstats->nrequests_medium += astats->nrequests_medium;
-
sstats->astats.allocated_large += astats->astats.allocated_large;
sstats->astats.nmalloc_large += astats->astats.nmalloc_large;
sstats->astats.ndalloc_large += astats->astats.ndalloc_large;
+ sstats->astats.nrequests_large += astats->astats.nrequests_large;
for (i = 0; i < nlclasses; i++) {
+ sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc;
+ sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc;
sstats->lstats[i].nrequests += astats->lstats[i].nrequests;
sstats->lstats[i].highruns += astats->lstats[i].highruns;
sstats->lstats[i].curruns += astats->lstats[i].curruns;
@@ -648,7 +631,6 @@ ctl_refresh(void)
#ifdef JEMALLOC_STATS
ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small
- + ctl_stats.arenas[narenas].allocated_medium
+ ctl_stats.arenas[narenas].astats.allocated_large
+ ctl_stats.huge.allocated;
ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT)
@@ -1178,7 +1160,6 @@ CTL_RO_GEN(opt_prof_leak, opt_prof_leak, bool)
CTL_RO_GEN(opt_stats_print, opt_stats_print, bool)
CTL_RO_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
CTL_RO_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
-CTL_RO_GEN(opt_lg_medium_max, opt_lg_medium_max, size_t)
CTL_RO_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
CTL_RO_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
#ifdef JEMALLOC_SWAP
@@ -1239,7 +1220,6 @@ CTL_RO_GEN(arenas_quantum, QUANTUM, size_t)
CTL_RO_GEN(arenas_cacheline, CACHELINE, size_t)
CTL_RO_GEN(arenas_subpage, SUBPAGE, size_t)
CTL_RO_GEN(arenas_pagesize, PAGE_SIZE, size_t)
-CTL_RO_GEN(arenas_medium, (1U << lg_mspace), size_t)
CTL_RO_GEN(arenas_chunksize, chunksize, size_t)
#ifdef JEMALLOC_TINY
CTL_RO_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
@@ -1251,14 +1231,17 @@ CTL_RO_GEN(arenas_cspace_min, cspace_min, size_t)
CTL_RO_GEN(arenas_cspace_max, cspace_max, size_t)
CTL_RO_GEN(arenas_sspace_min, sspace_min, size_t)
CTL_RO_GEN(arenas_sspace_max, sspace_max, size_t)
-CTL_RO_GEN(arenas_medium_min, medium_min, size_t)
-CTL_RO_GEN(arenas_medium_max, medium_max, size_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(arenas_tcache_max, tcache_maxclass, size_t)
+#endif
CTL_RO_GEN(arenas_ntbins, ntbins, unsigned)
CTL_RO_GEN(arenas_nqbins, nqbins, unsigned)
CTL_RO_GEN(arenas_ncbins, ncbins, unsigned)
CTL_RO_GEN(arenas_nsbins, nsbins, unsigned)
-CTL_RO_GEN(arenas_nmbins, nmbins, unsigned)
CTL_RO_GEN(arenas_nbins, nbins, unsigned)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(arenas_nhbins, nhbins, unsigned)
+#endif
CTL_RO_GEN(arenas_nlruns, nlclasses, size_t)
/******************************************************************************/
@@ -1304,20 +1287,14 @@ CTL_RO_GEN(stats_arenas_i_small_ndalloc,
ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
CTL_RO_GEN(stats_arenas_i_small_nrequests,
ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
-CTL_RO_GEN(stats_arenas_i_medium_allocated,
- ctl_stats.arenas[mib[2]].allocated_medium, size_t)
-CTL_RO_GEN(stats_arenas_i_medium_nmalloc,
- ctl_stats.arenas[mib[2]].nmalloc_medium, uint64_t)
-CTL_RO_GEN(stats_arenas_i_medium_ndalloc,
- ctl_stats.arenas[mib[2]].ndalloc_medium, uint64_t)
-CTL_RO_GEN(stats_arenas_i_medium_nrequests,
- ctl_stats.arenas[mib[2]].nrequests_medium, uint64_t)
CTL_RO_GEN(stats_arenas_i_large_allocated,
ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
CTL_RO_GEN(stats_arenas_i_large_nmalloc,
ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
CTL_RO_GEN(stats_arenas_i_large_ndalloc,
ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_nrequests,
+ ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
CTL_RO_GEN(stats_arenas_i_bins_j_allocated,
ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
@@ -1351,6 +1328,10 @@ stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
return (super_stats_arenas_i_bins_j_node);
}
+CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc,
+ ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests,
ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t)
CTL_RO_GEN(stats_arenas_i_lruns_j_curruns,
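The per-size-class entries wired up above (stats.arenas.<i>.lruns.<j>.*) are easiest to read through the MIB interface, since the <i> and <j> components are numeric indices. A minimal sketch, again assuming a prefix-less build with statistics enabled at configure time; note that the statistics are a snapshot, so depending on the version the "epoch" mallctl may need to be written first to refresh it:

#include <stdio.h>
#include <inttypes.h>
#include "jemalloc/jemalloc.h"

int
main(void)
{
	size_t mib[8], miblen = sizeof(mib) / sizeof(mib[0]);
	uint64_t nmalloc;
	size_t sz = sizeof(nmalloc);

	/* Translate the name once; slots 2 and 4 hold <i> and <j>. */
	if (mallctlnametomib("stats.arenas.0.lruns.0.nmalloc", mib, &miblen))
		return (1);
	mib[2] = 0;	/* Arena index <i>. */
	mib[4] = 0;	/* Large size class <j>: 4 KiB runs with 4 KiB pages. */
	if (mallctlbymib(mib, miblen, &nmalloc, &sz, NULL, 0) == 0)
		printf("arena 0, lrun 0: nmalloc=%"PRIu64"\n", nmalloc);
	return (0);
}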
diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c
index 49c2b0b..d880769 100644
--- a/jemalloc/src/jemalloc.c
+++ b/jemalloc/src/jemalloc.c
@@ -52,17 +52,9 @@
* | | | 3584 |
* | | | 3840 |
* |========================================|
- * | Medium | 4 KiB |
- * | | 6 KiB |
+ * | Large | 4 KiB |
* | | 8 KiB |
- * | | ... |
- * | | 28 KiB |
- * | | 30 KiB |
- * | | 32 KiB |
- * |========================================|
- * | Large | 36 KiB |
- * | | 40 KiB |
- * | | 44 KiB |
+ * | | 12 KiB |
* | | ... |
* | | 1012 KiB |
* | | 1016 KiB |
@@ -76,9 +68,8 @@
*
* Different mechanisms are used according to category:
*
- * Small/medium : Each size class is segregated into its own set of runs.
- * Each run maintains a bitmap of which regions are
- * free/allocated.
+ * Small: Each size class is segregated into its own set of runs. Each run
+ * maintains a bitmap of which regions are free/allocated.
*
* Large : Each allocation is backed by a dedicated run. Metadata are stored
* in the associated arena chunk header maps.
@@ -252,6 +243,11 @@ stats_print_atexit(void)
if (arena != NULL) {
tcache_t *tcache;
+ /*
+ * tcache_stats_merge() locks bins, so if any code is
+ * introduced that acquires both arena and bin locks in
+ * the opposite order, deadlocks may result.
+ */
malloc_mutex_lock(&arena->lock);
ql_foreach(tcache, &arena->tcache_ql, link) {
tcache_stats_merge(tcache, arena);
@@ -510,14 +506,10 @@ MALLOC_OUT:
case 'k':
/*
* Chunks always require at least one
- * header page, plus enough room to
- * hold a run for the largest medium
- * size class (one page more than the
- * size).
+ * header page, plus one data page.
*/
if ((1U << (opt_lg_chunk - 1)) >=
- (2U << PAGE_SHIFT) + (1U <<
- opt_lg_medium_max))
+ (2U << PAGE_SHIFT))
opt_lg_chunk--;
break;
case 'K':
@@ -533,15 +525,17 @@ MALLOC_OUT:
opt_prof_leak = true;
break;
#endif
+#ifdef JEMALLOC_TCACHE
case 'm':
- if (opt_lg_medium_max > PAGE_SHIFT)
- opt_lg_medium_max--;
+ if (opt_lg_tcache_maxclass >= 0)
+ opt_lg_tcache_maxclass--;
break;
case 'M':
- if (opt_lg_medium_max + 1 <
- opt_lg_chunk)
- opt_lg_medium_max++;
+ if (opt_lg_tcache_maxclass + 1 <
+ (sizeof(size_t) << 3))
+ opt_lg_tcache_maxclass++;
break;
+#endif
case 'n':
opt_narenas_lshift--;
break;
@@ -725,33 +719,7 @@ MALLOC_OUT:
* For SMP systems, create more than one arena per CPU by
* default.
*/
-#ifdef JEMALLOC_TCACHE
- if (opt_tcache
-# ifdef JEMALLOC_PROF
- /*
- * Profile data storage concurrency is directly linked to
- * the number of arenas, so only drop the number of arenas
- * on behalf of enabled tcache if profiling is disabled.
- */
- && opt_prof == false
-# endif
- ) {
- /*
- * Only large object allocation/deallocation is
- * guaranteed to acquire an arena mutex, so we can get
- * away with fewer arenas than without thread caching.
- */
- opt_narenas_lshift += 1;
- } else {
-#endif
- /*
- * All allocations must acquire an arena mutex, so use
- * plenty of arenas.
- */
- opt_narenas_lshift += 2;
-#ifdef JEMALLOC_TCACHE
- }
-#endif
+ opt_narenas_lshift += 2;
}
/* Determine how many arenas to use. */
diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c
index c57db47..a5ec1f1 100644
--- a/jemalloc/src/stats.c
+++ b/jemalloc/src/stats.c
@@ -234,8 +234,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" :
- j < ntbins_ + nqbins + ncbins + nsbins ? "S"
- : "M",
+ "S",
reg_size, nregs, run_size / pagesize,
allocated, nmalloc, ndalloc, nrequests,
nfills, nflushes, nruns, reruns, highruns,
@@ -248,8 +247,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque,
j,
j < ntbins_ ? "T" : j < ntbins_ + nqbins ?
"Q" : j < ntbins_ + nqbins + ncbins ? "C" :
- j < ntbins_ + nqbins + ncbins + nsbins ? "S"
- : "M",
+ "S",
reg_size, nregs, run_size / pagesize,
allocated, nmalloc, ndalloc, nruns, reruns,
highruns, curruns);
@@ -278,12 +276,17 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
CTL_GET("arenas.pagesize", &pagesize, size_t);
malloc_cprintf(write_cb, cbopaque,
- "large: size pages nrequests maxruns curruns\n");
+ "large: size pages nmalloc ndalloc nrequests"
+ " maxruns curruns\n");
CTL_GET("arenas.nlruns", &nlruns, size_t);
for (j = 0, gap_start = -1; j < nlruns; j++) {
- uint64_t nrequests;
+ uint64_t nmalloc, ndalloc, nrequests;
size_t run_size, highruns, curruns;
+ CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc,
+ uint64_t);
+ CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc,
+ uint64_t);
CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests,
uint64_t);
if (nrequests == 0) {
@@ -301,9 +304,10 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque,
gap_start = -1;
}
malloc_cprintf(write_cb, cbopaque,
- "%13zu %5zu %12"PRIu64" %12zu %12zu\n",
- run_size, run_size / pagesize, nrequests, highruns,
- curruns);
+ "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64
+ " %12zu %12zu\n",
+ run_size, run_size / pagesize, nmalloc, ndalloc,
+ nrequests, highruns, curruns);
}
}
if (gap_start != -1)
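The new per-size-class counters printed above are also readable by applications through the mallctl interface. A minimal consumer sketch, assuming a build with statistics enabled and unprefixed public symbols (so JEMALLOC_P(mallctl) resolves to plain mallctl()):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Public entry point; prototype repeated here to keep the sketch standalone. */
int	mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen);

int
main(void)
{
	uint64_t nmalloc, ndalloc;
	size_t sz = sizeof(uint64_t);

	/* First large size class (index 0) in arena 0. */
	if (mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, NULL,
	    0) == 0 &&
	    mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, NULL,
	    0) == 0) {
		printf("nmalloc=%llu ndalloc=%llu live=%llu\n",
		    (unsigned long long)nmalloc, (unsigned long long)ndalloc,
		    (unsigned long long)(nmalloc - ndalloc));
	}
	return (0);
}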
@@ -318,10 +322,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
uint64_t npurge, nmadvise, purged;
size_t small_allocated;
uint64_t small_nmalloc, small_ndalloc, small_nrequests;
- size_t medium_allocated;
- uint64_t medium_nmalloc, medium_ndalloc, medium_nrequests;
size_t large_allocated;
- uint64_t large_nmalloc, large_ndalloc;
+ uint64_t large_nmalloc, large_ndalloc, large_nrequests;
CTL_GET("arenas.pagesize", &pagesize, size_t);
@@ -345,26 +347,19 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque,
malloc_cprintf(write_cb, cbopaque,
"small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
small_allocated, small_nmalloc, small_ndalloc, small_nrequests);
- CTL_I_GET("stats.arenas.0.medium.allocated", &medium_allocated, size_t);
- CTL_I_GET("stats.arenas.0.medium.nmalloc", &medium_nmalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.medium.ndalloc", &medium_ndalloc, uint64_t);
- CTL_I_GET("stats.arenas.0.medium.nrequests", &medium_nrequests,
- uint64_t);
- malloc_cprintf(write_cb, cbopaque,
- "medium: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- medium_allocated, medium_nmalloc, medium_ndalloc, medium_nrequests);
CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t);
CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t);
CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t);
+ CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t);
malloc_cprintf(write_cb, cbopaque,
"large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- large_allocated, large_nmalloc, large_ndalloc, large_nmalloc);
+ large_allocated, large_nmalloc, large_ndalloc, large_nrequests);
malloc_cprintf(write_cb, cbopaque,
"total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n",
- small_allocated + medium_allocated + large_allocated,
- small_nmalloc + medium_nmalloc + large_nmalloc,
- small_ndalloc + medium_ndalloc + large_ndalloc,
- small_nrequests + medium_nrequests + large_nmalloc);
+ small_allocated + large_allocated,
+ small_nmalloc + large_nmalloc,
+ small_ndalloc + large_ndalloc,
+ small_nrequests + large_nrequests);
malloc_cprintf(write_cb, cbopaque, "active: %12zu\n",
pactive * pagesize );
CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t);
@@ -511,11 +506,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
write_cb(cbopaque, umax2s(sv, 10, s));
write_cb(cbopaque, "\n");
- CTL_GET("arenas.medium", &sv, size_t);
- write_cb(cbopaque, "Medium spacing: ");
- write_cb(cbopaque, umax2s(sv, 10, s));
- write_cb(cbopaque, "\n");
-
if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz,
NULL, 0)) == 0) {
write_cb(cbopaque, "Tiny 2^n-spaced sizes: [");
@@ -551,14 +541,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
write_cb(cbopaque, umax2s(sv, 10, s));
write_cb(cbopaque, "]\n");
- CTL_GET("arenas.medium_min", &sv, size_t);
- write_cb(cbopaque, "Medium sizes: [");
- write_cb(cbopaque, umax2s(sv, 10, s));
- write_cb(cbopaque, "..");
- CTL_GET("arenas.medium_max", &sv, size_t);
- write_cb(cbopaque, umax2s(sv, 10, s));
- write_cb(cbopaque, "]\n");
-
CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t);
if (ssv >= 0) {
write_cb(cbopaque,
@@ -569,6 +551,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
write_cb(cbopaque,
"Min active:dirty page ratio per arena: N/A\n");
}
+ if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv,
+ &ssz, NULL, 0)) == 0) {
+ write_cb(cbopaque,
+ "Maximum thread-cached size class: ");
+ write_cb(cbopaque, umax2s(sv, 10, s));
+ write_cb(cbopaque, "\n");
+ }
if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv,
&ssz, NULL, 0)) == 0) {
size_t tcache_gc_sweep = (1U << ssv);
@@ -705,7 +694,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
for (i = 0; i < narenas; i++) {
if (initialized[i]) {
- malloc_cprintf(write_cb, cbopaque,
+ malloc_cprintf(write_cb,
+ cbopaque,
"\narenas[%u]:\n", i);
stats_arena_print(write_cb,
cbopaque, i);
diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c
index feba61b..6113dec 100644
--- a/jemalloc/src/tcache.c
+++ b/jemalloc/src/tcache.c
@@ -5,6 +5,7 @@
/* Data. */
bool opt_tcache = true;
+ssize_t opt_lg_tcache_maxclass = LG_TCACHE_MAXCLASS_DEFAULT;
ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
/* Map of thread-specific caches. */
@@ -16,6 +17,8 @@ __thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec"));
*/
static pthread_key_t tcache_tsd;
+size_t nhbins;
+size_t tcache_maxclass;
unsigned tcache_gc_incr;
/******************************************************************************/
@@ -26,11 +29,11 @@ static void tcache_thread_cleanup(void *arg);
/******************************************************************************/
void *
-tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
+tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
{
void *ret;
- arena_tcache_fill(tcache->arena, tbin, binind
+ arena_tcache_fill_small(tcache->arena, tbin, binind
#ifdef JEMALLOC_PROF
, tcache->prof_accumbytes
#endif
@@ -38,13 +41,13 @@ tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
#ifdef JEMALLOC_PROF
tcache->prof_accumbytes = 0;
#endif
- ret = tcache_bin_alloc(tbin);
+ ret = tcache_alloc_easy(tbin);
return (ret);
}
void
-tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
+tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache_t *tcache
#endif
@@ -53,6 +56,7 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
void *flush, *deferred, *ptr;
unsigned i, nflush, ndeferred;
+ assert(binind < nbins);
assert(rem <= tbin->ncached);
for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
@@ -120,6 +124,79 @@ tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem
tbin->low_water = tbin->ncached;
}
+void
+tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache_t *tcache
+#endif
+ )
+{
+ void *flush, *deferred, *ptr;
+ unsigned i, nflush, ndeferred;
+
+ assert(binind < nhbins);
+ assert(rem <= tbin->ncached);
+
+ for (flush = tbin->avail, nflush = tbin->ncached - rem; flush != NULL;
+ flush = deferred, nflush = ndeferred) {
+ /* Lock the arena associated with the first object. */
+ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(flush);
+ arena_t *arena = chunk->arena;
+
+ malloc_mutex_lock(&arena->lock);
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ if (arena == tcache->arena) {
+#endif
+#ifdef JEMALLOC_PROF
+ arena_prof_accum(arena, tcache->prof_accumbytes);
+ tcache->prof_accumbytes = 0;
+#endif
+#ifdef JEMALLOC_STATS
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ arena->stats.lstats[binind - nbins].nrequests +=
+ tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
+#endif
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+ }
+#endif
+ deferred = NULL;
+ ndeferred = 0;
+ for (i = 0; i < nflush; i++) {
+ ptr = flush;
+ assert(ptr != NULL);
+ flush = *(void **)ptr;
+ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+ if (chunk->arena == arena)
+ arena_dalloc_large(arena, chunk, ptr);
+ else {
+ /*
+ * This object was allocated via a different
+ * arena than the one that is currently locked.
+ * Stash the object, so that it can be handled
+ * in a future pass.
+ */
+ *(void **)ptr = deferred;
+ deferred = ptr;
+ ndeferred++;
+ }
+ }
+ malloc_mutex_unlock(&arena->lock);
+
+ if (flush != NULL) {
+ /*
+ * This was the first pass, and rem cached objects
+ * remain.
+ */
+ tbin->avail = flush;
+ }
+ }
+
+ tbin->ncached = rem;
+ if (tbin->ncached < tbin->low_water)
+ tbin->low_water = tbin->ncached;
+}
+
tcache_t *
tcache_create(arena_t *arena)
{
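The per-arena deferral in tcache_bin_flush_large() (and in its small counterpart) is easier to see with the locking and stats stripped away. A hypothetical, self-contained rendering of just that list-walking pattern:

#include <stddef.h>
#include <stdio.h>

typedef struct obj_s {
	struct obj_s	*next;
	int		owner;		/* stand-in for the owning arena */
} obj_t;

void
flush_all(obj_t *pending)
{
	while (pending != NULL) {
		int owner = pending->owner;	/* "lock" this owner once */
		obj_t *deferred = NULL;

		while (pending != NULL) {
			obj_t *next = pending->next;
			if (pending->owner == owner) {
				/* Return to owner; a dalloc in the real code. */
				printf("free object of owner %d\n", owner);
			} else {
				/* Stash for a later pass, as the patch does. */
				pending->next = deferred;
				deferred = pending;
			}
			pending = next;
		}
		pending = deferred;		/* next pass, next owner */
	}
}

Each pass takes one arena lock and drains every cached object belonging to that arena, so the number of lock acquisitions is bounded by the number of distinct arenas seen rather than by the number of flushed objects.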
@@ -127,7 +204,7 @@ tcache_create(arena_t *arena)
size_t size;
unsigned i;
- size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nbins - 1));
+ size = sizeof(tcache_t) + (sizeof(tcache_bin_t) * (nhbins - 1));
/*
* Round up to the nearest multiple of the cacheline size, in order to
* avoid the possibility of false cacheline sharing.
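The round-up described in the comment is the usual mask trick; a standalone version with an assumed 64-byte cacheline (the tree derives the real constant from its own definitions):

#include <assert.h>
#include <stddef.h>

#define CACHELINE		((size_t)64)	/* assumed */
#define CACHELINE_CEILING(s)	(((s) + CACHELINE - 1) & ~(CACHELINE - 1))

int
main(void)
{
	assert(CACHELINE_CEILING(1) == 64);
	assert(CACHELINE_CEILING(64) == 64);
	assert(CACHELINE_CEILING(65) == 128);
	return (0);
}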
@@ -138,8 +215,6 @@ tcache_create(arena_t *arena)
if (size <= small_maxclass)
tcache = (tcache_t *)arena_malloc_small(arena, size, true);
- else if (size <= bin_maxclass)
- tcache = (tcache_t *)arena_malloc_medium(arena, size, true);
else
tcache = (tcache_t *)icalloc(size);
@@ -155,14 +230,16 @@ tcache_create(arena_t *arena)
#endif
tcache->arena = arena;
- assert((TCACHE_NSLOTS_MAX & 1U) == 0);
+ assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
for (i = 0; i < nbins; i++) {
- if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_MAX) {
+ if ((arena->bins[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
tcache->tbins[i].ncached_max = (arena->bins[i].nregs <<
1);
} else
- tcache->tbins[i].ncached_max = TCACHE_NSLOTS_MAX;
+ tcache->tbins[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
}
+ for (; i < nhbins; i++)
+ tcache->tbins[i].ncached_max = TCACHE_NSLOTS_LARGE;
tcache_tls = tcache;
pthread_setspecific(tcache_tsd, tcache);
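The cap selection above reduces to min(2 * nregs, TCACHE_NSLOTS_SMALL_MAX) for small bins and a flat TCACHE_NSLOTS_LARGE for the large bins that follow them. Sketched below with assumed constants (the real values come from the tcache header, not the literals used here):

#include <assert.h>

#define TCACHE_NSLOTS_SMALL_MAX	200	/* assumed for illustration */
#define TCACHE_NSLOTS_LARGE	20	/* assumed for illustration */

static unsigned
ncached_max(unsigned binind, unsigned nbins, unsigned nregs)
{
	if (binind < nbins) {		/* small bin */
		if ((nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX)
			return (nregs << 1);
		return (TCACHE_NSLOTS_SMALL_MAX);
	}
	return (TCACHE_NSLOTS_LARGE);	/* large bin */
}

int
main(void)
{
	assert(ncached_max(0, 28, 64) == 128);
	assert(ncached_max(1, 28, 512) == 200);
	assert(ncached_max(30, 28, 1) == 20);
	return (0);
}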
@@ -185,7 +262,7 @@ tcache_destroy(tcache_t *tcache)
for (i = 0; i < nbins; i++) {
tcache_bin_t *tbin = &tcache->tbins[i];
- tcache_bin_flush(tbin, i, 0
+ tcache_bin_flush_small(tbin, i, 0
#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
, tcache
#endif
@@ -202,6 +279,26 @@ tcache_destroy(tcache_t *tcache)
#endif
}
+ for (; i < nhbins; i++) {
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ tcache_bin_flush_large(tbin, i, 0
+#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF))
+ , tcache
+#endif
+ );
+
+#ifdef JEMALLOC_STATS
+ if (tbin->tstats.nrequests != 0) {
+ arena_t *arena = tcache->arena;
+ malloc_mutex_lock(&arena->lock);
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ arena->stats.lstats[i - nbins].nrequests +=
+ tbin->tstats.nrequests;
+ malloc_mutex_unlock(&arena->lock);
+ }
+#endif
+ }
+
#ifdef JEMALLOC_PROF
if (tcache->prof_accumbytes > 0) {
malloc_mutex_lock(&tcache->arena->lock);
@@ -210,7 +307,7 @@ tcache_destroy(tcache_t *tcache)
}
#endif
- if (arena_salloc(tcache) <= bin_maxclass) {
+ if (arena_salloc(tcache) <= small_maxclass) {
arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
arena_t *arena = chunk->arena;
size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
@@ -256,6 +353,14 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena)
malloc_mutex_unlock(&bin->lock);
tbin->tstats.nrequests = 0;
}
+
+ for (; i < nhbins; i++) {
+ malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins];
+ tcache_bin_t *tbin = &tcache->tbins[i];
+ arena->stats.nrequests_large += tbin->tstats.nrequests;
+ lstats->nrequests += tbin->tstats.nrequests;
+ tbin->tstats.nrequests = 0;
+ }
}
#endif
@@ -264,6 +369,20 @@ tcache_boot(void)
{
if (opt_tcache) {
+ /*
+ * If necessary, clamp opt_lg_tcache_maxclass, now that
+ * small_maxclass and arena_maxclass are known.
+ */
+ if (opt_lg_tcache_maxclass < 0 || (1U <<
+ opt_lg_tcache_maxclass) < small_maxclass)
+ tcache_maxclass = small_maxclass;
+ else if ((1U << opt_lg_tcache_maxclass) > arena_maxclass)
+ tcache_maxclass = arena_maxclass;
+ else
+ tcache_maxclass = (1U << opt_lg_tcache_maxclass);
+
+ nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT);
+
/* Compute incremental GC event threshold. */
if (opt_lg_tcache_gc_sweep >= 0) {
tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /