author    | Jason Evans <je@fb.com> | 2012-02-29 00:50:47 (GMT)
committer | Jason Evans <je@fb.com> | 2012-02-29 00:50:47 (GMT)
commit    | b172610317babc7f365584ddd7fdaf4eb8d9d04c (patch)
tree      | 9c16ec02092d0a6f781072721679dcce7f3497dd
parent    | 5389146191b279ca3b90028357dd6ad66b283def (diff)
Simplify small size class infrastructure.
Program-generate small size class tables for all valid combinations of
LG_TINY_MIN, LG_QUANTUM, and PAGE_SHIFT. Use the appropriate table to generate
all relevant data structures, and remove the distinction between
tiny/quantum/cacheline/subpage bins.
Remove --enable-dynamic-page-shift. This option didn't prove useful in
practice, and it prevented optimizations.
Add Tilera architecture support.
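The spacing rule behind the new tables is easy to see in miniature. The following hand-written C sketch is not part of the commit; it mirrors the loop structure of include/jemalloc/internal/size_classes.sh (added below) for one assumed configuration (LG_TINY_MIN=3, LG_QUANTUM=4, PAGE_SHIFT=12): tiny classes are powers of two below the quantum, and each doubling of the size above that is split into at most four classes spaced by the larger of the quantum and a quarter of the size, which is what limits worst-case internal fragmentation to roughly 25%.

```c
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	/* One assumed configuration; the generator covers all combinations. */
	const size_t tiny_min = (size_t)1 << 3;		/* LG_TINY_MIN == 3 */
	const size_t quantum  = (size_t)1 << 4;		/* LG_QUANTUM == 4 */
	const size_t page     = (size_t)1 << 12;	/* PAGE_SHIFT == 12 */
	size_t sz = tiny_min, last = 0;
	unsigned bin = 0;

	/* Tiny size classes: powers of two below the quantum. */
	for (; sz < quantum; sz <<= 1) {
		printf("bin %2u: %4zu\n", bin++, sz);
		last = sz;
	}

	/*
	 * Quantum-multiple size classes: within each doubling of sz, the
	 * spacing is the greater of the quantum and sz/4, so at most four
	 * classes exist per doubling.
	 */
	while (sz < page) {
		size_t spacing = (sz >= quantum * 4) ? sz / 4 : quantum;
		size_t next_pow2 = sz * 2;
		for (; sz < next_pow2; sz += spacing) {
			printf("bin %2u: %4zu\n", bin++, sz);
			last = sz;
		}
	}
	printf("NBINS = %u, SMALL_MAXCLASS = %zu\n", bin, last);
	return (0);
}
```

For this configuration the sketch prints the same progression as the documentation table in the diff (8; 16 through 128 in steps of 16; 160 through 256 in steps of 32; and so on up to 3584), ending with NBINS = 28 and SMALL_MAXCLASS = 3584.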
-rw-r--r-- | .gitignore                                        |   1
-rw-r--r-- | INSTALL                                           |   8
-rw-r--r-- | configure.ac                                      |  24
-rw-r--r-- | doc/jemalloc.xml.in                               | 198
-rw-r--r-- | include/jemalloc/internal/arena.h                 |  98
-rw-r--r-- | include/jemalloc/internal/atomic.h                |   4
-rw-r--r-- | include/jemalloc/internal/ctl.h                   |   2
-rw-r--r-- | include/jemalloc/internal/jemalloc_internal.h.in  |  98
-rw-r--r-- | include/jemalloc/internal/mb.h                    |   9
-rwxr-xr-x | include/jemalloc/internal/size_classes.sh         | 132
-rw-r--r-- | include/jemalloc/internal/tcache.h                |  16
-rw-r--r-- | include/jemalloc/jemalloc_defs.h.in               |   3
-rw-r--r-- | src/arena.c                                       | 369
-rw-r--r-- | src/ctl.c                                         |  69
-rw-r--r-- | src/jemalloc.c                                    |  53
-rw-r--r-- | src/stats.c                                       |  72
-rw-r--r-- | src/tcache.c                                      |  31
17 files changed, 328 insertions, 859 deletions
@@ -11,6 +11,7 @@ /lib/ /Makefile /include/jemalloc/internal/jemalloc_internal\.h +/include/jemalloc/internal/size_classes\.h /include/jemalloc/jemalloc\.h /include/jemalloc/jemalloc_defs\.h /test/jemalloc_test\.h @@ -113,14 +113,6 @@ any of the following arguments (not a definitive list) to 'configure': rather than a minimal allocation. See the "opt.sysv" option documentation for usage details. ---enable-dynamic-page-shift - Under most conditions, the system page size never changes (usually 4KiB or - 8KiB, depending on architecture and configuration), and unless this option - is enabled, jemalloc assumes that page size can safely be determined during - configuration and hard-coded. Enabling dynamic page size determination has - a measurable impact on performance, since the compiler is forced to load - the page size from memory rather than embedding immediate values. - --enable-lazy-lock Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid diff --git a/configure.ac b/configure.ac index fdbf1ba..91caef4 100644 --- a/configure.ac +++ b/configure.ac @@ -367,8 +367,10 @@ cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" +cfghdrs_in="${cfghdrs_in} ${srcroot}include/jemalloc/internal/size_classes.sh" cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" @@ -640,23 +642,6 @@ if test "x$enable_sysv" = "x1" ; then fi AC_SUBST([enable_sysv]) -dnl Do not determine page shift at run time by default. -AC_ARG_ENABLE([dynamic_page_shift], - [AS_HELP_STRING([--enable-dynamic-page-shift], - [Determine page size at run time (don't trust configure result)])], -[if test "x$enable_dynamic_page_shift" = "xno" ; then - enable_dynamic_page_shift="0" -else - enable_dynamic_page_shift="1" -fi -], -[enable_dynamic_page_shift="0"] -) -if test "x$enable_dynamic_page_shift" = "x1" ; then - AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ]) -fi -AC_SUBST([enable_dynamic_page_shift]) - AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) AC_RUN_IFELSE([AC_LANG_PROGRAM( [[#include <stdio.h> @@ -866,6 +851,11 @@ dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL +AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ + mkdir -p "include/jemalloc/internal" + "${srcdir}/include/jemalloc/internal/size_classes.sh" > "${objroot}include/jemalloc/internal/size_classes.h" +]) + dnl Process .in files. AC_SUBST([cfghdrs_in]) AC_SUBST([cfghdrs_out]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1e8c800..cfe120f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -458,20 +458,11 @@ for (i = 0; i < nbins; i++) { a frontier and free list to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least - <code language="C">sizeof(<type>void *</type>)</code>. 
Allocation requests - that are more than half the quantum, but no more than the minimum - cacheline-multiple size class (see the <link - linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link> - option) are rounded up to the nearest multiple of the quantum. Allocation - requests that are more than the minimum cacheline-multiple size class, but - no more than the minimum subpage-multiple size class (see the <link - linkend="opt.lg_cspace_max"><mallctl>opt.lg_cspace_max</mallctl></link> - option) are rounded up to the nearest multiple of the cacheline size (64). - Allocation requests that are more than the minimum subpage-multiple size - class, but no more than the maximum subpage-multiple size class are rounded - up to the nearest multiple of the subpage size (256). Allocation requests - that are more than the maximum subpage-multiple size class, but small - enough to fit in an arena-managed chunk (see the <link + <code language="C">sizeof(<type>double</type>)</code>. All other small + object size classes are multiples of the quantum, spaced such that internal + fragmentation is limited to approximately 25% for all but the smallest size + classes. Allocation requests that are larger than the maximum small size + class, but small enough to fit in an arena-managed chunk (see the <link linkend="opt.lg_chunk"><mallctl>opt.lg_chunk</mallctl></link> option), are rounded up to the nearest run size. Allocation requests that are too large to fit in an arena-managed chunk are rounded up to the nearest multiple of @@ -507,16 +498,28 @@ for (i = 0; i < nbins; i++) { <entry>[8]</entry> </row> <row> - <entry>Quantum-spaced</entry> + <entry>16-spaced</entry> <entry>[16, 32, 48, ..., 128]</entry> </row> <row> - <entry>Cacheline-spaced</entry> - <entry>[192, 256, 320, ..., 512]</entry> + <entry>32-spaced</entry> + <entry>[160, 192, 224, 256]</entry> </row> <row> - <entry>Subpage-spaced</entry> - <entry>[768, 1024, 1280, ..., 3840]</entry> + <entry>64-spaced</entry> + <entry>[320, 384, 448, 512]</entry> + </row> + <row> + <entry>128-spaced</entry> + <entry>[640, 768, 896, 1024]</entry> + </row> + <row> + <entry>256-spaced</entry> + <entry>[1280, 1536, 1792, 2048]</entry> + </row> + <row> + <entry>512-spaced</entry> + <entry>[2560, 3072, 3584]</entry> </row> <row> <entry namest="c1" nameend="c2">Large</entry> @@ -714,30 +717,6 @@ for (i = 0; i < nbins; i++) { </para></listitem> </varlistentry> - <varlistentry id="opt.lg_qspace_max"> - <term> - <mallctl>opt.lg_qspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Size (log base 2) of the maximum size class that is a - multiple of the quantum (8 or 16 bytes, depending on architecture). - Above this size, cacheline spacing is used for size classes. The - default value is 128 bytes (2^7).</para></listitem> - </varlistentry> - - <varlistentry id="opt.lg_cspace_max"> - <term> - <mallctl>opt.lg_cspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Size (log base 2) of the maximum size class that is a - multiple of the cacheline size (64). Above this size, subpage spacing - (256 bytes) is used for size classes. 
The default value is 512 bytes - (2^9).</para></listitem> - </varlistentry> - <varlistentry id="opt.lg_chunk"> <term> <mallctl>opt.lg_chunk</mallctl> @@ -1180,24 +1159,6 @@ malloc_conf = "xmalloc:true";]]></programlisting> <varlistentry> <term> - <mallctl>arenas.cacheline</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Assumed cacheline size.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.subpage</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Subpage size class interval.</para></listitem> - </varlistentry> - - <varlistentry> - <term> <mallctl>arenas.pagesize</mallctl> (<type>size_t</type>) <literal>r-</literal> @@ -1216,80 +1177,6 @@ malloc_conf = "xmalloc:true";]]></programlisting> <varlistentry> <term> - <mallctl>arenas.tspace_min</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Minimum tiny size class. Tiny size classes are powers - of two.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.tspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Maximum tiny size class. Tiny size classes are powers - of two.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.qspace_min</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Minimum quantum-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.qspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Maximum quantum-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.cspace_min</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Minimum cacheline-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.cspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Maximum cacheline-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.sspace_min</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Minimum subpage-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.sspace_max</mallctl> - (<type>size_t</type>) - <literal>r-</literal> - </term> - <listitem><para>Maximum subpage-spaced size class.</para></listitem> - </varlistentry> - - <varlistentry> - <term> <mallctl>arenas.tcache_max</mallctl> (<type>size_t</type>) <literal>r-</literal> @@ -1300,50 +1187,11 @@ malloc_conf = "xmalloc:true";]]></programlisting> <varlistentry> <term> - <mallctl>arenas.ntbins</mallctl> - (<type>unsigned</type>) - <literal>r-</literal> - </term> - <listitem><para>Number of tiny bin size classes.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.nqbins</mallctl> - (<type>unsigned</type>) - <literal>r-</literal> - </term> - <listitem><para>Number of quantum-spaced bin size - classes.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.ncbins</mallctl> - (<type>unsigned</type>) - <literal>r-</literal> - </term> - <listitem><para>Number of cacheline-spaced bin size - classes.</para></listitem> - </varlistentry> - - <varlistentry> - <term> - <mallctl>arenas.nsbins</mallctl> - (<type>unsigned</type>) - <literal>r-</literal> - </term> - <listitem><para>Number 
of subpage-spaced bin size - classes.</para></listitem> - </varlistentry> - - <varlistentry> - <term> <mallctl>arenas.nbins</mallctl> (<type>unsigned</type>) <literal>r-</literal> </term> - <listitem><para>Total number of bin size classes.</para></listitem> + <listitem><para>Number of bin size classes.</para></listitem> </varlistentry> <varlistentry> diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4a87ef5..16c2b1e 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -2,39 +2,6 @@ #ifdef JEMALLOC_H_TYPES /* - * Subpages are an artificially designated partitioning of pages. Their only - * purpose is to support subpage-spaced size classes. - * - * There must be at least 4 subpages per page, due to the way size classes are - * handled. - */ -#define LG_SUBPAGE 8 -#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) -#define SUBPAGE_MASK (SUBPAGE - 1) - -/* Return the smallest subpage multiple that is >= s. */ -#define SUBPAGE_CEILING(s) \ - (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) - -/* Smallest size class to support. */ -#define LG_TINY_MIN 3 -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Maximum size class that is a multiple of the quantum, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_QSPACE_MAX_DEFAULT 7 - -/* - * Maximum size class that is a multiple of the cacheline, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_CSPACE_MAX_DEFAULT 9 - -/* * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized * as small as possible such that this setting is still honored, without * violating other constraints. The goal is to make runs as small as possible @@ -364,75 +331,26 @@ struct arena_s { arena_avail_tree_t runs_avail_clean; arena_avail_tree_t runs_avail_dirty; - /* - * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. - * - * bins[i] | size | - * --------+--------+ - * 0 | 8 | - * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | - * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | - * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | - * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | - * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | - * --------+--------+ - */ - arena_bin_t bins[1]; /* Dynamically sized. */ + /* bins is used to store trees of free regions. */ + arena_bin_t bins[NBINS]; }; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern size_t opt_lg_qspace_max; -extern size_t opt_lg_cspace_max; extern ssize_t opt_lg_dirty_mult; /* * small_size2bin is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via the SMALL_SIZE2BIN macro. */ -extern uint8_t const *small_size2bin; +extern uint8_t const small_size2bin[]; #define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) -extern arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ - /* Number of (2^n)-spaced tiny bins. */ -#define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -extern unsigned nqbins; /* Number of quantum-spaced bins. 
*/ -extern unsigned ncbins; /* Number of cacheline-spaced bins. */ -extern unsigned nsbins; /* Number of subpage-spaced bins. */ -extern unsigned nbins; -#define tspace_max ((size_t)(QUANTUM >> 1)) -#define qspace_min QUANTUM -extern size_t qspace_max; -extern size_t cspace_min; -extern size_t cspace_max; -extern size_t sspace_min; -extern size_t sspace_max; -#define small_maxclass sspace_max +extern arena_bin_info_t arena_bin_info[NBINS]; +/* Number of large size classes. */ #define nlclasses (chunk_npages - map_bias) void arena_purge_all(arena_t *arena); @@ -457,7 +375,7 @@ void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); bool arena_new(arena_t *arena, unsigned ind); -bool arena_boot(void); +void arena_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -478,7 +396,7 @@ JEMALLOC_INLINE size_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { size_t binind = bin - arena->bins; - assert(binind < nbins); + assert(binind < NBINS); return (binind); } @@ -633,7 +551,7 @@ arena_malloc(size_t size, bool zero) assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); - if (size <= small_maxclass) { + if (size <= SMALL_MAXCLASS) { if ((tcache = tcache_get()) != NULL) return (tcache_alloc_small(tcache, size, zero)); else diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 9a29862..8c68593 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -70,7 +70,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -133,7 +133,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t atomic_add_uint32(uint32_t *p, uint32_t x) { diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index de4b941..28be2ae 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -40,7 +40,7 @@ struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_bin_stats_t bstats[NBINS]; malloc_large_stats_t *lstats; /* nlclasses elements. */ }; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 971336e..f43fcd2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -229,33 +229,48 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); /* Size of stack-allocated buffer passed to buferror(). */ #define BUFERROR_BUF 64 -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. 
*/ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 +/* Smallest size class to support. */ +#define LG_TINY_MIN 3 +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum alignment of allocations is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# ifdef __i386__ +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# ifdef __sparc64__ +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390x__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "No LG_QUANTUM definition for architecture; specify via CPPFLAGS" +# endif #endif #define QUANTUM ((size_t)(1U << LG_QUANTUM)) @@ -291,15 +306,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ +/* Page size. STATIC_PAGE_SHIFT is determined by the configure script. */ #define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) #define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - #ifdef PAGE_SHIFT # undef PAGE_SHIFT #endif @@ -309,16 +318,9 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #ifdef PAGE_MASK # undef PAGE_MASK #endif - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif +#define PAGE_SHIFT STATIC_PAGE_SHIFT +#define PAGE_SIZE STATIC_PAGE_SIZE +#define PAGE_MASK STATIC_PAGE_MASK /* Return the smallest pagesize multiple that is >= s. 
*/ #define PAGE_CEILING(s) \ @@ -327,6 +329,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -352,6 +355,7 @@ extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -455,6 +459,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -480,6 +485,7 @@ void jemalloc_postfork(void); #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prn.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -525,7 +531,7 @@ JEMALLOC_INLINE size_t s2u(size_t size) { - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); if (size <= arena_maxclass) return (PAGE_CEILING(size)); @@ -570,7 +576,7 @@ sa2u(size_t size, size_t alignment, size_t *run_size_p) } if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) + if (usize <= SMALL_MAXCLASS) return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); return (PAGE_CEILING(usize)); } else { diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index dc9f2a5..3cfa787 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -54,7 +54,7 @@ mb_write(void) ); #endif } -#elif (defined(__amd64_) || defined(__x86_64__)) +#elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void mb_write(void) { @@ -87,6 +87,13 @@ mb_write(void) : "memory" /* Clobbers. */ ); } +#elif defined(__tile__) +JEMALLOC_INLINE void +mb_write(void) +{ + + __sync_synchronize(); +} #else /* * This is much slower than a simple memory barrier, but the semantics of mutex diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh new file mode 100755 index 0000000..d8306a5 --- /dev/null +++ b/include/jemalloc/internal/size_classes.sh @@ -0,0 +1,132 @@ +#!/bin/sh + +# The following limits are chosen such that they cover all supported platforms. + +# Range of quanta. +lg_qmin=3 +lg_qmax=4 + +# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. +lg_tmin=3 + +# Range of page sizes. +lg_pmin=12 +lg_pmax=16 + +function pow2() { + e=$1 + pow2_result=1 + while [ ${e} -gt 0 ] ; do + pow2_result=`expr ${pow2_result} + ${pow2_result}` + e=`expr ${e} - 1` + done +} + +cat <<EOF +/* This file was automatically generated by size_classes.sh. 
*/ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +EOF + +lg_q=${lg_qmin} +while [ ${lg_q} -le ${lg_qmax} ] ; do + lg_t=${lg_tmin} + while [ ${lg_t} -le ${lg_q} ] ; do + lg_p=${lg_pmin} + while [ ${lg_p} -le ${lg_pmax} ] ; do + cat <<EOF +#if (LG_TINY_MIN == ${lg_t} && LG_QUANTUM == ${lg_q} && PAGE_SHIFT == ${lg_p}) +#define SIZE_CLASSES_DEFINED +EOF + pow2 ${lg_q}; q=${pow2_result} + pow2 ${lg_t}; t=${pow2_result} + pow2 ${lg_p}; p=${pow2_result} + bin=0 + psz=0 + sz=${t} + delta=`expr ${sz} - ${psz}` +cat <<EOF +/* SIZE_CLASS(bin, delta, sz) */ +#define SIZE_CLASSES \\ +EOF + + # Tiny size classes. + while [ ${sz} -lt ${q} ] ; do + cat <<EOF + SIZE_CLASS(${bin}, ${delta}, ${sz}) \\ +EOF + bin=`expr ${bin} + 1` + psz=${sz} + sz=`expr ${sz} + ${sz}` + delta=`expr ${sz} - ${psz}` + done + # Quantum-multiple size classes. For each doubling of sz, as many as 4 + # size classes exist. Their spacing is the greater of: + # - q + # - sz/4, where sz is a power of 2 + while [ ${sz} -lt ${p} ] ; do + if [ ${sz} -ge `expr ${q} \* 4` ] ; then + i=`expr ${sz} / 4` + else + i=${q} + fi + next_2pow=`expr ${sz} \* 2` + while [ ${sz} -lt $next_2pow ] ; do + cat <<EOF + SIZE_CLASS(${bin}, ${delta}, ${sz}) \\ +EOF + bin=`expr ${bin} + 1` + psz=${sz} + sz=`expr ${sz} + ${i}` + delta=`expr ${sz} - ${psz}` + done + done + cat <<EOF + +#define NBINS ${bin} +#define SMALL_MAXCLASS ${psz} +#endif + +EOF + lg_p=`expr ${lg_p} + 1` + done + lg_t=`expr ${lg_t} + 1` + done + lg_q=`expr ${lg_q} + 1` +done + +cat <<EOF +#ifndef SIZE_CLASSES_DEFINED +# error "No size class definitions match configuration" +#endif +#undef SIZE_CLASSES_DEFINED +/* + * The small_size2bin lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. Further constrain NBINS to + * 255 to support prof_promote, since all small size classes, plus a "not + * small" size class must be stored in 8 bits of arena_chunk_map_t's bits + * field. + */ +#if (NBINS > 255) +# error "Too many small size classes" +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +EOF diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 717682d..b964a12 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -91,7 +91,7 @@ extern __thread tcache_t *tcache_tls extern pthread_key_t tcache_tsd; /* - * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ extern size_t nhbins; @@ -181,7 +181,7 @@ tcache_event(tcache_t *tcache) * Flush (ceiling) 3/4 of the objects below the low * water mark. 
*/ - if (binind < nbins) { + if (binind < NBINS) { tcache_bin_flush_small(tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); @@ -238,7 +238,7 @@ tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) tcache_bin_t *tbin; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); if (ret == NULL) { @@ -275,7 +275,7 @@ tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) size = PAGE_CEILING(size); assert(size <= tcache_maxclass); - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; assert(binind < nhbins); tbin = &tcache->tbins[binind]; ret = tcache_alloc_easy(tbin); @@ -328,7 +328,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) size_t pageind, binind; arena_chunk_map_t *mapelm; - assert(arena_salloc(ptr) <= small_maxclass); + assert(arena_salloc(ptr) <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; @@ -339,7 +339,7 @@ tcache_dalloc_small(tcache_t *tcache, void *ptr) bin = run->bin; binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / sizeof(arena_bin_t); - assert(binind < nbins); + assert(binind < NBINS); if (config_fill && opt_junk) memset(ptr, 0x5a, arena_bin_info[binind].reg_size); @@ -367,13 +367,13 @@ tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > small_maxclass); + assert(arena_salloc(ptr) > SMALL_MAXCLASS); assert(arena_salloc(ptr) <= tcache_maxclass); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); arena = chunk->arena; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = nbins + (size >> PAGE_SHIFT) - 1; + binind = NBINS + (size >> PAGE_SHIFT) - 1; if (config_fill && opt_junk) memset(ptr, 0x5a, size); diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 66da6f3..53e8520 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -104,9 +104,6 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - /* One page is 2^STATIC_PAGE_SHIFT bytes. */ #undef STATIC_PAGE_SHIFT diff --git a/src/arena.c b/src/arena.c index 33f3f85..72b7f44 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,128 +4,38 @@ /******************************************************************************/ /* Data. */ -size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; -size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ -unsigned nqbins; -unsigned ncbins; -unsigned nsbins; -unsigned nbins; -size_t qspace_max; -size_t cspace_min; -size_t cspace_max; -size_t sspace_min; -size_t sspace_max; - -size_t lg_mspace; -size_t mspace_mask; +arena_bin_info_t arena_bin_info[NBINS]; -/* - * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. 
- */ +JEMALLOC_ATTR(aligned(CACHELINE)) +const uint8_t small_size2bin[] = { #define S2B_8(i) i, #define S2B_16(i) S2B_8(i) S2B_8(i) #define S2B_32(i) S2B_16(i) S2B_16(i) #define S2B_64(i) S2B_32(i) S2B_32(i) #define S2B_128(i) S2B_64(i) S2B_64(i) #define S2B_256(i) S2B_128(i) S2B_128(i) -/* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. - */ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { -#if (LG_QUANTUM == 4) -/* 16-byte quantum **********************/ - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 - S2B_16(S2B_QMIN + 1) /* 32 */ - S2B_16(S2B_QMIN + 2) /* 48 */ - S2B_16(S2B_QMIN + 3) /* 64 */ - S2B_16(S2B_QMIN + 4) /* 80 */ - S2B_16(S2B_QMIN + 5) /* 96 */ - S2B_16(S2B_QMIN + 6) /* 112 */ - S2B_16(S2B_QMIN + 7) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 8) -#else -/* 8-byte quantum ***********************/ -# define S2B_QMIN 0 - S2B_8(S2B_QMIN + 0) /* 8 */ - S2B_8(S2B_QMIN + 1) /* 16 */ - S2B_8(S2B_QMIN + 2) /* 24 */ - S2B_8(S2B_QMIN + 3) /* 32 */ - S2B_8(S2B_QMIN + 4) /* 40 */ - S2B_8(S2B_QMIN + 5) /* 48 */ - S2B_8(S2B_QMIN + 6) /* 56 */ - S2B_8(S2B_QMIN + 7) /* 64 */ - S2B_8(S2B_QMIN + 8) /* 72 */ - S2B_8(S2B_QMIN + 9) /* 80 */ - S2B_8(S2B_QMIN + 10) /* 88 */ - S2B_8(S2B_QMIN + 11) /* 96 */ - S2B_8(S2B_QMIN + 12) /* 104 */ - S2B_8(S2B_QMIN + 13) /* 112 */ - S2B_8(S2B_QMIN + 14) /* 120 */ - S2B_8(S2B_QMIN + 15) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 16) -#endif -/****************************************/ - S2B_64(S2B_CMIN + 0) /* 192 */ - S2B_64(S2B_CMIN + 1) /* 256 */ - S2B_64(S2B_CMIN + 2) /* 320 */ - S2B_64(S2B_CMIN + 3) /* 384 */ - S2B_64(S2B_CMIN + 4) /* 448 */ - S2B_64(S2B_CMIN + 5) /* 512 */ -# define S2B_SMIN (S2B_CMIN + 6) - S2B_256(S2B_SMIN + 0) /* 768 */ - S2B_256(S2B_SMIN + 1) /* 1024 */ - S2B_256(S2B_SMIN + 2) /* 1280 */ - S2B_256(S2B_SMIN + 3) /* 1536 */ - S2B_256(S2B_SMIN + 4) /* 1792 */ - S2B_256(S2B_SMIN + 5) /* 2048 */ - S2B_256(S2B_SMIN + 6) /* 2304 */ - S2B_256(S2B_SMIN + 7) /* 2560 */ - S2B_256(S2B_SMIN + 8) /* 2816 */ - S2B_256(S2B_SMIN + 9) /* 3072 */ - S2B_256(S2B_SMIN + 10) /* 3328 */ - S2B_256(S2B_SMIN + 11) /* 3584 */ - S2B_256(S2B_SMIN + 12) /* 3840 */ -#if (STATIC_PAGE_SHIFT == 13) - S2B_256(S2B_SMIN + 13) /* 4096 */ - S2B_256(S2B_SMIN + 14) /* 4352 */ - S2B_256(S2B_SMIN + 15) /* 4608 */ - S2B_256(S2B_SMIN + 16) /* 4864 */ - S2B_256(S2B_SMIN + 17) /* 5120 */ - S2B_256(S2B_SMIN + 18) /* 5376 */ - S2B_256(S2B_SMIN + 19) /* 5632 */ - S2B_256(S2B_SMIN + 20) /* 5888 */ - S2B_256(S2B_SMIN + 21) /* 6144 */ - S2B_256(S2B_SMIN + 22) /* 6400 */ - S2B_256(S2B_SMIN + 23) /* 6656 */ - S2B_256(S2B_SMIN + 24) /* 6912 */ - S2B_256(S2B_SMIN + 25) /* 7168 */ - S2B_256(S2B_SMIN + 26) /* 7424 */ - S2B_256(S2B_SMIN + 27) /* 7680 */ - S2B_256(S2B_SMIN + 28) /* 7936 */ -#endif -}; -#undef S2B_1 -#undef S2B_2 -#undef S2B_4 +#define S2B_512(i) S2B_256(i) S2B_256(i) +#define S2B_1024(i) S2B_512(i) S2B_512(i) +#define S2B_2048(i) S2B_1024(i) S2B_1024(i) +#define S2B_4096(i) S2B_2048(i) S2B_2048(i) +#define S2B_8192(i) S2B_4096(i) S2B_4096(i) +#define SIZE_CLASS(bin, delta, size) \ + S2B_##delta(bin) + SIZE_CLASSES #undef S2B_8 #undef S2B_16 #undef S2B_32 #undef S2B_64 #undef S2B_128 #undef S2B_256 -#undef S2B_QMIN -#undef S2B_CMIN -#undef S2B_SMIN +#undef S2B_512 +#undef S2B_1024 +#undef S2B_2048 +#undef S2B_4096 +#undef S2B_8192 +#undef SIZE_CLASS +}; /******************************************************************************/ /* Function prototypes for non-inline static 
functions. */ @@ -160,12 +70,9 @@ static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool small_size2bin_init(void); -static void small_size2bin_validate(void); -static bool small_size2bin_init_hard(void); static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size); -static bool bin_info_init(void); +static void bin_info_init(void); /******************************************************************************/ @@ -1368,7 +1275,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) size_t binind; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); bin = &arena->bins[binind]; size = arena_bin_info[binind].reg_size; @@ -1553,12 +1460,12 @@ arena_prof_promoted(const void *ptr, size_t size) assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); assert(isalloc(ptr) == PAGE_SIZE); - assert(size <= small_maxclass); + assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); + assert(binind < NBINS); chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); } @@ -1594,7 +1501,7 @@ arena_salloc_demote(const void *ptr) CHUNK_MAP_CLASS_MASK) != 0) { size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < nbins); + assert(binind < NBINS); ret = arena_bin_info[binind].reg_size; } assert(ret != 0); @@ -1762,7 +1669,7 @@ arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, } malloc_mutex_unlock(&arena->lock); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(&bin->lock); @@ -1963,10 +1870,10 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, * Avoid moving the allocation if the size class can be left the same. */ if (oldsize <= arena_maxclass) { - if (oldsize <= small_maxclass) { + if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size == oldsize); - if ((size + extra <= small_maxclass && + if ((size + extra <= SMALL_MAXCLASS && SMALL_SIZE2BIN(size + extra) == SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && size + extra >= oldsize)) { @@ -1978,7 +1885,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, } } else { assert(size <= arena_maxclass); - if (size + extra > small_maxclass) { + if (size + extra > SMALL_MAXCLASS) { if (arena_ralloc_large(ptr, oldsize, size, extra, zero) == false) return (ptr); @@ -2083,7 +1990,7 @@ arena_new(arena_t *arena, unsigned ind) arena_avail_tree_new(&arena->runs_avail_dirty); /* Initialize bins. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock)) return (true); @@ -2096,119 +2003,6 @@ arena_new(arena_t *arena, unsigned ind) return (false); } -static void -small_size2bin_validate(void) -{ - size_t i, size, binind; - - i = 1; - /* Tiny. 
*/ - for (; i < TINY_MIN; i++) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i++) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i++) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Sub-page. */ - for (; i <= sspace_max; i++) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) - >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); - } -} - -static bool -small_size2bin_init(void) -{ - - if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) - return (small_size2bin_init_hard()); - - small_size2bin = const_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -} - -static bool -small_size2bin_init_hard(void) -{ - size_t i, size, binind; - uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] - - assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); - - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); - if (custom_small_size2bin == NULL) - return (true); - - i = 1; - /* Tiny. */ - for (; i < TINY_MIN; i += TINY_MIN) { - size = TINY_MIN; - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> - LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - - small_size2bin = custom_small_size2bin; - if (config_debug) - small_size2bin_validate(); - return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - /* * Calculate bin_info->run_size such that it meets the following constraints: * @@ -2330,104 +2124,27 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) return (good_run_size); } -static bool +static void bin_info_init(void) { arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; - - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; - /* (2^n)-spaced tiny bins. 
*/ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); + size_t prev_run_size = PAGE_SIZE; + +#define SIZE_CLASS(bin, delta, size) \ + bin_info = &arena_bin_info[bin]; \ + bin_info->reg_size = size; \ + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);\ + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + SIZE_CLASSES +#undef SIZE_CLASS } -bool +void arena_boot(void) { size_t header_size; unsigned i; - /* Set variables according to the value of opt_lg_[qc]space_max. */ - qspace_max = (1U << opt_lg_qspace_max); - cspace_min = CACHELINE_CEILING(qspace_max); - if (cspace_min == qspace_max) - cspace_min += CACHELINE; - cspace_max = (1U << opt_lg_cspace_max); - sspace_min = SUBPAGE_CEILING(cspace_max); - if (sspace_min == cspace_max) - sspace_min += SUBPAGE; - assert(sspace_min < PAGE_SIZE); - sspace_max = PAGE_SIZE - SUBPAGE; - - assert(LG_QUANTUM >= LG_TINY_MIN); - assert(ntbins <= LG_QUANTUM); - nqbins = qspace_max >> LG_QUANTUM; - ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; - nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1; - nbins = ntbins + nqbins + ncbins + nsbins; - - /* - * The small_size2bin lookup table uses uint8_t to encode each bin - * index, so we cannot support more than 256 small size classes. This - * limit is difficult to exceed (not even possible with 16B quantum and - * 4KiB pages), and such configurations are impractical, but - * nonetheless we need to protect against this case in order to avoid - * undefined behavior. - * - * Further constrain nbins to 255 if prof_promote is true, since all - * small size classes, plus a "not small" size class must be stored in - * 8 bits of arena_chunk_map_t's bits field. - */ - if (config_prof && opt_prof && prof_promote && nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } else if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write("<jemalloc>: Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); - } - /* * Compute the header size such that it is large enough to contain the * page map. 
The page map is biased to omit entries for the header @@ -2451,11 +2168,5 @@ arena_boot(void) arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - - return (false); + bin_info_init(); } @@ -47,7 +47,6 @@ CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) CTL_PROTO(config_debug) CTL_PROTO(config_dss) -CTL_PROTO(config_dynamic_page_shift) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_prof) @@ -59,8 +58,6 @@ CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_lg_dirty_mult) @@ -88,23 +85,9 @@ INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_cacheline) -CTL_PROTO(arenas_subpage) CTL_PROTO(arenas_pagesize) CTL_PROTO(arenas_chunksize) -CTL_PROTO(arenas_tspace_min) -CTL_PROTO(arenas_tspace_max) -CTL_PROTO(arenas_qspace_min) -CTL_PROTO(arenas_qspace_max) -CTL_PROTO(arenas_cspace_min) -CTL_PROTO(arenas_cspace_max) -CTL_PROTO(arenas_sspace_min) -CTL_PROTO(arenas_sspace_max) CTL_PROTO(arenas_tcache_max) -CTL_PROTO(arenas_ntbins) -CTL_PROTO(arenas_nqbins) -CTL_PROTO(arenas_ncbins) -CTL_PROTO(arenas_nsbins) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlruns) @@ -185,7 +168,6 @@ static const ctl_node_t thread_node[] = { static const ctl_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("dss"), CTL(config_dss)}, - {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("prof"), CTL(config_prof)}, @@ -200,8 +182,6 @@ static const ctl_node_t config_node[] = { static const ctl_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, @@ -250,23 +230,9 @@ static const ctl_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("cacheline"), CTL(arenas_cacheline)}, - {NAME("subpage"), CTL(arenas_subpage)}, {NAME("pagesize"), CTL(arenas_pagesize)}, {NAME("chunksize"), CTL(arenas_chunksize)}, - {NAME("tspace_min"), CTL(arenas_tspace_min)}, - {NAME("tspace_max"), CTL(arenas_tspace_max)}, - {NAME("qspace_min"), CTL(arenas_qspace_min)}, - {NAME("qspace_max"), CTL(arenas_qspace_max)}, - {NAME("cspace_min"), CTL(arenas_cspace_min)}, - {NAME("cspace_max"), CTL(arenas_cspace_max)}, - {NAME("sspace_min"), CTL(arenas_sspace_min)}, - {NAME("sspace_max"), CTL(arenas_sspace_max)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("ntbins"), CTL(arenas_ntbins)}, - {NAME("nqbins"), CTL(arenas_nqbins)}, - {NAME("ncbins"), CTL(arenas_ncbins)}, - {NAME("nsbins"), CTL(arenas_nsbins)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, {NAME("bin"), CHILD(arenas_bin)}, @@ -397,12 +363,6 @@ static bool ctl_arena_init(ctl_arena_stats_t *astats) { - if (astats->bstats == NULL) { - astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins * - sizeof(malloc_bin_stats_t)); - if (astats->bstats == NULL) - return (true); - } if (astats->lstats == NULL) { astats->lstats = (malloc_large_stats_t 
*)base_alloc(nlclasses * sizeof(malloc_large_stats_t)); @@ -425,7 +385,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->nmalloc_small = 0; astats->ndalloc_small = 0; astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * sizeof(malloc_bin_stats_t)); + memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); } @@ -439,7 +399,7 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats); - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].allocated; cstats->nmalloc_small += cstats->bstats[i].nmalloc; cstats->ndalloc_small += cstats->bstats[i].ndalloc; @@ -477,7 +437,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->lstats[i].curruns += astats->lstats[i].curruns; } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { sstats->bstats[i].allocated += astats->bstats[i].allocated; sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -1092,7 +1052,6 @@ CTL_RO_NL_CGEN(config_stats, thread_deallocatedp, DEALLOCATEDP_GET(), CTL_RO_BOOL_CONFIG_GEN(config_debug) CTL_RO_BOOL_CONFIG_GEN(config_dss) -CTL_RO_BOOL_CONFIG_GEN(config_dynamic_page_shift) CTL_RO_BOOL_CONFIG_GEN(config_fill) CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) CTL_RO_BOOL_CONFIG_GEN(config_prof) @@ -1107,8 +1066,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) @@ -1138,7 +1095,7 @@ const ctl_node_t * arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) { - if (i > nbins) + if (i > NBINS) return (NULL); return (super_arenas_bin_i_node); } @@ -1182,24 +1139,10 @@ RETURN: } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) -CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) -CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) -CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) -CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) -CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) -CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) -CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) -CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) -CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) -CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) +CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) @@ -1346,7 +1289,7 @@ const ctl_node_t * stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) { - if (j > nbins) + if (j > NBINS) return (NULL); 
return (super_stats_arenas_i_bins_j_node); }
diff --git a/src/jemalloc.c b/src/jemalloc.c
index 81829fe..08e5f31 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -95,9 +95,7 @@ arenas_extend(unsigned ind) { arena_t *ret; - /* Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(offsetof(arena_t, bins) + (sizeof(arena_bin_t) * nbins)); + ret = (arena_t *)base_alloc(sizeof(arena_t)); if (ret != NULL && arena_new(ret, ind) == false) { arenas[ind] = ret; return (ret);
@@ -563,10 +561,6 @@ malloc_conf_init(void) } CONF_HANDLE_BOOL(abort) - CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, PAGE_SHIFT-1) - CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, PAGE_SHIFT-1) /* * Chunks always require at least one * header page, * plus one data page.
@@ -613,14 +607,6 @@ malloc_conf_init(void) #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } - - /* Validate configuration of options that are inter-related. */ - if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { - malloc_write("<jemalloc>: Invalid lg_[qc]space_max " "relationship; restoring defaults\n"); - opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; - opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; - } } }
@@ -709,10 +695,7 @@ malloc_init_hard(void) if (config_prof) prof_boot1(); - if (arena_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } + arena_boot(); if (config_tcache && tcache_boot()) { malloc_mutex_unlock(&init_lock);
@@ -893,8 +876,8 @@ JEMALLOC_P(malloc)(size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else
@@ -992,10 +975,10 @@ imemalign(void **memptr, size_t alignment, size_t size) ret = EINVAL; } else { if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, + (uintptr_t)1U && usize <= SMALL_MAXCLASS) { + assert(sa2u(SMALL_MAXCLASS+1, alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, + result = ipalloc(sa2u(SMALL_MAXCLASS+1, alignment, NULL), alignment, false); if (result != NULL) { arena_prof_promoted(result,
@@ -1091,8 +1074,8 @@ JEMALLOC_P(calloc)(size_t num, size_t size) goto RETURN; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= small_maxclass) { - ret = icalloc(small_maxclass+1); + <= SMALL_MAXCLASS) { + ret = icalloc(SMALL_MAXCLASS+1); if (ret != NULL) arena_prof_promoted(ret, usize); } else
@@ -1177,8 +1160,8 @@ JEMALLOC_P(realloc)(void *ptr, size_t size) goto OOM; } if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1, 0, 0, + usize <= SMALL_MAXCLASS) { + ret = iralloc(ptr, SMALL_MAXCLASS+1, 0, 0, false, false); if (ret != NULL) arena_prof_promoted(ret, usize);
@@ -1220,8 +1203,8 @@ OOM: else { if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); + SMALL_MAXCLASS) { + ret = imalloc(SMALL_MAXCLASS+1); if (ret != NULL) { arena_prof_promoted(ret, usize);
@@ -1436,9 +1419,9 @@ JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) if (cnt == NULL) goto OOM; if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { + SMALL_MAXCLASS) { size_t usize_promoted = (alignment == 0) ? - s2u(small_maxclass+1) : sa2u(small_maxclass+1, + s2u(SMALL_MAXCLASS+1) : sa2u(SMALL_MAXCLASS+1, alignment, NULL); assert(usize_promoted != 0); p = iallocm(usize_promoted, alignment, zero);
@@ -1517,9 +1500,9 @@ JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, */ if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && ((alignment == 0) ? s2u(size) : sa2u(size, - alignment, NULL)) <= small_maxclass) { - q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= - size+extra) ? 0 : size+extra - (small_maxclass+1), + alignment, NULL)) <= SMALL_MAXCLASS) { + q = iralloc(p, SMALL_MAXCLASS+1, (SMALL_MAXCLASS+1 >= + size+extra) ? 0 : size+extra - (SMALL_MAXCLASS+1), alignment, zero, no_move); if (q == NULL) goto ERR;
diff --git a/src/stats.c b/src/stats.c
index 6d9ba9d..e4500df 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -159,12 +159,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc nrequests nfills nflushes" " newruns reruns curruns\n"); } else { malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" + "bins: bin size regs pgs allocated nmalloc" " ndalloc newruns reruns curruns\n"); } CTL_GET("arenas.nbins", &nbins, unsigned);
@@ -176,7 +176,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (gap_start == UINT_MAX) gap_start = j; } else { - unsigned ntbins_, nqbins, ncbins, nsbins; size_t reg_size, run_size, allocated; uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes;
@@ -196,10 +195,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } gap_start = UINT_MAX; } - CTL_GET("arenas.ntbins", &ntbins_, unsigned); - CTL_GET("arenas.nqbins", &nqbins, unsigned); - CTL_GET("arenas.ncbins", &ncbins, unsigned); - CTL_GET("arenas.nsbins", &nsbins, unsigned); CTL_J_GET("arenas.bin.0.size", &reg_size, size_t); CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t);
@@ -223,27 +218,19 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? "C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nrequests, nfills, nflushes, nruns, reruns, curruns); } else { malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + "%13u %5zu %4u %3zu %12zu %12"PRIu64 " %12"PRIu64" %12"PRIu64" %12"PRIu64 " %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? "Q" : j < ntbins_ + nqbins + ncbins ? "C" : - "S", - reg_size, nregs, run_size / pagesize, + j, reg_size, nregs, run_size / pagesize, allocated, nmalloc, ndalloc, nruns, reruns, curruns); }
@@ -496,8 +483,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_qspace_max) - OPT_WRITE_SIZE_T(lg_cspace_max) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_SIZE_T(narenas) OPT_WRITE_SSIZE_T(lg_dirty_mult)
@@ -541,51 +526,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, write_cb(cbopaque, u2s(sv, 10, s)); write_cb(cbopaque, "\n"); - CTL_GET("arenas.cacheline", &sv, size_t); - write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.subpage", &sv, size_t); - write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, - NULL, 0)) == 0) { - write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - - CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - } - - CTL_GET("arenas.qspace_min", &sv, size_t); - write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.cspace_min", &sv, size_t); - write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.sspace_min", &sv, size_t); - write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); if (ssv >= 0) { write_cb(cbopaque,
diff --git a/src/tcache.c b/src/tcache.c
index 4f4ed6c..fa05728 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -55,7 +55,7 @@ tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem, unsigned i, nflush, ndeferred; bool merged_stats = false; - assert(binind < nbins); + assert(binind < NBINS); assert(rem <= tbin->ncached); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) {
@@ -152,7 +152,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, merged_stats = true; arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; }
@@ -185,7 +185,7 @@ tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem, arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; malloc_mutex_unlock(&arena->lock);
@@ -220,7 +220,7 @@ tcache_create(arena_t *arena) */ size = (size + CACHELINE_MASK) & (-CACHELINE); - if (size <= small_maxclass) + if (size <= SMALL_MAXCLASS) tcache = (tcache_t *)arena_malloc_small(arena, size, true); else if (size <= tcache_maxclass) tcache = (tcache_t *)arena_malloc_large(arena, size, true);
@@ -266,7 +266,7 @@ tcache_destroy(tcache_t *tcache) tcache_stats_merge(tcache, tcache->arena); } - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_small(tbin, i, 0, tcache);
@@ -287,7 +287,7 @@ tcache_destroy(tcache_t *tcache) arena_t *arena = tcache->arena; malloc_mutex_lock(&arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - nbins].nrequests += + arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(&arena->lock); }
@@ -300,7 +300,7 @@ tcache_destroy(tcache_t *tcache) } tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { + if (tcache_size <= SMALL_MAXCLASS) { arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); arena_t *arena = chunk->arena; size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >>
@@ -357,7 +357,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) unsigned i; /* Merge and reset tcache stats. */ - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; malloc_mutex_lock(&bin->lock);
@@ -367,7 +367,7 @@ tcache_stats_merge(tcache_t *tcache, arena_t *arena) } for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins]; + malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; arena->stats.nrequests_large += tbin->tstats.nrequests; lstats->nrequests += tbin->tstats.nrequests;
@@ -384,17 +384,18 @@ tcache_boot(void) /* * If necessary, clamp opt_lg_tcache_max, now that - * small_maxclass and arena_maxclass are known. + * SMALL_MAXCLASS and arena_maxclass are known. + * XXX Can this be done earlier? */ if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < small_maxclass) - tcache_maxclass = small_maxclass; + opt_lg_tcache_max) < SMALL_MAXCLASS) + tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > arena_maxclass) tcache_maxclass = arena_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); - nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + nhbins = NBINS + (tcache_maxclass >> PAGE_SHIFT); /* Initialize tcache_bin_info. */ tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins *
@@ -402,7 +403,7 @@ tcache_boot(void) if (tcache_bin_info == NULL) return (true); stack_nelms = 0; - for (i = 0; i < nbins; i++) { + for (i = 0; i < NBINS; i++) { if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max =
@@ -421,7 +422,7 @@ tcache_boot(void) /* Compute incremental GC event threshold. */ if (opt_lg_tcache_gc_sweep >= 0) { tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == + NBINS) + (((1U << opt_lg_tcache_gc_sweep) % NBINS == 0) ? 0 : 1); } else tcache_gc_incr = 0;
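A note on the constants used throughout the hunks above: the run-time variables nbins and small_maxclass are replaced by NBINS and SMALL_MAXCLASS from the generated size_classes.h, so tcache bin indices split into small bins [0, NBINS) and large bins [NBINS, nhbins), and per-arena large stats are indexed by binind - NBINS. The stand-alone C sketch below only illustrates that arithmetic; the NBINS, PAGE_SHIFT, and SMALL_MAXCLASS values are made-up placeholders, not what the script would generate for any real configuration.

/*
 * Illustrative sketch only -- not part of the patch.  The three constants
 * below are hypothetical stand-ins for generated size_classes.h values.
 */
#include <stdio.h>

#define NBINS           28              /* hypothetical small size class count */
#define PAGE_SHIFT      12              /* 4 KiB pages */
#define SMALL_MAXCLASS  ((size_t)3584)  /* hypothetical largest small class */

int
main(void)
{
	size_t tcache_maxclass = (size_t)1 << 17;   /* as if opt_lg_tcache_max = 17 */
	unsigned lg_gc_sweep = 13;                  /* as if opt_lg_tcache_gc_sweep = 13 */

	/* Small bins occupy [0, NBINS); large tcache bins follow, one per page. */
	unsigned nhbins = NBINS + (unsigned)(tcache_maxclass >> PAGE_SHIFT);

	/* Ceiling division: spread one GC sweep evenly across the NBINS small bins. */
	unsigned tcache_gc_incr = ((1U << lg_gc_sweep) / NBINS) +
	    (((1U << lg_gc_sweep) % NBINS == 0) ? 0 : 1);

	/* A large bin's stats live at arena->stats.lstats[binind - NBINS]. */
	unsigned binind = NBINS + 3;                /* an arbitrary large bin index */

	/* Heap profiling promotes sampled small objects by requesting this size. */
	size_t promoted_request = SMALL_MAXCLASS + 1;

	printf("nhbins=%u gc_incr=%u lstats_index=%u promoted_request=%zu\n",
	    nhbins, tcache_gc_incr, binind - NBINS, promoted_request);
	return (0);
}

With these placeholder values the sketch prints nhbins=60, gc_incr=293, lstats_index=3, promoted_request=3585; the real numbers come from the generated size_classes.h.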