From ef8897b4b938111fcc9b54725067f1dbb33a4c20 Mon Sep 17 00:00:00 2001
From: Jason Evans <je@fb.com>
Date: Mon, 13 Feb 2012 14:30:52 -0800
Subject: Make 8-byte tiny size class non-optional.

When tiny size class support was first added, it was intended to support
truly tiny size classes (even 2 bytes).  However, this wasn't very
useful in practice, so the minimum tiny size class has been limited to
sizeof(void *) for a long time now.  This is too small to be
standards-compliant, but other commonly used malloc implementations do
not even bother using a 16-byte quantum on systems with vector units
(SSE2+, AltiVec, etc.).  As such, it is safe in practice to support an
8-byte tiny size class on 64-bit systems that support 16-byte types.
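
For illustration only (not part of the change): with LG_TINY_MIN fixed
at 3 and a 16-byte quantum (LG_QUANTUM == 4), requests of 1..8 bytes
land in the single 8-byte tiny bin, and 9..16 bytes in the first
quantum-spaced bin.  The sketch below reproduces the bin-index formula
used in small_size2bin_validate(); pow2_ceil() here is a local
stand-in for jemalloc's internal helper of the same name.

    /* Sketch of the tiny-bin mapping; assumes LG_QUANTUM == 4. */
    #include <stdio.h>
    #include <strings.h>    /* ffs() */

    #define LG_TINY_MIN 3
    #define TINY_MIN    (1U << LG_TINY_MIN) /* 8 bytes */

    /* Round up to the next power of two (stand-in for jemalloc's
     * internal pow2_ceil()). */
    static unsigned
    pow2_ceil(unsigned x)
    {
        unsigned p = 1;

        while (p < x)
            p <<= 1;
        return (p);
    }

    int
    main(void)
    {
        unsigned req;

        for (req = 1; req <= 16; req++) {
            unsigned size = (req < TINY_MIN) ? TINY_MIN : pow2_ceil(req);
            int binind = ffs((int)(size >> (LG_TINY_MIN + 1)));

            /* Requests 1..8 map to bin 0 (8 bytes); 9..16 to bin 1. */
            printf("request %2u -> size %2u, bin %d\n", req, size, binind);
        }
        return (0);
    }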
---
 INSTALL                                          |   7 --
 configure.ac                                     |  17 ----
 doc/jemalloc.xml.in                              |  23 ++----
 include/jemalloc/internal/arena.h                |  19 ++---
 include/jemalloc/internal/jemalloc_internal.h.in |   7 --
 include/jemalloc/jemalloc_defs.h.in              |   6 --
 src/arena.c                                      | 101 +++++++----------------
 src/ctl.c                                        |   9 +-
 8 files changed, 44 insertions(+), 145 deletions(-)

diff --git a/INSTALL b/INSTALL
index a210ec5..9124ac3 100644
--- a/INSTALL
+++ b/INSTALL
@@ -90,13 +90,6 @@ any of the following arguments (not a definitive list) to 'configure':
     Statically link against the specified libunwind.a rather than dynamically
     linking with -lunwind.
 
---disable-tiny
-    Disable tiny (sub-quantum-sized) object support.  Technically it is not
-    legal for a malloc implementation to allocate objects with less than
-    quantum alignment (8 or 16 bytes, depending on architecture), but in
-    practice it never causes any problems if, for example, 4-byte allocations
-    are 4-byte-aligned.
-
 --disable-tcache
     Disable thread-specific caches for small objects.  Objects are cached and
     released in bulk, thus reducing the total number of mutex operations.  See
diff --git a/configure.ac b/configure.ac
index e818f65..fdbf1ba 100644
--- a/configure.ac
+++ b/configure.ac
@@ -560,22 +560,6 @@ if test "x$enable_prof" = "x1" ; then
 fi
 AC_SUBST([enable_prof])
 
-dnl Enable tiny allocations by default.
-AC_ARG_ENABLE([tiny],
-  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
-[if test "x$enable_tiny" = "xno" ; then
-  enable_tiny="0"
-else
-  enable_tiny="1"
-fi
-],
-[enable_tiny="1"]
-)
-if test "x$enable_tiny" = "x1" ; then
-  AC_DEFINE([JEMALLOC_TINY], [ ])
-fi
-AC_SUBST([enable_tiny])
-
 dnl Enable thread-specific caching by default.
 AC_ARG_ENABLE([tcache],
   [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])],
@@ -934,7 +918,6 @@ AC_MSG_RESULT([prof               : ${enable_prof}])
 AC_MSG_RESULT([prof-libunwind     : ${enable_prof_libunwind}])
 AC_MSG_RESULT([prof-libgcc        : ${enable_prof_libgcc}])
 AC_MSG_RESULT([prof-gcc           : ${enable_prof_gcc}])
-AC_MSG_RESULT([tiny               : ${enable_tiny}])
 AC_MSG_RESULT([tcache             : ${enable_tcache}])
 AC_MSG_RESULT([fill               : ${enable_fill}])
 AC_MSG_RESULT([xmalloc            : ${enable_xmalloc}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index dc11642..f9f1475 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -455,13 +455,12 @@ for (i = 0; i < nbins; i++) {
     allocations in constant time.</para>
 
     <para>Small objects are managed in groups by page runs.  Each run maintains
-    a frontier and free list to track which regions are in use.  Unless
-    <option>--disable-tiny</option> is specified during configuration,
-    allocation requests that are no more than half the quantum (8 or 16,
-    depending on architecture) are rounded up to the nearest power of two that
-    is at least <code language="C">sizeof(<type>void *</type>)</code>.
-    Allocation requests that are more than half the quantum, but no more than
-    the minimum cacheline-multiple size class (see the <link
+    a frontier and free list to track which regions are in use.  Allocation
+    requests that are no more than half the quantum (8 or 16, depending on
+    architecture) are rounded up to the nearest power of two that is at least
+    <code language="C">sizeof(<type>void *</type>)</code>.  Allocation requests
+    that are more than half the quantum, but no more than the minimum
+    cacheline-multiple size class (see the <link
     linkend="opt.lg_qspace_max"><mallctl>opt.lg_qspace_max</mallctl></link>
     option) are rounded up to the nearest multiple of the quantum.  Allocation
     requests that are more than the minimum cacheline-multiple size class, but
@@ -682,16 +681,6 @@ for (i = 0; i < nbins; i++) {
 
       <varlistentry>
         <term>
-          <mallctl>config.tiny</mallctl>
-          (<type>bool</type>)
-          <literal>r-</literal>
-        </term>
-        <listitem><para><option>--disable-tiny</option> was not specified
-        during build configuration.</para></listitem>
-      </varlistentry>
-
-      <varlistentry>
-        <term>
           <mallctl>config.tls</mallctl>
           (<type>bool</type>)
           <literal>r-</literal>
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h
index b8de12b..cacb03f 100644
--- a/include/jemalloc/internal/arena.h
+++ b/include/jemalloc/internal/arena.h
@@ -17,7 +17,7 @@
 	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
 
 /* Smallest size class to support. */
-#define	LG_TINY_MIN		LG_SIZEOF_PTR
+#define	LG_TINY_MIN		3
 #define	TINY_MIN		(1U << LG_TINY_MIN)
 
 /*
@@ -418,18 +418,13 @@ extern uint8_t const	*small_size2bin;
 extern arena_bin_info_t	*arena_bin_info;
 
 /* Various bin-related settings. */
-#ifdef JEMALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
-#  define		ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
-#else
-#  define		ntbins	0
-#endif
+				/* Number of (2^n)-spaced tiny bins. */
+#define			ntbins	((unsigned)(LG_QUANTUM - LG_TINY_MIN))
 extern unsigned		nqbins; /* Number of quantum-spaced bins. */
 extern unsigned		ncbins; /* Number of cacheline-spaced bins. */
 extern unsigned		nsbins; /* Number of subpage-spaced bins. */
 extern unsigned		nbins;
-#ifdef JEMALLOC_TINY
-#  define		tspace_max	((size_t)(QUANTUM >> 1))
-#endif
+#define			tspace_max	((size_t)(QUANTUM >> 1))
 #define			qspace_min	QUANTUM
 extern size_t		qspace_max;
 extern size_t		cspace_min;
@@ -633,18 +628,18 @@ arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
 JEMALLOC_INLINE void *
 arena_malloc(size_t size, bool zero)
 {
-	tcache_t *tcache = tcache_get();
+	tcache_t *tcache;
 
 	assert(size != 0);
 	assert(QUANTUM_CEILING(size) <= arena_maxclass);
 
 	if (size <= small_maxclass) {
-		if (tcache != NULL)
+		if ((tcache = tcache_get()) != NULL)
 			return (tcache_alloc_small(tcache, size, zero));
 		else
 			return (arena_malloc_small(choose_arena(), size, zero));
 	} else {
-		if (tcache != NULL && size <= tcache_maxclass)
+		if (size <= tcache_maxclass && (tcache = tcache_get()) != NULL)
 			return (tcache_alloc_large(tcache, size, zero));
 		else
 			return (arena_malloc_large(choose_arena(), size, zero));
diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in
index 4441537..971336e 100644
--- a/include/jemalloc/internal/jemalloc_internal.h.in
+++ b/include/jemalloc/internal/jemalloc_internal.h.in
@@ -118,13 +118,6 @@ static const bool config_tcache =
     false
 #endif
     ;
-static const bool config_tiny =
-#ifdef JEMALLOC_TINY
-    true
-#else
-    false
-#endif
-    ;
 static const bool config_tls =
 #ifdef JEMALLOC_TLS
     true
diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in
index f78028b..66da6f3 100644
--- a/include/jemalloc/jemalloc_defs.h.in
+++ b/include/jemalloc/jemalloc_defs.h.in
@@ -80,12 +80,6 @@
 #undef JEMALLOC_PROF_GCC
 
 /*
- * JEMALLOC_TINY enables support for tiny objects, which are smaller than one
- * quantum.
- */
-#undef JEMALLOC_TINY
-
-/*
  * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects.
  * This makes it possible to allocate/deallocate objects without any locking
  * when the cache is in the steady state.
diff --git a/src/arena.c b/src/arena.c
index 8a158df..32afd0c 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -28,14 +28,7 @@ size_t		mspace_mask;
  * const_small_size2bin is a static constant lookup table that in the common
  * case can be used as-is for small_size2bin.
  */
-#if (LG_TINY_MIN == 2)
-#define	S2B_4(i)	i,
-#define	S2B_8(i)	S2B_4(i) S2B_4(i)
-#elif (LG_TINY_MIN == 3)
 #define	S2B_8(i)	i,
-#else
-#  error "Unsupported LG_TINY_MIN"
-#endif
 #define	S2B_16(i)	S2B_8(i) S2B_8(i)
 #define	S2B_32(i)	S2B_16(i) S2B_16(i)
 #define	S2B_64(i)	S2B_32(i) S2B_32(i)
@@ -49,23 +42,9 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
     const uint8_t	const_small_size2bin[] = {
 #if (LG_QUANTUM == 4)
 /* 16-byte quantum **********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-       S2B_4(0)			/*    4 */
-       S2B_4(1)			/*    8 */
-       S2B_8(2)			/*   16 */
-#      define S2B_QMIN 2
-#    elif (LG_TINY_MIN == 3)
-       S2B_8(0)			/*    8 */
-       S2B_8(1)			/*   16 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_16(0)		/*   16 */
-#    define S2B_QMIN 0
-#  endif
+	S2B_8(0)		/*    8 */
+	S2B_8(1)		/*   16 */
+#  define S2B_QMIN 1
 	S2B_16(S2B_QMIN + 1)	/*   32 */
 	S2B_16(S2B_QMIN + 2)	/*   48 */
 	S2B_16(S2B_QMIN + 3)	/*   64 */
@@ -76,18 +55,8 @@ static JEMALLOC_ATTR(aligned(CACHELINE))
 #  define S2B_CMIN (S2B_QMIN + 8)
 #else
 /* 8-byte quantum ***********************/
-#  ifdef JEMALLOC_TINY
-#    if (LG_TINY_MIN == 2)
-       S2B_4(0)			/*    4 */
-       S2B_4(1)			/*    8 */
-#      define S2B_QMIN 1
-#    else
-#      error "Unsupported LG_TINY_MIN"
-#    endif
-#  else
-	S2B_8(0)		/*    8 */
-#    define S2B_QMIN 0
-#  endif
+#  define S2B_QMIN 0
+	S2B_8(S2B_QMIN + 0)	/*    8 */
 	S2B_8(S2B_QMIN + 1)	/*   16 */
 	S2B_8(S2B_QMIN + 2)	/*   24 */
 	S2B_8(S2B_QMIN + 3)	/*   32 */
@@ -2153,17 +2122,15 @@ small_size2bin_validate(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i++) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			assert(SMALL_SIZE2BIN(i) == binind);
-		}
-		for (; i < qspace_min; i++) {
-			size = pow2_ceil(i);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			assert(SMALL_SIZE2BIN(i) == binind);
-		}
+	for (; i < TINY_MIN; i++) {
+		size = TINY_MIN;
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
+	}
+	for (; i < qspace_min; i++) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		assert(SMALL_SIZE2BIN(i) == binind);
 	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i++) {
@@ -2223,17 +2190,15 @@ small_size2bin_init_hard(void)
 
 	i = 1;
 	/* Tiny. */
-	if (config_tiny) {
-		for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) {
-			size = pow2_ceil(1U << LG_TINY_MIN);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			CUSTOM_SMALL_SIZE2BIN(i) = binind;
-		}
-		for (; i < qspace_min; i += TINY_MIN) {
-			size = pow2_ceil(i);
-			binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
-			CUSTOM_SMALL_SIZE2BIN(i) = binind;
-		}
+	for (; i < TINY_MIN; i += TINY_MIN) {
+		size = TINY_MIN;
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
+	}
+	for (; i < qspace_min; i += TINY_MIN) {
+		size = pow2_ceil(i);
+		binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
+		CUSTOM_SMALL_SIZE2BIN(i) = binind;
 	}
 	/* Quantum-spaced. */
 	for (; i <= qspace_max; i += TINY_MIN) {
@@ -2398,17 +2363,12 @@ bin_info_init(void)
 	prev_run_size = PAGE_SIZE;
 	i = 0;
 	/* (2^n)-spaced tiny bins. */
-	if (config_tiny) {
-		for (; i < ntbins; i++) {
-			bin_info = &arena_bin_info[i];
-			bin_info->reg_size = (1U << (LG_TINY_MIN + i));
-			prev_run_size = bin_info_run_size_calc(bin_info,
-			    prev_run_size);
-			bitmap_info_init(&bin_info->bitmap_info,
-			    bin_info->nregs);
-		}
+	for (; i < ntbins; i++) {
+		bin_info = &arena_bin_info[i];
+		bin_info->reg_size = (1U << (LG_TINY_MIN + i));
+		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
+		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Quantum-spaced bins. */
 	for (; i < ntbins + nqbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2416,7 +2376,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Cacheline-spaced bins. */
 	for (; i < ntbins + nqbins + ncbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2425,7 +2384,6 @@ bin_info_init(void)
 		prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size);
 		bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs);
 	}
-
 	/* Subpage-spaced bins. */
 	for (; i < nbins; i++) {
 		bin_info = &arena_bin_info[i];
@@ -2456,8 +2414,7 @@ arena_boot(void)
 	assert(sspace_min < PAGE_SIZE);
 	sspace_max = PAGE_SIZE - SUBPAGE;
 
-	if (config_tiny)
-		assert(LG_QUANTUM >= LG_TINY_MIN);
+	assert(LG_QUANTUM >= LG_TINY_MIN);
 	assert(ntbins <= LG_QUANTUM);
 	nqbins = qspace_max >> LG_QUANTUM;
 	ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
diff --git a/src/ctl.c b/src/ctl.c
index 2ac2f66..6d0423f 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -56,7 +56,6 @@ CTL_PROTO(config_prof_libunwind)
 CTL_PROTO(config_stats)
 CTL_PROTO(config_sysv)
 CTL_PROTO(config_tcache)
-CTL_PROTO(config_tiny)
 CTL_PROTO(config_tls)
 CTL_PROTO(config_xmalloc)
 CTL_PROTO(opt_abort)
@@ -199,7 +198,6 @@ static const ctl_node_t	config_node[] = {
 	{NAME("stats"),			CTL(config_stats)},
 	{NAME("sysv"),			CTL(config_sysv)},
 	{NAME("tcache"),		CTL(config_tcache)},
-	{NAME("tiny"),			CTL(config_tiny)},
 	{NAME("tls"),			CTL(config_tls)},
 	{NAME("xmalloc"),		CTL(config_xmalloc)}
 };
@@ -993,8 +991,6 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,	\
 	int ret;							\
 	bool oldval;							\
 									\
-	if (n == false)							\
-		return (ENOENT);					\
 	READONLY();							\
 	oldval = n;							\
 	READ(oldval, bool);						\
@@ -1115,7 +1111,6 @@ CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind)
 CTL_RO_BOOL_CONFIG_GEN(config_stats)
 CTL_RO_BOOL_CONFIG_GEN(config_sysv)
 CTL_RO_BOOL_CONFIG_GEN(config_tcache)
-CTL_RO_BOOL_CONFIG_GEN(config_tiny)
 CTL_RO_BOOL_CONFIG_GEN(config_tls)
 CTL_RO_BOOL_CONFIG_GEN(config_xmalloc)
 
@@ -1203,8 +1198,8 @@ CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
 CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
 CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
 CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
-CTL_RO_NL_CGEN(config_tiny, arenas_tspace_max, (qspace_min >> 1), size_t)
+CTL_RO_NL_GEN(arenas_tspace_min, TINY_MIN, size_t)
+CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
 CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
 CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
 CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
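
For completeness, a minimal usage sketch (not part of the patch, and
assuming jemalloc's public mallctl() interface): after this change the
arenas.tspace_min and arenas.tspace_max mallctls are unconditionally
available, and config.tiny no longer exists.

    #include <stdio.h>
    #include <jemalloc/jemalloc.h>

    int
    main(void)
    {
        size_t tmin, tmax, sz = sizeof(size_t);

        /* Query the tiny size class bounds (TINY_MIN and qspace_min/2). */
        if (mallctl("arenas.tspace_min", &tmin, &sz, NULL, 0) == 0 &&
            mallctl("arenas.tspace_max", &tmax, &sz, NULL, 0) == 0)
            printf("tiny size classes: [%zu, %zu]\n", tmin, tmax);
        return (0);
    }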
-- 
cgit v0.12