summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Qi Wang <interwq@gwu.edu> 2017-08-24 21:29:28 (GMT)
committer: Qi Wang <interwq@gmail.com> 2017-08-30 23:47:32 (GMT)
commit: 47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff (patch)
tree: 0c7a06a91a4c7988b9d95cbc0595c29403395fd9
parent: ea91dfa58e11373748f747041c3041f72c9a7658 (diff)
download: jemalloc-47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff.zip
jemalloc-47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff.tar.gz
jemalloc-47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff.tar.bz2
Change opt.metadata_thp to [disabled,auto,always].
To avoid the high RSS caused by combining THP with a low-usage arena (i.e. when THP becomes a significant percentage), add a new "auto" option which only starts using THP after a base allocator has used up the first THP region. Starting from the second hugepage (in a single arena), "auto" behaves the same as "always", i.e. it madvises hugepage right away.
-rw-r--r-- doc/jemalloc.xml.in 12
-rw-r--r-- include/jemalloc/internal/base_externs.h 3
-rw-r--r-- include/jemalloc/internal/base_inlines.h 4
-rw-r--r-- include/jemalloc/internal/base_types.h 17
-rw-r--r-- src/base.c 46
-rw-r--r-- src/ctl.c 3
-rw-r--r-- src/jemalloc.c 18
-rw-r--r-- src/pages.c 2
-rw-r--r-- src/stats.c 2
-rw-r--r-- test/unit/mallctl.c 2
10 files changed, 84 insertions, 25 deletions
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index f1712f0..0c95604 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -919,13 +919,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
<varlistentry id="opt.metadata_thp">
<term>
<mallctl>opt.metadata_thp</mallctl>
- (<type>bool</type>)
+ (<type>const char *</type>)
<literal>r-</literal>
</term>
- <listitem><para>If true, allow jemalloc to use transparent huge page
- (THP) for internal metadata (see <link
- linkend="stats.metadata">stats.metadata</link> for details). This
- option is disabled by default.</para></listitem>
+ <listitem><para>Controls whether to allow jemalloc to use transparent
+ huge page (THP) for internal metadata (see <link
+ linkend="stats.metadata">stats.metadata</link>). <quote>always</quote>
+ allows such usage. <quote>auto</quote> uses no THP initially, but may
+ begin to do so when metadata usage reaches a certain level. The default
+ is <quote>disabled</quote>.</para></listitem>
</varlistentry>
<varlistentry id="opt.retain">
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
index a5cb8a8..6cd1187 100644
--- a/include/jemalloc/internal/base_externs.h
+++ b/include/jemalloc/internal/base_externs.h
@@ -1,7 +1,8 @@
#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
-extern bool opt_metadata_thp;
+extern metadata_thp_mode_t opt_metadata_thp;
+extern const char *metadata_thp_mode_names[];
base_t *b0get(void);
base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h
index 931560b..aec0e2e 100644
--- a/include/jemalloc/internal/base_inlines.h
+++ b/include/jemalloc/internal/base_inlines.h
@@ -6,4 +6,8 @@ base_ind_get(const base_t *base) {
return base->ind;
}
+static inline bool
+metadata_thp_enabled(void) {
+ return (opt_metadata_thp != metadata_thp_disabled);
+}
#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h
index 6e71033..97e38a9 100644
--- a/include/jemalloc/internal/base_types.h
+++ b/include/jemalloc/internal/base_types.h
@@ -4,6 +4,21 @@
typedef struct base_block_s base_block_t;
typedef struct base_s base_t;
-#define METADATA_THP_DEFAULT false
+#define METADATA_THP_DEFAULT metadata_thp_disabled
+
+typedef enum {
+ metadata_thp_disabled = 0,
+ /*
+ * Lazily enable hugepage for metadata. To avoid high RSS caused by THP
+ * + low usage arena (i.e. THP becomes a significant percentage), the
+ * "auto" option only starts using THP after a base allocator used up
+ * the first THP region. Starting from the second hugepage (in a single
+ * arena), "auto" behaves the same as "always", i.e. madvise hugepage
+ * right away.
+ */
+ metadata_thp_auto = 1,
+ metadata_thp_always = 2,
+ metadata_thp_mode_limit = 3
+} metadata_thp_mode_t;
#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
diff --git a/src/base.c b/src/base.c
index 9925978..9cb02b6 100644
--- a/src/base.c
+++ b/src/base.c
@@ -12,7 +12,13 @@
static base_t *b0;
-bool opt_metadata_thp = METADATA_THP_DEFAULT;
+metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
+
+const char *metadata_thp_mode_names[] = {
+ "disabled",
+ "auto",
+ "always"
+};
/******************************************************************************/
@@ -24,7 +30,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
/* We use hugepage sizes regardless of opt_metadata_thp. */
assert(size == HUGEPAGE_CEILING(size));
- size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE;
+ size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE;
if (extent_hooks == &extent_hooks_default) {
addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
} else {
@@ -36,12 +42,6 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size)
post_reentrancy(tsd);
}
- if (addr != NULL && opt_metadata_thp && thp_state_madvise) {
- assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
- (size & HUGEPAGE_MASK) == 0);
- pages_huge(addr, size);
- }
-
return addr;
}
@@ -101,7 +101,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
post_reentrancy(tsd);
}
label_done:
- if (opt_metadata_thp && thp_state_madvise) {
+ if (metadata_thp_enabled() && thp_state_madvise) {
/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
(size & HUGEPAGE_MASK) == 0);
@@ -181,8 +181,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent,
* On success a pointer to the initialized base_block_t header is returned.
*/
static base_block_t *
-base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
- pszind_t *pind_last, size_t *extent_sn_next, size_t size,
+base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
+ unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
size_t alignment) {
alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
size_t usize = ALIGNMENT_CEILING(size, alignment);
@@ -208,6 +208,26 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind,
if (block == NULL) {
return NULL;
}
+
+ if (metadata_thp_enabled() && thp_state_madvise) {
+ void *addr = (void *)block;
+ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
+ (block_size & HUGEPAGE_MASK) == 0);
+ /* base == NULL indicates this is a new base. */
+ if (base != NULL || opt_metadata_thp == metadata_thp_always) {
+ /* Use hugepage for the new block. */
+ pages_huge(addr, block_size);
+ }
+ if (base != NULL && opt_metadata_thp == metadata_thp_auto) {
+ /* Make the first block THP lazily. */
+ base_block_t *first_block = base->blocks;
+ if (first_block->next == NULL) {
+ assert((first_block->size & HUGEPAGE_MASK) == 0);
+ pages_huge(first_block, first_block->size);
+ }
+ }
+ }
+
*pind_last = sz_psz2ind(block_size);
block->size = block_size;
block->next = NULL;
@@ -231,7 +251,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
* called.
*/
malloc_mutex_unlock(tsdn, &base->mtx);
- base_block_t *block = base_block_alloc(tsdn, extent_hooks,
+ base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
alignment);
malloc_mutex_lock(tsdn, &base->mtx);
@@ -259,7 +279,7 @@ base_t *
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
pszind_t pind_last = 0;
size_t extent_sn_next = 0;
- base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind,
+ base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
&pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
if (block == NULL) {
return NULL;
diff --git a/src/ctl.c b/src/ctl.c
index c299103..ace10b0 100644
--- a/src/ctl.c
+++ b/src/ctl.c
@@ -1570,7 +1570,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool)
CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool)
-CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool)
+CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp],
+ const char *)
CTL_RO_NL_GEN(opt_retain, opt_retain, bool)
CTL_RO_NL_GEN(opt_dss, opt_dss, const char *)
CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned)
diff --git a/src/jemalloc.c b/src/jemalloc.c
index cbae259..3c0ea7d 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -1055,7 +1055,23 @@ malloc_conf_init(void) {
if (opt_abort_conf && had_conf_error) {
malloc_abort_invalid_conf();
}
- CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp")
+ if (strncmp("metadata_thp", k, klen) == 0) {
+ int i;
+ bool match = false;
+ for (i = 0; i < metadata_thp_mode_limit; i++) {
+ if (strncmp(metadata_thp_mode_names[i],
+ v, vlen) == 0) {
+ opt_metadata_thp = i;
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ malloc_conf_error("Invalid conf value",
+ k, klen, v, vlen);
+ }
+ continue;
+ }
CONF_HANDLE_BOOL(opt_retain, "retain")
if (strncmp("dss", k, klen) == 0) {
int i;
diff --git a/src/pages.c b/src/pages.c
index 70f1fd3..4ca3107 100644
--- a/src/pages.c
+++ b/src/pages.c
@@ -418,7 +418,7 @@ os_overcommits_proc(void) {
static void
init_thp_state(void) {
if (!have_madvise_huge) {
- if (opt_metadata_thp && opt_abort) {
+ if (metadata_thp_enabled() && opt_abort) {
malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
abort();
}
diff --git a/src/stats.c b/src/stats.c
index 746cc42..e1a3f8c 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -802,11 +802,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque,
}
OPT_WRITE_BOOL(abort, ",")
OPT_WRITE_BOOL(abort_conf, ",")
- OPT_WRITE_BOOL(metadata_thp, ",")
OPT_WRITE_BOOL(retain, ",")
OPT_WRITE_CHAR_P(dss, ",")
OPT_WRITE_UNSIGNED(narenas, ",")
OPT_WRITE_CHAR_P(percpu_arena, ",")
+ OPT_WRITE_CHAR_P(metadata_thp, ",")
OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",")
OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",")
OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",")
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c
index 0b14e78..5612cce 100644
--- a/test/unit/mallctl.c
+++ b/test/unit/mallctl.c
@@ -158,7 +158,7 @@ TEST_BEGIN(test_mallctl_opt) {
TEST_MALLCTL_OPT(bool, abort, always);
TEST_MALLCTL_OPT(bool, abort_conf, always);
- TEST_MALLCTL_OPT(bool, metadata_thp, always);
+ TEST_MALLCTL_OPT(const char *, metadata_thp, always);
TEST_MALLCTL_OPT(bool, retain, always);
TEST_MALLCTL_OPT(const char *, dss, always);
TEST_MALLCTL_OPT(unsigned, narenas, always);