From d84d2909c3132ee633c92fd0d720ec2aed80ff11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 01:08:28 -0800 Subject: Fix/enhance THP integration. Detect whether chunks start off as THP-capable by default (according to the state of /sys/kernel/mm/transparent_hugepage/enabled), and use this as the basis for whether to call pages_nohuge() once per chunk during first purge of any of the chunk's page runs. Add the --disable-thp configure option, as well as the opt.thp mallctl. This resolves #541. --- INSTALL | 7 ++ configure.ac | 24 ++++- doc/jemalloc.xml.in | 26 +++++ include/jemalloc/internal/arena.h | 1 + include/jemalloc/internal/jemalloc_internal.h.in | 7 ++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 11 ++- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 105 ++++++++++++++++++--- src/ctl.c | 6 ++ src/jemalloc.c | 3 + src/pages.c | 4 +- src/stats.c | 1 + test/unit/mallctl.c | 2 + 13 files changed, 177 insertions(+), 21 deletions(-) diff --git a/INSTALL b/INSTALL index 08b3624..19196ec 100644 --- a/INSTALL +++ b/INSTALL @@ -157,6 +157,13 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--disable-thp + Disable transparent huge page (THP) integration. On systems with THP + support, THPs are explicitly disabled as a side effect of unused dirty page + purging for chunks that back small and/or large allocations, because such + chunks typically comprise active, unused dirty, and untouched clean + pages. + --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. 
diff --git a/configure.ac b/configure.ac index db9e722..20a8a64 100644 --- a/configure.ac +++ b/configure.ac @@ -1683,10 +1683,31 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_THP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) fi fi +dnl Enable transparent huge page support by default. +AC_ARG_ENABLE([thp], + [AS_HELP_STRING([--disable-thp], + [Disable transparent huge page supprot])], +[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then + enable_thp="0" +else + enable_thp="1" +fi +], +[if test "x${je_cv_thp}" = "xyes" ; then + enable_thp="1" +else + enable_thp="0" +fi +]) +if test "x$enable_thp" = "x1" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) +fi +AC_SUBST([enable_thp]) + dnl ============================================================================ dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. @@ -2014,6 +2035,7 @@ AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) +AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d9c8345..c97ab0f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -850,6 +850,17 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.thp + (bool) + r- + + was not specified + during build configuration, and the system supports transparent huge + page manipulation. + + config.tls @@ -1162,6 +1173,21 @@ malloc_conf = "xmalloc:true";]]> forcefully disabled. + + + opt.thp + (bool) + r- + [] + + Transparent huge page (THP) integration + enabled/disabled. 
When enabled, THPs are explicitly disabled as a side + effect of unused dirty page purging for chunks that back small and/or + large allocations, because such chunks typically comprise active, + unused dirty, and untouched clean pages. This option is enabled by + default. + + opt.lg_tcache_max diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index ce4e602..119e3a5 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -506,6 +506,7 @@ static const size_t large_pad = #endif ; +extern bool opt_thp; extern purge_mode_t opt_purge; extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6213dd8..e3b499a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -99,6 +99,13 @@ static const bool config_tcache = false #endif ; +static const bool config_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b7ae3b7..7c88b0d 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -253,6 +253,12 @@ #undef JEMALLOC_HAVE_MADVISE /* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_HUGE + +/* * Methods for purging unused pages differ between operating systems. * * madvise(..., MADV_FREE) : This marks pages as being unused, such that they @@ -264,10 +270,7 @@ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED -/* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). 
- */ +/* Defined if transparent huge page support is enabled. */ #undef JEMALLOC_THP /* Define if operating system has alloca.h header. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a83d984..0aa9b01 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -392,6 +392,7 @@ opt_quarantine opt_redzone opt_stats_print opt_tcache +opt_thp opt_utrace opt_xmalloc opt_zero diff --git a/src/arena.c b/src/arena.c index ca992f7..a9dff0b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,8 @@ /******************************************************************************/ /* Data. */ +bool opt_thp = true; +static bool thp_initially_huge; purge_mode_t opt_purge = PURGE_DEFAULT; const char *purge_mode_names[] = { "ratio", @@ -680,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (chunk == NULL) return (NULL); - chunk->hugepage = true; + if (config_thp && opt_thp) { + chunk->hugepage = thp_initially_huge; + } /* * Initialize the map to contain one maximal free untouched run. 
Mark @@ -745,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { - size_t sn, hugepage; + size_t sn; + UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false); bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); sn = extent_node_sn_get(&chunk->node); - hugepage = chunk->hugepage; + if (config_thp && opt_thp) { + hugepage = chunk->hugepage; + } committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -765,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - if (!hugepage) { + if (config_thp && opt_thp && hugepage != thp_initially_huge) { /* - * Convert chunk back to the default state, so that all - * subsequent chunk allocations start out with chunks that can - * be backed by transparent huge pages. + * Convert chunk back to initial THP state, so that all + * subsequent chunk allocations start out in a consistent state. */ - pages_huge(chunk, chunksize); + if (thp_initially_huge) { + pages_huge(chunk, chunksize); + } else { + pages_nohuge(chunk, chunksize); + } } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, @@ -1711,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * If this is the first run purged within chunk, mark - * the chunk as non-huge. This will prevent all use of - * transparent huge pages for this chunk until the chunk - * as a whole is deallocated. + * the chunk as non-THP-capable. This will prevent all + * use of THPs for this chunk until the chunk as a whole + * is deallocated. 
*/ - if (chunk->hugepage) { - pages_nohuge(chunk, chunksize); - chunk->hugepage = false; + if (config_thp && opt_thp && chunk->hugepage) { + chunk->hugepage = pages_nohuge(chunk, + chunksize); } assert(pageind + npages <= chunk_npages); @@ -3772,11 +3782,78 @@ bin_info_init(void) #undef SC } +static void +init_thp_initially_huge(void) { + int fd; + char buf[sizeof("[always] madvise never\n")]; + ssize_t nread; + static const char *enabled_states[] = { + "[always] madvise never\n", + "always [madvise] never\n", + "always madvise [never]\n" + }; + static const bool thp_initially_huge_states[] = { + true, + false, + false + }; + unsigned i; + + if (config_debug) { + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + assert(sizeof(buf) > strlen(enabled_states[i])); + } + } + assert(sizeof(enabled_states)/sizeof(const char *) == + sizeof(thp_initially_huge_states)/sizeof(bool)); + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + if (strncmp(buf, enabled_states[i], (size_t)nread) == 0) { + thp_initially_huge = thp_initially_huge_states[i]; + return; + } + } + +label_error: + thp_initially_huge = false; +} + void arena_boot(void) { unsigned i; + if (config_thp && opt_thp) { + init_thp_initially_huge(); + } + arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_decay_time_default_set(opt_decay_time); diff --git a/src/ctl.c 
b/src/ctl.c index 1e62e2d..56bc4f4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_tcache) +CTL_PROTO(config_thp) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) @@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_thp) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, {NAME("tcache"), CTL(config_tcache)}, + {NAME("thp"), CTL(config_thp)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("valgrind"), CTL(config_valgrind)}, @@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("thp"), CTL(opt_thp)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1268,6 +1272,7 @@ CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_valgrind, bool) @@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) 
CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index e9d8352..1cefd4c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,9 @@ malloc_conf_init(void) "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } + if (config_thp) { + CONF_HANDLE_BOOL(opt_thp, "thp", true) + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, diff --git a/src/pages.c b/src/pages.c index 5f0c966..7698e49 100644 --- a/src/pages.c +++ b/src/pages.c @@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return (false); @@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return (false); diff --git a/src/stats.c b/src/stats.c index 92b8086..b76afc5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -750,6 +750,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(thp, ",") OPT_WRITE_BOOL(prof, ",") OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 2353c92..3d1a740 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -142,6 +142,7 @@ TEST_BEGIN(test_mallctl_config) TEST_MALLCTL_CONFIG(prof_libunwind, bool); TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(thp, bool); TEST_MALLCTL_CONFIG(tls, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(valgrind, bool); @@ -182,6 +183,7 @@ TEST_BEGIN(test_mallctl_opt) 
TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, tcache); TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); + TEST_MALLCTL_OPT(bool, thp, thp); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); -- cgit v0.12