/doc/src/widgets-and-layouts/

ion>space:mode:
authorQi Wang <interwq@gwu.edu>2022-05-06 18:28:25 (GMT)
committerQi Wang <interwq@gwu.edu>2022-05-06 18:28:25 (GMT)
commit54eaed1d8b56b1aa528be3bdd1877e59c56fa90c (patch)
treee79620e0c00b1f8b6b698fbe74df6bae7d812ae2
parentea6b3e973b477b8061e0076bb257dbd7f3faa756 (diff)
parent304c919829f9f340669b61fa64867cfe5dba8021 (diff)
downloadjemalloc-5.3.0.zip
jemalloc-5.3.0.tar.gz
jemalloc-5.3.0.tar.bz2
Merge branch 'dev'5.3.0
Diffstat
-rw-r--r--.appveyor.yml1
-rw-r--r--.cirrus.yml33
-rw-r--r--.clang-format122
-rw-r--r--.gitignore8
-rw-r--r--.travis.yml448
-rw-r--r--ChangeLog100
-rw-r--r--INSTALL.md27
-rw-r--r--Makefile.in219
-rw-r--r--TUNING.md8
-rw-r--r--bin/jeprof.in112
-rwxr-xr-xbuild-aux/config.guess1033
-rwxr-xr-xbuild-aux/config.sub2626
-rw-r--r--configure.ac591
-rw-r--r--doc/jemalloc.xml.in302
-rw-r--r--doc_internal/PROFILING_INTERNALS.md127
-rw-r--r--doc_internal/jemalloc.svg1
-rw-r--r--include/jemalloc/internal/activity_callback.h23
-rw-r--r--include/jemalloc/internal/arena_externs.h77
-rw-r--r--include/jemalloc/internal/arena_inlines_a.h35
-rw-r--r--include/jemalloc/internal/arena_inlines_b.h489
-rw-r--r--include/jemalloc/internal/arena_stats.h227
-rw-r--r--include/jemalloc/internal/arena_structs.h101
-rw-r--r--include/jemalloc/internal/arena_structs_a.h11
-rw-r--r--include/jemalloc/internal/arena_structs_b.h232
-rw-r--r--include/jemalloc/internal/arena_types.h23
-rw-r--r--include/jemalloc/internal/atomic.h33
-rw-r--r--include/jemalloc/internal/background_thread_externs.h7
-rw-r--r--include/jemalloc/internal/background_thread_inlines.h14
-rw-r--r--include/jemalloc/internal/background_thread_structs.h12
-rw-r--r--include/jemalloc/internal/base.h110
-rw-r--r--include/jemalloc/internal/base_externs.h22
-rw-r--r--include/jemalloc/internal/base_inlines.h13
-rw-r--r--include/jemalloc/internal/base_structs.h59
-rw-r--r--include/jemalloc/internal/base_types.h33
-rw-r--r--include/jemalloc/internal/bin.h85
-rw-r--r--include/jemalloc/internal/bin_info.h50
-rw-r--r--include/jemalloc/internal/bin_stats.h5
-rw-r--r--include/jemalloc/internal/bin_types.h2
-rw-r--r--include/jemalloc/internal/bit_util.h457
-rw-r--r--include/jemalloc/internal/bitmap.h21
-rw-r--r--include/jemalloc/internal/buf_writer.h32
-rw-r--r--include/jemalloc/internal/cache_bin.h625
-rw-r--r--include/jemalloc/internal/counter.h34
-rw-r--r--include/jemalloc/internal/ctl.h31
-rw-r--r--include/jemalloc/internal/decay.h186
-rw-r--r--include/jemalloc/internal/ecache.h55
-rw-r--r--include/jemalloc/internal/edata.h698
-rw-r--r--include/jemalloc/internal/edata_cache.h49
-rw-r--r--include/jemalloc/internal/ehooks.h412
-rw-r--r--include/jemalloc/internal/emap.h357
-rw-r--r--include/jemalloc/internal/emitter.h74
-rw-r--r--include/jemalloc/internal/eset.h77
-rw-r--r--include/jemalloc/internal/exp_grow.h50
-rw-r--r--include/jemalloc/internal/extent.h137
-rw-r--r--include/jemalloc/internal/extent_externs.h83
-rw-r--r--include/jemalloc/internal/extent_inlines.h501
-rw-r--r--include/jemalloc/internal/extent_structs.h256
-rw-r--r--include/jemalloc/internal/extent_types.h23
-rw-r--r--include/jemalloc/internal/fb.h373
-rw-r--r--include/jemalloc/internal/fxp.h126
-rw-r--r--include/jemalloc/internal/hash.h65
-rw-r--r--include/jemalloc/internal/hpa.h182
-rw-r--r--include/jemalloc/internal/hpa_hooks.h17
-rw-r--r--include/jemalloc/internal/hpa_opts.h74
-rw-r--r--include/jemalloc/internal/hpdata.h413
-rw-r--r--include/jemalloc/internal/inspect.h40
-rw-r--r--include/jemalloc/internal/jemalloc_internal_decls.h16
-rw-r--r--include/jemalloc/internal/jemalloc_internal_defs.h.in61
-rw-r--r--include/jemalloc/internal/jemalloc_internal_externs.h28
-rw-r--r--include/jemalloc/internal/jemalloc_internal_includes.h16
-rw-r--r--include/jemalloc/internal/jemalloc_internal_inlines_a.h82
-rw-r--r--include/jemalloc/internal/jemalloc_internal_inlines_b.h52
-rw-r--r--include/jemalloc/internal/jemalloc_internal_inlines_c.h124
-rw-r--r--include/jemalloc/internal/jemalloc_internal_macros.h13
-rw-r--r--include/jemalloc/internal/jemalloc_internal_types.h28
-rw-r--r--include/jemalloc/internal/jemalloc_preamble.h.in52
-rw-r--r--include/jemalloc/internal/large_externs.h26
-rw-r--r--include/jemalloc/internal/lockedint.h204
-rw-r--r--include/jemalloc/internal/malloc_io.h11
-rw-r--r--include/jemalloc/internal/mpsc_queue.h134
-rw-r--r--include/jemalloc/internal/mutex.h63
-rw-r--r--include/jemalloc/internal/mutex_pool.h94
-rw-r--r--include/jemalloc/internal/mutex_prof.h13
-rw-r--r--include/jemalloc/internal/nstime.h43
-rw-r--r--include/jemalloc/internal/pa.h243
-rw-r--r--include/jemalloc/internal/pac.h179
-rw-r--r--include/jemalloc/internal/pages.h31
-rw-r--r--include/jemalloc/internal/pai.h95
-rw-r--r--include/jemalloc/internal/peak.h37
-rw-r--r--include/jemalloc/internal/peak_event.h24
-rw-r--r--include/jemalloc/internal/ph.h817
-rw-r--r--include/jemalloc/internal/prng.h93
-rw-r--r--include/jemalloc/internal/prof_data.h37
-rw-r--r--include/jemalloc/internal/prof_externs.h116
-rw-r--r--include/jemalloc/internal/prof_hook.h21
-rw-r--r--include/jemalloc/internal/prof_inlines.h261
-rw-r--r--include/jemalloc/internal/prof_inlines_a.h85
-rw-r--r--include/jemalloc/internal/prof_inlines_b.h250
-rw-r--r--include/jemalloc/internal/prof_log.h22
-rw-r--r--include/jemalloc/internal/prof_recent.h23
-rw-r--r--include/jemalloc/internal/prof_stats.h17
-rw-r--r--include/jemalloc/internal/prof_structs.h47
-rw-r--r--include/jemalloc/internal/prof_sys.h30
-rw-r--r--include/jemalloc/internal/prof_types.h37
-rw-r--r--include/jemalloc/internal/psset.h131
-rw-r--r--include/jemalloc/internal/ql.h129
-rw-r--r--include/jemalloc/internal/qr.h130
-rw-r--r--include/jemalloc/internal/quantum.h12
-rw-r--r--include/jemalloc/internal/rb.h920
-rw-r--r--include/jemalloc/internal/rtree.h520
-rw-r--r--include/jemalloc/internal/rtree_tsd.h24
-rw-r--r--include/jemalloc/internal/safety_check.h7
-rw-r--r--include/jemalloc/internal/san.h191
-rw-r--r--include/jemalloc/internal/san_bump.h52
-rw-r--r--include/jemalloc/internal/sc.h78
-rw-r--r--include/jemalloc/internal/sec.h120
-rw-r--r--include/jemalloc/internal/sec_opts.h59
-rw-r--r--include/jemalloc/internal/slab_data.h12
-rw-r--r--include/jemalloc/internal/stats.h29
-rw-r--r--include/jemalloc/internal/sz.h97
-rw-r--r--include/jemalloc/internal/tcache_externs.h60
-rw-r--r--include/jemalloc/internal/tcache_inlines.h156
-rw-r--r--include/jemalloc/internal/tcache_structs.h62
-rw-r--r--include/jemalloc/internal/tcache_types.h40
-rw-r--r--include/jemalloc/internal/test_hooks.h23
-rw-r--r--include/jemalloc/internal/thread_event.h301
-rw-r--r--include/jemalloc/internal/ticker.h92
-rw-r--r--include/jemalloc/internal/tsd.h243
-rw-r--r--include/jemalloc/internal/tsd_generic.h23
-rw-r--r--include/jemalloc/internal/tsd_malloc_thread_cleanup.h2
-rw-r--r--include/jemalloc/internal/tsd_types.h2
-rw-r--r--include/jemalloc/internal/tsd_win.h2
-rw-r--r--include/jemalloc/internal/typed_list.h55
-rw-r--r--include/jemalloc/internal/util.h56
-rw-r--r--include/jemalloc/internal/witness.h183
-rw-r--r--include/jemalloc/jemalloc_defs.h.in6
-rw-r--r--include/jemalloc/jemalloc_macros.h.in20
-rw-r--r--include/jemalloc/jemalloc_protos.h.in23
-rw-r--r--m4/ax_cxx_compile_stdcxx.m4449
-rw-r--r--msvc/projects/vc2015/jemalloc/jemalloc.vcxproj40
-rw-r--r--msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters108
-rw-r--r--msvc/projects/vc2017/jemalloc/jemalloc.vcxproj39
-rw-r--r--msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters107
-rw-r--r--msvc/test_threads/test_threads.cpp1
-rwxr-xr-xscripts/check-formatting.sh28
-rw-r--r--scripts/freebsd/before_install.sh3
-rw-r--r--scripts/freebsd/before_script.sh10
-rw-r--r--scripts/freebsd/script.sh3
-rwxr-xr-xscripts/gen_run_tests.py36
-rwxr-xr-xscripts/gen_travis.py380
-rw-r--r--scripts/linux/before_install.sh13
-rw-r--r--scripts/windows/before_install.sh83
-rw-r--r--scripts/windows/before_script.sh20
-rw-r--r--scripts/windows/script.sh10
-rw-r--r--src/arena.c2063
-rw-r--r--src/background_thread.c335
-rw-r--r--src/base.c193
-rw-r--r--src/bin.c30
-rw-r--r--src/bin_info.c30
-rw-r--r--src/bitmap.c1
-rw-r--r--src/buf_writer.c144
-rw-r--r--src/cache_bin.c99
-rw-r--r--src/ckh.c7
-rw-r--r--src/counter.c30
-rw-r--r--src/ctl.c1687
-rw-r--r--src/decay.c295
-rw-r--r--src/ecache.c35
-rw-r--r--src/edata.c6
-rw-r--r--src/edata_cache.c154
-rw-r--r--src/ehooks.c275
-rw-r--r--src/emap.c386
-rw-r--r--src/eset.c282
-rw-r--r--src/exp_grow.c8
-rw-r--r--src/extent.c2481
-rw-r--r--src/extent_dss.c42
-rw-r--r--src/extent_mmap.c1
-rw-r--r--src/fxp.c124
-rw-r--r--src/hash.c3
-rw-r--r--src/hook.c6
-rw-r--r--src/hpa.c1044
-rw-r--r--src/hpa_hooks.c63
-rw-r--r--src/hpdata.c325
-rw-r--r--src/inspect.c77
-rw-r--r--src/jemalloc.c2174
-rw-r--r--src/jemalloc_cpp.cpp131
-rw-r--r--src/large.c299
-rw-r--r--src/malloc_io.c46
-rw-r--r--src/mutex.c19
-rw-r--r--src/mutex_pool.c18
-rw-r--r--src/nstime.c127
-rw-r--r--src/pa.c277
-rw-r--r--src/pa_extra.c191
-rw-r--r--src/pac.c587
-rw-r--r--src/pages.c209
-rw-r--r--src/pai.c31
-rw-r--r--src/peak_event.c82
-rw-r--r--src/prng.c3
-rw-r--r--src/prof.c2923
-rw-r--r--src/prof_data.c1447
-rw-r--r--src/prof_log.c717
-rw-r--r--src/prof_recent.c600
-rw-r--r--src/prof_stats.c57
-rw-r--r--src/prof_sys.c669
-rw-r--r--src/psset.c385
-rw-r--r--src/rtree.c75
-rw-r--r--src/safety_check.c16
-rw-r--r--src/san.c208
-rw-r--r--src/san_bump.c104
-rw-r--r--src/sc.c17
-rw-r--r--src/sec.c422
-rw-r--r--src/stats.c794
-rw-r--r--src/sz.c52
-rw-r--r--src/tcache.c1137
-rw-r--r--src/thread_event.c343
-rw-r--r--src/ticker.c31
-rwxr-xr-xsrc/ticker.py15
-rw-r--r--src/tsd.c75
-rw-r--r--src/witness.c44
-rw-r--r--test/analyze/prof_bias.c60
-rw-r--r--test/analyze/rand.c276
-rw-r--r--test/analyze/sizes.c53
-rw-r--r--test/include/test/arena_util.h155
-rw-r--r--test/include/test/bench.h60
-rw-r--r--test/include/test/bgthd.h17
-rw-r--r--test/include/test/btalloc.h2
-rw-r--r--test/include/test/extent_hooks.h40
-rw-r--r--test/include/test/jemalloc_test.h.in15
-rw-r--r--test/include/test/mq.h4
-rw-r--r--test/include/test/nbits.h111
-rw-r--r--test/include/test/san.h14
-rw-r--r--test/include/test/sleep.h1
-rw-r--r--test/include/test/test.h385
-rw-r--r--test/integration/MALLOCX_ARENA.c8
-rw-r--r--test/integration/aligned_alloc.c16
-rw-r--r--test/integration/allocated.c22
-rw-r--r--test/integration/cpp/basic.cpp5
-rw-r--r--test/integration/cpp/infallible_new_false.cpp23
-rw-r--r--test/integration/cpp/infallible_new_false.sh8
-rw-r--r--test/integration/cpp/infallible_new_true.cpp67
-rw-r--r--test/integration/cpp/infallible_new_true.sh8
-rw-r--r--test/integration/extent.c155
-rw-r--r--test/integration/malloc.c2
-rw-r--r--test/integration/mallocx.c56
-rw-r--r--test/integration/overflow.c20
-rw-r--r--test/integration/posix_memalign.c14
-rw-r--r--test/integration/rallocx.c118
-rw-r--r--test/integration/slab_sizes.c22
-rw-r--r--test/integration/smallocx.c66
-rw-r--r--test/integration/thread_arena.c10
-rw-r--r--test/integration/thread_tcache_enabled.c38
-rw-r--r--test/integration/xallocx.c110
-rw-r--r--test/src/sleep.c (renamed from test/src/mq.c)4
-rw-r--r--test/src/test.c4
-rw-r--r--test/src/timer.c3
-rw-r--r--test/stress/batch_alloc.c198
-rw-r--r--test/stress/fill_flush.c76
-rw-r--r--test/stress/large_microbench.c33
-rw-r--r--test/stress/mallctl.c74
-rw-r--r--test/stress/microbench.c43
-rw-r--r--test/unit/SFMT.c32
-rw-r--r--test/unit/a0.c2
-rw-r--r--test/unit/arena_decay.c436
-rw-r--r--test/unit/arena_decay.sh3
-rw-r--r--test/unit/arena_reset.c90
-rw-r--r--test/unit/atomic.c46
-rw-r--r--test/unit/background_thread.c37
-rw-r--r--test/unit/background_thread_enable.c55
-rw-r--r--test/unit/base.c109
-rw-r--r--test/unit/batch_alloc.c189
-rw-r--r--test/unit/batch_alloc.sh3
-rw-r--r--test/unit/batch_alloc_prof.c1
-rw-r--r--test/unit/batch_alloc_prof.sh3
-rw-r--r--test/unit/binshard.c38
-rw-r--r--test/unit/bit_util.c256
-rw-r--r--test/unit/bitmap.c194
-rw-r--r--test/unit/buf_writer.c196
-rw-r--r--test/unit/cache_bin.c384
-rw-r--r--test/unit/ckh.c74
-rw-r--r--test/unit/counter.c80
-rw-r--r--test/unit/decay.c784
-rw-r--r--test/unit/decay.sh3
-rw-r--r--test/unit/div.c2
-rw-r--r--test/unit/double_free.c77
-rw-r--r--test/unit/double_free.h1
-rw-r--r--test/unit/edata_cache.c226
-rw-r--r--test/unit/emitter.c180
-rw-r--r--test/unit/extent_quantize.c50
-rw-r--r--test/unit/fb.c954
-rw-r--r--test/unit/fork.c8
-rw-r--r--test/unit/fxp.c394
-rw-r--r--test/unit/hash.c2
-rw-r--r--test/unit/hook.c348
-rw-r--r--test/unit/hpa.c459
-rw-r--r--test/unit/hpa_background_thread.c188
-rw-r--r--test/unit/hpa_background_thread.sh4
-rw-r--r--test/unit/hpdata.c244
-rw-r--r--test/unit/huge.c58
-rw-r--r--test/unit/inspect.c (renamed from test/unit/extent_util.c)87
-rw-r--r--test/unit/inspect.sh5
-rw-r--r--test/unit/junk.c282
-rw-r--r--test/unit/log.c31
-rw-r--r--test/unit/mallctl.c684
-rw-r--r--test/unit/malloc_conf_2.c29
-rw-r--r--test/unit/malloc_conf_2.sh1
-rw-r--r--test/unit/malloc_io.c28
-rw-r--r--test/unit/math.c12
-rw-r--r--test/unit/mpsc_queue.c304
-rw-r--r--test/unit/mq.c18
-rw-r--r--test/unit/mtx.c6
-rw-r--r--test/unit/nstime.c119
-rw-r--r--test/unit/oversize_threshold.c133
-rw-r--r--test/unit/pa.c126
-rw-r--r--test/unit/pack.c20
-rw-r--r--test/unit/pages.c6
-rw-r--r--test/unit/peak.c47
-rw-r--r--test/unit/ph.c110
-rw-r--r--test/unit/prng.c226
-rw-r--r--test/unit/prof_accum.c13
-rw-r--r--test/unit/prof_active.c16
-rw-r--r--test/unit/prof_active.sh2
-rw-r--r--test/unit/prof_gdump.c29
-rw-r--r--test/unit/prof_hook.c169
-rw-r--r--test/unit/prof_hook.sh6
-rw-r--r--test/unit/prof_idump.c25
-rw-r--r--test/unit/prof_log.c65
-rw-r--r--test/unit/prof_log.sh2
-rw-r--r--test/unit/prof_mdump.c216
-rw-r--r--test/unit/prof_mdump.sh6
-rw-r--r--test/unit/prof_recent.c678
-rw-r--r--test/unit/prof_recent.sh5
-rw-r--r--test/unit/prof_reset.c112
-rw-r--r--test/unit/prof_reset.sh2
-rw-r--r--test/unit/prof_stats.c151
-rw-r--r--test/unit/prof_stats.sh5
-rw-r--r--test/unit/prof_sys_thread_name.c77
-rw-r--r--test/unit/prof_sys_thread_name.sh5
-rw-r--r--test/unit/prof_tctx.c38
-rw-r--r--test/unit/prof_tctx.sh2
-rw-r--r--test/unit/prof_thread_name.c22
-rw-r--r--test/unit/psset.c748
-rw-r--r--test/unit/ql.c139
-rw-r--r--test/unit/qr.c44
-rw-r--r--test/unit/rb.c794
-rw-r--r--test/unit/retained.c60
-rw-r--r--test/unit/rtree.c321
-rw-r--r--test/unit/safety_check.c33
-rw-r--r--test/unit/safety_check.sh2
-rw-r--r--test/unit/san.c207
-rw-r--r--test/unit/san.sh3
-rw-r--r--test/unit/san_bump.c111
-rw-r--r--test/unit/sc.c6
-rw-r--r--test/unit/sec.c634
-rw-r--r--test/unit/seq.c12
-rw-r--r--test/unit/size_check.c79
-rw-r--r--test/unit/size_check.sh5
-rw-r--r--test/unit/size_classes.c88
-rw-r--r--test/unit/slab.c22
-rw-r--r--test/unit/smoothstep.c12
-rw-r--r--test/unit/stats.c229
-rw-r--r--test/unit/stats_print.c26
-rw-r--r--test/unit/sz.c66
-rw-r--r--test/unit/tcache_max.c175
-rw-r--r--test/unit/tcache_max.sh3
-rw-r--r--test/unit/test_hooks.c10
-rw-r--r--test/unit/thread_event.c34
-rw-r--r--test/unit/thread_event.sh5
-rw-r--r--test/unit/ticker.c65
-rw-r--r--test/unit/tsd.c55
-rw-r--r--test/unit/uaf.c262
-rw-r--r--test/unit/witness.c32
-rw-r--r--test/unit/zero.c10
-rw-r--r--test/unit/zero_realloc_abort.c26
-rw-r--r--test/unit/zero_realloc_abort.sh3
-rw-r--r--test/unit/zero_realloc_alloc.c48
-rw-r--r--test/unit/zero_realloc_alloc.sh3
-rw-r--r--test/unit/zero_realloc_free.c33
-rw-r--r--test/unit/zero_realloc_free.sh3
-rw-r--r--test/unit/zero_reallocs.c40
-rw-r--r--test/unit/zero_reallocs.sh3
379 files changed, 45977 insertions, 17135 deletions
diff --git a/.appveyor.yml b/.appveyor.yml
index 90b0368..d31f9ae 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -31,7 +31,6 @@ install:
- set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH%
- if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC%
- if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc
- - pacman --noconfirm -Suy mingw-w64-%CPU%-make
build_script:
- bash -c "autoconf"
diff --git a/.cirrus.yml b/.cirrus.yml
index 019d2c3..7569539 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -3,18 +3,43 @@ env:
ARCH: amd64
task:
+ matrix:
+ env:
+ DEBUG_CONFIG: --enable-debug
+ env:
+ DEBUG_CONFIG: --disable-debug
+ matrix:
+ - env:
+ PROF_CONFIG: --enable-prof
+ - env:
+ PROF_CONFIG: --disable-prof
+ matrix:
+ - name: 64-bit
+ env:
+ CC:
+ CXX:
+ - name: 32-bit
+ env:
+ CC: cc -m32
+ CXX: c++ -m32
+ matrix:
+ - env:
+ UNCOMMON_CONFIG:
+ - env:
+ UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false
freebsd_instance:
matrix:
- image: freebsd-12-0-release-amd64
- image: freebsd-11-2-release-amd64
+ image: freebsd-12-3-release-amd64
install_script:
- sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf
- pkg upgrade -y
- pkg install -y autoconf gmake
script:
- autoconf
- #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS
- - ./configure
+ # We don't perfectly track freebsd stdlib.h definitions. This is fine when
+ # we count as a system header, but breaks otherwise, like during these
+ # tests.
+ - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG}
- export JFLAG=`sysctl -n kern.smp.cpus`
- gmake -j${JFLAG}
- gmake -j${JFLAG} tests
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..719c03c
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,122 @@
+# jemalloc targets clang-format version 8. We include every option it supports
+# here, but comment out the ones that aren't relevant for us.
+---
+# AccessModifierOffset: -2
+AlignAfterOpenBracket: DontAlign
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands: false
+AlignTrailingComments: false
+AllowAllParametersOfDeclarationOnNextLine: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterReturnType: AllDefinitions
+AlwaysBreakBeforeMultilineStrings: true
+# AlwaysBreakTemplateDeclarations: Yes
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: false
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+# BreakAfterJavaFieldAnnotations: true
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Attach
+BreakBeforeTernaryOperators: true
+# BreakConstructorInitializers: BeforeColon
+# BreakInheritanceList: BeforeColon
+BreakStringLiterals: false
+ColumnLimit: 80
+# CommentPragmas: ''
+# CompactNamespaces: true
+# ConstructorInitializerAllOnOneLineOrOnePerLine: true
+# ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 2
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros: [ ql_foreach, qr_foreach, ]
+# IncludeBlocks: Preserve
+# IncludeCategories:
+# - Regex: '^<.*\.h(pp)?>'
+# Priority: 1
+# IncludeIsMainRegex: ''
+IndentCaseLabels: false
+IndentPPDirectives: AfterHash
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+# JavaImportGroups: []
+# JavaScriptQuotes: Leave
+# JavaScriptWrapImports: True
+KeepEmptyLinesAtTheStartOfBlocks: false
+Language: Cpp
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+# NamespaceIndentation: None
+# ObjCBinPackProtocolList: Auto
+# ObjCBlockIndentWidth: 2
+# ObjCSpaceAfterProperty: false
+# ObjCSpaceBeforeProtocolList: false
+
+PenaltyBreakAssignment: 2
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+# PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 60
+PointerAlignment: Right
+# RawStringFormats:
+# - Language: TextProto
+# Delimiters:
+# - 'pb'
+# - 'proto'
+# EnclosingFunctions:
+# - 'PARSE_TEXT_PROTO'
+# BasedOnStyle: google
+# - Language: Cpp
+# Delimiters:
+# - 'cc'
+# - 'cpp'
+# BasedOnStyle: llvm
+# CanonicalDelimiter: 'cc'
+ReflowComments: true
+SortIncludes: false
+SpaceAfterCStyleCast: false
+# SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+# SpaceBeforeCpp11BracedList: false
+# SpaceBeforeCtorInitializerColon: true
+# SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+# SpaceBeforeRangeBasedForLoopColon: true
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+# SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+# Standard: Cpp11
+# This is nominally supported in clang-format version 8, but not in the build
+# used by some of the core jemalloc developers.
+# StatementMacros: []
+TabWidth: 8
+UseTab: Never
+...
diff --git a/.gitignore b/.gitignore
index 5ca0ad1..1c0b338 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,8 @@
/doc/jemalloc.html
/doc/jemalloc.3
+/doc_internal/PROFILING_INTERNALS.pdf
+
/jemalloc.pc
/lib/
@@ -50,6 +52,7 @@ test/include/test/jemalloc_test.h
test/include/test/jemalloc_test_defs.h
/test/integration/[A-Za-z]*
+!/test/integration/cpp/
!/test/integration/[A-Za-z]*.*
/test/integration/*.[od]
/test/integration/*.out
@@ -71,6 +74,11 @@ test/include/test/jemalloc_test_defs.h
/test/unit/*.[od]
/test/unit/*.out
+/test/analyze/[A-Za-z]*
+!/test/analyze/[A-Za-z]*.*
+/test/analyze/*.[od]
+/test/analyze/*.out
+
/VERSION
*.pdb
diff --git a/.travis.yml b/.travis.yml
index 2da5da8..bf44fad 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,195 +1,413 @@
-language: generic
-dist: precise
+# This config file is generated by ./scripts/gen_travis.py.
+# Do not edit by hand.
-matrix:
+# We use 'minimal', because 'generic' makes Windows VMs hang at startup. Also
+# the software provided by 'generic' is simply not needed for our tests.
+# Differences are explained here:
+# https://docs.travis-ci.com/user/languages/minimal-and-generic/
+language: minimal
+dist: focal
+
+jobs:
include:
+ - os: windows
+ arch: amd64
+ env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon"
+ - os: windows
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon"
+ - os: windows
+ arch: amd64
+ env: CC=cl.exe CXX=cl.exe
+ - os: windows
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon"
+ - os: windows
+ arch: amd64
+ env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug"
+ - os: windows
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon"
+ - os: windows
+ arch: amd64
+ env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes
+ - os: windows
+ arch: amd64
+ env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: freebsd
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+ - os: linux
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
+ - os: linux
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: &gcc_multilib
- apt:
- packages:
- - gcc-multilib
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- - os: osx
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- addons: *gcc_multilib
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: linux
+ arch: ppc64le
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
+ - os: osx
+ arch: amd64
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations"
# Development build
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
# --enable-expermental-smallocx:
- os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
+ env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds"
- # Valgrind
- - os: linux
- env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind"
- addons:
- apt:
- packages:
- - valgrind
+before_install:
+ - |-
+ if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then
+ source ./scripts/$TRAVIS_OS_NAME/before_install.sh
+ fi
before_script:
- - autoconf
- - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script
- - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS
- - make -j3
- - make -j3 tests
+ - |-
+ if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then
+ source ./scripts/$TRAVIS_OS_NAME/before_script.sh
+ else
+ scripts/gen_travis.py > travis_script && diff .travis.yml travis_script
+ autoconf
+ # If COMPILER_FLAGS are not empty, add them to CC and CXX
+ ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS
+ make -j3
+ make -j3 tests
+ fi
script:
- - make check
+ - |-
+ if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then
+ source ./scripts/$TRAVIS_OS_NAME/script.sh
+ else
+ make check
+ fi
diff --git a/ChangeLog b/ChangeLog
index e55813b..32fde56 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,106 @@ brevity. Much more detail can be found in the git revision history:
https://github.com/jemalloc/jemalloc
+* 5.3.0 (May 6, 2022)
+
+ This release contains many speed and space optimizations, from micro
+ optimizations on common paths to rework of internal data structures and
+ locking schemes, and many more too detailed to list below. Multiple percent
+ of system level metric improvements were measured in tested production
+ workloads. The release has gone through large-scale production testing.
+
+ New features:
+ - Add the thread.idle mallctl which hints that the calling thread will be
+ idle for a nontrivial period of time. (@davidtgoldblatt)
+ - Allow small size classes to be the maximum size class to cache in the
+ thread-specific cache, through the opt.[lg_]tcache_max option. (@interwq,
+ @jordalgo)
+ - Make the behavior of realloc(ptr, 0) configurable with opt.zero_realloc.
+ (@davidtgoldblatt)
+ - Add 'make uninstall' support. (@sangshuduo, @Lapenkov)
+ - Support C++17 over-aligned allocation. (@marksantaniello)
+ - Add the thread.peak mallctl for approximate per-thread peak memory tracking.
+ (@davidtgoldblatt)
+ - Add interval-based stats output opt.stats_interval. (@interwq)
+ - Add prof.prefix to override filename prefixes for dumps. (@zhxchen17)
+ - Add high resolution timestamp support for profiling. (@tyroguru)
+ - Add the --collapsed flag to jeprof for flamegraph generation.
+ (@igorwwwwwwwwwwwwwwwwwwww)
+ - Add the --debug-syms-by-id option to jeprof for debug symbols discovery.
+ (@DeannaGelbart)
+ - Add the opt.prof_leak_error option to exit with error code when leak is
+ detected using opt.prof_final. (@yunxuo)
+ - Add opt.cache_oblivious as an runtime alternative to config.cache_oblivious.
+ (@interwq)
+ - Add mallctl interfaces:
+ + opt.zero_realloc (@davidtgoldblatt)
+ + opt.cache_oblivious (@interwq)
+ + opt.prof_leak_error (@yunxuo)
+ + opt.stats_interval (@interwq)
+ + opt.stats_interval_opts (@interwq)
+ + opt.tcache_max (@interwq)
+ + opt.trust_madvise (@azat)
+ + prof.prefix (@zhxchen17)
+ + stats.zero_reallocs (@davidtgoldblatt)
+ + thread.idle (@davidtgoldblatt)
+ + thread.peak.{read,reset} (@davidtgoldblatt)
+
+ Bug fixes:
+ - Fix the synchronization around explicit tcache creation which could cause
+ invalid tcache identifiers. This regression was first released in 5.0.0.
+ (@yoshinorim, @davidtgoldblatt)
+ - Fix a profiling biasing issue which could cause incorrect heap usage and
+ object counts. This issue existed in all previous releases with the heap
+ profiling feature. (@davidtgoldblatt)
+ - Fix the order of stats counter updating on large realloc which could cause
+ failed assertions. This regression was first released in 5.0.0. (@azat)
+ - Fix the locking on the arena destroy mallctl, which could cause concurrent
+ arena creations to fail. This functionality was first introduced in 5.0.0.
+ (@interwq)
+
+ Portability improvements:
+ - Remove nothrow from system function declarations on macOS and FreeBSD.
+ (@davidtgoldblatt, @fredemmott, @leres)
+ - Improve overcommit and page alignment settings on NetBSD. (@zoulasc)
+ - Improve CPU affinity support on BSD platforms. (@devnexen)
+ - Improve utrace detection and support. (@devnexen)
+ - Improve QEMU support with MADV_DONTNEED zeroed pages detection. (@azat)
+ - Add memcntl support on Solaris / illumos. (@devnexen)
+ - Improve CPU_SPINWAIT on ARM. (@AWSjswinney)
+ - Improve TSD cleanup on FreeBSD. (@Lapenkov)
+ - Disable percpu_arena if the CPU count cannot be reliably detected. (@azat)
+ - Add malloc_size(3) override support. (@devnexen)
+ - Add mmap VM_MAKE_TAG support. (@devnexen)
+ - Add support for MADV_[NO]CORE. (@devnexen)
+ - Add support for DragonFlyBSD. (@devnexen)
+ - Fix the QUANTUM setting on MIPS64. (@brooksdavis)
+ - Add the QUANTUM setting for ARC. (@vineetgarc)
+ - Add the QUANTUM setting for LoongArch. (@wangjl-uos)
+ - Add QNX support. (@jqian-aurora)
+ - Avoid atexit(3) calls unless the relevant profiling features are enabled.
+ (@BusyJay, @laiwei-rice, @interwq)
+ - Fix unknown option detection when using Clang. (@Lapenkov)
+ - Fix symbol conflict with musl libc. (@georgthegreat)
+ - Add -Wimplicit-fallthrough checks. (@nickdesaulniers)
+ - Add __forceinline support on MSVC. (@santagada)
+ - Improve FreeBSD and Windows CI support. (@Lapenkov)
+ - Add CI support for PPC64LE architecture. (@ezeeyahoo)
+
+ Incompatible changes:
+ - Maximum size class allowed in tcache (opt.[lg_]tcache_max) now has an upper
+ bound of 8MiB. (@interwq)
+
+ Optimizations and refactors (@davidtgoldblatt, @Lapenkov, @interwq):
+ - Optimize the common cases of the thread cache operations.
+ - Optimize internal data structures, including RB tree and pairing heap.
+ - Optimize the internal locking on extent management.
+ - Extract and refactor the internal page allocator and interface modules.
+
+ Documentation:
+ - Fix doc build with --with-install-suffix. (@lawmurray, @interwq)
+ - Add PROFILING_INTERNALS.md. (@davidtgoldblatt)
+ - Ensure the proper order of doc building and installation. (@Mingli-Yu)
+
* 5.2.1 (August 5, 2019)
This release is primarily about Windows. A critical virtual memory leak is
diff --git a/INSTALL.md b/INSTALL.md
index b8f729b..90da718 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -9,14 +9,17 @@ If building from unpackaged developer sources, the simplest command sequence
that might work is:
./autogen.sh
- make dist
make
make install
-Note that documentation is not built by the default target because doing so
-would create a dependency on xsltproc in packaged releases, hence the
-requirement to either run 'make dist' or avoid installing docs via the various
-install_* targets documented below.
+You can uninstall the installed build artifacts like this:
+
+ make uninstall
+
+Notes:
+ - "autoconf" needs to be installed
+ - Documentation is built by the default target only when xsltproc is
+available. Build will warn but not stop if the dependency is missing.
## Advanced configuration
@@ -188,13 +191,13 @@ any of the following arguments (not a definitive list) to 'configure':
* `--disable-cache-oblivious`
- Disable cache-oblivious large allocation alignment for large allocation
- requests with no alignment constraints. If this feature is disabled, all
- large allocations are page-aligned as an implementation artifact, which can
- severely harm CPU cache utilization. However, the cache-oblivious layout
- comes at the cost of one extra page per large allocation, which in the
- most extreme case increases physical memory usage for the 16 KiB size class
- to 20 KiB.
+ Disable cache-oblivious large allocation alignment by default, for large
+ allocation requests with no alignment constraints. If this feature is
+ disabled, all large allocations are page-aligned as an implementation
+ artifact, which can severely harm CPU cache utilization. However, the
+ cache-oblivious layout comes at the cost of one extra page per large
+ allocation, which in the most extreme case increases physical memory usage
+ for the 16 KiB size class to 20 KiB.
* `--disable-syscall`
diff --git a/Makefile.in b/Makefile.in
index 7128b00..1193cd8 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -99,31 +99,60 @@ C_SRCS := $(srcroot)src/jemalloc.c \
$(srcroot)src/background_thread.c \
$(srcroot)src/base.c \
$(srcroot)src/bin.c \
+ $(srcroot)src/bin_info.c \
$(srcroot)src/bitmap.c \
+ $(srcroot)src/buf_writer.c \
+ $(srcroot)src/cache_bin.c \
$(srcroot)src/ckh.c \
+ $(srcroot)src/counter.c \
$(srcroot)src/ctl.c \
+ $(srcroot)src/decay.c \
$(srcroot)src/div.c \
+ $(srcroot)src/ecache.c \
+ $(srcroot)src/edata.c \
+ $(srcroot)src/edata_cache.c \
+ $(srcroot)src/ehooks.c \
+ $(srcroot)src/emap.c \
+ $(srcroot)src/eset.c \
+ $(srcroot)src/exp_grow.c \
$(srcroot)src/extent.c \
$(srcroot)src/extent_dss.c \
$(srcroot)src/extent_mmap.c \
- $(srcroot)src/hash.c \
+ $(srcroot)src/fxp.c \
+ $(srcroot)src/san.c \
+ $(srcroot)src/san_bump.c \
$(srcroot)src/hook.c \
+ $(srcroot)src/hpa.c \
+ $(srcroot)src/hpa_hooks.c \
+ $(srcroot)src/hpdata.c \
+ $(srcroot)src/inspect.c \
$(srcroot)src/large.c \
$(srcroot)src/log.c \
$(srcroot)src/malloc_io.c \
$(srcroot)src/mutex.c \
- $(srcroot)src/mutex_pool.c \
$(srcroot)src/nstime.c \
+ $(srcroot)src/pa.c \
+ $(srcroot)src/pa_extra.c \
+ $(srcroot)src/pai.c \
+ $(srcroot)src/pac.c \
$(srcroot)src/pages.c \
- $(srcroot)src/prng.c \
+ $(srcroot)src/peak_event.c \
$(srcroot)src/prof.c \
+ $(srcroot)src/prof_data.c \
+ $(srcroot)src/prof_log.c \
+ $(srcroot)src/prof_recent.c \
+ $(srcroot)src/prof_stats.c \
+ $(srcroot)src/prof_sys.c \
+ $(srcroot)src/psset.c \
$(srcroot)src/rtree.c \
$(srcroot)src/safety_check.c \
- $(srcroot)src/stats.c \
$(srcroot)src/sc.c \
+ $(srcroot)src/sec.c \
+ $(srcroot)src/stats.c \
$(srcroot)src/sz.c \
$(srcroot)src/tcache.c \
$(srcroot)src/test_hooks.c \
+ $(srcroot)src/thread_event.c \
$(srcroot)src/ticker.c \
$(srcroot)src/tsd.c \
$(srcroot)src/witness.c
@@ -148,88 +177,124 @@ else
LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
endif
PC := $(objroot)jemalloc.pc
-MAN3 := $(objroot)doc/jemalloc$(install_suffix).3
DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml
DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html)
DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3)
DOCS := $(DOCS_HTML) $(DOCS_MAN3)
C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \
$(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \
- $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \
+ $(srcroot)test/src/mtx.c $(srcroot)test/src/sleep.c \
$(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \
$(srcroot)test/src/thd.c $(srcroot)test/src/timer.c
ifeq (1, $(link_whole_archive))
C_UTIL_INTEGRATION_SRCS :=
C_UTIL_CPP_SRCS :=
else
-C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
+C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \
+ $(srcroot)src/ticker.c
C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c
endif
TESTS_UNIT := \
$(srcroot)test/unit/a0.c \
+ $(srcroot)test/unit/arena_decay.c \
$(srcroot)test/unit/arena_reset.c \
$(srcroot)test/unit/atomic.c \
$(srcroot)test/unit/background_thread.c \
$(srcroot)test/unit/background_thread_enable.c \
$(srcroot)test/unit/base.c \
+ $(srcroot)test/unit/batch_alloc.c \
+ $(srcroot)test/unit/binshard.c \
$(srcroot)test/unit/bitmap.c \
$(srcroot)test/unit/bit_util.c \
- $(srcroot)test/unit/binshard.c \
+ $(srcroot)test/unit/buf_writer.c \
+ $(srcroot)test/unit/cache_bin.c \
$(srcroot)test/unit/ckh.c \
+ $(srcroot)test/unit/counter.c \
$(srcroot)test/unit/decay.c \
$(srcroot)test/unit/div.c \
+ $(srcroot)test/unit/double_free.c \
+ $(srcroot)test/unit/edata_cache.c \
$(srcroot)test/unit/emitter.c \
$(srcroot)test/unit/extent_quantize.c \
- $(srcroot)test/unit/extent_util.c \
+ ${srcroot}test/unit/fb.c \
$(srcroot)test/unit/fork.c \
+ ${srcroot}test/unit/fxp.c \
+ ${srcroot}test/unit/san.c \
+ ${srcroot}test/unit/san_bump.c \
$(srcroot)test/unit/hash.c \
$(srcroot)test/unit/hook.c \
+ $(srcroot)test/unit/hpa.c \
+ $(srcroot)test/unit/hpa_background_thread.c \
+ $(srcroot)test/unit/hpdata.c \
$(srcroot)test/unit/huge.c \
+ $(srcroot)test/unit/inspect.c \
$(srcroot)test/unit/junk.c \
$(srcroot)test/unit/junk_alloc.c \
$(srcroot)test/unit/junk_free.c \
$(srcroot)test/unit/log.c \
$(srcroot)test/unit/mallctl.c \
+ $(srcroot)test/unit/malloc_conf_2.c \
$(srcroot)test/unit/malloc_io.c \
$(srcroot)test/unit/math.c \
+ $(srcroot)test/unit/mpsc_queue.c \
$(srcroot)test/unit/mq.c \
$(srcroot)test/unit/mtx.c \
+ $(srcroot)test/unit/nstime.c \
+ $(srcroot)test/unit/oversize_threshold.c \
+ $(srcroot)test/unit/pa.c \
$(srcroot)test/unit/pack.c \
$(srcroot)test/unit/pages.c \
+ $(srcroot)test/unit/peak.c \
$(srcroot)test/unit/ph.c \
$(srcroot)test/unit/prng.c \
$(srcroot)test/unit/prof_accum.c \
$(srcroot)test/unit/prof_active.c \
$(srcroot)test/unit/prof_gdump.c \
+ $(srcroot)test/unit/prof_hook.c \
$(srcroot)test/unit/prof_idump.c \
$(srcroot)test/unit/prof_log.c \
+ $(srcroot)test/unit/prof_mdump.c \
+ $(srcroot)test/unit/prof_recent.c \
$(srcroot)test/unit/prof_reset.c \
+ $(srcroot)test/unit/prof_stats.c \
$(srcroot)test/unit/prof_tctx.c \
$(srcroot)test/unit/prof_thread_name.c \
+ $(srcroot)test/unit/prof_sys_thread_name.c \
+ $(srcroot)test/unit/psset.c \
$(srcroot)test/unit/ql.c \
$(srcroot)test/unit/qr.c \
$(srcroot)test/unit/rb.c \
$(srcroot)test/unit/retained.c \
$(srcroot)test/unit/rtree.c \
$(srcroot)test/unit/safety_check.c \
+ $(srcroot)test/unit/sc.c \
+ $(srcroot)test/unit/sec.c \
$(srcroot)test/unit/seq.c \
$(srcroot)test/unit/SFMT.c \
- $(srcroot)test/unit/sc.c \
+ $(srcroot)test/unit/size_check.c \
$(srcroot)test/unit/size_classes.c \
$(srcroot)test/unit/slab.c \
$(srcroot)test/unit/smoothstep.c \
$(srcroot)test/unit/spin.c \
$(srcroot)test/unit/stats.c \
$(srcroot)test/unit/stats_print.c \
+ $(srcroot)test/unit/sz.c \
+ $(srcroot)test/unit/tcache_max.c \
$(srcroot)test/unit/test_hooks.c \
+ $(srcroot)test/unit/thread_event.c \
$(srcroot)test/unit/ticker.c \
- $(srcroot)test/unit/nstime.c \
$(srcroot)test/unit/tsd.c \
+ $(srcroot)test/unit/uaf.c \
$(srcroot)test/unit/witness.c \
- $(srcroot)test/unit/zero.c
+ $(srcroot)test/unit/zero.c \
+ $(srcroot)test/unit/zero_realloc_abort.c \
+ $(srcroot)test/unit/zero_realloc_free.c \
+ $(srcroot)test/unit/zero_realloc_alloc.c \
+ $(srcroot)test/unit/zero_reallocs.c
ifeq (@enable_prof@, 1)
TESTS_UNIT += \
- $(srcroot)test/unit/arena_reset_prof.c
+ $(srcroot)test/unit/arena_reset_prof.c \
+ $(srcroot)test/unit/batch_alloc_prof.c
endif
TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \
$(srcroot)test/integration/allocated.c \
@@ -251,16 +316,26 @@ TESTS_INTEGRATION += \
endif
ifeq (@enable_cxx@, 1)
CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp
-TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp
+TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp \
+ $(srcroot)test/integration/cpp/infallible_new_true.cpp \
+ $(srcroot)test/integration/cpp/infallible_new_false.cpp
else
CPP_SRCS :=
TESTS_INTEGRATION_CPP :=
endif
-TESTS_STRESS := $(srcroot)test/stress/microbench.c \
- $(srcroot)test/stress/hookbench.c
+TESTS_ANALYZE := $(srcroot)test/analyze/prof_bias.c \
+ $(srcroot)test/analyze/rand.c \
+ $(srcroot)test/analyze/sizes.c
+TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \
+ $(srcroot)test/stress/fill_flush.c \
+ $(srcroot)test/stress/hookbench.c \
+ $(srcroot)test/stress/large_microbench.c \
+ $(srcroot)test/stress/mallctl.c \
+ $(srcroot)test/stress/microbench.c
-TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS)
+TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \
+ $(TESTS_ANALYZE) $(TESTS_STRESS)
PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h
PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h)
@@ -276,14 +351,19 @@ C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O))
C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O))
C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O))
C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O))
+C_TESTLIB_ANALYZE_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.analyze.$(O))
C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O))
-C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS)
+C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) \
+ $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_ANALYZE_OBJS) \
+ $(C_TESTLIB_STRESS_OBJS)
TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O))
+TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O))
TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O))
-TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS)
+TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \
+ $(TESTS_STRESS_OBJS)
TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS)
.PHONY: all dist build_doc_html build_doc_man build_doc
@@ -298,7 +378,7 @@ all: build_lib
dist: build_doc
-$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
+$(objroot)doc/%$(install_suffix).html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl
ifneq ($(XSLROOT),)
$(XSLTPROC) -o $@ $(objroot)doc/html.xsl $<
else
@@ -308,9 +388,16 @@ endif
@echo "Missing xsltproc. "$@" not (re)built."
endif
-$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
+$(objroot)doc/%$(install_suffix).3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl
ifneq ($(XSLROOT),)
$(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $<
+# The -o option (output filename) of xsltproc may not work (it uses the
+# <refname> in the .xml file). Manually add the suffix if so.
+ ifneq ($(install_suffix),)
+ @if [ -f $(objroot)doc/jemalloc.3 ]; then \
+ mv $(objroot)doc/jemalloc.3 $(objroot)doc/jemalloc$(install_suffix).3 ; \
+ fi
+ endif
else
ifeq ($(wildcard $(DOCS_MAN3)),)
@echo "Missing xsltproc. Doc not built." > $@
@@ -357,12 +444,15 @@ $(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
$(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
$(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c
+$(C_TESTLIB_ANALYZE_OBJS): $(objroot)test/src/%.analyze.$(O): $(srcroot)test/src/%.c
+$(C_TESTLIB_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST
$(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c
$(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB
$(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include
$(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST
$(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST
$(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST
+$(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST
$(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST
$(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c
$(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp
@@ -382,7 +472,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h
endif
$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h
-$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
+$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h
$(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O):
@mkdir -p $(@D)
@@ -406,7 +496,7 @@ $(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS)
$(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@
%.h: %.gen.h
- @if ! `cmp -s $< $@` ; then echo "cp $< $<"; cp $< $@ ; fi
+ @if ! `cmp -s $< $@` ; then echo "cp $< $@"; cp $< $@ ; fi
$(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O):
@mkdir -p $(@D)
@@ -445,6 +535,10 @@ $(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $(
@mkdir -p $(@D)
$(CXX) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS)
+$(objroot)test/analyze/%$(EXE): $(objroot)test/analyze/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_ANALYZE_OBJS)
+ @mkdir -p $(@D)
+ $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
+
$(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB)
@mkdir -p $(@D)
$(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS)
@@ -461,20 +555,18 @@ endif
install_bin:
$(INSTALL) -d $(BINDIR)
@for b in $(BINS); do \
- echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \
- $(INSTALL) -m 755 $$b $(BINDIR); \
+ $(INSTALL) -v -m 755 $$b $(BINDIR); \
done
install_include:
$(INSTALL) -d $(INCLUDEDIR)/jemalloc
@for h in $(C_HDRS); do \
- echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \
- $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \
+ $(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \
done
install_lib_shared: $(DSOS)
$(INSTALL) -d $(LIBDIR)
- $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
+ $(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR)
ifneq ($(SOREV),$(SO))
ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO)
endif
@@ -482,15 +574,13 @@ endif
install_lib_static: $(STATIC_LIBS)
$(INSTALL) -d $(LIBDIR)
@for l in $(STATIC_LIBS); do \
- echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \
- $(INSTALL) -m 755 $$l $(LIBDIR); \
+ $(INSTALL) -v -m 755 $$l $(LIBDIR); \
done
install_lib_pc: $(PC)
$(INSTALL) -d $(LIBDIR)/pkgconfig
@for l in $(PC); do \
- echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \
- $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \
+ $(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \
done
ifeq ($(enable_shared), 1)
@@ -501,21 +591,19 @@ install_lib: install_lib_static
endif
install_lib: install_lib_pc
-install_doc_html:
+install_doc_html: build_doc_html
$(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix)
@for d in $(DOCS_HTML); do \
- echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \
- $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
+ $(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \
done
-install_doc_man:
+install_doc_man: build_doc_man
$(INSTALL) -d $(MANDIR)/man3
@for d in $(DOCS_MAN3); do \
- echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \
- $(INSTALL) -m 644 $$d $(MANDIR)/man3; \
+ $(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \
done
-install_doc: build_doc install_doc_html install_doc_man
+install_doc: install_doc_html install_doc_man
install: install_bin install_include install_lib
@@ -523,15 +611,60 @@ ifeq ($(enable_doc), 1)
install: install_doc
endif
+uninstall_bin:
+ $(RM) -v $(foreach b,$(notdir $(BINS)),$(BINDIR)/$(b))
+
+uninstall_include:
+ $(RM) -v $(foreach h,$(notdir $(C_HDRS)),$(INCLUDEDIR)/jemalloc/$(h))
+ rmdir -v $(INCLUDEDIR)/jemalloc
+
+uninstall_lib_shared:
+ $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SOREV)
+ifneq ($(SOREV),$(SO))
+ $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SO)
+endif
+
+uninstall_lib_static:
+ $(RM) -v $(foreach l,$(notdir $(STATIC_LIBS)),$(LIBDIR)/$(l))
+
+uninstall_lib_pc:
+ $(RM) -v $(foreach p,$(notdir $(PC)),$(LIBDIR)/pkgconfig/$(p))
+
+ifeq ($(enable_shared), 1)
+uninstall_lib: uninstall_lib_shared
+endif
+ifeq ($(enable_static), 1)
+uninstall_lib: uninstall_lib_static
+endif
+uninstall_lib: uninstall_lib_pc
+
+uninstall_doc_html:
+ $(RM) -v $(foreach d,$(notdir $(DOCS_HTML)),$(DATADIR)/doc/jemalloc$(install_suffix)/$(d))
+ rmdir -v $(DATADIR)/doc/jemalloc$(install_suffix)
+
+uninstall_doc_man:
+ $(RM) -v $(foreach d,$(notdir $(DOCS_MAN3)),$(MANDIR)/man3/$(d))
+
+uninstall_doc: uninstall_doc_html uninstall_doc_man
+
+uninstall: uninstall_bin uninstall_include uninstall_lib
+
+ifeq ($(enable_doc), 1)
+uninstall: uninstall_doc
+endif
+
tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE))
tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE))
+tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE))
tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE))
-tests: tests_unit tests_integration tests_stress
+tests: tests_unit tests_integration tests_analyze tests_stress
check_unit_dir:
@mkdir -p $(objroot)test/unit
check_integration_dir:
@mkdir -p $(objroot)test/integration
+analyze_dir:
+ @mkdir -p $(objroot)test/analyze
stress_dir:
@mkdir -p $(objroot)test/stress
check_dir: check_unit_dir check_integration_dir
@@ -548,6 +681,12 @@ check_integration_decay: tests_integration check_integration_dir
$(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
check_integration: tests_integration check_integration_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%)
+analyze: tests_analyze analyze_dir
+ifeq ($(enable_prof), 1)
+ $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%)
+else
+ $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%)
+endif
stress: tests_stress stress_dir
$(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%)
check: check_unit check_integration check_integration_decay check_integration_prof
diff --git a/TUNING.md b/TUNING.md
index 34fca05..e96399d 100644
--- a/TUNING.md
+++ b/TUNING.md
@@ -1,5 +1,5 @@
This document summarizes the common approaches for performance fine tuning with
-jemalloc (as of 5.1.0). The default configuration of jemalloc tends to work
+jemalloc (as of 5.3.0). The default configuration of jemalloc tends to work
reasonably well in practice, and most applications should not have to tune any
options. However, in order to cover a wide range of applications and avoid
pathological cases, the default setting is sometimes kept conservative and
@@ -76,14 +76,14 @@ Examples:
* High resource consumption application, prioritizing memory usage:
- `background_thread:true` combined with shorter decay time (decreased
- `dirty_decay_ms` and / or `muzzy_decay_ms`,
+ `background_thread:true,tcache_max:4096` combined with shorter decay time
+ (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`,
e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count
(e.g. number of CPUs).
* Low resource consumption application:
- `narenas:1,lg_tcache_max:13` combined with shorter decay time (decreased
+ `narenas:1,tcache_max:1024` combined with shorter decay time (decreased
`dirty_decay_ms` and / or `muzzy_decay_ms`,e.g.
`dirty_decay_ms:1000,muzzy_decay_ms:0`).
diff --git a/bin/jeprof.in b/bin/jeprof.in
index 3ed408c..dbf6252 100644
--- a/bin/jeprof.in
+++ b/bin/jeprof.in
@@ -205,6 +205,8 @@ Output type:
--svg Generate SVG to stdout
--gif Generate GIF to stdout
--raw Generate symbolized jeprof data (useful with remote fetch)
+ --collapsed Generate collapsed stacks for building flame graphs
+ (see http://www.brendangregg.com/flamegraphs.html)
Heap-Profile Options:
--inuse_space Display in-use (mega)bytes [default]
@@ -238,6 +240,7 @@ Miscellaneous:
--test Run unit tests
--help This message
--version Version information
+ --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name
Environment Variables:
JEPROF_TMPDIR Profiles directory. Defaults to \$HOME/jeprof
@@ -332,6 +335,7 @@ sub Init() {
$main::opt_gif = 0;
$main::opt_svg = 0;
$main::opt_raw = 0;
+ $main::opt_collapsed = 0;
$main::opt_nodecount = 80;
$main::opt_nodefraction = 0.005;
@@ -362,6 +366,7 @@ sub Init() {
$main::opt_tools = "";
$main::opt_debug = 0;
$main::opt_test = 0;
+ $main::opt_debug_syms_by_id = 0;
# These are undocumented flags used only by unittests.
$main::opt_test_stride = 0;
@@ -405,6 +410,7 @@ sub Init() {
"svg!" => \$main::opt_svg,
"gif!" => \$main::opt_gif,
"raw!" => \$main::opt_raw,
+ "collapsed!" => \$main::opt_collapsed,
"interactive!" => \$main::opt_interactive,
"nodecount=i" => \$main::opt_nodecount,
"nodefraction=f" => \$main::opt_nodefraction,
@@ -429,6 +435,7 @@ sub Init() {
"tools=s" => \$main::opt_tools,
"test!" => \$main::opt_test,
"debug!" => \$main::opt_debug,
+ "debug-syms-by-id!" => \$main::opt_debug_syms_by_id,
# Undocumented flags used only by unittests:
"test_stride=i" => \$main::opt_test_stride,
) || usage("Invalid option(s)");
@@ -490,6 +497,7 @@ sub Init() {
$main::opt_svg +
$main::opt_gif +
$main::opt_raw +
+ $main::opt_collapsed +
$main::opt_interactive +
0;
if ($modes > 1) {
@@ -572,6 +580,11 @@ sub Init() {
foreach (@prefix_list) {
s|/+$||;
}
+
+ # Flag to prevent us from trying over and over to use
+ # elfutils if it's not installed (used only with
+ # --debug-syms-by-id option).
+ $main::gave_up_on_elfutils = 0;
}
sub FilterAndPrint {
@@ -621,6 +634,8 @@ sub FilterAndPrint {
PrintText($symbols, $flat, $cumulative, -1);
} elsif ($main::opt_raw) {
PrintSymbolizedProfile($symbols, $profile, $main::prog);
+ } elsif ($main::opt_collapsed) {
+ PrintCollapsedStacks($symbols, $profile);
} elsif ($main::opt_callgrind) {
PrintCallgrind($calls);
} else {
@@ -2810,6 +2825,40 @@ sub IsSecondPcAlwaysTheSame {
return $second_pc;
}
+sub ExtractSymbolNameInlineStack {
+ my $symbols = shift;
+ my $address = shift;
+
+ my @stack = ();
+
+ if (exists $symbols->{$address}) {
+ my @localinlinestack = @{$symbols->{$address}};
+ for (my $i = $#localinlinestack; $i > 0; $i-=3) {
+ my $file = $localinlinestack[$i-1];
+ my $fn = $localinlinestack[$i-0];
+
+ if ($file eq "?" || $file eq ":0") {
+ $file = "??:0";
+ }
+ if ($fn eq '??') {
+ # If we can't get the symbol name, at least use the file information.
+ $fn = $file;
+ }
+ my $suffix = "[inline]";
+ if ($i == 2) {
+ $suffix = "";
+ }
+ push (@stack, $fn.$suffix);
+ }
+ }
+ else {
+ # If we can't get a symbol name, at least fill in the address.
+ push (@stack, $address);
+ }
+
+ return @stack;
+}
+
sub ExtractSymbolLocation {
my $symbols = shift;
my $address = shift;
@@ -2884,6 +2933,17 @@ sub FilterFrames {
return $result;
}
+sub PrintCollapsedStacks {
+ my $symbols = shift;
+ my $profile = shift;
+
+ while (my ($stack_trace, $count) = each %$profile) {
+ my @address = split(/\n/, $stack_trace);
+ my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address );
+ printf("%s %d\n", join(";", @names), $count);
+ }
+}
+
sub RemoveUninterestingFrames {
my $symbols = shift;
my $profile = shift;
@@ -4440,16 +4500,54 @@ sub FindLibrary {
# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
sub DebuggingLibrary {
my $file = shift;
- if ($file =~ m|^/|) {
- if (-f "/usr/lib/debug$file") {
- return "/usr/lib/debug$file";
- } elsif (-f "/usr/lib/debug$file.debug") {
- return "/usr/lib/debug$file.debug";
+
+ if ($file !~ m|^/|) {
+ return undef;
+ }
+
+ # Find debug symbol file if it's named after the library's name.
+
+ if (-f "/usr/lib/debug$file") {
+ if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; }
+ return "/usr/lib/debug$file";
+ } elsif (-f "/usr/lib/debug$file.debug") {
+ if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; }
+ return "/usr/lib/debug$file.debug";
+ }
+
+ if(!$main::opt_debug_syms_by_id) {
+ if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" };
+ return undef;
+ }
+
+ # Find debug file if it's named after the library's build ID.
+
+ my $readelf = '';
+ if (!$main::gave_up_on_elfutils) {
+ $readelf = qx/eu-readelf -n ${file}/;
+ if ($?) {
+ print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n";
+ $main::gave_up_on_elfutils = 1;
+ return undef;
+ }
+ my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s;
+ if (defined $buildID && length $buildID > 0) {
+ my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug';
+ if (-e $symbolFile) {
+ if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" };
+ return $symbolFile;
+ } else {
+ if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" };
+ return undef;
}
+ }
}
+
+ if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" };
return undef;
}
+
# Parse text section header of a library using objdump
sub ParseTextSectionHeaderFromObjdump {
my $lib = shift;
@@ -4987,7 +5085,7 @@ sub MapToSymbols {
} else {
# MapSymbolsWithNM tags each routine with its starting address,
# useful in case the image has multiple occurrences of this
- # routine. (It uses a syntax that resembles template paramters,
+ # routine. (It uses a syntax that resembles template parameters,
# that are automatically stripped out by ShortFunctionName().)
# addr2line does not provide the same information. So we check
# if nm disambiguated our symbol, and if so take the annotated
@@ -5339,7 +5437,7 @@ sub GetProcedureBoundaries {
# "nm -f $image" is supposed to fail on GNU nm, but if:
#
# a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
- # b. you have a.out in your current directory (a not uncommon occurence)
+ # b. you have a.out in your current directory (a not uncommon occurrence)
#
# then "nm -f $image" succeeds because -f only looks at the first letter of
# the argument, which looks valid because it's [BbSsPp], and then since
diff --git a/build-aux/config.guess b/build-aux/config.guess
index 2e9ad7f..f772702 100755
--- a/build-aux/config.guess
+++ b/build-aux/config.guess
@@ -1,8 +1,8 @@
#! /bin/sh
# Attempt to guess a canonical system name.
-# Copyright 1992-2016 Free Software Foundation, Inc.
+# Copyright 1992-2021 Free Software Foundation, Inc.
-timestamp='2016-10-02'
+timestamp='2021-01-01'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -15,7 +15,7 @@ timestamp='2016-10-02'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -27,19 +27,19 @@ timestamp='2016-10-02'
# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
#
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
#
# Please send patches to <config-patches@gnu.org>.
-me=`echo "$0" | sed -e 's,.*/,,'`
+me=$(echo "$0" | sed -e 's,.*/,,')
usage="\
Usage: $0 [OPTION]
Output the configuration name of the system \`$me' is run on.
-Operation modes:
+Options:
-h, --help print this help, then exit
-t, --time-stamp print date of last modification, then exit
-v, --version print version number, then exit
@@ -50,7 +50,7 @@ version="\
GNU config.guess ($timestamp)
Originally written by Per Bothner.
-Copyright 1992-2016 Free Software Foundation, Inc.
+Copyright 1992-2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -84,8 +84,6 @@ if test $# != 0; then
exit 1
fi
-trap 'exit 1' 1 2 15
-
# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
# compiler to aid in system detection is discouraged as it requires
# temporary files to be created and, as you can see below, it is a
@@ -96,66 +94,89 @@ trap 'exit 1' 1 2 15
# Portable tmp directory creation inspired by the Autoconf team.
-set_cc_for_build='
-trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ;
-trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ;
-: ${TMPDIR=/tmp} ;
- { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
- { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } ||
- { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } ||
- { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ;
-dummy=$tmp/dummy ;
-tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ;
-case $CC_FOR_BUILD,$HOST_CC,$CC in
- ,,) echo "int x;" > $dummy.c ;
- for c in cc gcc c89 c99 ; do
- if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then
- CC_FOR_BUILD="$c"; break ;
- fi ;
- done ;
- if test x"$CC_FOR_BUILD" = x ; then
- CC_FOR_BUILD=no_compiler_found ;
- fi
- ;;
- ,,*) CC_FOR_BUILD=$CC ;;
- ,*,*) CC_FOR_BUILD=$HOST_CC ;;
-esac ; set_cc_for_build= ;'
+tmp=
+# shellcheck disable=SC2172
+trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
+
+set_cc_for_build() {
+ # prevent multiple calls if $tmp is already set
+ test "$tmp" && return 0
+ : "${TMPDIR=/tmp}"
+ # shellcheck disable=SC2039
+ { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } ||
+ { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
+ { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
+ { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
+ dummy=$tmp/dummy
+ case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
+ ,,) echo "int x;" > "$dummy.c"
+ for driver in cc gcc c89 c99 ; do
+ if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
+ CC_FOR_BUILD="$driver"
+ break
+ fi
+ done
+ if test x"$CC_FOR_BUILD" = x ; then
+ CC_FOR_BUILD=no_compiler_found
+ fi
+ ;;
+ ,,*) CC_FOR_BUILD=$CC ;;
+ ,*,*) CC_FOR_BUILD=$HOST_CC ;;
+ esac
+}
# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
-if (test -f /.attbin/uname) >/dev/null 2>&1 ; then
+if test -f /.attbin/uname ; then
PATH=$PATH:/.attbin ; export PATH
fi
-UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
-UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
-UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
-UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown
+UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown
+UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown
+UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown
+UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown
-case "${UNAME_SYSTEM}" in
+case "$UNAME_SYSTEM" in
Linux|GNU|GNU/*)
- # If the system lacks a compiler, then just pick glibc.
- # We could probably try harder.
- LIBC=gnu
+ LIBC=unknown
- eval $set_cc_for_build
- cat <<-EOF > $dummy.c
+ set_cc_for_build
+ cat <<-EOF > "$dummy.c"
#include <features.h>
#if defined(__UCLIBC__)
LIBC=uclibc
#elif defined(__dietlibc__)
LIBC=dietlibc
- #else
+ #elif defined(__GLIBC__)
LIBC=gnu
+ #else
+ #include <stdarg.h>
+ /* First heuristic to detect musl libc. */
+ #ifdef __DEFINED_va_list
+ LIBC=musl
+ #endif
#endif
EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
+ eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')"
+
+ # Second heuristic to detect musl libc.
+ if [ "$LIBC" = unknown ] &&
+ command -v ldd >/dev/null &&
+ ldd --version 2>&1 | grep -q ^musl; then
+ LIBC=musl
+ fi
+
+ # If the system lacks a compiler, then just pick glibc.
+ # We could probably try harder.
+ if [ "$LIBC" = unknown ]; then
+ LIBC=gnu
+ fi
;;
esac
# Note: order is significant - the case branches are not exclusive.
-case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
+case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
@@ -168,31 +189,32 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# Note: NetBSD doesn't particularly care about the vendor
# portion of the name. We always set it to "unknown".
sysctl="sysctl -n hw.machine_arch"
- UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
- /sbin/$sysctl 2>/dev/null || \
- /usr/sbin/$sysctl 2>/dev/null || \
- echo unknown)`
- case "${UNAME_MACHINE_ARCH}" in
+ UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
+ "/sbin/$sysctl" 2>/dev/null || \
+ "/usr/sbin/$sysctl" 2>/dev/null || \
+ echo unknown))
+ case "$UNAME_MACHINE_ARCH" in
+ aarch64eb) machine=aarch64_be-unknown ;;
armeb) machine=armeb-unknown ;;
arm*) machine=arm-unknown ;;
sh3el) machine=shl-unknown ;;
sh3eb) machine=sh-unknown ;;
sh5el) machine=sh5le-unknown ;;
earmv*)
- arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
- endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'`
- machine=${arch}${endian}-unknown
+ arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,')
+ endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p')
+ machine="${arch}${endian}"-unknown
;;
- *) machine=${UNAME_MACHINE_ARCH}-unknown ;;
+ *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
esac
# The Operating System including object format, if it has switched
# to ELF recently (or will in the future) and ABI.
- case "${UNAME_MACHINE_ARCH}" in
+ case "$UNAME_MACHINE_ARCH" in
earm*)
os=netbsdelf
;;
arm*|i386|m68k|ns32k|sh3*|sparc|vax)
- eval $set_cc_for_build
+ set_cc_for_build
if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ELF__
then
@@ -208,10 +230,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
;;
esac
# Determine ABI tags.
- case "${UNAME_MACHINE_ARCH}" in
+ case "$UNAME_MACHINE_ARCH" in
earm*)
expr='s/^earmv[0-9]/-eabi/;s/eb$//'
- abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"`
+ abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr")
;;
esac
# The OS release
@@ -219,60 +241,75 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# thus, need a distinct triplet. However, they do not need
# kernel version information, so it can be replaced with a
# suitable tag, in the style of linux-gnu.
- case "${UNAME_VERSION}" in
+ case "$UNAME_VERSION" in
Debian*)
release='-gnu'
;;
*)
- release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2`
+ release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2)
;;
esac
# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
# contains redundant information, the shorter form:
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
- echo "${machine}-${os}${release}${abi}"
+ echo "$machine-${os}${release}${abi-}"
exit ;;
*:Bitrig:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
+ UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//')
+ echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
exit ;;
*:OpenBSD:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
+ UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//')
+ echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
exit ;;
*:LibertyBSD:*:*)
- UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
- echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE}
+ UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//')
+ echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
+ exit ;;
+ *:MidnightBSD:*:*)
+ echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
exit ;;
*:ekkoBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
exit ;;
*:SolidBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
+ exit ;;
+ *:OS108:*:*)
+ echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE"
exit ;;
macppc:MirBSD:*:*)
- echo powerpc-unknown-mirbsd${UNAME_RELEASE}
+ echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
exit ;;
*:MirBSD:*:*)
- echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
exit ;;
*:Sortix:*:*)
- echo ${UNAME_MACHINE}-unknown-sortix
+ echo "$UNAME_MACHINE"-unknown-sortix
+ exit ;;
+ *:Twizzler:*:*)
+ echo "$UNAME_MACHINE"-unknown-twizzler
+ exit ;;
+ *:Redox:*:*)
+ echo "$UNAME_MACHINE"-unknown-redox
+ exit ;;
+ mips:OSF1:*.*)
+ echo mips-dec-osf1
exit ;;
alpha:OSF1:*:*)
case $UNAME_RELEASE in
*4.0)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
+ UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}')
;;
*5.*)
- UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
+ UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}')
;;
esac
# According to Compaq, /usr/sbin/psrinfo has been available on
# OSF/1 and Tru64 systems produced since 1995. I hope that
# covers most systems running today. This code pipes the CPU
# types through head -n 1, so we only detect the type of CPU 0.
- ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1`
+ ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1)
case "$ALPHA_CPU_TYPE" in
"EV4 (21064)")
UNAME_MACHINE=alpha ;;
@@ -310,28 +347,19 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# A Tn.n version is a released field test version.
# A Xn.n version is an unreleased experimental baselevel.
# 1.2 uses "1.2" for uname -r.
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
+ echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)"
# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
exitcode=$?
trap '' 0
exit $exitcode ;;
- Alpha\ *:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # Should we change UNAME_MACHINE based on the output of uname instead
- # of the specific Alpha model?
- echo alpha-pc-interix
- exit ;;
- 21064:Windows_NT:50:3)
- echo alpha-dec-winnt3.5
- exit ;;
Amiga*:UNIX_System_V:4.0:*)
echo m68k-unknown-sysv4
exit ;;
*:[Aa]miga[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-amigaos
+ echo "$UNAME_MACHINE"-unknown-amigaos
exit ;;
*:[Mm]orph[Oo][Ss]:*:*)
- echo ${UNAME_MACHINE}-unknown-morphos
+ echo "$UNAME_MACHINE"-unknown-morphos
exit ;;
*:OS/390:*:*)
echo i370-ibm-openedition
@@ -343,7 +371,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo powerpc-ibm-os400
exit ;;
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
- echo arm-acorn-riscix${UNAME_RELEASE}
+ echo arm-acorn-riscix"$UNAME_RELEASE"
exit ;;
arm*:riscos:*:*|arm*:RISCOS:*:*)
echo arm-unknown-riscos
@@ -353,7 +381,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
exit ;;
Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
- if test "`(/bin/universe) 2>/dev/null`" = att ; then
+ if test "$( (/bin/universe) 2>/dev/null)" = att ; then
echo pyramid-pyramid-sysv3
else
echo pyramid-pyramid-bsd
@@ -366,28 +394,28 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
echo sparc-icl-nx6
exit ;;
DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
- case `/usr/bin/uname -p` in
+ case $(/usr/bin/uname -p) in
sparc) echo sparc-icl-nx7; exit ;;
esac ;;
s390x:SunOS:*:*)
- echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
exit ;;
sun4H:SunOS:5.*:*)
- echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
exit ;;
sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
- echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
exit ;;
i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
- echo i386-pc-auroraux${UNAME_RELEASE}
+ echo i386-pc-auroraux"$UNAME_RELEASE"
exit ;;
i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
- eval $set_cc_for_build
+ set_cc_for_build
SUN_ARCH=i386
# If there is a compiler, see if it is configured for 64-bit objects.
# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
# This test works for both compilers.
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
+ if test "$CC_FOR_BUILD" != no_compiler_found; then
if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
grep IS_64BIT_ARCH >/dev/null
@@ -395,40 +423,40 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
SUN_ARCH=x86_64
fi
fi
- echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
exit ;;
sun4*:SunOS:6*:*)
# According to config.sub, this is the proper way to canonicalize
# SunOS6. Hard to guess exactly what SunOS6 will be like, but
# it's likely to be more like Solaris than SunOS4.
- echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
exit ;;
sun4*:SunOS:*:*)
- case "`/usr/bin/arch -k`" in
+ case "$(/usr/bin/arch -k)" in
Series*|S4*)
- UNAME_RELEASE=`uname -v`
+ UNAME_RELEASE=$(uname -v)
;;
esac
# Japanese Language versions have a version number like `4.1.3-JL'.
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+ echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')"
exit ;;
sun3*:SunOS:*:*)
- echo m68k-sun-sunos${UNAME_RELEASE}
+ echo m68k-sun-sunos"$UNAME_RELEASE"
exit ;;
sun*:*:4.2BSD:*)
- UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
- test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3
- case "`/bin/arch`" in
+ UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null)
+ test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
+ case "$(/bin/arch)" in
sun3)
- echo m68k-sun-sunos${UNAME_RELEASE}
+ echo m68k-sun-sunos"$UNAME_RELEASE"
;;
sun4)
- echo sparc-sun-sunos${UNAME_RELEASE}
+ echo sparc-sun-sunos"$UNAME_RELEASE"
;;
esac
exit ;;
aushp:SunOS:*:*)
- echo sparc-auspex-sunos${UNAME_RELEASE}
+ echo sparc-auspex-sunos"$UNAME_RELEASE"
exit ;;
# The situation for MiNT is a little confusing. The machine name
# can be virtually everything (everything which is not
@@ -439,44 +467,44 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
# MiNT. But MiNT is downward compatible to TOS, so this should
# be no problem.
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint"$UNAME_RELEASE"
exit ;;
atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint"$UNAME_RELEASE"
exit ;;
*falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
- echo m68k-atari-mint${UNAME_RELEASE}
+ echo m68k-atari-mint"$UNAME_RELEASE"
exit ;;
milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
- echo m68k-milan-mint${UNAME_RELEASE}
+ echo m68k-milan-mint"$UNAME_RELEASE"
exit ;;
hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
- echo m68k-hades-mint${UNAME_RELEASE}
+ echo m68k-hades-mint"$UNAME_RELEASE"
exit ;;
*:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
- echo m68k-unknown-mint${UNAME_RELEASE}
+ echo m68k-unknown-mint"$UNAME_RELEASE"
exit ;;
m68k:machten:*:*)
- echo m68k-apple-machten${UNAME_RELEASE}
+ echo m68k-apple-machten"$UNAME_RELEASE"
exit ;;
powerpc:machten:*:*)
- echo powerpc-apple-machten${UNAME_RELEASE}
+ echo powerpc-apple-machten"$UNAME_RELEASE"
exit ;;
RISC*:Mach:*:*)
echo mips-dec-mach_bsd4.3
exit ;;
RISC*:ULTRIX:*:*)
- echo mips-dec-ultrix${UNAME_RELEASE}
+ echo mips-dec-ultrix"$UNAME_RELEASE"
exit ;;
VAX*:ULTRIX*:*:*)
- echo vax-dec-ultrix${UNAME_RELEASE}
+ echo vax-dec-ultrix"$UNAME_RELEASE"
exit ;;
2020:CLIX:*:* | 2430:CLIX:*:*)
- echo clipper-intergraph-clix${UNAME_RELEASE}
+ echo clipper-intergraph-clix"$UNAME_RELEASE"
exit ;;
mips:*:*:UMIPS | mips:*:*:RISCos)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ set_cc_for_build
+ sed 's/^ //' << EOF > "$dummy.c"
#ifdef __cplusplus
#include <stdio.h> /* for printf() prototype */
int main (int argc, char *argv[]) {
@@ -485,23 +513,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
#endif
#if defined (host_mips) && defined (MIPSEB)
#if defined (SYSTYPE_SYSV)
- printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
#endif
#if defined (SYSTYPE_SVR4)
- printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
#endif
#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
- printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+ printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
#endif
#endif
exit (-1);
}
EOF
- $CC_FOR_BUILD -o $dummy $dummy.c &&
- dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
- SYSTEM_NAME=`$dummy $dummyarg` &&
+ $CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
+ dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') &&
+ SYSTEM_NAME=$("$dummy" "$dummyarg") &&
{ echo "$SYSTEM_NAME"; exit; }
- echo mips-mips-riscos${UNAME_RELEASE}
+ echo mips-mips-riscos"$UNAME_RELEASE"
exit ;;
Motorola:PowerMAX_OS:*:*)
echo powerpc-motorola-powermax
@@ -526,18 +554,18 @@ EOF
exit ;;
AViiON:dgux:*:*)
# DG/UX returns AViiON for all architectures
- UNAME_PROCESSOR=`/usr/bin/uname -p`
- if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+ UNAME_PROCESSOR=$(/usr/bin/uname -p)
+ if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110
then
- if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
- [ ${TARGET_BINARY_INTERFACE}x = x ]
+ if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \
+ test "$TARGET_BINARY_INTERFACE"x = x
then
- echo m88k-dg-dgux${UNAME_RELEASE}
+ echo m88k-dg-dgux"$UNAME_RELEASE"
else
- echo m88k-dg-dguxbcs${UNAME_RELEASE}
+ echo m88k-dg-dguxbcs"$UNAME_RELEASE"
fi
else
- echo i586-dg-dgux${UNAME_RELEASE}
+ echo i586-dg-dgux"$UNAME_RELEASE"
fi
exit ;;
M88*:DolphinOS:*:*) # DolphinOS (SVR3)
@@ -554,26 +582,26 @@ EOF
echo m68k-tektronix-bsd
exit ;;
*:IRIX*:*:*)
- echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+ echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')"
exit ;;
????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id
- exit ;; # Note that: echo "'`uname -s`'" gives 'AIX '
+ exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX '
i*86:AIX:*:*)
echo i386-ibm-aix
exit ;;
ia64:AIX:*:*)
- if [ -x /usr/bin/oslevel ] ; then
- IBM_REV=`/usr/bin/oslevel`
+ if test -x /usr/bin/oslevel ; then
+ IBM_REV=$(/usr/bin/oslevel)
else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
fi
- echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+ echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
exit ;;
*:AIX:2:3)
if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ set_cc_for_build
+ sed 's/^ //' << EOF > "$dummy.c"
#include <sys/systemcfg.h>
main()
@@ -584,7 +612,7 @@ EOF
exit(0);
}
EOF
- if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+ if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy")
then
echo "$SYSTEM_NAME"
else
@@ -597,28 +625,28 @@ EOF
fi
exit ;;
*:AIX:*:[4567])
- IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
- if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+ IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }')
+ if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
IBM_ARCH=rs6000
else
IBM_ARCH=powerpc
fi
- if [ -x /usr/bin/lslpp ] ; then
- IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc |
- awk -F: '{ print $3 }' | sed s/[0-9]*$/0/`
+ if test -x /usr/bin/lslpp ; then
+ IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc |
+ awk -F: '{ print $3 }' | sed s/[0-9]*$/0/)
else
- IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+ IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
fi
- echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+ echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
exit ;;
*:AIX:*:*)
echo rs6000-ibm-aix
exit ;;
- ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+ ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
echo romp-ibm-bsd4.4
exit ;;
ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and
- echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to
+ echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to
exit ;; # report: romp-ibm BSD 4.3
*:BOSX:*:*)
echo rs6000-bull-bosx
@@ -633,28 +661,28 @@ EOF
echo m68k-hp-bsd4.4
exit ;;
9000/[34678]??:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- case "${UNAME_MACHINE}" in
- 9000/31? ) HP_ARCH=m68000 ;;
- 9000/[34]?? ) HP_ARCH=m68k ;;
+ HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
+ case "$UNAME_MACHINE" in
+ 9000/31?) HP_ARCH=m68000 ;;
+ 9000/[34]??) HP_ARCH=m68k ;;
9000/[678][0-9][0-9])
- if [ -x /usr/bin/getconf ]; then
- sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
- sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
- case "${sc_cpu_version}" in
+ if test -x /usr/bin/getconf; then
+ sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null)
+ sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null)
+ case "$sc_cpu_version" in
523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
532) # CPU_PA_RISC2_0
- case "${sc_kernel_bits}" in
+ case "$sc_kernel_bits" in
32) HP_ARCH=hppa2.0n ;;
64) HP_ARCH=hppa2.0w ;;
'') HP_ARCH=hppa2.0 ;; # HP-UX 10.20
esac ;;
esac
fi
- if [ "${HP_ARCH}" = "" ]; then
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ if test "$HP_ARCH" = ""; then
+ set_cc_for_build
+ sed 's/^ //' << EOF > "$dummy.c"
#define _HPUX_SOURCE
#include <stdlib.h>
@@ -687,13 +715,13 @@ EOF
exit (0);
}
EOF
- (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+ (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy")
test -z "$HP_ARCH" && HP_ARCH=hppa
fi ;;
esac
- if [ ${HP_ARCH} = hppa2.0w ]
+ if test "$HP_ARCH" = hppa2.0w
then
- eval $set_cc_for_build
+ set_cc_for_build
# hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
# 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler
@@ -712,15 +740,15 @@ EOF
HP_ARCH=hppa64
fi
fi
- echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+ echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
exit ;;
ia64:HP-UX:*:*)
- HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
- echo ia64-hp-hpux${HPUX_REV}
+ HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
+ echo ia64-hp-hpux"$HPUX_REV"
exit ;;
3050*:HI-UX:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ set_cc_for_build
+ sed 's/^ //' << EOF > "$dummy.c"
#include <unistd.h>
int
main ()
@@ -745,11 +773,11 @@ EOF
exit (0);
}
EOF
- $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` &&
+ $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") &&
{ echo "$SYSTEM_NAME"; exit; }
echo unknown-hitachi-hiuxwe2
exit ;;
- 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* )
+ 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
echo hppa1.1-hp-bsd
exit ;;
9000/8??:4.3bsd:*:*)
@@ -758,17 +786,17 @@ EOF
*9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
echo hppa1.0-hp-mpeix
exit ;;
- hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* )
+ hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
echo hppa1.1-hp-osf
exit ;;
hp8??:OSF1:*:*)
echo hppa1.0-hp-osf
exit ;;
i*86:OSF1:*:*)
- if [ -x /usr/sbin/sysversion ] ; then
- echo ${UNAME_MACHINE}-unknown-osf1mk
+ if test -x /usr/sbin/sysversion ; then
+ echo "$UNAME_MACHINE"-unknown-osf1mk
else
- echo ${UNAME_MACHINE}-unknown-osf1
+ echo "$UNAME_MACHINE"-unknown-osf1
fi
exit ;;
parisc*:Lites*:*:*)
@@ -793,130 +821,123 @@ EOF
echo c4-convex-bsd
exit ;;
CRAY*Y-MP:*:*:*)
- echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*[A-Z]90:*:*:*)
- echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \
+ echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-e 's/\.[^.]*$/.X/'
exit ;;
CRAY*TS:*:*:*)
- echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*T3E:*:*:*)
- echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
exit ;;
CRAY*SV1:*:*:*)
- echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
exit ;;
*:UNICOS/mp:*:*)
- echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/'
+ echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
exit ;;
F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
- FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
- FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'`
+ FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)
+ FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
+ FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/')
echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
5000:UNIX_System_V:4.*:*)
- FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
- FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
+ FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
+ FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/')
echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
exit ;;
i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
- echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
exit ;;
sparc*:BSD/OS:*:*)
- echo sparc-unknown-bsdi${UNAME_RELEASE}
+ echo sparc-unknown-bsdi"$UNAME_RELEASE"
exit ;;
*:BSD/OS:*:*)
- echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
+ exit ;;
+ arm:FreeBSD:*:*)
+ UNAME_PROCESSOR=$(uname -p)
+ set_cc_for_build
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi
+ else
+ echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf
+ fi
exit ;;
*:FreeBSD:*:*)
- UNAME_PROCESSOR=`/usr/bin/uname -p`
- case ${UNAME_PROCESSOR} in
+ UNAME_PROCESSOR=$(/usr/bin/uname -p)
+ case "$UNAME_PROCESSOR" in
amd64)
- echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
- *)
- echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ UNAME_PROCESSOR=x86_64 ;;
+ i386)
+ UNAME_PROCESSOR=i586 ;;
esac
+ echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
exit ;;
i*:CYGWIN*:*)
- echo ${UNAME_MACHINE}-pc-cygwin
+ echo "$UNAME_MACHINE"-pc-cygwin
exit ;;
*:MINGW64*:*)
- echo ${UNAME_MACHINE}-pc-mingw64
+ echo "$UNAME_MACHINE"-pc-mingw64
exit ;;
*:MINGW*:*)
- echo ${UNAME_MACHINE}-pc-mingw32
+ echo "$UNAME_MACHINE"-pc-mingw32
exit ;;
*:MSYS*:*)
- echo ${UNAME_MACHINE}-pc-msys
- exit ;;
- i*:windows32*:*)
- # uname -m includes "-pc" on this system.
- echo ${UNAME_MACHINE}-mingw32
+ echo "$UNAME_MACHINE"-pc-msys
exit ;;
i*:PW*:*)
- echo ${UNAME_MACHINE}-pc-pw32
+ echo "$UNAME_MACHINE"-pc-pw32
exit ;;
*:Interix*:*)
- case ${UNAME_MACHINE} in
+ case "$UNAME_MACHINE" in
x86)
- echo i586-pc-interix${UNAME_RELEASE}
+ echo i586-pc-interix"$UNAME_RELEASE"
exit ;;
authenticamd | genuineintel | EM64T)
- echo x86_64-unknown-interix${UNAME_RELEASE}
+ echo x86_64-unknown-interix"$UNAME_RELEASE"
exit ;;
IA64)
- echo ia64-unknown-interix${UNAME_RELEASE}
+ echo ia64-unknown-interix"$UNAME_RELEASE"
exit ;;
esac ;;
- [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*)
- echo i${UNAME_MACHINE}-pc-mks
- exit ;;
- 8664:Windows_NT:*)
- echo x86_64-pc-mks
- exit ;;
- i*:Windows_NT*:* | Pentium*:Windows_NT*:*)
- # How do we know it's Interix rather than the generic POSIX subsystem?
- # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we
- # UNAME_MACHINE based on the output of uname instead of i386?
- echo i586-pc-interix
- exit ;;
i*:UWIN*:*)
- echo ${UNAME_MACHINE}-pc-uwin
+ echo "$UNAME_MACHINE"-pc-uwin
exit ;;
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
- echo x86_64-unknown-cygwin
- exit ;;
- p*:CYGWIN*:*)
- echo powerpcle-unknown-cygwin
+ echo x86_64-pc-cygwin
exit ;;
prep*:SunOS:5.*:*)
- echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'`
+ echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
exit ;;
*:GNU:*:*)
# the GNU system
- echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'`
+ echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')"
exit ;;
*:GNU/*:*:*)
# other systems with GNU libc and userland
- echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC}
+ echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC"
exit ;;
- i*86:Minix:*:*)
- echo ${UNAME_MACHINE}-pc-minix
+ *:Minix:*:*)
+ echo "$UNAME_MACHINE"-unknown-minix
exit ;;
aarch64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
aarch64_be:Linux:*:*)
UNAME_MACHINE=aarch64_be
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
alpha:Linux:*:*)
- case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
+ case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in
EV5) UNAME_MACHINE=alphaev5 ;;
EV56) UNAME_MACHINE=alphaev56 ;;
PCA56) UNAME_MACHINE=alphapca56 ;;
@@ -927,140 +948,181 @@ EOF
esac
objdump --private-headers /bin/sh | grep -q ld.so.1
if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
arc:Linux:*:* | arceb:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
arm*:Linux:*:*)
- eval $set_cc_for_build
+ set_cc_for_build
if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_EABI__
then
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
else
if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
| grep -q __ARM_PCS_VFP
then
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
else
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
fi
fi
exit ;;
avr32*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
cris:Linux:*:*)
- echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+ echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
exit ;;
crisv32:Linux:*:*)
- echo ${UNAME_MACHINE}-axis-linux-${LIBC}
+ echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
exit ;;
e2k:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
frv:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
hexagon:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
i*86:Linux:*:*)
- echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+ echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
exit ;;
ia64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
k1om:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
+ exit ;;
+ loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
m32r*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
m68*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
mips:Linux:*:* | mips64:Linux:*:*)
- eval $set_cc_for_build
- sed 's/^ //' << EOF >$dummy.c
+ set_cc_for_build
+ IS_GLIBC=0
+ test x"${LIBC}" = xgnu && IS_GLIBC=1
+ sed 's/^ //' << EOF > "$dummy.c"
#undef CPU
- #undef ${UNAME_MACHINE}
- #undef ${UNAME_MACHINE}el
+ #undef mips
+ #undef mipsel
+ #undef mips64
+ #undef mips64el
+ #if ${IS_GLIBC} && defined(_ABI64)
+ LIBCABI=gnuabi64
+ #else
+ #if ${IS_GLIBC} && defined(_ABIN32)
+ LIBCABI=gnuabin32
+ #else
+ LIBCABI=${LIBC}
+ #endif
+ #endif
+
+ #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
+ CPU=mipsisa64r6
+ #else
+ #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
+ CPU=mipsisa32r6
+ #else
+ #if defined(__mips64)
+ CPU=mips64
+ #else
+ CPU=mips
+ #endif
+ #endif
+ #endif
+
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
- CPU=${UNAME_MACHINE}el
+ MIPS_ENDIAN=el
#else
#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
- CPU=${UNAME_MACHINE}
+ MIPS_ENDIAN=
#else
- CPU=
+ MIPS_ENDIAN=
#endif
#endif
EOF
- eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'`
- test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; }
+ eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')"
+ test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
;;
mips64el:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
openrisc*:Linux:*:*)
- echo or1k-unknown-linux-${LIBC}
+ echo or1k-unknown-linux-"$LIBC"
exit ;;
or32:Linux:*:* | or1k*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
padre:Linux:*:*)
- echo sparc-unknown-linux-${LIBC}
+ echo sparc-unknown-linux-"$LIBC"
exit ;;
parisc64:Linux:*:* | hppa64:Linux:*:*)
- echo hppa64-unknown-linux-${LIBC}
+ echo hppa64-unknown-linux-"$LIBC"
exit ;;
parisc:Linux:*:* | hppa:Linux:*:*)
# Look for CPU level
- case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
- PA7*) echo hppa1.1-unknown-linux-${LIBC} ;;
- PA8*) echo hppa2.0-unknown-linux-${LIBC} ;;
- *) echo hppa-unknown-linux-${LIBC} ;;
+ case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in
+ PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
+ PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
+ *) echo hppa-unknown-linux-"$LIBC" ;;
esac
exit ;;
ppc64:Linux:*:*)
- echo powerpc64-unknown-linux-${LIBC}
+ echo powerpc64-unknown-linux-"$LIBC"
exit ;;
ppc:Linux:*:*)
- echo powerpc-unknown-linux-${LIBC}
+ echo powerpc-unknown-linux-"$LIBC"
exit ;;
ppc64le:Linux:*:*)
- echo powerpc64le-unknown-linux-${LIBC}
+ echo powerpc64le-unknown-linux-"$LIBC"
exit ;;
ppcle:Linux:*:*)
- echo powerpcle-unknown-linux-${LIBC}
+ echo powerpcle-unknown-linux-"$LIBC"
exit ;;
- riscv32:Linux:*:* | riscv64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*)
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
s390:Linux:*:* | s390x:Linux:*:*)
- echo ${UNAME_MACHINE}-ibm-linux-${LIBC}
+ echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
exit ;;
sh64*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
sh*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
sparc:Linux:*:* | sparc64:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
tile*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
vax:Linux:*:*)
- echo ${UNAME_MACHINE}-dec-linux-${LIBC}
+ echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
exit ;;
x86_64:Linux:*:*)
- echo ${UNAME_MACHINE}-pc-linux-${LIBC}
+ set_cc_for_build
+ LIBCABI=$LIBC
+ if test "$CC_FOR_BUILD" != no_compiler_found; then
+ if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_X32 >/dev/null
+ then
+ LIBCABI="$LIBC"x32
+ fi
+ fi
+ echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI"
exit ;;
xtensa*:Linux:*:*)
- echo ${UNAME_MACHINE}-unknown-linux-${LIBC}
+ echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
exit ;;
i*86:DYNIX/ptx:4*:*)
# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
@@ -1074,51 +1136,51 @@ EOF
# I am not positive that other SVR4 systems won't match this,
# I just have to hope. -- rms.
# Use sysv4.2uw... so that sysv4* matches it.
- echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION}
+ echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
exit ;;
i*86:OS/2:*:*)
# If we were able to find `uname', then EMX Unix compatibility
# is probably installed.
- echo ${UNAME_MACHINE}-pc-os2-emx
+ echo "$UNAME_MACHINE"-pc-os2-emx
exit ;;
i*86:XTS-300:*:STOP)
- echo ${UNAME_MACHINE}-unknown-stop
+ echo "$UNAME_MACHINE"-unknown-stop
exit ;;
i*86:atheos:*:*)
- echo ${UNAME_MACHINE}-unknown-atheos
+ echo "$UNAME_MACHINE"-unknown-atheos
exit ;;
i*86:syllable:*:*)
- echo ${UNAME_MACHINE}-pc-syllable
+ echo "$UNAME_MACHINE"-pc-syllable
exit ;;
i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
- echo i386-unknown-lynxos${UNAME_RELEASE}
+ echo i386-unknown-lynxos"$UNAME_RELEASE"
exit ;;
i*86:*DOS:*:*)
- echo ${UNAME_MACHINE}-pc-msdosdjgpp
+ echo "$UNAME_MACHINE"-pc-msdosdjgpp
exit ;;
- i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
- UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+ i*86:*:4.*:*)
+ UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//')
if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
- echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+ echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
else
- echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+ echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
fi
exit ;;
i*86:*:5:[678]*)
# UnixWare 7.x, OpenUNIX and OpenServer 6.
- case `/bin/uname -X | grep "^Machine"` in
+ case $(/bin/uname -X | grep "^Machine") in
*486*) UNAME_MACHINE=i486 ;;
*Pentium) UNAME_MACHINE=i586 ;;
*Pent*|*Celeron) UNAME_MACHINE=i686 ;;
esac
- echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+ echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
exit ;;
i*86:*:3.2:*)
if test -f /usr/options/cb.name; then
- UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
- echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+ UNAME_REL=$(sed -n 's/.*Version //p' </usr/options/cb.name)
+ echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
elif /bin/uname -X 2>/dev/null >/dev/null ; then
- UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+ UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //'))
(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
&& UNAME_MACHINE=i586
@@ -1126,9 +1188,9 @@ EOF
&& UNAME_MACHINE=i686
(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
&& UNAME_MACHINE=i686
- echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+ echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
else
- echo ${UNAME_MACHINE}-pc-sysv32
+ echo "$UNAME_MACHINE"-pc-sysv32
fi
exit ;;
pc:*:*:*)
@@ -1148,9 +1210,9 @@ EOF
exit ;;
i860:*:4.*:*) # i860-SVR4
if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
- echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+ echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
else # Add other i860-SVR4 vendors below as they are discovered.
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4
+ echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4
fi
exit ;;
mini*:CTIX:SYS*5:*)
@@ -1168,41 +1230,41 @@ EOF
3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
OS_REL=''
test -r /etc/.relid \
- && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
&& { echo i486-ncr-sysv4; exit; } ;;
NCR*:*:4.2:* | MPRAS*:*:4.2:*)
OS_REL='.3'
test -r /etc/.relid \
- && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
+ && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
- && { echo i486-ncr-sysv4.3${OS_REL}; exit; }
+ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; }
+ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
- && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;;
+ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
- echo m68k-unknown-lynxos${UNAME_RELEASE}
+ echo m68k-unknown-lynxos"$UNAME_RELEASE"
exit ;;
mc68030:UNIX_System_V:4.*:*)
echo m68k-atari-sysv4
exit ;;
TSUNAMI:LynxOS:2.*:*)
- echo sparc-unknown-lynxos${UNAME_RELEASE}
+ echo sparc-unknown-lynxos"$UNAME_RELEASE"
exit ;;
rs6000:LynxOS:2.*:*)
- echo rs6000-unknown-lynxos${UNAME_RELEASE}
+ echo rs6000-unknown-lynxos"$UNAME_RELEASE"
exit ;;
PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
- echo powerpc-unknown-lynxos${UNAME_RELEASE}
+ echo powerpc-unknown-lynxos"$UNAME_RELEASE"
exit ;;
SM[BE]S:UNIX_SV:*:*)
- echo mips-dde-sysv${UNAME_RELEASE}
+ echo mips-dde-sysv"$UNAME_RELEASE"
exit ;;
RM*:ReliantUNIX-*:*:*)
echo mips-sni-sysv4
@@ -1212,8 +1274,8 @@ EOF
exit ;;
*:SINIX-*:*:*)
if uname -p 2>/dev/null >/dev/null ; then
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- echo ${UNAME_MACHINE}-sni-sysv4
+ UNAME_MACHINE=$( (uname -p) 2>/dev/null)
+ echo "$UNAME_MACHINE"-sni-sysv4
else
echo ns32k-sni-sysv
fi
@@ -1233,23 +1295,23 @@ EOF
exit ;;
i*86:VOS:*:*)
# From Paul.Green@stratus.com.
- echo ${UNAME_MACHINE}-stratus-vos
+ echo "$UNAME_MACHINE"-stratus-vos
exit ;;
*:VOS:*:*)
# From Paul.Green@stratus.com.
echo hppa1.1-stratus-vos
exit ;;
mc68*:A/UX:*:*)
- echo m68k-apple-aux${UNAME_RELEASE}
+ echo m68k-apple-aux"$UNAME_RELEASE"
exit ;;
news*:NEWS-OS:6*:*)
echo mips-sony-newsos6
exit ;;
R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
- if [ -d /usr/nec ]; then
- echo mips-nec-sysv${UNAME_RELEASE}
+ if test -d /usr/nec; then
+ echo mips-nec-sysv"$UNAME_RELEASE"
else
- echo mips-unknown-sysv${UNAME_RELEASE}
+ echo mips-unknown-sysv"$UNAME_RELEASE"
fi
exit ;;
BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only.
@@ -1268,80 +1330,97 @@ EOF
echo x86_64-unknown-haiku
exit ;;
SX-4:SUPER-UX:*:*)
- echo sx4-nec-superux${UNAME_RELEASE}
+ echo sx4-nec-superux"$UNAME_RELEASE"
exit ;;
SX-5:SUPER-UX:*:*)
- echo sx5-nec-superux${UNAME_RELEASE}
+ echo sx5-nec-superux"$UNAME_RELEASE"
exit ;;
SX-6:SUPER-UX:*:*)
- echo sx6-nec-superux${UNAME_RELEASE}
+ echo sx6-nec-superux"$UNAME_RELEASE"
exit ;;
SX-7:SUPER-UX:*:*)
- echo sx7-nec-superux${UNAME_RELEASE}
+ echo sx7-nec-superux"$UNAME_RELEASE"
exit ;;
SX-8:SUPER-UX:*:*)
- echo sx8-nec-superux${UNAME_RELEASE}
+ echo sx8-nec-superux"$UNAME_RELEASE"
exit ;;
SX-8R:SUPER-UX:*:*)
- echo sx8r-nec-superux${UNAME_RELEASE}
+ echo sx8r-nec-superux"$UNAME_RELEASE"
exit ;;
SX-ACE:SUPER-UX:*:*)
- echo sxace-nec-superux${UNAME_RELEASE}
+ echo sxace-nec-superux"$UNAME_RELEASE"
exit ;;
Power*:Rhapsody:*:*)
- echo powerpc-apple-rhapsody${UNAME_RELEASE}
+ echo powerpc-apple-rhapsody"$UNAME_RELEASE"
exit ;;
*:Rhapsody:*:*)
- echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
+ exit ;;
+ arm64:Darwin:*:*)
+ echo aarch64-apple-darwin"$UNAME_RELEASE"
exit ;;
*:Darwin:*:*)
- UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
- eval $set_cc_for_build
- if test "$UNAME_PROCESSOR" = unknown ; then
- UNAME_PROCESSOR=powerpc
+ UNAME_PROCESSOR=$(uname -p)
+ case $UNAME_PROCESSOR in
+ unknown) UNAME_PROCESSOR=powerpc ;;
+ esac
+ if command -v xcode-select > /dev/null 2> /dev/null && \
+ ! xcode-select --print-path > /dev/null 2> /dev/null ; then
+ # Avoid executing cc if there is no toolchain installed as
+ # cc will be a stub that puts up a graphical alert
+ # prompting the user to install developer tools.
+ CC_FOR_BUILD=no_compiler_found
+ else
+ set_cc_for_build
fi
- if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then
- if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
- (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
- grep IS_64BIT_ARCH >/dev/null
- then
- case $UNAME_PROCESSOR in
- i386) UNAME_PROCESSOR=x86_64 ;;
- powerpc) UNAME_PROCESSOR=powerpc64 ;;
- esac
- fi
+ if test "$CC_FOR_BUILD" != no_compiler_found; then
+ if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_64BIT_ARCH >/dev/null
+ then
+ case $UNAME_PROCESSOR in
+ i386) UNAME_PROCESSOR=x86_64 ;;
+ powerpc) UNAME_PROCESSOR=powerpc64 ;;
+ esac
+ fi
+ # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
+ if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
+ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
+ grep IS_PPC >/dev/null
+ then
+ UNAME_PROCESSOR=powerpc
fi
elif test "$UNAME_PROCESSOR" = i386 ; then
- # Avoid executing cc on OS X 10.9, as it ships with a stub
- # that puts up a graphical alert prompting to install
- # developer tools. Any system running Mac OS X 10.7 or
- # later (Darwin 11 and later) is required to have a 64-bit
- # processor. This is not true of the ARM version of Darwin
- # that Apple uses in portable devices.
- UNAME_PROCESSOR=x86_64
+ # uname -m returns i386 or x86_64
+ UNAME_PROCESSOR=$UNAME_MACHINE
fi
- echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+ echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
exit ;;
*:procnto*:*:* | *:QNX:[0123456789]*:*)
- UNAME_PROCESSOR=`uname -p`
+ UNAME_PROCESSOR=$(uname -p)
if test "$UNAME_PROCESSOR" = x86; then
UNAME_PROCESSOR=i386
UNAME_MACHINE=pc
fi
- echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+ echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
exit ;;
*:QNX:*:4*)
echo i386-pc-qnx
exit ;;
- NEO-?:NONSTOP_KERNEL:*:*)
- echo neo-tandem-nsk${UNAME_RELEASE}
+ NEO-*:NONSTOP_KERNEL:*:*)
+ echo neo-tandem-nsk"$UNAME_RELEASE"
exit ;;
NSE-*:NONSTOP_KERNEL:*:*)
- echo nse-tandem-nsk${UNAME_RELEASE}
+ echo nse-tandem-nsk"$UNAME_RELEASE"
+ exit ;;
+ NSR-*:NONSTOP_KERNEL:*:*)
+ echo nsr-tandem-nsk"$UNAME_RELEASE"
+ exit ;;
+ NSV-*:NONSTOP_KERNEL:*:*)
+ echo nsv-tandem-nsk"$UNAME_RELEASE"
exit ;;
- NSR-?:NONSTOP_KERNEL:*:*)
- echo nsr-tandem-nsk${UNAME_RELEASE}
+ NSX-*:NONSTOP_KERNEL:*:*)
+ echo nsx-tandem-nsk"$UNAME_RELEASE"
exit ;;
*:NonStop-UX:*:*)
echo mips-compaq-nonstopux
@@ -1350,18 +1429,19 @@ EOF
echo bs2000-siemens-sysv
exit ;;
DS/*:UNIX_System_V:*:*)
- echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+ echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
exit ;;
*:Plan9:*:*)
# "uname -m" is not consistent, so use $cputype instead. 386
# is converted to i386 for consistency with other x86
# operating systems.
+ # shellcheck disable=SC2154
if test "$cputype" = 386; then
UNAME_MACHINE=i386
else
UNAME_MACHINE="$cputype"
fi
- echo ${UNAME_MACHINE}-unknown-plan9
+ echo "$UNAME_MACHINE"-unknown-plan9
exit ;;
*:TOPS-10:*:*)
echo pdp10-unknown-tops10
@@ -1382,14 +1462,14 @@ EOF
echo pdp10-unknown-its
exit ;;
SEI:*:*:SEIUX)
- echo mips-sei-seiux${UNAME_RELEASE}
+ echo mips-sei-seiux"$UNAME_RELEASE"
exit ;;
*:DragonFly:*:*)
- echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+ echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
exit ;;
*:*VMS:*:*)
- UNAME_MACHINE=`(uname -p) 2>/dev/null`
- case "${UNAME_MACHINE}" in
+ UNAME_MACHINE=$( (uname -p) 2>/dev/null)
+ case "$UNAME_MACHINE" in
A*) echo alpha-dec-vms ; exit ;;
I*) echo ia64-dec-vms ; exit ;;
V*) echo vax-dec-vms ; exit ;;
@@ -1398,32 +1478,190 @@ EOF
echo i386-pc-xenix
exit ;;
i*86:skyos:*:*)
- echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'`
+ echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')"
exit ;;
i*86:rdos:*:*)
- echo ${UNAME_MACHINE}-pc-rdos
+ echo "$UNAME_MACHINE"-pc-rdos
exit ;;
i*86:AROS:*:*)
- echo ${UNAME_MACHINE}-pc-aros
+ echo "$UNAME_MACHINE"-pc-aros
exit ;;
x86_64:VMkernel:*:*)
- echo ${UNAME_MACHINE}-unknown-esx
+ echo "$UNAME_MACHINE"-unknown-esx
exit ;;
amd64:Isilon\ OneFS:*:*)
echo x86_64-unknown-onefs
exit ;;
+ *:Unleashed:*:*)
+ echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
+ exit ;;
+esac
+
+# No uname command or uname output not recognized.
+set_cc_for_build
+cat > "$dummy.c" <<EOF
+#ifdef _SEQUENT_
+#include <sys/types.h>
+#include <sys/utsname.h>
+#endif
+#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
+#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
+#include <signal.h>
+#if defined(_SIZE_T_) || defined(SIGLOST)
+#include <sys/utsname.h>
+#endif
+#endif
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+ /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed,
+ I don't know.... */
+ printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+ printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+ "4"
+#else
+ ""
+#endif
+ ); exit (0);
+#endif
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+ int version;
+ version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null);
+ if (version < 4)
+ printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+ else
+ printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+ exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+ printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+ printf ("ns32k-encore-mach\n"); exit (0);
+#else
+ printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+ printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+ printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+ printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+ struct utsname un;
+
+ uname(&un);
+ if (strncmp(un.version, "V2", 2) == 0) {
+ printf ("i386-sequent-ptx2\n"); exit (0);
+ }
+ if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+ printf ("i386-sequent-ptx1\n"); exit (0);
+ }
+ printf ("i386-sequent-ptx\n"); exit (0);
+#endif
+
+#if defined (vax)
+#if !defined (ultrix)
+#include <sys/param.h>
+#if defined (BSD)
+#if BSD == 43
+ printf ("vax-dec-bsd4.3\n"); exit (0);
+#else
+#if BSD == 199006
+ printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#else
+ printf ("vax-dec-bsd\n"); exit (0);
+#endif
+#endif
+#else
+ printf ("vax-dec-bsd\n"); exit (0);
+#endif
+#else
+#if defined(_SIZE_T_) || defined(SIGLOST)
+ struct utsname un;
+ uname (&un);
+ printf ("vax-dec-ultrix%s\n", un.release); exit (0);
+#else
+ printf ("vax-dec-ultrix\n"); exit (0);
+#endif
+#endif
+#endif
+#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
+#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
+#if defined(_SIZE_T_) || defined(SIGLOST)
+ struct utsname *un;
+ uname (&un);
+ printf ("mips-dec-ultrix%s\n", un.release); exit (0);
+#else
+ printf ("mips-dec-ultrix\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (alliant) && defined (i860)
+ printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+ exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) &&
+ { echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
+
+echo "$0: unable to guess system type" >&2
+
+case "$UNAME_MACHINE:$UNAME_SYSTEM" in
+ mips:Linux | mips64:Linux)
+ # If we got here on MIPS GNU/Linux, output extra information.
+ cat >&2 <<EOF
+
+NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
+the system type. Please install a C compiler and try again.
+EOF
+ ;;
esac
cat >&2 <<EOF
-$0: unable to guess system type
This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite
-config.guess and config.sub with the latest versions from:
+operating system you are using. If your script is old, overwrite *all*
+copies of config.guess and config.sub with the latest versions from:
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess
+ https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
and
- http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+ https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
+EOF
+
+year=$(echo $timestamp | sed 's,-.*,,')
+# shellcheck disable=SC2003
+if test "$(expr "$(date +%Y)" - "$year")" -lt 3 ; then
+ cat >&2 <<EOF
If $0 has already been updated, send the following data and any
information you think might be pertinent to config-patches@gnu.org to
@@ -1431,31 +1669,32 @@ provide the necessary information to handle your system.
config.guess timestamp = $timestamp
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
+uname -m = $( (uname -m) 2>/dev/null || echo unknown)
+uname -r = $( (uname -r) 2>/dev/null || echo unknown)
+uname -s = $( (uname -s) 2>/dev/null || echo unknown)
+uname -v = $( (uname -v) 2>/dev/null || echo unknown)
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null`
+/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null)
+/bin/uname -X = $( (/bin/uname -X) 2>/dev/null)
-hostinfo = `(hostinfo) 2>/dev/null`
-/bin/universe = `(/bin/universe) 2>/dev/null`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null`
-/bin/arch = `(/bin/arch) 2>/dev/null`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+hostinfo = $( (hostinfo) 2>/dev/null)
+/bin/universe = $( (/bin/universe) 2>/dev/null)
+/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null)
+/bin/arch = $( (/bin/arch) 2>/dev/null)
+/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null)
+/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null)
-UNAME_MACHINE = ${UNAME_MACHINE}
-UNAME_RELEASE = ${UNAME_RELEASE}
-UNAME_SYSTEM = ${UNAME_SYSTEM}
-UNAME_VERSION = ${UNAME_VERSION}
+UNAME_MACHINE = "$UNAME_MACHINE"
+UNAME_RELEASE = "$UNAME_RELEASE"
+UNAME_SYSTEM = "$UNAME_SYSTEM"
+UNAME_VERSION = "$UNAME_VERSION"
EOF
+fi
exit 1
# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
+# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
diff --git a/build-aux/config.sub b/build-aux/config.sub
index dd2ca93..b0f8492 100755
--- a/build-aux/config.sub
+++ b/build-aux/config.sub
@@ -1,8 +1,8 @@
#! /bin/sh
# Configuration validation subroutine script.
-# Copyright 1992-2016 Free Software Foundation, Inc.
+# Copyright 1992-2021 Free Software Foundation, Inc.
-timestamp='2016-11-04'
+timestamp='2021-01-07'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
@@ -15,7 +15,7 @@ timestamp='2016-11-04'
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
@@ -33,7 +33,7 @@ timestamp='2016-11-04'
# Otherwise, we print the canonical config type on stdout and succeed.
# You can get the latest version of this script from:
-# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub
+# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
@@ -50,14 +50,14 @@ timestamp='2016-11-04'
# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
# It is wrong to echo any other type of specification.
-me=`echo "$0" | sed -e 's,.*/,,'`
+me=$(echo "$0" | sed -e 's,.*/,,')
usage="\
Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
Canonicalize a configuration name.
-Operation modes:
+Options:
-h, --help print this help, then exit
-t, --time-stamp print date of last modification, then exit
-v, --version print version number, then exit
@@ -67,7 +67,7 @@ Report bugs and patches to <config-patches@gnu.org>."
version="\
GNU config.sub ($timestamp)
-Copyright 1992-2016 Free Software Foundation, Inc.
+Copyright 1992-2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
@@ -89,12 +89,12 @@ while test $# -gt 0 ; do
- ) # Use stdin as input.
break ;;
-* )
- echo "$me: invalid option $1$help"
+ echo "$me: invalid option $1$help" >&2
exit 1 ;;
*local*)
# First pass through any local machine types.
- echo $1
+ echo "$1"
exit ;;
* )
@@ -110,1244 +110,1169 @@ case $# in
exit 1;;
esac
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
- nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
- linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
- knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \
- kopensolaris*-gnu* | cloudabi*-eabi* | \
- storm-chaos* | os2-emx* | rtmk-nova*)
- os=-$maybe_os
- basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
- ;;
- android-linux)
- os=-linux-android
- basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
- ;;
- *)
- basic_machine=`echo $1 | sed 's/-[^-]*$//'`
- if [ $basic_machine != $1 ]
- then os=`echo $1 | sed 's/.*-/-/'`
- else os=; fi
- ;;
-esac
+# Split fields of configuration type
+# shellcheck disable=SC2162
+IFS="-" read field1 field2 field3 field4 <<EOF
+$1
+EOF
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work. We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-case $os in
- -sun*os*)
- # Prevent following clause from handling this invalid input.
- ;;
- -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
- -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
- -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
- -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
- -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
- -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray | -microblaze*)
- os=
- basic_machine=$1
- ;;
- -bluegene*)
- os=-cnk
- ;;
- -sim | -cisco | -oki | -wec | -winbond)
- os=
- basic_machine=$1
- ;;
- -scout)
- ;;
- -wrs)
- os=-vxworks
- basic_machine=$1
- ;;
- -chorusos*)
- os=-chorusos
- basic_machine=$1
- ;;
- -chorusrdb)
- os=-chorusrdb
- basic_machine=$1
- ;;
- -hiux*)
- os=-hiuxwe2
- ;;
- -sco6)
- os=-sco5v6
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5)
- os=-sco3.2v5
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco4)
- os=-sco3.2v4
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2.[4-9]*)
- os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2v[4-9]*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco5v6*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco*)
- os=-sco3.2v2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -udk*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -isc)
- os=-isc2.2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -clix*)
- basic_machine=clipper-intergraph
- ;;
- -isc*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -lynx*178)
- os=-lynxos178
- ;;
- -lynx*5)
- os=-lynxos5
- ;;
- -lynx*)
- os=-lynxos
+# Separate into logical components for further validation
+case $1 in
+ *-*-*-*-*)
+ echo Invalid configuration \`"$1"\': more than four components >&2
+ exit 1
;;
- -ptx*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
+ *-*-*-*)
+ basic_machine=$field1-$field2
+ basic_os=$field3-$field4
;;
- -windowsnt*)
- os=`echo $os | sed -e 's/windowsnt/winnt/'`
+ *-*-*)
+ # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
+ # parts
+ maybe_os=$field2-$field3
+ case $maybe_os in
+ nto-qnx* | linux-* | uclinux-uclibc* \
+ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
+ | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
+ | storm-chaos* | os2-emx* | rtmk-nova*)
+ basic_machine=$field1
+ basic_os=$maybe_os
+ ;;
+ android-linux)
+ basic_machine=$field1-unknown
+ basic_os=linux-android
+ ;;
+ *)
+ basic_machine=$field1-$field2
+ basic_os=$field3
+ ;;
+ esac
;;
- -psos*)
- os=-psos
+ *-*)
+ # A lone config we happen to match not fitting any pattern
+ case $field1-$field2 in
+ decstation-3100)
+ basic_machine=mips-dec
+ basic_os=
+ ;;
+ *-*)
+ # Second component is usually, but not always the OS
+ case $field2 in
+ # Prevent following clause from handling this valid os
+ sun*os*)
+ basic_machine=$field1
+ basic_os=$field2
+ ;;
+ # Manufacturers
+ dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
+ | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
+ | unicom* | ibm* | next | hp | isi* | apollo | altos* \
+ | convergent* | ncr* | news | 32* | 3600* | 3100* \
+ | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
+ | ultra | tti* | harris | dolphin | highlevel | gould \
+ | cbm | ns | masscomp | apple | axis | knuth | cray \
+ | microblaze* | sim | cisco \
+ | oki | wec | wrs | winbond)
+ basic_machine=$field1-$field2
+ basic_os=
+ ;;
+ *)
+ basic_machine=$field1
+ basic_os=$field2
+ ;;
+ esac
+ ;;
+ esac
;;
- -mint | -mint[0-9]*)
- basic_machine=m68k-atari
- os=-mint
+ *)
+ # Convert single-component short-hands not valid as part of
+ # multi-component configurations.
+ case $field1 in
+ 386bsd)
+ basic_machine=i386-pc
+ basic_os=bsd
+ ;;
+ a29khif)
+ basic_machine=a29k-amd
+ basic_os=udi
+ ;;
+ adobe68k)
+ basic_machine=m68010-adobe
+ basic_os=scout
+ ;;
+ alliant)
+ basic_machine=fx80-alliant
+ basic_os=
+ ;;
+ altos | altos3068)
+ basic_machine=m68k-altos
+ basic_os=
+ ;;
+ am29k)
+ basic_machine=a29k-none
+ basic_os=bsd
+ ;;
+ amdahl)
+ basic_machine=580-amdahl
+ basic_os=sysv
+ ;;
+ amiga)
+ basic_machine=m68k-unknown
+ basic_os=
+ ;;
+ amigaos | amigados)
+ basic_machine=m68k-unknown
+ basic_os=amigaos
+ ;;
+ amigaunix | amix)
+ basic_machine=m68k-unknown
+ basic_os=sysv4
+ ;;
+ apollo68)
+ basic_machine=m68k-apollo
+ basic_os=sysv
+ ;;
+ apollo68bsd)
+ basic_machine=m68k-apollo
+ basic_os=bsd
+ ;;
+ aros)
+ basic_machine=i386-pc
+ basic_os=aros
+ ;;
+ aux)
+ basic_machine=m68k-apple
+ basic_os=aux
+ ;;
+ balance)
+ basic_machine=ns32k-sequent
+ basic_os=dynix
+ ;;
+ blackfin)
+ basic_machine=bfin-unknown
+ basic_os=linux
+ ;;
+ cegcc)
+ basic_machine=arm-unknown
+ basic_os=cegcc
+ ;;
+ convex-c1)
+ basic_machine=c1-convex
+ basic_os=bsd
+ ;;
+ convex-c2)
+ basic_machine=c2-convex
+ basic_os=bsd
+ ;;
+ convex-c32)
+ basic_machine=c32-convex
+ basic_os=bsd
+ ;;
+ convex-c34)
+ basic_machine=c34-convex
+ basic_os=bsd
+ ;;
+ convex-c38)
+ basic_machine=c38-convex
+ basic_os=bsd
+ ;;
+ cray)
+ basic_machine=j90-cray
+ basic_os=unicos
+ ;;
+ crds | unos)
+ basic_machine=m68k-crds
+ basic_os=
+ ;;
+ da30)
+ basic_machine=m68k-da30
+ basic_os=
+ ;;
+ decstation | pmax | pmin | dec3100 | decstatn)
+ basic_machine=mips-dec
+ basic_os=
+ ;;
+ delta88)
+ basic_machine=m88k-motorola
+ basic_os=sysv3
+ ;;
+ dicos)
+ basic_machine=i686-pc
+ basic_os=dicos
+ ;;
+ djgpp)
+ basic_machine=i586-pc
+ basic_os=msdosdjgpp
+ ;;
+ ebmon29k)
+ basic_machine=a29k-amd
+ basic_os=ebmon
+ ;;
+ es1800 | OSE68k | ose68k | ose | OSE)
+ basic_machine=m68k-ericsson
+ basic_os=ose
+ ;;
+ gmicro)
+ basic_machine=tron-gmicro
+ basic_os=sysv
+ ;;
+ go32)
+ basic_machine=i386-pc
+ basic_os=go32
+ ;;
+ h8300hms)
+ basic_machine=h8300-hitachi
+ basic_os=hms
+ ;;
+ h8300xray)
+ basic_machine=h8300-hitachi
+ basic_os=xray
+ ;;
+ h8500hms)
+ basic_machine=h8500-hitachi
+ basic_os=hms
+ ;;
+ harris)
+ basic_machine=m88k-harris
+ basic_os=sysv3
+ ;;
+ hp300 | hp300hpux)
+ basic_machine=m68k-hp
+ basic_os=hpux
+ ;;
+ hp300bsd)
+ basic_machine=m68k-hp
+ basic_os=bsd
+ ;;
+ hppaosf)
+ basic_machine=hppa1.1-hp
+ basic_os=osf
+ ;;
+ hppro)
+ basic_machine=hppa1.1-hp
+ basic_os=proelf
+ ;;
+ i386mach)
+ basic_machine=i386-mach
+ basic_os=mach
+ ;;
+ isi68 | isi)
+ basic_machine=m68k-isi
+ basic_os=sysv
+ ;;
+ m68knommu)
+ basic_machine=m68k-unknown
+ basic_os=linux
+ ;;
+ magnum | m3230)
+ basic_machine=mips-mips
+ basic_os=sysv
+ ;;
+ merlin)
+ basic_machine=ns32k-utek
+ basic_os=sysv
+ ;;
+ mingw64)
+ basic_machine=x86_64-pc
+ basic_os=mingw64
+ ;;
+ mingw32)
+ basic_machine=i686-pc
+ basic_os=mingw32
+ ;;
+ mingw32ce)
+ basic_machine=arm-unknown
+ basic_os=mingw32ce
+ ;;
+ monitor)
+ basic_machine=m68k-rom68k
+ basic_os=coff
+ ;;
+ morphos)
+ basic_machine=powerpc-unknown
+ basic_os=morphos
+ ;;
+ moxiebox)
+ basic_machine=moxie-unknown
+ basic_os=moxiebox
+ ;;
+ msdos)
+ basic_machine=i386-pc
+ basic_os=msdos
+ ;;
+ msys)
+ basic_machine=i686-pc
+ basic_os=msys
+ ;;
+ mvs)
+ basic_machine=i370-ibm
+ basic_os=mvs
+ ;;
+ nacl)
+ basic_machine=le32-unknown
+ basic_os=nacl
+ ;;
+ ncr3000)
+ basic_machine=i486-ncr
+ basic_os=sysv4
+ ;;
+ netbsd386)
+ basic_machine=i386-pc
+ basic_os=netbsd
+ ;;
+ netwinder)
+ basic_machine=armv4l-rebel
+ basic_os=linux
+ ;;
+ news | news700 | news800 | news900)
+ basic_machine=m68k-sony
+ basic_os=newsos
+ ;;
+ news1000)
+ basic_machine=m68030-sony
+ basic_os=newsos
+ ;;
+ necv70)
+ basic_machine=v70-nec
+ basic_os=sysv
+ ;;
+ nh3000)
+ basic_machine=m68k-harris
+ basic_os=cxux
+ ;;
+ nh[45]000)
+ basic_machine=m88k-harris
+ basic_os=cxux
+ ;;
+ nindy960)
+ basic_machine=i960-intel
+ basic_os=nindy
+ ;;
+ mon960)
+ basic_machine=i960-intel
+ basic_os=mon960
+ ;;
+ nonstopux)
+ basic_machine=mips-compaq
+ basic_os=nonstopux
+ ;;
+ os400)
+ basic_machine=powerpc-ibm
+ basic_os=os400
+ ;;
+ OSE68000 | ose68000)
+ basic_machine=m68000-ericsson
+ basic_os=ose
+ ;;
+ os68k)
+ basic_machine=m68k-none
+ basic_os=os68k
+ ;;
+ paragon)
+ basic_machine=i860-intel
+ basic_os=osf
+ ;;
+ parisc)
+ basic_machine=hppa-unknown
+ basic_os=linux
+ ;;
+ psp)
+ basic_machine=mipsallegrexel-sony
+ basic_os=psp
+ ;;
+ pw32)
+ basic_machine=i586-unknown
+ basic_os=pw32
+ ;;
+ rdos | rdos64)
+ basic_machine=x86_64-pc
+ basic_os=rdos
+ ;;
+ rdos32)
+ basic_machine=i386-pc
+ basic_os=rdos
+ ;;
+ rom68k)
+ basic_machine=m68k-rom68k
+ basic_os=coff
+ ;;
+ sa29200)
+ basic_machine=a29k-amd
+ basic_os=udi
+ ;;
+ sei)
+ basic_machine=mips-sei
+ basic_os=seiux
+ ;;
+ sequent)
+ basic_machine=i386-sequent
+ basic_os=
+ ;;
+ sps7)
+ basic_machine=m68k-bull
+ basic_os=sysv2
+ ;;
+ st2000)
+ basic_machine=m68k-tandem
+ basic_os=
+ ;;
+ stratus)
+ basic_machine=i860-stratus
+ basic_os=sysv4
+ ;;
+ sun2)
+ basic_machine=m68000-sun
+ basic_os=
+ ;;
+ sun2os3)
+ basic_machine=m68000-sun
+ basic_os=sunos3
+ ;;
+ sun2os4)
+ basic_machine=m68000-sun
+ basic_os=sunos4
+ ;;
+ sun3)
+ basic_machine=m68k-sun
+ basic_os=
+ ;;
+ sun3os3)
+ basic_machine=m68k-sun
+ basic_os=sunos3
+ ;;
+ sun3os4)
+ basic_machine=m68k-sun
+ basic_os=sunos4
+ ;;
+ sun4)
+ basic_machine=sparc-sun
+ basic_os=
+ ;;
+ sun4os3)
+ basic_machine=sparc-sun
+ basic_os=sunos3
+ ;;
+ sun4os4)
+ basic_machine=sparc-sun
+ basic_os=sunos4
+ ;;
+ sun4sol2)
+ basic_machine=sparc-sun
+ basic_os=solaris2
+ ;;
+ sun386 | sun386i | roadrunner)
+ basic_machine=i386-sun
+ basic_os=
+ ;;
+ sv1)
+ basic_machine=sv1-cray
+ basic_os=unicos
+ ;;
+ symmetry)
+ basic_machine=i386-sequent
+ basic_os=dynix
+ ;;
+ t3e)
+ basic_machine=alphaev5-cray
+ basic_os=unicos
+ ;;
+ t90)
+ basic_machine=t90-cray
+ basic_os=unicos
+ ;;
+ toad1)
+ basic_machine=pdp10-xkl
+ basic_os=tops20
+ ;;
+ tpf)
+ basic_machine=s390x-ibm
+ basic_os=tpf
+ ;;
+ udi29k)
+ basic_machine=a29k-amd
+ basic_os=udi
+ ;;
+ ultra3)
+ basic_machine=a29k-nyu
+ basic_os=sym1
+ ;;
+ v810 | necv810)
+ basic_machine=v810-nec
+ basic_os=none
+ ;;
+ vaxv)
+ basic_machine=vax-dec
+ basic_os=sysv
+ ;;
+ vms)
+ basic_machine=vax-dec
+ basic_os=vms
+ ;;
+ vsta)
+ basic_machine=i386-pc
+ basic_os=vsta
+ ;;
+ vxworks960)
+ basic_machine=i960-wrs
+ basic_os=vxworks
+ ;;
+ vxworks68)
+ basic_machine=m68k-wrs
+ basic_os=vxworks
+ ;;
+ vxworks29k)
+ basic_machine=a29k-wrs
+ basic_os=vxworks
+ ;;
+ xbox)
+ basic_machine=i686-pc
+ basic_os=mingw32
+ ;;
+ ymp)
+ basic_machine=ymp-cray
+ basic_os=unicos
+ ;;
+ *)
+ basic_machine=$1
+ basic_os=
+ ;;
+ esac
;;
esac
-# Decode aliases for certain CPU-COMPANY combinations.
+# Decode 1-component or ad-hoc basic machines
case $basic_machine in
- # Recognize the basic CPU types without company name.
- # Some are omitted here because they have special meanings below.
- 1750a | 580 \
- | a29k \
- | aarch64 | aarch64_be \
- | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
- | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
- | am33_2.0 \
- | arc | arceb \
- | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
- | avr | avr32 \
- | ba \
- | be32 | be64 \
- | bfin \
- | c4x | c8051 | clipper \
- | d10v | d30v | dlx | dsp16xx \
- | e2k | epiphany \
- | fido | fr30 | frv | ft32 \
- | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
- | hexagon \
- | i370 | i860 | i960 | ia64 \
- | ip2k | iq2000 \
- | k1om \
- | le32 | le64 \
- | lm32 \
- | m32c | m32r | m32rle | m68000 | m68k | m88k \
- | maxq | mb | microblaze | microblazeel | mcore | mep | metag \
- | mips | mipsbe | mipseb | mipsel | mipsle \
- | mips16 \
- | mips64 | mips64el \
- | mips64octeon | mips64octeonel \
- | mips64orion | mips64orionel \
- | mips64r5900 | mips64r5900el \
- | mips64vr | mips64vrel \
- | mips64vr4100 | mips64vr4100el \
- | mips64vr4300 | mips64vr4300el \
- | mips64vr5000 | mips64vr5000el \
- | mips64vr5900 | mips64vr5900el \
- | mipsisa32 | mipsisa32el \
- | mipsisa32r2 | mipsisa32r2el \
- | mipsisa32r6 | mipsisa32r6el \
- | mipsisa64 | mipsisa64el \
- | mipsisa64r2 | mipsisa64r2el \
- | mipsisa64r6 | mipsisa64r6el \
- | mipsisa64sb1 | mipsisa64sb1el \
- | mipsisa64sr71k | mipsisa64sr71kel \
- | mipsr5900 | mipsr5900el \
- | mipstx39 | mipstx39el \
- | mn10200 | mn10300 \
- | moxie \
- | mt \
- | msp430 \
- | nds32 | nds32le | nds32be \
- | nios | nios2 | nios2eb | nios2el \
- | ns16k | ns32k \
- | open8 | or1k | or1knd | or32 \
- | pdp10 | pdp11 | pj | pjl \
- | powerpc | powerpc64 | powerpc64le | powerpcle \
- | pru \
- | pyramid \
- | riscv32 | riscv64 \
- | rl78 | rx \
- | score \
- | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
- | sh64 | sh64le \
- | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \
- | sparcv8 | sparcv9 | sparcv9b | sparcv9v \
- | spu \
- | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
- | ubicom32 \
- | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
- | visium \
- | we32k \
- | x86 | xc16x | xstormy16 | xtensa \
- | z8k | z80)
- basic_machine=$basic_machine-unknown
- ;;
- c54x)
- basic_machine=tic54x-unknown
- ;;
- c55x)
- basic_machine=tic55x-unknown
- ;;
- c6x)
- basic_machine=tic6x-unknown
- ;;
- leon|leon[3-9])
- basic_machine=sparc-$basic_machine
- ;;
- m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip)
- basic_machine=$basic_machine-unknown
- os=-none
+ # Here we handle the default manufacturer of certain CPU types. It is in
+ # some cases the only manufacturer, in others, it is the most popular.
+ w89k)
+ cpu=hppa1.1
+ vendor=winbond
;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
+ op50n)
+ cpu=hppa1.1
+ vendor=oki
;;
- ms1)
- basic_machine=mt-unknown
+ op60c)
+ cpu=hppa1.1
+ vendor=oki
;;
-
- strongarm | thumb | xscale)
- basic_machine=arm-unknown
+ ibm*)
+ cpu=i370
+ vendor=ibm
;;
- xgate)
- basic_machine=$basic_machine-unknown
- os=-none
+ orion105)
+ cpu=clipper
+ vendor=highlevel
;;
- xscaleeb)
- basic_machine=armeb-unknown
+ mac | mpw | mac-mpw)
+ cpu=m68k
+ vendor=apple
;;
-
- xscaleel)
- basic_machine=armel-unknown
+ pmac | pmac-mpw)
+ cpu=powerpc
+ vendor=apple
;;
- # We use `pc' rather than `unknown'
- # because (1) that's what they normally are, and
- # (2) the word "unknown" tends to confuse beginning users.
- i*86 | x86_64)
- basic_machine=$basic_machine-pc
- ;;
- # Object if more than one company name word.
- *-*-*)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
- # Recognize the basic CPU types with company name.
- 580-* \
- | a29k-* \
- | aarch64-* | aarch64_be-* \
- | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
- | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
- | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \
- | arm-* | armbe-* | armle-* | armeb-* | armv*-* \
- | avr-* | avr32-* \
- | ba-* \
- | be32-* | be64-* \
- | bfin-* | bs2000-* \
- | c[123]* | c30-* | [cjt]90-* | c4x-* \
- | c8051-* | clipper-* | craynv-* | cydra-* \
- | d10v-* | d30v-* | dlx-* \
- | e2k-* | elxsi-* \
- | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
- | h8300-* | h8500-* \
- | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
- | hexagon-* \
- | i*86-* | i860-* | i960-* | ia64-* \
- | ip2k-* | iq2000-* \
- | k1om-* \
- | le32-* | le64-* \
- | lm32-* \
- | m32c-* | m32r-* | m32rle-* \
- | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
- | microblaze-* | microblazeel-* \
- | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
- | mips16-* \
- | mips64-* | mips64el-* \
- | mips64octeon-* | mips64octeonel-* \
- | mips64orion-* | mips64orionel-* \
- | mips64r5900-* | mips64r5900el-* \
- | mips64vr-* | mips64vrel-* \
- | mips64vr4100-* | mips64vr4100el-* \
- | mips64vr4300-* | mips64vr4300el-* \
- | mips64vr5000-* | mips64vr5000el-* \
- | mips64vr5900-* | mips64vr5900el-* \
- | mipsisa32-* | mipsisa32el-* \
- | mipsisa32r2-* | mipsisa32r2el-* \
- | mipsisa32r6-* | mipsisa32r6el-* \
- | mipsisa64-* | mipsisa64el-* \
- | mipsisa64r2-* | mipsisa64r2el-* \
- | mipsisa64r6-* | mipsisa64r6el-* \
- | mipsisa64sb1-* | mipsisa64sb1el-* \
- | mipsisa64sr71k-* | mipsisa64sr71kel-* \
- | mipsr5900-* | mipsr5900el-* \
- | mipstx39-* | mipstx39el-* \
- | mmix-* \
- | mt-* \
- | msp430-* \
- | nds32-* | nds32le-* | nds32be-* \
- | nios-* | nios2-* | nios2eb-* | nios2el-* \
- | none-* | np1-* | ns16k-* | ns32k-* \
- | open8-* \
- | or1k*-* \
- | orion-* \
- | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
- | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
- | pru-* \
- | pyramid-* \
- | riscv32-* | riscv64-* \
- | rl78-* | romp-* | rs6000-* | rx-* \
- | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
- | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
- | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
- | sparclite-* \
- | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \
- | tahoe-* \
- | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tile*-* \
- | tron-* \
- | ubicom32-* \
- | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
- | vax-* \
- | visium-* \
- | we32k-* \
- | x86-* | x86_64-* | xc16x-* | xps100-* \
- | xstormy16-* | xtensa*-* \
- | ymp-* \
- | z8k-* | z80-*)
- ;;
- # Recognize the basic CPU types without company name, with glob match.
- xtensa*)
- basic_machine=$basic_machine-unknown
- ;;
# Recognize the various machine names and aliases which stand
# for a CPU type and a company and sometimes even an OS.
- 386bsd)
- basic_machine=i386-unknown
- os=-bsd
- ;;
3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
- basic_machine=m68000-att
+ cpu=m68000
+ vendor=att
;;
3b*)
- basic_machine=we32k-att
- ;;
- a29khif)
- basic_machine=a29k-amd
- os=-udi
- ;;
- abacus)
- basic_machine=abacus-unknown
- ;;
- adobe68k)
- basic_machine=m68010-adobe
- os=-scout
- ;;
- alliant | fx80)
- basic_machine=fx80-alliant
- ;;
- altos | altos3068)
- basic_machine=m68k-altos
- ;;
- am29k)
- basic_machine=a29k-none
- os=-bsd
- ;;
- amd64)
- basic_machine=x86_64-pc
- ;;
- amd64-*)
- basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- amdahl)
- basic_machine=580-amdahl
- os=-sysv
- ;;
- amiga | amiga-*)
- basic_machine=m68k-unknown
- ;;
- amigaos | amigados)
- basic_machine=m68k-unknown
- os=-amigaos
- ;;
- amigaunix | amix)
- basic_machine=m68k-unknown
- os=-sysv4
- ;;
- apollo68)
- basic_machine=m68k-apollo
- os=-sysv
- ;;
- apollo68bsd)
- basic_machine=m68k-apollo
- os=-bsd
- ;;
- aros)
- basic_machine=i386-pc
- os=-aros
- ;;
- asmjs)
- basic_machine=asmjs-unknown
- ;;
- aux)
- basic_machine=m68k-apple
- os=-aux
- ;;
- balance)
- basic_machine=ns32k-sequent
- os=-dynix
- ;;
- blackfin)
- basic_machine=bfin-unknown
- os=-linux
- ;;
- blackfin-*)
- basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
+ cpu=we32k
+ vendor=att
;;
bluegene*)
- basic_machine=powerpc-ibm
- os=-cnk
- ;;
- c54x-*)
- basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c55x-*)
- basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c6x-*)
- basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- c90)
- basic_machine=c90-cray
- os=-unicos
- ;;
- cegcc)
- basic_machine=arm-unknown
- os=-cegcc
- ;;
- convex-c1)
- basic_machine=c1-convex
- os=-bsd
- ;;
- convex-c2)
- basic_machine=c2-convex
- os=-bsd
- ;;
- convex-c32)
- basic_machine=c32-convex
- os=-bsd
- ;;
- convex-c34)
- basic_machine=c34-convex
- os=-bsd
- ;;
- convex-c38)
- basic_machine=c38-convex
- os=-bsd
- ;;
- cray | j90)
- basic_machine=j90-cray
- os=-unicos
- ;;
- craynv)
- basic_machine=craynv-cray
- os=-unicosmp
- ;;
- cr16 | cr16-*)
- basic_machine=cr16-unknown
- os=-elf
- ;;
- crds | unos)
- basic_machine=m68k-crds
- ;;
- crisv32 | crisv32-* | etraxfs*)
- basic_machine=crisv32-axis
- ;;
- cris | cris-* | etrax*)
- basic_machine=cris-axis
- ;;
- crx)
- basic_machine=crx-unknown
- os=-elf
- ;;
- da30 | da30-*)
- basic_machine=m68k-da30
- ;;
- decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
- basic_machine=mips-dec
+ cpu=powerpc
+ vendor=ibm
+ basic_os=cnk
;;
decsystem10* | dec10*)
- basic_machine=pdp10-dec
- os=-tops10
+ cpu=pdp10
+ vendor=dec
+ basic_os=tops10
;;
decsystem20* | dec20*)
- basic_machine=pdp10-dec
- os=-tops20
+ cpu=pdp10
+ vendor=dec
+ basic_os=tops20
;;
delta | 3300 | motorola-3300 | motorola-delta \
| 3300-motorola | delta-motorola)
- basic_machine=m68k-motorola
- ;;
- delta88)
- basic_machine=m88k-motorola
- os=-sysv3
- ;;
- dicos)
- basic_machine=i686-pc
- os=-dicos
- ;;
- djgpp)
- basic_machine=i586-pc
- os=-msdosdjgpp
+ cpu=m68k
+ vendor=motorola
;;
- dpx20 | dpx20-*)
- basic_machine=rs6000-bull
- os=-bosx
- ;;
- dpx2* | dpx2*-bull)
- basic_machine=m68k-bull
- os=-sysv3
- ;;
- e500v[12])
- basic_machine=powerpc-unknown
- os=$os"spe"
- ;;
- e500v[12]-*)
- basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=$os"spe"
- ;;
- ebmon29k)
- basic_machine=a29k-amd
- os=-ebmon
- ;;
- elxsi)
- basic_machine=elxsi-elxsi
- os=-bsd
+ dpx2*)
+ cpu=m68k
+ vendor=bull
+ basic_os=sysv3
;;
encore | umax | mmax)
- basic_machine=ns32k-encore
+ cpu=ns32k
+ vendor=encore
;;
- es1800 | OSE68k | ose68k | ose | OSE)
- basic_machine=m68k-ericsson
- os=-ose
+ elxsi)
+ cpu=elxsi
+ vendor=elxsi
+ basic_os=${basic_os:-bsd}
;;
fx2800)
- basic_machine=i860-alliant
+ cpu=i860
+ vendor=alliant
;;
genix)
- basic_machine=ns32k-ns
- ;;
- gmicro)
- basic_machine=tron-gmicro
- os=-sysv
- ;;
- go32)
- basic_machine=i386-pc
- os=-go32
+ cpu=ns32k
+ vendor=ns
;;
h3050r* | hiux*)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- h8300hms)
- basic_machine=h8300-hitachi
- os=-hms
- ;;
- h8300xray)
- basic_machine=h8300-hitachi
- os=-xray
- ;;
- h8500hms)
- basic_machine=h8500-hitachi
- os=-hms
- ;;
- harris)
- basic_machine=m88k-harris
- os=-sysv3
- ;;
- hp300-*)
- basic_machine=m68k-hp
- ;;
- hp300bsd)
- basic_machine=m68k-hp
- os=-bsd
- ;;
- hp300hpux)
- basic_machine=m68k-hp
- os=-hpux
+ cpu=hppa1.1
+ vendor=hitachi
+ basic_os=hiuxwe2
;;
hp3k9[0-9][0-9] | hp9[0-9][0-9])
- basic_machine=hppa1.0-hp
+ cpu=hppa1.0
+ vendor=hp
;;
hp9k2[0-9][0-9] | hp9k31[0-9])
- basic_machine=m68000-hp
+ cpu=m68000
+ vendor=hp
;;
hp9k3[2-9][0-9])
- basic_machine=m68k-hp
+ cpu=m68k
+ vendor=hp
;;
hp9k6[0-9][0-9] | hp6[0-9][0-9])
- basic_machine=hppa1.0-hp
+ cpu=hppa1.0
+ vendor=hp
;;
hp9k7[0-79][0-9] | hp7[0-79][0-9])
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k78[0-9] | hp78[0-9])
# FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
# FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[0-9][13679] | hp8[0-9][13679])
- basic_machine=hppa1.1-hp
+ cpu=hppa1.1
+ vendor=hp
;;
hp9k8[0-9][0-9] | hp8[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hppa-next)
- os=-nextstep3
- ;;
- hppaosf)
- basic_machine=hppa1.1-hp
- os=-osf
- ;;
- hppro)
- basic_machine=hppa1.1-hp
- os=-proelf
- ;;
- i370-ibm* | ibm*)
- basic_machine=i370-ibm
+ cpu=hppa1.0
+ vendor=hp
;;
i*86v32)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv32
+ cpu=$(echo "$1" | sed -e 's/86.*/86/')
+ vendor=pc
+ basic_os=sysv32
;;
i*86v4*)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv4
+ cpu=$(echo "$1" | sed -e 's/86.*/86/')
+ vendor=pc
+ basic_os=sysv4
;;
i*86v)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv
+ cpu=$(echo "$1" | sed -e 's/86.*/86/')
+ vendor=pc
+ basic_os=sysv
;;
i*86sol2)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-solaris2
- ;;
- i386mach)
- basic_machine=i386-mach
- os=-mach
+ cpu=$(echo "$1" | sed -e 's/86.*/86/')
+ vendor=pc
+ basic_os=solaris2
;;
- i386-vsta | vsta)
- basic_machine=i386-unknown
- os=-vsta
+ j90 | j90-cray)
+ cpu=j90
+ vendor=cray
+ basic_os=${basic_os:-unicos}
;;
iris | iris4d)
- basic_machine=mips-sgi
- case $os in
- -irix*)
+ cpu=mips
+ vendor=sgi
+ case $basic_os in
+ irix*)
;;
*)
- os=-irix4
+ basic_os=irix4
;;
esac
;;
- isi68 | isi)
- basic_machine=m68k-isi
- os=-sysv
- ;;
- leon-*|leon[3-9]-*)
- basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'`
- ;;
- m68knommu)
- basic_machine=m68k-unknown
- os=-linux
- ;;
- m68knommu-*)
- basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
- ;;
- m88k-omron*)
- basic_machine=m88k-omron
- ;;
- magnum | m3230)
- basic_machine=mips-mips
- os=-sysv
- ;;
- merlin)
- basic_machine=ns32k-utek
- os=-sysv
- ;;
- microblaze*)
- basic_machine=microblaze-xilinx
- ;;
- mingw64)
- basic_machine=x86_64-pc
- os=-mingw64
- ;;
- mingw32)
- basic_machine=i686-pc
- os=-mingw32
- ;;
- mingw32ce)
- basic_machine=arm-unknown
- os=-mingw32ce
- ;;
miniframe)
- basic_machine=m68000-convergent
+ cpu=m68000
+ vendor=convergent
;;
- *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
- mips3*-*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
- ;;
- mips3*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
- ;;
- monitor)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- morphos)
- basic_machine=powerpc-unknown
- os=-morphos
- ;;
- moxiebox)
- basic_machine=moxie-unknown
- os=-moxiebox
- ;;
- msdos)
- basic_machine=i386-pc
- os=-msdos
- ;;
- ms1-*)
- basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
- ;;
- msys)
- basic_machine=i686-pc
- os=-msys
- ;;
- mvs)
- basic_machine=i370-ibm
- os=-mvs
- ;;
- nacl)
- basic_machine=le32-unknown
- os=-nacl
- ;;
- ncr3000)
- basic_machine=i486-ncr
- os=-sysv4
- ;;
- netbsd386)
- basic_machine=i386-unknown
- os=-netbsd
- ;;
- netwinder)
- basic_machine=armv4l-rebel
- os=-linux
- ;;
- news | news700 | news800 | news900)
- basic_machine=m68k-sony
- os=-newsos
- ;;
- news1000)
- basic_machine=m68030-sony
- os=-newsos
+ *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
+ cpu=m68k
+ vendor=atari
+ basic_os=mint
;;
news-3600 | risc-news)
- basic_machine=mips-sony
- os=-newsos
- ;;
- necv70)
- basic_machine=v70-nec
- os=-sysv
- ;;
- next | m*-next )
- basic_machine=m68k-next
- case $os in
- -nextstep* )
+ cpu=mips
+ vendor=sony
+ basic_os=newsos
+ ;;
+ next | m*-next)
+ cpu=m68k
+ vendor=next
+ case $basic_os in
+ openstep*)
+ ;;
+ nextstep*)
;;
- -ns2*)
- os=-nextstep2
+ ns2*)
+ basic_os=nextstep2
;;
*)
- os=-nextstep3
+ basic_os=nextstep3
;;
esac
;;
- nh3000)
- basic_machine=m68k-harris
- os=-cxux
- ;;
- nh[45]000)
- basic_machine=m88k-harris
- os=-cxux
- ;;
- nindy960)
- basic_machine=i960-intel
- os=-nindy
- ;;
- mon960)
- basic_machine=i960-intel
- os=-mon960
- ;;
- nonstopux)
- basic_machine=mips-compaq
- os=-nonstopux
- ;;
np1)
- basic_machine=np1-gould
- ;;
- neo-tandem)
- basic_machine=neo-tandem
- ;;
- nse-tandem)
- basic_machine=nse-tandem
- ;;
- nsr-tandem)
- basic_machine=nsr-tandem
+ cpu=np1
+ vendor=gould
;;
op50n-* | op60c-*)
- basic_machine=hppa1.1-oki
- os=-proelf
- ;;
- openrisc | openrisc-*)
- basic_machine=or32-unknown
- ;;
- os400)
- basic_machine=powerpc-ibm
- os=-os400
- ;;
- OSE68000 | ose68000)
- basic_machine=m68000-ericsson
- os=-ose
- ;;
- os68k)
- basic_machine=m68k-none
- os=-os68k
+ cpu=hppa1.1
+ vendor=oki
+ basic_os=proelf
;;
pa-hitachi)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- paragon)
- basic_machine=i860-intel
- os=-osf
- ;;
- parisc)
- basic_machine=hppa-unknown
- os=-linux
- ;;
- parisc-*)
- basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'`
- os=-linux
+ cpu=hppa1.1
+ vendor=hitachi
+ basic_os=hiuxwe2
;;
pbd)
- basic_machine=sparc-tti
+ cpu=sparc
+ vendor=tti
;;
pbb)
- basic_machine=m68k-tti
- ;;
- pc532 | pc532-*)
- basic_machine=ns32k-pc532
- ;;
- pc98)
- basic_machine=i386-pc
- ;;
- pc98-*)
- basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium | p5 | k5 | k6 | nexgen | viac3)
- basic_machine=i586-pc
- ;;
- pentiumpro | p6 | 6x86 | athlon | athlon_*)
- basic_machine=i686-pc
- ;;
- pentiumii | pentium2 | pentiumiii | pentium3)
- basic_machine=i686-pc
- ;;
- pentium4)
- basic_machine=i786-pc
- ;;
- pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
- basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
+ cpu=m68k
+ vendor=tti
;;
- pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentium4-*)
- basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'`
+ pc532)
+ cpu=ns32k
+ vendor=pc532
;;
pn)
- basic_machine=pn-gould
- ;;
- power) basic_machine=power-ibm
- ;;
- ppc | ppcbe) basic_machine=powerpc-unknown
- ;;
- ppc-* | ppcbe-*)
- basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppcle | powerpclittle)
- basic_machine=powerpcle-unknown
- ;;
- ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64) basic_machine=powerpc64-unknown
- ;;
- ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppc64le | powerpc64little)
- basic_machine=powerpc64le-unknown
+ cpu=pn
+ vendor=gould
;;
- ppc64le-* | powerpc64little-*)
- basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'`
+ power)
+ cpu=power
+ vendor=ibm
;;
ps2)
- basic_machine=i386-ibm
- ;;
- pw32)
- basic_machine=i586-unknown
- os=-pw32
- ;;
- rdos | rdos64)
- basic_machine=x86_64-pc
- os=-rdos
- ;;
- rdos32)
- basic_machine=i386-pc
- os=-rdos
- ;;
- rom68k)
- basic_machine=m68k-rom68k
- os=-coff
+ cpu=i386
+ vendor=ibm
;;
rm[46]00)
- basic_machine=mips-siemens
+ cpu=mips
+ vendor=siemens
;;
rtpc | rtpc-*)
- basic_machine=romp-ibm
- ;;
- s390 | s390-*)
- basic_machine=s390-ibm
- ;;
- s390x | s390x-*)
- basic_machine=s390x-ibm
+ cpu=romp
+ vendor=ibm
;;
- sa29200)
- basic_machine=a29k-amd
- os=-udi
- ;;
- sb1)
- basic_machine=mipsisa64sb1-unknown
+ sde)
+ cpu=mipsisa32
+ vendor=sde
+ basic_os=${basic_os:-elf}
;;
- sb1el)
- basic_machine=mipsisa64sb1el-unknown
+ simso-wrs)
+ cpu=sparclite
+ vendor=wrs
+ basic_os=vxworks
;;
- sde)
- basic_machine=mipsisa32-sde
- os=-elf
+ tower | tower-32)
+ cpu=m68k
+ vendor=ncr
;;
- sei)
- basic_machine=mips-sei
- os=-seiux
+ vpp*|vx|vx-*)
+ cpu=f301
+ vendor=fujitsu
;;
- sequent)
- basic_machine=i386-sequent
+ w65)
+ cpu=w65
+ vendor=wdc
;;
- sh)
- basic_machine=sh-hitachi
- os=-hms
+ w89k-*)
+ cpu=hppa1.1
+ vendor=winbond
+ basic_os=proelf
;;
- sh5el)
- basic_machine=sh5le-unknown
+ none)
+ cpu=none
+ vendor=none
;;
- sh64)
- basic_machine=sh64-unknown
+ leon|leon[3-9])
+ cpu=sparc
+ vendor=$basic_machine
;;
- sparclite-wrs | simso-wrs)
- basic_machine=sparclite-wrs
- os=-vxworks
+ leon-*|leon[3-9]-*)
+ cpu=sparc
+ vendor=$(echo "$basic_machine" | sed 's/-.*//')
;;
- sps7)
- basic_machine=m68k-bull
- os=-sysv2
+
+ *-*)
+ # shellcheck disable=SC2162
+ IFS="-" read cpu vendor <<EOF
+$basic_machine
+EOF
;;
- spur)
- basic_machine=spur-unknown
+ # We use `pc' rather than `unknown'
+ # because (1) that's what they normally are, and
+ # (2) the word "unknown" tends to confuse beginning users.
+ i*86 | x86_64)
+ cpu=$basic_machine
+ vendor=pc
;;
- st2000)
- basic_machine=m68k-tandem
+ # These rules are duplicated from below for sake of the special case above;
+ # i.e. things that normalized to x86 arches should also default to "pc"
+ pc98)
+ cpu=i386
+ vendor=pc
;;
- stratus)
- basic_machine=i860-stratus
- os=-sysv4
+ x64 | amd64)
+ cpu=x86_64
+ vendor=pc
;;
- strongarm-* | thumb-*)
- basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'`
+ # Recognize the basic CPU types without company name.
+ *)
+ cpu=$basic_machine
+ vendor=unknown
;;
- sun2)
- basic_machine=m68000-sun
+esac
+
+unset -v basic_machine
+
+# Decode basic machines in the full and proper CPU-Company form.
+case $cpu-$vendor in
+ # Here we handle the default manufacturer of certain CPU types in canonical form. It is in
+ # some cases the only manufacturer, in others, it is the most popular.
+ craynv-unknown)
+ vendor=cray
+ basic_os=${basic_os:-unicosmp}
;;
- sun2os3)
- basic_machine=m68000-sun
- os=-sunos3
+ c90-unknown | c90-cray)
+ vendor=cray
+ basic_os=${Basic_os:-unicos}
;;
- sun2os4)
- basic_machine=m68000-sun
- os=-sunos4
+ fx80-unknown)
+ vendor=alliant
;;
- sun3os3)
- basic_machine=m68k-sun
- os=-sunos3
+ romp-unknown)
+ vendor=ibm
;;
- sun3os4)
- basic_machine=m68k-sun
- os=-sunos4
+ mmix-unknown)
+ vendor=knuth
;;
- sun4os3)
- basic_machine=sparc-sun
- os=-sunos3
+ microblaze-unknown | microblazeel-unknown)
+ vendor=xilinx
;;
- sun4os4)
- basic_machine=sparc-sun
- os=-sunos4
+ rs6000-unknown)
+ vendor=ibm
;;
- sun4sol2)
- basic_machine=sparc-sun
- os=-solaris2
+ vax-unknown)
+ vendor=dec
;;
- sun3 | sun3-*)
- basic_machine=m68k-sun
+ pdp11-unknown)
+ vendor=dec
;;
- sun4)
- basic_machine=sparc-sun
+ we32k-unknown)
+ vendor=att
;;
- sun386 | sun386i | roadrunner)
- basic_machine=i386-sun
+ cydra-unknown)
+ vendor=cydrome
;;
- sv1)
- basic_machine=sv1-cray
- os=-unicos
+ i370-ibm*)
+ vendor=ibm
;;
- symmetry)
- basic_machine=i386-sequent
- os=-dynix
+ orion-unknown)
+ vendor=highlevel
;;
- t3e)
- basic_machine=alphaev5-cray
- os=-unicos
+ xps-unknown | xps100-unknown)
+ cpu=xps100
+ vendor=honeywell
;;
- t90)
- basic_machine=t90-cray
- os=-unicos
+
+ # Here we normalize CPU types with a missing or matching vendor
+ dpx20-unknown | dpx20-bull)
+ cpu=rs6000
+ vendor=bull
+ basic_os=${basic_os:-bosx}
;;
- tile*)
- basic_machine=$basic_machine-unknown
- os=-linux-gnu
+
+ # Here we normalize CPU types irrespective of the vendor
+ amd64-*)
+ cpu=x86_64
;;
- tx39)
- basic_machine=mipstx39-unknown
+ blackfin-*)
+ cpu=bfin
+ basic_os=linux
;;
- tx39el)
- basic_machine=mipstx39el-unknown
+ c54x-*)
+ cpu=tic54x
;;
- toad1)
- basic_machine=pdp10-xkl
- os=-tops20
+ c55x-*)
+ cpu=tic55x
;;
- tower | tower-32)
- basic_machine=m68k-ncr
+ c6x-*)
+ cpu=tic6x
;;
- tpf)
- basic_machine=s390x-ibm
- os=-tpf
+ e500v[12]-*)
+ cpu=powerpc
+ basic_os=${basic_os}"spe"
;;
- udi29k)
- basic_machine=a29k-amd
- os=-udi
+ mips3*-*)
+ cpu=mips64
;;
- ultra3)
- basic_machine=a29k-nyu
- os=-sym1
+ ms1-*)
+ cpu=mt
;;
- v810 | necv810)
- basic_machine=v810-nec
- os=-none
+ m68knommu-*)
+ cpu=m68k
+ basic_os=linux
;;
- vaxv)
- basic_machine=vax-dec
- os=-sysv
+ m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
+ cpu=s12z
;;
- vms)
- basic_machine=vax-dec
- os=-vms
+ openrisc-*)
+ cpu=or32
;;
- vpp*|vx|vx-*)
- basic_machine=f301-fujitsu
+ parisc-*)
+ cpu=hppa
+ basic_os=linux
;;
- vxworks960)
- basic_machine=i960-wrs
- os=-vxworks
+ pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
+ cpu=i586
;;
- vxworks68)
- basic_machine=m68k-wrs
- os=-vxworks
+ pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
+ cpu=i686
;;
- vxworks29k)
- basic_machine=a29k-wrs
- os=-vxworks
+ pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
+ cpu=i686
;;
- w65*)
- basic_machine=w65-wdc
- os=-none
+ pentium4-*)
+ cpu=i786
;;
- w89k-*)
- basic_machine=hppa1.1-winbond
- os=-proelf
+ pc98-*)
+ cpu=i386
;;
- xbox)
- basic_machine=i686-pc
- os=-mingw32
+ ppc-* | ppcbe-*)
+ cpu=powerpc
;;
- xps | xps100)
- basic_machine=xps100-honeywell
+ ppcle-* | powerpclittle-*)
+ cpu=powerpcle
;;
- xscale-* | xscalee[bl]-*)
- basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'`
+ ppc64-*)
+ cpu=powerpc64
;;
- ymp)
- basic_machine=ymp-cray
- os=-unicos
+ ppc64le-* | powerpc64little-*)
+ cpu=powerpc64le
;;
- z8k-*-coff)
- basic_machine=z8k-unknown
- os=-sim
+ sb1-*)
+ cpu=mipsisa64sb1
;;
- z80-*-coff)
- basic_machine=z80-unknown
- os=-sim
+ sb1el-*)
+ cpu=mipsisa64sb1el
;;
- none)
- basic_machine=none-none
- os=-none
+ sh5e[lb]-*)
+ cpu=$(echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/')
;;
-
-# Here we handle the default manufacturer of certain CPU types. It is in
-# some cases the only manufacturer, in others, it is the most popular.
- w89k)
- basic_machine=hppa1.1-winbond
+ spur-*)
+ cpu=spur
;;
- op50n)
- basic_machine=hppa1.1-oki
+ strongarm-* | thumb-*)
+ cpu=arm
;;
- op60c)
- basic_machine=hppa1.1-oki
+ tx39-*)
+ cpu=mipstx39
;;
- romp)
- basic_machine=romp-ibm
+ tx39el-*)
+ cpu=mipstx39el
;;
- mmix)
- basic_machine=mmix-knuth
+ x64-*)
+ cpu=x86_64
;;
- rs6000)
- basic_machine=rs6000-ibm
+ xscale-* | xscalee[bl]-*)
+ cpu=$(echo "$cpu" | sed 's/^xscale/arm/')
;;
- vax)
- basic_machine=vax-dec
+ arm64-*)
+ cpu=aarch64
;;
- pdp10)
- # there are many clones, so DEC is not a safe bet
- basic_machine=pdp10-unknown
+
+ # Recognize the canonical CPU Types that limit and/or modify the
+ # company names they are paired with.
+ cr16-*)
+ basic_os=${basic_os:-elf}
;;
- pdp11)
- basic_machine=pdp11-dec
+ crisv32-* | etraxfs*-*)
+ cpu=crisv32
+ vendor=axis
;;
- we32k)
- basic_machine=we32k-att
+ cris-* | etrax*-*)
+ cpu=cris
+ vendor=axis
;;
- sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele)
- basic_machine=sh-unknown
+ crx-*)
+ basic_os=${basic_os:-elf}
;;
- sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v)
- basic_machine=sparc-sun
+ neo-tandem)
+ cpu=neo
+ vendor=tandem
;;
- cydra)
- basic_machine=cydra-cydrome
+ nse-tandem)
+ cpu=nse
+ vendor=tandem
;;
- orion)
- basic_machine=orion-highlevel
+ nsr-tandem)
+ cpu=nsr
+ vendor=tandem
;;
- orion105)
- basic_machine=clipper-highlevel
+ nsv-tandem)
+ cpu=nsv
+ vendor=tandem
;;
- mac | mpw | mac-mpw)
- basic_machine=m68k-apple
+ nsx-tandem)
+ cpu=nsx
+ vendor=tandem
;;
- pmac | pmac-mpw)
- basic_machine=powerpc-apple
+ mipsallegrexel-sony)
+ cpu=mipsallegrexel
+ vendor=sony
;;
- *-unknown)
- # Make sure to match an already-canonicalized machine name.
+ tile*-*)
+ basic_os=${basic_os:-linux-gnu}
;;
+
*)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
+ # Recognize the canonical CPU types that are allowed with any
+ # company name.
+ case $cpu in
+ 1750a | 580 \
+ | a29k \
+ | aarch64 | aarch64_be \
+ | abacus \
+ | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
+ | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
+ | alphapca5[67] | alpha64pca5[67] \
+ | am33_2.0 \
+ | amdgcn \
+ | arc | arceb \
+ | arm | arm[lb]e | arme[lb] | armv* \
+ | avr | avr32 \
+ | asmjs \
+ | ba \
+ | be32 | be64 \
+ | bfin | bpf | bs2000 \
+ | c[123]* | c30 | [cjt]90 | c4x \
+ | c8051 | clipper | craynv | csky | cydra \
+ | d10v | d30v | dlx | dsp16xx \
+ | e2k | elxsi | epiphany \
+ | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
+ | h8300 | h8500 \
+ | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | hexagon \
+ | i370 | i*86 | i860 | i960 | ia16 | ia64 \
+ | ip2k | iq2000 \
+ | k1om \
+ | le32 | le64 \
+ | lm32 \
+ | loongarch32 | loongarch64 | loongarchx32 \
+ | m32c | m32r | m32rle \
+ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
+ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
+ | m88110 | m88k | maxq | mb | mcore | mep | metag \
+ | microblaze | microblazeel \
+ | mips | mipsbe | mipseb | mipsel | mipsle \
+ | mips16 \
+ | mips64 | mips64eb | mips64el \
+ | mips64octeon | mips64octeonel \
+ | mips64orion | mips64orionel \
+ | mips64r5900 | mips64r5900el \
+ | mips64vr | mips64vrel \
+ | mips64vr4100 | mips64vr4100el \
+ | mips64vr4300 | mips64vr4300el \
+ | mips64vr5000 | mips64vr5000el \
+ | mips64vr5900 | mips64vr5900el \
+ | mipsisa32 | mipsisa32el \
+ | mipsisa32r2 | mipsisa32r2el \
+ | mipsisa32r6 | mipsisa32r6el \
+ | mipsisa64 | mipsisa64el \
+ | mipsisa64r2 | mipsisa64r2el \
+ | mipsisa64r6 | mipsisa64r6el \
+ | mipsisa64sb1 | mipsisa64sb1el \
+ | mipsisa64sr71k | mipsisa64sr71kel \
+ | mipsr5900 | mipsr5900el \
+ | mipstx39 | mipstx39el \
+ | mmix \
+ | mn10200 | mn10300 \
+ | moxie \
+ | mt \
+ | msp430 \
+ | nds32 | nds32le | nds32be \
+ | nfp \
+ | nios | nios2 | nios2eb | nios2el \
+ | none | np1 | ns16k | ns32k | nvptx \
+ | open8 \
+ | or1k* \
+ | or32 \
+ | orion \
+ | picochip \
+ | pdp10 | pdp11 | pj | pjl | pn | power \
+ | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
+ | pru \
+ | pyramid \
+ | riscv | riscv32 | riscv32be | riscv64 | riscv64be \
+ | rl78 | romp | rs6000 | rx \
+ | s390 | s390x \
+ | score \
+ | sh | shl \
+ | sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
+ | sh[1234]e[lb] | sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
+ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
+ | sparclite \
+ | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
+ | spu \
+ | tahoe \
+ | thumbv7* \
+ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
+ | tron \
+ | ubicom32 \
+ | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
+ | vax \
+ | visium \
+ | w65 \
+ | wasm32 | wasm64 \
+ | we32k \
+ | x86 | x86_64 | xc16x | xgate | xps100 \
+ | xstormy16 | xtensa* \
+ | ymp \
+ | z8k | z80)
+ ;;
+
+ *)
+ echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
+ exit 1
+ ;;
+ esac
;;
esac
# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
- *-digital*)
- basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
+case $vendor in
+ digital*)
+ vendor=dec
;;
- *-commodore*)
- basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
+ commodore*)
+ vendor=cbm
;;
*)
;;
@@ -1355,203 +1280,213 @@ esac
# Decode manufacturer-specific aliases for certain operating systems.
-if [ x"$os" != x"" ]
+if test x$basic_os != x
then
-case $os in
- # First match some system type aliases
- # that might get confused with valid system types.
- # -solaris* is a basic system type, with this one exception.
- -auroraux)
- os=-auroraux
+
+# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just
+# set os.
+case $basic_os in
+ gnu/linux*)
+ kernel=linux
+ os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|')
+ ;;
+ os2-emx)
+ kernel=os2
+ os=$(echo $basic_os | sed -e 's|os2-emx|emx|')
+ ;;
+ nto-qnx*)
+ kernel=nto
+ os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|')
+ ;;
+ *-*)
+ # shellcheck disable=SC2162
+ IFS="-" read kernel os <<EOF
+$basic_os
+EOF
+ ;;
+ # Default OS when just kernel was specified
+ nto*)
+ kernel=nto
+ os=$(echo $basic_os | sed -e 's|nto|qnx|')
+ ;;
+ linux*)
+ kernel=linux
+ os=$(echo $basic_os | sed -e 's|linux|gnu|')
;;
- -solaris1 | -solaris1.*)
- os=`echo $os | sed -e 's|solaris1|sunos4|'`
+ *)
+ kernel=
+ os=$basic_os
+ ;;
+esac
+
+# Now, normalize the OS (knowing we just have one component, it's not a kernel,
+# etc.)
+case $os in
+ # First match some system type aliases that might get confused
+ # with valid system types.
+ # solaris* is a basic system type, with this one exception.
+ auroraux)
+ os=auroraux
;;
- -solaris)
- os=-solaris2
+ bluegene*)
+ os=cnk
;;
- -svr4*)
- os=-sysv4
+ solaris1 | solaris1.*)
+ os=$(echo $os | sed -e 's|solaris1|sunos4|')
;;
- -unixware*)
- os=-sysv4.2uw
+ solaris)
+ os=solaris2
;;
- -gnu/linux*)
- os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
+ unixware*)
+ os=sysv4.2uw
;;
- # First accept the basic system types.
- # The portable systems comes first.
- # Each alternative MUST END IN A *, to match a version number.
- # -sysv* is not here because it comes later, after sysvr4.
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\
- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \
- | -sym* | -kopensolaris* | -plan9* \
- | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* | -aros* | -cloudabi* | -sortix* \
- | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
- | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \
- | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
- | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
- | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
- | -chorusos* | -chorusrdb* | -cegcc* \
- | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
- | -linux-newlib* | -linux-musl* | -linux-uclibc* \
- | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \
- | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
- | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
- | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \
- | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \
- | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \
- | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \
- | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \
- | -onefs* | -tirtos* | -phoenix* | -fuchsia*)
- # Remember, each alternative MUST END IN *, to match a version number.
- ;;
- -qnx*)
- case $basic_machine in
- x86-* | i*86-*)
- ;;
- *)
- os=-nto$os
- ;;
- esac
+ # es1800 is here to avoid being matched by es* (a different OS)
+ es1800*)
+ os=ose
;;
- -nto-qnx*)
+ # Some version numbers need modification
+ chorusos*)
+ os=chorusos
;;
- -nto*)
- os=`echo $os | sed -e 's|nto|nto-qnx|'`
+ isc)
+ os=isc2.2
;;
- -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \
- | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
+ sco6)
+ os=sco5v6
;;
- -mac*)
- os=`echo $os | sed -e 's|mac|macos|'`
+ sco5)
+ os=sco3.2v5
;;
- -linux-dietlibc)
- os=-linux-dietlibc
+ sco4)
+ os=sco3.2v4
;;
- -linux*)
- os=`echo $os | sed -e 's|linux|linux-gnu|'`
+ sco3.2.[4-9]*)
+ os=$(echo $os | sed -e 's/sco3.2./sco3.2v/')
;;
- -sunos5*)
- os=`echo $os | sed -e 's|sunos5|solaris2|'`
+ sco*v* | scout)
+ # Don't match below
;;
- -sunos6*)
- os=`echo $os | sed -e 's|sunos6|solaris3|'`
+ sco*)
+ os=sco3.2v2
;;
- -opened*)
- os=-openedition
+ psos*)
+ os=psos
;;
- -os400*)
- os=-os400
+ qnx*)
+ os=qnx
;;
- -wince*)
- os=-wince
+ hiux*)
+ os=hiuxwe2
;;
- -osfrose*)
- os=-osfrose
+ lynx*178)
+ os=lynxos178
;;
- -osf*)
- os=-osf
+ lynx*5)
+ os=lynxos5
;;
- -utek*)
- os=-bsd
+ lynxos*)
+ # don't get caught up in next wildcard
;;
- -dynix*)
- os=-bsd
+ lynx*)
+ os=lynxos
;;
- -acis*)
- os=-aos
+ mac[0-9]*)
+ os=$(echo "$os" | sed -e 's|mac|macos|')
;;
- -atheos*)
- os=-atheos
+ opened*)
+ os=openedition
;;
- -syllable*)
- os=-syllable
+ os400*)
+ os=os400
;;
- -386bsd)
- os=-bsd
+ sunos5*)
+ os=$(echo "$os" | sed -e 's|sunos5|solaris2|')
;;
- -ctix* | -uts*)
- os=-sysv
+ sunos6*)
+ os=$(echo "$os" | sed -e 's|sunos6|solaris3|')
;;
- -nova*)
- os=-rtmk-nova
+ wince*)
+ os=wince
;;
- -ns2 )
- os=-nextstep2
+ utek*)
+ os=bsd
;;
- -nsk*)
- os=-nsk
+ dynix*)
+ os=bsd
;;
- # Preserve the version number of sinix5.
- -sinix5.*)
- os=`echo $os | sed -e 's|sinix|sysv|'`
+ acis*)
+ os=aos
;;
- -sinix*)
- os=-sysv4
+ atheos*)
+ os=atheos
;;
- -tpf*)
- os=-tpf
+ syllable*)
+ os=syllable
;;
- -triton*)
- os=-sysv3
+ 386bsd)
+ os=bsd
;;
- -oss*)
- os=-sysv3
+ ctix* | uts*)
+ os=sysv
;;
- -svr4)
- os=-sysv4
+ nova*)
+ os=rtmk-nova
;;
- -svr3)
- os=-sysv3
+ ns2)
+ os=nextstep2
;;
- -sysvr4)
- os=-sysv4
+ # Preserve the version number of sinix5.
+ sinix5.*)
+ os=$(echo $os | sed -e 's|sinix|sysv|')
;;
- # This must come after -sysvr4.
- -sysv*)
+ sinix*)
+ os=sysv4
;;
- -ose*)
- os=-ose
+ tpf*)
+ os=tpf
;;
- -es1800*)
- os=-ose
+ triton*)
+ os=sysv3
;;
- -xenix)
- os=-xenix
+ oss*)
+ os=sysv3
;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- os=-mint
+ svr4*)
+ os=sysv4
;;
- -aros*)
- os=-aros
+ svr3)
+ os=sysv3
;;
- -zvmoe)
- os=-zvmoe
+ sysvr4)
+ os=sysv4
;;
- -dicos*)
- os=-dicos
+ ose*)
+ os=ose
;;
- -nacl*)
+ *mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
+ os=mint
;;
- -ios)
+ dicos*)
+ os=dicos
;;
- -none)
+ pikeos*)
+ # Until real need of OS specific support for
+ # particular features comes up, bare metal
+ # configurations are quite functional.
+ case $cpu in
+ arm*)
+ os=eabi
+ ;;
+ *)
+ os=elf
+ ;;
+ esac
;;
*)
- # Get rid of the `-' at the beginning of $os.
- os=`echo $os | sed 's/[^-]*-//'`
- echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
- exit 1
+ # No normalization, but not necessarily accepted, that comes below.
;;
esac
+
else
# Here we handle the default operating systems that come with various machines.
@@ -1564,261 +1499,356 @@ else
# will signal an error saying that MANUFACTURER isn't an operating
# system, and we'll never get to this point.
-case $basic_machine in
+kernel=
+case $cpu-$vendor in
score-*)
- os=-elf
+ os=elf
;;
spu-*)
- os=-elf
+ os=elf
;;
*-acorn)
- os=-riscix1.2
+ os=riscix1.2
;;
arm*-rebel)
- os=-linux
+ kernel=linux
+ os=gnu
;;
arm*-semi)
- os=-aout
+ os=aout
;;
c4x-* | tic4x-*)
- os=-coff
+ os=coff
;;
c8051-*)
- os=-elf
+ os=elf
+ ;;
+ clipper-intergraph)
+ os=clix
;;
hexagon-*)
- os=-elf
+ os=elf
;;
tic54x-*)
- os=-coff
+ os=coff
;;
tic55x-*)
- os=-coff
+ os=coff
;;
tic6x-*)
- os=-coff
+ os=coff
;;
# This must come before the *-dec entry.
pdp10-*)
- os=-tops20
+ os=tops20
;;
pdp11-*)
- os=-none
+ os=none
;;
*-dec | vax-*)
- os=-ultrix4.2
+ os=ultrix4.2
;;
m68*-apollo)
- os=-domain
+ os=domain
;;
i386-sun)
- os=-sunos4.0.2
+ os=sunos4.0.2
;;
m68000-sun)
- os=-sunos3
+ os=sunos3
;;
m68*-cisco)
- os=-aout
+ os=aout
;;
mep-*)
- os=-elf
+ os=elf
;;
mips*-cisco)
- os=-elf
+ os=elf
;;
mips*-*)
- os=-elf
+ os=elf
;;
or32-*)
- os=-coff
+ os=coff
;;
*-tti) # must be before sparc entry or we get the wrong os.
- os=-sysv3
+ os=sysv3
;;
sparc-* | *-sun)
- os=-sunos4.1.1
+ os=sunos4.1.1
;;
- *-be)
- os=-beos
+ pru-*)
+ os=elf
;;
- *-haiku)
- os=-haiku
+ *-be)
+ os=beos
;;
*-ibm)
- os=-aix
+ os=aix
;;
*-knuth)
- os=-mmixware
+ os=mmixware
;;
*-wec)
- os=-proelf
+ os=proelf
;;
*-winbond)
- os=-proelf
+ os=proelf
;;
*-oki)
- os=-proelf
+ os=proelf
;;
*-hp)
- os=-hpux
+ os=hpux
;;
*-hitachi)
- os=-hiux
+ os=hiux
;;
i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
- os=-sysv
+ os=sysv
;;
*-cbm)
- os=-amigaos
+ os=amigaos
;;
*-dg)
- os=-dgux
+ os=dgux
;;
*-dolphin)
- os=-sysv3
+ os=sysv3
;;
m68k-ccur)
- os=-rtu
+ os=rtu
;;
m88k-omron*)
- os=-luna
+ os=luna
;;
- *-next )
- os=-nextstep
+ *-next)
+ os=nextstep
;;
*-sequent)
- os=-ptx
+ os=ptx
;;
*-crds)
- os=-unos
+ os=unos
;;
*-ns)
- os=-genix
+ os=genix
;;
i370-*)
- os=-mvs
- ;;
- *-next)
- os=-nextstep3
+ os=mvs
;;
*-gould)
- os=-sysv
+ os=sysv
;;
*-highlevel)
- os=-bsd
+ os=bsd
;;
*-encore)
- os=-bsd
+ os=bsd
;;
*-sgi)
- os=-irix
+ os=irix
;;
*-siemens)
- os=-sysv4
+ os=sysv4
;;
*-masscomp)
- os=-rtu
+ os=rtu
;;
f30[01]-fujitsu | f700-fujitsu)
- os=-uxpv
+ os=uxpv
;;
*-rom68k)
- os=-coff
+ os=coff
;;
*-*bug)
- os=-coff
+ os=coff
;;
*-apple)
- os=-macos
+ os=macos
;;
*-atari*)
- os=-mint
+ os=mint
+ ;;
+ *-wrs)
+ os=vxworks
;;
*)
- os=-none
+ os=none
;;
esac
+
fi
+# Now, validate our (potentially fixed-up) OS.
+case $os in
+ # Sometimes we do "kernel-abi", so those need to count as OSes.
+ musl* | newlib* | uclibc*)
+ ;;
+ # Likewise for "kernel-libc"
+ eabi* | gnueabi*)
+ ;;
+ # Now accept the basic system types.
+ # The portable systems comes first.
+ # Each alternative MUST end in a * to match a version number.
+ gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
+ | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
+ | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
+ | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \
+ | hiux* | abug | nacl* | netware* | windows* \
+ | os9* | macos* | osx* | ios* \
+ | mpw* | magic* | mmixware* | mon960* | lnews* \
+ | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
+ | aos* | aros* | cloudabi* | sortix* | twizzler* \
+ | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
+ | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
+ | mirbsd* | netbsd* | dicos* | openedition* | ose* \
+ | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \
+ | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
+ | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
+ | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
+ | udi* | lites* | ieee* | go32* | aux* | hcos* \
+ | chorusrdb* | cegcc* | glidix* \
+ | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
+ | midipix* | mingw32* | mingw64* | mint* \
+ | uxpv* | beos* | mpeix* | udk* | moxiebox* \
+ | interix* | uwin* | mks* | rhapsody* | darwin* \
+ | openstep* | oskit* | conix* | pw32* | nonstopux* \
+ | storm-chaos* | tops10* | tenex* | tops20* | its* \
+ | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
+ | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
+ | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
+ | skyos* | haiku* | rdos* | toppers* | drops* | es* \
+ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
+ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
+ | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*)
+ ;;
+ # This one is extra strict with allowed versions
+ sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
+ # Don't forget version if it is 3.2v4 or newer.
+ ;;
+ none)
+ ;;
+ *)
+ echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
+ exit 1
+ ;;
+esac
+
+# As a final step for OS-related things, validate the OS-kernel combination
+# (given a valid OS), if there is a kernel.
+case $kernel-$os in
+ linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* )
+ ;;
+ uclinux-uclibc* )
+ ;;
+ -dietlibc* | -newlib* | -musl* | -uclibc* )
+ # These are just libc implementations, not actual OSes, and thus
+ # require a kernel.
+ echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
+ exit 1
+ ;;
+ kfreebsd*-gnu* | kopensolaris*-gnu*)
+ ;;
+ nto-qnx*)
+ ;;
+ os2-emx)
+ ;;
+ *-eabi* | *-gnueabi*)
+ ;;
+ -*)
+ # Blank kernel with real OS is always fine.
+ ;;
+ *-*)
+ echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
+ exit 1
+ ;;
+esac
+
# Here we handle the case where we know the os, and the CPU type, but not the
# manufacturer. We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
- *-unknown)
- case $os in
- -riscix*)
+case $vendor in
+ unknown)
+ case $cpu-$os in
+ *-riscix*)
vendor=acorn
;;
- -sunos*)
+ *-sunos*)
vendor=sun
;;
- -cnk*|-aix*)
+ *-cnk* | *-aix*)
vendor=ibm
;;
- -beos*)
+ *-beos*)
vendor=be
;;
- -hpux*)
+ *-hpux*)
vendor=hp
;;
- -mpeix*)
+ *-mpeix*)
vendor=hp
;;
- -hiux*)
+ *-hiux*)
vendor=hitachi
;;
- -unos*)
+ *-unos*)
vendor=crds
;;
- -dgux*)
+ *-dgux*)
vendor=dg
;;
- -luna*)
+ *-luna*)
vendor=omron
;;
- -genix*)
+ *-genix*)
vendor=ns
;;
- -mvs* | -opened*)
+ *-clix*)
+ vendor=intergraph
+ ;;
+ *-mvs* | *-opened*)
+ vendor=ibm
+ ;;
+ *-os400*)
vendor=ibm
;;
- -os400*)
+ s390-* | s390x-*)
vendor=ibm
;;
- -ptx*)
+ *-ptx*)
vendor=sequent
;;
- -tpf*)
+ *-tpf*)
vendor=ibm
;;
- -vxsim* | -vxworks* | -windiss*)
+ *-vxsim* | *-vxworks* | *-windiss*)
vendor=wrs
;;
- -aux*)
+ *-aux*)
vendor=apple
;;
- -hms*)
+ *-hms*)
vendor=hitachi
;;
- -mpw* | -macos*)
+ *-mpw* | *-macos*)
vendor=apple
;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
+ *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*)
vendor=atari
;;
- -vos*)
+ *-vos*)
vendor=stratus
;;
esac
- basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
;;
esac
-echo $basic_machine$os
+echo "$cpu-$vendor-${kernel:+$kernel-}$os"
exit
# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
+# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
diff --git a/configure.ac b/configure.ac
index 261d81c..f6d25f3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -131,12 +131,14 @@ abs_objroot="`pwd`/"
AC_SUBST([abs_objroot])
dnl Munge install path variables.
-if test "x$prefix" = "xNONE" ; then
- prefix="/usr/local"
-fi
-if test "x$exec_prefix" = "xNONE" ; then
- exec_prefix=$prefix
-fi
+case "$prefix" in
+ *\ * ) AC_MSG_ERROR([Prefix should not contain spaces]) ;;
+ "NONE" ) prefix="/usr/local" ;;
+esac
+case "$exec_prefix" in
+ *\ * ) AC_MSG_ERROR([Exec prefix should not contain spaces]) ;;
+ "NONE" ) exec_prefix=$prefix ;;
+esac
PREFIX=$prefix
AC_SUBST([PREFIX])
BINDIR=`eval echo $bindir`
@@ -237,19 +239,29 @@ fi
if test "x$GCC" = "xyes" ; then
JE_CFLAGS_ADD([-std=gnu11])
if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then
- AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+ AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ])
else
JE_CFLAGS_ADD([-std=gnu99])
if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then
- AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT])
+ AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ])
fi
fi
+ JE_CFLAGS_ADD([-Werror=unknown-warning-option])
JE_CFLAGS_ADD([-Wall])
JE_CFLAGS_ADD([-Wextra])
JE_CFLAGS_ADD([-Wshorten-64-to-32])
JE_CFLAGS_ADD([-Wsign-compare])
JE_CFLAGS_ADD([-Wundef])
JE_CFLAGS_ADD([-Wno-format-zero-length])
+ JE_CFLAGS_ADD([-Wpointer-arith])
+ dnl This warning triggers on the use of the universal zero initializer, which
+ dnl is a very handy idiom for things like the tcache static initializer (which
+ dnl has lots of nested structs). See the discussion at.
+ dnl https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119
+ JE_CFLAGS_ADD([-Wno-missing-braces])
+ dnl This one too.
+ JE_CFLAGS_ADD([-Wno-missing-field-initializers])
+ JE_CFLAGS_ADD([-Wno-missing-attributes])
JE_CFLAGS_ADD([-pipe])
JE_CFLAGS_ADD([-g3])
elif test "x$je_cv_msvc" = "xyes" ; then
@@ -290,8 +302,11 @@ if test "x$enable_cxx" = "x1" ; then
dnl Require at least c++14, which is the first version to support sized
dnl deallocation. C++ support is not compiled otherwise.
m4_include([m4/ax_cxx_compile_stdcxx.m4])
- AX_CXX_COMPILE_STDCXX([14], [noext], [optional])
- if test "x${HAVE_CXX14}" = "x1" ; then
+ AX_CXX_COMPILE_STDCXX([17], [noext], [optional])
+ if test "x${HAVE_CXX17}" != "x1"; then
+ AX_CXX_COMPILE_STDCXX([14], [noext], [optional])
+ fi
+ if test "x${HAVE_CXX14}" = "x1" -o "x${HAVE_CXX17}" = "x1"; then
JE_CXXFLAGS_ADD([-Wall])
JE_CXXFLAGS_ADD([-Wextra])
JE_CXXFLAGS_ADD([-g3])
@@ -312,6 +327,9 @@ if test "x$enable_cxx" = "x1" ; then
enable_cxx="0"
fi
fi
+if test "x$enable_cxx" = "x1"; then
+ AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ], [ ])
+fi
AC_SUBST([enable_cxx])
AC_SUBST([CONFIGURE_CXXFLAGS])
AC_SUBST([SPECIFIED_CXXFLAGS])
@@ -319,7 +337,7 @@ AC_SUBST([EXTRA_CXXFLAGS])
AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0])
if test "x${ac_cv_big_endian}" = "x1" ; then
- AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ])
+ AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ], [ ])
fi
if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then
@@ -339,7 +357,7 @@ else
AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
fi
fi
-AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
+AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR], [ ])
AC_CHECK_SIZEOF([int])
if test "x${ac_cv_sizeof_int}" = "x8" ; then
@@ -349,7 +367,7 @@ elif test "x${ac_cv_sizeof_int}" = "x4" ; then
else
AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}])
fi
-AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT], [ ])
AC_CHECK_SIZEOF([long])
if test "x${ac_cv_sizeof_long}" = "x8" ; then
@@ -359,7 +377,7 @@ elif test "x${ac_cv_sizeof_long}" = "x4" ; then
else
AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
fi
-AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG], [ ])
AC_CHECK_SIZEOF([long long])
if test "x${ac_cv_sizeof_long_long}" = "x8" ; then
@@ -369,7 +387,7 @@ elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then
else
AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}])
fi
-AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG])
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG], [ ])
AC_CHECK_SIZEOF([intmax_t])
if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then
@@ -381,7 +399,7 @@ elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then
else
AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}])
fi
-AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T])
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T], [ ])
AC_CANONICAL_HOST
dnl CPU-specific settings.
@@ -407,12 +425,23 @@ case "${host_cpu}" in
fi
fi
;;
+ aarch64|arm*)
+ HAVE_CPU_SPINWAIT=1
+ dnl isb is a better equivalent to the pause instruction on x86.
+ AC_CACHE_VAL([je_cv_isb],
+ [JE_COMPILABLE([isb instruction], [],
+ [[__asm__ volatile("isb"); return 0;]],
+ [je_cv_isb])])
+ if test "x${je_cv_isb}" = "xyes" ; then
+ CPU_SPINWAIT='__asm__ volatile("isb")'
+ fi
+ ;;
*)
HAVE_CPU_SPINWAIT=0
;;
esac
-AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT])
-AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
+AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT], [ ])
+AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT], [ ])
AC_ARG_WITH([lg_vaddr],
[AS_HELP_STRING([--with-lg-vaddr=<lg-vaddr>], [Number of significant virtual address bits])],
@@ -477,7 +506,7 @@ typedef unsigned __int32 uint32_t;
LG_VADDR="${je_cv_lg_vaddr}"
fi
if test "x${LG_VADDR}" != "xerror" ; then
- AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR])
+ AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ])
else
AC_MSG_ERROR([cannot determine number of significant virtual address bits])
fi
@@ -499,7 +528,7 @@ typedef unsigned __int32 uint32_t;
fi
;;
esac
-AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR])
+AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ])
LD_PRELOAD_VAR="LD_PRELOAD"
so="so"
@@ -583,7 +612,7 @@ if test ! -e "${objroot}VERSION" ; then
if test ! -e "${srcroot}VERSION" ; then
AC_MSG_RESULT(
[Missing VERSION file, and unable to generate it; creating bogus VERSION])
- echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION"
+ echo "0.0.0-0-g000000missing_version_try_git_fetch_tags" > "${objroot}VERSION"
else
cp ${srcroot}VERSION ${objroot}VERSION
fi
@@ -609,6 +638,7 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
dnl definitions need to be seen before any headers are included, which is a pain
dnl to make happen otherwise.
default_retain="0"
+zero_realloc_default_free="0"
maps_coalesce="1"
DUMP_SYMS="${NM} -a"
SYM_PREFIX=""
@@ -626,8 +656,9 @@ case "${host}" in
SYM_PREFIX="_"
;;
*-*-freebsd*)
+ JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE)
abi="elf"
- AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
+ AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ])
force_lazy_lock="1"
;;
*-*-dragonfly*)
@@ -640,41 +671,45 @@ case "${host}" in
*-*-bitrig*)
abi="elf"
;;
- *-*-linux-android)
+ *-*-linux-android*)
dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
abi="elf"
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ])
- AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
- AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
- AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
- AC_DEFINE([JEMALLOC_C11_ATOMICS])
+ glibc="0"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ])
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ])
+ AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ])
+ AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ])
+ AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ])
force_tls="0"
if test "${LG_SIZEOF_PTR}" = "3"; then
default_retain="1"
fi
+ zero_realloc_default_free="1"
;;
*-*-linux*)
dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
abi="elf"
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ])
- AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
- AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ])
- AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
- AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
+ glibc="1"
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ])
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ])
+ AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ])
+ AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ])
+ AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ])
if test "${LG_SIZEOF_PTR}" = "3"; then
default_retain="1"
fi
+ zero_realloc_default_free="1"
;;
*-*-kfreebsd*)
dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE.
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE)
abi="elf"
- AC_DEFINE([JEMALLOC_HAS_ALLOCA_H])
- AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ])
- AC_DEFINE([JEMALLOC_THREADED_INIT], [ ])
- AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ])
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ])
+ AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ])
+ AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ])
+ AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ])
;;
*-*-netbsd*)
AC_MSG_CHECKING([ABI])
@@ -741,6 +776,12 @@ case "${host}" in
if test "${LG_SIZEOF_PTR}" = "3"; then
default_retain="1"
fi
+ zero_realloc_default_free="1"
+ ;;
+ *-*-nto-qnx)
+ abi="elf"
+ force_tls="0"
+ AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ])
;;
*)
AC_MSG_RESULT([Unsupported operating system: ${host}])
@@ -763,7 +804,7 @@ AC_CHECK_HEADERS([malloc.h], [
AC_MSG_RESULT([no])
])
])
-AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST])
+AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST], [ ])
AC_SUBST([abi])
AC_SUBST([RPATH])
AC_SUBST([LD_PRELOAD_VAR])
@@ -801,7 +842,7 @@ JE_COMPILABLE([__attribute__ syntax],
[],
[je_cv_attribute])
if test "x${je_cv_attribute}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ], [ ])
if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
JE_CFLAGS_ADD([-fvisibility=hidden])
JE_CXXFLAGS_ADD([-fvisibility=hidden])
@@ -829,7 +870,7 @@ JE_COMPILABLE([alloc_size attribute], [#include <stdlib.h>],
[je_cv_alloc_size])
JE_CFLAGS_RESTORE()
if test "x${je_cv_alloc_size}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ], [ ])
fi
dnl Check for format(gnu_printf, ...) attribute support.
JE_CFLAGS_SAVE()
@@ -840,7 +881,7 @@ JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include <stdlib.h>],
[je_cv_format_gnu_printf])
JE_CFLAGS_RESTORE()
if test "x${je_cv_format_gnu_printf}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ], [ ])
fi
dnl Check for format(printf, ...) attribute support.
JE_CFLAGS_SAVE()
@@ -851,7 +892,7 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
[je_cv_format_printf])
JE_CFLAGS_RESTORE()
if test "x${je_cv_format_printf}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ], [ ])
fi
dnl Check for format_arg(...) attribute support.
@@ -863,7 +904,51 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include <stdlib.h>],
[je_cv_format_arg])
JE_CFLAGS_RESTORE()
if test "x${je_cv_format_arg}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ], [ ])
+fi
+
+dnl Check for fallthrough attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Wimplicit-fallthrough])
+JE_COMPILABLE([fallthrough attribute],
+ [#if !__has_attribute(fallthrough)
+ #error "foo"
+ #endif],
+ [int x = 0;
+ switch (x) {
+ case 0: __attribute__((__fallthrough__));
+ case 1: return 1;
+ }],
+ [je_cv_fallthrough])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_fallthrough}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ], [ ])
+ JE_CFLAGS_ADD([-Wimplicit-fallthrough])
+ JE_CXXFLAGS_ADD([-Wimplicit-fallthrough])
+fi
+
+dnl Check for cold attribute support.
+JE_CFLAGS_SAVE()
+JE_CFLAGS_ADD([-Werror])
+JE_CFLAGS_ADD([-herror_on_warning])
+JE_COMPILABLE([cold attribute], [],
+ [__attribute__((__cold__)) void foo();],
+ [je_cv_cold])
+JE_CFLAGS_RESTORE()
+if test "x${je_cv_cold}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ])
+fi
+
+dnl Check for VM_MAKE_TAG for mmap support.
+JE_COMPILABLE([vm_make_tag],
+ [#include <sys/mman.h>
+ #include <mach/vm_statistics.h>],
+ [void *p;
+ p = mmap(0, 16, PROT_READ, MAP_ANON|MAP_PRIVATE, VM_MAKE_TAG(1), 0);
+ munmap(p, 16);],
+ [je_cv_vm_make_tag])
+if test "x${je_cv_vm_make_tag}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ], [ ])
fi
dnl Support optional additions to rpath.
@@ -898,7 +983,7 @@ AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH])
dnl Enable documentation
AC_ARG_ENABLE([doc],
- [AS_HELP_STRING([--enable-documentation], [Build documentation])],
+ [AS_HELP_STRING([--enable-doc], [Build documentation])],
if test "x$enable_doc" = "xno" ; then
enable_doc="0"
else
@@ -955,11 +1040,11 @@ else
fi]
)
if test "x$JEMALLOC_PREFIX" = "x" ; then
- AC_DEFINE([JEMALLOC_IS_MALLOC])
+ AC_DEFINE([JEMALLOC_IS_MALLOC], [ ], [ ])
else
JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
- AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
- AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
+ AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"], [ ])
+ AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"], [ ])
fi
AC_SUBST([JEMALLOC_PREFIX])
AC_SUBST([JEMALLOC_CPREFIX])
@@ -967,42 +1052,45 @@ AC_SUBST([JEMALLOC_CPREFIX])
AC_ARG_WITH([export],
[AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])],
[if test "x$with_export" = "xno"; then
- AC_DEFINE([JEMALLOC_EXPORT],[])
+ AC_DEFINE([JEMALLOC_EXPORT],[], [ ])
fi]
)
-public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
+public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx"
dnl Check for additional platform-specific public API functions.
AC_CHECK_FUNC([memalign],
- [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ])
public_syms="${public_syms} memalign"])
AC_CHECK_FUNC([valloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ])
public_syms="${public_syms} valloc"])
+AC_CHECK_FUNC([malloc_size],
+ [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ])
+ public_syms="${public_syms} malloc_size"])
dnl Check for allocator-related functions that should be wrapped.
wrap_syms=
if test "x${JEMALLOC_PREFIX}" = "x" ; then
AC_CHECK_FUNC([__libc_calloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ], [ ])
wrap_syms="${wrap_syms} __libc_calloc"])
AC_CHECK_FUNC([__libc_free],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ])
wrap_syms="${wrap_syms} __libc_free"])
AC_CHECK_FUNC([__libc_malloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ])
wrap_syms="${wrap_syms} __libc_malloc"])
AC_CHECK_FUNC([__libc_memalign],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ], [ ])
wrap_syms="${wrap_syms} __libc_memalign"])
AC_CHECK_FUNC([__libc_realloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ], [ ])
wrap_syms="${wrap_syms} __libc_realloc"])
AC_CHECK_FUNC([__libc_valloc],
- [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ])
wrap_syms="${wrap_syms} __libc_valloc"])
AC_CHECK_FUNC([__posix_memalign],
- [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ])
+ [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ])
wrap_syms="${wrap_syms} __posix_memalign"])
fi
@@ -1020,14 +1108,17 @@ AC_ARG_WITH([private_namespace],
[JEMALLOC_PRIVATE_NAMESPACE="${with_private_namespace}je_"],
[JEMALLOC_PRIVATE_NAMESPACE="je_"]
)
-AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE])
+AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE], [ ])
private_namespace="$JEMALLOC_PRIVATE_NAMESPACE"
AC_SUBST([private_namespace])
dnl Do not add suffix to installed files by default.
AC_ARG_WITH([install_suffix],
[AS_HELP_STRING([--with-install-suffix=<suffix>], [Suffix to append to all installed files])],
- [INSTALL_SUFFIX="$with_install_suffix"],
+ [case "$with_install_suffix" in
+ *\ * ) AC_MSG_ERROR([Install suffix should not contain spaces]) ;;
+ * ) INSTALL_SUFFIX="$with_install_suffix" ;;
+esac],
[INSTALL_SUFFIX=]
)
install_suffix="$INSTALL_SUFFIX"
@@ -1040,7 +1131,7 @@ AC_ARG_WITH([malloc_conf],
[JEMALLOC_CONFIG_MALLOC_CONF=""]
)
config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF"
-AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"])
+AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"], [ ])
dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of
dnl jemalloc_protos_jet.h easy.
@@ -1129,10 +1220,7 @@ fi
[enable_debug="0"]
)
if test "x$enable_debug" = "x1" ; then
- AC_DEFINE([JEMALLOC_DEBUG], [ ])
-fi
-if test "x$enable_debug" = "x1" ; then
- AC_DEFINE([JEMALLOC_DEBUG], [ ])
+ AC_DEFINE([JEMALLOC_DEBUG], [ ], [ ])
fi
AC_SUBST([enable_debug])
@@ -1164,7 +1252,7 @@ fi
[enable_stats="1"]
)
if test "x$enable_stats" = "x1" ; then
- AC_DEFINE([JEMALLOC_STATS], [ ])
+ AC_DEFINE([JEMALLOC_STATS], [ ], [ ])
fi
AC_SUBST([enable_stats])
@@ -1180,7 +1268,7 @@ fi
[enable_experimental_smallocx="0"]
)
if test "x$enable_experimental_smallocx" = "x1" ; then
- AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API])
+ AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API], [ ], [ ])
fi
AC_SUBST([enable_experimental_smallocx])
@@ -1207,6 +1295,9 @@ AC_ARG_ENABLE([prof-libunwind],
enable_prof_libunwind="0"
else
enable_prof_libunwind="1"
+ if test "x$enable_prof" = "x0" ; then
+ AC_MSG_ERROR([--enable-prof-libunwind should only be used with --enable-prof])
+ fi
fi
],
[enable_prof_libunwind="0"]
@@ -1234,7 +1325,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
fi
if test "x${enable_prof_libunwind}" = "x1" ; then
backtrace_method="libunwind"
- AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
+ AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ], [ ])
fi
fi
@@ -1257,7 +1348,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
fi
if test "x${enable_prof_libgcc}" = "x1" ; then
backtrace_method="libgcc"
- AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+ AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ], [ ])
fi
else
enable_prof_libgcc="0"
@@ -1278,7 +1369,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
-a "x$GCC" = "xyes" ; then
JE_CFLAGS_ADD([-fno-omit-frame-pointer])
backtrace_method="gcc intrinsics"
- AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
+ AC_DEFINE([JEMALLOC_PROF_GCC], [ ], [ ])
else
enable_prof_gcc="0"
fi
@@ -1293,19 +1384,24 @@ if test "x$enable_prof" = "x1" ; then
dnl Heap profiling uses the log(3) function.
JE_APPEND_VS(LIBS, $LM)
- AC_DEFINE([JEMALLOC_PROF], [ ])
+ AC_DEFINE([JEMALLOC_PROF], [ ], [ ])
fi
AC_SUBST([enable_prof])
dnl Indicate whether adjacent virtual memory mappings automatically coalesce
dnl (and fragment on demand).
if test "x${maps_coalesce}" = "x1" ; then
- AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ])
+ AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ], [ ])
fi
dnl Indicate whether to retain memory (rather than using munmap()) by default.
if test "x$default_retain" = "x1" ; then
- AC_DEFINE([JEMALLOC_RETAIN], [ ])
+ AC_DEFINE([JEMALLOC_RETAIN], [ ], [ ])
+fi
+
+dnl Indicate whether realloc(ptr, 0) defaults to the "alloc" behavior.
+if test "x$zero_realloc_default_free" = "x1" ; then
+ AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ])
fi
dnl Enable allocation from DSS if supported by the OS.
@@ -1322,7 +1418,7 @@ else
fi
if test "x$have_dss" = "x1" ; then
- AC_DEFINE([JEMALLOC_DSS], [ ])
+ AC_DEFINE([JEMALLOC_DSS], [ ], [ ])
fi
dnl Support the junk/zero filling option by default.
@@ -1337,7 +1433,7 @@ fi
[enable_fill="1"]
)
if test "x$enable_fill" = "x1" ; then
- AC_DEFINE([JEMALLOC_FILL], [ ])
+ AC_DEFINE([JEMALLOC_FILL], [ ], [ ])
fi
AC_SUBST([enable_fill])
@@ -1362,10 +1458,25 @@ JE_COMPILABLE([utrace(2)], [
utrace((void *)0, 0);
], [je_cv_utrace])
if test "x${je_cv_utrace}" = "xno" ; then
- enable_utrace="0"
-fi
-if test "x$enable_utrace" = "x1" ; then
- AC_DEFINE([JEMALLOC_UTRACE], [ ])
+ JE_COMPILABLE([utrace(2) with label], [
+ #include <sys/types.h>
+ #include <sys/param.h>
+ #include <sys/time.h>
+ #include <sys/uio.h>
+ #include <sys/ktrace.h>
+ ], [
+ utrace((void *)0, (void *)0, 0);
+ ], [je_cv_utrace_label])
+ if test "x${je_cv_utrace_label}" = "xno"; then
+ enable_utrace="0"
+ fi
+ if test "x$enable_utrace" = "x1" ; then
+ AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ], [ ])
+ fi
+else
+ if test "x$enable_utrace" = "x1" ; then
+ AC_DEFINE([JEMALLOC_UTRACE], [ ], [ ])
+ fi
fi
AC_SUBST([enable_utrace])
@@ -1381,7 +1492,7 @@ fi
[enable_xmalloc="0"]
)
if test "x$enable_xmalloc" = "x1" ; then
- AC_DEFINE([JEMALLOC_XMALLOC], [ ])
+ AC_DEFINE([JEMALLOC_XMALLOC], [ ], [ ])
fi
AC_SUBST([enable_xmalloc])
@@ -1398,7 +1509,7 @@ fi
[enable_cache_oblivious="1"]
)
if test "x$enable_cache_oblivious" = "x1" ; then
- AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ])
+ AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ], [ ])
fi
AC_SUBST([enable_cache_oblivious])
@@ -1414,7 +1525,7 @@ fi
[enable_log="0"]
)
if test "x$enable_log" = "x1" ; then
- AC_DEFINE([JEMALLOC_LOG], [ ])
+ AC_DEFINE([JEMALLOC_LOG], [ ], [ ])
fi
AC_SUBST([enable_log])
@@ -1430,7 +1541,7 @@ fi
[enable_readlinkat="0"]
)
if test "x$enable_readlinkat" = "x1" ; then
- AC_DEFINE([JEMALLOC_READLINKAT], [ ])
+ AC_DEFINE([JEMALLOC_READLINKAT], [ ], [ ])
fi
AC_SUBST([enable_readlinkat])
@@ -1447,10 +1558,44 @@ fi
[enable_opt_safety_checks="0"]
)
if test "x$enable_opt_safety_checks" = "x1" ; then
- AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ])
+ AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ], [ ])
fi
AC_SUBST([enable_opt_safety_checks])
+dnl Look for sized-deallocation bugs while otherwise being in opt mode.
+AC_ARG_ENABLE([opt-size-checks],
+ [AS_HELP_STRING([--enable-opt-size-checks],
+ [Perform sized-deallocation argument checks, even in opt mode])],
+[if test "x$enable_opt_size_checks" = "xno" ; then
+ enable_opt_size_checks="0"
+else
+ enable_opt_size_checks="1"
+fi
+],
+[enable_opt_size_checks="0"]
+)
+if test "x$enable_opt_size_checks" = "x1" ; then
+ AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ], [ ])
+fi
+AC_SUBST([enable_opt_size_checks])
+
+dnl Do not check for use-after-free by default.
+AC_ARG_ENABLE([uaf-detection],
+ [AS_HELP_STRING([--enable-uaf-detection],
+ [Allow sampled junk-filling on deallocation to detect use-after-free])],
+[if test "x$enable_uaf_detection" = "xno" ; then
+ enable_uaf_detection="0"
+else
+ enable_uaf_detection="1"
+fi
+],
+[enable_uaf_detection="0"]
+)
+if test "x$enable_uaf_detection" = "x1" ; then
+ AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ])
+fi
+AC_SUBST([enable_uaf_detection])
+
JE_COMPILABLE([a program using __builtin_unreachable], [
void foo (void) {
__builtin_unreachable();
@@ -1461,9 +1606,9 @@ void foo (void) {
}
], [je_cv_gcc_builtin_unreachable])
if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable])
+ AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable], [ ])
else
- AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort])
+ AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort], [ ])
fi
dnl ============================================================================
@@ -1483,9 +1628,9 @@ JE_COMPILABLE([a program using __builtin_ffsl], [
}
], [je_cv_gcc_builtin_ffsl])
if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll])
- AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl])
- AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs], [ ])
else
JE_COMPILABLE([a program using ffsl], [
#include <stdio.h>
@@ -1498,9 +1643,9 @@ else
}
], [je_cv_function_ffsl])
if test "x${je_cv_function_ffsl}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll])
- AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl])
- AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs], [ ])
else
AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()])
fi
@@ -1517,22 +1662,39 @@ JE_COMPILABLE([a program using __builtin_popcountl], [
}
], [je_cv_gcc_builtin_popcountl])
if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount])
- AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl])
+ AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl], [ ])
+ AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll], [ ])
fi
AC_ARG_WITH([lg_quantum],
[AS_HELP_STRING([--with-lg-quantum=<lg-quantum>],
- [Base 2 log of minimum allocation alignment])],
- [LG_QUANTA="$with_lg_quantum"],
- [LG_QUANTA="3 4"])
+ [Base 2 log of minimum allocation alignment])])
if test "x$with_lg_quantum" != "x" ; then
- AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum])
+ AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum], [ ])
+fi
+
+AC_ARG_WITH([lg_slab_maxregs],
+ [AS_HELP_STRING([--with-lg-slab-maxregs=<lg-slab-maxregs>],
+ [Base 2 log of maximum number of regions in a slab (used with malloc_conf slab_sizes)])],
+ [CONFIG_LG_SLAB_MAXREGS="with_lg_slab_maxregs"],
+ [CONFIG_LG_SLAB_MAXREGS=""])
+if test "x$with_lg_slab_maxregs" != "x" ; then
+ AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs], [ ])
fi
AC_ARG_WITH([lg_page],
[AS_HELP_STRING([--with-lg-page=<lg-page>], [Base 2 log of system page size])],
[LG_PAGE="$with_lg_page"], [LG_PAGE="detect"])
+case "${host}" in
+ aarch64-apple-darwin*)
+ dnl When cross-compile for Apple M1 and no page size specified, use the
+ dnl default and skip detecting the page size (which is likely incorrect).
+ if test "x${host}" != "x${build}" -a "x$LG_PAGE" = "xdetect"; then
+ LG_PAGE=14
+ fi
+ ;;
+esac
if test "x$LG_PAGE" = "xdetect"; then
AC_CACHE_CHECK([LG_PAGE],
[je_cv_lg_page],
@@ -1579,7 +1741,7 @@ if test "x${je_cv_lg_page}" != "x" ; then
LG_PAGE="${je_cv_lg_page}"
fi
if test "x${LG_PAGE}" != "xundefined" ; then
- AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE])
+ AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE], [ ])
else
AC_MSG_ERROR([cannot determine value for LG_PAGE])
fi
@@ -1616,7 +1778,7 @@ if test "x${LG_PAGE}" != "xundefined" -a \
"${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then
AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})])
fi
-AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}])
+AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}], [ ])
dnl ============================================================================
dnl Enable libdl by default.
@@ -1637,7 +1799,7 @@ dnl ============================================================================
dnl Configure pthreads.
if test "x$abi" != "xpecoff" ; then
- AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ], [ ])
AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
dnl Some systems may embed pthreads functionality in libc; check for libpthread
dnl first, but try libc too before failing.
@@ -1655,7 +1817,7 @@ dnl Check if we have dlsym support.
[AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]),
[have_dlsym="0"])
if test "x$have_dlsym" = "x1" ; then
- AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ], [ ])
fi
else
have_dlsym="0"
@@ -1667,7 +1829,7 @@ dnl Check if we have dlsym support.
pthread_atfork((void *)0, (void *)0, (void *)0);
], [je_cv_pthread_atfork])
if test "x${je_cv_pthread_atfork}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ], [ ])
fi
dnl Check if pthread_setname_np is available with the expected API.
JE_COMPILABLE([pthread_setname_np(3)], [
@@ -1676,7 +1838,38 @@ dnl Check if we have dlsym support.
pthread_setname_np(pthread_self(), "setname_test");
], [je_cv_pthread_setname_np])
if test "x${je_cv_pthread_setname_np}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ], [ ])
+ fi
+ dnl Check if pthread_getname_np is not necessarily present despite
+ dnl the pthread_setname_np counterpart
+ JE_COMPILABLE([pthread_getname_np(3)], [
+#include <pthread.h>
+#include <stdlib.h>
+], [
+ {
+ char *name = malloc(16);
+ pthread_getname_np(pthread_self(), name, 16);
+ free(name);
+ }
+], [je_cv_pthread_getname_np])
+ if test "x${je_cv_pthread_getname_np}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ])
+ fi
+ dnl Check if pthread_get_name_np is not necessarily present despite
+ dnl the pthread_set_name_np counterpart
+ JE_COMPILABLE([pthread_get_name_np(3)], [
+#include <pthread.h>
+#include <pthread_np.h>
+#include <stdlib.h>
+], [
+ {
+ char *name = malloc(16);
+ pthread_get_name_np(pthread_self(), name, 16);
+ free(name);
+ }
+], [je_cv_pthread_get_name_np])
+ if test "x${je_cv_pthread_get_name_np}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ], [ ])
fi
fi
@@ -1708,7 +1901,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [
clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
], [je_cv_clock_monotonic_coarse])
if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE])
+ AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE], [ ], [ ])
fi
dnl check for CLOCK_MONOTONIC.
@@ -1724,7 +1917,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [
#endif
], [je_cv_clock_monotonic])
if test "x${je_cv_clock_monotonic}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC])
+ AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC], [ ], [ ])
fi
dnl Check for mach_absolute_time().
@@ -1734,7 +1927,19 @@ JE_COMPILABLE([mach_absolute_time()], [
mach_absolute_time();
], [je_cv_mach_absolute_time])
if test "x${je_cv_mach_absolute_time}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME])
+ AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME], [ ], [ ])
+fi
+
+dnl check for CLOCK_REALTIME (always should be available on Linux)
+JE_COMPILABLE([clock_gettime(CLOCK_REALTIME, ...)], [
+#include <time.h>
+], [
+ struct timespec ts;
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+], [je_cv_clock_realtime])
+if test "x${je_cv_clock_realtime}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ])
fi
dnl Use syscall(2) (if available) by default.
@@ -1762,7 +1967,7 @@ if test "x$enable_syscall" = "x1" ; then
[je_cv_syscall])
JE_CFLAGS_RESTORE()
if test "x$je_cv_syscall" = "xyes" ; then
- AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ])
+ AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ], [ ])
fi
fi
@@ -1772,7 +1977,7 @@ AC_CHECK_FUNC([secure_getenv],
[have_secure_getenv="0"]
)
if test "x$have_secure_getenv" = "x1" ; then
- AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ], [ ])
fi
dnl Check if the GNU-specific sched_getcpu function exists.
@@ -1781,7 +1986,7 @@ AC_CHECK_FUNC([sched_getcpu],
[have_sched_getcpu="0"]
)
if test "x$have_sched_getcpu" = "x1" ; then
- AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ], [ ])
fi
dnl Check if the GNU-specific sched_setaffinity function exists.
@@ -1790,7 +1995,7 @@ AC_CHECK_FUNC([sched_setaffinity],
[have_sched_setaffinity="0"]
)
if test "x$have_sched_setaffinity" = "x1" ; then
- AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ])
fi
dnl Check if the Solaris/BSD issetugid function exists.
@@ -1799,7 +2004,7 @@ AC_CHECK_FUNC([issetugid],
[have_issetugid="0"]
)
if test "x$have_issetugid" = "x1" ; then
- AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ], [ ])
fi
dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. If so, use
@@ -1811,8 +2016,8 @@ AC_CHECK_FUNC([_malloc_thread_cleanup],
[have__malloc_thread_cleanup="0"]
)
if test "x$have__malloc_thread_cleanup" = "x1" ; then
- AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ])
- wrap_syms="${wrap_syms} _malloc_thread_cleanup"
+ AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ], [ ])
+ wrap_syms="${wrap_syms} _malloc_thread_cleanup _malloc_tsd_cleanup_register"
force_tls="1"
fi
@@ -1824,10 +2029,18 @@ AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb],
[have__pthread_mutex_init_calloc_cb="0"]
)
if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then
- AC_DEFINE([JEMALLOC_MUTEX_INIT_CB])
+ AC_DEFINE([JEMALLOC_MUTEX_INIT_CB], [ ], [ ])
wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork"
fi
+AC_CHECK_FUNC([memcntl],
+ [have_memcntl="1"],
+ [have_memcntl="0"],
+ )
+if test "x$have_memcntl" = "x1" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ])
+fi
+
dnl Disable lazy locking by default.
AC_ARG_ENABLE([lazy_lock],
[AS_HELP_STRING([--enable-lazy-lock],
@@ -1854,7 +2067,7 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then
fi
if test "x$enable_lazy_lock" = "x1" ; then
if test "x$have_dlsym" = "x1" ; then
- AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ])
+ AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ], [ ])
else
AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.])
fi
@@ -1887,7 +2100,7 @@ else
fi
AC_SUBST([enable_tls])
if test "x${enable_tls}" = "x1" ; then
- AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ])
+ AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ], [ ])
fi
dnl ============================================================================
@@ -1908,7 +2121,7 @@ JE_COMPILABLE([C11 atomics], [
return r == 0;
], [je_cv_c11_atomics])
if test "x${je_cv_c11_atomics}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_C11_ATOMICS])
+ AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ])
fi
dnl ============================================================================
@@ -1923,7 +2136,7 @@ JE_COMPILABLE([GCC __atomic atomics], [
return after_add == 1;
], [je_cv_gcc_atomic_atomics])
if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS])
+ AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS], [ ], [ ])
dnl check for 8-bit atomic support
JE_COMPILABLE([GCC 8-bit __atomic atomics], [
@@ -1935,7 +2148,7 @@ if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then
return after_add == 1;
], [je_cv_gcc_u8_atomic_atomics])
if test "x${je_cv_gcc_u8_atomic_atomics}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS])
+ AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS], [ ], [ ])
fi
fi
@@ -1950,7 +2163,7 @@ JE_COMPILABLE([GCC __sync atomics], [
return (before_add == 0) && (after_add == 1);
], [je_cv_gcc_sync_atomics])
if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS])
+ AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS], [ ], [ ])
dnl check for 8-bit atomic support
JE_COMPILABLE([GCC 8-bit __sync atomics], [
@@ -1961,7 +2174,7 @@ if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then
return (before_add == 0) && (after_add == 1);
], [je_cv_gcc_u8_sync_atomics])
if test "x${je_cv_gcc_u8_sync_atomics}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS])
+ AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS], [ ], [ ])
fi
fi
@@ -1986,7 +2199,7 @@ JE_COMPILABLE([Darwin OSAtomic*()], [
}
], [je_cv_osatomic])
if test "x${je_cv_osatomic}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_OSATOMIC], [ ])
+ AC_DEFINE([JEMALLOC_OSATOMIC], [ ], [ ])
fi
dnl ============================================================================
@@ -1998,7 +2211,7 @@ JE_COMPILABLE([madvise(2)], [
madvise((void *)0, 0, 0);
], [je_cv_madvise])
if test "x${je_cv_madvise}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ], [ ])
dnl Check for madvise(..., MADV_FREE).
JE_COMPILABLE([madvise(..., MADV_FREE)], [
@@ -2007,12 +2220,12 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_FREE);
], [je_cv_madv_free])
if test "x${je_cv_madv_free}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ])
elif test "x${je_cv_madvise}" = "xyes" ; then
case "${host_cpu}" in i686|x86_64)
case "${host}" in *-*-linux*)
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ])
- AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ])
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ])
+ AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ], [ ])
;;
esac
;;
@@ -2026,7 +2239,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_DONTNEED);
], [je_cv_madv_dontneed])
if test "x${je_cv_madv_dontneed}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ])
+ AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ], [ ])
fi
dnl Check for madvise(..., MADV_DO[NT]DUMP).
@@ -2037,7 +2250,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_DODUMP);
], [je_cv_madv_dontdump])
if test "x${je_cv_madv_dontdump}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ])
+ AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ], [ ])
fi
dnl Check for madvise(..., MADV_[NO]HUGEPAGE).
@@ -2047,19 +2260,61 @@ if test "x${je_cv_madvise}" = "xyes" ; then
madvise((void *)0, 0, MADV_HUGEPAGE);
madvise((void *)0, 0, MADV_NOHUGEPAGE);
], [je_cv_thp])
+ dnl Check for madvise(..., MADV_[NO]CORE).
+ JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [
+#include <sys/mman.h>
+], [
+ madvise((void *)0, 0, MADV_NOCORE);
+ madvise((void *)0, 0, MADV_CORE);
+], [je_cv_madv_nocore])
+ if test "x${je_cv_madv_nocore}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ])
+ fi
case "${host_cpu}" in
arm*)
;;
*)
if test "x${je_cv_thp}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ])
fi
;;
esac
+else
+ dnl Check for posix_madvise.
+ JE_COMPILABLE([posix_madvise], [
+ #include <sys/mman.h>
+ ], [
+ posix_madvise((void *)0, 0, 0);
+ ], [je_cv_posix_madvise])
+ if test "x${je_cv_posix_madvise}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ], [ ])
+
+ dnl Check for posix_madvise(..., POSIX_MADV_DONTNEED).
+ JE_COMPILABLE([posix_madvise(..., POSIX_MADV_DONTNEED)], [
+ #include <sys/mman.h>
+ ], [
+ posix_madvise((void *)0, 0, POSIX_MADV_DONTNEED);
+ ], [je_cv_posix_madv_dontneed])
+ if test "x${je_cv_posix_madv_dontneed}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ], [ ])
+ fi
+ fi
fi
dnl ============================================================================
-dnl Check for __builtin_clz() and __builtin_clzl().
+dnl Check for mprotect(2).
+
+JE_COMPILABLE([mprotect(2)], [
+#include <sys/mman.h>
+], [
+ mprotect((void *)0, 0, PROT_NONE);
+], [je_cv_mprotect])
+if test "x${je_cv_mprotect}" = "xyes" ; then
+ AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ], [ ])
+fi
+
+dnl ============================================================================
+dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll().
AC_CACHE_CHECK([for __builtin_clz],
[je_cv_builtin_clz],
@@ -2073,12 +2328,16 @@ AC_CACHE_CHECK([for __builtin_clz],
unsigned long x = 0;
int y = __builtin_clzl(x);
}
+ {
+ unsigned long long x = 0;
+ int y = __builtin_clzll(x);
+ }
])],
[je_cv_builtin_clz=yes],
[je_cv_builtin_clz=no])])
if test "x${je_cv_builtin_clz}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ], [ ])
fi
dnl ============================================================================
@@ -2097,7 +2356,7 @@ JE_COMPILABLE([Darwin os_unfair_lock_*()], [
#endif
], [je_cv_os_unfair_lock])
if test "x${je_cv_os_unfair_lock}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ])
+ AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ], [ ])
fi
dnl ============================================================================
@@ -2123,7 +2382,7 @@ if test "x${enable_zone_allocator}" = "x1" ; then
if test "x${abi}" != "xmacho"; then
AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin])
fi
- AC_DEFINE([JEMALLOC_ZONE], [ ])
+ AC_DEFINE([JEMALLOC_ZONE], [ ], [ ])
fi
dnl ============================================================================
@@ -2144,52 +2403,56 @@ AC_SUBST([enable_initial_exec_tls])
if test "x${je_cv_tls_model}" = "xyes" -a \
"x${enable_initial_exec_tls}" = "x1" ; then
AC_DEFINE([JEMALLOC_TLS_MODEL],
- [__attribute__((tls_model("initial-exec")))])
+ [__attribute__((tls_model("initial-exec")))],
+ [ ])
else
- AC_DEFINE([JEMALLOC_TLS_MODEL], [ ])
+ AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ])
fi
dnl ============================================================================
dnl Enable background threads if possible.
-if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then
- AC_DEFINE([JEMALLOC_BACKGROUND_THREAD])
+if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \
+ "x${abi}" != "xmacho" ; then
+ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ])
fi
dnl ============================================================================
dnl Check for glibc malloc hooks
-JE_COMPILABLE([glibc malloc hook], [
-#include <stddef.h>
+if test "x$glibc" = "x1" ; then
+ JE_COMPILABLE([glibc malloc hook], [
+ #include <stddef.h>
-extern void (* __free_hook)(void *ptr);
-extern void *(* __malloc_hook)(size_t size);
-extern void *(* __realloc_hook)(void *ptr, size_t size);
+ extern void (* __free_hook)(void *ptr);
+ extern void *(* __malloc_hook)(size_t size);
+ extern void *(* __realloc_hook)(void *ptr, size_t size);
], [
- void *ptr = 0L;
- if (__malloc_hook) ptr = __malloc_hook(1);
- if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
- if (__free_hook && ptr) __free_hook(ptr);
+ void *ptr = 0L;
+ if (__malloc_hook) ptr = __malloc_hook(1);
+ if (__realloc_hook) ptr = __realloc_hook(ptr, 2);
+ if (__free_hook && ptr) __free_hook(ptr);
], [je_cv_glibc_malloc_hook])
-if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
- if test "x${JEMALLOC_PREFIX}" = "x" ; then
- AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ])
- wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook"
+ if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ], [ ])
+ wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook"
+ fi
fi
-fi
-JE_COMPILABLE([glibc memalign hook], [
-#include <stddef.h>
+ JE_COMPILABLE([glibc memalign hook], [
+ #include <stddef.h>
-extern void *(* __memalign_hook)(size_t alignment, size_t size);
+ extern void *(* __memalign_hook)(size_t alignment, size_t size);
], [
- void *ptr = 0L;
- if (__memalign_hook) ptr = __memalign_hook(16, 7);
+ void *ptr = 0L;
+ if (__memalign_hook) ptr = __memalign_hook(16, 7);
], [je_cv_glibc_memalign_hook])
-if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
- if test "x${JEMALLOC_PREFIX}" = "x" ; then
- AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ])
- wrap_syms="${wrap_syms} __memalign_hook"
+ if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then
+ if test "x${JEMALLOC_PREFIX}" = "x" ; then
+ AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ], [ ])
+ wrap_syms="${wrap_syms} __memalign_hook"
+ fi
fi
fi
@@ -2202,7 +2465,7 @@ JE_COMPILABLE([pthreads adaptive mutexes], [
pthread_mutexattr_destroy(&attr);
], [je_cv_pthread_mutex_adaptive_np])
if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ])
+ AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ])
fi
JE_CFLAGS_SAVE()
@@ -2221,7 +2484,7 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [
], [je_cv_strerror_r_returns_char_with_gnu_source])
JE_CFLAGS_RESTORE()
if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then
- AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ])
+ AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ])
fi
dnl ============================================================================
@@ -2391,7 +2654,7 @@ AC_MSG_RESULT([static libs : ${enable_static}])
AC_MSG_RESULT([autogen : ${enable_autogen}])
AC_MSG_RESULT([debug : ${enable_debug}])
AC_MSG_RESULT([stats : ${enable_stats}])
-AC_MSG_RESULT([experimetal_smallocx : ${enable_experimental_smallocx}])
+AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}])
AC_MSG_RESULT([prof : ${enable_prof}])
AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}])
AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}])
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in
index 7fecda7..e28e8f3 100644
--- a/doc/jemalloc.xml.in
+++ b/doc/jemalloc.xml.in
@@ -630,7 +630,7 @@ for (i = 0; i < nbins; i++) {
</row>
<row>
<entry>8 KiB</entry>
- <entry>[40 KiB, 48 KiB, 54 KiB, 64 KiB]</entry>
+ <entry>[40 KiB, 48 KiB, 56 KiB, 64 KiB]</entry>
</row>
<row>
<entry>16 KiB</entry>
@@ -936,6 +936,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
</para></listitem>
</varlistentry>
+ <varlistentry id="opt.cache_oblivious">
+ <term>
+ <mallctl>opt.cache_oblivious</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Enable / Disable cache-oblivious large allocation
+ alignment, for large requests with no alignment constraints. If this
+ feature is disabled, all large allocations are page-aligned as an
+ implementation artifact, which can severely harm CPU cache utilization.
+ However, the cache-oblivious layout comes at the cost of one extra page
+ per large allocation, which in the most extreme case increases physical
+ memory usage for the 16 KiB size class to 20 KiB. This option is enabled
+ by default.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.metadata_thp">
<term>
<mallctl>opt.metadata_thp</mallctl>
@@ -950,6 +966,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
is <quote>disabled</quote>.</para></listitem>
</varlistentry>
+ <varlistentry id="opt.trust_madvise">
+ <term>
+ <mallctl>opt.trust_madvise</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>If true, do not perform runtime check for MADV_DONTNEED,
+ to check that it actually zeros pages. The default is disabled on Linux
+ and enabled elsewhere.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.retain">
<term>
<mallctl>opt.retain</mallctl>
@@ -1185,6 +1212,41 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay",
enabled. The default is <quote></quote>.</para></listitem>
</varlistentry>
+ <varlistentry id="opt.stats_interval">
+ <term>
+ <mallctl>opt.stats_interval</mallctl>
+ (<type>int64_t</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Average interval between statistics outputs, as measured
+ in bytes of allocation activity. The actual interval may be sporadic
+ because decentralized event counters are used to avoid synchronization
+ bottlenecks. The output may be triggered on any thread, which then
+ calls <function>malloc_stats_print()</function>. <link
+ linkend="opt.stats_interval_opts"><mallctl>opt.stats_interval_opts</mallctl></link>
+ can be combined to specify output options. By default,
+ interval-triggered stats output is disabled (encoded as
+ -1).</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.stats_interval_opts">
+ <term>
+ <mallctl>opt.stats_interval_opts</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para>Options (the <parameter>opts</parameter> string) to pass
+ to the <function>malloc_stats_print()</function> for interval based
+ statistics printing (enabled
+ through <link
+ linkend="opt.stats_interval"><mallctl>opt.stats_interval</mallctl></link>). See
+ available options in <link
+ linkend="malloc_stats_print_opts"><function>malloc_stats_print()</function></link>.
+ Has no effect unless <link
+ linkend="opt.stats_interval"><mallctl>opt.stats_interval</mallctl></link> is
+ enabled. The default is <quote></quote>.</para></listitem>
+ </varlistentry>
+
<varlistentry id="opt.junk">
<term>
<mallctl>opt.junk</mallctl>
@@ -1266,21 +1328,23 @@ malloc_conf = "xmalloc:true";]]></programlisting>
a certain size. Thread-specific caching allows many allocations to be
satisfied without performing any thread synchronization, at the cost of
increased memory use. See the <link
- linkend="opt.lg_tcache_max"><mallctl>opt.lg_tcache_max</mallctl></link>
+ linkend="opt.tcache_max"><mallctl>opt.tcache_max</mallctl></link>
option for related tuning information. This option is enabled by
default.</para></listitem>
</varlistentry>
- <varlistentry id="opt.lg_tcache_max">
+ <varlistentry id="opt.tcache_max">
<term>
- <mallctl>opt.lg_tcache_max</mallctl>
+ <mallctl>opt.tcache_max</mallctl>
(<type>size_t</type>)
<literal>r-</literal>
</term>
- <listitem><para>Maximum size class (log base 2) to cache in the
- thread-specific cache (tcache). At a minimum, all small size classes
- are cached, and at a maximum all large size classes are cached. The
- default maximum is 32 KiB (2^15).</para></listitem>
+ <listitem><para>Maximum size class to cache in the thread-specific cache
+ (tcache). At a minimum, the first size class is cached; and at a
+ maximum, size classes up to 8 MiB can be cached. The default maximum is
+ 32 KiB (2^15). As a convenience, this may also be set by specifying
+ lg_tcache_max, which will be taken to be the base-2 logarithm of the
+ setting of tcache_max.</para></listitem>
</varlistentry>
<varlistentry id="opt.thp">
@@ -1344,7 +1408,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
set to the empty string, no automatic dumps will occur; this is
primarily useful for disabling the automatic final heap dump (which
also disables leak reporting, if enabled). The default prefix is
- <filename>jeprof</filename>.</para></listitem>
+ <filename>jeprof</filename>. This prefix value can be overridden by
+ <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>.
+ </para></listitem>
</varlistentry>
<varlistentry id="opt.prof_active">
@@ -1423,8 +1489,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.i&lt;iseq&gt;.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the
<link
- linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. By default, interval-triggered profile dumping is disabled
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link> and
+ <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>
+ options. By default, interval-triggered profile dumping is disabled
(encoded as -1).
</para></listitem>
</varlistentry>
@@ -1456,8 +1523,9 @@ malloc_conf = "xmalloc:true";]]></programlisting>
usage to a file named according to the pattern
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.f.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the <link
- linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option. Note that <function>atexit()</function> may allocate
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link> and
+ <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>
+ options. Note that <function>atexit()</function> may allocate
memory during application initialization and then deadlock internally
when jemalloc in turn calls <function>atexit()</function>, so
this option is not universally usable (though the application can
@@ -1478,8 +1546,57 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<manvolnum>3</manvolnum></citerefentry> function to report memory leaks
detected by allocation sampling. See the
<link linkend="opt.prof"><mallctl>opt.prof</mallctl></link> option for
- information on analyzing heap profile output. This option is disabled
- by default.</para></listitem>
+ information on analyzing heap profile output. Works only when combined
+ with <link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl>
+ </link>, otherwise does nothing. This option is disabled by default.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.prof_leak_error">
+ <term>
+ <mallctl>opt.prof_leak_error</mallctl>
+ (<type>bool</type>)
+ <literal>r-</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Similar to <link linkend="opt.prof_leak"><mallctl>
+ opt.prof_leak</mallctl></link>, but makes the process exit with error
+ code 1 if a memory leak is detected. This option supersedes
+ <link linkend="opt.prof_leak"><mallctl>opt.prof_leak</mallctl></link>,
+ meaning that if both are specified, this option takes precedence. When
+ enabled, also enables <link linkend="opt.prof_leak"><mallctl>
+ opt.prof_leak</mallctl></link>. Works only when combined with
+ <link linkend="opt.prof_final"><mallctl>opt.prof_final</mallctl></link>,
+ otherwise does nothing. This option is disabled by default.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="opt.zero_realloc">
+ <term>
+ <mallctl>opt.zero_realloc</mallctl>
+ (<type>const char *</type>)
+ <literal>r-</literal>
+ </term>
+ <listitem><para> Determines the behavior of
+ <function>realloc()</function> when passed a value of zero for the new
+ size. <quote>alloc</quote> treats this as an allocation of size zero
+ (and returns a non-null result except in case of resource exhaustion).
+ <quote>free</quote> treats this as a deallocation of the pointer, and
+ returns <constant>NULL</constant> without setting
+ <varname>errno</varname>. <quote>abort</quote> aborts the process if
+ zero is passed. The default is <quote>free</quote> on Linux and
+ Windows, and <quote>alloc</quote> elsewhere.</para>
+
+ <para>There is considerable divergence of behaviors across
+ implementations in handling this case. Many have the behavior of
+ <quote>free</quote>. This can introduce security vulnerabilities, since
+ a <constant>NULL</constant> return value indicates failure, and the
+ continued validity of the passed-in pointer (per POSIX and C11).
+ <quote>alloc</quote> is safe, but can cause leaks in programs that
+ expect the common behavior. Programs intended to be portable and
+ leak-free cannot assume either behavior, and must therefore never call
+ realloc with a size of 0. The <quote>abort</quote> option enables these
+ testing this behavior.</para></listitem>
</varlistentry>
<varlistentry id="thread.arena">
@@ -1520,7 +1637,8 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<link
linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
mallctl. This is useful for avoiding the overhead of repeated
- <function>mallctl*()</function> calls.</para></listitem>
+ <function>mallctl*()</function> calls. Note that the underlying counter
+ should not be modified by the application.</para></listitem>
</varlistentry>
<varlistentry id="thread.deallocated">
@@ -1547,7 +1665,44 @@ malloc_conf = "xmalloc:true";]]></programlisting>
<link
linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>
mallctl. This is useful for avoiding the overhead of repeated
- <function>mallctl*()</function> calls.</para></listitem>
+ <function>mallctl*()</function> calls. Note that the underlying counter
+ should not be modified by the application.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="thread.peak.read">
+ <term>
+ <mallctl>thread.peak.read</mallctl>
+ (<type>uint64_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Get an approximation of the maximum value of the
+ difference between the number of bytes allocated and the number of bytes
+ deallocated by the calling thread since the last call to <link
+ linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>,
+ or since the thread's creation if it has not called <link
+ linkend="thread.peak.reset"><mallctl>thread.peak.reset</mallctl></link>.
+ No guarantees are made about the quality of the approximation, but
+ jemalloc currently endeavors to maintain accuracy to within one hundred
+ kilobytes.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry id="thread.peak.reset">
+ <term>
+ <mallctl>thread.peak.reset</mallctl>
+ (<type>void</type>)
+ <literal>--</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Resets the counter for net bytes allocated in the calling
+ thread to zero. This affects subsequent calls to <link
+ linkend="thread.peak.read"><mallctl>thread.peak.read</mallctl></link>,
+ but not the values returned by <link
+ linkend="thread.allocated"><mallctl>thread.allocated</mallctl></link>
+ or <link
+ linkend="thread.deallocated"><mallctl>thread.deallocated</mallctl></link>.
+ </para></listitem>
</varlistentry>
<varlistentry id="thread.tcache.enabled">
@@ -1618,6 +1773,28 @@ malloc_conf = "xmalloc:true";]]></programlisting>
default.</para></listitem>
</varlistentry>
+ <varlistentry id="thread.idle">
+ <term>
+ <mallctl>thread.idle</mallctl>
+ (<type>void</type>)
+ <literal>--</literal>
+ </term>
+ <listitem><para>Hints to jemalloc that the calling thread will be idle
+ for some nontrivial period of time (say, on the order of seconds), and
+ that doing some cleanup operations may be beneficial. There are no
+ guarantees as to what specific operations will be performed; currently
+ this flushes the caller's tcache and may (according to some heuristic)
+ purge its associated arena.</para>
+ <para>This is not intended to be a general-purpose background activity
+ mechanism, and threads should not wake up multiple times solely to call
+ it. Rather, a thread waiting for a task should do a timed wait first,
+ call <link linkend="thread.idle"><mallctl>thread.idle</mallctl></link>
+ if no task appears in the timeout interval, and then do an untimed wait.
+ For such a background activity mechanism, see
+ <link linkend="background_thread"><mallctl>background_thread</mallctl></link>.
+ </para></listitem>
+ </varlistentry>
+
<varlistentry id="tcache.create">
<term>
<mallctl>tcache.create</mallctl>
@@ -1631,7 +1808,16 @@ malloc_conf = "xmalloc:true";]]></programlisting>
automatically managed one that is used by default. Each explicit cache
can be used by only one thread at a time; the application must assure
that this constraint holds.
+ </para>
+
+ <para>If the amount of space supplied for storing the thread-specific
+ cache identifier does not equal
+ <code language="C">sizeof(<type>unsigned</type>)</code>, no
+ thread-specific cache will be created, no data will be written to the
+ space pointed by <parameter>oldp</parameter>, and
+ <parameter>*oldlenp</parameter> will be set to 0.
</para></listitem>
+
</varlistentry>
<varlistentry id="tcache.flush">
@@ -2171,7 +2357,14 @@ struct extent_hooks_s {
</term>
<listitem><para>Explicitly create a new arena outside the range of
automatically managed arenas, with optionally specified extent hooks,
- and return the new arena index.</para></listitem>
+ and return the new arena index.</para>
+
+ <para>If the amount of space supplied for storing the arena index does
+ not equal <code language="C">sizeof(<type>unsigned</type>)</code>, no
+ arena will be created, no data will be written to the space pointed by
+ <parameter>oldp</parameter>, and <parameter>*oldlenp</parameter> will
+ be set to 0.
+ </para></listitem>
</varlistentry>
<varlistentry id="arenas.lookup">
@@ -2223,9 +2416,24 @@ struct extent_hooks_s {
is specified, to a file according to the pattern
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.m&lt;mseq&gt;.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the
+ <link linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
+ and <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>
+ options.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="prof.prefix">
+ <term>
+ <mallctl>prof.prefix</mallctl>
+ (<type>const char *</type>)
+ <literal>-w</literal>
+ [<option>--enable-prof</option>]
+ </term>
+ <listitem><para>Set the filename prefix for profile dumps. See
<link
linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option.</para></listitem>
+ for the default setting. This can be useful to differentiate profile
+ dumps such as from forked processes.
+ </para></listitem>
</varlistentry>
<varlistentry id="prof.gdump">
@@ -2240,8 +2448,9 @@ struct extent_hooks_s {
dumped to files named according to the pattern
<filename>&lt;prefix&gt;.&lt;pid&gt;.&lt;seq&gt;.u&lt;useq&gt;.heap</filename>,
where <literal>&lt;prefix&gt;</literal> is controlled by the <link
- linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link>
- option.</para></listitem>
+ linkend="opt.prof_prefix"><mallctl>opt.prof_prefix</mallctl></link> and
+ <link linkend="prof.prefix"><mallctl>prof.prefix</mallctl></link>
+ options.</para></listitem>
</varlistentry>
<varlistentry id="prof.reset">
@@ -2398,6 +2607,21 @@ struct extent_hooks_s {
</para></listitem>
</varlistentry>
+ <varlistentry id="stats.zero_reallocs">
+ <term>
+ <mallctl>stats.zero_reallocs</mallctl>
+ (<type>size_t</type>)
+ <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Number of times that the <function>realloc()</function>
+ was called with a non-<constant>NULL</constant> pointer argument and a
+ <constant>0</constant> size argument. This is a fundamentally unsafe
+ pattern in portable programs; see <link linkend="opt.zero_realloc">
+ <mallctl>opt.zero_realloc</mallctl></link> for details.
+ </para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.background_thread.num_threads">
<term>
<mallctl>stats.background_thread.num_threads</mallctl>
@@ -2509,6 +2733,30 @@ struct extent_hooks_s {
counters</link>.</para></listitem>
</varlistentry>
+ <varlistentry id="stats.mutexes.prof_thds_data">
+ <term>
+ <mallctl>stats.mutexes.prof_thds_data.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>prof</varname> threads data mutex
+ (global scope; profiling related). <mallctl>{counter}</mallctl> is one
+ of the counters in <link linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
+ <varlistentry id="stats.mutexes.prof_dump">
+ <term>
+ <mallctl>stats.mutexes.prof_dump.{counter}</mallctl>
+ (<type>counter specific type</type>) <literal>r-</literal>
+ [<option>--enable-stats</option>]
+ </term>
+ <listitem><para>Statistics on <varname>prof</varname> dumping mutex
+ (global scope; profiling related). <mallctl>{counter}</mallctl> is one
+ of the counters in <link linkend="mutex_counters">mutex profiling
+ counters</link>.</para></listitem>
+ </varlistentry>
+
<varlistentry id="stats.mutexes.reset">
<term>
<mallctl>stats.mutexes.reset</mallctl>
@@ -3250,7 +3498,7 @@ heap_v2/524288
[...]
@ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...]
t*: 13: 6688 [0: 0]
- t3: 12: 6496 [0: ]
+ t3: 12: 6496 [0: 0]
t99: 1: 192 [0: 0]
[...]
@@ -3261,9 +3509,9 @@ descriptions of the corresponding fields. <programlisting><![CDATA[
<heap_profile_format_version>/<mean_sample_interval>
<aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
[...]
- <thread_3_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+ <thread_3_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
[...]
- <thread_99_aggregate>: <curobjs>: <curbytes>[<cumobjs>: <cumbytes>]
+ <thread_99_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
[...]
@ <top_frame> <frame> [...] <frame> <frame> <frame> [...]
<backtrace_aggregate>: <curobjs>: <curbytes> [<cumobjs>: <cumbytes>]
@@ -3420,8 +3668,10 @@ MAPPED_LIBRARIES:
<listitem><para><parameter>newp</parameter> is not
<constant>NULL</constant>, and <parameter>newlen</parameter> is too
large or too small. Alternatively, <parameter>*oldlenp</parameter>
- is too large or too small; in this case as much data as possible
- are read despite the error.</para></listitem>
+ is too large or too small; when it happens, except for a very few
+ cases explicitly documented otherwise, as much data as possible
+ are read despite the error, with the amount of data read being
+ recorded in <parameter>*oldlenp</parameter>.</para></listitem>
</varlistentry>
<varlistentry>
<term><errorname>ENOENT</errorname></term>
diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md
new file mode 100644
index 0000000..0a9f31c
--- /dev/null
+++ b/doc_internal/PROFILING_INTERNALS.md
@@ -0,0 +1,127 @@
+# jemalloc profiling
+This describes the mathematical basis behind jemalloc's profiling implementation, as well as the implementation tricks that make it effective. Historically, the jemalloc profiling design simply copied tcmalloc's. The implementation has since diverged, due to both the desire to record additional information, and to correct some biasing bugs.
+
+Note: this document is markdown with embedded LaTeX; different markdown renderers may not produce the expected output. Viewing with `pandoc -s PROFILING_INTERNALS.md -o PROFILING_INTERNALS.pdf` is recommended.
+
+## Some tricks in our implementation toolbag
+
+### Sampling
+Recording our metadata is quite expensive; we need to walk up the stack to get a stack trace. On top of that, we need to allocate storage to record that stack trace, and stick it somewhere where a profile-dumping call can find it. That call might happen on another thread, so we'll probably need to take a lock to do so. These costs are quite large compared to the average cost of an allocation. To manage this, we'll only sample some fraction of allocations. This will miss some of them, so our data will be incomplete, but we'll try to make up for it. We can tune our sampling rate to balance accuracy and performance.
+
+### Fast Bernoulli sampling
+Compared to our fast paths, even a `coinflip(p)` function can be quite expensive. Having to do a random-number generation and some floating point operations would be a sizeable relative cost. However (as pointed out in [[Vitter, 1987](https://dl.acm.org/doi/10.1145/23002.23003)]), if we can orchestrate our algorithm so that many of our `coinflip` calls share their parameter value, we can do better. We can sample from the geometric distribution, and initialize a counter with the result. When the counter hits 0, the `coinflip` function returns true (and reinitializes its internal counter).
+This can let us do a random-number generation once per (logical) coinflip that comes up heads, rather than once per (logical) coinflip. Since we expect to sample relatively rarely, this can be a large win.
+
+### Fast-path / slow-path thinking
+Most programs have a skewed distribution of allocations. Smaller allocations are much more frequent than large ones, but shorter lived and less common as a fraction of program memory. "Small" and "large" are necessarily sort of fuzzy terms, but if we define "small" as "allocations jemalloc puts into slabs" and "large" as the others, then it's not uncommon for small allocations to be hundreds of times more frequent than large ones, but take up around half the amount of heap space as large ones. Moreover, small allocations tend to be much cheaper than large ones (often by a factor of 20-30): they're more likely to hit in thread caches, less likely to have to do an mmap, and cheaper to fill (by the user) once the allocation has been returned.
+
+## An unbiased estimator of space consumption from (almost) arbitrary sampling strategies
+Suppose we have a sampling strategy that meets the following criteria:
+
+ - One allocation being sampled is independent of other allocations being sampled.
+ - Each allocation has a non-zero probability of being sampled.
+
+We can then estimate the bytes in live allocations through some particular stack trace as:
+
+$$ \sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]} $$
+
+where the sum ranges over some index variable of live allocations from that stack, $S_i$ is the size of the $i$'th allocation, and $I_i$ is an indicator random variable for whether or not the $i'th$ allocation is sampled. $S_i$ and $\mathrm{E}[I_i]$ are constants (the program allocations are fixed; the random variables are the sampling decisions), so taking the expectation we get
+
+$$ \sum_i S_i \mathrm{E}[I_i] \frac{1}{\mathrm{E}[I_i]}.$$
+
+This is of course $\sum_i S_i$, as we want (and, a similar calculation could be done for allocation counts as well).
+This is a fairly general strategy; note that while we require that sampling decisions be independent of one another's outcomes, they don't have to be independent of previous allocations, total bytes allocated, etc. You can imagine strategies that:
+
+ - Sample allocations at program startup at a higher rate than subsequent allocations
+ - Sample even-indexed allocations more frequently than odd-indexed ones (so long as no allocation has zero sampling probability)
+ - Let threads declare themselves as high-sampling-priority, and sample their allocations at an increased rate.
+
+These can all be fit into this framework to give an unbiased estimator.
+
+## Evaluating sampling strategies
+Not all strategies for picking allocations to sample are equally good, of course. Among unbiased estimators, the lower the variance, the lower the mean squared error. Using the estimator above, the variance is:
+
+$$
+\begin{aligned}
+& \mathrm{Var}[\sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\
+=& \sum_i \mathrm{Var}[S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\
+=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{Var}[I_i] \\
+=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{Var}[I_i] \\
+=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{E}[I_i](1 - \mathrm{E}[I_i]) \\
+=& \sum_i S_i^2 \frac{1 - \mathrm{E}[I_i]}{\mathrm{E}[I_i]}.
+\end{aligned}
+$$
+
+We can use this formula to compare various strategy choices. All else being equal, lower-variance strategies are better.
+
+## Possible sampling strategies
+Because of the desire to avoid the fast-path costs, we'd like to use our Bernoulli trick if possible. There are two obvious counters to use: a coinflip per allocation, and a coinflip per byte allocated.
+
+### Bernoulli sampling per-allocation
+An obvious strategy is to pick some large $N$, and give each allocation a $1/N$ chance of being sampled. This would let us use our Bernoulli-via-Geometric trick. Using the formula from above, we can compute the variance as:
+
+$$ \sum_i S_i^2 \frac{1 - \frac{1}{N}}{\frac{1}{N}} = (N-1) \sum_i S_i^2.$$
+
+That is, an allocation of size $Z$ contributes a term of $(N-1)Z^2$ to the variance.
+
+### Bernoulli sampling per-byte
+Another option we have is to pick some rate $R$, and give each byte a $1/R$ chance of being picked for sampling (at which point we would sample its contained allocation). The chance of an allocation of size $Z$ being sampled, then, is
+
+$$1-(1-\frac{1}{R})^{Z}$$
+
+and an allocation of size $Z$ contributes a term of
+
+$$Z^2 \frac{(1-\frac{1}{R})^{Z}}{1-(1-\frac{1}{R})^{Z}}.$$
+
+In practical settings, $R$ is large, and so this is well-approximated by
+
+$$Z^2 \frac{e^{-Z/R}}{1 - e^{-Z/R}} .$$
+
+Just to get a sense of the dynamics here, let's look at the behavior for various values of $Z$. When $Z$ is small relative to $R$, we can use $e^z \approx 1 + x$, and conclude that the variance contributed by a small-$Z$ allocation is around
+
+$$Z^2 \frac{1-Z/R}{Z/R} \approx RZ.$$
+
+When $Z$ is comparable to $R$, the variance term is near $Z^2$ (we have $\frac{e^{-Z/R}}{1 - e^{-Z/R}} = 1$ when $Z/R = \ln 2 \approx 0.693$). When $Z$ is large relative to $R$, the variance term goes to zero.
+
+## Picking a sampling strategy
+The fast-path/slow-path dynamics of allocation patterns point us towards the per-byte sampling approach:
+
+ - The quadratic increase in variance per allocation in the first approach is quite costly when heaps have a non-negligible portion of their bytes in those allocations, which is practically often the case.
+ - The Bernoulli-per-byte approach shifts more of its samples towards large allocations, which are already a slow-path.
+ - We drive several tickers (e.g. tcache gc) by bytes allocated, and report bytes-allocated as a user-visible statistic, so we have to do all the necessary bookkeeping anyways.
+
+Indeed, this is the approach we use in jemalloc. Our heap dumps record the size of the allocation and the sampling rate $R$, and jeprof unbiases by dividing by $1 - e^{-Z/R}$. The framework above would suggest dividing by $1-(1-1/R)^Z$; instead, we use the fact that $R$ is large in practical situations, and so $e^{-Z/R}$ is a good approximation (and faster to compute). (Equivalently, we may also see this as the factor that falls out from viewing sampling as a Poisson process directly).
+
+## Consequences for heap dump consumers
+Using this approach means that there are a few things users need to be aware of.
+
+### Stack counts are not proportional to allocation frequencies
+If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. Consider the case in which there are only two types of allocating call stacks in a program. Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent.
+
+### Aggregation must be done after unbiasing samples
+Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurs of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A.
+
+## An avenue for future exploration
+While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice.
+The real limiting factor in our ability to reduce estimator variance is that fact that sampling is expensive; we want to make sure we only do it on a small fraction of allocations. Our goal, then, is to pick the $p_Z$ to minimize variance given some maximum sampling rate $P$. If we define $a_Z$ to be the fraction of allocations of size $Z$, and $l_Z$ to be the fraction of allocations of size $Z$ still alive at the time of a heap dump, then we can phrase this as an optimization problem over the choices of $p_Z$:
+
+Minimize
+
+$$ \sum_Z Z^2 l_Z \frac{1-p_Z}{p_Z} $$
+
+subject to
+
+$$ \sum_Z a_Z p_Z \leq P $$
+
+Ignoring a term that doesn't depend on $p_Z$, the objective is minimized whenever
+
+$$ \sum_Z Z^2 l_Z \frac{1}{p_Z} $$
+
+is. For a particular program, $l_Z$ and $a_Z$ are just numbers that can be obtained (exactly) from existing stats introspection facilities, and we have a fairly tractable convex optimization problem (it can be framed as a second-order cone program). It would be interesting to evaluate, for various common allocation patterns, how well our current strategy adapts. Do our actual choices for $p_Z$ closely correspond to the optimal ones? How close is the variance of our choices to the variance of the optimal strategy?
+You can imagine an implementation that actually goes all the way, and makes $p_Z$ selections a tuning parameter. I don't think this is a good use of development time for the foreseeable future; but I do wonder about the answers to some of these questions.
+
+## Implementation realities
+
+The nice story above is at least partially a lie. Initially, jeprof (copying its logic from pprof) had the sum-then-unbias error described above. The current version of jemalloc does the unbiasing step on a per-allocation basis internally, so that we're always tracking what the unbiased numbers "should" be. The problem is, actually surfacing those unbiased numbers would require a breaking change to jeprof (and the various already-deployed tools that have copied its logic). Instead, we use a little bit more trickery. Since we know at dump time the numbers we want jeprof to report, we simply choose the values we'll output so that the jeprof numbers will match the true numbers. The math is described in `src/prof_data.c` (where the only cleverness is a change of variables that lets the exponentials fall out).
+
+This has the effect of making the output of jeprof (and related tools) correct, while making its inputs incorrect. This can be annoying to human readers of raw profiling dump output.
diff --git a/doc_internal/jemalloc.svg b/doc_internal/jemalloc.svg
new file mode 100644
index 0000000..5e77327
--- /dev/null
+++ b/doc_internal/jemalloc.svg
@@ -0,0 +1 @@
+<svg id="Layer_3" data-name="Layer 3" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 0 499 184.27"><defs><style>.cls-1,.cls-3{fill:none;}.cls-2{clip-path:url(#clip-path);}.cls-3{stroke:#262262;stroke-linecap:round;stroke-linejoin:round;stroke-width:4px;}</style><clipPath id="clip-path" transform="translate(-100.66 -259.87)"><path class="cls-1" d="M144.57,396c0,18.2-9.37,27.83-37.33,23.55V400.1c11.11,2.14,12.18-.27,12.18-11.5V324.11h25Zm-12.71-78.66c-9,0-15.52-1.48-15.52-12.71S122.9,292,131.86,292s15.52,1.2,15.52,12.58C147.38,315.55,141,317.29,131.86,317.29Zm50.57,76.39c-30.64,0-35.85-18.86-35.85-35.59s5.61-35.32,35.72-35.32c35.32,0,33.44,28,33.44,40.67H170.12c.54,9.5,4,14.05,11.37,14.05,6.83,0,9.64-3.34,10-7.89l24.75.13C215.48,383.38,205.84,393.68,182.43,393.68Zm-1.47-55c-6.69,0-10,2.81-10.84,12h21.41C190.73,341.9,188.18,338.69,181,338.69Zm112.78,53.65V351.4c0-4.15-1.33-8.16-6-8.16-5,0-6,3.75-6,8.16v40.94H256.42V351.4c0-4.15-.81-8.16-5.89-8.16s-6.29,3.75-6.29,8.16v40.94H219.09V324.11h14l4.15,8c2.67-4.69,10.56-9.37,18.86-9.37,7.36,0,16.19,2.14,21,9.1,3.48-5.22,11.11-9.1,20.21-9.1,19.13,0,21.54,11.37,21.54,27.16v42.41Zm83.09,0L372.41,383c-5.48,7.22-13.11,10.7-24.35,10.7-14.85,0-26.75-6-26.75-19.93,0-15.26,12.44-20.88,44.28-23,0-9.5-1.61-12.57-8.83-12.57-6.69,0-8.56,3.48-8.56,9.9H323.45c0-12.85,6.82-25.29,32.64-25.29,30,0,34.65,14.45,34.65,31.17v38.4Zm-21.54-28.63c-6.29.94-8.3,4.28-8.3,7.36,0,4.28,2.41,6.69,8.3,6.69s10.17-4.82,10.17-15.12ZM396,392.34V297.75h24.75v94.59Zm30.77,0V297.75h24.75v94.59Zm62.21,1.34c-28.09,0-34.11-18.6-34.11-35.32s6.29-35.59,34.38-35.59c27.7,0,34.12,19,34.12,35.59C523.33,375.22,516.91,393.68,488.94,393.68Zm.27-50.84c-7.89,0-11.37,4.82-11.37,15.52s3.61,15.39,11.1,15.39c7.9,0,11.38-4.42,11.38-15.39C500.32,347.79,497.24,342.84,489.21,342.84Zm69.17,50.84c-28.9,0-34.52-18.6-34.52-35.32s5.76-35.59,34.12-35.59c21.14,0,34.52,10.84,34.52,31.17H568.42c0-9.23-5.49-11.23-10.17-11.23-7,0-11.11,4.54-11.11,15.38s4,15.52,11.11,15.52c4.81,0,10-2.41,10-10.57H592.5C592.5,383.38,579,393.68,558.38,393.68Z"/></clipPath></defs><title>jemalloc Final Logo</title><g class="cls-2"><line class="cls-3" x1="345" y1="182.27" x2="345" y2="2"/><line class="cls-3" x1="225" y1="182.27" x2="225" y2="2"/><line class="cls-3" x1="105" y1="182.27" x2="105" y2="2"/><line class="cls-3" x1="43" y1="182.27" x2="43" y2="2"/><line class="cls-3" x1="475" y1="182.27" x2="475" y2="2"/><line class="cls-3" x1="195" y1="182.27" x2="195" y2="2"/><line class="cls-3" x1="75" y1="182.27" x2="75" y2="2"/><line class="cls-3" x1="337" y1="182.27" x2="337" y2="2"/><line class="cls-3" x1="215" y1="182.27" x2="215" y2="2"/><line class="cls-3" x1="95" y1="182.27" x2="95" y2="2"/><line class="cls-3" x1="415" y1="182.27" x2="415" y2="2"/><line class="cls-3" x1="385" y1="182.27" x2="385" y2="2"/><line class="cls-3" x1="183" y1="182.27" x2="183" y2="2"/><line class="cls-3" x1="65" y1="182.27" x2="65" y2="2"/><line class="cls-3" x1="173" y1="182.27" x2="173" y2="2"/><line class="cls-3" x1="145" y1="182.27" x2="145" y2="2"/><line class="cls-3" x1="163" y1="182.27" x2="163" y2="2"/><line class="cls-3" x1="460" y1="182.27" x2="460" y2="2"/><line class="cls-3" x1="281" y1="182.27" x2="281" y2="2"/><line class="cls-3" x1="313" y1="182.27" x2="313" y2="2"/><line class="cls-3" x1="252" y1="182.27" x2="252" y2="2"/><line class="cls-3" x1="450" y1="182.27" x2="450" y2="2"/><line class="cls-3" x1="271" y1="182.27" x2="271" y2="2"/><line class="cls-3" x1="332" y1="182.27" x2="332" y2="2"/><line class="cls-3" x1="203" y1="182.27" x2="203" y2="2"/><line class="cls-3" x1="13" y1="182.27" x2="13" y2="2"/><line class="cls-3" x1="373" y1="182.27" x2="373" y2="2"/><line class="cls-3" x1="354" y1="182.27" x2="354" y2="2"/><line class="cls-3" x1="235" y1="182.27" x2="235" y2="2"/><line class="cls-3" x1="115" y1="182.27" x2="115" y2="2"/><line class="cls-3" x1="53" y1="182.27" x2="53" y2="2"/><line class="cls-3" x1="484" y1="182.27" x2="484" y2="2"/><line class="cls-3" x1="405" y1="182.27" x2="405" y2="2"/><line class="cls-3" x1="85" y1="182.27" x2="85" y2="2"/><line class="cls-3" x1="225" y1="182.27" x2="225" y2="2"/><line class="cls-3" x1="105" y1="182.27" x2="105" y2="2"/><line class="cls-3" x1="43" y1="182.27" x2="43" y2="2"/><line class="cls-3" x1="435" y1="182.27" x2="435" y2="2"/><line class="cls-3" x1="123" y1="182.27" x2="123" y2="2"/><line class="cls-3" x1="75" y1="182.27" x2="75" y2="2"/><line class="cls-3" x1="183" y1="182.27" x2="183" y2="2"/><line class="cls-3" x1="155" y1="182.27" x2="155" y2="2"/><line class="cls-3" x1="173" y1="182.27" x2="173" y2="2"/><line class="cls-3" x1="145" y1="182.27" x2="145" y2="2"/><line class="cls-3" x1="470" y1="182.27" x2="470" y2="2"/><line class="cls-3" x1="292" y1="182.27" x2="292" y2="2"/><line class="cls-3" x1="262" y1="182.27" x2="262" y2="2"/><line class="cls-3" x1="460" y1="182.27" x2="460" y2="2"/><line class="cls-3" x1="281" y1="182.27" x2="281" y2="2"/><line class="cls-3" x1="313" y1="182.27" x2="313" y2="2"/><line class="cls-3" x1="243" y1="182.27" x2="243" y2="2"/><line class="cls-3" x1="22" y1="182.27" x2="22" y2="2"/><line class="cls-3" x1="383" y1="182.27" x2="383" y2="2"/><line class="cls-3" x1="5" y1="182.27" x2="5" y2="2"/><line class="cls-3" x1="133" y1="182.27" x2="133" y2="2"/><line class="cls-3" x1="362" y1="182.27" x2="362" y2="2"/><line class="cls-3" x1="288" y1="182.27" x2="288" y2="2"/><line class="cls-3" x1="298" y1="182.27" x2="298" y2="2"/><line class="cls-3" x1="423" y1="182.27" x2="423" y2="2"/><line class="cls-3" x1="369" y1="182.27" x2="369" y2="2"/><line class="cls-3" x1="490" y1="182.27" x2="490" y2="2"/><line class="cls-3" x1="2" y1="182.27" x2="2" y2="2"/><line class="cls-3" x1="493" y1="182.27" x2="493" y2="2"/><line class="cls-3" x1="225" y1="182.27" x2="225" y2="2"/><line class="cls-3" x1="105" y1="182.27" x2="105" y2="2"/><line class="cls-3" x1="43" y1="182.27" x2="43" y2="2"/><line class="cls-3" x1="475" y1="182.27" x2="475" y2="2"/><line class="cls-3" x1="195" y1="182.27" x2="195" y2="2"/><line class="cls-3" x1="75" y1="182.27" x2="75" y2="2"/><line class="cls-3" x1="337" y1="182.27" x2="337" y2="2"/><line class="cls-3" x1="215" y1="182.27" x2="215" y2="2"/><line class="cls-3" x1="95" y1="182.27" x2="95" y2="2"/><line class="cls-3" x1="415" y1="182.27" x2="415" y2="2"/><line class="cls-3" x1="385" y1="182.27" x2="385" y2="2"/><line class="cls-3" x1="183" y1="182.27" x2="183" y2="2"/><line class="cls-3" x1="65" y1="182.27" x2="65" y2="2"/><line class="cls-3" x1="173" y1="182.27" x2="173" y2="2"/><line class="cls-3" x1="145" y1="182.27" x2="145" y2="2"/><line class="cls-3" x1="163" y1="182.27" x2="163" y2="2"/><line class="cls-3" x1="460" y1="182.27" x2="460" y2="2"/><line class="cls-3" x1="281" y1="182.27" x2="281" y2="2"/><line class="cls-3" x1="313" y1="182.27" x2="313" y2="2"/><line class="cls-3" x1="252" y1="182.27" x2="252" y2="2"/><line class="cls-3" x1="450" y1="182.27" x2="450" y2="2"/><line class="cls-3" x1="271" y1="182.27" x2="271" y2="2"/><line class="cls-3" x1="306" y1="182.27" x2="306" y2="2"/><line class="cls-3" x1="203" y1="182.27" x2="203" y2="2"/><line class="cls-3" x1="13" y1="182.27" x2="13" y2="2"/><line class="cls-3" x1="373" y1="182.27" x2="373" y2="2"/><line class="cls-3" x1="354" y1="182.27" x2="354" y2="2"/><line class="cls-3" x1="235" y1="182.27" x2="235" y2="2"/><line class="cls-3" x1="115" y1="182.27" x2="115" y2="2"/><line class="cls-3" x1="53" y1="182.27" x2="53" y2="2"/><line class="cls-3" x1="484" y1="182.27" x2="484" y2="2"/><line class="cls-3" x1="405" y1="182.27" x2="405" y2="2"/><line class="cls-3" x1="85" y1="182.27" x2="85" y2="2"/><line class="cls-3" x1="225" y1="182.27" x2="225" y2="2"/><line class="cls-3" x1="105" y1="182.27" x2="105" y2="2"/><line class="cls-3" x1="43" y1="182.27" x2="43" y2="2"/><line class="cls-3" x1="435" y1="182.27" x2="435" y2="2"/><line class="cls-3" x1="123" y1="182.27" x2="123" y2="2"/><line class="cls-3" x1="75" y1="182.27" x2="75" y2="2"/><line class="cls-3" x1="183" y1="182.27" x2="183" y2="2"/><line class="cls-3" x1="155" y1="182.27" x2="155" y2="2"/><line class="cls-3" x1="173" y1="182.27" x2="173" y2="2"/><line class="cls-3" x1="145" y1="182.27" x2="145" y2="2"/><line class="cls-3" x1="470" y1="182.27" x2="470" y2="2"/><line class="cls-3" x1="292" y1="182.27" x2="292" y2="2"/><line class="cls-3" x1="262" y1="182.27" x2="262" y2="2"/><line class="cls-3" x1="460" y1="182.27" x2="460" y2="2"/><line class="cls-3" x1="281" y1="182.27" x2="281" y2="2"/><line class="cls-3" x1="328" y1="182.27" x2="328" y2="2"/><line class="cls-3" x1="243" y1="182.27" x2="243" y2="2"/><line class="cls-3" x1="22" y1="182.27" x2="22" y2="2"/><line class="cls-3" x1="383" y1="182.27" x2="383" y2="2"/><line class="cls-3" x1="5" y1="182.27" x2="5" y2="2"/><line class="cls-3" x1="32" y1="182.27" x2="32" y2="2"/><line class="cls-3" x1="133" y1="182.27" x2="133" y2="2"/><line class="cls-3" x1="362" y1="182.27" x2="362" y2="2"/><line class="cls-3" x1="288" y1="182.27" x2="288" y2="2"/><line class="cls-3" x1="298" y1="182.27" x2="298" y2="2"/><line class="cls-3" x1="423" y1="182.27" x2="423" y2="2"/><line class="cls-3" x1="369" y1="182.27" x2="369" y2="2"/><line class="cls-3" x1="490" y1="182.27" x2="490" y2="2"/><line class="cls-3" x1="2" y1="182.27" x2="2" y2="2"/><line class="cls-3" x1="493" y1="182.27" x2="493" y2="2"/><line class="cls-3" x1="349" y1="182.27" x2="349" y2="2"/><line class="cls-3" x1="229" y1="182.27" x2="229" y2="2"/><line class="cls-3" x1="109" y1="182.27" x2="109" y2="2"/><line class="cls-3" x1="47" y1="182.27" x2="47" y2="2"/><line class="cls-3" x1="479" y1="182.27" x2="479" y2="2"/><line class="cls-3" x1="399" y1="182.27" x2="399" y2="2"/><line class="cls-3" x1="199" y1="182.27" x2="199" y2="2"/><line class="cls-3" x1="79" y1="182.27" x2="79" y2="2"/><line class="cls-3" x1="341" y1="182.27" x2="341" y2="2"/><line class="cls-3" x1="219" y1="182.27" x2="219" y2="2"/><line class="cls-3" x1="99" y1="182.27" x2="99" y2="2"/><line class="cls-3" x1="41" y1="182.27" x2="41" y2="2"/><line class="cls-3" x1="419" y1="182.27" x2="419" y2="2"/><line class="cls-3" x1="389" y1="182.27" x2="389" y2="2"/><line class="cls-3" x1="187" y1="182.27" x2="187" y2="2"/><line class="cls-3" x1="69" y1="182.27" x2="69" y2="2"/><line class="cls-3" x1="177" y1="182.27" x2="177" y2="2"/><line class="cls-3" x1="149" y1="182.27" x2="149" y2="2"/><line class="cls-3" x1="464" y1="182.27" x2="464" y2="2"/><line class="cls-3" x1="285" y1="182.27" x2="285" y2="2"/><line class="cls-3" x1="317" y1="182.27" x2="317" y2="2"/><line class="cls-3" x1="454" y1="182.27" x2="454" y2="2"/><line class="cls-3" x1="275" y1="182.27" x2="275" y2="2"/><line class="cls-3" x1="308" y1="182.27" x2="308" y2="2"/><line class="cls-3" x1="207" y1="182.27" x2="207" y2="2"/><line class="cls-3" x1="17" y1="182.27" x2="17" y2="2"/><line class="cls-3" x1="377" y1="182.27" x2="377" y2="2"/><line class="cls-3" x1="358" y1="182.27" x2="358" y2="2"/><line class="cls-3" x1="238" y1="182.27" x2="238" y2="2"/><line class="cls-3" x1="119" y1="182.27" x2="119" y2="2"/><line class="cls-3" x1="488" y1="182.27" x2="488" y2="2"/><line class="cls-3" x1="409" y1="182.27" x2="409" y2="2"/><line class="cls-3" x1="229" y1="182.27" x2="229" y2="2"/><line class="cls-3" x1="109" y1="182.27" x2="109" y2="2"/><line class="cls-3" x1="47" y1="182.27" x2="47" y2="2"/><line class="cls-3" x1="439" y1="182.27" x2="439" y2="2"/><line class="cls-3" x1="399" y1="182.27" x2="399" y2="2"/><line class="cls-3" x1="127" y1="182.27" x2="127" y2="2"/><line class="cls-3" x1="79" y1="182.27" x2="79" y2="2"/><line class="cls-3" x1="187" y1="182.27" x2="187" y2="2"/><line class="cls-3" x1="159" y1="182.27" x2="159" y2="2"/><line class="cls-3" x1="177" y1="182.27" x2="177" y2="2"/><line class="cls-3" x1="149" y1="182.27" x2="149" y2="2"/><line class="cls-3" x1="474" y1="182.27" x2="474" y2="2"/><line class="cls-3" x1="266" y1="182.27" x2="266" y2="2"/><line class="cls-3" x1="464" y1="182.27" x2="464" y2="2"/><line class="cls-3" x1="285" y1="182.27" x2="285" y2="2"/><line class="cls-3" x1="317" y1="182.27" x2="317" y2="2"/><line class="cls-3" x1="247" y1="182.27" x2="247" y2="2"/><line class="cls-3" x1="26" y1="182.27" x2="26" y2="2"/><line class="cls-3" x1="387" y1="182.27" x2="387" y2="2"/><line class="cls-3" x1="9" y1="182.27" x2="9" y2="2"/><line class="cls-3" x1="137" y1="182.27" x2="137" y2="2"/><line class="cls-3" x1="292" y1="182.27" x2="292" y2="2"/><line class="cls-3" x1="373" y1="182.27" x2="373" y2="2"/><line class="cls-3" x1="56" y1="182.27" x2="56" y2="2"/><line class="cls-3" x1="494" y1="182.27" x2="494" y2="2"/><line class="cls-3" x1="497" y1="182.27" x2="497" y2="2"/><line class="cls-3" x1="349" y1="182.27" x2="349" y2="2"/><line class="cls-3" x1="229" y1="182.27" x2="229" y2="2"/><line class="cls-3" x1="109" y1="182.27" x2="109" y2="2"/><line class="cls-3" x1="47" y1="182.27" x2="47" y2="2"/><line class="cls-3" x1="479" y1="182.27" x2="479" y2="2"/><line class="cls-3" x1="399" y1="182.27" x2="399" y2="2"/><line class="cls-3" x1="199" y1="182.27" x2="199" y2="2"/><line class="cls-3" x1="79" y1="182.27" x2="79" y2="2"/><line class="cls-3" x1="341" y1="182.27" x2="341" y2="2"/><line class="cls-3" x1="219" y1="182.27" x2="219" y2="2"/><line class="cls-3" x1="99" y1="182.27" x2="99" y2="2"/><line class="cls-3" x1="41" y1="182.27" x2="41" y2="2"/><line class="cls-3" x1="419" y1="182.27" x2="419" y2="2"/><line class="cls-3" x1="389" y1="182.27" x2="389" y2="2"/><line class="cls-3" x1="187" y1="182.27" x2="187" y2="2"/><line class="cls-3" x1="69" y1="182.27" x2="69" y2="2"/><line class="cls-3" x1="177" y1="182.27" x2="177" y2="2"/><line class="cls-3" x1="149" y1="182.27" x2="149" y2="2"/><line class="cls-3" x1="141" y1="182.27" x2="141" y2="2"/><line class="cls-3" x1="464" y1="182.27" x2="464" y2="2"/><line class="cls-3" x1="285" y1="182.27" x2="285" y2="2"/><line class="cls-3" x1="317" y1="182.27" x2="317" y2="2"/><line class="cls-3" x1="454" y1="182.27" x2="454" y2="2"/><line class="cls-3" x1="275" y1="182.27" x2="275" y2="2"/><line class="cls-3" x1="308" y1="182.27" x2="308" y2="2"/><line class="cls-3" x1="207" y1="182.27" x2="207" y2="2"/><line class="cls-3" x1="17" y1="182.27" x2="17" y2="2"/><line class="cls-3" x1="377" y1="182.27" x2="377" y2="2"/><line class="cls-3" x1="119" y1="182.27" x2="119" y2="2"/><line class="cls-3" x1="488" y1="182.27" x2="488" y2="2"/><line class="cls-3" x1="409" y1="182.27" x2="409" y2="2"/><line class="cls-3" x1="229" y1="182.27" x2="229" y2="2"/><line class="cls-3" x1="109" y1="182.27" x2="109" y2="2"/><line class="cls-3" x1="47" y1="182.27" x2="47" y2="2"/><line class="cls-3" x1="439" y1="182.27" x2="439" y2="2"/><line class="cls-3" x1="399" y1="182.27" x2="399" y2="2"/><line class="cls-3" x1="127" y1="182.27" x2="127" y2="2"/><line class="cls-3" x1="79" y1="182.27" x2="79" y2="2"/><line class="cls-3" x1="187" y1="182.27" x2="187" y2="2"/><line class="cls-3" x1="159" y1="182.27" x2="159" y2="2"/><line class="cls-3" x1="177" y1="182.27" x2="177" y2="2"/><line class="cls-3" x1="149" y1="182.27" x2="149" y2="2"/><line class="cls-3" x1="474" y1="182.27" x2="474" y2="2"/><line class="cls-3" x1="295" y1="182.27" x2="295" y2="2"/><line class="cls-3" x1="266" y1="182.27" x2="266" y2="2"/><line class="cls-3" x1="464" y1="182.27" x2="464" y2="2"/><line class="cls-3" x1="285" y1="182.27" x2="285" y2="2"/><line class="cls-3" x1="317" y1="182.27" x2="317" y2="2"/><line class="cls-3" x1="247" y1="182.27" x2="247" y2="2"/><line class="cls-3" x1="58" y1="182.27" x2="58" y2="2"/><line class="cls-3" x1="387" y1="182.27" x2="387" y2="2"/><line class="cls-3" x1="9" y1="182.27" x2="9" y2="2"/><line class="cls-3" x1="292" y1="182.27" x2="292" y2="2"/><line class="cls-3" x1="301" y1="182.27" x2="301" y2="2"/><line class="cls-3" x1="428" y1="182.27" x2="428" y2="2"/><line class="cls-3" x1="373" y1="182.27" x2="373" y2="2"/><line class="cls-3" x1="56" y1="182.27" x2="56" y2="2"/><line class="cls-3" x1="494" y1="182.27" x2="494" y2="2"/><line class="cls-3" x1="497" y1="182.27" x2="497" y2="2"/></g></svg> \ No newline at end of file
diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h
new file mode 100644
index 0000000..6c2e84e
--- /dev/null
+++ b/include/jemalloc/internal/activity_callback.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H
+#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H
+
+/*
+ * The callback to be executed "periodically", in response to some amount of
+ * allocator activity.
+ *
+ * This callback need not be computing any sort of peak (although that's the
+ * intended first use case), but we drive it from the peak counter, so it's
+ * keeps things tidy to keep it here.
+ *
+ * The calls to this thunk get driven by the peak_event module.
+ */
+#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL}
+typedef void (*activity_callback_t)(void *uctx, uint64_t allocated,
+ uint64_t deallocated);
+typedef struct activity_callback_thunk_s activity_callback_thunk_t;
+struct activity_callback_thunk_s {
+ activity_callback_t callback;
+ void *uctx;
+};
+
+#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h
index a4523ae..e6fceaa 100644
--- a/include/jemalloc/internal/arena_externs.h
+++ b/include/jemalloc/internal/arena_externs.h
@@ -2,59 +2,67 @@
#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H
#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/div.h"
#include "jemalloc/internal/extent_dss.h"
#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/pages.h"
#include "jemalloc/internal/stats.h"
+/*
+ * When the amount of pages to be purged exceeds this amount, deferred purge
+ * should happen.
+ */
+#define ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD UINT64_C(1024)
+
extern ssize_t opt_dirty_decay_ms;
extern ssize_t opt_muzzy_decay_ms;
extern percpu_arena_mode_t opt_percpu_arena;
extern const char *percpu_arena_mode_names[];
-extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS];
+extern div_info_t arena_binind_div_info[SC_NBINS];
+
extern malloc_mutex_t arenas_lock;
+extern emap_t arena_emap_global;
extern size_t opt_oversize_threshold;
extern size_t oversize_threshold;
+/*
+ * arena_bin_offsets[binind] is the offset of the first bin shard for size class
+ * binind.
+ */
+extern uint32_t arena_bin_offsets[SC_NBINS];
+
void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena,
unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms,
ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy);
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads,
const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms,
size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats,
- bin_stats_t *bstats, arena_stats_large_t *lstats,
- arena_stats_extents_t *estats);
-void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent);
-#ifdef JEMALLOC_JET
-size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr);
-#endif
-extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
- size_t usize, size_t alignment, bool *zero);
+ bin_stats_data_t *bstats, arena_stats_large_t *lstats,
+ pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats);
+void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena);
+edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena,
+ size_t usize, size_t alignment, bool zero);
void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena,
- extent_t *extent);
+ edata_t *edata);
void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena,
- extent_t *extent, size_t oldsize);
+ edata_t *edata, size_t oldsize);
void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena,
- extent_t *extent, size_t oldsize);
-ssize_t arena_dirty_decay_ms_get(arena_t *arena);
-bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
-ssize_t arena_muzzy_decay_ms_get(arena_t *arena);
-bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms);
+ edata_t *edata, size_t oldsize);
+bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state,
+ ssize_t decay_ms);
+ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state);
void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread,
bool all);
+uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena);
+void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena);
void arena_reset(tsd_t *tsd, arena_t *arena);
void arena_destroy(tsd_t *tsd, arena_t *arena);
-void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
- cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes);
-void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info,
- bool zero);
-
-typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *);
-extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small;
+void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena,
+ cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind,
+ const unsigned nfill);
void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size,
szind_t ind, bool zero);
@@ -63,8 +71,12 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize,
void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize);
void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
bool slow_path);
-void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
- szind_t binind, extent_t *extent, void *ptr);
+void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab);
+
+void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena,
+ edata_t *slab, bin_t *bin);
+void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena,
+ edata_t *slab, bin_t *bin);
void arena_dalloc_small(tsdn_t *tsdn, void *ptr);
bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size,
size_t extra, bool zero, size_t *newsize);
@@ -72,6 +84,9 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize,
size_t size, size_t alignment, bool zero, tcache_t *tcache,
hook_ralloc_args_t *hook_args);
dss_prec_t arena_dss_prec_get(arena_t *arena);
+ehooks_t *arena_get_ehooks(arena_t *arena);
+extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena,
+ extent_hooks_t *extent_hooks);
bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec);
ssize_t arena_dirty_decay_ms_default_get(void);
bool arena_dirty_decay_ms_default_set(ssize_t decay_ms);
@@ -82,14 +97,15 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena,
unsigned arena_nthreads_get(arena_t *arena, bool internal);
void arena_nthreads_inc(arena_t *arena, bool internal);
void arena_nthreads_dec(arena_t *arena, bool internal);
-size_t arena_extent_sn_next(arena_t *arena);
-arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
+arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
bool arena_init_huge(void);
bool arena_is_huge(unsigned arena_ind);
arena_t *arena_choose_huge(tsd_t *tsd);
-bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind,
unsigned *binshard);
-void arena_boot(sc_data_t *sc_data);
+size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind,
+ void **ptrs, size_t nfill, bool zero);
+bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa);
void arena_prefork0(tsdn_t *tsdn, arena_t *arena);
void arena_prefork1(tsdn_t *tsdn, arena_t *arena);
void arena_prefork2(tsdn_t *tsdn, arena_t *arena);
@@ -98,6 +114,7 @@ void arena_prefork4(tsdn_t *tsdn, arena_t *arena);
void arena_prefork5(tsdn_t *tsdn, arena_t *arena);
void arena_prefork6(tsdn_t *tsdn, arena_t *arena);
void arena_prefork7(tsdn_t *tsdn, arena_t *arena);
+void arena_prefork8(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena);
void arena_postfork_child(tsdn_t *tsdn, arena_t *arena);
diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h
index 9abf7f6..8568358 100644
--- a/include/jemalloc/internal/arena_inlines_a.h
+++ b/include/jemalloc/internal/arena_inlines_a.h
@@ -3,7 +3,7 @@
static inline unsigned
arena_ind_get(const arena_t *arena) {
- return base_ind_get(arena->base);
+ return arena->ind;
}
static inline void
@@ -21,37 +21,4 @@ arena_internal_get(arena_t *arena) {
return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED);
}
-static inline bool
-arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) {
- cassert(config_prof);
-
- if (likely(prof_interval == 0 || !prof_active_get_unlocked())) {
- return false;
- }
-
- return prof_accum_add(tsdn, &arena->prof_accum, accumbytes);
-}
-
-static inline void
-percpu_arena_update(tsd_t *tsd, unsigned cpu) {
- assert(have_percpu_arena);
- arena_t *oldarena = tsd_arena_get(tsd);
- assert(oldarena != NULL);
- unsigned oldind = arena_ind_get(oldarena);
-
- if (oldind != cpu) {
- unsigned newind = cpu;
- arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true);
- assert(newarena != NULL);
-
- /* Set new arena/tcache associations. */
- arena_migrate(tsd, oldind, newind);
- tcache_t *tcache = tcache_get(tsd);
- if (tcache != NULL) {
- tcache_arena_reassociate(tsd_tsdn(tsd), tcache,
- newarena);
- }
- }
-}
-
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */
diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h
index dd92657..fa81537 100644
--- a/include/jemalloc/internal/arena_inlines_b.h
+++ b/include/jemalloc/internal/arena_inlines_b.h
@@ -1,16 +1,20 @@
#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H
#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H
+#include "jemalloc/internal/div.h"
+#include "jemalloc/internal/emap.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/safety_check.h"
#include "jemalloc/internal/sc.h"
#include "jemalloc/internal/sz.h"
#include "jemalloc/internal/ticker.h"
-JEMALLOC_ALWAYS_INLINE bool
-arena_has_default_hooks(arena_t *arena) {
- return (extent_hooks_get(arena) == &extent_hooks_default);
+static inline arena_t *
+arena_get_from_edata(edata_t *edata) {
+ return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)],
+ ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE arena_t *
@@ -34,127 +38,109 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) {
return arena_choose(tsd, NULL);
}
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx,
+ prof_info_t *prof_info, bool reset_recent) {
cassert(config_prof);
assert(ptr != NULL);
+ assert(prof_info != NULL);
+
+ edata_t *edata = NULL;
+ bool is_slab;
/* Static check. */
if (alloc_ctx == NULL) {
- const extent_t *extent = iealloc(tsdn, ptr);
- if (unlikely(!extent_slab_get(extent))) {
- return large_prof_tctx_get(tsdn, extent);
- }
+ edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
+ ptr);
+ is_slab = edata_slab_get(edata);
+ } else if (unlikely(!(is_slab = alloc_ctx->slab))) {
+ edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
+ ptr);
+ }
+
+ if (unlikely(!is_slab)) {
+ /* edata must have been initialized at this point. */
+ assert(edata != NULL);
+ large_prof_info_get(tsd, edata, prof_info, reset_recent);
} else {
- if (unlikely(!alloc_ctx->slab)) {
- return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr));
- }
+ prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U;
+ /*
+ * No need to set other fields in prof_info; they will never be
+ * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U.
+ */
}
- return (prof_tctx_t *)(uintptr_t)1U;
}
JEMALLOC_ALWAYS_INLINE void
-arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
- alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+arena_prof_tctx_reset(tsd_t *tsd, const void *ptr,
+ emap_alloc_ctx_t *alloc_ctx) {
cassert(config_prof);
assert(ptr != NULL);
/* Static check. */
if (alloc_ctx == NULL) {
- extent_t *extent = iealloc(tsdn, ptr);
- if (unlikely(!extent_slab_get(extent))) {
- large_prof_tctx_set(tsdn, extent, tctx);
+ edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
+ &arena_emap_global, ptr);
+ if (unlikely(!edata_slab_get(edata))) {
+ large_prof_tctx_reset(edata);
}
} else {
if (unlikely(!alloc_ctx->slab)) {
- large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx);
+ edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd),
+ &arena_emap_global, ptr);
+ large_prof_tctx_reset(edata);
}
}
}
-static inline void
-arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
+JEMALLOC_ALWAYS_INLINE void
+arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
cassert(config_prof);
assert(ptr != NULL);
- extent_t *extent = iealloc(tsdn, ptr);
- assert(!extent_slab_get(extent));
+ edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global,
+ ptr);
+ assert(!edata_slab_get(edata));
- large_prof_tctx_reset(tsdn, extent);
-}
-
-JEMALLOC_ALWAYS_INLINE nstime_t
-arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr,
- alloc_ctx_t *alloc_ctx) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- extent_t *extent = iealloc(tsdn, ptr);
- /*
- * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're
- * sure we have a sampled allocation.
- */
- assert(!extent_slab_get(extent));
- return large_prof_alloc_time_get(extent);
+ large_prof_tctx_reset(edata);
}
JEMALLOC_ALWAYS_INLINE void
-arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
- nstime_t t) {
+arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx,
+ size_t size) {
cassert(config_prof);
- assert(ptr != NULL);
- extent_t *extent = iealloc(tsdn, ptr);
- assert(!extent_slab_get(extent));
- large_prof_alloc_time_set(extent, t);
+ assert(!edata_slab_get(edata));
+ large_prof_info_set(edata, tctx, size);
}
JEMALLOC_ALWAYS_INLINE void
arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) {
- tsd_t *tsd;
- ticker_t *decay_ticker;
-
if (unlikely(tsdn_null(tsdn))) {
return;
}
- tsd = tsdn_tsd(tsdn);
- decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena));
- if (unlikely(decay_ticker == NULL)) {
- return;
- }
- if (unlikely(ticker_ticks(decay_ticker, nticks))) {
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ /*
+ * We use the ticker_geom_t to avoid having per-arena state in the tsd.
+ * Instead of having a countdown-until-decay timer running for every
+ * arena in every thread, we flip a coin once per tick, whose
+ * probability of coming up heads is 1/nticks; this is effectively the
+ * operation of the ticker_geom_t. Each arena has the same chance of a
+ * coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can
+ * use a single ticker for all of them.
+ */
+ ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd);
+ uint64_t *prng_state = tsd_prng_statep_get(tsd);
+ if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) {
arena_decay(tsdn, arena, false, false);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_decay_tick(tsdn_t *tsdn, arena_t *arena) {
- malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx);
- malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx);
-
arena_decay_ticks(tsdn, arena, 1);
}
-/* Purge a single extent to retained / unmapped directly. */
-JEMALLOC_ALWAYS_INLINE void
-arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks,
- extent_t *extent) {
- size_t extent_size = extent_size_get(extent);
- extent_dalloc_wrapper(tsdn, arena,
- r_extent_hooks, extent);
- if (config_stats) {
- /* Update stats accordingly. */
- arena_stats_lock(tsdn, &arena->stats);
- arena_stats_add_u64(tsdn, &arena->stats,
- &arena->decay_dirty.stats->nmadvise, 1);
- arena_stats_add_u64(tsdn, &arena->stats,
- &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE);
- arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped,
- extent_size);
- arena_stats_unlock(tsdn, &arena->stats);
- }
-}
-
JEMALLOC_ALWAYS_INLINE void *
arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
tcache_t *tcache, bool slow_path) {
@@ -178,21 +164,19 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero,
JEMALLOC_ALWAYS_INLINE arena_t *
arena_aalloc(tsdn_t *tsdn, const void *ptr) {
- return extent_arena_get(iealloc(tsdn, ptr));
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr);
+ unsigned arena_ind = edata_arena_ind_get(edata);
+ return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED);
}
JEMALLOC_ALWAYS_INLINE size_t
arena_salloc(tsdn_t *tsdn, const void *ptr) {
assert(ptr != NULL);
+ emap_alloc_ctx_t alloc_ctx;
+ emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
+ assert(alloc_ctx.szind != SC_NSIZES);
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
-
- szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true);
- assert(szind != SC_NSIZES);
-
- return sz_index2size(szind);
+ return sz_index2size(alloc_ctx.szind);
}
JEMALLOC_ALWAYS_INLINE size_t
@@ -206,26 +190,53 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) {
* failure.
*/
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
-
- extent_t *extent;
- szind_t szind;
- if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, false, &extent, &szind)) {
+ emap_full_alloc_ctx_t full_alloc_ctx;
+ bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global,
+ ptr, &full_alloc_ctx);
+ if (missing) {
return 0;
}
- if (extent == NULL) {
+ if (full_alloc_ctx.edata == NULL) {
return 0;
}
- assert(extent_state_get(extent) == extent_state_active);
+ assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active);
/* Only slab members should be looked up via interior pointers. */
- assert(extent_addr_get(extent) == ptr || extent_slab_get(extent));
+ assert(edata_addr_get(full_alloc_ctx.edata) == ptr
+ || edata_slab_get(full_alloc_ctx.edata));
+
+ assert(full_alloc_ctx.szind != SC_NSIZES);
+
+ return sz_index2size(full_alloc_ctx.szind);
+}
- assert(szind != SC_NSIZES);
+JEMALLOC_ALWAYS_INLINE bool
+large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) {
+ if (!config_opt_safety_checks) {
+ return false;
+ }
+
+ /*
+ * Eagerly detect double free and sized dealloc bugs for large sizes.
+ * The cost is low enough (as edata will be accessed anyway) to be
+ * enabled all the time.
+ */
+ if (unlikely(edata == NULL ||
+ edata_state_get(edata) != extent_state_active)) {
+ safety_check_fail("Invalid deallocation detected: "
+ "pages being freed (%p) not currently active, "
+ "possibly caused by double free bugs.",
+ (uintptr_t)edata_addr_get(edata));
+ return true;
+ }
+ size_t input_size = sz_index2size(szind);
+ if (unlikely(input_size != edata_usize_get(edata))) {
+ safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr,
+ /* true_size */ edata_usize_get(edata), input_size);
+ return true;
+ }
- return sz_index2size(szind);
+ return false;
}
static inline void
@@ -233,8 +244,13 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) {
if (config_prof && unlikely(szind < SC_NBINS)) {
arena_dalloc_promoted(tsdn, ptr, NULL, true);
} else {
- extent_t *extent = iealloc(tsdn, ptr);
- large_dalloc(tsdn, extent);
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
+ ptr);
+ if (large_dalloc_safety_checks(edata, ptr, szind)) {
+ /* See the comment in isfree. */
+ return;
+ }
+ large_dalloc(tsdn, edata);
}
}
@@ -242,27 +258,22 @@ static inline void
arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) {
assert(ptr != NULL);
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
-
- szind_t szind;
- bool slab;
- rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr,
- true, &szind, &slab);
+ emap_alloc_ctx_t alloc_ctx;
+ emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx);
if (config_debug) {
- extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
- rtree_ctx, (uintptr_t)ptr, true);
- assert(szind == extent_szind_get(extent));
- assert(szind < SC_NSIZES);
- assert(slab == extent_slab_get(extent));
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
+ ptr);
+ assert(alloc_ctx.szind == edata_szind_get(edata));
+ assert(alloc_ctx.szind < SC_NSIZES);
+ assert(alloc_ctx.slab == edata_slab_get(edata));
}
- if (likely(slab)) {
+ if (likely(alloc_ctx.slab)) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
- arena_dalloc_large_no_tcache(tsdn, ptr, szind);
+ arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
}
}
@@ -277,14 +288,19 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind,
slow_path);
}
} else {
- extent_t *extent = iealloc(tsdn, ptr);
- large_dalloc(tsdn, extent);
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
+ ptr);
+ if (large_dalloc_safety_checks(edata, ptr, szind)) {
+ /* See the comment in isfree. */
+ return;
+ }
+ large_dalloc(tsdn, edata);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
- alloc_ctx_t *alloc_ctx, bool slow_path) {
+ emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
@@ -293,34 +309,30 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
return;
}
- szind_t szind;
- bool slab;
- rtree_ctx_t *rtree_ctx;
- if (alloc_ctx != NULL) {
- szind = alloc_ctx->szind;
- slab = alloc_ctx->slab;
- assert(szind != SC_NSIZES);
+ emap_alloc_ctx_t alloc_ctx;
+ if (caller_alloc_ctx != NULL) {
+ alloc_ctx = *caller_alloc_ctx;
} else {
- rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
- rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true, &szind, &slab);
+ util_assume(!tsdn_null(tsdn));
+ emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
+ &alloc_ctx);
}
if (config_debug) {
- rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
- extent_t *extent = rtree_extent_read(tsdn, &extents_rtree,
- rtree_ctx, (uintptr_t)ptr, true);
- assert(szind == extent_szind_get(extent));
- assert(szind < SC_NSIZES);
- assert(slab == extent_slab_get(extent));
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
+ ptr);
+ assert(alloc_ctx.szind == edata_szind_get(edata));
+ assert(alloc_ctx.szind < SC_NSIZES);
+ assert(alloc_ctx.slab == edata_slab_get(edata));
}
- if (likely(slab)) {
+ if (likely(alloc_ctx.slab)) {
/* Small allocation. */
- tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
- slow_path);
+ tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+ alloc_ctx.szind, slow_path);
} else {
- arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
+ arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
+ slow_path);
}
}
@@ -329,47 +341,43 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) {
assert(ptr != NULL);
assert(size <= SC_LARGE_MAXCLASS);
- szind_t szind;
- bool slab;
+ emap_alloc_ctx_t alloc_ctx;
if (!config_prof || !opt_prof) {
/*
* There is no risk of being confused by a promoted sampled
* object, so base szind and slab on the given size.
*/
- szind = sz_size2index(size);
- slab = (szind < SC_NBINS);
+ alloc_ctx.szind = sz_size2index(size);
+ alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
}
if ((config_prof && opt_prof) || config_debug) {
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
- &rtree_ctx_fallback);
-
- rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true, &szind, &slab);
+ emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
+ &alloc_ctx);
- assert(szind == sz_size2index(size));
- assert((config_prof && opt_prof) || slab == (szind < SC_NBINS));
+ assert(alloc_ctx.szind == sz_size2index(size));
+ assert((config_prof && opt_prof)
+ || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS));
if (config_debug) {
- extent_t *extent = rtree_extent_read(tsdn,
- &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
- assert(szind == extent_szind_get(extent));
- assert(slab == extent_slab_get(extent));
+ edata_t *edata = emap_edata_lookup(tsdn,
+ &arena_emap_global, ptr);
+ assert(alloc_ctx.szind == edata_szind_get(edata));
+ assert(alloc_ctx.slab == edata_slab_get(edata));
}
}
- if (likely(slab)) {
+ if (likely(alloc_ctx.slab)) {
/* Small allocation. */
arena_dalloc_small(tsdn, ptr);
} else {
- arena_dalloc_large_no_tcache(tsdn, ptr, szind);
+ arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind);
}
}
JEMALLOC_ALWAYS_INLINE void
arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
- alloc_ctx_t *alloc_ctx, bool slow_path) {
+ emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) {
assert(!tsdn_null(tsdn) || tcache == NULL);
assert(ptr != NULL);
assert(size <= SC_LARGE_MAXCLASS);
@@ -379,49 +387,164 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
return;
}
- szind_t szind;
- bool slab;
- alloc_ctx_t local_ctx;
+ emap_alloc_ctx_t alloc_ctx;
if (config_prof && opt_prof) {
- if (alloc_ctx == NULL) {
+ if (caller_alloc_ctx == NULL) {
/* Uncommon case and should be a static check. */
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn,
- &rtree_ctx_fallback);
- rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true, &local_ctx.szind,
- &local_ctx.slab);
- assert(local_ctx.szind == sz_size2index(size));
- alloc_ctx = &local_ctx;
+ emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr,
+ &alloc_ctx);
+ assert(alloc_ctx.szind == sz_size2index(size));
+ } else {
+ alloc_ctx = *caller_alloc_ctx;
}
- slab = alloc_ctx->slab;
- szind = alloc_ctx->szind;
} else {
/*
* There is no risk of being confused by a promoted sampled
* object, so base szind and slab on the given size.
*/
- szind = sz_size2index(size);
- slab = (szind < SC_NBINS);
+ alloc_ctx.szind = sz_size2index(size);
+ alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS);
}
if (config_debug) {
- rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn));
- rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true, &szind, &slab);
- extent_t *extent = rtree_extent_read(tsdn,
- &extents_rtree, rtree_ctx, (uintptr_t)ptr, true);
- assert(szind == extent_szind_get(extent));
- assert(slab == extent_slab_get(extent));
+ edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global,
+ ptr);
+ assert(alloc_ctx.szind == edata_szind_get(edata));
+ assert(alloc_ctx.slab == edata_slab_get(edata));
}
- if (likely(slab)) {
+ if (likely(alloc_ctx.slab)) {
/* Small allocation. */
- tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind,
- slow_path);
+ tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr,
+ alloc_ctx.szind, slow_path);
} else {
- arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path);
+ arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind,
+ slow_path);
+ }
+}
+
+static inline void
+arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata,
+ size_t alignment) {
+ assert(edata_base_get(edata) == edata_addr_get(edata));
+
+ if (alignment < PAGE) {
+ unsigned lg_range = LG_PAGE -
+ lg_floor(CACHELINE_CEILING(alignment));
+ size_t r;
+ if (!tsdn_null(tsdn)) {
+ tsd_t *tsd = tsdn_tsd(tsdn);
+ r = (size_t)prng_lg_range_u64(
+ tsd_prng_statep_get(tsd), lg_range);
+ } else {
+ uint64_t stack_value = (uint64_t)(uintptr_t)&r;
+ r = (size_t)prng_lg_range_u64(&stack_value, lg_range);
+ }
+ uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
+ lg_range);
+ edata->e_addr = (void *)((uintptr_t)edata->e_addr +
+ random_offset);
+ assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) ==
+ edata->e_addr);
+ }
+}
+
+/*
+ * The dalloc bin info contains just the information that the common paths need
+ * during tcache flushes. By force-inlining these paths, and using local copies
+ * of data (so that the compiler knows it's constant), we avoid a whole bunch of
+ * redundant loads and stores by leaving this information in registers.
+ */
+typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t;
+struct arena_dalloc_bin_locked_info_s {
+ div_info_t div_info;
+ uint32_t nregs;
+ uint64_t ndalloc;
+};
+
+JEMALLOC_ALWAYS_INLINE size_t
+arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind,
+ edata_t *slab, const void *ptr) {
+ size_t diff, regind;
+
+ /* Freeing a pointer outside the slab can cause assertion failure. */
+ assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab));
+ assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab));
+ /* Freeing an interior pointer can cause assertion failure. */
+ assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) %
+ (uintptr_t)bin_infos[binind].reg_size == 0);
+
+ diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab));
+
+ /* Avoid doing division with a variable divisor. */
+ regind = div_compute(&info->div_info, diff);
+
+ assert(regind < bin_infos[binind].nregs);
+
+ return regind;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info,
+ szind_t binind) {
+ info->div_info = arena_binind_div_info[binind];
+ info->nregs = bin_infos[binind].nregs;
+ info->ndalloc = 0;
+}
+
+/*
+ * Does the deallocation work associated with freeing a single pointer (a
+ * "step") in between a arena_dalloc_bin_locked begin and end call.
+ *
+ * Returns true if arena_slab_dalloc must be called on slab. Doesn't do
+ * stats updates, which happen during finish (this lets running counts get left
+ * in a register).
+ */
+JEMALLOC_ALWAYS_INLINE bool
+arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+ arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab,
+ void *ptr) {
+ const bin_info_t *bin_info = &bin_infos[binind];
+ size_t regind = arena_slab_regind(info, binind, slab, ptr);
+ slab_data_t *slab_data = edata_slab_data_get(slab);
+
+ assert(edata_nfree_get(slab) < bin_info->nregs);
+ /* Freeing an unallocated pointer can cause assertion failure. */
+ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind));
+
+ bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind);
+ edata_nfree_inc(slab);
+
+ if (config_stats) {
+ info->ndalloc++;
+ }
+
+ unsigned nfree = edata_nfree_get(slab);
+ if (nfree == bin_info->nregs) {
+ arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab,
+ bin);
+ return true;
+ } else if (nfree == 1 && slab != bin->slabcur) {
+ arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab,
+ bin);
}
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin,
+ arena_dalloc_bin_locked_info_t *info) {
+ if (config_stats) {
+ bin->stats.ndalloc += info->ndalloc;
+ assert(bin->stats.curregs >= (size_t)info->ndalloc);
+ bin->stats.curregs -= (size_t)info->ndalloc;
+ }
+}
+
+static inline bin_t *
+arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) {
+ bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]);
+ return shard0 + binshard;
}
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */
diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h
index 23949ed..15f1d34 100644
--- a/include/jemalloc/internal/arena_stats.h
+++ b/include/jemalloc/internal/arena_stats.h
@@ -2,77 +2,41 @@
#define JEMALLOC_INTERNAL_ARENA_STATS_H
#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/lockedint.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/mutex_prof.h"
+#include "jemalloc/internal/pa.h"
#include "jemalloc/internal/sc.h"
JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
-/*
- * In those architectures that support 64-bit atomics, we use atomic updates for
- * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
- * externally.
- */
-#ifdef JEMALLOC_ATOMIC_U64
-typedef atomic_u64_t arena_stats_u64_t;
-#else
-/* Must hold the arena stats mutex while reading atomically. */
-typedef uint64_t arena_stats_u64_t;
-#endif
-
typedef struct arena_stats_large_s arena_stats_large_t;
struct arena_stats_large_s {
/*
* Total number of allocation/deallocation requests served directly by
* the arena.
*/
- arena_stats_u64_t nmalloc;
- arena_stats_u64_t ndalloc;
+ locked_u64_t nmalloc;
+ locked_u64_t ndalloc;
/*
* Number of allocation requests that correspond to this size class.
* This includes requests served by tcache, though tcache only
* periodically merges into this counter.
*/
- arena_stats_u64_t nrequests; /* Partially derived. */
+ locked_u64_t nrequests; /* Partially derived. */
/*
* Number of tcache fills / flushes for large (similarly, periodically
* merged). Note that there is no large tcache batch-fill currently
* (i.e. only fill 1 at a time); however flush may be batched.
*/
- arena_stats_u64_t nfills; /* Partially derived. */
- arena_stats_u64_t nflushes; /* Partially derived. */
+ locked_u64_t nfills; /* Partially derived. */
+ locked_u64_t nflushes; /* Partially derived. */
/* Current number of allocations of this size class. */
size_t curlextents; /* Derived. */
};
-typedef struct arena_stats_decay_s arena_stats_decay_t;
-struct arena_stats_decay_s {
- /* Total number of purge sweeps. */
- arena_stats_u64_t npurge;
- /* Total number of madvise calls made. */
- arena_stats_u64_t nmadvise;
- /* Total number of pages purged. */
- arena_stats_u64_t purged;
-};
-
-typedef struct arena_stats_extents_s arena_stats_extents_t;
-struct arena_stats_extents_s {
- /*
- * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t.
- * We track both bytes and # of extents: two extents in the same bucket
- * may have different sizes if adjacent size classes differ by more than
- * a page, so bytes cannot always be derived from # of extents.
- */
- atomic_zu_t ndirty;
- atomic_zu_t dirty_bytes;
- atomic_zu_t nmuzzy;
- atomic_zu_t muzzy_bytes;
- atomic_zu_t nretained;
- atomic_zu_t retained_bytes;
-};
-
/*
* Arena stats. Note that fields marked "derived" are not directly maintained
* within the arena code; rather their values are derived during stats merge
@@ -80,43 +44,36 @@ struct arena_stats_extents_s {
*/
typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
-#ifndef JEMALLOC_ATOMIC_U64
- malloc_mutex_t mtx;
-#endif
-
- /* Number of bytes currently mapped, excluding retained memory. */
- atomic_zu_t mapped; /* Partially derived. */
+ LOCKEDINT_MTX_DECLARE(mtx)
/*
- * Number of unused virtual memory bytes currently retained. Retained
- * bytes are technically mapped (though always decommitted or purged),
- * but they are excluded from the mapped statistic (above).
+ * resident includes the base stats -- that's why it lives here and not
+ * in pa_shard_stats_t.
*/
- atomic_zu_t retained; /* Derived. */
-
- /* Number of extent_t structs allocated by base, but not being used. */
- atomic_zu_t extent_avail;
-
- arena_stats_decay_t decay_dirty;
- arena_stats_decay_t decay_muzzy;
+ size_t base; /* Derived. */
+ size_t resident; /* Derived. */
+ size_t metadata_thp; /* Derived. */
+ size_t mapped; /* Derived. */
- atomic_zu_t base; /* Derived. */
atomic_zu_t internal;
- atomic_zu_t resident; /* Derived. */
- atomic_zu_t metadata_thp;
- atomic_zu_t allocated_large; /* Derived. */
- arena_stats_u64_t nmalloc_large; /* Derived. */
- arena_stats_u64_t ndalloc_large; /* Derived. */
- arena_stats_u64_t nfills_large; /* Derived. */
- arena_stats_u64_t nflushes_large; /* Derived. */
- arena_stats_u64_t nrequests_large; /* Derived. */
+ size_t allocated_large; /* Derived. */
+ uint64_t nmalloc_large; /* Derived. */
+ uint64_t ndalloc_large; /* Derived. */
+ uint64_t nfills_large; /* Derived. */
+ uint64_t nflushes_large; /* Derived. */
+ uint64_t nrequests_large; /* Derived. */
- /* VM space had to be leaked (undocumented). Normally 0. */
- atomic_zu_t abandoned_vm;
+ /*
+ * The stats logically owned by the pa_shard in the same arena. This
+ * lives here only because it's convenient for the purposes of the ctl
+ * module -- it only knows about the single arena_stats.
+ */
+ pa_shard_stats_t pa_shard_stats;
/* Number of bytes cached in tcache associated with this arena. */
- atomic_zu_t tcache_bytes; /* Derived. */
+ size_t tcache_bytes; /* Derived. */
+ size_t tcache_stashed_bytes; /* Derived. */
mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes];
@@ -134,138 +91,24 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) {
assert(((char *)arena_stats)[i] == 0);
}
}
-#ifndef JEMALLOC_ATOMIC_U64
- if (malloc_mutex_init(&arena_stats->mtx, "arena_stats",
+ if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats",
WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) {
return true;
}
-#endif
/* Memory is zeroed, so there is no need to clear stats. */
return false;
}
static inline void
-arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
-#ifndef JEMALLOC_ATOMIC_U64
- malloc_mutex_lock(tsdn, &arena_stats->mtx);
-#endif
-}
-
-static inline void
-arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) {
-#ifndef JEMALLOC_ATOMIC_U64
- malloc_mutex_unlock(tsdn, &arena_stats->mtx);
-#endif
-}
-
-static inline uint64_t
-arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
- arena_stats_u64_t *p) {
-#ifdef JEMALLOC_ATOMIC_U64
- return atomic_load_u64(p, ATOMIC_RELAXED);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- return *p;
-#endif
-}
-
-static inline void
-arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
- arena_stats_u64_t *p, uint64_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
- atomic_fetch_add_u64(p, x, ATOMIC_RELAXED);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- *p += x;
-#endif
-}
-
-static inline void
-arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats,
- arena_stats_u64_t *p, uint64_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
- uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED);
- assert(r - x <= r);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- *p -= x;
- assert(*p + x >= *p);
-#endif
-}
-
-/*
- * Non-atomically sets *dst += src. *dst needs external synchronization.
- * This lets us avoid the cost of a fetch_add when its unnecessary (note that
- * the types here are atomic).
- */
-static inline void
-arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) {
-#ifdef JEMALLOC_ATOMIC_U64
- uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED);
- atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED);
-#else
- *dst += src;
-#endif
-}
-
-static inline size_t
-arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
- atomic_zu_t *p) {
-#ifdef JEMALLOC_ATOMIC_U64
- return atomic_load_zu(p, ATOMIC_RELAXED);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- return atomic_load_zu(p, ATOMIC_RELAXED);
-#endif
-}
-
-static inline void
-arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
- atomic_zu_t *p, size_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
- atomic_fetch_add_zu(p, x, ATOMIC_RELAXED);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
- atomic_store_zu(p, cur + x, ATOMIC_RELAXED);
-#endif
-}
-
-static inline void
-arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats,
- atomic_zu_t *p, size_t x) {
-#ifdef JEMALLOC_ATOMIC_U64
- size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED);
- assert(r - x <= r);
-#else
- malloc_mutex_assert_owner(tsdn, &arena_stats->mtx);
- size_t cur = atomic_load_zu(p, ATOMIC_RELAXED);
- atomic_store_zu(p, cur - x, ATOMIC_RELAXED);
-#endif
-}
-
-/* Like the _u64 variant, needs an externally synchronized *dst. */
-static inline void
-arena_stats_accum_zu(atomic_zu_t *dst, size_t src) {
- size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED);
- atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED);
-}
-
-static inline void
arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats,
szind_t szind, uint64_t nrequests) {
- arena_stats_lock(tsdn, arena_stats);
+ LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx);
arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS];
- arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests);
- arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1);
- arena_stats_unlock(tsdn, arena_stats);
-}
-
-static inline void
-arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) {
- arena_stats_lock(tsdn, arena_stats);
- arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size);
- arena_stats_unlock(tsdn, arena_stats);
+ locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
+ &lstats->nrequests, nrequests);
+ locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx),
+ &lstats->nflushes, 1);
+ LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx);
}
#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */
diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h
new file mode 100644
index 0000000..e2a5a40
--- /dev/null
+++ b/include/jemalloc/internal/arena_structs.h
@@ -0,0 +1,101 @@
+#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H
+#define JEMALLOC_INTERNAL_ARENA_STRUCTS_H
+
+#include "jemalloc/internal/arena_stats.h"
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bin.h"
+#include "jemalloc/internal/bitmap.h"
+#include "jemalloc/internal/counter.h"
+#include "jemalloc/internal/ecache.h"
+#include "jemalloc/internal/edata_cache.h"
+#include "jemalloc/internal/extent_dss.h"
+#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/pa.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/ticker.h"
+
+struct arena_s {
+ /*
+ * Number of threads currently assigned to this arena. Each thread has
+ * two distinct assignments, one for application-serving allocation, and
+ * the other for internal metadata allocation. Internal metadata must
+ * not be allocated from arenas explicitly created via the arenas.create
+ * mallctl, because the arena.<i>.reset mallctl indiscriminately
+ * discards all allocations for the affected arena.
+ *
+ * 0: Application allocation.
+ * 1: Internal metadata allocation.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_u_t nthreads[2];
+
+ /* Next bin shard for binding new threads. Synchronization: atomic. */
+ atomic_u_t binshard_next;
+
+ /*
+ * When percpu_arena is enabled, to amortize the cost of reading /
+ * updating the current CPU id, track the most recent thread accessing
+ * this arena, and only read CPU if there is a mismatch.
+ */
+ tsdn_t *last_thd;
+
+ /* Synchronization: internal. */
+ arena_stats_t stats;
+
+ /*
+ * Lists of tcaches and cache_bin_array_descriptors for extant threads
+ * associated with this arena. Stats from these are merged
+ * incrementally, and at exit if opt_stats_print is enabled.
+ *
+ * Synchronization: tcache_ql_mtx.
+ */
+ ql_head(tcache_slow_t) tcache_ql;
+ ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
+ malloc_mutex_t tcache_ql_mtx;
+
+ /*
+ * Represents a dss_prec_t, but atomically.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_u_t dss_prec;
+
+ /*
+ * Extant large allocations.
+ *
+ * Synchronization: large_mtx.
+ */
+ edata_list_active_t large;
+ /* Synchronizes all large allocation/update/deallocation. */
+ malloc_mutex_t large_mtx;
+
+ /* The page-level allocator shard this arena uses. */
+ pa_shard_t pa_shard;
+
+ /*
+ * A cached copy of base->ind. This can get accessed on hot paths;
+ * looking it up in base requires an extra pointer hop / cache miss.
+ */
+ unsigned ind;
+
+ /*
+ * Base allocator, from which arena metadata are allocated.
+ *
+ * Synchronization: internal.
+ */
+ base_t *base;
+ /* Used to determine uptime. Read-only after initialization. */
+ nstime_t create_time;
+
+ /*
+ * The arena is allocated alongside its bins; really this is a
+ * dynamically sized array determined by the binshard settings.
+ */
+ bin_t bins[0];
+};
+
+#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */
diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h
deleted file mode 100644
index 46aa77c..0000000
--- a/include/jemalloc/internal/arena_structs_a.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
-#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H
-
-#include "jemalloc/internal/bitmap.h"
-
-struct arena_slab_data_s {
- /* Per region allocated/deallocated bitmap. */
- bitmap_t bitmap[BITMAP_GROUPS_MAX];
-};
-
-#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */
diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h
deleted file mode 100644
index eeab57f..0000000
--- a/include/jemalloc/internal/arena_structs_b.h
+++ /dev/null
@@ -1,232 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
-#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H
-
-#include "jemalloc/internal/arena_stats.h"
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/bin.h"
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/extent_dss.h"
-#include "jemalloc/internal/jemalloc_internal_types.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/nstime.h"
-#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/sc.h"
-#include "jemalloc/internal/smoothstep.h"
-#include "jemalloc/internal/ticker.h"
-
-struct arena_decay_s {
- /* Synchronizes all non-atomic fields. */
- malloc_mutex_t mtx;
- /*
- * True if a thread is currently purging the extents associated with
- * this decay structure.
- */
- bool purging;
- /*
- * Approximate time in milliseconds from the creation of a set of unused
- * dirty pages until an equivalent set of unused dirty pages is purged
- * and/or reused.
- */
- atomic_zd_t time_ms;
- /* time / SMOOTHSTEP_NSTEPS. */
- nstime_t interval;
- /*
- * Time at which the current decay interval logically started. We do
- * not actually advance to a new epoch until sometime after it starts
- * because of scheduling and computation delays, and it is even possible
- * to completely skip epochs. In all cases, during epoch advancement we
- * merge all relevant activity into the most recently recorded epoch.
- */
- nstime_t epoch;
- /* Deadline randomness generator. */
- uint64_t jitter_state;
- /*
- * Deadline for current epoch. This is the sum of interval and per
- * epoch jitter which is a uniform random variable in [0..interval).
- * Epochs always advance by precise multiples of interval, but we
- * randomize the deadline to reduce the likelihood of arenas purging in
- * lockstep.
- */
- nstime_t deadline;
- /*
- * Number of unpurged pages at beginning of current epoch. During epoch
- * advancement we use the delta between arena->decay_*.nunpurged and
- * extents_npages_get(&arena->extents_*) to determine how many dirty
- * pages, if any, were generated.
- */
- size_t nunpurged;
- /*
- * Trailing log of how many unused dirty pages were generated during
- * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
- * element is the most recent epoch. Corresponding epoch times are
- * relative to epoch.
- */
- size_t backlog[SMOOTHSTEP_NSTEPS];
-
- /*
- * Pointer to associated stats. These stats are embedded directly in
- * the arena's stats due to how stats structures are shared between the
- * arena and ctl code.
- *
- * Synchronization: Same as associated arena's stats field. */
- arena_stats_decay_t *stats;
- /* Peak number of pages in associated extents. Used for debug only. */
- uint64_t ceil_npages;
-};
-
-struct arena_s {
- /*
- * Number of threads currently assigned to this arena. Each thread has
- * two distinct assignments, one for application-serving allocation, and
- * the other for internal metadata allocation. Internal metadata must
- * not be allocated from arenas explicitly created via the arenas.create
- * mallctl, because the arena.<i>.reset mallctl indiscriminately
- * discards all allocations for the affected arena.
- *
- * 0: Application allocation.
- * 1: Internal metadata allocation.
- *
- * Synchronization: atomic.
- */
- atomic_u_t nthreads[2];
-
- /* Next bin shard for binding new threads. Synchronization: atomic. */
- atomic_u_t binshard_next;
-
- /*
- * When percpu_arena is enabled, to amortize the cost of reading /
- * updating the current CPU id, track the most recent thread accessing
- * this arena, and only read CPU if there is a mismatch.
- */
- tsdn_t *last_thd;
-
- /* Synchronization: internal. */
- arena_stats_t stats;
-
- /*
- * Lists of tcaches and cache_bin_array_descriptors for extant threads
- * associated with this arena. Stats from these are merged
- * incrementally, and at exit if opt_stats_print is enabled.
- *
- * Synchronization: tcache_ql_mtx.
- */
- ql_head(tcache_t) tcache_ql;
- ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql;
- malloc_mutex_t tcache_ql_mtx;
-
- /* Synchronization: internal. */
- prof_accum_t prof_accum;
-
- /*
- * PRNG state for cache index randomization of large allocation base
- * pointers.
- *
- * Synchronization: atomic.
- */
- atomic_zu_t offset_state;
-
- /*
- * Extent serial number generator state.
- *
- * Synchronization: atomic.
- */
- atomic_zu_t extent_sn_next;
-
- /*
- * Represents a dss_prec_t, but atomically.
- *
- * Synchronization: atomic.
- */
- atomic_u_t dss_prec;
-
- /*
- * Number of pages in active extents.
- *
- * Synchronization: atomic.
- */
- atomic_zu_t nactive;
-
- /*
- * Extant large allocations.
- *
- * Synchronization: large_mtx.
- */
- extent_list_t large;
- /* Synchronizes all large allocation/update/deallocation. */
- malloc_mutex_t large_mtx;
-
- /*
- * Collections of extents that were previously allocated. These are
- * used when allocating extents, in an attempt to re-use address space.
- *
- * Synchronization: internal.
- */
- extents_t extents_dirty;
- extents_t extents_muzzy;
- extents_t extents_retained;
-
- /*
- * Decay-based purging state, responsible for scheduling extent state
- * transitions.
- *
- * Synchronization: internal.
- */
- arena_decay_t decay_dirty; /* dirty --> muzzy */
- arena_decay_t decay_muzzy; /* muzzy --> retained */
-
- /*
- * Next extent size class in a growing series to use when satisfying a
- * request via the extent hooks (only if opt_retain). This limits the
- * number of disjoint virtual memory ranges so that extent merging can
- * be effective even if multiple arenas' extent allocation requests are
- * highly interleaved.
- *
- * retain_grow_limit is the max allowed size ind to expand (unless the
- * required size is greater). Default is no limit, and controlled
- * through mallctl only.
- *
- * Synchronization: extent_grow_mtx
- */
- pszind_t extent_grow_next;
- pszind_t retain_grow_limit;
- malloc_mutex_t extent_grow_mtx;
-
- /*
- * Available extent structures that were allocated via
- * base_alloc_extent().
- *
- * Synchronization: extent_avail_mtx.
- */
- extent_tree_t extent_avail;
- atomic_zu_t extent_avail_cnt;
- malloc_mutex_t extent_avail_mtx;
-
- /*
- * bins is used to store heaps of free regions.
- *
- * Synchronization: internal.
- */
- bins_t bins[SC_NBINS];
-
- /*
- * Base allocator, from which arena metadata are allocated.
- *
- * Synchronization: internal.
- */
- base_t *base;
- /* Used to determine uptime. Read-only after initialization. */
- nstime_t create_time;
-};
-
-/* Used in conjunction with tsd for fast arena-related context lookup. */
-struct arena_tdata_s {
- ticker_t decay_ticker;
-};
-
-/* Used to pass rtree lookup context down the path. */
-struct alloc_ctx_s {
- szind_t szind;
- bool slab;
-};
-
-#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */
diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h
index 624937e..d0e1291 100644
--- a/include/jemalloc/internal/arena_types.h
+++ b/include/jemalloc/internal/arena_types.h
@@ -3,21 +3,14 @@
#include "jemalloc/internal/sc.h"
-/* Maximum number of regions in one slab. */
-#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN)
-#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS)
-
/* Default decay times in milliseconds. */
#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000)
#define MUZZY_DECAY_MS_DEFAULT (0)
/* Number of event ticks between time checks. */
-#define DECAY_NTICKS_PER_UPDATE 1000
+#define ARENA_DECAY_NTICKS_PER_UPDATE 1000
-typedef struct arena_slab_data_s arena_slab_data_t;
typedef struct arena_decay_s arena_decay_t;
typedef struct arena_s arena_t;
-typedef struct arena_tdata_s arena_tdata_t;
-typedef struct alloc_ctx_s alloc_ctx_t;
typedef enum {
percpu_arena_mode_names_base = 0, /* Used for options processing. */
@@ -48,4 +41,18 @@ typedef enum {
*/
#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20)
+struct arena_config_s {
+ /* extent hooks to be used for the arena */
+ extent_hooks_t *extent_hooks;
+
+ /*
+ * Use extent hooks for metadata (base) allocations when true.
+ */
+ bool metadata_use_hooks;
+};
+
+typedef struct arena_config_s arena_config_t;
+
+extern const arena_config_t arena_config_default;
+
#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
index a76f54c..c0f7312 100644
--- a/include/jemalloc/internal/atomic.h
+++ b/include/jemalloc/internal/atomic.h
@@ -52,6 +52,27 @@
#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst
/*
+ * Another convenience -- simple atomic helper functions.
+ */
+#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, \
+ lg_size) \
+ JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \
+ ATOMIC_INLINE void \
+ atomic_load_add_store_##short_type(atomic_##short_type##_t *a, \
+ type inc) { \
+ type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \
+ type newval = oldval + inc; \
+ atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \
+ } \
+ ATOMIC_INLINE void \
+ atomic_load_sub_store_##short_type(atomic_##short_type##_t *a, \
+ type inc) { \
+ type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \
+ type newval = oldval - inc; \
+ atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \
+ }
+
+/*
* Not all platforms have 64-bit atomics. If we do, this #define exposes that
* fact.
*/
@@ -67,18 +88,18 @@ JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR)
*/
JEMALLOC_GENERATE_ATOMICS(bool, b, 0)
-JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT)
-JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR)
-JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR)
-JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint8_t, u8, 0)
-JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint32_t, u32, 2)
#ifdef JEMALLOC_ATOMIC_U64
-JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3)
+JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint64_t, u64, 3)
#endif
#undef ATOMIC_INLINE
diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h
index 0f997e1..6ae3c8d 100644
--- a/include/jemalloc/internal/background_thread_externs.h
+++ b/include/jemalloc/internal/background_thread_externs.h
@@ -12,8 +12,9 @@ extern background_thread_info_t *background_thread_info;
bool background_thread_create(tsd_t *tsd, unsigned arena_ind);
bool background_threads_enable(tsd_t *tsd);
bool background_threads_disable(tsd_t *tsd);
-void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
- arena_decay_t *decay, size_t npages_new);
+bool background_thread_is_started(background_thread_info_t* info);
+void background_thread_wakeup_early(background_thread_info_t *info,
+ nstime_t *remaining_sleep);
void background_thread_prefork0(tsdn_t *tsdn);
void background_thread_prefork1(tsdn_t *tsdn);
void background_thread_postfork_parent(tsdn_t *tsdn);
@@ -27,6 +28,6 @@ extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *,
void *(*)(void *), void *__restrict);
#endif
bool background_thread_boot0(void);
-bool background_thread_boot1(tsdn_t *tsdn);
+bool background_thread_boot1(tsdn_t *tsdn, base_t *base);
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */
diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h
index f85e86f..92c5feb 100644
--- a/include/jemalloc/internal/background_thread_inlines.h
+++ b/include/jemalloc/internal/background_thread_inlines.h
@@ -45,18 +45,4 @@ background_thread_indefinite_sleep(background_thread_info_t *info) {
return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE);
}
-JEMALLOC_ALWAYS_INLINE void
-arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena,
- bool is_background_thread) {
- if (!background_thread_enabled() || is_background_thread) {
- return;
- }
- background_thread_info_t *info =
- arena_background_thread_info_get(arena);
- if (background_thread_indefinite_sleep(info)) {
- background_thread_interval_check(tsdn, arena,
- &arena->decay_dirty, 0);
- }
-}
-
#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */
diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h
index c02aa43..83a9198 100644
--- a/include/jemalloc/internal/background_thread_structs.h
+++ b/include/jemalloc/internal/background_thread_structs.h
@@ -11,6 +11,17 @@
#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT
#define DEFAULT_NUM_BACKGROUND_THREAD 4
+/*
+ * These exist only as a transitional state. Eventually, deferral should be
+ * part of the PAI, and each implementation can indicate wait times with more
+ * specificity.
+ */
+#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2)
+#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000
+
+#define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0)
+#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_MAX
+
typedef enum {
background_thread_stopped,
background_thread_started,
@@ -48,6 +59,7 @@ struct background_thread_stats_s {
size_t num_threads;
uint64_t num_runs;
nstime_t run_interval;
+ mutex_prof_data_t max_counter_per_bg_thd;
};
typedef struct background_thread_stats_s background_thread_stats_t;
diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h
new file mode 100644
index 0000000..9b2c9fb
--- /dev/null
+++ b/include/jemalloc/internal/base.h
@@ -0,0 +1,110 @@
+#ifndef JEMALLOC_INTERNAL_BASE_H
+#define JEMALLOC_INTERNAL_BASE_H
+
+#include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/ehooks.h"
+#include "jemalloc/internal/mutex.h"
+
+enum metadata_thp_mode_e {
+ metadata_thp_disabled = 0,
+ /*
+ * Lazily enable hugepage for metadata. To avoid high RSS caused by THP
+ * + low usage arena (i.e. THP becomes a significant percentage), the
+ * "auto" option only starts using THP after a base allocator used up
+ * the first THP region. Starting from the second hugepage (in a single
+ * arena), "auto" behaves the same as "always", i.e. madvise hugepage
+ * right away.
+ */
+ metadata_thp_auto = 1,
+ metadata_thp_always = 2,
+ metadata_thp_mode_limit = 3
+};
+typedef enum metadata_thp_mode_e metadata_thp_mode_t;
+
+#define METADATA_THP_DEFAULT metadata_thp_disabled
+extern metadata_thp_mode_t opt_metadata_thp;
+extern const char *metadata_thp_mode_names[];
+
+
+/* Embedded at the beginning of every block of base-managed virtual memory. */
+typedef struct base_block_s base_block_t;
+struct base_block_s {
+ /* Total size of block's virtual memory mapping. */
+ size_t size;
+
+ /* Next block in list of base's blocks. */
+ base_block_t *next;
+
+ /* Tracks unused trailing space. */
+ edata_t edata;
+};
+
+typedef struct base_s base_t;
+struct base_s {
+ /*
+ * User-configurable extent hook functions.
+ */
+ ehooks_t ehooks;
+
+ /*
+ * User-configurable extent hook functions for metadata allocations.
+ */
+ ehooks_t ehooks_base;
+
+ /* Protects base_alloc() and base_stats_get() operations. */
+ malloc_mutex_t mtx;
+
+ /* Using THP when true (metadata_thp auto mode). */
+ bool auto_thp_switched;
+ /*
+ * Most recent size class in the series of increasingly large base
+ * extents. Logarithmic spacing between subsequent allocations ensures
+ * that the total number of distinct mappings remains small.
+ */
+ pszind_t pind_last;
+
+ /* Serial number generation state. */
+ size_t extent_sn_next;
+
+ /* Chain of all blocks associated with base. */
+ base_block_t *blocks;
+
+ /* Heap of extents that track unused trailing space within blocks. */
+ edata_heap_t avail[SC_NSIZES];
+
+ /* Stats, only maintained if config_stats. */
+ size_t allocated;
+ size_t resident;
+ size_t mapped;
+ /* Number of THP regions touched. */
+ size_t n_thp;
+};
+
+static inline unsigned
+base_ind_get(const base_t *base) {
+ return ehooks_ind_get(&base->ehooks);
+}
+
+static inline bool
+metadata_thp_enabled(void) {
+ return (opt_metadata_thp != metadata_thp_disabled);
+}
+
+base_t *b0get(void);
+base_t *base_new(tsdn_t *tsdn, unsigned ind,
+ const extent_hooks_t *extent_hooks, bool metadata_use_hooks);
+void base_delete(tsdn_t *tsdn, base_t *base);
+ehooks_t *base_ehooks_get(base_t *base);
+ehooks_t *base_ehooks_get_for_metadata(base_t *base);
+extent_hooks_t *base_extent_hooks_set(base_t *base,
+ extent_hooks_t *extent_hooks);
+void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
+edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base);
+void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
+ size_t *resident, size_t *mapped, size_t *n_thp);
+void base_prefork(tsdn_t *tsdn, base_t *base);
+void base_postfork_parent(tsdn_t *tsdn, base_t *base);
+void base_postfork_child(tsdn_t *tsdn, base_t *base);
+bool base_boot(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_BASE_H */
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h
deleted file mode 100644
index 7b705c9..0000000
--- a/include/jemalloc/internal/base_externs.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H
-#define JEMALLOC_INTERNAL_BASE_EXTERNS_H
-
-extern metadata_thp_mode_t opt_metadata_thp;
-extern const char *metadata_thp_mode_names[];
-
-base_t *b0get(void);
-base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
-void base_delete(tsdn_t *tsdn, base_t *base);
-extent_hooks_t *base_extent_hooks_get(base_t *base);
-extent_hooks_t *base_extent_hooks_set(base_t *base,
- extent_hooks_t *extent_hooks);
-void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment);
-extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base);
-void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated,
- size_t *resident, size_t *mapped, size_t *n_thp);
-void base_prefork(tsdn_t *tsdn, base_t *base);
-void base_postfork_parent(tsdn_t *tsdn, base_t *base);
-void base_postfork_child(tsdn_t *tsdn, base_t *base);
-bool base_boot(tsdn_t *tsdn);
-
-#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */
diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h
deleted file mode 100644
index aec0e2e..0000000
--- a/include/jemalloc/internal/base_inlines.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H
-#define JEMALLOC_INTERNAL_BASE_INLINES_H
-
-static inline unsigned
-base_ind_get(const base_t *base) {
- return base->ind;
-}
-
-static inline bool
-metadata_thp_enabled(void) {
- return (opt_metadata_thp != metadata_thp_disabled);
-}
-#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */
diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h
deleted file mode 100644
index 07f214e..0000000
--- a/include/jemalloc/internal/base_structs.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H
-#define JEMALLOC_INTERNAL_BASE_STRUCTS_H
-
-#include "jemalloc/internal/jemalloc_internal_types.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/sc.h"
-
-/* Embedded at the beginning of every block of base-managed virtual memory. */
-struct base_block_s {
- /* Total size of block's virtual memory mapping. */
- size_t size;
-
- /* Next block in list of base's blocks. */
- base_block_t *next;
-
- /* Tracks unused trailing space. */
- extent_t extent;
-};
-
-struct base_s {
- /* Associated arena's index within the arenas array. */
- unsigned ind;
-
- /*
- * User-configurable extent hook functions. Points to an
- * extent_hooks_t.
- */
- atomic_p_t extent_hooks;
-
- /* Protects base_alloc() and base_stats_get() operations. */
- malloc_mutex_t mtx;
-
- /* Using THP when true (metadata_thp auto mode). */
- bool auto_thp_switched;
- /*
- * Most recent size class in the series of increasingly large base
- * extents. Logarithmic spacing between subsequent allocations ensures
- * that the total number of distinct mappings remains small.
- */
- pszind_t pind_last;
-
- /* Serial number generation state. */
- size_t extent_sn_next;
-
- /* Chain of all blocks associated with base. */
- base_block_t *blocks;
-
- /* Heap of extents that track unused trailing space within blocks. */
- extent_heap_t avail[SC_NSIZES];
-
- /* Stats, only maintained if config_stats. */
- size_t allocated;
- size_t resident;
- size_t mapped;
- /* Number of THP regions touched. */
- size_t n_thp;
-};
-
-#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */
diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h
deleted file mode 100644
index b6db77d..0000000
--- a/include/jemalloc/internal/base_types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H
-#define JEMALLOC_INTERNAL_BASE_TYPES_H
-
-typedef struct base_block_s base_block_t;
-typedef struct base_s base_t;
-
-#define METADATA_THP_DEFAULT metadata_thp_disabled
-
-/*
- * In auto mode, arenas switch to huge pages for the base allocator on the
- * second base block. a0 switches to thp on the 5th block (after 20 megabytes
- * of metadata), since more metadata (e.g. rtree nodes) come from a0's base.
- */
-
-#define BASE_AUTO_THP_THRESHOLD 2
-#define BASE_AUTO_THP_THRESHOLD_A0 5
-
-typedef enum {
- metadata_thp_disabled = 0,
- /*
- * Lazily enable hugepage for metadata. To avoid high RSS caused by THP
- * + low usage arena (i.e. THP becomes a significant percentage), the
- * "auto" option only starts using THP after a base allocator used up
- * the first THP region. Starting from the second hugepage (in a single
- * arena), "auto" behaves the same as "always", i.e. madvise hugepage
- * right away.
- */
- metadata_thp_auto = 1,
- metadata_thp_always = 2,
- metadata_thp_mode_limit = 3
-} metadata_thp_mode_t;
-
-#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */
diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h
index 8547e89..63f9739 100644
--- a/include/jemalloc/internal/bin.h
+++ b/include/jemalloc/internal/bin.h
@@ -3,8 +3,7 @@
#include "jemalloc/internal/bin_stats.h"
#include "jemalloc/internal/bin_types.h"
-#include "jemalloc/internal/extent_types.h"
-#include "jemalloc/internal/extent_structs.h"
+#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sc.h"
@@ -12,74 +11,34 @@
* A bin contains a set of extents that are currently being used for slab
* allocations.
*/
-
-/*
- * Read-only information associated with each element of arena_t's bins array
- * is stored separately, partly to reduce memory usage (only one copy, rather
- * than one per arena), but mainly to avoid false cacheline sharing.
- *
- * Each slab has the following layout:
- *
- * /--------------------\
- * | region 0 |
- * |--------------------|
- * | region 1 |
- * |--------------------|
- * | ... |
- * | ... |
- * | ... |
- * |--------------------|
- * | region nregs-1 |
- * \--------------------/
- */
-typedef struct bin_info_s bin_info_t;
-struct bin_info_s {
- /* Size of regions in a slab for this bin's size class. */
- size_t reg_size;
-
- /* Total size of a slab for this bin's size class. */
- size_t slab_size;
-
- /* Total number of regions in a slab for this bin's size class. */
- uint32_t nregs;
-
- /* Number of sharded bins in each arena for this size class. */
- uint32_t n_shards;
-
- /*
- * Metadata used to manipulate bitmaps for slabs associated with this
- * bin.
- */
- bitmap_info_t bitmap_info;
-};
-
-extern bin_info_t bin_infos[SC_NBINS];
-
typedef struct bin_s bin_t;
struct bin_s {
/* All operations on bin_t fields require lock ownership. */
malloc_mutex_t lock;
/*
+ * Bin statistics. These get touched every time the lock is acquired,
+ * so put them close by in the hopes of getting some cache locality.
+ */
+ bin_stats_t stats;
+
+ /*
* Current slab being used to service allocations of this bin's size
* class. slabcur is independent of slabs_{nonfull,full}; whenever
* slabcur is reassigned, the previous slab must be deallocated or
* inserted into slabs_{nonfull,full}.
*/
- extent_t *slabcur;
+ edata_t *slabcur;
/*
* Heap of non-full slabs. This heap is used to assure that new
* allocations come from the non-full slab that is oldest/lowest in
* memory.
*/
- extent_heap_t slabs_nonfull;
+ edata_heap_t slabs_nonfull;
/* List used to track full slabs. */
- extent_list_t slabs_full;
-
- /* Bin statistics. */
- bin_stats_t stats;
+ edata_list_active_t slabs_full;
};
/* A set of sharded bins of the same size class. */
@@ -92,7 +51,6 @@ struct bins_s {
void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]);
bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size,
size_t end_size, size_t nshards);
-void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
/* Initializes a bin to empty. Returns true on error. */
bool bin_init(bin_t *bin);
@@ -104,19 +62,20 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin);
/* Stats. */
static inline void
-bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) {
+bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) {
malloc_mutex_lock(tsdn, &bin->lock);
malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock);
- dst_bin_stats->nmalloc += bin->stats.nmalloc;
- dst_bin_stats->ndalloc += bin->stats.ndalloc;
- dst_bin_stats->nrequests += bin->stats.nrequests;
- dst_bin_stats->curregs += bin->stats.curregs;
- dst_bin_stats->nfills += bin->stats.nfills;
- dst_bin_stats->nflushes += bin->stats.nflushes;
- dst_bin_stats->nslabs += bin->stats.nslabs;
- dst_bin_stats->reslabs += bin->stats.reslabs;
- dst_bin_stats->curslabs += bin->stats.curslabs;
- dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs;
+ bin_stats_t *stats = &dst_bin_stats->stats_data;
+ stats->nmalloc += bin->stats.nmalloc;
+ stats->ndalloc += bin->stats.ndalloc;
+ stats->nrequests += bin->stats.nrequests;
+ stats->curregs += bin->stats.curregs;
+ stats->nfills += bin->stats.nfills;
+ stats->nflushes += bin->stats.nflushes;
+ stats->nslabs += bin->stats.nslabs;
+ stats->reslabs += bin->stats.reslabs;
+ stats->curslabs += bin->stats.curslabs;
+ stats->nonfull_slabs += bin->stats.nonfull_slabs;
malloc_mutex_unlock(tsdn, &bin->lock);
}
diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h
new file mode 100644
index 0000000..7fe65c8
--- /dev/null
+++ b/include/jemalloc/internal/bin_info.h
@@ -0,0 +1,50 @@
+#ifndef JEMALLOC_INTERNAL_BIN_INFO_H
+#define JEMALLOC_INTERNAL_BIN_INFO_H
+
+#include "jemalloc/internal/bitmap.h"
+
+/*
+ * Read-only information associated with each element of arena_t's bins array
+ * is stored separately, partly to reduce memory usage (only one copy, rather
+ * than one per arena), but mainly to avoid false cacheline sharing.
+ *
+ * Each slab has the following layout:
+ *
+ * /--------------------\
+ * | region 0 |
+ * |--------------------|
+ * | region 1 |
+ * |--------------------|
+ * | ... |
+ * | ... |
+ * | ... |
+ * |--------------------|
+ * | region nregs-1 |
+ * \--------------------/
+ */
+typedef struct bin_info_s bin_info_t;
+struct bin_info_s {
+ /* Size of regions in a slab for this bin's size class. */
+ size_t reg_size;
+
+ /* Total size of a slab for this bin's size class. */
+ size_t slab_size;
+
+ /* Total number of regions in a slab for this bin's size class. */
+ uint32_t nregs;
+
+ /* Number of sharded bins in each arena for this size class. */
+ uint32_t n_shards;
+
+ /*
+ * Metadata used to manipulate bitmaps for slabs associated with this
+ * bin.
+ */
+ bitmap_info_t bitmap_info;
+};
+
+extern bin_info_t bin_infos[SC_NBINS];
+
+void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]);
+
+#endif /* JEMALLOC_INTERNAL_BIN_INFO_H */
diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h
index d04519c..0b99297 100644
--- a/include/jemalloc/internal/bin_stats.h
+++ b/include/jemalloc/internal/bin_stats.h
@@ -47,8 +47,11 @@ struct bin_stats_s {
/* Current size of nonfull slabs heap in this bin. */
size_t nonfull_slabs;
+};
+typedef struct bin_stats_data_s bin_stats_data_t;
+struct bin_stats_data_s {
+ bin_stats_t stats_data;
mutex_prof_data_t mutex_data;
};
-
#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */
diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h
index 3533606..945e832 100644
--- a/include/jemalloc/internal/bin_types.h
+++ b/include/jemalloc/internal/bin_types.h
@@ -3,7 +3,7 @@
#include "jemalloc/internal/sc.h"
-#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH)
+#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH)
#define N_BIN_SHARDS_DEFAULT 1
/* Used in TSD static initializer only. Real init in arena_bind(). */
diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h
index c045eb8..bac5914 100644
--- a/include/jemalloc/internal/bit_util.h
+++ b/include/jemalloc/internal/bit_util.h
@@ -3,144 +3,383 @@
#include "jemalloc/internal/assert.h"
-#define BIT_UTIL_INLINE static inline
-
/* Sanity check. */
#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \
|| !defined(JEMALLOC_INTERNAL_FFS)
# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure
#endif
+/*
+ * Unlike the builtins and posix ffs functions, our ffs requires a non-zero
+ * input, and returns the position of the lowest bit set (as opposed to the
+ * posix versions, which return 1 larger than that position and use a return
+ * value of zero as a sentinel. This tends to simplify logic in callers, and
+ * allows for consistency with the builtins we build fls on top of.
+ */
+static inline unsigned
+ffs_llu(unsigned long long x) {
+ util_assume(x != 0);
+ return JEMALLOC_INTERNAL_FFSLL(x) - 1;
+}
-BIT_UTIL_INLINE unsigned
-ffs_llu(unsigned long long bitmap) {
- return JEMALLOC_INTERNAL_FFSLL(bitmap);
+static inline unsigned
+ffs_lu(unsigned long x) {
+ util_assume(x != 0);
+ return JEMALLOC_INTERNAL_FFSL(x) - 1;
}
-BIT_UTIL_INLINE unsigned
-ffs_lu(unsigned long bitmap) {
- return JEMALLOC_INTERNAL_FFSL(bitmap);
+static inline unsigned
+ffs_u(unsigned x) {
+ util_assume(x != 0);
+ return JEMALLOC_INTERNAL_FFS(x) - 1;
}
-BIT_UTIL_INLINE unsigned
-ffs_u(unsigned bitmap) {
- return JEMALLOC_INTERNAL_FFS(bitmap);
+#define DO_FLS_SLOW(x, suffix) do { \
+ util_assume(x != 0); \
+ x |= (x >> 1); \
+ x |= (x >> 2); \
+ x |= (x >> 4); \
+ x |= (x >> 8); \
+ x |= (x >> 16); \
+ if (sizeof(x) > 4) { \
+ /* \
+ * If sizeof(x) is 4, then the expression "x >> 32" \
+ * will generate compiler warnings even if the code \
+ * never executes. This circumvents the warning, and \
+ * gets compiled out in optimized builds. \
+ */ \
+ int constant_32 = sizeof(x) * 4; \
+ x |= (x >> constant_32); \
+ } \
+ x++; \
+ if (x == 0) { \
+ return 8 * sizeof(x) - 1; \
+ } \
+ return ffs_##suffix(x) - 1; \
+} while(0)
+
+static inline unsigned
+fls_llu_slow(unsigned long long x) {
+ DO_FLS_SLOW(x, llu);
}
-#ifdef JEMALLOC_INTERNAL_POPCOUNTL
-BIT_UTIL_INLINE unsigned
+static inline unsigned
+fls_lu_slow(unsigned long x) {
+ DO_FLS_SLOW(x, lu);
+}
+
+static inline unsigned
+fls_u_slow(unsigned x) {
+ DO_FLS_SLOW(x, u);
+}
+
+#undef DO_FLS_SLOW
+
+#ifdef JEMALLOC_HAVE_BUILTIN_CLZ
+static inline unsigned
+fls_llu(unsigned long long x) {
+ util_assume(x != 0);
+ /*
+ * Note that the xor here is more naturally written as subtraction; the
+ * last bit set is the number of bits in the type minus the number of
+ * leading zero bits. But GCC implements that as:
+ * bsr edi, edi
+ * mov eax, 31
+ * xor edi, 31
+ * sub eax, edi
+ * If we write it as xor instead, then we get
+ * bsr eax, edi
+ * as desired.
+ */
+ return (8 * sizeof(x) - 1) ^ __builtin_clzll(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+ util_assume(x != 0);
+ return (8 * sizeof(x) - 1) ^ __builtin_clzl(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+ util_assume(x != 0);
+ return (8 * sizeof(x) - 1) ^ __builtin_clz(x);
+}
+#elif defined(_MSC_VER)
+
+#if LG_SIZEOF_PTR == 3
+#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x)
+#else
+/*
+ * This never actually runs; we're just dodging a compiler error for the
+ * never-taken branch where sizeof(void *) == 8.
+ */
+#define DO_BSR64(bit, x) bit = 0; unreachable()
+#endif
+
+#define DO_FLS(x) do { \
+ if (x == 0) { \
+ return 8 * sizeof(x); \
+ } \
+ unsigned long bit; \
+ if (sizeof(x) == 4) { \
+ _BitScanReverse(&bit, (unsigned)x); \
+ return (unsigned)bit; \
+ } \
+ if (sizeof(x) == 8 && sizeof(void *) == 8) { \
+ DO_BSR64(bit, x); \
+ return (unsigned)bit; \
+ } \
+ if (sizeof(x) == 8 && sizeof(void *) == 4) { \
+ /* Dodge a compiler warning, as above. */ \
+ int constant_32 = sizeof(x) * 4; \
+ if (_BitScanReverse(&bit, \
+ (unsigned)(x >> constant_32))) { \
+ return 32 + (unsigned)bit; \
+ } else { \
+ _BitScanReverse(&bit, (unsigned)x); \
+ return (unsigned)bit; \
+ } \
+ } \
+ unreachable(); \
+} while (0)
+
+static inline unsigned
+fls_llu(unsigned long long x) {
+ DO_FLS(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+ DO_FLS(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+ DO_FLS(x);
+}
+
+#undef DO_FLS
+#undef DO_BSR64
+#else
+
+static inline unsigned
+fls_llu(unsigned long long x) {
+ return fls_llu_slow(x);
+}
+
+static inline unsigned
+fls_lu(unsigned long x) {
+ return fls_lu_slow(x);
+}
+
+static inline unsigned
+fls_u(unsigned x) {
+ return fls_u_slow(x);
+}
+#endif
+
+#if LG_SIZEOF_LONG_LONG > 3
+# error "Haven't implemented popcount for 16-byte ints."
+#endif
+
+#define DO_POPCOUNT(x, type) do { \
+ /* \
+ * Algorithm from an old AMD optimization reference manual. \
+ * We're putting a little bit more work than you might expect \
+ * into the no-instrinsic case, since we only support the \
+ * GCC intrinsics spelling of popcount (for now). Detecting \
+ * whether or not the popcount builtin is actually useable in \
+ * MSVC is nontrivial. \
+ */ \
+ \
+ type bmul = (type)0x0101010101010101ULL; \
+ \
+ /* \
+ * Replace each 2 bits with the sideways sum of the original \
+ * values. 0x5 = 0b0101. \
+ * \
+ * You might expect this to be: \
+ * x = (x & 0x55...) + ((x >> 1) & 0x55...). \
+ * That costs an extra mask relative to this, though. \
+ */ \
+ x = x - ((x >> 1) & (0x55U * bmul)); \
+ /* Replace each 4 bits with their sideays sum. 0x3 = 0b0011. */\
+ x = (x & (bmul * 0x33U)) + ((x >> 2) & (bmul * 0x33U)); \
+ /* \
+ * Replace each 8 bits with their sideways sum. Note that we \
+ * can't overflow within each 4-bit sum here, so we can skip \
+ * the initial mask. \
+ */ \
+ x = (x + (x >> 4)) & (bmul * 0x0FU); \
+ /* \
+ * None of the partial sums in this multiplication (viewed in \
+ * base-256) can overflow into the next digit. So the least \
+ * significant byte of the product will be the least \
+ * significant byte of the original value, the second least \
+ * significant byte will be the sum of the two least \
+ * significant bytes of the original value, and so on. \
+ * Importantly, the high byte will be the byte-wise sum of all \
+ * the bytes of the original value. \
+ */ \
+ x = x * bmul; \
+ x >>= ((sizeof(x) - 1) * 8); \
+ return (unsigned)x; \
+} while(0)
+
+static inline unsigned
+popcount_u_slow(unsigned bitmap) {
+ DO_POPCOUNT(bitmap, unsigned);
+}
+
+static inline unsigned
+popcount_lu_slow(unsigned long bitmap) {
+ DO_POPCOUNT(bitmap, unsigned long);
+}
+
+static inline unsigned
+popcount_llu_slow(unsigned long long bitmap) {
+ DO_POPCOUNT(bitmap, unsigned long long);
+}
+
+#undef DO_POPCOUNT
+
+static inline unsigned
+popcount_u(unsigned bitmap) {
+#ifdef JEMALLOC_INTERNAL_POPCOUNT
+ return JEMALLOC_INTERNAL_POPCOUNT(bitmap);
+#else
+ return popcount_u_slow(bitmap);
+#endif
+}
+
+static inline unsigned
popcount_lu(unsigned long bitmap) {
- return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+#ifdef JEMALLOC_INTERNAL_POPCOUNTL
+ return JEMALLOC_INTERNAL_POPCOUNTL(bitmap);
+#else
+ return popcount_lu_slow(bitmap);
+#endif
}
+
+static inline unsigned
+popcount_llu(unsigned long long bitmap) {
+#ifdef JEMALLOC_INTERNAL_POPCOUNTLL
+ return JEMALLOC_INTERNAL_POPCOUNTLL(bitmap);
+#else
+ return popcount_llu_slow(bitmap);
#endif
+}
/*
* Clears first unset bit in bitmap, and returns
* place of bit. bitmap *must not* be 0.
*/
-BIT_UTIL_INLINE size_t
+static inline size_t
cfs_lu(unsigned long* bitmap) {
- size_t bit = ffs_lu(*bitmap) - 1;
+ util_assume(*bitmap != 0);
+ size_t bit = ffs_lu(*bitmap);
*bitmap ^= ZU(1) << bit;
return bit;
}
-BIT_UTIL_INLINE unsigned
-ffs_zu(size_t bitmap) {
+static inline unsigned
+ffs_zu(size_t x) {
#if LG_SIZEOF_PTR == LG_SIZEOF_INT
- return ffs_u(bitmap);
+ return ffs_u(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
- return ffs_lu(bitmap);
+ return ffs_lu(x);
#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
- return ffs_llu(bitmap);
+ return ffs_llu(x);
#else
#error No implementation for size_t ffs()
#endif
}
-BIT_UTIL_INLINE unsigned
-ffs_u64(uint64_t bitmap) {
+static inline unsigned
+fls_zu(size_t x) {
+#if LG_SIZEOF_PTR == LG_SIZEOF_INT
+ return fls_u(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG
+ return fls_lu(x);
+#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG
+ return fls_llu(x);
+#else
+#error No implementation for size_t fls()
+#endif
+}
+
+
+static inline unsigned
+ffs_u64(uint64_t x) {
#if LG_SIZEOF_LONG == 3
- return ffs_lu(bitmap);
+ return ffs_lu(x);
#elif LG_SIZEOF_LONG_LONG == 3
- return ffs_llu(bitmap);
+ return ffs_llu(x);
#else
#error No implementation for 64-bit ffs()
#endif
}
-BIT_UTIL_INLINE unsigned
-ffs_u32(uint32_t bitmap) {
+static inline unsigned
+fls_u64(uint64_t x) {
+#if LG_SIZEOF_LONG == 3
+ return fls_lu(x);
+#elif LG_SIZEOF_LONG_LONG == 3
+ return fls_llu(x);
+#else
+#error No implementation for 64-bit fls()
+#endif
+}
+
+static inline unsigned
+ffs_u32(uint32_t x) {
#if LG_SIZEOF_INT == 2
- return ffs_u(bitmap);
+ return ffs_u(x);
#else
#error No implementation for 32-bit ffs()
#endif
- return ffs_u(bitmap);
+ return ffs_u(x);
+}
+
+static inline unsigned
+fls_u32(uint32_t x) {
+#if LG_SIZEOF_INT == 2
+ return fls_u(x);
+#else
+#error No implementation for 32-bit fls()
+#endif
+ return fls_u(x);
}
-BIT_UTIL_INLINE uint64_t
+static inline uint64_t
pow2_ceil_u64(uint64_t x) {
-#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ))
- if(unlikely(x <= 1)) {
+ if (unlikely(x <= 1)) {
return x;
}
- size_t msb_on_index;
-#if (defined(__amd64__) || defined(__x86_64__))
- asm ("bsrq %1, %0"
- : "=r"(msb_on_index) // Outputs.
- : "r"(x-1) // Inputs.
- );
-#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
- msb_on_index = (63 ^ __builtin_clzll(x - 1));
-#endif
+ size_t msb_on_index = fls_u64(x - 1);
+ /*
+ * Range-check; it's on the callers to ensure that the result of this
+ * call won't overflow.
+ */
assert(msb_on_index < 63);
return 1ULL << (msb_on_index + 1);
-#else
- x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
- x |= x >> 32;
- x++;
- return x;
-#endif
}
-BIT_UTIL_INLINE uint32_t
+static inline uint32_t
pow2_ceil_u32(uint32_t x) {
-#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__)))
- if(unlikely(x <= 1)) {
- return x;
+ if (unlikely(x <= 1)) {
+ return x;
}
- size_t msb_on_index;
-#if (defined(__i386__))
- asm ("bsr %1, %0"
- : "=r"(msb_on_index) // Outputs.
- : "r"(x-1) // Inputs.
- );
-#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
- msb_on_index = (31 ^ __builtin_clz(x - 1));
-#endif
+ size_t msb_on_index = fls_u32(x - 1);
+ /* As above. */
assert(msb_on_index < 31);
return 1U << (msb_on_index + 1);
-#else
- x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
- x++;
- return x;
-#endif
}
/* Compute the smallest power of 2 that is >= x. */
-BIT_UTIL_INLINE size_t
+static inline size_t
pow2_ceil_zu(size_t x) {
#if (LG_SIZEOF_PTR == 3)
return pow2_ceil_u64(x);
@@ -149,77 +388,21 @@ pow2_ceil_zu(size_t x) {
#endif
}
-#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__))
-BIT_UTIL_INLINE unsigned
-lg_floor(size_t x) {
- size_t ret;
- assert(x != 0);
-
- asm ("bsr %1, %0"
- : "=r"(ret) // Outputs.
- : "r"(x) // Inputs.
- );
- assert(ret < UINT_MAX);
- return (unsigned)ret;
-}
-#elif (defined(_MSC_VER))
-BIT_UTIL_INLINE unsigned
+static inline unsigned
lg_floor(size_t x) {
- unsigned long ret;
-
- assert(x != 0);
-
+ util_assume(x != 0);
#if (LG_SIZEOF_PTR == 3)
- _BitScanReverse64(&ret, x);
-#elif (LG_SIZEOF_PTR == 2)
- _BitScanReverse(&ret, x);
+ return fls_u64(x);
#else
-# error "Unsupported type size for lg_floor()"
+ return fls_u32(x);
#endif
- assert(ret < UINT_MAX);
- return (unsigned)ret;
}
-#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ))
-BIT_UTIL_INLINE unsigned
-lg_floor(size_t x) {
- assert(x != 0);
-#if (LG_SIZEOF_PTR == LG_SIZEOF_INT)
- return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x);
-#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG)
- return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x);
-#else
-# error "Unsupported type size for lg_floor()"
-#endif
-}
-#else
-BIT_UTIL_INLINE unsigned
-lg_floor(size_t x) {
- assert(x != 0);
-
- x |= (x >> 1);
- x |= (x >> 2);
- x |= (x >> 4);
- x |= (x >> 8);
- x |= (x >> 16);
-#if (LG_SIZEOF_PTR == 3)
- x |= (x >> 32);
-#endif
- if (x == SIZE_T_MAX) {
- return (8 << LG_SIZEOF_PTR) - 1;
- }
- x++;
- return ffs_zu(x) - 2;
-}
-#endif
-
-BIT_UTIL_INLINE unsigned
+static inline unsigned
lg_ceil(size_t x) {
return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1);
}
-#undef BIT_UTIL_INLINE
-
/* A compile-time version of lg_floor and lg_ceil. */
#define LG_FLOOR_1(x) 0
#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1))
diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h
index c3f9cb4..dc19454 100644
--- a/include/jemalloc/internal/bitmap.h
+++ b/include/jemalloc/internal/bitmap.h
@@ -1,7 +1,6 @@
#ifndef JEMALLOC_INTERNAL_BITMAP_H
#define JEMALLOC_INTERNAL_BITMAP_H
-#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/bit_util.h"
#include "jemalloc/internal/sc.h"
@@ -9,9 +8,9 @@ typedef unsigned long bitmap_t;
#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG
/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */
-#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES)
+#if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES)
/* Maximum bitmap bit count is determined by maximum regions per slab. */
-# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS
+# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS
#else
/* Maximum bitmap bit count is determined by number of extent size classes. */
# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES)
@@ -273,7 +272,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
}
return bitmap_ffu(bitmap, binfo, sib_base);
}
- bit += ((size_t)(ffs_lu(group_masked) - 1)) <<
+ bit += ((size_t)ffs_lu(group_masked)) <<
(lg_bits_per_group - LG_BITMAP_GROUP_NBITS);
}
assert(bit >= min_bit);
@@ -285,9 +284,9 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) {
- 1);
size_t bit;
do {
- bit = ffs_lu(g);
- if (bit != 0) {
- return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
+ if (g != 0) {
+ bit = ffs_lu(g);
+ return (i << LG_BITMAP_GROUP_NBITS) + bit;
}
i++;
g = bitmap[i];
@@ -308,20 +307,20 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) {
#ifdef BITMAP_USE_TREE
i = binfo->nlevels - 1;
g = bitmap[binfo->levels[i].group_offset];
- bit = ffs_lu(g) - 1;
+ bit = ffs_lu(g);
while (i > 0) {
i--;
g = bitmap[binfo->levels[i].group_offset + bit];
- bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1);
+ bit = (bit << LG_BITMAP_GROUP_NBITS) + ffs_lu(g);
}
#else
i = 0;
g = bitmap[0];
- while ((bit = ffs_lu(g)) == 0) {
+ while (g == 0) {
i++;
g = bitmap[i];
}
- bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1);
+ bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g);
#endif
bitmap_set(bitmap, binfo, bit);
return bit;
diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h
new file mode 100644
index 0000000..37aa6de
--- /dev/null
+++ b/include/jemalloc/internal/buf_writer.h
@@ -0,0 +1,32 @@
+#ifndef JEMALLOC_INTERNAL_BUF_WRITER_H
+#define JEMALLOC_INTERNAL_BUF_WRITER_H
+
+/*
+ * Note: when using the buffered writer, cbopaque is passed to write_cb only
+ * when the buffer is flushed. It would make a difference if cbopaque points
+ * to something that's changing for each write_cb call, or something that
+ * affects write_cb in a way dependent on the content of the output string.
+ * However, the most typical usage case in practice is that cbopaque points to
+ * some "option like" content for the write_cb, so it doesn't matter.
+ */
+
+typedef struct {
+ write_cb_t *write_cb;
+ void *cbopaque;
+ char *buf;
+ size_t buf_size;
+ size_t buf_end;
+ bool internal_buf;
+} buf_writer_t;
+
+bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer,
+ write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len);
+void buf_writer_flush(buf_writer_t *buf_writer);
+write_cb_t buf_writer_cb;
+void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer);
+
+typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit);
+void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb,
+ void *read_cbopaque);
+
+#endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */
diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h
index d14556a..caf5be3 100644
--- a/include/jemalloc/internal/cache_bin.h
+++ b/include/jemalloc/internal/cache_bin.h
@@ -2,6 +2,7 @@
#define JEMALLOC_INTERNAL_CACHE_BIN_H
#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sz.h"
/*
* The cache_bins are the mechanism that the tcache and the arena use to
@@ -13,14 +14,38 @@
* of the tcache at all.
*/
+/*
+ * The size in bytes of each cache bin stack. We also use this to indicate
+ * *counts* of individual objects.
+ */
+typedef uint16_t cache_bin_sz_t;
/*
- * The count of the number of cached allocations in a bin. We make this signed
- * so that negative numbers can encode "invalid" states (e.g. a low water mark
- * of -1 for a cache that has been depleted).
+ * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a
+ * bug starts leaking those. Make it look like the junk pattern but be distinct
+ * from it.
*/
-typedef int32_t cache_bin_sz_t;
+static const uintptr_t cache_bin_preceding_junk =
+ (uintptr_t)0x7a7a7a7a7a7a7a7aULL;
+/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */
+static const uintptr_t cache_bin_trailing_junk =
+ (uintptr_t)0xa7a7a7a7a7a7a7a7ULL;
+/*
+ * That implies the following value, for the maximum number of items in any
+ * individual bin. The cache bins track their bounds looking just at the low
+ * bits of a pointer, compared against a cache_bin_sz_t. So that's
+ * 1 << (sizeof(cache_bin_sz_t) * 8)
+ * bytes spread across pointer sized objects to get the maximum.
+ */
+#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \
+ / sizeof(void *) - 1)
+
+/*
+ * This lives inside the cache_bin (for locality reasons), and is initialized
+ * alongside it, but is otherwise not modified by any cache bin operations.
+ * It's logically public and maintained by its callers.
+ */
typedef struct cache_bin_stats_s cache_bin_stats_t;
struct cache_bin_stats_s {
/*
@@ -36,34 +61,75 @@ struct cache_bin_stats_s {
*/
typedef struct cache_bin_info_s cache_bin_info_t;
struct cache_bin_info_s {
- /* Upper limit on ncached. */
cache_bin_sz_t ncached_max;
};
+/*
+ * Responsible for caching allocations associated with a single size.
+ *
+ * Several pointers are used to track the stack. To save on metadata bytes,
+ * only the stack_head is a full sized pointer (which is dereferenced on the
+ * fastpath), while the others store only the low 16 bits -- this is correct
+ * because a single stack never takes more space than 2^16 bytes, and at the
+ * same time only equality checks are performed on the low bits.
+ *
+ * (low addr) (high addr)
+ * |------stashed------|------available------|------cached-----|
+ * ^ ^ ^ ^
+ * low_bound(derived) low_bits_full stack_head low_bits_empty
+ */
typedef struct cache_bin_s cache_bin_t;
struct cache_bin_s {
- /* Min # cached since last GC. */
- cache_bin_sz_t low_water;
- /* # of cached objects. */
- cache_bin_sz_t ncached;
/*
- * ncached and stats are both modified frequently. Let's keep them
+ * The stack grows down. Whenever the bin is nonempty, the head points
+ * to an array entry containing a valid allocation. When it is empty,
+ * the head points to one element past the owned array.
+ */
+ void **stack_head;
+ /*
+ * cur_ptr and stats are both modified frequently. Let's keep them
* close so that they have a higher chance of being on the same
* cacheline, thus less write-backs.
*/
cache_bin_stats_t tstats;
+
/*
- * Stack of available objects.
+ * The low bits of the address of the first item in the stack that
+ * hasn't been used since the last GC, to track the low water mark (min
+ * # of cached items).
*
- * To make use of adjacent cacheline prefetch, the items in the avail
- * stack goes to higher address for newer allocations. avail points
- * just above the available space, which means that
- * avail[-ncached, ... -1] are available items and the lowest item will
- * be allocated first.
+ * Since the stack grows down, this is a higher address than
+ * low_bits_full.
*/
- void **avail;
+ uint16_t low_bits_low_water;
+
+ /*
+ * The low bits of the value that stack_head will take on when the array
+ * is full (of cached & stashed items). But remember that stack_head
+ * always points to a valid item when the array is nonempty -- this is
+ * in the array.
+ *
+ * Recall that since the stack grows down, this is the lowest available
+ * address in the array for caching. Only adjusted when stashing items.
+ */
+ uint16_t low_bits_full;
+
+ /*
+ * The low bits of the value that stack_head will take on when the array
+ * is empty.
+ *
+ * The stack grows down -- this is one past the highest address in the
+ * array. Immutable after initialization.
+ */
+ uint16_t low_bits_empty;
};
+/*
+ * The cache_bins live inside the tcache, but the arena (by design) isn't
+ * supposed to know much about tcache internals. To let the arena iterate over
+ * associated bins, we keep (with the tcache) a linked list of
+ * cache_bin_array_descriptor_ts that tell the arena how to find the bins.
+ */
typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t;
struct cache_bin_array_descriptor_s {
/*
@@ -72,37 +138,214 @@ struct cache_bin_array_descriptor_s {
*/
ql_elm(cache_bin_array_descriptor_t) link;
/* Pointers to the tcache bins. */
- cache_bin_t *bins_small;
- cache_bin_t *bins_large;
+ cache_bin_t *bins;
};
static inline void
cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor,
- cache_bin_t *bins_small, cache_bin_t *bins_large) {
+ cache_bin_t *bins) {
ql_elm_new(descriptor, link);
- descriptor->bins_small = bins_small;
- descriptor->bins_large = bins_large;
+ descriptor->bins = bins;
}
-JEMALLOC_ALWAYS_INLINE void *
-cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
- void *ret;
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_nonfast_aligned(const void *ptr) {
+ if (!config_uaf_detection) {
+ return false;
+ }
+ /*
+ * Currently we use alignment to decide which pointer to junk & stash on
+ * dealloc (for catching use-after-free). In some common cases a
+ * page-aligned check is needed already (sdalloc w/ config_prof), so we
+ * are getting it more or less for free -- no added instructions on
+ * free_fastpath.
+ *
+ * Another way of deciding which pointer to sample, is adding another
+ * thread_event to pick one every N bytes. That also adds no cost on
+ * the fastpath, however it will tend to pick large allocations which is
+ * not the desired behavior.
+ */
+ return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0;
+}
+
+/* Returns ncached_max: Upper limit on ncached. */
+static inline cache_bin_sz_t
+cache_bin_info_ncached_max(cache_bin_info_t *info) {
+ return info->ncached_max;
+}
+
+/*
+ * Internal.
+ *
+ * Asserts that the pointer associated with earlier is <= the one associated
+ * with later.
+ */
+static inline void
+cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) {
+ if (earlier > later) {
+ assert(bin->low_bits_full > bin->low_bits_empty);
+ }
+}
- bin->ncached--;
+/*
+ * Internal.
+ *
+ * Does difference calculations that handle wraparound correctly. Earlier must
+ * be associated with the position earlier in memory.
+ */
+static inline uint16_t
+cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) {
+ /*
+ * When it's racy, bin->low_bits_full can be modified concurrently. It
+ * can cross the uint16_t max value and become less than
+ * bin->low_bits_empty at the time of the check.
+ */
+ if (!racy) {
+ cache_bin_assert_earlier(bin, earlier, later);
+ }
+ return later - earlier;
+}
+/*
+ * Number of items currently cached in the bin, without checking ncached_max.
+ * We require specifying whether or not the request is racy or not (i.e. whether
+ * or not concurrent modifications are possible).
+ */
+static inline cache_bin_sz_t
+cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) {
+ cache_bin_sz_t diff = cache_bin_diff(bin,
+ (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy);
+ cache_bin_sz_t n = diff / sizeof(void *);
/*
- * Check for both bin->ncached == 0 and ncached < low_water
- * in a single branch.
+ * We have undefined behavior here; if this function is called from the
+ * arena stats updating code, then stack_head could change from the
+ * first line to the next one. Morally, these loads should be atomic,
+ * but compilers won't currently generate comparisons with in-memory
+ * operands against atomics, and these variables get accessed on the
+ * fast paths. This should still be "safe" in the sense of generating
+ * the correct assembly for the foreseeable future, though.
*/
- if (unlikely(bin->ncached <= bin->low_water)) {
- bin->low_water = bin->ncached;
- if (bin->ncached == -1) {
- bin->ncached = 0;
- *success = false;
- return NULL;
- }
+ assert(n == 0 || *(bin->stack_head) != NULL || racy);
+ return n;
+}
+
+/*
+ * Number of items currently cached in the bin, with checking ncached_max. The
+ * caller must know that no concurrent modification of the cache_bin is
+ * possible.
+ */
+static inline cache_bin_sz_t
+cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
+ cache_bin_sz_t n = cache_bin_ncached_get_internal(bin,
+ /* racy */ false);
+ assert(n <= cache_bin_info_ncached_max(info));
+ return n;
+}
+
+/*
+ * Internal.
+ *
+ * A pointer to the position one past the end of the backing array.
+ *
+ * Do not call if racy, because both 'bin->stack_head' and 'bin->low_bits_full'
+ * are subject to concurrent modifications.
+ */
+static inline void **
+cache_bin_empty_position_get(cache_bin_t *bin) {
+ cache_bin_sz_t diff = cache_bin_diff(bin,
+ (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty,
+ /* racy */ false);
+ uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff;
+ void **ret = (void **)empty_bits;
+
+ assert(ret >= bin->stack_head);
+
+ return ret;
+}
+
+/*
+ * Internal.
+ *
+ * Calculates low bits of the lower bound of the usable cache bin's range (see
+ * cache_bin_t visual representation above).
+ *
+ * No values are concurrently modified, so should be safe to read in a
+ * multithreaded environment. Currently concurrent access happens only during
+ * arena statistics collection.
+ */
+static inline uint16_t
+cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
+ return (uint16_t)bin->low_bits_empty -
+ info->ncached_max * sizeof(void *);
+}
+
+/*
+ * Internal.
+ *
+ * A pointer to the position with the lowest address of the backing array.
+ */
+static inline void **
+cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) {
+ cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+ void **ret = cache_bin_empty_position_get(bin) - ncached_max;
+ assert(ret <= bin->stack_head);
+
+ return ret;
+}
+
+/*
+ * As the name implies. This is important since it's not correct to try to
+ * batch fill a nonempty cache bin.
+ */
+static inline void
+cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) {
+ assert(cache_bin_ncached_get_local(bin, info) == 0);
+ assert(cache_bin_empty_position_get(bin) == bin->stack_head);
+}
+
+/*
+ * Get low water, but without any of the correctness checking we do for the
+ * caller-usable version, if we are temporarily breaking invariants (like
+ * ncached >= low_water during flush).
+ */
+static inline cache_bin_sz_t
+cache_bin_low_water_get_internal(cache_bin_t *bin) {
+ return cache_bin_diff(bin, bin->low_bits_low_water,
+ bin->low_bits_empty, /* racy */ false) / sizeof(void *);
+}
+
+/* Returns the numeric value of low water in [0, ncached]. */
+static inline cache_bin_sz_t
+cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) {
+ cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin);
+ assert(low_water <= cache_bin_info_ncached_max(info));
+ assert(low_water <= cache_bin_ncached_get_local(bin, info));
+
+ cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head,
+ bin->low_bits_low_water);
+
+ return low_water;
+}
+
+/*
+ * Indicates that the current cache bin position should be the low water mark
+ * going forward.
+ */
+static inline void
+cache_bin_low_water_set(cache_bin_t *bin) {
+ bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head;
+}
+
+static inline void
+cache_bin_low_water_adjust(cache_bin_t *bin) {
+ if (cache_bin_ncached_get_internal(bin, /* racy */ false)
+ < cache_bin_low_water_get_internal(bin)) {
+ cache_bin_low_water_set(bin);
}
+}
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) {
/*
* success (instead of ret) should be checked upon the return of this
* function. We avoid checking (ret == NULL) because there is never a
@@ -110,22 +353,318 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
* and eagerly checking ret would cause pipeline stall (waiting for the
* cacheline).
*/
- *success = true;
- ret = *(bin->avail - (bin->ncached + 1));
- return ret;
+ /*
+ * This may read from the empty position; however the loaded value won't
+ * be used. It's safe because the stack has one more slot reserved.
+ */
+ void *ret = *bin->stack_head;
+ uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head;
+ void **new_head = bin->stack_head + 1;
+
+ /*
+ * Note that the low water mark is at most empty; if we pass this check,
+ * we know we're non-empty.
+ */
+ if (likely(low_bits != bin->low_bits_low_water)) {
+ bin->stack_head = new_head;
+ *success = true;
+ return ret;
+ }
+ if (!adjust_low_water) {
+ *success = false;
+ return NULL;
+ }
+ /*
+ * In the fast-path case where we call alloc_easy and then alloc, the
+ * previous checking and computation is optimized away -- we didn't
+ * actually commit any of our operations.
+ */
+ if (likely(low_bits != bin->low_bits_empty)) {
+ bin->stack_head = new_head;
+ bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head;
+ *success = true;
+ return ret;
+ }
+ *success = false;
+ return NULL;
+}
+
+/*
+ * Allocate an item out of the bin, failing if we're at the low-water mark.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc_easy(cache_bin_t *bin, bool *success) {
+ /* We don't look at info if we're not adjusting low-water. */
+ return cache_bin_alloc_impl(bin, success, false);
+}
+
+/*
+ * Allocate an item out of the bin, even if we're currently at the low-water
+ * mark (and failing only if the bin is empty).
+ */
+JEMALLOC_ALWAYS_INLINE void *
+cache_bin_alloc(cache_bin_t *bin, bool *success) {
+ return cache_bin_alloc_impl(bin, success, true);
+}
+
+JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
+cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) {
+ cache_bin_sz_t n = cache_bin_ncached_get_internal(bin,
+ /* racy */ false);
+ if (n > num) {
+ n = (cache_bin_sz_t)num;
+ }
+ memcpy(out, bin->stack_head, n * sizeof(void *));
+ bin->stack_head += n;
+ cache_bin_low_water_adjust(bin);
+
+ return n;
}
JEMALLOC_ALWAYS_INLINE bool
-cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) {
- if (unlikely(bin->ncached == bin_info->ncached_max)) {
+cache_bin_full(cache_bin_t *bin) {
+ return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full);
+}
+
+/*
+ * Free an object into the given bin. Fails only if the bin is full.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) {
+ if (unlikely(cache_bin_full(bin))) {
return false;
}
- assert(bin->ncached < bin_info->ncached_max);
- bin->ncached++;
- *(bin->avail - bin->ncached) = ptr;
+
+ bin->stack_head--;
+ *bin->stack_head = ptr;
+ cache_bin_assert_earlier(bin, bin->low_bits_full,
+ (uint16_t)(uintptr_t)bin->stack_head);
return true;
}
+/* Returns false if failed to stash (i.e. bin is full). */
+JEMALLOC_ALWAYS_INLINE bool
+cache_bin_stash(cache_bin_t *bin, void *ptr) {
+ if (cache_bin_full(bin)) {
+ return false;
+ }
+
+ /* Stash at the full position, in the [full, head) range. */
+ uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head;
+ /* Wraparound handled as well. */
+ uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head,
+ /* racy */ false);
+ *(void **)((uintptr_t)bin->stack_head - diff) = ptr;
+
+ assert(!cache_bin_full(bin));
+ bin->low_bits_full += sizeof(void *);
+ cache_bin_assert_earlier(bin, bin->low_bits_full, low_bits_head);
+
+ return true;
+}
+
+/*
+ * Get the number of stashed pointers.
+ *
+ * When called from a thread not owning the TLS (i.e. racy = true), it's
+ * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can
+ * be modified concurrently and almost none assertions about their values can be
+ * made.
+ */
+JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
+cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info,
+ bool racy) {
+ cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info);
+ uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin,
+ info);
+
+ cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound,
+ bin->low_bits_full, racy) / sizeof(void *);
+ assert(n <= ncached_max);
+
+ if (!racy) {
+ /* Below are for assertions only. */
+ void **low_bound = cache_bin_low_bound_get(bin, info);
+
+ assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound);
+ void *stashed = *(low_bound + n - 1);
+ bool aligned = cache_bin_nonfast_aligned(stashed);
+#ifdef JEMALLOC_JET
+ /* Allow arbitrary pointers to be stashed in tests. */
+ aligned = true;
+#endif
+ assert(n == 0 || (stashed != NULL && aligned));
+ }
+
+ return n;
+}
+
+JEMALLOC_ALWAYS_INLINE cache_bin_sz_t
+cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) {
+ cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info,
+ /* racy */ false);
+ assert(n <= cache_bin_info_ncached_max(info));
+ return n;
+}
+
+/*
+ * Obtain a racy view of the number of items currently in the cache bin, in the
+ * presence of possible concurrent modifications.
+ */
+static inline void
+cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info,
+ cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) {
+ cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true);
+ assert(n <= cache_bin_info_ncached_max(info));
+ *ncached = n;
+
+ n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true);
+ assert(n <= cache_bin_info_ncached_max(info));
+ *nstashed = n;
+ /* Note that cannot assert ncached + nstashed <= ncached_max (racy). */
+}
+
+/*
+ * Filling and flushing are done in batch, on arrays of void *s. For filling,
+ * the arrays go forward, and can be accessed with ordinary array arithmetic.
+ * For flushing, we work from the end backwards, and so need to use special
+ * accessors that invert the usual ordering.
+ *
+ * This is important for maintaining first-fit; the arena code fills with
+ * earliest objects first, and so those are the ones we should return first for
+ * cache_bin_alloc calls. When flushing, we should flush the objects that we
+ * wish to return later; those at the end of the array. This is better for the
+ * first-fit heuristic as well as for cache locality; the most recently freed
+ * objects are the ones most likely to still be in cache.
+ *
+ * This all sounds very hand-wavey and theoretical, but reverting the ordering
+ * on one or the other pathway leads to measurable slowdowns.
+ */
+
+typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t;
+struct cache_bin_ptr_array_s {
+ cache_bin_sz_t n;
+ void **ptr;
+};
+
+/*
+ * Declare a cache_bin_ptr_array_t sufficient for nval items.
+ *
+ * In the current implementation, this could be just part of a
+ * cache_bin_ptr_array_init_... call, since we reuse the cache bin stack memory.
+ * Indirecting behind a macro, though, means experimenting with linked-list
+ * representations is easy (since they'll require an alloca in the calling
+ * frame).
+ */
+#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \
+ cache_bin_ptr_array_t name; \
+ name.n = (nval)
+
+/*
+ * Start a fill. The bin must be empty, and This must be followed by a
+ * finish_fill call before doing any alloc/dalloc operations on the bin.
+ */
+static inline void
+cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info,
+ cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) {
+ cache_bin_assert_empty(bin, info);
+ arr->ptr = cache_bin_empty_position_get(bin) - nfill;
+}
+
+/*
+ * While nfill in cache_bin_init_ptr_array_for_fill is the number we *intend* to
+ * fill, nfilled here is the number we actually filled (which may be less, in
+ * case of OOM.
+ */
+static inline void
+cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info,
+ cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) {
+ cache_bin_assert_empty(bin, info);
+ void **empty_position = cache_bin_empty_position_get(bin);
+ if (nfilled < arr->n) {
+ memmove(empty_position - nfilled, empty_position - arr->n,
+ nfilled * sizeof(void *));
+ }
+ bin->stack_head = empty_position - nfilled;
+}
+
+/*
+ * Same deal, but with flush. Unlike fill (which can fail), the user must flush
+ * everything we give them.
+ */
+static inline void
+cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info,
+ cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) {
+ arr->ptr = cache_bin_empty_position_get(bin) - nflush;
+ assert(cache_bin_ncached_get_local(bin, info) == 0
+ || *arr->ptr != NULL);
+}
+
+static inline void
+cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info,
+ cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) {
+ unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed;
+ memmove(bin->stack_head + nflushed, bin->stack_head,
+ rem * sizeof(void *));
+ bin->stack_head = bin->stack_head + nflushed;
+ cache_bin_low_water_adjust(bin);
+}
+
+static inline void
+cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind,
+ cache_bin_info_t *info, cache_bin_ptr_array_t *arr,
+ cache_bin_sz_t nstashed) {
+ assert(nstashed > 0);
+ assert(cache_bin_nstashed_get_local(bin, info) == nstashed);
+
+ void **low_bound = cache_bin_low_bound_get(bin, info);
+ arr->ptr = low_bound;
+ assert(*arr->ptr != NULL);
+}
+
+static inline void
+cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) {
+ void **low_bound = cache_bin_low_bound_get(bin, info);
+
+ /* Reset the bin local full position. */
+ bin->low_bits_full = (uint16_t)(uintptr_t)low_bound;
+ assert(cache_bin_nstashed_get_local(bin, info) == 0);
+}
+
+/*
+ * Initialize a cache_bin_info to represent up to the given number of items in
+ * the cache_bins it is associated with.
+ */
+void cache_bin_info_init(cache_bin_info_t *bin_info,
+ cache_bin_sz_t ncached_max);
+/*
+ * Given an array of initialized cache_bin_info_ts, determine how big an
+ * allocation is required to initialize a full set of cache_bin_ts.
+ */
+void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos,
+ size_t *size, size_t *alignment);
+
+/*
+ * Actually initialize some cache bins. Callers should allocate the backing
+ * memory indicated by a call to cache_bin_compute_alloc. They should then
+ * preincrement, call init once for each bin and info, and then call
+ * cache_bin_postincrement. *alloc_cur will then point immediately past the end
+ * of the allocation.
+ */
+void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos,
+ void *alloc, size_t *cur_offset);
+void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos,
+ void *alloc, size_t *cur_offset);
+void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc,
+ size_t *cur_offset);
+
+/*
+ * If a cache bin was zero initialized (either because it lives in static or
+ * thread-local storage, or was memset to 0), this function indicates whether or
+ * not cache_bin_init was called on it.
+ */
+bool cache_bin_still_zero_initialized(cache_bin_t *bin);
+
#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */
diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h
new file mode 100644
index 0000000..79abf06
--- /dev/null
+++ b/include/jemalloc/internal/counter.h
@@ -0,0 +1,34 @@
+#ifndef JEMALLOC_INTERNAL_COUNTER_H
+#define JEMALLOC_INTERNAL_COUNTER_H
+
+#include "jemalloc/internal/mutex.h"
+
+typedef struct counter_accum_s {
+ LOCKEDINT_MTX_DECLARE(mtx)
+ locked_u64_t accumbytes;
+ uint64_t interval;
+} counter_accum_t;
+
+JEMALLOC_ALWAYS_INLINE bool
+counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) {
+ uint64_t interval = counter->interval;
+ assert(interval > 0);
+ LOCKEDINT_MTX_LOCK(tsdn, counter->mtx);
+ /*
+ * If the event moves fast enough (and/or if the event handling is slow
+ * enough), extreme overflow can cause counter trigger coalescing.
+ * This is an intentional mechanism that avoids rate-limiting
+ * allocation.
+ */
+ bool overflow = locked_inc_mod_u64(tsdn, LOCKEDINT_MTX(counter->mtx),
+ &counter->accumbytes, bytes, interval);
+ LOCKEDINT_MTX_UNLOCK(tsdn, counter->mtx);
+ return overflow;
+}
+
+bool counter_accum_init(counter_accum_t *counter, uint64_t interval);
+void counter_prefork(tsdn_t *tsdn, counter_accum_t *counter);
+void counter_postfork_parent(tsdn_t *tsdn, counter_accum_t *counter);
+void counter_postfork_child(tsdn_t *tsdn, counter_accum_t *counter);
+
+#endif /* JEMALLOC_INTERNAL_COUNTER_H */
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h
index 1d1aacc..63d27f8 100644
--- a/include/jemalloc/internal/ctl.h
+++ b/include/jemalloc/internal/ctl.h
@@ -42,9 +42,11 @@ typedef struct ctl_arena_stats_s {
uint64_t nfills_small;
uint64_t nflushes_small;
- bin_stats_t bstats[SC_NBINS];
+ bin_stats_data_t bstats[SC_NBINS];
arena_stats_large_t lstats[SC_NSIZES - SC_NBINS];
- arena_stats_extents_t estats[SC_NPSIZES];
+ pac_estats_t estats[SC_NPSIZES];
+ hpa_shard_stats_t hpastats;
+ sec_stats_t secstats;
} ctl_arena_stats_t;
typedef struct ctl_stats_s {
@@ -96,13 +98,17 @@ typedef struct ctl_arenas_s {
int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp);
-
int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp,
size_t *oldlenp, void *newp, size_t newlen);
+int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
+ size_t *miblenp);
+int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name,
+ size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
bool ctl_boot(void);
void ctl_prefork(tsdn_t *tsdn);
void ctl_postfork_parent(tsdn_t *tsdn);
void ctl_postfork_child(tsdn_t *tsdn);
+void ctl_mtx_assert_held(tsdn_t *tsdn);
#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \
if (je_mallctl(name, oldp, oldlenp, newp, newlen) \
@@ -131,4 +137,23 @@ void ctl_postfork_child(tsdn_t *tsdn);
} \
} while (0)
+#define xmallctlmibnametomib(mib, miblen, name, miblenp) do { \
+ if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \
+ != 0) { \
+ malloc_write( \
+ "<jemalloc>: Failure in ctl_mibnametomib()\n"); \
+ abort(); \
+ } \
+} while (0)
+
+#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp, \
+ newp, newlen) do { \
+ if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \
+ oldp, oldlenp, newp, newlen) != 0) { \
+ malloc_write( \
+ "<jemalloc>: Failure in ctl_bymibname()\n"); \
+ abort(); \
+ } \
+} while (0)
+
#endif /* JEMALLOC_INTERNAL_CTL_H */
diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h
new file mode 100644
index 0000000..cf6a9d2
--- /dev/null
+++ b/include/jemalloc/internal/decay.h
@@ -0,0 +1,186 @@
+#ifndef JEMALLOC_INTERNAL_DECAY_H
+#define JEMALLOC_INTERNAL_DECAY_H
+
+#include "jemalloc/internal/smoothstep.h"
+
+#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1)
+
+/*
+ * The decay_t computes the number of pages we should purge at any given time.
+ * Page allocators inform a decay object when pages enter a decay-able state
+ * (i.e. dirty or muzzy), and query it to determine how many pages should be
+ * purged at any given time.
+ *
+ * This is mostly a single-threaded data structure and doesn't care about
+ * synchronization at all; it's the caller's responsibility to manage their
+ * synchronization on their own. There are two exceptions:
+ * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query).
+ * 2) The mtx and purging fields live (and are initialized) here, but are
+ * logically owned by the page allocator. This is just a convenience (since
+ * those fields would be duplicated for both the dirty and muzzy states
+ * otherwise).
+ */
+typedef struct decay_s decay_t;
+struct decay_s {
+ /* Synchronizes all non-atomic fields. */
+ malloc_mutex_t mtx;
+ /*
+ * True if a thread is currently purging the extents associated with
+ * this decay structure.
+ */
+ bool purging;
+ /*
+ * Approximate time in milliseconds from the creation of a set of unused
+ * dirty pages until an equivalent set of unused dirty pages is purged
+ * and/or reused.
+ */
+ atomic_zd_t time_ms;
+ /* time / SMOOTHSTEP_NSTEPS. */
+ nstime_t interval;
+ /*
+ * Time at which the current decay interval logically started. We do
+ * not actually advance to a new epoch until sometime after it starts
+ * because of scheduling and computation delays, and it is even possible
+ * to completely skip epochs. In all cases, during epoch advancement we
+ * merge all relevant activity into the most recently recorded epoch.
+ */
+ nstime_t epoch;
+ /* Deadline randomness generator. */
+ uint64_t jitter_state;
+ /*
+ * Deadline for current epoch. This is the sum of interval and per
+ * epoch jitter which is a uniform random variable in [0..interval).
+ * Epochs always advance by precise multiples of interval, but we
+ * randomize the deadline to reduce the likelihood of arenas purging in
+ * lockstep.
+ */
+ nstime_t deadline;
+ /*
+ * The number of pages we cap ourselves at in the current epoch, per
+ * decay policies. Updated on an epoch change. After an epoch change,
+ * the caller should take steps to try to purge down to this amount.
+ */
+ size_t npages_limit;
+ /*
+ * Number of unpurged pages at beginning of current epoch. During epoch
+ * advancement we use the delta between arena->decay_*.nunpurged and
+ * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages,
+ * if any, were generated.
+ */
+ size_t nunpurged;
+ /*
+ * Trailing log of how many unused dirty pages were generated during
+ * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last
+ * element is the most recent epoch. Corresponding epoch times are
+ * relative to epoch.
+ *
+ * Updated only on epoch advance, triggered by
+ * decay_maybe_advance_epoch, below.
+ */
+ size_t backlog[SMOOTHSTEP_NSTEPS];
+
+ /* Peak number of pages in associated extents. Used for debug only. */
+ uint64_t ceil_npages;
+};
+
+/*
+ * The current decay time setting. This is the only public access to a decay_t
+ * that's allowed without holding mtx.
+ */
+static inline ssize_t
+decay_ms_read(const decay_t *decay) {
+ return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
+}
+
+/*
+ * See the comment on the struct field -- the limit on pages we should allow in
+ * this decay state this epoch.
+ */
+static inline size_t
+decay_npages_limit_get(const decay_t *decay) {
+ return decay->npages_limit;
+}
+
+/* How many unused dirty pages were generated during the last epoch. */
+static inline size_t
+decay_epoch_npages_delta(const decay_t *decay) {
+ return decay->backlog[SMOOTHSTEP_NSTEPS - 1];
+}
+
+/*
+ * Current epoch duration, in nanoseconds. Given that new epochs are started
+ * somewhat haphazardly, this is not necessarily exactly the time between any
+ * two calls to decay_maybe_advance_epoch; see the comments on fields in the
+ * decay_t.
+ */
+static inline uint64_t
+decay_epoch_duration_ns(const decay_t *decay) {
+ return nstime_ns(&decay->interval);
+}
+
+static inline bool
+decay_immediately(const decay_t *decay) {
+ ssize_t decay_ms = decay_ms_read(decay);
+ return decay_ms == 0;
+}
+
+static inline bool
+decay_disabled(const decay_t *decay) {
+ ssize_t decay_ms = decay_ms_read(decay);
+ return decay_ms < 0;
+}
+
+/* Returns true if decay is enabled and done gradually. */
+static inline bool
+decay_gradually(const decay_t *decay) {
+ ssize_t decay_ms = decay_ms_read(decay);
+ return decay_ms > 0;
+}
+
+/*
+ * Returns true if the passed in decay time setting is valid.
+ * < -1 : invalid
+ * -1 : never decay
+ * 0 : decay immediately
+ * > 0 : some positive decay time, up to a maximum allowed value of
+ * NSTIME_SEC_MAX * 1000, which corresponds to decaying somewhere in the early
+ * 27th century. By that time, we expect to have implemented alternate purging
+ * strategies.
+ */
+bool decay_ms_valid(ssize_t decay_ms);
+
+/*
+ * As a precondition, the decay_t must be zeroed out (as if with memset).
+ *
+ * Returns true on error.
+ */
+bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);
+
+/*
+ * Given an already-initialized decay_t, reinitialize it with the given decay
+ * time. The decay_t must have previously been initialized (and should not then
+ * be zeroed).
+ */
+void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms);
+
+/*
+ * Compute how many of 'npages_new' pages we would need to purge in 'time'.
+ */
+uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time,
+ size_t npages_new);
+
+/* Returns true if the epoch advanced and there are pages to purge. */
+bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time,
+ size_t current_npages);
+
+/*
+ * Calculates wait time until a number of pages in the interval
+ * [0.5 * npages_threshold .. 1.5 * npages_threshold] should be purged.
+ *
+ * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of
+ * indefinite wait.
+ */
+uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current,
+ uint64_t npages_threshold);
+
+#endif /* JEMALLOC_INTERNAL_DECAY_H */
diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h
new file mode 100644
index 0000000..71cae3e
--- /dev/null
+++ b/include/jemalloc/internal/ecache.h
@@ -0,0 +1,55 @@
+#ifndef JEMALLOC_INTERNAL_ECACHE_H
+#define JEMALLOC_INTERNAL_ECACHE_H
+
+#include "jemalloc/internal/eset.h"
+#include "jemalloc/internal/san.h"
+#include "jemalloc/internal/mutex.h"
+
+typedef struct ecache_s ecache_t;
+struct ecache_s {
+ malloc_mutex_t mtx;
+ eset_t eset;
+ eset_t guarded_eset;
+ /* All stored extents must be in the same state. */
+ extent_state_t state;
+ /* The index of the ehooks the ecache is associated with. */
+ unsigned ind;
+ /*
+ * If true, delay coalescing until eviction; otherwise coalesce during
+ * deallocation.
+ */
+ bool delay_coalesce;
+};
+
+static inline size_t
+ecache_npages_get(ecache_t *ecache) {
+ return eset_npages_get(&ecache->eset) +
+ eset_npages_get(&ecache->guarded_eset);
+}
+
+/* Get the number of extents in the given page size index. */
+static inline size_t
+ecache_nextents_get(ecache_t *ecache, pszind_t ind) {
+ return eset_nextents_get(&ecache->eset, ind) +
+ eset_nextents_get(&ecache->guarded_eset, ind);
+}
+
+/* Get the sum total bytes of the extents in the given page size index. */
+static inline size_t
+ecache_nbytes_get(ecache_t *ecache, pszind_t ind) {
+ return eset_nbytes_get(&ecache->eset, ind) +
+ eset_nbytes_get(&ecache->guarded_eset, ind);
+}
+
+static inline unsigned
+ecache_ind_get(ecache_t *ecache) {
+ return ecache->ind;
+}
+
+bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state,
+ unsigned ind, bool delay_coalesce);
+void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache);
+void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache);
+void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache);
+
+#endif /* JEMALLOC_INTERNAL_ECACHE_H */
diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h
new file mode 100644
index 0000000..af039ea
--- /dev/null
+++ b/include/jemalloc/internal/edata.h
@@ -0,0 +1,698 @@
+#ifndef JEMALLOC_INTERNAL_EDATA_H
+#define JEMALLOC_INTERNAL_EDATA_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/bin_info.h"
+#include "jemalloc/internal/bit_util.h"
+#include "jemalloc/internal/hpdata.h"
+#include "jemalloc/internal/nstime.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/sc.h"
+#include "jemalloc/internal/slab_data.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/typed_list.h"
+
+/*
+ * sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment
+ * to free up the low bits in the rtree leaf.
+ */
+#define EDATA_ALIGNMENT 128
+
+enum extent_state_e {
+ extent_state_active = 0,
+ extent_state_dirty = 1,
+ extent_state_muzzy = 2,
+ extent_state_retained = 3,
+ extent_state_transition = 4, /* States below are intermediate. */
+ extent_state_merging = 5,
+ extent_state_max = 5 /* Sanity checking only. */
+};
+typedef enum extent_state_e extent_state_t;
+
+enum extent_head_state_e {
+ EXTENT_NOT_HEAD,
+ EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */
+};
+typedef enum extent_head_state_e extent_head_state_t;
+
+/*
+ * Which implementation of the page allocator interface, (PAI, defined in
+ * pai.h) owns the given extent?
+ */
+enum extent_pai_e {
+ EXTENT_PAI_PAC = 0,
+ EXTENT_PAI_HPA = 1
+};
+typedef enum extent_pai_e extent_pai_t;
+
+struct e_prof_info_s {
+ /* Time when this was allocated. */
+ nstime_t e_prof_alloc_time;
+ /* Allocation request size. */
+ size_t e_prof_alloc_size;
+ /* Points to a prof_tctx_t. */
+ atomic_p_t e_prof_tctx;
+ /*
+ * Points to a prof_recent_t for the allocation; NULL
+ * means the recent allocation record no longer exists.
+ * Protected by prof_recent_alloc_mtx.
+ */
+ atomic_p_t e_prof_recent_alloc;
+};
+typedef struct e_prof_info_s e_prof_info_t;
+
+/*
+ * The information about a particular edata that lives in an emap. Space is
+ * more precious there (the information, plus the edata pointer, has to live in
+ * a 64-bit word if we want to enable a packed representation.
+ *
+ * There are two things that are special about the information here:
+ * - It's quicker to access. You have one fewer pointer hop, since finding the
+ * edata_t associated with an item always requires accessing the rtree leaf in
+ * which this data is stored.
+ * - It can be read unsynchronized, and without worrying about lifetime issues.
+ */
+typedef struct edata_map_info_s edata_map_info_t;
+struct edata_map_info_s {
+ bool slab;
+ szind_t szind;
+};
+
+typedef struct edata_cmp_summary_s edata_cmp_summary_t;
+struct edata_cmp_summary_s {
+ uint64_t sn;
+ uintptr_t addr;
+};
+
+/* Extent (span of pages). Use accessor functions for e_* fields. */
+typedef struct edata_s edata_t;
+ph_structs(edata_avail, edata_t);
+ph_structs(edata_heap, edata_t);
+struct edata_s {
+ /*
+ * Bitfield containing several fields:
+ *
+ * a: arena_ind
+ * b: slab
+ * c: committed
+ * p: pai
+ * z: zeroed
+ * g: guarded
+ * t: state
+ * i: szind
+ * f: nfree
+ * s: bin_shard
+ *
+ * 00000000 ... 0000ssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa
+ *
+ * arena_ind: Arena from which this extent came, or all 1 bits if
+ * unassociated.
+ *
+ * slab: The slab flag indicates whether the extent is used for a slab
+ * of small regions. This helps differentiate small size classes,
+ * and it indicates whether interior pointers can be looked up via
+ * iealloc().
+ *
+ * committed: The committed flag indicates whether physical memory is
+ * committed to the extent, whether explicitly or implicitly
+ * as on a system that overcommits and satisfies physical
+ * memory needs on demand via soft page faults.
+ *
+ * pai: The pai flag is an extent_pai_t.
+ *
+ * zeroed: The zeroed flag is used by extent recycling code to track
+ * whether memory is zero-filled.
+ *
+ * guarded: The guarded flag is use by the sanitizer to track whether
+ * the extent has page guards around it.
+ *
+ * state: The state flag is an extent_state_t.
+ *
+ * szind: The szind flag indicates usable size class index for
+ * allocations residing in this extent, regardless of whether the
+ * extent is a slab. Extent size and usable size often differ
+ * even for non-slabs, either due to sz_large_pad or promotion of
+ * sampled small regions.
+ *
+ * nfree: Number of free regions in slab.
+ *
+ * bin_shard: the shard of the bin from which this extent came.
+ */
+ uint64_t e_bits;
+#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
+
+#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
+#define EDATA_BITS_ARENA_SHIFT 0
+#define EDATA_BITS_ARENA_MASK MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT)
+
+#define EDATA_BITS_SLAB_WIDTH 1
+#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT)
+#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT)
+
+#define EDATA_BITS_COMMITTED_WIDTH 1
+#define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT)
+#define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT)
+
+#define EDATA_BITS_PAI_WIDTH 1
+#define EDATA_BITS_PAI_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT)
+#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT)
+
+#define EDATA_BITS_ZEROED_WIDTH 1
+#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT)
+#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT)
+
+#define EDATA_BITS_GUARDED_WIDTH 1
+#define EDATA_BITS_GUARDED_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT)
+#define EDATA_BITS_GUARDED_MASK MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT)
+
+#define EDATA_BITS_STATE_WIDTH 3
+#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT)
+#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT)
+
+#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES)
+#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT)
+#define EDATA_BITS_SZIND_MASK MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT)
+
+#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1)
+#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT)
+#define EDATA_BITS_NFREE_MASK MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT)
+
+#define EDATA_BITS_BINSHARD_WIDTH 6
+#define EDATA_BITS_BINSHARD_SHIFT (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT)
+#define EDATA_BITS_BINSHARD_MASK MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT)
+
+#define EDATA_BITS_IS_HEAD_WIDTH 1
+#define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT)
+#define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT)
+
+ /* Pointer to the extent that this structure is responsible for. */
+ void *e_addr;
+
+ union {
+ /*
+ * Extent size and serial number associated with the extent
+ * structure (different than the serial number for the extent at
+ * e_addr).
+ *
+ * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
+ */
+ size_t e_size_esn;
+ #define EDATA_SIZE_MASK ((size_t)~(PAGE-1))
+ #define EDATA_ESN_MASK ((size_t)PAGE-1)
+ /* Base extent size, which may not be a multiple of PAGE. */
+ size_t e_bsize;
+ };
+
+ /*
+ * If this edata is a user allocation from an HPA, it comes out of some
+ * pageslab (we don't yet support huegpage allocations that don't fit
+ * into pageslabs). This tracks it.
+ */
+ hpdata_t *e_ps;
+
+ /*
+ * Serial number. These are not necessarily unique; splitting an extent
+ * results in two extents with the same serial number.
+ */
+ uint64_t e_sn;
+
+ union {
+ /*
+ * List linkage used when the edata_t is active; either in
+ * arena's large allocations or bin_t's slabs_full.
+ */
+ ql_elm(edata_t) ql_link_active;
+ /*
+ * Pairing heap linkage. Used whenever the extent is inactive
+ * (in the page allocators), or when it is active and in
+ * slabs_nonfull, or when the edata_t is unassociated with an
+ * extent and sitting in an edata_cache.
+ */
+ union {
+ edata_heap_link_t heap_link;
+ edata_avail_link_t avail_link;
+ };
+ };
+
+ union {
+ /*
+ * List linkage used when the extent is inactive:
+ * - Stashed dirty extents
+ * - Ecache LRU functionality.
+ */
+ ql_elm(edata_t) ql_link_inactive;
+ /* Small region slab metadata. */
+ slab_data_t e_slab_data;
+
+ /* Profiling data, used for large objects. */
+ e_prof_info_t e_prof_info;
+ };
+};
+
+TYPED_LIST(edata_list_active, edata_t, ql_link_active)
+TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive)
+
+static inline unsigned
+edata_arena_ind_get(const edata_t *edata) {
+ unsigned arena_ind = (unsigned)((edata->e_bits &
+ EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT);
+ assert(arena_ind < MALLOCX_ARENA_LIMIT);
+
+ return arena_ind;
+}
+
+static inline szind_t
+edata_szind_get_maybe_invalid(const edata_t *edata) {
+ szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >>
+ EDATA_BITS_SZIND_SHIFT);
+ assert(szind <= SC_NSIZES);
+ return szind;
+}
+
+static inline szind_t
+edata_szind_get(const edata_t *edata) {
+ szind_t szind = edata_szind_get_maybe_invalid(edata);
+ assert(szind < SC_NSIZES); /* Never call when "invalid". */
+ return szind;
+}
+
+static inline size_t
+edata_usize_get(const edata_t *edata) {
+ return sz_index2size(edata_szind_get(edata));
+}
+
+static inline unsigned
+edata_binshard_get(const edata_t *edata) {
+ unsigned binshard = (unsigned)((edata->e_bits &
+ EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT);
+ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
+ return binshard;
+}
+
+static inline uint64_t
+edata_sn_get(const edata_t *edata) {
+ return edata->e_sn;
+}
+
+static inline extent_state_t
+edata_state_get(const edata_t *edata) {
+ return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >>
+ EDATA_BITS_STATE_SHIFT);
+}
+
+static inline bool
+edata_guarded_get(const edata_t *edata) {
+ return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >>
+ EDATA_BITS_GUARDED_SHIFT);
+}
+
+static inline bool
+edata_zeroed_get(const edata_t *edata) {
+ return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >>
+ EDATA_BITS_ZEROED_SHIFT);
+}
+
+static inline bool
+edata_committed_get(const edata_t *edata) {
+ return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >>
+ EDATA_BITS_COMMITTED_SHIFT);
+}
+
+static inline extent_pai_t
+edata_pai_get(const edata_t *edata) {
+ return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >>
+ EDATA_BITS_PAI_SHIFT);
+}
+
+static inline bool
+edata_slab_get(const edata_t *edata) {
+ return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >>
+ EDATA_BITS_SLAB_SHIFT);
+}
+
+static inline unsigned
+edata_nfree_get(const edata_t *edata) {
+ assert(edata_slab_get(edata));
+ return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >>
+ EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void *
+edata_base_get(const edata_t *edata) {
+ assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) ||
+ !edata_slab_get(edata));
+ return PAGE_ADDR2BASE(edata->e_addr);
+}
+
+static inline void *
+edata_addr_get(const edata_t *edata) {
+ assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) ||
+ !edata_slab_get(edata));
+ return edata->e_addr;
+}
+
+static inline size_t
+edata_size_get(const edata_t *edata) {
+ return (edata->e_size_esn & EDATA_SIZE_MASK);
+}
+
+static inline size_t
+edata_esn_get(const edata_t *edata) {
+ return (edata->e_size_esn & EDATA_ESN_MASK);
+}
+
+static inline size_t
+edata_bsize_get(const edata_t *edata) {
+ return edata->e_bsize;
+}
+
+static inline hpdata_t *
+edata_ps_get(const edata_t *edata) {
+ assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
+ return edata->e_ps;
+}
+
+static inline void *
+edata_before_get(const edata_t *edata) {
+ return (void *)((uintptr_t)edata_base_get(edata) - PAGE);
+}
+
+static inline void *
+edata_last_get(const edata_t *edata) {
+ return (void *)((uintptr_t)edata_base_get(edata) +
+ edata_size_get(edata) - PAGE);
+}
+
+static inline void *
+edata_past_get(const edata_t *edata) {
+ return (void *)((uintptr_t)edata_base_get(edata) +
+ edata_size_get(edata));
+}
+
+static inline slab_data_t *
+edata_slab_data_get(edata_t *edata) {
+ assert(edata_slab_get(edata));
+ return &edata->e_slab_data;
+}
+
+static inline const slab_data_t *
+edata_slab_data_get_const(const edata_t *edata) {
+ assert(edata_slab_get(edata));
+ return &edata->e_slab_data;
+}
+
+static inline prof_tctx_t *
+edata_prof_tctx_get(const edata_t *edata) {
+ return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx,
+ ATOMIC_ACQUIRE);
+}
+
+static inline const nstime_t *
+edata_prof_alloc_time_get(const edata_t *edata) {
+ return &edata->e_prof_info.e_prof_alloc_time;
+}
+
+static inline size_t
+edata_prof_alloc_size_get(const edata_t *edata) {
+ return edata->e_prof_info.e_prof_alloc_size;
+}
+
+static inline prof_recent_t *
+edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) {
+ return (prof_recent_t *)atomic_load_p(
+ &edata->e_prof_info.e_prof_recent_alloc, ATOMIC_RELAXED);
+}
+
+static inline void
+edata_arena_ind_set(edata_t *edata, unsigned arena_ind) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) |
+ ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT);
+}
+
+static inline void
+edata_binshard_set(edata_t *edata, unsigned binshard) {
+ /* The assertion assumes szind is set already. */
+ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) |
+ ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT);
+}
+
+static inline void
+edata_addr_set(edata_t *edata, void *addr) {
+ edata->e_addr = addr;
+}
+
+static inline void
+edata_size_set(edata_t *edata, size_t size) {
+ assert((size & ~EDATA_SIZE_MASK) == 0);
+ edata->e_size_esn = size | (edata->e_size_esn & ~EDATA_SIZE_MASK);
+}
+
+static inline void
+edata_esn_set(edata_t *edata, size_t esn) {
+ edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn &
+ EDATA_ESN_MASK);
+}
+
+static inline void
+edata_bsize_set(edata_t *edata, size_t bsize) {
+ edata->e_bsize = bsize;
+}
+
+static inline void
+edata_ps_set(edata_t *edata, hpdata_t *ps) {
+ assert(edata_pai_get(edata) == EXTENT_PAI_HPA);
+ edata->e_ps = ps;
+}
+
+static inline void
+edata_szind_set(edata_t *edata, szind_t szind) {
+ assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) |
+ ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT);
+}
+
+static inline void
+edata_nfree_set(edata_t *edata, unsigned nfree) {
+ assert(edata_slab_get(edata));
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) |
+ ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void
+edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) {
+ /* The assertion assumes szind is set already. */
+ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards);
+ edata->e_bits = (edata->e_bits &
+ (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) |
+ ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) |
+ ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void
+edata_nfree_inc(edata_t *edata) {
+ assert(edata_slab_get(edata));
+ edata->e_bits += ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void
+edata_nfree_dec(edata_t *edata) {
+ assert(edata_slab_get(edata));
+ edata->e_bits -= ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void
+edata_nfree_sub(edata_t *edata, uint64_t n) {
+ assert(edata_slab_get(edata));
+ edata->e_bits -= (n << EDATA_BITS_NFREE_SHIFT);
+}
+
+static inline void
+edata_sn_set(edata_t *edata, uint64_t sn) {
+ edata->e_sn = sn;
+}
+
+static inline void
+edata_state_set(edata_t *edata, extent_state_t state) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) |
+ ((uint64_t)state << EDATA_BITS_STATE_SHIFT);
+}
+
+static inline void
+edata_guarded_set(edata_t *edata, bool guarded) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) |
+ ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT);
+}
+
+static inline void
+edata_zeroed_set(edata_t *edata, bool zeroed) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) |
+ ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT);
+}
+
+static inline void
+edata_committed_set(edata_t *edata, bool committed) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) |
+ ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT);
+}
+
+static inline void
+edata_pai_set(edata_t *edata, extent_pai_t pai) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) |
+ ((uint64_t)pai << EDATA_BITS_PAI_SHIFT);
+}
+
+static inline void
+edata_slab_set(edata_t *edata, bool slab) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) |
+ ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT);
+}
+
+static inline void
+edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) {
+ atomic_store_p(&edata->e_prof_info.e_prof_tctx, tctx, ATOMIC_RELEASE);
+}
+
+static inline void
+edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) {
+ nstime_copy(&edata->e_prof_info.e_prof_alloc_time, t);
+}
+
+static inline void
+edata_prof_alloc_size_set(edata_t *edata, size_t size) {
+ edata->e_prof_info.e_prof_alloc_size = size;
+}
+
+static inline void
+edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata,
+ prof_recent_t *recent_alloc) {
+ atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc,
+ ATOMIC_RELAXED);
+}
+
+static inline bool
+edata_is_head_get(edata_t *edata) {
+ return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >>
+ EDATA_BITS_IS_HEAD_SHIFT);
+}
+
+static inline void
+edata_is_head_set(edata_t *edata, bool is_head) {
+ edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) |
+ ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT);
+}
+
+static inline bool
+edata_state_in_transition(extent_state_t state) {
+ return state >= extent_state_transition;
+}
+
+/*
+ * Because this function is implemented as a sequence of bitfield modifications,
+ * even though each individual bit is properly initialized, we technically read
+ * uninitialized data within it. This is mostly fine, since most callers get
+ * their edatas from zeroing sources, but callers who make stack edata_ts need
+ * to manually zero them.
+ */
+static inline void
+edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size,
+ bool slab, szind_t szind, uint64_t sn, extent_state_t state, bool zeroed,
+ bool committed, extent_pai_t pai, extent_head_state_t is_head) {
+ assert(addr == PAGE_ADDR2BASE(addr) || !slab);
+
+ edata_arena_ind_set(edata, arena_ind);
+ edata_addr_set(edata, addr);
+ edata_size_set(edata, size);
+ edata_slab_set(edata, slab);
+ edata_szind_set(edata, szind);
+ edata_sn_set(edata, sn);
+ edata_state_set(edata, state);
+ edata_guarded_set(edata, false);
+ edata_zeroed_set(edata, zeroed);
+ edata_committed_set(edata, committed);
+ edata_pai_set(edata, pai);
+ edata_is_head_set(edata, is_head == EXTENT_IS_HEAD);
+ if (config_prof) {
+ edata_prof_tctx_set(edata, NULL);
+ }
+}
+
+static inline void
+edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) {
+ edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1);
+ edata_addr_set(edata, addr);
+ edata_bsize_set(edata, bsize);
+ edata_slab_set(edata, false);
+ edata_szind_set(edata, SC_NSIZES);
+ edata_sn_set(edata, sn);
+ edata_state_set(edata, extent_state_active);
+ edata_guarded_set(edata, false);
+ edata_zeroed_set(edata, true);
+ edata_committed_set(edata, true);
+ /*
+ * This isn't strictly true, but base allocated extents never get
+ * deallocated and can't be looked up in the emap, but no sense in
+ * wasting a state bit to encode this fact.
+ */
+ edata_pai_set(edata, EXTENT_PAI_PAC);
+}
+
+static inline int
+edata_esn_comp(const edata_t *a, const edata_t *b) {
+ size_t a_esn = edata_esn_get(a);
+ size_t b_esn = edata_esn_get(b);
+
+ return (a_esn > b_esn) - (a_esn < b_esn);
+}
+
+static inline int
+edata_ead_comp(const edata_t *a, const edata_t *b) {
+ uintptr_t a_eaddr = (uintptr_t)a;
+ uintptr_t b_eaddr = (uintptr_t)b;
+
+ return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr);
+}
+
+static inline edata_cmp_summary_t
+edata_cmp_summary_get(const edata_t *edata) {
+ return (edata_cmp_summary_t){edata_sn_get(edata),
+ (uintptr_t)edata_addr_get(edata)};
+}
+
+static inline int
+edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) {
+ int ret;
+ ret = (a.sn > b.sn) - (a.sn < b.sn);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = (a.addr > b.addr) - (a.addr < b.addr);
+ return ret;
+}
+
+static inline int
+edata_snad_comp(const edata_t *a, const edata_t *b) {
+ edata_cmp_summary_t a_cmp = edata_cmp_summary_get(a);
+ edata_cmp_summary_t b_cmp = edata_cmp_summary_get(b);
+
+ return edata_cmp_summary_comp(a_cmp, b_cmp);
+}
+
+static inline int
+edata_esnead_comp(const edata_t *a, const edata_t *b) {
+ int ret;
+
+ ret = edata_esn_comp(a, b);
+ if (ret != 0) {
+ return ret;
+ }
+
+ ret = edata_ead_comp(a, b);
+ return ret;
+}
+
+ph_proto(, edata_avail, edata_t)
+ph_proto(, edata_heap, edata_t)
+
+#endif /* JEMALLOC_INTERNAL_EDATA_H */
diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h
new file mode 100644
index 0000000..8b6c0ef
--- /dev/null
+++ b/include/jemalloc/internal/edata_cache.h
@@ -0,0 +1,49 @@
+#ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H
+#define JEMALLOC_INTERNAL_EDATA_CACHE_H
+
+#include "jemalloc/internal/base.h"
+
+/* For tests only. */
+#define EDATA_CACHE_FAST_FILL 4
+
+/*
+ * A cache of edata_t structures allocated via base_alloc_edata (as opposed to
+ * the underlying extents they describe). The contents of returned edata_t
+ * objects are garbage and cannot be relied upon.
+ */
+
+typedef struct edata_cache_s edata_cache_t;
+struct edata_cache_s {
+ edata_avail_t avail;
+ atomic_zu_t count;
+ malloc_mutex_t mtx;
+ base_t *base;
+};
+
+bool edata_cache_init(edata_cache_t *edata_cache, base_t *base);
+edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache);
+void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata);
+
+void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache);
+void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache);
+void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache);
+
+/*
+ * An edata_cache_small is like an edata_cache, but it relies on external
+ * synchronization and avoids first-fit strategies.
+ */
+
+typedef struct edata_cache_fast_s edata_cache_fast_t;
+struct edata_cache_fast_s {
+ edata_list_inactive_t list;
+ edata_cache_t *fallback;
+ bool disabled;
+};
+
+void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback);
+edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs);
+void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs,
+ edata_t *edata);
+void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs);
+
+#endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */
diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h
new file mode 100644
index 0000000..8d9513e
--- /dev/null
+++ b/include/jemalloc/internal/ehooks.h
@@ -0,0 +1,412 @@
+#ifndef JEMALLOC_INTERNAL_EHOOKS_H
+#define JEMALLOC_INTERNAL_EHOOKS_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/extent_mmap.h"
+
+/*
+ * This module is the internal interface to the extent hooks (both
+ * user-specified and external). Eventually, this will give us the flexibility
+ * to use multiple different versions of user-visible extent-hook APIs under a
+ * single user interface.
+ *
+ * Current API expansions (not available to anyone but the default hooks yet):
+ * - Head state tracking. Hooks can decide whether or not to merge two
+ * extents based on whether or not one of them is the head (i.e. was
+ * allocated on its own). The later extent loses its "head" status.
+ */
+
+extern const extent_hooks_t ehooks_default_extent_hooks;
+
+typedef struct ehooks_s ehooks_t;
+struct ehooks_s {
+ /*
+ * The user-visible id that goes with the ehooks (i.e. that of the base
+ * they're a part of, the associated arena's index within the arenas
+ * array).
+ */
+ unsigned ind;
+ /* Logically an extent_hooks_t *. */
+ atomic_p_t ptr;
+};
+
+extern const extent_hooks_t ehooks_default_extent_hooks;
+
+/*
+ * These are not really part of the public API. Each hook has a fast-path for
+ * the default-hooks case that can avoid various small inefficiencies:
+ * - Forgetting tsd and then calling tsd_get within the hook.
+ * - Getting more state than necessary out of the extent_t.
+ * - Doing arena_ind -> arena -> arena_ind lookups.
+ * By making the calls to these functions visible to the compiler, it can move
+ * those extra bits of computation down below the fast-paths where they get ignored.
+ */
+void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit, unsigned arena_ind);
+bool ehooks_default_dalloc_impl(void *addr, size_t size);
+void ehooks_default_destroy_impl(void *addr, size_t size);
+bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length);
+bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length);
+#ifdef PAGES_CAN_PURGE_LAZY
+bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length);
+#endif
+#ifdef PAGES_CAN_PURGE_FORCED
+bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length);
+#endif
+bool ehooks_default_split_impl();
+/*
+ * Merge is the only default extent hook we declare -- see the comment in
+ * ehooks_merge.
+ */
+bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a,
+ size_t size_a, void *addr_b, size_t size_b, bool committed,
+ unsigned arena_ind);
+bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b);
+void ehooks_default_zero_impl(void *addr, size_t size);
+void ehooks_default_guard_impl(void *guard1, void *guard2);
+void ehooks_default_unguard_impl(void *guard1, void *guard2);
+
+/*
+ * We don't officially support reentrancy from wtihin the extent hooks. But
+ * various people who sit within throwing distance of the jemalloc team want
+ * that functionality in certain limited cases. The default reentrancy guards
+ * assert that we're not reentrant from a0 (since it's the bootstrap arena,
+ * where reentrant allocations would be redirected), which we would incorrectly
+ * trigger in cases where a0 has extent hooks (those hooks themselves can't be
+ * reentrant, then, but there are reasonable uses for such functionality, like
+ * putting internal metadata on hugepages). Therefore, we use the raw
+ * reentrancy guards.
+ *
+ * Eventually, we need to think more carefully about whether and where we
+ * support allocating from within extent hooks (and what that means for things
+ * like profiling, stats collection, etc.), and document what the guarantee is.
+ */
+static inline void
+ehooks_pre_reentrancy(tsdn_t *tsdn) {
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ tsd_pre_reentrancy_raw(tsd);
+}
+
+static inline void
+ehooks_post_reentrancy(tsdn_t *tsdn) {
+ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
+ tsd_post_reentrancy_raw(tsd);
+}
+
+/* Beginning of the public API. */
+void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind);
+
+static inline unsigned
+ehooks_ind_get(const ehooks_t *ehooks) {
+ return ehooks->ind;
+}
+
+static inline void
+ehooks_set_extent_hooks_ptr(ehooks_t *ehooks, extent_hooks_t *extent_hooks) {
+ atomic_store_p(&ehooks->ptr, extent_hooks, ATOMIC_RELEASE);
+}
+
+static inline extent_hooks_t *
+ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) {
+ return (extent_hooks_t *)atomic_load_p(&ehooks->ptr, ATOMIC_ACQUIRE);
+}
+
+static inline bool
+ehooks_are_default(ehooks_t *ehooks) {
+ return ehooks_get_extent_hooks_ptr(ehooks) ==
+ &ehooks_default_extent_hooks;
+}
+
+/*
+ * In some cases, a caller needs to allocate resources before attempting to call
+ * a hook. If that hook is doomed to fail, this is wasteful. We therefore
+ * include some checks for such cases.
+ */
+static inline bool
+ehooks_dalloc_will_fail(ehooks_t *ehooks) {
+ if (ehooks_are_default(ehooks)) {
+ return opt_retain;
+ } else {
+ return ehooks_get_extent_hooks_ptr(ehooks)->dalloc == NULL;
+ }
+}
+
+static inline bool
+ehooks_split_will_fail(ehooks_t *ehooks) {
+ return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL;
+}
+
+static inline bool
+ehooks_merge_will_fail(ehooks_t *ehooks) {
+ return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL;
+}
+
+static inline bool
+ehooks_guard_will_fail(ehooks_t *ehooks) {
+ /*
+ * Before the guard hooks are officially introduced, limit the use to
+ * the default hooks only.
+ */
+ return !ehooks_are_default(ehooks);
+}
+
+/*
+ * Some hooks are required to return zeroed memory in certain situations. In
+ * debug mode, we do some heuristic checks that they did what they were supposed
+ * to.
+ *
+ * This isn't really ehooks-specific (i.e. anyone can check for zeroed memory).
+ * But incorrect zero information indicates an ehook bug.
+ */
+static inline void
+ehooks_debug_zero_check(void *addr, size_t size) {
+ assert(((uintptr_t)addr & PAGE_MASK) == 0);
+ assert((size & PAGE_MASK) == 0);
+ assert(size > 0);
+ if (config_debug) {
+ /* Check the whole first page. */
+ size_t *p = (size_t *)addr;
+ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) {
+ assert(p[i] == 0);
+ }
+ /*
+ * And 4 spots within. There's a tradeoff here; the larger
+ * this number, the more likely it is that we'll catch a bug
+ * where ehooks return a sparsely non-zero range. But
+ * increasing the number of checks also increases the number of
+ * page faults in debug mode. FreeBSD does much of their
+ * day-to-day development work in debug mode, so we don't want
+ * even the debug builds to be too slow.
+ */
+ const size_t nchecks = 4;
+ assert(PAGE >= sizeof(size_t) * nchecks);
+ for (size_t i = 0; i < nchecks; ++i) {
+ assert(p[i * (size / sizeof(size_t) / nchecks)] == 0);
+ }
+ }
+}
+
+
+static inline void *
+ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size,
+ size_t alignment, bool *zero, bool *commit) {
+ bool orig_zero = *zero;
+ void *ret;
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ ret = ehooks_default_alloc_impl(tsdn, new_addr, size,
+ alignment, zero, commit, ehooks_ind_get(ehooks));
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ ret = extent_hooks->alloc(extent_hooks, new_addr, size,
+ alignment, zero, commit, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ }
+ assert(new_addr == NULL || ret == NULL || new_addr == ret);
+ assert(!orig_zero || *zero);
+ if (*zero && ret != NULL) {
+ ehooks_debug_zero_check(ret, size);
+ }
+ return ret;
+}
+
+static inline bool
+ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ bool committed) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ return ehooks_default_dalloc_impl(addr, size);
+ } else if (extent_hooks->dalloc == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->dalloc(extent_hooks, addr, size,
+ committed, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline void
+ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ bool committed) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ ehooks_default_destroy_impl(addr, size);
+ } else if (extent_hooks->destroy == NULL) {
+ /* Do nothing. */
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ extent_hooks->destroy(extent_hooks, addr, size, committed,
+ ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ }
+}
+
+static inline bool
+ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ size_t offset, size_t length) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ bool err;
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ err = ehooks_default_commit_impl(addr, offset, length);
+ } else if (extent_hooks->commit == NULL) {
+ err = true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ err = extent_hooks->commit(extent_hooks, addr, size,
+ offset, length, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ }
+ if (!err) {
+ ehooks_debug_zero_check(addr, size);
+ }
+ return err;
+}
+
+static inline bool
+ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ size_t offset, size_t length) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ return ehooks_default_decommit_impl(addr, offset, length);
+ } else if (extent_hooks->decommit == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->decommit(extent_hooks, addr, size,
+ offset, length, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline bool
+ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ size_t offset, size_t length) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+#ifdef PAGES_CAN_PURGE_LAZY
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ return ehooks_default_purge_lazy_impl(addr, offset, length);
+ }
+#endif
+ if (extent_hooks->purge_lazy == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->purge_lazy(extent_hooks, addr, size,
+ offset, length, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline bool
+ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ size_t offset, size_t length) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ /*
+ * It would be correct to have a ehooks_debug_zero_check call at the end
+ * of this function; purge_forced is required to zero. But checking
+ * would touch the page in question, which may have performance
+ * consequences (imagine the hooks are using hugepages, with a global
+ * zero page off). Even in debug mode, it's usually a good idea to
+ * avoid cases that can dramatically increase memory consumption.
+ */
+#ifdef PAGES_CAN_PURGE_FORCED
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ return ehooks_default_purge_forced_impl(addr, offset, length);
+ }
+#endif
+ if (extent_hooks->purge_forced == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->purge_forced(extent_hooks, addr, size,
+ offset, length, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline bool
+ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size,
+ size_t size_a, size_t size_b, bool committed) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (ehooks_are_default(ehooks)) {
+ return ehooks_default_split_impl();
+ } else if (extent_hooks->split == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->split(extent_hooks, addr, size, size_a,
+ size_b, committed, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline bool
+ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a,
+ void *addr_b, size_t size_b, bool committed) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ return ehooks_default_merge_impl(tsdn, addr_a, addr_b);
+ } else if (extent_hooks->merge == NULL) {
+ return true;
+ } else {
+ ehooks_pre_reentrancy(tsdn);
+ bool err = extent_hooks->merge(extent_hooks, addr_a, size_a,
+ addr_b, size_b, committed, ehooks_ind_get(ehooks));
+ ehooks_post_reentrancy(tsdn);
+ return err;
+ }
+}
+
+static inline void
+ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) {
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ ehooks_default_zero_impl(addr, size);
+ } else {
+ /*
+ * It would be correct to try using the user-provided purge
+ * hooks (since they are required to have zeroed the extent if
+ * they indicate success), but we don't necessarily know their
+ * cost. We'll be conservative and use memset.
+ */
+ memset(addr, 0, size);
+ }
+}
+
+static inline bool
+ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) {
+ bool err;
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ ehooks_default_guard_impl(guard1, guard2);
+ err = false;
+ } else {
+ err = true;
+ }
+
+ return err;
+}
+
+static inline bool
+ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) {
+ bool err;
+ extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks);
+
+ if (extent_hooks == &ehooks_default_extent_hooks) {
+ ehooks_default_unguard_impl(guard1, guard2);
+ err = false;
+ } else {
+ err = true;
+ }
+
+ return err;
+}
+
+#endif /* JEMALLOC_INTERNAL_EHOOKS_H */
diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h
new file mode 100644
index 0000000..847af32
--- /dev/null
+++ b/include/jemalloc/internal/emap.h
@@ -0,0 +1,357 @@
+#ifndef JEMALLOC_INTERNAL_EMAP_H
+#define JEMALLOC_INTERNAL_EMAP_H
+
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/rtree.h"
+
+/*
+ * Note: Ends without at semicolon, so that
+ * EMAP_DECLARE_RTREE_CTX;
+ * in uses will avoid empty-statement warnings.
+ */
+#define EMAP_DECLARE_RTREE_CTX \
+ rtree_ctx_t rtree_ctx_fallback; \
+ rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback)
+
+typedef struct emap_s emap_t;
+struct emap_s {
+ rtree_t rtree;
+};
+
+/* Used to pass rtree lookup context down the path. */
+typedef struct emap_alloc_ctx_t emap_alloc_ctx_t;
+struct emap_alloc_ctx_t {
+ szind_t szind;
+ bool slab;
+};
+
+typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t;
+struct emap_full_alloc_ctx_s {
+ szind_t szind;
+ bool slab;
+ edata_t *edata;
+};
+
+bool emap_init(emap_t *emap, base_t *base, bool zeroed);
+
+void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind,
+ bool slab);
+
+void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
+ extent_state_t state);
+
+/*
+ * The two acquire functions below allow accessing neighbor edatas, if it's safe
+ * and valid to do so (i.e. from the same arena, of the same state, etc.). This
+ * is necessary because the ecache locks are state based, and only protect
+ * edatas with the same state. Therefore the neighbor edata's state needs to be
+ * verified first, before chasing the edata pointer. The returned edata will be
+ * in an acquired state, meaning other threads will be prevented from accessing
+ * it, even if technically the edata can still be discovered from the rtree.
+ *
+ * This means, at any moment when holding pointers to edata, either one of the
+ * state based locks is held (and the edatas are all of the protected state), or
+ * the edatas are in an acquired state (e.g. in active or merging state). The
+ * acquire operation itself (changing the edata to an acquired state) is done
+ * under the state locks.
+ */
+edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap,
+ edata_t *edata, extent_pai_t pai, extent_state_t expected_state,
+ bool forward);
+edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap,
+ edata_t *edata, extent_pai_t pai, extent_state_t expected_state);
+void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
+ extent_state_t new_state);
+
+/*
+ * Associate the given edata with its beginning and end address, setting the
+ * szind and slab info appropriately.
+ * Returns true on error (i.e. resource exhaustion).
+ */
+bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
+ szind_t szind, bool slab);
+
+/*
+ * Does the same thing, but with the interior of the range, for slab
+ * allocations.
+ *
+ * You might wonder why we don't just have a single emap_register function that
+ * does both depending on the value of 'slab'. The answer is twofold:
+ * - As a practical matter, in places like the extract->split->commit pathway,
+ * we defer the interior operation until we're sure that the commit won't fail
+ * (but we have to register the split boundaries there).
+ * - In general, we're trying to move to a world where the page-specific
+ * allocator doesn't know as much about how the pages it allocates will be
+ * used, and passing a 'slab' parameter everywhere makes that more
+ * complicated.
+ *
+ * Unlike the boundary version, this function can't fail; this is because slabs
+ * can't get big enough to touch a new page that neither of the boundaries
+ * touched, so no allocation is necessary to fill the interior once the boundary
+ * has been touched.
+ */
+void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata,
+ szind_t szind);
+
+void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
+void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
+
+typedef struct emap_prepare_s emap_prepare_t;
+struct emap_prepare_s {
+ rtree_leaf_elm_t *lead_elm_a;
+ rtree_leaf_elm_t *lead_elm_b;
+ rtree_leaf_elm_t *trail_elm_a;
+ rtree_leaf_elm_t *trail_elm_b;
+};
+
+/**
+ * These functions the emap metadata management for merging, splitting, and
+ * reusing extents. In particular, they set the boundary mappings from
+ * addresses to edatas. If the result is going to be used as a slab, you
+ * still need to call emap_register_interior on it, though.
+ *
+ * Remap simply changes the szind and slab status of an extent's boundary
+ * mappings. If the extent is not a slab, it doesn't bother with updating the
+ * end mapping (since lookups only occur in the interior of an extent for
+ * slabs). Since the szind and slab status only make sense for active extents,
+ * this should only be called while activating or deactivating an extent.
+ *
+ * Split and merge have a "prepare" and a "commit" portion. The prepare portion
+ * does the operations that can be done without exclusive access to the extent
+ * in question, while the commit variant requires exclusive access to maintain
+ * the emap invariants. The only function that can fail is emap_split_prepare,
+ * and it returns true on failure (at which point the caller shouldn't commit).
+ *
+ * In all cases, "lead" refers to the lower-addressed extent, and trail to the
+ * higher-addressed one. It's the caller's responsibility to set the edata
+ * state appropriately.
+ */
+bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
+ edata_t *edata, size_t size_a, edata_t *trail, size_t size_b);
+void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
+ edata_t *lead, size_t size_a, edata_t *trail, size_t size_b);
+void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
+ edata_t *lead, edata_t *trail);
+void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare,
+ edata_t *lead, edata_t *trail);
+
+/* Assert that the emap's view of the given edata matches the edata's view. */
+void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
+static inline void
+emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
+ if (config_debug) {
+ emap_do_assert_mapped(tsdn, emap, edata);
+ }
+}
+
+/* Assert that the given edata isn't in the map. */
+void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata);
+static inline void
+emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
+ if (config_debug) {
+ emap_do_assert_not_mapped(tsdn, emap, edata);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
+ assert(config_debug);
+ emap_assert_mapped(tsdn, emap, edata);
+
+ EMAP_DECLARE_RTREE_CTX;
+ rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
+ (uintptr_t)edata_base_get(edata));
+
+ return edata_state_in_transition(contents.metadata.state);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) {
+ if (!config_debug) {
+ /* For assertions only. */
+ return false;
+ }
+
+ /*
+ * The edata is considered acquired if no other threads will attempt to
+ * read / write any fields from it. This includes a few cases:
+ *
+ * 1) edata not hooked into emap yet -- This implies the edata just got
+ * allocated or initialized.
+ *
+ * 2) in an active or transition state -- In both cases, the edata can
+ * be discovered from the emap, however the state tracked in the rtree
+ * will prevent other threads from accessing the actual edata.
+ */
+ EMAP_DECLARE_RTREE_CTX;
+ rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree,
+ rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true,
+ /* init_missing */ false);
+ if (elm == NULL) {
+ return true;
+ }
+ rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm,
+ /* dependent */ true);
+ if (contents.edata == NULL ||
+ contents.metadata.state == extent_state_active ||
+ edata_state_in_transition(contents.metadata.state)) {
+ return true;
+ }
+
+ return false;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) {
+ assert(edata_arena_ind_get(inner) == edata_arena_ind_get(outer));
+ assert(edata_pai_get(inner) == edata_pai_get(outer));
+ assert(edata_committed_get(inner) == edata_committed_get(outer));
+ assert(edata_state_get(inner) == extent_state_active);
+ assert(edata_state_get(outer) == extent_state_merging);
+ assert(!edata_guarded_get(inner) && !edata_guarded_get(outer));
+ assert(edata_base_get(inner) == edata_past_get(outer) ||
+ edata_base_get(outer) == edata_past_get(inner));
+}
+
+JEMALLOC_ALWAYS_INLINE void
+extent_assert_can_expand(const edata_t *original, const edata_t *expand) {
+ assert(edata_arena_ind_get(original) == edata_arena_ind_get(expand));
+ assert(edata_pai_get(original) == edata_pai_get(expand));
+ assert(edata_state_get(original) == extent_state_active);
+ assert(edata_state_get(expand) == extent_state_merging);
+ assert(edata_past_get(original) == edata_base_get(expand));
+}
+
+JEMALLOC_ALWAYS_INLINE edata_t *
+emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) {
+ EMAP_DECLARE_RTREE_CTX;
+
+ return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata;
+}
+
+/* Fills in alloc_ctx with the info in the map. */
+JEMALLOC_ALWAYS_INLINE void
+emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
+ emap_alloc_ctx_t *alloc_ctx) {
+ EMAP_DECLARE_RTREE_CTX;
+
+ rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree,
+ rtree_ctx, (uintptr_t)ptr);
+ alloc_ctx->szind = metadata.szind;
+ alloc_ctx->slab = metadata.slab;
+}
+
+/* The pointer must be mapped. */
+JEMALLOC_ALWAYS_INLINE void
+emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
+ emap_full_alloc_ctx_t *full_alloc_ctx) {
+ EMAP_DECLARE_RTREE_CTX;
+
+ rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx,
+ (uintptr_t)ptr);
+ full_alloc_ctx->edata = contents.edata;
+ full_alloc_ctx->szind = contents.metadata.szind;
+ full_alloc_ctx->slab = contents.metadata.slab;
+}
+
+/*
+ * The pointer is allowed to not be mapped.
+ *
+ * Returns true when the pointer is not present.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr,
+ emap_full_alloc_ctx_t *full_alloc_ctx) {
+ EMAP_DECLARE_RTREE_CTX;
+
+ rtree_contents_t contents;
+ bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx,
+ (uintptr_t)ptr, &contents);
+ if (err) {
+ return true;
+ }
+ full_alloc_ctx->edata = contents.edata;
+ full_alloc_ctx->szind = contents.metadata.szind;
+ full_alloc_ctx->slab = contents.metadata.slab;
+ return false;
+}
+
+/*
+ * Only used on the fastpath of free. Returns true when cannot be fulfilled by
+ * fast path, e.g. when the metadata key is not cached.
+ */
+JEMALLOC_ALWAYS_INLINE bool
+emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr,
+ emap_alloc_ctx_t *alloc_ctx) {
+ /* Use the unsafe getter since this may gets called during exit. */
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd);
+
+ rtree_metadata_t metadata;
+ bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree,
+ rtree_ctx, (uintptr_t)ptr, &metadata);
+ if (err) {
+ return true;
+ }
+ alloc_ctx->szind = metadata.szind;
+ alloc_ctx->slab = metadata.slab;
+ return false;
+}
+
+/*
+ * We want to do batch lookups out of the cache bins, which use
+ * cache_bin_ptr_array_get to access the i'th element of the bin (since they
+ * invert usual ordering in deciding what to flush). This lets the emap avoid
+ * caring about its caller's ordering.
+ */
+typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind);
+/*
+ * This allows size-checking assertions, which we can only do while we're in the
+ * process of edata lookups.
+ */
+typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx);
+
+typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t;
+union emap_batch_lookup_result_u {
+ edata_t *edata;
+ rtree_leaf_elm_t *rtree_leaf;
+};
+
+JEMALLOC_ALWAYS_INLINE void
+emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs,
+ emap_ptr_getter ptr_getter, void *ptr_getter_ctx,
+ emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx,
+ emap_batch_lookup_result_t *result) {
+ /* Avoids null-checking tsdn in the loop below. */
+ util_assume(tsd != NULL);
+ rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get(tsd);
+
+ for (size_t i = 0; i < nptrs; i++) {
+ const void *ptr = ptr_getter(ptr_getter_ctx, i);
+ /*
+ * Reuse the edatas array as a temp buffer, lying a little about
+ * the types.
+ */
+ result[i].rtree_leaf = rtree_leaf_elm_lookup(tsd_tsdn(tsd),
+ &emap->rtree, rtree_ctx, (uintptr_t)ptr,
+ /* dependent */ true, /* init_missing */ false);
+ }
+
+ for (size_t i = 0; i < nptrs; i++) {
+ rtree_leaf_elm_t *elm = result[i].rtree_leaf;
+ rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd),
+ &emap->rtree, elm, /* dependent */ true);
+ result[i].edata = contents.edata;
+ emap_full_alloc_ctx_t alloc_ctx;
+ /*
+ * Not all these fields are read in practice by the metadata
+ * visitor. But the compiler can easily optimize away the ones
+ * that aren't, so no sense in being incomplete.
+ */
+ alloc_ctx.szind = contents.metadata.szind;
+ alloc_ctx.slab = contents.metadata.slab;
+ alloc_ctx.edata = contents.edata;
+ metadata_visitor(metadata_visitor_ctx, &alloc_ctx);
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_EMAP_H */
diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h
index 542bc79..9482f68 100644
--- a/include/jemalloc/internal/emitter.h
+++ b/include/jemalloc/internal/emitter.h
@@ -6,6 +6,7 @@
typedef enum emitter_output_e emitter_output_t;
enum emitter_output_e {
emitter_output_json,
+ emitter_output_json_compact,
emitter_output_table
};
@@ -21,6 +22,7 @@ typedef enum emitter_type_e emitter_type_t;
enum emitter_type_e {
emitter_type_bool,
emitter_type_int,
+ emitter_type_int64,
emitter_type_unsigned,
emitter_type_uint32,
emitter_type_uint64,
@@ -66,7 +68,7 @@ typedef struct emitter_s emitter_t;
struct emitter_s {
emitter_output_t output;
/* The output information. */
- void (*write_cb)(void *, const char *);
+ write_cb_t *write_cb;
void *cbopaque;
int nesting_depth;
/* True if we've already emitted a value at the given depth. */
@@ -75,6 +77,12 @@ struct emitter_s {
bool emitted_key;
};
+static inline bool
+emitter_outputs_json(emitter_t *emitter) {
+ return emitter->output == emitter_output_json ||
+ emitter->output == emitter_output_json_compact;
+}
+
/* Internal convenience function. Write to the emitter the given string. */
JEMALLOC_FORMAT_PRINTF(2, 3)
static inline void
@@ -135,13 +143,16 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
switch (value_type) {
case emitter_type_bool:
- emitter_printf(emitter,
+ emitter_printf(emitter,
emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width),
*(const bool *)value ? "true" : "false");
break;
case emitter_type_int:
EMIT_SIMPLE(int, "%d")
break;
+ case emitter_type_int64:
+ EMIT_SIMPLE(int64_t, "%" FMTd64)
+ break;
case emitter_type_unsigned:
EMIT_SIMPLE(unsigned, "%u")
break;
@@ -159,7 +170,7 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width,
* anywhere near the fmt size.
*/
assert(str_written < BUF_SIZE);
- emitter_printf(emitter,
+ emitter_printf(emitter,
emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf);
break;
case emitter_type_uint32:
@@ -196,6 +207,7 @@ static inline void
emitter_indent(emitter_t *emitter) {
int amount = emitter->nesting_depth;
const char *indent_str;
+ assert(emitter->output != emitter_output_json_compact);
if (emitter->output == emitter_output_json) {
indent_str = "\t";
} else {
@@ -209,12 +221,18 @@ emitter_indent(emitter_t *emitter) {
static inline void
emitter_json_key_prefix(emitter_t *emitter) {
+ assert(emitter_outputs_json(emitter));
if (emitter->emitted_key) {
emitter->emitted_key = false;
return;
}
- emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : "");
- emitter_indent(emitter);
+ if (emitter->item_at_depth) {
+ emitter_printf(emitter, ",");
+ }
+ if (emitter->output != emitter_output_json_compact) {
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ }
}
/******************************************************************************/
@@ -222,27 +240,28 @@ emitter_json_key_prefix(emitter_t *emitter) {
static inline void
emitter_init(emitter_t *emitter, emitter_output_t emitter_output,
- void (*write_cb)(void *, const char *), void *cbopaque) {
+ write_cb_t *write_cb, void *cbopaque) {
emitter->output = emitter_output;
emitter->write_cb = write_cb;
emitter->cbopaque = cbopaque;
emitter->item_at_depth = false;
- emitter->emitted_key = false;
+ emitter->emitted_key = false;
emitter->nesting_depth = 0;
}
/******************************************************************************/
/* JSON public API. */
-/*
+/*
* Emits a key (e.g. as appears in an object). The next json entity emitted will
* be the corresponding value.
*/
static inline void
emitter_json_key(emitter_t *emitter, const char *json_key) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key_prefix(emitter);
- emitter_printf(emitter, "\"%s\": ", json_key);
+ emitter_printf(emitter, "\"%s\":%s", json_key,
+ emitter->output == emitter_output_json_compact ? "" : " ");
emitter->emitted_key = true;
}
}
@@ -250,7 +269,7 @@ emitter_json_key(emitter_t *emitter, const char *json_key) {
static inline void
emitter_json_value(emitter_t *emitter, emitter_type_t value_type,
const void *value) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key_prefix(emitter);
emitter_print_value(emitter, emitter_justify_none, -1,
value_type, value);
@@ -268,7 +287,7 @@ emitter_json_kv(emitter_t *emitter, const char *json_key,
static inline void
emitter_json_array_begin(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key_prefix(emitter);
emitter_printf(emitter, "[");
emitter_nest_inc(emitter);
@@ -284,18 +303,20 @@ emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) {
static inline void
emitter_json_array_end(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
assert(emitter->nesting_depth > 0);
emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n");
- emitter_indent(emitter);
+ if (emitter->output != emitter_output_json_compact) {
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ }
emitter_printf(emitter, "]");
}
}
static inline void
emitter_json_object_begin(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key_prefix(emitter);
emitter_printf(emitter, "{");
emitter_nest_inc(emitter);
@@ -311,11 +332,13 @@ emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) {
static inline void
emitter_json_object_end(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
assert(emitter->nesting_depth > 0);
emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n");
- emitter_indent(emitter);
+ if (emitter->output != emitter_output_json_compact) {
+ emitter_printf(emitter, "\n");
+ emitter_indent(emitter);
+ }
emitter_printf(emitter, "}");
}
}
@@ -420,7 +443,7 @@ emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key,
emitter_type_t value_type, const void *value,
const char *table_note_key, emitter_type_t table_note_value_type,
const void *table_note_value) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key(emitter, json_key);
emitter_json_value(emitter, value_type, value);
} else {
@@ -440,7 +463,7 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key,
static inline void
emitter_dict_begin(emitter_t *emitter, const char *json_key,
const char *table_header) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_key(emitter, json_key);
emitter_json_object_begin(emitter);
} else {
@@ -450,7 +473,7 @@ emitter_dict_begin(emitter_t *emitter, const char *json_key,
static inline void
emitter_dict_end(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
emitter_json_object_end(emitter);
} else {
emitter_table_dict_end(emitter);
@@ -459,7 +482,7 @@ emitter_dict_end(emitter_t *emitter) {
static inline void
emitter_begin(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
assert(emitter->nesting_depth == 0);
emitter_printf(emitter, "{");
emitter_nest_inc(emitter);
@@ -476,10 +499,11 @@ emitter_begin(emitter_t *emitter) {
static inline void
emitter_end(emitter_t *emitter) {
- if (emitter->output == emitter_output_json) {
+ if (emitter_outputs_json(emitter)) {
assert(emitter->nesting_depth == 1);
emitter_nest_dec(emitter);
- emitter_printf(emitter, "\n}\n");
+ emitter_printf(emitter, "%s", emitter->output ==
+ emitter_output_json_compact ? "}" : "\n}\n");
}
}
diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h
new file mode 100644
index 0000000..4f689b4
--- /dev/null
+++ b/include/jemalloc/internal/eset.h
@@ -0,0 +1,77 @@
+#ifndef JEMALLOC_INTERNAL_ESET_H
+#define JEMALLOC_INTERNAL_ESET_H
+
+#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/fb.h"
+#include "jemalloc/internal/edata.h"
+#include "jemalloc/internal/mutex.h"
+
+/*
+ * An eset ("extent set") is a quantized collection of extents, with built-in
+ * LRU queue.
+ *
+ * This class is not thread-safe; synchronization must be done externally if
+ * there are mutating operations. One exception is the stats counters, which
+ * may be read without any locking.
+ */
+
+typedef struct eset_bin_s eset_bin_t;
+struct eset_bin_s {
+ edata_heap_t heap;
+ /*
+ * We do first-fit across multiple size classes. If we compared against
+ * the min element in each heap directly, we'd take a cache miss per
+ * extent we looked at. If we co-locate the edata summaries, we only
+ * take a miss on the edata we're actually going to return (which is
+ * inevitable anyways).
+ */
+ edata_cmp_summary_t heap_min;
+};
+
+typedef struct eset_bin_stats_s eset_bin_stats_t;
+struct eset_bin_stats_s {
+ atomic_zu_t nextents;
+ atomic_zu_t nbytes;
+};
+
+typedef struct eset_s eset_t;
+struct eset_s {
+ /* Bitmap for which set bits correspond to non-empty heaps. */
+ fb_group_t bitmap[FB_NGROUPS(SC_NPSIZES + 1)];
+
+ /* Quantized per size class heaps of extents. */
+ eset_bin_t bins[SC_NPSIZES + 1];
+
+ eset_bin_stats_t bin_stats[SC_NPSIZES + 1];
+
+ /* LRU of all extents in heaps. */
+ edata_list_inactive_t lru;
+
+ /* Page sum for all extents in heaps. */
+ atomic_zu_t npages;
+
+ /*
+ * A duplication of the data in the containing ecache. We use this only
+ * for assertions on the states of the passed-in extents.
+ */
+ extent_state_t state;
+};
+
+void eset_init(eset_t *eset, extent_state_t state);
+
+size_t eset_npages_get(eset_t *eset);
+/* Get the number of extents in the given page size index. */
+size_t eset_nextents_get(eset_t *eset, pszind_t ind);
+/* Get the sum total bytes of the extents in the given page size index. */
+size_t eset_nbytes_get(eset_t *eset, pszind_t ind);
+
+void eset_insert(eset_t *eset, edata_t *edata);
+void eset_remove(eset_t *eset, edata_t *edata);
+/*
+ * Select an extent from this eset of the given size and alignment. Returns
+ * null if no such item could be found.
+ */
+edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only,
+ unsigned lg_max_fit);
+
+#endif /* JEMALLOC_INTERNAL_ESET_H */
diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h
new file mode 100644
index 0000000..8566b8a
--- /dev/null
+++ b/include/jemalloc/internal/exp_grow.h
@@ -0,0 +1,50 @@
+#ifndef JEMALLOC_INTERNAL_EXP_GROW_H
+#define JEMALLOC_INTERNAL_EXP_GROW_H
+
+typedef struct exp_grow_s exp_grow_t;
+struct exp_grow_s {
+ /*
+ * Next extent size class in a growing series to use when satisfying a
+ * request via the extent hooks (only if opt_retain). This limits the
+ * number of disjoint virtual memory ranges so that extent merging can
+ * be effective even if multiple arenas' extent allocation requests are
+ * highly interleaved.
+ *
+ * retain_grow_limit is the max allowed size ind to expand (unless the
+ * required size is greater). Default is no limit, and controlled
+ * through mallctl only.
+ */
+ pszind_t next;
+ pszind_t limit;
+};
+
+static inline bool
+exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min,
+ size_t *r_alloc_size, pszind_t *r_skip) {
+ *r_skip = 0;
+ *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip);
+ while (*r_alloc_size < alloc_size_min) {
+ (*r_skip)++;
+ if (exp_grow->next + *r_skip >=
+ sz_psz2ind(SC_LARGE_MAXCLASS)) {
+ /* Outside legal range. */
+ return true;
+ }
+ *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip);
+ }
+ return false;
+}
+
+static inline void
+exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) {
+ if (exp_grow->next + skip + 1 <= exp_grow->limit) {
+ exp_grow->next += skip + 1;
+ } else {
+ exp_grow->next = exp_grow->limit;
+ }
+
+}
+
+void exp_grow_init(exp_grow_t *exp_grow);
+
+#endif /* JEMALLOC_INTERNAL_EXP_GROW_H */
diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h
new file mode 100644
index 0000000..1d51d41
--- /dev/null
+++ b/include/jemalloc/internal/extent.h
@@ -0,0 +1,137 @@
+#ifndef JEMALLOC_INTERNAL_EXTENT_H
+#define JEMALLOC_INTERNAL_EXTENT_H
+
+#include "jemalloc/internal/ecache.h"
+#include "jemalloc/internal/ehooks.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/rtree.h"
+
+/*
+ * This module contains the page-level allocator. It chooses the addresses that
+ * allocations requested by other modules will inhabit, and updates the global
+ * metadata to reflect allocation/deallocation/purging decisions.
+ */
+
+/*
+ * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit)
+ * is the max ratio between the size of the active extent and the new extent.
+ */
+#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
+extern size_t opt_lg_extent_max_active_fit;
+
+edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment,
+ bool zero, bool guarded);
+edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment,
+ bool zero, bool guarded);
+void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ ecache_t *ecache, edata_t *edata);
+edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ ecache_t *ecache, size_t npages_min);
+
+void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata);
+void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache,
+ edata_t *edata);
+void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ edata_t *edata);
+edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ void *new_addr, size_t size, size_t alignment, bool zero, bool *commit,
+ bool growing_retained);
+void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ edata_t *edata);
+void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ edata_t *edata);
+bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+ size_t offset, size_t length);
+bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+ size_t offset, size_t length);
+bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+ size_t offset, size_t length);
+bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+ size_t offset, size_t length);
+edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac,
+ ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b,
+ bool holding_core_locks);
+bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks,
+ edata_t *a, edata_t *b);
+bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata,
+ bool commit, bool zero, bool growing_retained);
+size_t extent_sn_next(pac_t *pac);
+bool extent_boot(void);
+
+JEMALLOC_ALWAYS_INLINE bool
+extent_neighbor_head_state_mergeable(bool edata_is_head,
+ bool neighbor_is_head, bool forward) {
+ /*
+ * Head states checking: disallow merging if the higher addr extent is a
+ * head extent. This helps preserve first-fit, and more importantly
+ * makes sure no merge across arenas.
+ */
+ if (forward) {
+ if (neighbor_is_head) {
+ return false;
+ }
+ } else {
+ if (edata_is_head) {
+ return false;
+ }
+ }
+ return true;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents,
+ extent_pai_t pai, extent_state_t expected_state, bool forward,
+ bool expanding) {
+ edata_t *neighbor = contents.edata;
+ if (neighbor == NULL) {
+ return false;
+ }
+ /* It's not safe to access *neighbor yet; must verify states first. */
+ bool neighbor_is_head = contents.metadata.is_head;
+ if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata),
+ neighbor_is_head, forward)) {
+ return false;
+ }
+ extent_state_t neighbor_state = contents.metadata.state;
+ if (pai == EXTENT_PAI_PAC) {
+ if (neighbor_state != expected_state) {
+ return false;
+ }
+ /* From this point, it's safe to access *neighbor. */
+ if (!expanding && (edata_committed_get(edata) !=
+ edata_committed_get(neighbor))) {
+ /*
+ * Some platforms (e.g. Windows) require an explicit
+ * commit step (and writing to uncommitted memory is not
+ * allowed).
+ */
+ return false;
+ }
+ } else {
+ if (neighbor_state == extent_state_active) {
+ return false;
+ }
+ /* From this point, it's safe to access *neighbor. */
+ }
+
+ assert(edata_pai_get(edata) == pai);
+ if (edata_pai_get(neighbor) != pai) {
+ return false;
+ }
+ if (opt_retain) {
+ assert(edata_arena_ind_get(edata) ==
+ edata_arena_ind_get(neighbor));
+ } else {
+ if (edata_arena_ind_get(edata) !=
+ edata_arena_ind_get(neighbor)) {
+ return false;
+ }
+ }
+ assert(!edata_guarded_get(edata) && !edata_guarded_get(neighbor));
+
+ return true;
+}
+
+#endif /* JEMALLOC_INTERNAL_EXTENT_H */
diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h
deleted file mode 100644
index 8aba576..0000000
--- a/include/jemalloc/internal/extent_externs.h
+++ /dev/null
@@ -1,83 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
-#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H
-
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/mutex_pool.h"
-#include "jemalloc/internal/ph.h"
-#include "jemalloc/internal/rtree.h"
-
-extern size_t opt_lg_extent_max_active_fit;
-
-extern rtree_t extents_rtree;
-extern const extent_hooks_t extent_hooks_default;
-extern mutex_pool_t extent_mutex_pool;
-
-extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena);
-void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
-
-extent_hooks_t *extent_hooks_get(arena_t *arena);
-extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena,
- extent_hooks_t *extent_hooks);
-
-#ifdef JEMALLOC_JET
-size_t extent_size_quantize_floor(size_t size);
-size_t extent_size_quantize_ceil(size_t size);
-#endif
-
-ph_proto(, extent_avail_, extent_tree_t, extent_t)
-ph_proto(, extent_heap_, extent_heap_t, extent_t)
-
-bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state,
- bool delay_coalesce);
-extent_state_t extents_state_get(const extents_t *extents);
-size_t extents_npages_get(extents_t *extents);
-/* Get the number of extents in the given page size index. */
-size_t extents_nextents_get(extents_t *extents, pszind_t ind);
-/* Get the sum total bytes of the extents in the given page size index. */
-size_t extents_nbytes_get(extents_t *extents, pszind_t ind);
-extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr,
- size_t size, size_t pad, size_t alignment, bool slab, szind_t szind,
- bool *zero, bool *commit);
-void extents_dalloc(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent);
-extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min);
-void extents_prefork(tsdn_t *tsdn, extents_t *extents);
-void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents);
-void extents_postfork_child(tsdn_t *tsdn, extents_t *extents);
-extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad,
- size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit);
-void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent);
-void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent);
-void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent);
-bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
- size_t length);
-bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
- size_t length);
-bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
- size_t length);
-bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset,
- size_t length);
-extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a,
- szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b);
-bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena,
- extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b);
-
-bool extent_boot(void);
-
-void extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
- size_t *nfree, size_t *nregs, size_t *size);
-void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
- size_t *nfree, size_t *nregs, size_t *size,
- size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
-
-#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */
diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h
deleted file mode 100644
index 77fa4c4..0000000
--- a/include/jemalloc/internal/extent_inlines.h
+++ /dev/null
@@ -1,501 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H
-#define JEMALLOC_INTERNAL_EXTENT_INLINES_H
-
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/mutex_pool.h"
-#include "jemalloc/internal/pages.h"
-#include "jemalloc/internal/prng.h"
-#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/sc.h"
-#include "jemalloc/internal/sz.h"
-
-static inline void
-extent_lock(tsdn_t *tsdn, extent_t *extent) {
- assert(extent != NULL);
- mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent);
-}
-
-static inline void
-extent_unlock(tsdn_t *tsdn, extent_t *extent) {
- assert(extent != NULL);
- mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent);
-}
-
-static inline void
-extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
- assert(extent1 != NULL && extent2 != NULL);
- mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1,
- (uintptr_t)extent2);
-}
-
-static inline void
-extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) {
- assert(extent1 != NULL && extent2 != NULL);
- mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1,
- (uintptr_t)extent2);
-}
-
-static inline unsigned
-extent_arena_ind_get(const extent_t *extent) {
- unsigned arena_ind = (unsigned)((extent->e_bits &
- EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT);
- assert(arena_ind < MALLOCX_ARENA_LIMIT);
-
- return arena_ind;
-}
-
-static inline arena_t *
-extent_arena_get(const extent_t *extent) {
- unsigned arena_ind = extent_arena_ind_get(extent);
-
- return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE);
-}
-
-static inline szind_t
-extent_szind_get_maybe_invalid(const extent_t *extent) {
- szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >>
- EXTENT_BITS_SZIND_SHIFT);
- assert(szind <= SC_NSIZES);
- return szind;
-}
-
-static inline szind_t
-extent_szind_get(const extent_t *extent) {
- szind_t szind = extent_szind_get_maybe_invalid(extent);
- assert(szind < SC_NSIZES); /* Never call when "invalid". */
- return szind;
-}
-
-static inline size_t
-extent_usize_get(const extent_t *extent) {
- return sz_index2size(extent_szind_get(extent));
-}
-
-static inline unsigned
-extent_binshard_get(const extent_t *extent) {
- unsigned binshard = (unsigned)((extent->e_bits &
- EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT);
- assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
- return binshard;
-}
-
-static inline size_t
-extent_sn_get(const extent_t *extent) {
- return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >>
- EXTENT_BITS_SN_SHIFT);
-}
-
-static inline extent_state_t
-extent_state_get(const extent_t *extent) {
- return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >>
- EXTENT_BITS_STATE_SHIFT);
-}
-
-static inline bool
-extent_zeroed_get(const extent_t *extent) {
- return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >>
- EXTENT_BITS_ZEROED_SHIFT);
-}
-
-static inline bool
-extent_committed_get(const extent_t *extent) {
- return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >>
- EXTENT_BITS_COMMITTED_SHIFT);
-}
-
-static inline bool
-extent_dumpable_get(const extent_t *extent) {
- return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >>
- EXTENT_BITS_DUMPABLE_SHIFT);
-}
-
-static inline bool
-extent_slab_get(const extent_t *extent) {
- return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >>
- EXTENT_BITS_SLAB_SHIFT);
-}
-
-static inline unsigned
-extent_nfree_get(const extent_t *extent) {
- assert(extent_slab_get(extent));
- return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >>
- EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void *
-extent_base_get(const extent_t *extent) {
- assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
- !extent_slab_get(extent));
- return PAGE_ADDR2BASE(extent->e_addr);
-}
-
-static inline void *
-extent_addr_get(const extent_t *extent) {
- assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) ||
- !extent_slab_get(extent));
- return extent->e_addr;
-}
-
-static inline size_t
-extent_size_get(const extent_t *extent) {
- return (extent->e_size_esn & EXTENT_SIZE_MASK);
-}
-
-static inline size_t
-extent_esn_get(const extent_t *extent) {
- return (extent->e_size_esn & EXTENT_ESN_MASK);
-}
-
-static inline size_t
-extent_bsize_get(const extent_t *extent) {
- return extent->e_bsize;
-}
-
-static inline void *
-extent_before_get(const extent_t *extent) {
- return (void *)((uintptr_t)extent_base_get(extent) - PAGE);
-}
-
-static inline void *
-extent_last_get(const extent_t *extent) {
- return (void *)((uintptr_t)extent_base_get(extent) +
- extent_size_get(extent) - PAGE);
-}
-
-static inline void *
-extent_past_get(const extent_t *extent) {
- return (void *)((uintptr_t)extent_base_get(extent) +
- extent_size_get(extent));
-}
-
-static inline arena_slab_data_t *
-extent_slab_data_get(extent_t *extent) {
- assert(extent_slab_get(extent));
- return &extent->e_slab_data;
-}
-
-static inline const arena_slab_data_t *
-extent_slab_data_get_const(const extent_t *extent) {
- assert(extent_slab_get(extent));
- return &extent->e_slab_data;
-}
-
-static inline prof_tctx_t *
-extent_prof_tctx_get(const extent_t *extent) {
- return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx,
- ATOMIC_ACQUIRE);
-}
-
-static inline nstime_t
-extent_prof_alloc_time_get(const extent_t *extent) {
- return extent->e_alloc_time;
-}
-
-static inline void
-extent_arena_set(extent_t *extent, arena_t *arena) {
- unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U <<
- MALLOCX_ARENA_BITS) - 1);
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) |
- ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT);
-}
-
-static inline void
-extent_binshard_set(extent_t *extent, unsigned binshard) {
- /* The assertion assumes szind is set already. */
- assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) |
- ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT);
-}
-
-static inline void
-extent_addr_set(extent_t *extent, void *addr) {
- extent->e_addr = addr;
-}
-
-static inline void
-extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) {
- assert(extent_base_get(extent) == extent_addr_get(extent));
-
- if (alignment < PAGE) {
- unsigned lg_range = LG_PAGE -
- lg_floor(CACHELINE_CEILING(alignment));
- size_t r;
- if (!tsdn_null(tsdn)) {
- tsd_t *tsd = tsdn_tsd(tsdn);
- r = (size_t)prng_lg_range_u64(
- tsd_offset_statep_get(tsd), lg_range);
- } else {
- r = prng_lg_range_zu(
- &extent_arena_get(extent)->offset_state,
- lg_range, true);
- }
- uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE -
- lg_range);
- extent->e_addr = (void *)((uintptr_t)extent->e_addr +
- random_offset);
- assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) ==
- extent->e_addr);
- }
-}
-
-static inline void
-extent_size_set(extent_t *extent, size_t size) {
- assert((size & ~EXTENT_SIZE_MASK) == 0);
- extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK);
-}
-
-static inline void
-extent_esn_set(extent_t *extent, size_t esn) {
- extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn &
- EXTENT_ESN_MASK);
-}
-
-static inline void
-extent_bsize_set(extent_t *extent, size_t bsize) {
- extent->e_bsize = bsize;
-}
-
-static inline void
-extent_szind_set(extent_t *extent, szind_t szind) {
- assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) |
- ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT);
-}
-
-static inline void
-extent_nfree_set(extent_t *extent, unsigned nfree) {
- assert(extent_slab_get(extent));
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) |
- ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void
-extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) {
- /* The assertion assumes szind is set already. */
- assert(binshard < bin_infos[extent_szind_get(extent)].n_shards);
- extent->e_bits = (extent->e_bits &
- (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) |
- ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) |
- ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void
-extent_nfree_inc(extent_t *extent) {
- assert(extent_slab_get(extent));
- extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void
-extent_nfree_dec(extent_t *extent) {
- assert(extent_slab_get(extent));
- extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void
-extent_nfree_sub(extent_t *extent, uint64_t n) {
- assert(extent_slab_get(extent));
- extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT);
-}
-
-static inline void
-extent_sn_set(extent_t *extent, size_t sn) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) |
- ((uint64_t)sn << EXTENT_BITS_SN_SHIFT);
-}
-
-static inline void
-extent_state_set(extent_t *extent, extent_state_t state) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) |
- ((uint64_t)state << EXTENT_BITS_STATE_SHIFT);
-}
-
-static inline void
-extent_zeroed_set(extent_t *extent, bool zeroed) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) |
- ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT);
-}
-
-static inline void
-extent_committed_set(extent_t *extent, bool committed) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) |
- ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT);
-}
-
-static inline void
-extent_dumpable_set(extent_t *extent, bool dumpable) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) |
- ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT);
-}
-
-static inline void
-extent_slab_set(extent_t *extent, bool slab) {
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) |
- ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT);
-}
-
-static inline void
-extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) {
- atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE);
-}
-
-static inline void
-extent_prof_alloc_time_set(extent_t *extent, nstime_t t) {
- nstime_copy(&extent->e_alloc_time, &t);
-}
-
-static inline bool
-extent_is_head_get(extent_t *extent) {
- if (maps_coalesce) {
- not_reached();
- }
-
- return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >>
- EXTENT_BITS_IS_HEAD_SHIFT);
-}
-
-static inline void
-extent_is_head_set(extent_t *extent, bool is_head) {
- if (maps_coalesce) {
- not_reached();
- }
-
- extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) |
- ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT);
-}
-
-static inline void
-extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size,
- bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed,
- bool committed, bool dumpable, extent_head_state_t is_head) {
- assert(addr == PAGE_ADDR2BASE(addr) || !slab);
-
- extent_arena_set(extent, arena);
- extent_addr_set(extent, addr);
- extent_size_set(extent, size);
- extent_slab_set(extent, slab);
- extent_szind_set(extent, szind);
- extent_sn_set(extent, sn);
- extent_state_set(extent, state);
- extent_zeroed_set(extent, zeroed);
- extent_committed_set(extent, committed);
- extent_dumpable_set(extent, dumpable);
- ql_elm_new(extent, ql_link);
- if (!maps_coalesce) {
- extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true :
- false);
- }
- if (config_prof) {
- extent_prof_tctx_set(extent, NULL);
- }
-}
-
-static inline void
-extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) {
- extent_arena_set(extent, NULL);
- extent_addr_set(extent, addr);
- extent_bsize_set(extent, bsize);
- extent_slab_set(extent, false);
- extent_szind_set(extent, SC_NSIZES);
- extent_sn_set(extent, sn);
- extent_state_set(extent, extent_state_active);
- extent_zeroed_set(extent, true);
- extent_committed_set(extent, true);
- extent_dumpable_set(extent, true);
-}
-
-static inline void
-extent_list_init(extent_list_t *list) {
- ql_new(list);
-}
-
-static inline extent_t *
-extent_list_first(const extent_list_t *list) {
- return ql_first(list);
-}
-
-static inline extent_t *
-extent_list_last(const extent_list_t *list) {
- return ql_last(list, ql_link);
-}
-
-static inline void
-extent_list_append(extent_list_t *list, extent_t *extent) {
- ql_tail_insert(list, extent, ql_link);
-}
-
-static inline void
-extent_list_prepend(extent_list_t *list, extent_t *extent) {
- ql_head_insert(list, extent, ql_link);
-}
-
-static inline void
-extent_list_replace(extent_list_t *list, extent_t *to_remove,
- extent_t *to_insert) {
- ql_after_insert(to_remove, to_insert, ql_link);
- ql_remove(list, to_remove, ql_link);
-}
-
-static inline void
-extent_list_remove(extent_list_t *list, extent_t *extent) {
- ql_remove(list, extent, ql_link);
-}
-
-static inline int
-extent_sn_comp(const extent_t *a, const extent_t *b) {
- size_t a_sn = extent_sn_get(a);
- size_t b_sn = extent_sn_get(b);
-
- return (a_sn > b_sn) - (a_sn < b_sn);
-}
-
-static inline int
-extent_esn_comp(const extent_t *a, const extent_t *b) {
- size_t a_esn = extent_esn_get(a);
- size_t b_esn = extent_esn_get(b);
-
- return (a_esn > b_esn) - (a_esn < b_esn);
-}
-
-static inline int
-extent_ad_comp(const extent_t *a, const extent_t *b) {
- uintptr_t a_addr = (uintptr_t)extent_addr_get(a);
- uintptr_t b_addr = (uintptr_t)extent_addr_get(b);
-
- return (a_addr > b_addr) - (a_addr < b_addr);
-}
-
-static inline int
-extent_ead_comp(const extent_t *a, const extent_t *b) {
- uintptr_t a_eaddr = (uintptr_t)a;
- uintptr_t b_eaddr = (uintptr_t)b;
-
- return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr);
-}
-
-static inline int
-extent_snad_comp(const extent_t *a, const extent_t *b) {
- int ret;
-
- ret = extent_sn_comp(a, b);
- if (ret != 0) {
- return ret;
- }
-
- ret = extent_ad_comp(a, b);
- return ret;
-}
-
-static inline int
-extent_esnead_comp(const extent_t *a, const extent_t *b) {
- int ret;
-
- ret = extent_esn_comp(a, b);
- if (ret != 0) {
- return ret;
- }
-
- ret = extent_ead_comp(a, b);
- return ret;
-}
-
-#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */
diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h
deleted file mode 100644
index 767cd89..0000000
--- a/include/jemalloc/internal/extent_structs.h
+++ /dev/null
@@ -1,256 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
-#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H
-
-#include "jemalloc/internal/atomic.h"
-#include "jemalloc/internal/bit_util.h"
-#include "jemalloc/internal/bitmap.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/ql.h"
-#include "jemalloc/internal/ph.h"
-#include "jemalloc/internal/sc.h"
-
-typedef enum {
- extent_state_active = 0,
- extent_state_dirty = 1,
- extent_state_muzzy = 2,
- extent_state_retained = 3
-} extent_state_t;
-
-/* Extent (span of pages). Use accessor functions for e_* fields. */
-struct extent_s {
- /*
- * Bitfield containing several fields:
- *
- * a: arena_ind
- * b: slab
- * c: committed
- * d: dumpable
- * z: zeroed
- * t: state
- * i: szind
- * f: nfree
- * s: bin_shard
- * n: sn
- *
- * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa
- *
- * arena_ind: Arena from which this extent came, or all 1 bits if
- * unassociated.
- *
- * slab: The slab flag indicates whether the extent is used for a slab
- * of small regions. This helps differentiate small size classes,
- * and it indicates whether interior pointers can be looked up via
- * iealloc().
- *
- * committed: The committed flag indicates whether physical memory is
- * committed to the extent, whether explicitly or implicitly
- * as on a system that overcommits and satisfies physical
- * memory needs on demand via soft page faults.
- *
- * dumpable: The dumpable flag indicates whether or not we've set the
- * memory in question to be dumpable. Note that this
- * interacts somewhat subtly with user-specified extent hooks,
- * since we don't know if *they* are fiddling with
- * dumpability (in which case, we don't want to undo whatever
- * they're doing). To deal with this scenario, we:
- * - Make dumpable false only for memory allocated with the
- * default hooks.
- * - Only allow memory to go from non-dumpable to dumpable,
- * and only once.
- * - Never make the OS call to allow dumping when the
- * dumpable bit is already set.
- * These three constraints mean that we will never
- * accidentally dump user memory that the user meant to set
- * nondumpable with their extent hooks.
- *
- *
- * zeroed: The zeroed flag is used by extent recycling code to track
- * whether memory is zero-filled.
- *
- * state: The state flag is an extent_state_t.
- *
- * szind: The szind flag indicates usable size class index for
- * allocations residing in this extent, regardless of whether the
- * extent is a slab. Extent size and usable size often differ
- * even for non-slabs, either due to sz_large_pad or promotion of
- * sampled small regions.
- *
- * nfree: Number of free regions in slab.
- *
- * bin_shard: the shard of the bin from which this extent came.
- *
- * sn: Serial number (potentially non-unique).
- *
- * Serial numbers may wrap around if !opt_retain, but as long as
- * comparison functions fall back on address comparison for equal
- * serial numbers, stable (if imperfect) ordering is maintained.
- *
- * Serial numbers may not be unique even in the absence of
- * wrap-around, e.g. when splitting an extent and assigning the same
- * serial number to both resulting adjacent extents.
- */
- uint64_t e_bits;
-#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT))
-
-#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS
-#define EXTENT_BITS_ARENA_SHIFT 0
-#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT)
-
-#define EXTENT_BITS_SLAB_WIDTH 1
-#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT)
-#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT)
-
-#define EXTENT_BITS_COMMITTED_WIDTH 1
-#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT)
-#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT)
-
-#define EXTENT_BITS_DUMPABLE_WIDTH 1
-#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT)
-#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT)
-
-#define EXTENT_BITS_ZEROED_WIDTH 1
-#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT)
-#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT)
-
-#define EXTENT_BITS_STATE_WIDTH 2
-#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT)
-#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT)
-
-#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES)
-#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT)
-#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT)
-
-#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1)
-#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT)
-#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT)
-
-#define EXTENT_BITS_BINSHARD_WIDTH 6
-#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT)
-#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT)
-
-#define EXTENT_BITS_IS_HEAD_WIDTH 1
-#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT)
-#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT)
-
-#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT)
-#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT)
-
- /* Pointer to the extent that this structure is responsible for. */
- void *e_addr;
-
- union {
- /*
- * Extent size and serial number associated with the extent
- * structure (different than the serial number for the extent at
- * e_addr).
- *
- * ssssssss [...] ssssssss ssssnnnn nnnnnnnn
- */
- size_t e_size_esn;
- #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1))
- #define EXTENT_ESN_MASK ((size_t)PAGE-1)
- /* Base extent size, which may not be a multiple of PAGE. */
- size_t e_bsize;
- };
-
- /*
- * List linkage, used by a variety of lists:
- * - bin_t's slabs_full
- * - extents_t's LRU
- * - stashed dirty extents
- * - arena's large allocations
- */
- ql_elm(extent_t) ql_link;
-
- /*
- * Linkage for per size class sn/address-ordered heaps, and
- * for extent_avail
- */
- phn(extent_t) ph_link;
-
- union {
- /* Small region slab metadata. */
- arena_slab_data_t e_slab_data;
-
- /* Profiling data, used for large objects. */
- struct {
- /* Time when this was allocated. */
- nstime_t e_alloc_time;
- /* Points to a prof_tctx_t. */
- atomic_p_t e_prof_tctx;
- };
- };
-};
-typedef ql_head(extent_t) extent_list_t;
-typedef ph(extent_t) extent_tree_t;
-typedef ph(extent_t) extent_heap_t;
-
-/* Quantized collection of extents, with built-in LRU queue. */
-struct extents_s {
- malloc_mutex_t mtx;
-
- /*
- * Quantized per size class heaps of extents.
- *
- * Synchronization: mtx.
- */
- extent_heap_t heaps[SC_NPSIZES + 1];
- atomic_zu_t nextents[SC_NPSIZES + 1];
- atomic_zu_t nbytes[SC_NPSIZES + 1];
-
- /*
- * Bitmap for which set bits correspond to non-empty heaps.
- *
- * Synchronization: mtx.
- */
- bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)];
-
- /*
- * LRU of all extents in heaps.
- *
- * Synchronization: mtx.
- */
- extent_list_t lru;
-
- /*
- * Page sum for all extents in heaps.
- *
- * The synchronization here is a little tricky. Modifications to npages
- * must hold mtx, but reads need not (though, a reader who sees npages
- * without holding the mutex can't assume anything about the rest of the
- * state of the extents_t).
- */
- atomic_zu_t npages;
-
- /* All stored extents must be in the same state. */
- extent_state_t state;
-
- /*
- * If true, delay coalescing until eviction; otherwise coalesce during
- * deallocation.
- */
- bool delay_coalesce;
-};
-
-/*
- * The following two structs are for experimental purposes. See
- * experimental_utilization_query_ctl and
- * experimental_utilization_batch_query_ctl in src/ctl.c.
- */
-
-struct extent_util_stats_s {
- size_t nfree;
- size_t nregs;
- size_t size;
-};
-
-struct extent_util_stats_verbose_s {
- void *slabcur_addr;
- size_t nfree;
- size_t nregs;
- size_t size;
- size_t bin_nfree;
- size_t bin_nregs;
-};
-
-#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */
diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h
deleted file mode 100644
index 96925cf..0000000
--- a/include/jemalloc/internal/extent_types.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H
-#define JEMALLOC_INTERNAL_EXTENT_TYPES_H
-
-typedef struct extent_s extent_t;
-typedef struct extents_s extents_t;
-
-typedef struct extent_util_stats_s extent_util_stats_t;
-typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t;
-
-#define EXTENT_HOOKS_INITIALIZER NULL
-
-/*
- * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit)
- * is the max ratio between the size of the active extent and the new extent.
- */
-#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6
-
-typedef enum {
- EXTENT_NOT_HEAD,
- EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */
-} extent_head_state_t;
-
-#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */
diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h
new file mode 100644
index 0000000..90c4091
--- /dev/null
+++ b/include/jemalloc/internal/fb.h
@@ -0,0 +1,373 @@
+#ifndef JEMALLOC_INTERNAL_FB_H
+#define JEMALLOC_INTERNAL_FB_H
+
+/*
+ * The flat bitmap module. This has a larger API relative to the bitmap module
+ * (supporting things like backwards searches, and searching for both set and
+ * unset bits), at the cost of slower operations for very large bitmaps.
+ *
+ * Initialized flat bitmaps start at all-zeros (all bits unset).
+ */
+
+typedef unsigned long fb_group_t;
+#define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3))
+#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \
+ + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1))
+
+static inline void
+fb_init(fb_group_t *fb, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ memset(fb, 0, ngroups * sizeof(fb_group_t));
+}
+
+static inline bool
+fb_empty(fb_group_t *fb, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ for (size_t i = 0; i < ngroups; i++) {
+ if (fb[i] != 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+static inline bool
+fb_full(fb_group_t *fb, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ size_t trailing_bits = nbits % FB_GROUP_BITS;
+ size_t limit = (trailing_bits == 0 ? ngroups : ngroups - 1);
+ for (size_t i = 0; i < limit; i++) {
+ if (fb[i] != ~(fb_group_t)0) {
+ return false;
+ }
+ }
+ if (trailing_bits == 0) {
+ return true;
+ }
+ return fb[ngroups - 1] == ((fb_group_t)1 << trailing_bits) - 1;
+}
+
+static inline bool
+fb_get(fb_group_t *fb, size_t nbits, size_t bit) {
+ assert(bit < nbits);
+ size_t group_ind = bit / FB_GROUP_BITS;
+ size_t bit_ind = bit % FB_GROUP_BITS;
+ return (bool)(fb[group_ind] & ((fb_group_t)1 << bit_ind));
+}
+
+static inline void
+fb_set(fb_group_t *fb, size_t nbits, size_t bit) {
+ assert(bit < nbits);
+ size_t group_ind = bit / FB_GROUP_BITS;
+ size_t bit_ind = bit % FB_GROUP_BITS;
+ fb[group_ind] |= ((fb_group_t)1 << bit_ind);
+}
+
+static inline void
+fb_unset(fb_group_t *fb, size_t nbits, size_t bit) {
+ assert(bit < nbits);
+ size_t group_ind = bit / FB_GROUP_BITS;
+ size_t bit_ind = bit % FB_GROUP_BITS;
+ fb[group_ind] &= ~((fb_group_t)1 << bit_ind);
+}
+
+
+/*
+ * Some implementation details. This visitation function lets us apply a group
+ * visitor to each group in the bitmap (potentially modifying it). The mask
+ * indicates which bits are logically part of the visitation.
+ */
+typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask);
+JEMALLOC_ALWAYS_INLINE void
+fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx,
+ size_t start, size_t cnt) {
+ assert(cnt > 0);
+ assert(start + cnt <= nbits);
+ size_t group_ind = start / FB_GROUP_BITS;
+ size_t start_bit_ind = start % FB_GROUP_BITS;
+ /*
+ * The first group is special; it's the only one we don't start writing
+ * to from bit 0.
+ */
+ size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS
+ ? FB_GROUP_BITS - start_bit_ind : cnt);
+ /*
+ * We can basically split affected words into:
+ * - The first group, where we touch only the high bits
+ * - The last group, where we touch only the low bits
+ * - The middle, where we set all the bits to the same thing.
+ * We treat each case individually. The last two could be merged, but
+ * this can lead to bad codegen for those middle words.
+ */
+ /* First group */
+ fb_group_t mask = ((~(fb_group_t)0)
+ >> (FB_GROUP_BITS - first_group_cnt))
+ << start_bit_ind;
+ visit(ctx, &fb[group_ind], mask);
+
+ cnt -= first_group_cnt;
+ group_ind++;
+ /* Middle groups */
+ while (cnt > FB_GROUP_BITS) {
+ visit(ctx, &fb[group_ind], ~(fb_group_t)0);
+ cnt -= FB_GROUP_BITS;
+ group_ind++;
+ }
+ /* Last group */
+ if (cnt != 0) {
+ mask = (~(fb_group_t)0) >> (FB_GROUP_BITS - cnt);
+ visit(ctx, &fb[group_ind], mask);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+fb_assign_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) {
+ bool val = *(bool *)ctx;
+ if (val) {
+ *fb |= mask;
+ } else {
+ *fb &= ~mask;
+ }
+}
+
+/* Sets the cnt bits starting at position start. Must not have a 0 count. */
+static inline void
+fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) {
+ bool val = true;
+ fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt);
+}
+
+/* Unsets the cnt bits starting at position start. Must not have a 0 count. */
+static inline void
+fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) {
+ bool val = false;
+ fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+fb_scount_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) {
+ size_t *scount = (size_t *)ctx;
+ *scount += popcount_lu(*fb & mask);
+}
+
+/* Finds the number of set bit in the of length cnt starting at start. */
+JEMALLOC_ALWAYS_INLINE size_t
+fb_scount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) {
+ size_t scount = 0;
+ fb_visit_impl(fb, nbits, &fb_scount_visitor, &scount, start, cnt);
+ return scount;
+}
+
+/* Finds the number of unset bit in the of length cnt starting at start. */
+JEMALLOC_ALWAYS_INLINE size_t
+fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) {
+ size_t scount = fb_scount(fb, nbits, start, cnt);
+ return cnt - scount;
+}
+
+/*
+ * An implementation detail; find the first bit at position >= min_bit with the
+ * value val.
+ *
+ * Returns the number of bits in the bitmap if no such bit exists.
+ */
+JEMALLOC_ALWAYS_INLINE ssize_t
+fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val,
+ bool forward) {
+ assert(start < nbits);
+ size_t ngroups = FB_NGROUPS(nbits);
+ ssize_t group_ind = start / FB_GROUP_BITS;
+ size_t bit_ind = start % FB_GROUP_BITS;
+
+ fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1);
+
+ fb_group_t group = fb[group_ind];
+ group ^= maybe_invert;
+ if (forward) {
+ /* Only keep ones in bits bit_ind and above. */
+ group &= ~((1LU << bit_ind) - 1);
+ } else {
+ /*
+ * Only keep ones in bits bit_ind and below. You might more
+ * naturally express this as (1 << (bit_ind + 1)) - 1, but
+ * that shifts by an invalid amount if bit_ind is one less than
+ * FB_GROUP_BITS.
+ */
+ group &= ((2LU << bit_ind) - 1);
+ }
+ ssize_t group_ind_bound = forward ? (ssize_t)ngroups : -1;
+ while (group == 0) {
+ group_ind += forward ? 1 : -1;
+ if (group_ind == group_ind_bound) {
+ return forward ? (ssize_t)nbits : (ssize_t)-1;
+ }
+ group = fb[group_ind];
+ group ^= maybe_invert;
+ }
+ assert(group != 0);
+ size_t bit = forward ? ffs_lu(group) : fls_lu(group);
+ size_t pos = group_ind * FB_GROUP_BITS + bit;
+ /*
+ * The high bits of a partially filled last group are zeros, so if we're
+ * looking for zeros we don't want to report an invalid result.
+ */
+ if (forward && !val && pos > nbits) {
+ return nbits;
+ }
+ return pos;
+}
+
+/*
+ * Find the first set bit in the bitmap with an index >= min_bit. Returns the
+ * number of bits in the bitmap if no such bit exists.
+ */
+static inline size_t
+fb_ffu(fb_group_t *fb, size_t nbits, size_t min_bit) {
+ return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ false,
+ /* forward */ true);
+}
+
+/* The same, but looks for an unset bit. */
+static inline size_t
+fb_ffs(fb_group_t *fb, size_t nbits, size_t min_bit) {
+ return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ true,
+ /* forward */ true);
+}
+
+/*
+ * Find the last set bit in the bitmap with an index <= max_bit. Returns -1 if
+ * no such bit exists.
+ */
+static inline ssize_t
+fb_flu(fb_group_t *fb, size_t nbits, size_t max_bit) {
+ return fb_find_impl(fb, nbits, max_bit, /* val */ false,
+ /* forward */ false);
+}
+
+static inline ssize_t
+fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) {
+ return fb_find_impl(fb, nbits, max_bit, /* val */ true,
+ /* forward */ false);
+}
+
+/* Returns whether or not we found a range. */
+JEMALLOC_ALWAYS_INLINE bool
+fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
+ size_t *r_len, bool val, bool forward) {
+ assert(start < nbits);
+ ssize_t next_range_begin = fb_find_impl(fb, nbits, start, val, forward);
+ if ((forward && next_range_begin == (ssize_t)nbits)
+ || (!forward && next_range_begin == (ssize_t)-1)) {
+ return false;
+ }
+ /* Half open range; the set bits are [begin, end). */
+ ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val,
+ forward);
+ if (forward) {
+ *r_begin = next_range_begin;
+ *r_len = next_range_end - next_range_begin;
+ } else {
+ *r_begin = next_range_end + 1;
+ *r_len = next_range_begin - next_range_end;
+ }
+ return true;
+}
+
+/*
+ * Used to iterate through ranges of set bits.
+ *
+ * Tries to find the next contiguous sequence of set bits with a first index >=
+ * start. If one exists, puts the earliest bit of the range in *r_begin, its
+ * length in *r_len, and returns true. Otherwise, returns false (without
+ * touching *r_begin or *r_end).
+ */
+static inline bool
+fb_srange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
+ size_t *r_len) {
+ return fb_iter_range_impl(fb, nbits, start, r_begin, r_len,
+ /* val */ true, /* forward */ true);
+}
+
+/*
+ * The same as fb_srange_iter, but searches backwards from start rather than
+ * forwards. (The position returned is still the earliest bit in the range).
+ */
+static inline bool
+fb_srange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
+ size_t *r_len) {
+ return fb_iter_range_impl(fb, nbits, start, r_begin, r_len,
+ /* val */ true, /* forward */ false);
+}
+
+/* Similar to fb_srange_iter, but searches for unset bits. */
+static inline bool
+fb_urange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
+ size_t *r_len) {
+ return fb_iter_range_impl(fb, nbits, start, r_begin, r_len,
+ /* val */ false, /* forward */ true);
+}
+
+/* Similar to fb_srange_riter, but searches for unset bits. */
+static inline bool
+fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin,
+ size_t *r_len) {
+ return fb_iter_range_impl(fb, nbits, start, r_begin, r_len,
+ /* val */ false, /* forward */ false);
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) {
+ size_t begin = 0;
+ size_t longest_len = 0;
+ size_t len = 0;
+ while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin,
+ &len, val, /* forward */ true)) {
+ if (len > longest_len) {
+ longest_len = len;
+ }
+ begin += len;
+ }
+ return longest_len;
+}
+
+static inline size_t
+fb_srange_longest(fb_group_t *fb, size_t nbits) {
+ return fb_range_longest_impl(fb, nbits, /* val */ true);
+}
+
+static inline size_t
+fb_urange_longest(fb_group_t *fb, size_t nbits) {
+ return fb_range_longest_impl(fb, nbits, /* val */ false);
+}
+
+/*
+ * Initializes each bit of dst with the bitwise-AND of the corresponding bits of
+ * src1 and src2. All bitmaps must be the same size.
+ */
+static inline void
+fb_bit_and(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ for (size_t i = 0; i < ngroups; i++) {
+ dst[i] = src1[i] & src2[i];
+ }
+}
+
+/* Like fb_bit_and, but with bitwise-OR. */
+static inline void
+fb_bit_or(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ for (size_t i = 0; i < ngroups; i++) {
+ dst[i] = src1[i] | src2[i];
+ }
+}
+
+/* Initializes dst bit i to the negation of source bit i. */
+static inline void
+fb_bit_not(fb_group_t *dst, fb_group_t *src, size_t nbits) {
+ size_t ngroups = FB_NGROUPS(nbits);
+ for (size_t i = 0; i < ngroups; i++) {
+ dst[i] = ~src[i];
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_FB_H */
diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h
new file mode 100644
index 0000000..415a982
--- /dev/null
+++ b/include/jemalloc/internal/fxp.h
@@ -0,0 +1,126 @@
+#ifndef JEMALLOC_INTERNAL_FXP_H
+#define JEMALLOC_INTERNAL_FXP_H
+
+/*
+ * A simple fixed-point math implementation, supporting only unsigned values
+ * (with overflow being an error).
+ *
+ * It's not in general safe to use floating point in core code, because various
+ * libc implementations we get linked against can assume that malloc won't touch
+ * floating point state and call it with an unusual calling convention.
+ */
+
+/*
+ * High 16 bits are the integer part, low 16 are the fractional part. Or
+ * equivalently, repr == 2**16 * val, where we use "val" to refer to the
+ * (imaginary) fractional representation of the true value.
+ *
+ * We pick a uint32_t here since it's convenient in some places to
+ * double the representation size (i.e. multiplication and division use
+ * 64-bit integer types), and a uint64_t is the largest type we're
+ * certain is available.
+ */
+typedef uint32_t fxp_t;
+#define FXP_INIT_INT(x) ((x) << 16)
+#define FXP_INIT_PERCENT(pct) (((pct) << 16) / 100)
+
+/*
+ * Amount of precision used in parsing and printing numbers. The integer bound
+ * is simply because the integer part of the number gets 16 bits, and so is
+ * bounded by 65536.
+ *
+ * We use a lot of precision for the fractional part, even though most of it
+ * gets rounded off; this lets us get exact values for the important special
+ * case where the denominator is a small power of 2 (for instance,
+ * 1/512 == 0.001953125 is exactly representable even with only 16 bits of
+ * fractional precision). We need to left-shift by 16 before dividing by
+ * 10**precision, so we pick precision to be floor(log(2**48)) = 14.
+ */
+#define FXP_INTEGER_PART_DIGITS 5
+#define FXP_FRACTIONAL_PART_DIGITS 14
+
+/*
+ * In addition to the integer and fractional parts of the number, we need to
+ * include a null character and (possibly) a decimal point.
+ */
+#define FXP_BUF_SIZE (FXP_INTEGER_PART_DIGITS + FXP_FRACTIONAL_PART_DIGITS + 2)
+
+static inline fxp_t
+fxp_add(fxp_t a, fxp_t b) {
+ return a + b;
+}
+
+static inline fxp_t
+fxp_sub(fxp_t a, fxp_t b) {
+ assert(a >= b);
+ return a - b;
+}
+
+static inline fxp_t
+fxp_mul(fxp_t a, fxp_t b) {
+ uint64_t unshifted = (uint64_t)a * (uint64_t)b;
+ /*
+ * Unshifted is (a.val * 2**16) * (b.val * 2**16)
+ * == (a.val * b.val) * 2**32, but we want
+ * (a.val * b.val) * 2 ** 16.
+ */
+ return (uint32_t)(unshifted >> 16);
+}
+
+static inline fxp_t
+fxp_div(fxp_t a, fxp_t b) {
+ assert(b != 0);
+ uint64_t unshifted = ((uint64_t)a << 32) / (uint64_t)b;
+ /*
+ * Unshifted is (a.val * 2**16) * (2**32) / (b.val * 2**16)
+ * == (a.val / b.val) * (2 ** 32), which again corresponds to a right
+ * shift of 16.
+ */
+ return (uint32_t)(unshifted >> 16);
+}
+
+static inline uint32_t
+fxp_round_down(fxp_t a) {
+ return a >> 16;
+}
+
+static inline uint32_t
+fxp_round_nearest(fxp_t a) {
+ uint32_t fractional_part = (a & ((1U << 16) - 1));
+ uint32_t increment = (uint32_t)(fractional_part >= (1U << 15));
+ return (a >> 16) + increment;
+}
+
+/*
+ * Approximately computes x * frac, without the size limitations that would be
+ * imposed by converting u to an fxp_t.
+ */
+static inline size_t
+fxp_mul_frac(size_t x_orig, fxp_t frac) {
+ assert(frac <= (1U << 16));
+ /*
+ * Work around an over-enthusiastic warning about type limits below (on
+ * 32-bit platforms, a size_t is always less than 1ULL << 48).
+ */
+ uint64_t x = (uint64_t)x_orig;
+ /*
+ * If we can guarantee no overflow, multiply first before shifting, to
+ * preserve some precision. Otherwise, shift first and then multiply.
+ * In the latter case, we only lose the low 16 bits of a 48-bit number,
+ * so we're still accurate to within 1/2**32.
+ */
+ if (x < (1ULL << 48)) {
+ return (size_t)((x * frac) >> 16);
+ } else {
+ return (size_t)((x >> 16) * (uint64_t)frac);
+ }
+}
+
+/*
+ * Returns true on error. Otherwise, returns false and updates *ptr to point to
+ * the first character not parsed (because it wasn't a digit).
+ */
+bool fxp_parse(fxp_t *a, const char *ptr, char **end);
+void fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]);
+
+#endif /* JEMALLOC_INTERNAL_FXP_H */
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h
index 0270034..7f94567 100644
--- a/include/jemalloc/internal/hash.h
+++ b/include/jemalloc/internal/hash.h
@@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) {
uint32_t k1 = 0;
switch (len & 3) {
- case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH
- case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH
+ case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH;
+ case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH;
case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15);
k1 *= c2; h1 ^= k1;
}
@@ -177,29 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed,
uint32_t k4 = 0;
switch (len & 15) {
- case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH
- case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH
+ case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH;
+ case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH;
case 13: k4 ^= tail[12] << 0;
k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4;
- JEMALLOC_FALLTHROUGH
- case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH
- case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH
- case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH
+ JEMALLOC_FALLTHROUGH;
+ case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH;
+ case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH;
+ case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH;
case 9: k3 ^= tail[ 8] << 0;
- k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
- JEMALLOC_FALLTHROUGH
- case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH
- case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH
- case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH
+ k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3;
+ JEMALLOC_FALLTHROUGH;
+ case 8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH;
+ case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH;
+ case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH;
case 5: k2 ^= tail[ 4] << 0;
k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2;
- JEMALLOC_FALLTHROUGH
- case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH
- case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH
- case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH
+ JEMALLOC_FALLTHROUGH;
+ case 4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH;
+ case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH;
+ case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH;
case 1: k1 ^= tail[ 0] << 0;
k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1;
- JEMALLOC_FALLTHROUGH
+ break;
}
}
@@ -261,24 +261,25 @@ hash_x64_128(const void *key, const int len, const uint32_t seed,
uint64_t k2 = 0;
switch (len & 15) {
- case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH
- case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH
- case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH
- case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH
- case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH
- case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH
+ case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH;
+ case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH;
+ case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH;
+ case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH;
+ case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH;
+ case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH;
case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0;
k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2;
- JEMALLOC_FALLTHROUGH
- case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH
- case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH
- case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH
- case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH
- case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH
- case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH
- case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH
+ JEMALLOC_FALLTHROUGH;
+ case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH;
+ case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH;
+ case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH;
+ case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH;
+ case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH;
+ case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH;
+ case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH;
case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0;
k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1;
+ break;
}
}
diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h
new file mode 100644
index 0000000..f356285
--- /dev/null
+++ b/include/jemalloc/internal/hpa.h
@@ -0,0 +1,182 @@
+#ifndef JEMALLOC_INTERNAL_HPA_H
+#define JEMALLOC_INTERNAL_HPA_H
+
+#include "jemalloc/internal/exp_grow.h"
+#include "jemalloc/internal/hpa_hooks.h"
+#include "jemalloc/internal/hpa_opts.h"
+#include "jemalloc/internal/pai.h"
+#include "jemalloc/internal/psset.h"
+
+typedef struct hpa_central_s hpa_central_t;
+struct hpa_central_s {
+ /*
+ * The mutex guarding most of the operations on the central data
+ * structure.
+ */
+ malloc_mutex_t mtx;
+ /*
+ * Guards expansion of eden. We separate this from the regular mutex so
+ * that cheaper operations can still continue while we're doing the OS
+ * call.
+ */
+ malloc_mutex_t grow_mtx;
+ /*
+ * Either NULL (if empty), or some integer multiple of a
+ * hugepage-aligned number of hugepages. We carve them off one at a
+ * time to satisfy new pageslab requests.
+ *
+ * Guarded by grow_mtx.
+ */
+ void *eden;
+ size_t eden_len;
+ /* Source for metadata. */
+ base_t *base;
+ /* Number of grow operations done on this hpa_central_t. */
+ uint64_t age_counter;
+
+ /* The HPA hooks. */
+ hpa_hooks_t hooks;
+};
+
+typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t;
+struct hpa_shard_nonderived_stats_s {
+ /*
+ * The number of times we've purged within a hugepage.
+ *
+ * Guarded by mtx.
+ */
+ uint64_t npurge_passes;
+ /*
+ * The number of individual purge calls we perform (which should always
+ * be bigger than npurge_passes, since each pass purges at least one
+ * extent within a hugepage.
+ *
+ * Guarded by mtx.
+ */
+ uint64_t npurges;
+
+ /*
+ * The number of times we've hugified a pageslab.
+ *
+ * Guarded by mtx.
+ */
+ uint64_t nhugifies;
+ /*
+ * The number of times we've dehugified a pageslab.
+ *
+ * Guarded by mtx.
+ */
+ uint64_t ndehugifies;
+};
+
+/* Completely derived; only used by CTL. */
+typedef struct hpa_shard_stats_s hpa_shard_stats_t;
+struct hpa_shard_stats_s {
+ psset_stats_t psset_stats;
+ hpa_shard_nonderived_stats_t nonderived_stats;
+};
+
+typedef struct hpa_shard_s hpa_shard_t;
+struct hpa_shard_s {
+ /*
+ * pai must be the first member; we cast from a pointer to it to a
+ * pointer to the hpa_shard_t.
+ */
+ pai_t pai;
+
+ /* The central allocator we get our hugepages from. */
+ hpa_central_t *central;
+ /* Protects most of this shard's state. */
+ malloc_mutex_t mtx;
+ /*
+ * Guards the shard's access to the central allocator (preventing
+ * multiple threads operating on this shard from accessing the central
+ * allocator).
+ */
+ malloc_mutex_t grow_mtx;
+ /* The base metadata allocator. */
+ base_t *base;
+
+ /*
+ * This edata cache is the one we use when allocating a small extent
+ * from a pageslab. The pageslab itself comes from the centralized
+ * allocator, and so will use its edata_cache.
+ */
+ edata_cache_fast_t ecf;
+
+ psset_t psset;
+
+ /*
+ * How many grow operations have occurred.
+ *
+ * Guarded by grow_mtx.
+ */
+ uint64_t age_counter;
+
+ /* The arena ind we're associated with. */
+ unsigned ind;
+
+ /*
+ * Our emap. This is just a cache of the emap pointer in the associated
+ * hpa_central.
+ */
+ emap_t *emap;
+
+ /* The configuration choices for this hpa shard. */
+ hpa_shard_opts_t opts;
+
+ /*
+ * How many pages have we started but not yet finished purging in this
+ * hpa shard.
+ */
+ size_t npending_purge;
+
+ /*
+ * Those stats which are copied directly into the CTL-centric hpa shard
+ * stats.
+ */
+ hpa_shard_nonderived_stats_t stats;
+
+ /*
+ * Last time we performed purge on this shard.
+ */
+ nstime_t last_purge;
+};
+
+/*
+ * Whether or not the HPA can be used given the current configuration. This is
+ * is not necessarily a guarantee that it backs its allocations by hugepages,
+ * just that it can function properly given the system it's running on.
+ */
+bool hpa_supported();
+bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks);
+bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap,
+ base_t *base, edata_cache_t *edata_cache, unsigned ind,
+ const hpa_shard_opts_t *opts);
+
+void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src);
+void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard,
+ hpa_shard_stats_t *dst);
+
+/*
+ * Notify the shard that we won't use it for allocations much longer. Due to
+ * the possibility of races, we don't actually prevent allocations; just flush
+ * and disable the embedded edata_cache_small.
+ */
+void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard);
+void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard);
+
+void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard,
+ bool deferral_allowed);
+void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard);
+
+/*
+ * We share the fork ordering with the PA and arena prefork handling; that's why
+ * these are 3 and 4 rather than 0 and 1.
+ */
+void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard);
+void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard);
+void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard);
+void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard);
+
+#endif /* JEMALLOC_INTERNAL_HPA_H */
diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h
new file mode 100644
index 0000000..4ea221c
--- /dev/null
+++ b/include/jemalloc/internal/hpa_hooks.h
@@ -0,0 +1,17 @@
+#ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H
+#define JEMALLOC_INTERNAL_HPA_HOOKS_H
+
+typedef struct hpa_hooks_s hpa_hooks_t;
+struct hpa_hooks_s {
+ void *(*map)(size_t size);
+ void (*unmap)(void *ptr, size_t size);
+ void (*purge)(void *ptr, size_t size);
+ void (*hugify)(void *ptr, size_t size);
+ void (*dehugify)(void *ptr, size_t size);
+ void (*curtime)(nstime_t *r_time, bool first_reading);
+ uint64_t (*ms_since)(nstime_t *r_time);
+};
+
+extern hpa_hooks_t hpa_hooks_default;
+
+#endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h
new file mode 100644
index 0000000..ee84fea
--- /dev/null
+++ b/include/jemalloc/internal/hpa_opts.h
@@ -0,0 +1,74 @@
+#ifndef JEMALLOC_INTERNAL_HPA_OPTS_H
+#define JEMALLOC_INTERNAL_HPA_OPTS_H
+
+#include "jemalloc/internal/fxp.h"
+
+/*
+ * This file is morally part of hpa.h, but is split out for header-ordering
+ * reasons.
+ */
+
+typedef struct hpa_shard_opts_s hpa_shard_opts_t;
+struct hpa_shard_opts_s {
+ /*
+ * The largest size we'll allocate out of the shard. For those
+ * allocations refused, the caller (in practice, the PA module) will
+ * fall back to the more general (for now) PAC, which can always handle
+ * any allocation request.
+ */
+ size_t slab_max_alloc;
+
+ /*
+ * When the number of active bytes in a hugepage is >=
+ * hugification_threshold, we force hugify it.
+ */
+ size_t hugification_threshold;
+
+ /*
+ * The HPA purges whenever the number of pages exceeds dirty_mult *
+ * active_pages. This may be set to (fxp_t)-1 to disable purging.
+ */
+ fxp_t dirty_mult;
+
+ /*
+ * Whether or not the PAI methods are allowed to defer work to a
+ * subsequent hpa_shard_do_deferred_work() call. Practically, this
+ * corresponds to background threads being enabled. We track this
+ * ourselves for encapsulation purposes.
+ */
+ bool deferral_allowed;
+
+ /*
+ * How long a hugepage has to be a hugification candidate before it will
+ * actually get hugified.
+ */
+ uint64_t hugify_delay_ms;
+
+ /*
+ * Minimum amount of time between purges.
+ */
+ uint64_t min_purge_interval_ms;
+};
+
+#define HPA_SHARD_OPTS_DEFAULT { \
+ /* slab_max_alloc */ \
+ 64 * 1024, \
+ /* hugification_threshold */ \
+ HUGEPAGE * 95 / 100, \
+ /* dirty_mult */ \
+ FXP_INIT_PERCENT(25), \
+ /* \
+ * deferral_allowed \
+ * \
+ * Really, this is always set by the arena during creation \
+ * or by an hpa_shard_set_deferral_allowed call, so the value \
+ * we put here doesn't matter. \
+ */ \
+ false, \
+ /* hugify_delay_ms */ \
+ 10 * 1000, \
+ /* min_purge_interval_ms */ \
+ 5 * 1000 \
+}
+
+#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */
diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h
new file mode 100644
index 0000000..1fb534d
--- /dev/null
+++ b/include/jemalloc/internal/hpdata.h
@@ -0,0 +1,413 @@
+#ifndef JEMALLOC_INTERNAL_HPDATA_H
+#define JEMALLOC_INTERNAL_HPDATA_H
+
+#include "jemalloc/internal/fb.h"
+#include "jemalloc/internal/ph.h"
+#include "jemalloc/internal/ql.h"
+#include "jemalloc/internal/typed_list.h"
+
+/*
+ * The metadata representation we use for extents in hugepages. While the PAC
+ * uses the edata_t to represent both active and inactive extents, the HP only
+ * uses the edata_t for active ones; instead, inactive extent state is tracked
+ * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned
+ * region of virtual address space.
+ *
+ * An hpdata need not be "truly" backed by a hugepage (which is not necessarily
+ * an observable property of any given region of address space). It's just
+ * hugepage-sized and hugepage-aligned; it's *potentially* huge.
+ */
+typedef struct hpdata_s hpdata_t;
+ph_structs(hpdata_age_heap, hpdata_t);
+struct hpdata_s {
+ /*
+ * We likewise follow the edata convention of mangling names and forcing
+ * the use of accessors -- this lets us add some consistency checks on
+ * access.
+ */
+
+ /*
+ * The address of the hugepage in question. This can't be named h_addr,
+ * since that conflicts with a macro defined in Windows headers.
+ */
+ void *h_address;
+ /* Its age (measured in psset operations). */
+ uint64_t h_age;
+ /* Whether or not we think the hugepage is mapped that way by the OS. */
+ bool h_huge;
+
+ /*
+ * For some properties, we keep parallel sets of bools; h_foo_allowed
+ * and h_in_psset_foo_container. This is a decoupling mechanism to
+ * avoid bothering the hpa (which manages policies) from the psset
+ * (which is the mechanism used to enforce those policies). This allows
+ * all the container management logic to live in one place, without the
+ * HPA needing to know or care how that happens.
+ */
+
+ /*
+ * Whether or not the hpdata is allowed to be used to serve allocations,
+ * and whether or not the psset is currently tracking it as such.
+ */
+ bool h_alloc_allowed;
+ bool h_in_psset_alloc_container;
+
+ /*
+ * The same, but with purging. There's no corresponding
+ * h_in_psset_purge_container, because the psset (currently) always
+ * removes hpdatas from their containers during updates (to implement
+ * LRU for purging).
+ */
+ bool h_purge_allowed;
+
+ /* And with hugifying. */
+ bool h_hugify_allowed;
+ /* When we became a hugification candidate. */
+ nstime_t h_time_hugify_allowed;
+ bool h_in_psset_hugify_container;
+
+ /* Whether or not a purge or hugify is currently happening. */
+ bool h_mid_purge;
+ bool h_mid_hugify;
+
+ /*
+ * Whether or not the hpdata is being updated in the psset (i.e. if
+ * there has been a psset_update_begin call issued without a matching
+ * psset_update_end call). Eventually this will expand to other types
+ * of updates.
+ */
+ bool h_updating;
+
+ /* Whether or not the hpdata is in a psset. */
+ bool h_in_psset;
+
+ union {
+ /* When nonempty (and also nonfull), used by the psset bins. */
+ hpdata_age_heap_link_t age_link;
+ /*
+ * When empty (or not corresponding to any hugepage), list
+ * linkage.
+ */
+ ql_elm(hpdata_t) ql_link_empty;
+ };
+
+ /*
+ * Linkage for the psset to track candidates for purging and hugifying.
+ */
+ ql_elm(hpdata_t) ql_link_purge;
+ ql_elm(hpdata_t) ql_link_hugify;
+
+ /* The length of the largest contiguous sequence of inactive pages. */
+ size_t h_longest_free_range;
+
+ /* Number of active pages. */
+ size_t h_nactive;
+
+ /* A bitmap with bits set in the active pages. */
+ fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
+
+ /*
+ * Number of dirty or active pages, and a bitmap tracking them. One
+ * way to think of this is as which pages are dirty from the OS's
+ * perspective.
+ */
+ size_t h_ntouched;
+
+ /* The touched pages (using the same definition as above). */
+ fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)];
+};
+
+TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty)
+TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge)
+TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify)
+
+ph_proto(, hpdata_age_heap, hpdata_t);
+
+static inline void *
+hpdata_addr_get(const hpdata_t *hpdata) {
+ return hpdata->h_address;
+}
+
+static inline void
+hpdata_addr_set(hpdata_t *hpdata, void *addr) {
+ assert(HUGEPAGE_ADDR2BASE(addr) == addr);
+ hpdata->h_address = addr;
+}
+
+static inline uint64_t
+hpdata_age_get(const hpdata_t *hpdata) {
+ return hpdata->h_age;
+}
+
+static inline void
+hpdata_age_set(hpdata_t *hpdata, uint64_t age) {
+ hpdata->h_age = age;
+}
+
+static inline bool
+hpdata_huge_get(const hpdata_t *hpdata) {
+ return hpdata->h_huge;
+}
+
+static inline bool
+hpdata_alloc_allowed_get(const hpdata_t *hpdata) {
+ return hpdata->h_alloc_allowed;
+}
+
+static inline void
+hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) {
+ hpdata->h_alloc_allowed = alloc_allowed;
+}
+
+static inline bool
+hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) {
+ return hpdata->h_in_psset_alloc_container;
+}
+
+static inline void
+hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) {
+ assert(in_container != hpdata->h_in_psset_alloc_container);
+ hpdata->h_in_psset_alloc_container = in_container;
+}
+
+static inline bool
+hpdata_purge_allowed_get(const hpdata_t *hpdata) {
+ return hpdata->h_purge_allowed;
+}
+
+static inline void
+hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) {
+ assert(purge_allowed == false || !hpdata->h_mid_purge);
+ hpdata->h_purge_allowed = purge_allowed;
+}
+
+static inline bool
+hpdata_hugify_allowed_get(const hpdata_t *hpdata) {
+ return hpdata->h_hugify_allowed;
+}
+
+static inline void
+hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) {
+ assert(!hpdata->h_mid_hugify);
+ hpdata->h_hugify_allowed = true;
+ hpdata->h_time_hugify_allowed = now;
+}
+
+static inline nstime_t
+hpdata_time_hugify_allowed(hpdata_t *hpdata) {
+ return hpdata->h_time_hugify_allowed;
+}
+
+static inline void
+hpdata_disallow_hugify(hpdata_t *hpdata) {
+ hpdata->h_hugify_allowed = false;
+}
+
+static inline bool
+hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) {
+ return hpdata->h_in_psset_hugify_container;
+}
+
+static inline void
+hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) {
+ assert(in_container != hpdata->h_in_psset_hugify_container);
+ hpdata->h_in_psset_hugify_container = in_container;
+}
+
+static inline bool
+hpdata_mid_purge_get(const hpdata_t *hpdata) {
+ return hpdata->h_mid_purge;
+}
+
+static inline void
+hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) {
+ assert(mid_purge != hpdata->h_mid_purge);
+ hpdata->h_mid_purge = mid_purge;
+}
+
+static inline bool
+hpdata_mid_hugify_get(const hpdata_t *hpdata) {
+ return hpdata->h_mid_hugify;
+}
+
+static inline void
+hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) {
+ assert(mid_hugify != hpdata->h_mid_hugify);
+ hpdata->h_mid_hugify = mid_hugify;
+}
+
+static inline bool
+hpdata_changing_state_get(const hpdata_t *hpdata) {
+ return hpdata->h_mid_purge || hpdata->h_mid_hugify;
+}
+
+
+static inline bool
+hpdata_updating_get(const hpdata_t *hpdata) {
+ return hpdata->h_updating;
+}
+
+static inline void
+hpdata_updating_set(hpdata_t *hpdata, bool updating) {
+ assert(updating != hpdata->h_updating);
+ hpdata->h_updating = updating;
+}
+
+static inline bool
+hpdata_in_psset_get(const hpdata_t *hpdata) {
+ return hpdata->h_in_psset;
+}
+
+static inline void
+hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) {
+ assert(in_psset != hpdata->h_in_psset);
+ hpdata->h_in_psset = in_psset;
+}
+
+static inline size_t
+hpdata_longest_free_range_get(const hpdata_t *hpdata) {
+ return hpdata->h_longest_free_range;
+}
+
+static inline void
+hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) {
+ assert(longest_free_range <= HUGEPAGE_PAGES);
+ hpdata->h_longest_free_range = longest_free_range;
+}
+
+static inline size_t
+hpdata_nactive_get(hpdata_t *hpdata) {
+ return hpdata->h_nactive;
+}
+
+static inline size_t
+hpdata_ntouched_get(hpdata_t *hpdata) {
+ return hpdata->h_ntouched;
+}
+
+static inline size_t
+hpdata_ndirty_get(hpdata_t *hpdata) {
+ return hpdata->h_ntouched - hpdata->h_nactive;
+}
+
+static inline size_t
+hpdata_nretained_get(hpdata_t *hpdata) {
+ return HUGEPAGE_PAGES - hpdata->h_ntouched;
+}
+
+static inline void
+hpdata_assert_empty(hpdata_t *hpdata) {
+ assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES));
+ assert(hpdata->h_nactive == 0);
+}
+
+/*
+ * Only used in tests, and in hpdata_assert_consistent, below. Verifies some
+ * consistency properties of the hpdata (e.g. that cached counts of page stats
+ * match computed ones).
+ */
+static inline bool
+hpdata_consistent(hpdata_t *hpdata) {
+ if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES)
+ != hpdata_longest_free_range_get(hpdata)) {
+ return false;
+ }
+ if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
+ != hpdata->h_nactive) {
+ return false;
+ }
+ if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)
+ != hpdata->h_ntouched) {
+ return false;
+ }
+ if (hpdata->h_ntouched < hpdata->h_nactive) {
+ return false;
+ }
+ if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) {
+ return false;
+ }
+ if (hpdata_changing_state_get(hpdata)
+ && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) {
+ return false;
+ }
+ if (hpdata_hugify_allowed_get(hpdata)
+ != hpdata_in_psset_hugify_container_get(hpdata)) {
+ return false;
+ }
+ return true;
+}
+
+static inline void
+hpdata_assert_consistent(hpdata_t *hpdata) {
+ assert(hpdata_consistent(hpdata));
+}
+
+static inline bool
+hpdata_empty(hpdata_t *hpdata) {
+ return hpdata->h_nactive == 0;
+}
+
+static inline bool
+hpdata_full(hpdata_t *hpdata) {
+ return hpdata->h_nactive == HUGEPAGE_PAGES;
+}
+
+void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age);
+
+/*
+ * Given an hpdata which can serve an allocation request, pick and reserve an
+ * offset within that allocation.
+ */
+void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz);
+void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz);
+
+/*
+ * The hpdata_purge_prepare_t allows grabbing the metadata required to purge
+ * subranges of a hugepage while holding a lock, drop the lock during the actual
+ * purging of them, and reacquire it to update the metadata again.
+ */
+typedef struct hpdata_purge_state_s hpdata_purge_state_t;
+struct hpdata_purge_state_s {
+ size_t npurged;
+ size_t ndirty_to_purge;
+ fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)];
+ size_t next_purge_search_begin;
+};
+
+/*
+ * Initializes purge state. The access to hpdata must be externally
+ * synchronized with other hpdata_* calls.
+ *
+ * You can tell whether or not a thread is purging or hugifying a given hpdata
+ * via hpdata_changing_state_get(hpdata). Racing hugification or purging
+ * operations aren't allowed.
+ *
+ * Once you begin purging, you have to follow through and call hpdata_purge_next
+ * until you're done, and then end. Allocating out of an hpdata undergoing
+ * purging is not allowed.
+ *
+ * Returns the number of dirty pages that will be purged.
+ */
+size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
+
+/*
+ * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to
+ * true, and returns true. Otherwise, returns false to indicate that we're
+ * done.
+ *
+ * This requires exclusive access to the purge state, but *not* to the hpdata.
+ * In particular, unreserve calls are allowed while purging (i.e. you can dalloc
+ * into one part of the hpdata while purging a different part).
+ */
+bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state,
+ void **r_purge_addr, size_t *r_purge_size);
+/*
+ * Updates the hpdata metadata after all purging is done. Needs external
+ * synchronization.
+ */
+void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state);
+
+void hpdata_hugify(hpdata_t *hpdata);
+void hpdata_dehugify(hpdata_t *hpdata);
+
+#endif /* JEMALLOC_INTERNAL_HPDATA_H */
diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h
new file mode 100644
index 0000000..65fef51
--- /dev/null
+++ b/include/jemalloc/internal/inspect.h
@@ -0,0 +1,40 @@
+#ifndef JEMALLOC_INTERNAL_INSPECT_H
+#define JEMALLOC_INTERNAL_INSPECT_H
+
+/*
+ * This module contains the heap introspection capabilities. For now they are
+ * exposed purely through mallctl APIs in the experimental namespace, but this
+ * may change over time.
+ */
+
+/*
+ * The following two structs are for experimental purposes. See
+ * experimental_utilization_query_ctl and
+ * experimental_utilization_batch_query_ctl in src/ctl.c.
+ */
+typedef struct inspect_extent_util_stats_s inspect_extent_util_stats_t;
+struct inspect_extent_util_stats_s {
+ size_t nfree;
+ size_t nregs;
+ size_t size;
+};
+
+typedef struct inspect_extent_util_stats_verbose_s
+ inspect_extent_util_stats_verbose_t;
+
+struct inspect_extent_util_stats_verbose_s {
+ void *slabcur_addr;
+ size_t nfree;
+ size_t nregs;
+ size_t size;
+ size_t bin_nfree;
+ size_t bin_nregs;
+};
+
+void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr,
+ size_t *nfree, size_t *nregs, size_t *size);
+void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr,
+ size_t *nfree, size_t *nregs, size_t *size,
+ size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr);
+
+#endif /* JEMALLOC_INTERNAL_INSPECT_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h
index 7d6053e..983027c 100644
--- a/include/jemalloc/internal/jemalloc_internal_decls.h
+++ b/include/jemalloc/internal/jemalloc_internal_decls.h
@@ -5,6 +5,7 @@
#ifdef _WIN32
# include <windows.h>
# include "msvc_compat/windows_extra.h"
+# include "msvc_compat/strings.h"
# ifdef _WIN64
# if LG_VADDR <= 32
# error Generate the headers using x64 vcargs
@@ -31,8 +32,12 @@
# include <sys/uio.h>
# endif
# include <pthread.h>
-# ifdef __FreeBSD__
+# if defined(__FreeBSD__) || defined(__DragonFly__)
# include <pthread_np.h>
+# include <sched.h>
+# if defined(__FreeBSD__)
+# define cpu_set_t cpuset_t
+# endif
# endif
# include <signal.h>
# ifdef JEMALLOC_OS_UNFAIR_LOCK
@@ -91,4 +96,13 @@ isblank(int c) {
#endif
#include <fcntl.h>
+/*
+ * The Win32 midl compiler has #define small char; we don't use midl, but
+ * "small" is a nice identifier to have available when talking about size
+ * classes.
+ */
+#ifdef small
+# undef small
+#endif
+
#endif /* JEMALLOC_INTERNAL_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in
index c442a21..3588072 100644
--- a/include/jemalloc/internal/jemalloc_internal_defs.h.in
+++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -85,6 +85,12 @@
/* Defined if pthread_setname_np(3) is available. */
#undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
+/* Defined if pthread_getname_np(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP
+
+/* Defined if pthread_get_name_np(3) is available. */
+#undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP
+
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
@@ -101,6 +107,11 @@
#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME
/*
+ * Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
+ */
+#undef JEMALLOC_HAVE_CLOCK_REALTIME
+
+/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
@@ -162,6 +173,9 @@
/* Support utrace(2)-based tracing. */
#undef JEMALLOC_UTRACE
+/* Support utrace(2)-based tracing (label based signature). */
+#undef JEMALLOC_UTRACE_LABEL
+
/* Support optional abort() on OOM. */
#undef JEMALLOC_XMALLOC
@@ -177,6 +191,9 @@
/* One page is 2^LG_PAGE bytes. */
#undef LG_PAGE
+/* Maximum number of regions in a slab. */
+#undef CONFIG_LG_SLAB_MAXREGS
+
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
@@ -291,11 +308,40 @@
#undef JEMALLOC_MADVISE_DONTDUMP
/*
+ * Defined if MADV_[NO]CORE is supported as an argument to madvise.
+ */
+#undef JEMALLOC_MADVISE_NOCORE
+
+/* Defined if mprotect(2) is available. */
+#undef JEMALLOC_HAVE_MPROTECT
+
+/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
#undef JEMALLOC_THP
+/* Defined if posix_madvise is available. */
+#undef JEMALLOC_HAVE_POSIX_MADVISE
+
+/*
+ * Method for purging unused pages using posix_madvise.
+ *
+ * posix_madvise(..., POSIX_MADV_DONTNEED)
+ */
+#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED
+#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS
+
+/*
+ * Defined if memcntl page admin call is supported
+ */
+#undef JEMALLOC_HAVE_MEMCNTL
+
+/*
+ * Defined if malloc_size is supported
+ */
+#undef JEMALLOC_HAVE_MALLOC_SIZE
+
/* Define if operating system has alloca.h header. */
#undef JEMALLOC_HAS_ALLOCA_H
@@ -363,4 +409,19 @@
/* Performs additional safety checks when defined. */
#undef JEMALLOC_OPT_SAFETY_CHECKS
+/* Is C++ support being built? */
+#undef JEMALLOC_ENABLE_CXX
+
+/* Performs additional size checks when defined. */
+#undef JEMALLOC_OPT_SIZE_CHECKS
+
+/* Allows sampled junk and stash for checking use-after-free when defined. */
+#undef JEMALLOC_UAF_DETECTION
+
+/* Darwin VM_MAKE_TAG support */
+#undef JEMALLOC_HAVE_VM_MAKE_TAG
+
+/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
+#undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h
index d291170..fc834c6 100644
--- a/include/jemalloc/internal/jemalloc_internal_externs.h
+++ b/include/jemalloc/internal/jemalloc_internal_externs.h
@@ -2,7 +2,10 @@
#define JEMALLOC_INTERNAL_EXTERNS_H
#include "jemalloc/internal/atomic.h"
+#include "jemalloc/internal/hpa_opts.h"
+#include "jemalloc/internal/sec_opts.h"
#include "jemalloc/internal/tsd_types.h"
+#include "jemalloc/internal/nstime.h"
/* TSD checks this to set thread local slow state accordingly. */
extern bool malloc_slow;
@@ -10,14 +13,30 @@ extern bool malloc_slow;
/* Run-time options. */
extern bool opt_abort;
extern bool opt_abort_conf;
+extern bool opt_trust_madvise;
extern bool opt_confirm_conf;
+extern bool opt_hpa;
+extern hpa_shard_opts_t opt_hpa_opts;
+extern sec_opts_t opt_hpa_sec_opts;
+
extern const char *opt_junk;
extern bool opt_junk_alloc;
extern bool opt_junk_free;
+extern void (*junk_free_callback)(void *ptr, size_t size);
+extern void (*junk_alloc_callback)(void *ptr, size_t size);
extern bool opt_utrace;
extern bool opt_xmalloc;
+extern bool opt_experimental_infallible_new;
extern bool opt_zero;
extern unsigned opt_narenas;
+extern zero_realloc_action_t opt_zero_realloc_action;
+extern malloc_init_t malloc_init_state;
+extern const char *zero_realloc_mode_names[];
+extern atomic_zu_t zero_realloc_count;
+extern bool opt_cache_oblivious;
+
+/* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */
+extern uintptr_t san_cache_bin_nonfast_mask;
/* Number of CPUs. */
extern unsigned ncpus;
@@ -41,17 +60,16 @@ void *bootstrap_calloc(size_t num, size_t size);
void bootstrap_free(void *ptr);
void arena_set(unsigned ind, arena_t *arena);
unsigned narenas_total_get(void);
-arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks);
-arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind);
+arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config);
arena_t *arena_choose_hard(tsd_t *tsd, bool internal);
-void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind);
+void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena);
void iarena_cleanup(tsd_t *tsd);
void arena_cleanup(tsd_t *tsd);
-void arenas_tdata_cleanup(tsd_t *tsd);
+size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags);
void jemalloc_prefork(void);
void jemalloc_postfork_parent(void);
void jemalloc_postfork_child(void);
-bool malloc_initialized(void);
void je_sdallocx_noflags(void *ptr, size_t size);
+void *malloc_default(size_t size);
#endif /* JEMALLOC_INTERNAL_EXTERNS_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h
index 437eaa4..751c112 100644
--- a/include/jemalloc/internal/jemalloc_internal_includes.h
+++ b/include/jemalloc/internal/jemalloc_internal_includes.h
@@ -10,7 +10,7 @@
* structs, externs, and inlines), and included each header file multiple times
* in this file, picking out the portion we want on each pass using the
* following #defines:
- * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data
+ * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data
* types.
* JEMALLOC_H_STRUCTS : Data structures.
* JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes.
@@ -40,8 +40,6 @@
/* TYPES */
/******************************************************************************/
-#include "jemalloc/internal/extent_types.h"
-#include "jemalloc/internal/base_types.h"
#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/prof_types.h"
@@ -50,11 +48,8 @@
/* STRUCTS */
/******************************************************************************/
-#include "jemalloc/internal/arena_structs_a.h"
-#include "jemalloc/internal/extent_structs.h"
-#include "jemalloc/internal/base_structs.h"
#include "jemalloc/internal/prof_structs.h"
-#include "jemalloc/internal/arena_structs_b.h"
+#include "jemalloc/internal/arena_structs.h"
#include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/background_thread_structs.h"
@@ -63,8 +58,6 @@
/******************************************************************************/
#include "jemalloc/internal/jemalloc_internal_externs.h"
-#include "jemalloc/internal/extent_externs.h"
-#include "jemalloc/internal/base_externs.h"
#include "jemalloc/internal/arena_externs.h"
#include "jemalloc/internal/large_externs.h"
#include "jemalloc/internal/tcache_externs.h"
@@ -76,19 +69,16 @@
/******************************************************************************/
#include "jemalloc/internal/jemalloc_internal_inlines_a.h"
-#include "jemalloc/internal/base_inlines.h"
/*
* Include portions of arena code interleaved with tcache code in order to
* resolve circular dependencies.
*/
-#include "jemalloc/internal/prof_inlines_a.h"
#include "jemalloc/internal/arena_inlines_a.h"
-#include "jemalloc/internal/extent_inlines.h"
#include "jemalloc/internal/jemalloc_internal_inlines_b.h"
#include "jemalloc/internal/tcache_inlines.h"
#include "jemalloc/internal/arena_inlines_b.h"
#include "jemalloc/internal/jemalloc_internal_inlines_c.h"
-#include "jemalloc/internal/prof_inlines_b.h"
+#include "jemalloc/internal/prof_inlines.h"
#include "jemalloc/internal/background_thread_inlines.h"
#endif /* JEMALLOC_INTERNAL_INCLUDES_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
index ddde9b4..9e27cc3 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h
@@ -56,31 +56,6 @@ percpu_arena_ind_limit(percpu_arena_mode_t mode) {
}
}
-static inline arena_tdata_t *
-arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) {
- arena_tdata_t *tdata;
- arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd);
-
- if (unlikely(arenas_tdata == NULL)) {
- /* arenas_tdata hasn't been initialized yet. */
- return arena_tdata_get_hard(tsd, ind);
- }
- if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) {
- /*
- * ind is invalid, cache is old (too small), or tdata to be
- * initialized.
- */
- return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) :
- NULL);
- }
-
- tdata = &arenas_tdata[ind];
- if (likely(tdata != NULL) || !refresh_if_missing) {
- return tdata;
- }
- return arena_tdata_get_hard(tsd, ind);
-}
-
static inline arena_t *
arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) {
arena_t *ret;
@@ -90,36 +65,12 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) {
ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE);
if (unlikely(ret == NULL)) {
if (init_if_missing) {
- ret = arena_init(tsdn, ind,
- (extent_hooks_t *)&extent_hooks_default);
+ ret = arena_init(tsdn, ind, &arena_config_default);
}
}
return ret;
}
-static inline ticker_t *
-decay_ticker_get(tsd_t *tsd, unsigned ind) {
- arena_tdata_t *tdata;
-
- tdata = arena_tdata_get(tsd, ind, true);
- if (unlikely(tdata == NULL)) {
- return NULL;
- }
- return &tdata->decay_ticker;
-}
-
-JEMALLOC_ALWAYS_INLINE cache_bin_t *
-tcache_small_bin_get(tcache_t *tcache, szind_t binind) {
- assert(binind < SC_NBINS);
- return &tcache->bins_small[binind];
-}
-
-JEMALLOC_ALWAYS_INLINE cache_bin_t *
-tcache_large_bin_get(tcache_t *tcache, szind_t binind) {
- assert(binind >= SC_NBINS &&binind < nhbins);
- return &tcache->bins_large[binind - SC_NBINS];
-}
-
JEMALLOC_ALWAYS_INLINE bool
tcache_available(tsd_t *tsd) {
/*
@@ -129,9 +80,9 @@ tcache_available(tsd_t *tsd) {
*/
if (likely(tsd_tcache_enabled_get(tsd))) {
/* Associated arena == NULL implies tcache init in progress. */
- assert(tsd_tcachep_get(tsd)->arena == NULL ||
- tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail !=
- NULL);
+ if (config_debug && tsd_tcache_slowp_get(tsd)->arena != NULL) {
+ tcache_assert_initialized(tsd_tcachep_get(tsd));
+ }
return true;
}
@@ -147,28 +98,25 @@ tcache_get(tsd_t *tsd) {
return tsd_tcachep_get(tsd);
}
+JEMALLOC_ALWAYS_INLINE tcache_slow_t *
+tcache_slow_get(tsd_t *tsd) {
+ if (!tcache_available(tsd)) {
+ return NULL;
+ }
+
+ return tsd_tcache_slowp_get(tsd);
+}
+
static inline void
pre_reentrancy(tsd_t *tsd, arena_t *arena) {
/* arena is the current context. Reentry from a0 is not allowed. */
assert(arena != arena_get(tsd_tsdn(tsd), 0, false));
-
- bool fast = tsd_fast(tsd);
- assert(tsd_reentrancy_level_get(tsd) < INT8_MAX);
- ++*tsd_reentrancy_levelp_get(tsd);
- if (fast) {
- /* Prepare slow path for reentrancy. */
- tsd_slow_update(tsd);
- assert(tsd_state_get(tsd) == tsd_state_nominal_slow);
- }
+ tsd_pre_reentrancy_raw(tsd);
}
static inline void
post_reentrancy(tsd_t *tsd) {
- int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd);
- assert(*reentrancy_level > 0);
- if (--*reentrancy_level == 0) {
- tsd_slow_update(tsd);
- }
+ tsd_post_reentrancy_raw(tsd);
}
#endif /* JEMALLOC_INTERNAL_INLINES_A_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
index 70d6e57..152f8a0 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h
@@ -1,7 +1,31 @@
#ifndef JEMALLOC_INTERNAL_INLINES_B_H
#define JEMALLOC_INTERNAL_INLINES_B_H
-#include "jemalloc/internal/rtree.h"
+#include "jemalloc/internal/extent.h"
+
+static inline void
+percpu_arena_update(tsd_t *tsd, unsigned cpu) {
+ assert(have_percpu_arena);
+ arena_t *oldarena = tsd_arena_get(tsd);
+ assert(oldarena != NULL);
+ unsigned oldind = arena_ind_get(oldarena);
+
+ if (oldind != cpu) {
+ unsigned newind = cpu;
+ arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true);
+ assert(newarena != NULL);
+
+ /* Set new arena/tcache associations. */
+ arena_migrate(tsd, oldarena, newarena);
+ tcache_t *tcache = tcache_get(tsd);
+ if (tcache != NULL) {
+ tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
+ tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow,
+ tcache, newarena);
+ }
+ }
+}
+
/* Choose an arena based on a per-thread value. */
static inline arena_t *
@@ -22,18 +46,19 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) {
ret = arena_choose_hard(tsd, internal);
assert(ret);
if (tcache_available(tsd)) {
- tcache_t *tcache = tcache_get(tsd);
- if (tcache->arena != NULL) {
- /* See comments in tcache_data_init().*/
- assert(tcache->arena ==
+ tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd);
+ tcache_t *tcache = tsd_tcachep_get(tsd);
+ if (tcache_slow->arena != NULL) {
+ /* See comments in tsd_tcache_data_init().*/
+ assert(tcache_slow->arena ==
arena_get(tsd_tsdn(tsd), 0, false));
- if (tcache->arena != ret) {
+ if (tcache_slow->arena != ret) {
tcache_arena_reassociate(tsd_tsdn(tsd),
- tcache, ret);
+ tcache_slow, tcache, ret);
}
} else {
- tcache_arena_associate(tsd_tsdn(tsd), tcache,
- ret);
+ tcache_arena_associate(tsd_tsdn(tsd),
+ tcache_slow, tcache, ret);
}
}
}
@@ -75,13 +100,4 @@ arena_is_auto(arena_t *arena) {
return (arena_ind_get(arena) < manual_arena_base);
}
-JEMALLOC_ALWAYS_INLINE extent_t *
-iealloc(tsdn_t *tsdn, const void *ptr) {
- rtree_ctx_t rtree_ctx_fallback;
- rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback);
-
- return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx,
- (uintptr_t)ptr, true);
-}
-
#endif /* JEMALLOC_INTERNAL_INLINES_B_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
index cdb10eb..b0868b7 100644
--- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h
+++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h
@@ -3,7 +3,9 @@
#include "jemalloc/internal/hook.h"
#include "jemalloc/internal/jemalloc_internal_types.h"
+#include "jemalloc/internal/log.h"
#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/thread_event.h"
#include "jemalloc/internal/witness.h"
/*
@@ -101,8 +103,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) {
}
JEMALLOC_ALWAYS_INLINE void
-idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx,
- bool is_internal, bool slow_path) {
+idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache,
+ emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) {
assert(ptr != NULL);
assert(!is_internal || tcache == NULL);
assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr)));
@@ -125,7 +127,7 @@ idalloc(tsd_t *tsd, void *ptr) {
JEMALLOC_ALWAYS_INLINE void
isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache,
- alloc_ctx_t *alloc_ctx, bool slow_path) {
+ emap_alloc_ctx_t *alloc_ctx, bool slow_path) {
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn),
WITNESS_RANK_CORE, 0);
arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path);
@@ -219,4 +221,120 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra,
newsize);
}
+JEMALLOC_ALWAYS_INLINE void
+fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after,
+ cache_bin_t *bin, void *ret) {
+ thread_allocated_set(tsd, allocated_after);
+ if (config_stats) {
+ bin->tstats.nrequests++;
+ }
+
+ LOG("core.malloc.exit", "result: %p", ret);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+malloc_initialized(void) {
+ return (malloc_init_state == malloc_init_initialized);
+}
+
+/*
+ * malloc() fastpath. Included here so that we can inline it into operator new;
+ * function call overhead there is non-negligible as a fraction of total CPU in
+ * allocation-heavy C++ programs. We take the fallback alloc to allow malloc
+ * (which can return NULL) to differ in its behavior from operator new (which
+ * can't). It matches the signature of malloc / operator new so that we can
+ * tail-call the fallback allocator, allowing us to avoid setting up the call
+ * frame in the common case.
+ *
+ * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit
+ * tcache. If either of these is false, we tail-call to the slowpath,
+ * malloc_default(). Tail-calling is used to avoid any caller-saved
+ * registers.
+ *
+ * fastpath supports ticker and profiling, both of which will also
+ * tail-call to the slowpath if they fire.
+ */
+JEMALLOC_ALWAYS_INLINE void *
+imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) {
+ LOG("core.malloc.entry", "size: %zu", size);
+ if (tsd_get_allocates() && unlikely(!malloc_initialized())) {
+ return fallback_alloc(size);
+ }
+
+ tsd_t *tsd = tsd_get(false);
+ if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) {
+ return fallback_alloc(size);
+ }
+ /*
+ * The code below till the branch checking the next_event threshold may
+ * execute before malloc_init(), in which case the threshold is 0 to
+ * trigger slow path and initialization.
+ *
+ * Note that when uninitialized, only the fast-path variants of the sz /
+ * tsd facilities may be called.
+ */
+ szind_t ind;
+ /*
+ * The thread_allocated counter in tsd serves as a general purpose
+ * accumulator for bytes of allocation to trigger different types of
+ * events. usize is always needed to advance thread_allocated, though
+ * it's not always needed in the core allocation logic.
+ */
+ size_t usize;
+ sz_size2index_usize_fastpath(size, &ind, &usize);
+ /* Fast path relies on size being a bin. */
+ assert(ind < SC_NBINS);
+ assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) &&
+ (size <= SC_SMALL_MAXCLASS));
+
+ uint64_t allocated, threshold;
+ te_malloc_fastpath_ctx(tsd, &allocated, &threshold);
+ uint64_t allocated_after = allocated + usize;
+ /*
+ * The ind and usize might be uninitialized (or partially) before
+ * malloc_init(). The assertions check for: 1) full correctness (usize
+ * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0)
+ * when !initialized.
+ */
+ if (!malloc_initialized()) {
+ assert(threshold == 0);
+ } else {
+ assert(ind == sz_size2index(size));
+ assert(usize > 0 && usize == sz_index2size(ind));
+ }
+ /*
+ * Check for events and tsd non-nominal (fast_threshold will be set to
+ * 0) in a single branch.
+ */
+ if (unlikely(allocated_after >= threshold)) {
+ return fallback_alloc(size);
+ }
+ assert(tsd_fast(tsd));
+
+ tcache_t *tcache = tsd_tcachep_get(tsd);
+ assert(tcache == tcache_get(tsd));
+ cache_bin_t *bin = &tcache->bins[ind];
+ bool tcache_success;
+ void *ret;
+
+ /*
+ * We split up the code this way so that redundant low-water
+ * computation doesn't happen on the (more common) case in which we
+ * don't touch the low water mark. The compiler won't do this
+ * duplication on its own.
+ */
+ ret = cache_bin_alloc_easy(bin, &tcache_success);
+ if (tcache_success) {
+ fastpath_success_finish(tsd, allocated_after, bin, ret);
+ return ret;
+ }
+ ret = cache_bin_alloc(bin, &tcache_success);
+ if (tcache_success) {
+ fastpath_success_finish(tsd, allocated_after, bin, ret);
+ return ret;
+ }
+
+ return fallback_alloc(size);
+}
+
#endif /* JEMALLOC_INTERNAL_INLINES_C_H */
diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h
index d8ea06f..e97b5f9 100644
--- a/include/jemalloc/internal/jemalloc_internal_macros.h
+++ b/include/jemalloc/internal/jemalloc_internal_macros.h
@@ -4,7 +4,11 @@
#ifdef JEMALLOC_DEBUG
# define JEMALLOC_ALWAYS_INLINE static inline
#else
-# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline
+# ifdef _MSC_VER
+# define JEMALLOC_ALWAYS_INLINE static __forceinline
+# else
+# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline
+# endif
#endif
#ifdef _MSC_VER
# define inline _inline
@@ -40,13 +44,6 @@
#define JEMALLOC_VA_ARGS_HEAD(head, ...) head
#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__
-#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \
- && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7)
-#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough);
-#else
-#define JEMALLOC_FALLTHROUGH /* falls through */
-#endif
-
/* Diagnostic suppression macros */
#if defined(_MSC_VER) && !defined(__clang__)
# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push))
diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h
index e296c5a..62c2b59 100644
--- a/include/jemalloc/internal/jemalloc_internal_types.h
+++ b/include/jemalloc/internal/jemalloc_internal_types.h
@@ -3,15 +3,31 @@
#include "jemalloc/internal/quantum.h"
-/* Page size index type. */
-typedef unsigned pszind_t;
-
-/* Size class index type. */
-typedef unsigned szind_t;
-
/* Processor / core id type. */
typedef int malloc_cpuid_t;
+/* When realloc(non-null-ptr, 0) is called, what happens? */
+enum zero_realloc_action_e {
+ /* Realloc(ptr, 0) is free(ptr); return malloc(0); */
+ zero_realloc_action_alloc = 0,
+ /* Realloc(ptr, 0) is free(ptr); */
+ zero_realloc_action_free = 1,
+ /* Realloc(ptr, 0) aborts. */
+ zero_realloc_action_abort = 2
+};
+typedef enum zero_realloc_action_e zero_realloc_action_t;
+
+/* Signature of write callback. */
+typedef void (write_cb_t)(void *, const char *);
+
+enum malloc_init_e {
+ malloc_init_uninitialized = 3,
+ malloc_init_a0_initialized = 2,
+ malloc_init_recursible = 1,
+ malloc_init_initialized = 0 /* Common case --> jnz. */
+};
+typedef enum malloc_init_e malloc_init_t;
+
/*
* Flags bits:
*
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index 3418cbf..5ce77d9 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -4,8 +4,14 @@
#include "jemalloc_internal_defs.h"
#include "jemalloc/internal/jemalloc_internal_decls.h"
-#ifdef JEMALLOC_UTRACE
+#if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL)
#include <sys/ktrace.h>
+# if defined(JEMALLOC_UTRACE)
+# define UTRACE_CALL(p, l) utrace(p, l)
+# else
+# define UTRACE_CALL(p, l) utrace("jemalloc_process", p, l)
+# define JEMALLOC_UTRACE
+# endif
#endif
#define JEMALLOC_NO_DEMANGLE
@@ -180,6 +186,35 @@ static const bool config_opt_safety_checks =
#endif
;
+/*
+ * Extra debugging of sized deallocations too onerous to be included in the
+ * general safety checks.
+ */
+static const bool config_opt_size_checks =
+#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_DEBUG)
+ true
+#else
+ false
+#endif
+ ;
+
+static const bool config_uaf_detection =
+#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG)
+ true
+#else
+ false
+#endif
+ ;
+
+/* Whether or not the C++ extensions are enabled. */
+static const bool config_enable_cxx =
+#ifdef JEMALLOC_ENABLE_CXX
+ true
+#else
+ false
+#endif
+;
+
#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
@@ -209,5 +244,20 @@ static const bool have_background_thread =
false
#endif
;
+static const bool config_high_res_timer =
+#ifdef JEMALLOC_HAVE_CLOCK_REALTIME
+ true
+#else
+ false
+#endif
+ ;
+
+static const bool have_memcntl =
+#ifdef JEMALLOC_HAVE_MEMCNTL
+ true
+#else
+ false
+#endif
+ ;
#endif /* JEMALLOC_PREAMBLE_H */
diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h
index a05019e..8e09122 100644
--- a/include/jemalloc/internal/large_externs.h
+++ b/include/jemalloc/internal/large_externs.h
@@ -6,27 +6,19 @@
void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero);
void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment,
bool zero);
-bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min,
+bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min,
size_t usize_max, bool zero);
void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize,
size_t alignment, bool zero, tcache_t *tcache,
hook_ralloc_args_t *hook_args);
-typedef void (large_dalloc_junk_t)(void *, size_t);
-extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk;
-
-typedef void (large_dalloc_maybe_junk_t)(void *, size_t);
-extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk;
-
-void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent);
-void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent);
-void large_dalloc(tsdn_t *tsdn, extent_t *extent);
-size_t large_salloc(tsdn_t *tsdn, const extent_t *extent);
-prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent);
-void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx);
-void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent);
-
-nstime_t large_prof_alloc_time_get(const extent_t *extent);
-void large_prof_alloc_time_set(extent_t *extent, nstime_t time);
+void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata);
+void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata);
+void large_dalloc(tsdn_t *tsdn, edata_t *edata);
+size_t large_salloc(tsdn_t *tsdn, const edata_t *edata);
+void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info,
+ bool reset_recent);
+void large_prof_tctx_reset(edata_t *edata);
+void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size);
#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */
diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h
new file mode 100644
index 0000000..d020ebe
--- /dev/null
+++ b/include/jemalloc/internal/lockedint.h
@@ -0,0 +1,204 @@
+#ifndef JEMALLOC_INTERNAL_LOCKEDINT_H
+#define JEMALLOC_INTERNAL_LOCKEDINT_H
+
+/*
+ * In those architectures that support 64-bit atomics, we use atomic updates for
+ * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize
+ * externally.
+ */
+
+typedef struct locked_u64_s locked_u64_t;
+#ifdef JEMALLOC_ATOMIC_U64
+struct locked_u64_s {
+ atomic_u64_t val;
+};
+#else
+/* Must hold the associated mutex. */
+struct locked_u64_s {
+ uint64_t val;
+};
+#endif
+
+typedef struct locked_zu_s locked_zu_t;
+struct locked_zu_s {
+ atomic_zu_t val;
+};
+
+#ifndef JEMALLOC_ATOMIC_U64
+# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name;
+# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \
+ malloc_mutex_init(&(mu), name, rank, rank_mode)
+# define LOCKEDINT_MTX(mtx) (&(mtx))
+# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu))
+# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu))
+# define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu))
+# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \
+ malloc_mutex_postfork_parent(tsdn, &(mu))
+# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \
+ malloc_mutex_postfork_child(tsdn, &(mu))
+#else
+# define LOCKEDINT_MTX_DECLARE(name)
+# define LOCKEDINT_MTX(mtx) NULL
+# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false
+# define LOCKEDINT_MTX_LOCK(tsdn, mu)
+# define LOCKEDINT_MTX_UNLOCK(tsdn, mu)
+# define LOCKEDINT_MTX_PREFORK(tsdn, mu)
+# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu)
+# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu)
+#endif
+
+#ifdef JEMALLOC_ATOMIC_U64
+# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL)
+#else
+# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \
+ malloc_mutex_assert_owner(tsdn, (mtx))
+#endif
+
+static inline uint64_t
+locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_u64(&p->val, ATOMIC_RELAXED);
+#else
+ return p->val;
+#endif
+}
+
+static inline void
+locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
+ uint64_t x) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED);
+#else
+ p->val += x;
+#endif
+}
+
+static inline void
+locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
+ uint64_t x) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ p->val -= x;
+ assert(p->val + x >= p->val);
+#endif
+}
+
+/* Increment and take modulus. Returns whether the modulo made any change. */
+static inline bool
+locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p,
+ const uint64_t x, const uint64_t modulus) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+ uint64_t before, after;
+ bool overflow;
+#ifdef JEMALLOC_ATOMIC_U64
+ before = atomic_load_u64(&p->val, ATOMIC_RELAXED);
+ do {
+ after = before + x;
+ assert(after >= before);
+ overflow = (after >= modulus);
+ if (overflow) {
+ after %= modulus;
+ }
+ } while (!atomic_compare_exchange_weak_u64(&p->val, &before, after,
+ ATOMIC_RELAXED, ATOMIC_RELAXED));
+#else
+ before = p->val;
+ after = before + x;
+ overflow = (after >= modulus);
+ if (overflow) {
+ after %= modulus;
+ }
+ p->val = after;
+#endif
+ return overflow;
+}
+
+/*
+ * Non-atomically sets *dst += src. *dst needs external synchronization.
+ * This lets us avoid the cost of a fetch_add when its unnecessary (note that
+ * the types here are atomic).
+ */
+static inline void
+locked_inc_u64_unsynchronized(locked_u64_t *dst, uint64_t src) {
+#ifdef JEMALLOC_ATOMIC_U64
+ uint64_t cur_dst = atomic_load_u64(&dst->val, ATOMIC_RELAXED);
+ atomic_store_u64(&dst->val, src + cur_dst, ATOMIC_RELAXED);
+#else
+ dst->val += src;
+#endif
+}
+
+static inline uint64_t
+locked_read_u64_unsynchronized(locked_u64_t *p) {
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_u64(&p->val, ATOMIC_RELAXED);
+#else
+ return p->val;
+#endif
+}
+
+static inline void
+locked_init_u64_unsynchronized(locked_u64_t *p, uint64_t x) {
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_store_u64(&p->val, x, ATOMIC_RELAXED);
+#else
+ p->val = x;
+#endif
+}
+
+static inline size_t
+locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+#else
+ return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
+ size_t x) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED);
+#else
+ size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED);
+ atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED);
+#endif
+}
+
+static inline void
+locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p,
+ size_t x) {
+ LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx);
+#ifdef JEMALLOC_ATOMIC_U64
+ size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED);
+ assert(r - x <= r);
+#else
+ size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED);
+ atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED);
+#endif
+}
+
+/* Like the _u64 variant, needs an externally synchronized *dst. */
+static inline void
+locked_inc_zu_unsynchronized(locked_zu_t *dst, size_t src) {
+ size_t cur_dst = atomic_load_zu(&dst->val, ATOMIC_RELAXED);
+ atomic_store_zu(&dst->val, src + cur_dst, ATOMIC_RELAXED);
+}
+
+/*
+ * Unlike the _u64 variant, this is safe to call unconditionally.
+ */
+static inline size_t
+locked_read_atomic_zu(locked_zu_t *p) {
+ return atomic_load_zu(&p->val, ATOMIC_RELAXED);
+}
+
+#endif /* JEMALLOC_INTERNAL_LOCKEDINT_H */
diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h
index 1d1a414..a375bda 100644
--- a/include/jemalloc/internal/malloc_io.h
+++ b/include/jemalloc/internal/malloc_io.h
@@ -1,6 +1,8 @@
#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H
#define JEMALLOC_INTERNAL_MALLOC_IO_H
+#include "jemalloc/internal/jemalloc_internal_types.h"
+
#ifdef _WIN32
# ifdef _WIN64
# define FMT64_PREFIX "ll"
@@ -40,6 +42,7 @@
*/
#define MALLOC_PRINTF_BUFSIZE 4096
+write_cb_t wrtmessage;
int buferror(int err, char *buf, size_t buflen);
uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr,
int base);
@@ -57,10 +60,10 @@ size_t malloc_snprintf(char *str, size_t size, const char *format, ...)
* The caller can set write_cb to null to choose to print with the
* je_malloc_message hook.
*/
-void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *format, va_list ap);
-void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque,
- const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4);
+void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
+ va_list ap);
+void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format,
+ ...) JEMALLOC_FORMAT_PRINTF(3, 4);
void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2);
static inline ssize_t
diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h
new file mode 100644
index 0000000..316ea9b
--- /dev/null
+++ b/include/jemalloc/internal/mpsc_queue.h
@@ -0,0 +1,134 @@
+#ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H
+#define JEMALLOC_INTERNAL_MPSC_QUEUE_H
+
+#include "jemalloc/internal/atomic.h"
+
+/*
+ * A concurrent implementation of a multi-producer, single-consumer queue. It
+ * supports three concurrent operations:
+ * - Push
+ * - Push batch
+ * - Pop batch
+ *
+ * These operations are all lock-free.
+ *
+ * The implementation is the simple two-stack queue built on a Treiber stack.
+ * It's not terribly efficient, but this isn't expected to go into anywhere with
+ * hot code. In fact, we don't really even need queue semantics in any
+ * anticipated use cases; we could get away with just the stack. But this way
+ * lets us frame the API in terms of the existing list types, which is a nice
+ * convenience. We can save on cache misses by introducing our own (parallel)
+ * single-linked list type here, and dropping FIFO semantics, if we need this to
+ * get faster. Since we're currently providing queue semantics though, we use
+ * the prev field in the link rather than the next field for Treiber-stack
+ * linkage, so that we can preserve order for bash-pushed lists (recall that the
+ * two-stack tricks reverses orders in the lock-free first stack).
+ */
+
+#define mpsc_queue(a_type) \
+struct { \
+ atomic_p_t tail; \
+}
+
+#define mpsc_queue_proto(a_attr, a_prefix, a_queue_type, a_type, \
+ a_list_type) \
+/* Initialize a queue. */ \
+a_attr void \
+a_prefix##new(a_queue_type *queue); \
+/* Insert all items in src into the queue, clearing src. */ \
+a_attr void \
+a_prefix##push_batch(a_queue_type *queue, a_list_type *src); \
+/* Insert node into the queue. */ \
+a_attr void \
+a_prefix##push(a_queue_type *queue, a_type *node); \
+/* \
+ * Pop all items in the queue into the list at dst. dst should already \
+ * be initialized (and may contain existing items, which then remain \
+ * in dst). \
+ */ \
+a_attr void \
+a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst);
+
+#define mpsc_queue_gen(a_attr, a_prefix, a_queue_type, a_type, \
+ a_list_type, a_link) \
+a_attr void \
+a_prefix##new(a_queue_type *queue) { \
+ atomic_store_p(&queue->tail, NULL, ATOMIC_RELAXED); \
+} \
+a_attr void \
+a_prefix##push_batch(a_queue_type *queue, a_list_type *src) { \
+ /* \
+ * Reuse the ql list next field as the Treiber stack next \
+ * field. \
+ */ \
+ a_type *first = ql_first(src); \
+ a_type *last = ql_last(src, a_link); \
+ void* cur_tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \
+ do { \
+ /* \
+ * Note that this breaks the queue ring structure; \
+ * it's not a ring any more! \
+ */ \
+ first->a_link.qre_prev = cur_tail; \
+ /* \
+ * Note: the upcoming CAS doesn't need an atomic; every \
+ * push only needs to synchronize with the next pop, \
+ * which we get from the release sequence rules. \
+ */ \
+ } while (!atomic_compare_exchange_weak_p(&queue->tail, \
+ &cur_tail, last, ATOMIC_RELEASE, ATOMIC_RELAXED)); \
+ ql_new(src); \
+} \
+a_attr void \
+a_prefix##push(a_queue_type *queue, a_type *node) { \
+ ql_elm_new(node, a_link); \
+ a_list_type list; \
+ ql_new(&list); \
+ ql_head_insert(&list, node, a_link); \
+ a_prefix##push_batch(queue, &list); \
+} \
+a_attr void \
+a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) { \
+ a_type *tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \
+ if (tail == NULL) { \
+ /* \
+ * In the common special case where there are no \
+ * pending elements, bail early without a costly RMW. \
+ */ \
+ return; \
+ } \
+ tail = atomic_exchange_p(&queue->tail, NULL, ATOMIC_ACQUIRE); \
+ /* \
+ * It's a single-consumer queue, so if cur started non-NULL, \
+ * it'd better stay non-NULL. \
+ */ \
+ assert(tail != NULL); \
+ /* \
+ * We iterate through the stack and both fix up the link \
+ * structure (stack insertion broke the list requirement that \
+ * the list be circularly linked). It's just as efficient at \
+ * this point to make the queue a "real" queue, so do that as \
+ * well. \
+ * If this ever gets to be a hot spot, we can omit this fixup \
+ * and make the queue a bag (i.e. not necessarily ordered), but \
+ * that would mean jettisoning the existing list API as the \
+ * batch pushing/popping interface. \
+ */ \
+ a_list_type reversed; \
+ ql_new(&reversed); \
+ while (tail != NULL) { \
+ /* \
+ * Pop an item off the stack, prepend it onto the list \
+ * (reversing the order). Recall that we use the \
+ * list prev field as the Treiber stack next field to \
+ * preserve order of batch-pushed items when reversed. \
+ */ \
+ a_type *next = tail->a_link.qre_prev; \
+ ql_elm_new(tail, a_link); \
+ ql_head_insert(&reversed, tail, a_link); \
+ tail = next; \
+ } \
+ ql_concat(dst, &reversed, a_link); \
+}
+
+#endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h
index 7c24f07..63a0b1b 100644
--- a/include/jemalloc/internal/mutex.h
+++ b/include/jemalloc/internal/mutex.h
@@ -6,6 +6,8 @@
#include "jemalloc/internal/tsd.h"
#include "jemalloc/internal/witness.h"
+extern int64_t opt_mutex_max_spin;
+
typedef enum {
/* Can only acquire one mutex of a given witness rank at a time. */
malloc_mutex_rank_exclusive,
@@ -43,7 +45,7 @@ struct malloc_mutex_s {
#else
pthread_mutex_t lock;
#endif
- /*
+ /*
* Hint flag to avoid exclusive cache line contention
* during spin waiting
*/
@@ -67,12 +69,6 @@ struct malloc_mutex_s {
#endif
};
-/*
- * Based on benchmark results, a fixed spin with this amount of retries works
- * well for our critical sections.
- */
-#define MALLOC_MUTEX_MAX_SPIN 250
-
#ifdef _WIN32
# if _WIN32_WINNT >= 0x0600
# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock)
@@ -245,22 +241,25 @@ malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) {
witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness);
}
-/* Copy the prof data from mutex for processing. */
static inline void
-malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
- malloc_mutex_t *mutex) {
- mutex_prof_data_t *source = &mutex->prof_data;
- /* Can only read holding the mutex. */
- malloc_mutex_assert_owner(tsdn, mutex);
-
+malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) {
/*
* Not *really* allowed (we shouldn't be doing non-atomic loads of
* atomic data), but the mutex protection makes this safe, and writing
* a member-for-member copy is tedious for this situation.
*/
- *data = *source;
+ *dst = *source;
/* n_wait_thds is not reported (modified w/o locking). */
- atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED);
+ atomic_store_u32(&dst->n_waiting_thds, 0, ATOMIC_RELAXED);
+}
+
+/* Copy the prof data from mutex for processing. */
+static inline void
+malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data,
+ malloc_mutex_t *mutex) {
+ /* Can only read holding the mutex. */
+ malloc_mutex_assert_owner(tsdn, mutex);
+ malloc_mutex_prof_copy(data, &mutex->prof_data);
}
static inline void
@@ -285,4 +284,36 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data,
data->n_lock_ops += source->n_lock_ops;
}
+/* Compare the prof data and update to the maximum. */
+static inline void
+malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data,
+ malloc_mutex_t *mutex) {
+ mutex_prof_data_t *source = &mutex->prof_data;
+ /* Can only read holding the mutex. */
+ malloc_mutex_assert_owner(tsdn, mutex);
+
+ if (nstime_compare(&source->tot_wait_time, &data->tot_wait_time) > 0) {
+ nstime_copy(&data->tot_wait_time, &source->tot_wait_time);
+ }
+ if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) {
+ nstime_copy(&data->max_wait_time, &source->max_wait_time);
+ }
+ if (source->n_wait_times > data->n_wait_times) {
+ data->n_wait_times = source->n_wait_times;
+ }
+ if (source->n_spin_acquired > data->n_spin_acquired) {
+ data->n_spin_acquired = source->n_spin_acquired;
+ }
+ if (source->max_n_thds > data->max_n_thds) {
+ data->max_n_thds = source->max_n_thds;
+ }
+ if (source->n_owner_switches > data->n_owner_switches) {
+ data->n_owner_switches = source->n_owner_switches;
+ }
+ if (source->n_lock_ops > data->n_lock_ops) {
+ data->n_lock_ops = source->n_lock_ops;
+ }
+ /* n_wait_thds is not reported. */
+}
+
#endif /* JEMALLOC_INTERNAL_MUTEX_H */
diff --git a/include/jemalloc/internal/mutex_pool.h b/include/jemalloc/internal/mutex_pool.h
deleted file mode 100644
index 726cece..0000000
--- a/include/jemalloc/internal/mutex_pool.h
+++ /dev/null
@@ -1,94 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H
-#define JEMALLOC_INTERNAL_MUTEX_POOL_H
-
-#include "jemalloc/internal/hash.h"
-#include "jemalloc/internal/mutex.h"
-#include "jemalloc/internal/witness.h"
-
-/* We do mod reductions by this value, so it should be kept a power of 2. */
-#define MUTEX_POOL_SIZE 256
-
-typedef struct mutex_pool_s mutex_pool_t;
-struct mutex_pool_s {
- malloc_mutex_t mutexes[MUTEX_POOL_SIZE];
-};
-
-bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank);
-
-/* Internal helper - not meant to be called outside this module. */
-static inline malloc_mutex_t *
-mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) {
- size_t hash_result[2];
- hash(&key, sizeof(key), 0xd50dcc1b, hash_result);
- return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE];
-}
-
-static inline void
-mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) {
- for (int i = 0; i < MUTEX_POOL_SIZE; i++) {
- malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]);
- }
-}
-
-/*
- * Note that a mutex pool doesn't work exactly the way an embdedded mutex would.
- * You're not allowed to acquire mutexes in the pool one at a time. You have to
- * acquire all the mutexes you'll need in a single function call, and then
- * release them all in a single function call.
- */
-
-static inline void
-mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
- mutex_pool_assert_not_held(tsdn, pool);
-
- malloc_mutex_t *mutex = mutex_pool_mutex(pool, key);
- malloc_mutex_lock(tsdn, mutex);
-}
-
-static inline void
-mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
- malloc_mutex_t *mutex = mutex_pool_mutex(pool, key);
- malloc_mutex_unlock(tsdn, mutex);
-
- mutex_pool_assert_not_held(tsdn, pool);
-}
-
-static inline void
-mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
- uintptr_t key2) {
- mutex_pool_assert_not_held(tsdn, pool);
-
- malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1);
- malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2);
- if ((uintptr_t)mutex1 < (uintptr_t)mutex2) {
- malloc_mutex_lock(tsdn, mutex1);
- malloc_mutex_lock(tsdn, mutex2);
- } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) {
- malloc_mutex_lock(tsdn, mutex1);
- } else {
- malloc_mutex_lock(tsdn, mutex2);
- malloc_mutex_lock(tsdn, mutex1);
- }
-}
-
-static inline void
-mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1,
- uintptr_t key2) {
- malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1);
- malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2);
- if (mutex1 == mutex2) {
- malloc_mutex_unlock(tsdn, mutex1);
- } else {
- malloc_mutex_unlock(tsdn, mutex1);
- malloc_mutex_unlock(tsdn, mutex2);
- }
-
- mutex_pool_assert_not_held(tsdn, pool);
-}
-
-static inline void
-mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) {
- malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key));
-}
-
-#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */
diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h
index 2cb8fb0..4a526a5 100644
--- a/include/jemalloc/internal/mutex_prof.h
+++ b/include/jemalloc/internal/mutex_prof.h
@@ -7,8 +7,14 @@
#define MUTEX_PROF_GLOBAL_MUTEXES \
OP(background_thread) \
+ OP(max_per_bg_thd) \
OP(ctl) \
- OP(prof)
+ OP(prof) \
+ OP(prof_thds_data) \
+ OP(prof_dump) \
+ OP(prof_recent_alloc) \
+ OP(prof_recent_dump) \
+ OP(prof_stats)
typedef enum {
#define OP(mtx) global_prof_mutex_##mtx,
@@ -26,7 +32,10 @@ typedef enum {
OP(decay_dirty) \
OP(decay_muzzy) \
OP(base) \
- OP(tcache_list)
+ OP(tcache_list) \
+ OP(hpa_shard) \
+ OP(hpa_shard_grow) \
+ OP(hpa_sec)
typedef enum {
#define OP(mtx) arena_prof_mutex_##mtx,
diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h
index 17c177c..486e5cc 100644
--- a/include/jemalloc/internal/nstime.h
+++ b/include/jemalloc/internal/nstime.h
@@ -3,12 +3,23 @@
/* Maximum supported number of seconds (~584 years). */
#define NSTIME_SEC_MAX KQU(18446744072)
-#define NSTIME_ZERO_INITIALIZER {0}
+
+#define NSTIME_MAGIC ((uint32_t)0xb8a9ce37)
+#ifdef JEMALLOC_DEBUG
+# define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC}
+#else
+# define NSTIME_ZERO_INITIALIZER {0}
+#endif
typedef struct {
uint64_t ns;
+#ifdef JEMALLOC_DEBUG
+ uint32_t magic; /* Tracks if initialized. */
+#endif
} nstime_t;
+static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER;
+
void nstime_init(nstime_t *time, uint64_t ns);
void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec);
uint64_t nstime_ns(const nstime_t *time);
@@ -24,11 +35,39 @@ void nstime_isubtract(nstime_t *time, uint64_t subtrahend);
void nstime_imultiply(nstime_t *time, uint64_t multiplier);
void nstime_idivide(nstime_t *time, uint64_t divisor);
uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor);
+uint64_t nstime_ns_since(const nstime_t *past);
typedef bool (nstime_monotonic_t)(void);
extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic;
-typedef bool (nstime_update_t)(nstime_t *);
+typedef void (nstime_update_t)(nstime_t *);
extern nstime_update_t *JET_MUTABLE nstime_update;
+typedef void (nstime_prof_update_t)(nstime_t *);
+extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update;
+
+void nstime_init_update(nstime_t *time);
+void nstime_prof_init_update(nstime_t *time);
+
+enum prof_time_res_e {
+ prof_time_res_default = 0,
+ prof_time_res_high = 1
+};
+typedef enum prof_time_res_e prof_time_res_t;
+
+extern prof_time_res_t opt_prof_time_res;
+extern const char *prof_time_res_mode_names[];
+
+JEMALLOC_ALWAYS_INLINE void
+nstime_init_zero(nstime_t *time) {
+ nstime_copy(time, &nstime_zero);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+nstime_equals_zero(nstime_t *time) {
+ int diff = nstime_compare(time, &nstime_zero);
+ assert(diff >= 0);
+ return diff == 0;
+}
+
#endif /* JEMALLOC_INTERNAL_NSTIME_H */
diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h
new file mode 100644
index 0000000..4748a05
--- /dev/null
+++ b/include/jemalloc/internal/pa.h
@@ -0,0 +1,243 @@
+#ifndef JEMALLOC_INTERNAL_PA_H
+#define JEMALLOC_INTERNAL_PA_H
+
+#include "jemalloc/internal/base.h"
+#include "jemalloc/internal/decay.h"
+#include "jemalloc/internal/ecache.h"
+#include "jemalloc/internal/edata_cache.h"
+#include "jemalloc/internal/emap.h"
+#include "jemalloc/internal/hpa.h"
+#include "jemalloc/internal/lockedint.h"
+#include "jemalloc/internal/pac.h"
+#include "jemalloc/internal/pai.h"
+#include "jemalloc/internal/sec.h"
+
+/*
+ * The page allocator; responsible for acquiring pages of memory for
+ * allocations. It picks the implementation of the page allocator interface
+ * (i.e. a pai_t) to handle a given page-level allocation request. For now, the
+ * only such implementation is the PAC code ("page allocator classic"), but
+ * others will be coming soon.
+ */
+
+typedef struct pa_central_s pa_central_t;
+struct pa_central_s {
+ hpa_central_t hpa;
+};
+
+/*
+ * The stats for a particular pa_shard. Because of the way the ctl module
+ * handles stats epoch data collection (it has its own arena_stats, and merges
+ * the stats from each arena into it), this needs to live in the arena_stats_t;
+ * hence we define it here and let the pa_shard have a pointer (rather than the
+ * more natural approach of just embedding it in the pa_shard itself).
+ *
+ * We follow the arena_stats_t approach of marking the derived fields. These
+ * are the ones that are not maintained on their own; instead, their values are
+ * derived during those stats merges.
+ */
+typedef struct pa_shard_stats_s pa_shard_stats_t;
+struct pa_shard_stats_s {
+ /* Number of edata_t structs allocated by base, but not being used. */
+ size_t edata_avail; /* Derived. */
+ /*
+ * Stats specific to the PAC. For now, these are the only stats that
+ * exist, but there will eventually be other page allocators. Things
+ * like edata_avail make sense in a cross-PA sense, but things like
+ * npurges don't.
+ */
+ pac_stats_t pac_stats;
+};
+
+/*
+ * The local allocator handle. Keeps the state necessary to satisfy page-sized
+ * allocations.
+ *
+ * The contents are mostly internal to the PA module. The key exception is that
+ * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches
+ * decay_ts, for a couple of queries, passing them back to a PA function, or
+ * acquiring decay.mtx and looking at decay.purging. The reasoning is that,
+ * while PA decides what and how to purge, the arena code decides when and where
+ * (e.g. on what thread). It's allowed to use the presence of another purger to
+ * decide.
+ * (The background thread code also touches some other decay internals, but
+ * that's not fundamental; its' just an artifact of a partial refactoring, and
+ * its accesses could be straightforwardly moved inside the decay module).
+ */
+typedef struct pa_shard_s pa_shard_t;
+struct pa_shard_s {
+ /* The central PA this shard is associated with. */
+ pa_central_t *central;
+
+ /*
+ * Number of pages in active extents.
+ *
+ * Synchronization: atomic.
+ */
+ atomic_zu_t nactive;
+
+ /*
+ * Whether or not we should prefer the hugepage allocator. Atomic since
+ * it may be concurrently modified by a thread setting extent hooks.
+ * Note that we still may do HPA operations in this arena; if use_hpa is
+ * changed from true to false, we'll free back to the hugepage allocator
+ * for those allocations.
+ */
+ atomic_b_t use_hpa;
+
+ /*
+ * If we never used the HPA to begin with, it wasn't initialized, and so
+ * we shouldn't try to e.g. acquire its mutexes during fork. This
+ * tracks that knowledge.
+ */
+ bool ever_used_hpa;
+
+ /* Allocates from a PAC. */
+ pac_t pac;
+
+ /*
+ * We place a small extent cache in front of the HPA, since we intend
+ * these configurations to use many fewer arenas, and therefore have a
+ * higher risk of hot locks.
+ */
+ sec_t hpa_sec;
+ hpa_shard_t hpa_shard;
+
+ /* The source of edata_t objects. */
+ edata_cache_t edata_cache;
+
+ unsigned ind;
+
+ malloc_mutex_t *stats_mtx;
+ pa_shard_stats_t *stats;
+
+ /* The emap this shard is tied to. */
+ emap_t *emap;
+
+ /* The base from which we get the ehooks and allocate metadat. */
+ base_t *base;
+};
+
+static inline bool
+pa_shard_dont_decay_muzzy(pa_shard_t *shard) {
+ return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 &&
+ pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0;
+}
+
+static inline ehooks_t *
+pa_shard_ehooks_get(pa_shard_t *shard) {
+ return base_ehooks_get(shard->base);
+}
+
+/* Returns true on error. */
+bool pa_central_init(pa_central_t *central, base_t *base, bool hpa,
+ hpa_hooks_t *hpa_hooks);
+
+/* Returns true on error. */
+bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central,
+ emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats,
+ malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold,
+ ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms);
+
+/*
+ * This isn't exposed to users; we allow late enablement of the HPA shard so
+ * that we can boot without worrying about the HPA, then turn it on in a0.
+ */
+bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard,
+ const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts);
+
+/*
+ * We stop using the HPA when custom extent hooks are installed, but still
+ * redirect deallocations to it.
+ */
+void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard);
+
+/*
+ * This does the PA-specific parts of arena reset (i.e. freeing all active
+ * allocations).
+ */
+void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard);
+
+/*
+ * Destroy all the remaining retained extents. Should only be called after
+ * decaying all active, dirty, and muzzy extents to the retained state, as the
+ * last step in destroying the shard.
+ */
+void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard);
+
+/* Gets an edata for the given allocation. */
+edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size,
+ size_t alignment, bool slab, szind_t szind, bool zero, bool guarded,
+ bool *deferred_work_generated);
+/* Returns true on error, in which case nothing changed. */
+bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
+ size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated);
+/*
+ * The same. Sets *generated_dirty to true if we produced new dirty pages, and
+ * false otherwise.
+ */
+bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size,
+ size_t new_size, szind_t szind, bool *deferred_work_generated);
+/*
+ * Frees the given edata back to the pa. Sets *generated_dirty if we produced
+ * new dirty pages (well, we always set it for now; but this need not be the
+ * case).
+ * (We could make generated_dirty the return value of course, but this is more
+ * consistent with the shrink pathway and our error codes here).
+ */
+void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata,
+ bool *deferred_work_generated);
+bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state,
+ ssize_t decay_ms, pac_purge_eagerness_t eagerness);
+ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state);
+
+/*
+ * Do deferred work on this PA shard.
+ *
+ * Morally, this should do both PAC decay and the HPA deferred work. For now,
+ * though, the arena, background thread, and PAC modules are tightly interwoven
+ * in a way that's tricky to extricate, so we only do the HPA-specific parts.
+ */
+void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard,
+ bool deferral_allowed);
+void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard);
+
+/******************************************************************************/
+/*
+ * Various bits of "boring" functionality that are still part of this module,
+ * but that we relegate to pa_extra.c, to keep the core logic in pa.c as
+ * readable as possible.
+ */
+
+/*
+ * These fork phases are synchronized with the arena fork phase numbering to
+ * make it easy to keep straight. That's why there's no prefork1.
+ */
+void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard);
+void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard);
+
+void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive,
+ size_t *ndirty, size_t *nmuzzy);
+
+void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard,
+ pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out,
+ hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out,
+ size_t *resident);
+
+/*
+ * Reads the PA-owned mutex stats into the output stats array, at the
+ * appropriate positions. Morally, these stats should really live in
+ * pa_shard_stats_t, but the indices are sort of baked into the various mutex
+ * prof macros. This would be a good thing to do at some point.
+ */
+void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard,
+ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]);
+
+#endif /* JEMALLOC_INTERNAL_PA_H */
diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h
new file mode 100644
index 0000000..01c4e6a
--- /dev/null
+++ b/include/jemalloc/internal/pac.h
@@ -0,0 +1,179 @@
+#ifndef JEMALLOC_INTERNAL_PAC_H
+#define JEMALLOC_INTERNAL_PAC_H
+
+#include "jemalloc/internal/exp_grow.h"
+#include "jemalloc/internal/pai.h"
+#include "san_bump.h"
+
+
+/*
+ * Page allocator classic; an implementation of the PAI interface that:
+ * - Can be used for arenas with custom extent hooks.
+ * - Can always satisfy any allocation request (including highly-fragmentary
+ * ones).
+ * - Can use efficient OS-level zeroing primitives for demand-filled pages.
+ */
+
+/* How "eager" decay/purging should be. */
+enum pac_purge_eagerness_e {
+ PAC_PURGE_ALWAYS,
+ PAC_PURGE_NEVER,
+ PAC_PURGE_ON_EPOCH_ADVANCE
+};
+typedef enum pac_purge_eagerness_e pac_purge_eagerness_t;
+
+typedef struct pac_decay_stats_s pac_decay_stats_t;
+struct pac_decay_stats_s {
+ /* Total number of purge sweeps. */
+ locked_u64_t npurge;
+ /* Total number of madvise calls made. */
+ locked_u64_t nmadvise;
+ /* Total number of pages purged. */
+ locked_u64_t purged;
+};
+
+typedef struct pac_estats_s pac_estats_t;
+struct pac_estats_s {
+ /*
+ * Stats for a given index in the range [0, SC_NPSIZES] in the various
+ * ecache_ts.
+ * We track both bytes and # of extents: two extents in the same bucket
+ * may have different sizes if adjacent size classes differ by more than
+ * a page, so bytes cannot always be derived from # of extents.
+ */
+ size_t ndirty;
+ size_t dirty_bytes;
+ size_t nmuzzy;
+ size_t muzzy_bytes;
+ size_t nretained;
+ size_t retained_bytes;
+};
+
+typedef struct pac_stats_s pac_stats_t;
+struct pac_stats_s {
+ pac_decay_stats_t decay_dirty;
+ pac_decay_stats_t decay_muzzy;
+
+ /*
+ * Number of unused virtual memory bytes currently retained. Retained
+ * bytes are technically mapped (though always decommitted or purged),
+ * but they are excluded from the mapped statistic (above).
+ */
+ size_t retained; /* Derived. */
+
+ /*
+ * Number of bytes currently mapped, excluding retained memory (and any
+ * base-allocated memory, which is tracked by the arena stats).
+ *
+ * We name this "pac_mapped" to avoid confusion with the arena_stats
+ * "mapped".
+ */
+ atomic_zu_t pac_mapped;
+
+ /* VM space had to be leaked (undocumented). Normally 0. */
+ atomic_zu_t abandoned_vm;
+};
+
+typedef struct pac_s pac_t;
+struct pac_s {
+ /*
+ * Must be the first member (we convert it to a PAC given only a
+ * pointer). The handle to the allocation interface.
+ */
+ pai_t pai;
+ /*
+ * Collections of extents that were previously allocated. These are
+ * used when allocating extents, in an attempt to re-use address space.
+ *
+ * Synchronization: internal.
+ */
+ ecache_t ecache_dirty;
+ ecache_t ecache_muzzy;
+ ecache_t ecache_retained;
+
+ base_t *base;
+ emap_t *emap;
+ edata_cache_t *edata_cache;
+
+ /* The grow info for the retained ecache. */
+ exp_grow_t exp_grow;
+ malloc_mutex_t grow_mtx;
+
+ /* Special allocator for guarded frequently reused extents. */
+ san_bump_alloc_t sba;
+
+ /* How large extents should be before getting auto-purged. */
+ atomic_zu_t oversize_threshold;
+
+ /*
+ * Decay-based purging state, responsible for scheduling extent state
+ * transitions.
+ *
+ * Synchronization: via the internal mutex.
+ */
+ decay_t decay_dirty; /* dirty --> muzzy */
+ decay_t decay_muzzy; /* muzzy --> retained */
+
+ malloc_mutex_t *stats_mtx;
+ pac_stats_t *stats;
+
+ /* Extent serial number generator state. */
+ atomic_zu_t extent_sn_next;
+};
+
+bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap,
+ edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold,
+ ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats,
+ malloc_mutex_t *stats_mtx);
+
+static inline size_t
+pac_mapped(pac_t *pac) {
+ return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED);
+}
+
+static inline ehooks_t *
+pac_ehooks_get(pac_t *pac) {
+ return base_ehooks_get(pac->base);
+}
+
+/*
+ * All purging functions require holding decay->mtx. This is one of the few
+ * places external modules are allowed to peek inside pa_shard_t internals.
+ */
+
+/*
+ * Decays the number of pages currently in the ecache. This might not leave the
+ * ecache empty if other threads are inserting dirty objects into it
+ * concurrently with the call.
+ */
+void pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
+ pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay);
+/*
+ * Updates decay settings for the current time, and conditionally purges in
+ * response (depending on decay_purge_setting). Returns whether or not the
+ * epoch advanced.
+ */
+bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay,
+ pac_decay_stats_t *decay_stats, ecache_t *ecache,
+ pac_purge_eagerness_t eagerness);
+
+/*
+ * Gets / sets the maximum amount that we'll grow an arena down the
+ * grow-retained pathways (unless forced to by an allocaction request).
+ *
+ * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't
+ * care about the previous value.
+ *
+ * Returns true on error (if the new limit is not valid).
+ */
+bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit,
+ size_t *new_limit);
+
+bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state,
+ ssize_t decay_ms, pac_purge_eagerness_t eagerness);
+ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state);
+
+void pac_reset(tsdn_t *tsdn, pac_t *pac);
+void pac_destroy(tsdn_t *tsdn, pac_t *pac);
+
+#endif /* JEMALLOC_INTERNAL_PAC_H */
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h
index 7dae633..ad1f606 100644
--- a/include/jemalloc/internal/pages.h
+++ b/include/jemalloc/internal/pages.h
@@ -13,10 +13,27 @@
/* Return the smallest pagesize multiple that is >= s. */
#define PAGE_CEILING(s) \
(((s) + PAGE_MASK) & ~PAGE_MASK)
+/* Return the largest pagesize multiple that is <=s. */
+#define PAGE_FLOOR(s) \
+ ((s) & ~PAGE_MASK)
/* Huge page size. LG_HUGEPAGE is determined by the configure script. */
#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE))
#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1))
+
+#if LG_HUGEPAGE != 0
+# define HUGEPAGE_PAGES (HUGEPAGE / PAGE)
+#else
+/*
+ * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If
+ * we can't autodetect the hugepage size, it gets treated as 0, in which case
+ * we'll trigger a compiler error in those arrays. Avoid this case by ensuring
+ * that this value is at least 1. (We won't ever run in this degraded state;
+ * hpa_supported() returns false in this case.
+ */
+# define HUGEPAGE_PAGES 1
+#endif
+
/* Return the huge page base address for the huge page containing address a. */
#define HUGEPAGE_ADDR2BASE(a) \
((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK))
@@ -58,6 +75,18 @@ static const bool pages_can_purge_forced =
#endif
;
+#if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL)
+# define PAGES_CAN_HUGIFY
+#endif
+
+static const bool pages_can_hugify =
+#ifdef PAGES_CAN_HUGIFY
+ true
+#else
+ false
+#endif
+ ;
+
typedef enum {
thp_mode_default = 0, /* Do not change hugepage settings. */
thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */
@@ -84,5 +113,7 @@ bool pages_dontdump(void *addr, size_t size);
bool pages_dodump(void *addr, size_t size);
bool pages_boot(void);
void pages_set_thp_state (void *ptr, size_t size);
+void pages_mark_guards(void *head, void *tail);
+void pages_unmark_guards(void *head, void *tail);
#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */
diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h
new file mode 100644
index 0000000..d978cd7
--- /dev/null
+++ b/include/jemalloc/internal/pai.h
@@ -0,0 +1,95 @@
+#ifndef JEMALLOC_INTERNAL_PAI_H
+#define JEMALLOC_INTERNAL_PAI_H
+
+/* An interface for page allocation. */
+
+typedef struct pai_s pai_t;
+struct pai_s {
+ /* Returns NULL on failure. */
+ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t alignment, bool zero, bool guarded, bool frequent_reuse,
+ bool *deferred_work_generated);
+ /*
+ * Returns the number of extents added to the list (which may be fewer
+ * than requested, in case of OOM). The list should already be
+ * initialized. The only alignment guarantee is page-alignment, and
+ * the results are not necessarily zeroed.
+ */
+ size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t nallocs, edata_list_active_t *results,
+ bool *deferred_work_generated);
+ bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool zero,
+ bool *deferred_work_generated);
+ bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ size_t old_size, size_t new_size, bool *deferred_work_generated);
+ void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ bool *deferred_work_generated);
+ /* This function empties out list as a side-effect of being called. */
+ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self,
+ edata_list_active_t *list, bool *deferred_work_generated);
+ uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self);
+};
+
+/*
+ * These are just simple convenience functions to avoid having to reference the
+ * same pai_t twice on every invocation.
+ */
+
+static inline edata_t *
+pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment,
+ bool zero, bool guarded, bool frequent_reuse,
+ bool *deferred_work_generated) {
+ return self->alloc(tsdn, self, size, alignment, zero, guarded,
+ frequent_reuse, deferred_work_generated);
+}
+
+static inline size_t
+pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs,
+ edata_list_active_t *results, bool *deferred_work_generated) {
+ return self->alloc_batch(tsdn, self, size, nallocs, results,
+ deferred_work_generated);
+}
+
+static inline bool
+pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size, bool zero, bool *deferred_work_generated) {
+ return self->expand(tsdn, self, edata, old_size, new_size, zero,
+ deferred_work_generated);
+}
+
+static inline bool
+pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size,
+ size_t new_size, bool *deferred_work_generated) {
+ return self->shrink(tsdn, self, edata, old_size, new_size,
+ deferred_work_generated);
+}
+
+static inline void
+pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata,
+ bool *deferred_work_generated) {
+ self->dalloc(tsdn, self, edata, deferred_work_generated);
+}
+
+static inline void
+pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list,
+ bool *deferred_work_generated) {
+ self->dalloc_batch(tsdn, self, list, deferred_work_generated);
+}
+
+static inline uint64_t
+pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) {
+ return self->time_until_deferred_work(tsdn, self);
+}
+
+/*
+ * An implementation of batch allocation that simply calls alloc once for
+ * each item in the list.
+ */
+size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size,
+ size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated);
+/* Ditto, for dalloc. */
+void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self,
+ edata_list_active_t *list, bool *deferred_work_generated);
+
+#endif /* JEMALLOC_INTERNAL_PAI_H */
diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h
new file mode 100644
index 0000000..59da3e4
--- /dev/null
+++ b/include/jemalloc/internal/peak.h
@@ -0,0 +1,37 @@
+#ifndef JEMALLOC_INTERNAL_PEAK_H
+#define JEMALLOC_INTERNAL_PEAK_H
+
+typedef struct peak_s peak_t;
+struct peak_s {
+ /* The highest recorded peak value, after adjustment (see below). */
+ uint64_t cur_max;
+ /*
+ * The difference between alloc and dalloc at the last set_zero call;
+ * this lets us cancel out the appropriate amount of excess.
+ */
+ uint64_t adjustment;
+};
+
+#define PEAK_INITIALIZER {0, 0}
+
+static inline uint64_t
+peak_max(peak_t *peak) {
+ return peak->cur_max;
+}
+
+static inline void
+peak_update(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
+ int64_t candidate_max = (int64_t)(alloc - dalloc - peak->adjustment);
+ if (candidate_max > (int64_t)peak->cur_max) {
+ peak->cur_max = candidate_max;
+ }
+}
+
+/* Resets the counter to zero; all peaks are now relative to this point. */
+static inline void
+peak_set_zero(peak_t *peak, uint64_t alloc, uint64_t dalloc) {
+ peak->cur_max = 0;
+ peak->adjustment = alloc - dalloc;
+}
+
+#endif /* JEMALLOC_INTERNAL_PEAK_H */
diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h
new file mode 100644
index 0000000..b808ce0
--- /dev/null
+++ b/include/jemalloc/internal/peak_event.h
@@ -0,0 +1,24 @@
+#ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H
+#define JEMALLOC_INTERNAL_PEAK_EVENT_H
+
+/*
+ * While peak.h contains the simple helper struct that tracks state, this
+ * contains the allocator tie-ins (and knows about tsd, the event module, etc.).
+ */
+
+/* Update the peak with current tsd state. */
+void peak_event_update(tsd_t *tsd);
+/* Set current state to zero. */
+void peak_event_zero(tsd_t *tsd);
+uint64_t peak_event_max(tsd_t *tsd);
+
+/* Manual hooks. */
+/* The activity-triggered hooks. */
+uint64_t peak_alloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd);
+void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+uint64_t peak_dalloc_new_event_wait(tsd_t *tsd);
+uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd);
+void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed);
+
+#endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h
index 84d6778..5f091c5 100644
--- a/include/jemalloc/internal/ph.h
+++ b/include/jemalloc/internal/ph.h
@@ -1,3 +1,6 @@
+#ifndef JEMALLOC_INTERNAL_PH_H
+#define JEMALLOC_INTERNAL_PH_H
+
/*
* A Pairing Heap implementation.
*
@@ -10,382 +13,508 @@
* http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf
*
*******************************************************************************
+ *
+ * We include a non-obvious optimization:
+ * - First, we introduce a new pop-and-link operation; pop the two most
+ * recently-inserted items off the aux-list, link them, and push the resulting
+ * heap.
+ * - We maintain a count of the number of insertions since the last time we
+ * merged the aux-list (i.e. via first() or remove_first()). After N inserts,
+ * we do ffs(N) pop-and-link operations.
+ *
+ * One way to think of this is that we're progressively building up a tree in
+ * the aux-list, rather than a linked-list (think of the series of merges that
+ * will be performed as the aux-count grows).
+ *
+ * There's a couple reasons we benefit from this:
+ * - Ordinarily, after N insertions, the aux-list is of size N. With our
+ * strategy, it's of size O(log(N)). So we decrease the worst-case time of
+ * first() calls, and reduce the average cost of remove_min calls. Since
+ * these almost always occur while holding a lock, we practically reduce the
+ * frequency of unusually long hold times.
+ * - This moves the bulk of the work of merging the aux-list onto the threads
+ * that are inserting into the heap. In some common scenarios, insertions
+ * happen in bulk, from a single thread (think tcache flushing; we potentially
+ * move many slabs from slabs_full to slabs_nonfull). All the nodes in this
+ * case are in the inserting threads cache, and linking them is very cheap
+ * (cache misses dominate linking cost). Without this optimization, linking
+ * happens on the next call to remove_first. Since that remove_first call
+ * likely happens on a different thread (or at least, after the cache has
+ * gotten cold if done on the same thread), deferring linking trades cheap
+ * link operations now for expensive ones later.
+ *
+ * The ffs trick keeps amortized insert cost at constant time. Similar
+ * strategies based on periodically sorting the list after a batch of operations
+ * perform worse than this in practice, even with various fancy tricks; they
+ * all took amortized complexity of an insert from O(1) to O(log(n)).
*/
-#ifndef PH_H_
-#define PH_H_
+typedef int (*ph_cmp_t)(void *, void *);
/* Node structure. */
-#define phn(a_type) \
-struct { \
- a_type *phn_prev; \
- a_type *phn_next; \
- a_type *phn_lchild; \
+typedef struct phn_link_s phn_link_t;
+struct phn_link_s {
+ void *prev;
+ void *next;
+ void *lchild;
+};
+
+typedef struct ph_s ph_t;
+struct ph_s {
+ void *root;
+ /*
+ * Inserts done since the last aux-list merge. This is not necessarily
+ * the size of the aux-list, since it's possible that removals have
+ * happened since, and we don't track whether or not those removals are
+ * from the aux list.
+ */
+ size_t auxcount;
+};
+
+JEMALLOC_ALWAYS_INLINE phn_link_t *
+phn_link_get(void *phn, size_t offset) {
+ return (phn_link_t *)(((uintptr_t)phn) + offset);
}
-/* Root structure. */
-#define ph(a_type) \
-struct { \
- a_type *ph_root; \
+JEMALLOC_ALWAYS_INLINE void
+phn_link_init(void *phn, size_t offset) {
+ phn_link_get(phn, offset)->prev = NULL;
+ phn_link_get(phn, offset)->next = NULL;
+ phn_link_get(phn, offset)->lchild = NULL;
}
-/* Internal utility macros. */
-#define phn_lchild_get(a_type, a_field, a_phn) \
- (a_phn->a_field.phn_lchild)
-#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \
- a_phn->a_field.phn_lchild = a_lchild; \
-} while (0)
-
-#define phn_next_get(a_type, a_field, a_phn) \
- (a_phn->a_field.phn_next)
-#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \
- a_phn->a_field.phn_prev = a_prev; \
-} while (0)
-
-#define phn_prev_get(a_type, a_field, a_phn) \
- (a_phn->a_field.phn_prev)
-#define phn_next_set(a_type, a_field, a_phn, a_next) do { \
- a_phn->a_field.phn_next = a_next; \
-} while (0)
-
-#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \
- a_type *phn0child; \
- \
- assert(a_phn0 != NULL); \
- assert(a_phn1 != NULL); \
- assert(a_cmp(a_phn0, a_phn1) <= 0); \
- \
- phn_prev_set(a_type, a_field, a_phn1, a_phn0); \
- phn0child = phn_lchild_get(a_type, a_field, a_phn0); \
- phn_next_set(a_type, a_field, a_phn1, phn0child); \
- if (phn0child != NULL) { \
- phn_prev_set(a_type, a_field, phn0child, a_phn1); \
- } \
- phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \
-} while (0)
-
-#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \
- if (a_phn0 == NULL) { \
- r_phn = a_phn1; \
- } else if (a_phn1 == NULL) { \
- r_phn = a_phn0; \
- } else if (a_cmp(a_phn0, a_phn1) < 0) { \
- phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \
- a_cmp); \
- r_phn = a_phn0; \
- } else { \
- phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \
- a_cmp); \
- r_phn = a_phn1; \
- } \
-} while (0)
+/* Internal utility helpers. */
+JEMALLOC_ALWAYS_INLINE void *
+phn_lchild_get(void *phn, size_t offset) {
+ return phn_link_get(phn, offset)->lchild;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+phn_lchild_set(void *phn, void *lchild, size_t offset) {
+ phn_link_get(phn, offset)->lchild = lchild;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+phn_next_get(void *phn, size_t offset) {
+ return phn_link_get(phn, offset)->next;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+phn_next_set(void *phn, void *next, size_t offset) {
+ phn_link_get(phn, offset)->next = next;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+phn_prev_get(void *phn, size_t offset) {
+ return phn_link_get(phn, offset)->prev;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+phn_prev_set(void *phn, void *prev, size_t offset) {
+ phn_link_get(phn, offset)->prev = prev;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+phn_merge_ordered(void *phn0, void *phn1, size_t offset,
+ ph_cmp_t cmp) {
+ void *phn0child;
+
+ assert(phn0 != NULL);
+ assert(phn1 != NULL);
+ assert(cmp(phn0, phn1) <= 0);
+
+ phn_prev_set(phn1, phn0, offset);
+ phn0child = phn_lchild_get(phn0, offset);
+ phn_next_set(phn1, phn0child, offset);
+ if (phn0child != NULL) {
+ phn_prev_set(phn0child, phn1, offset);
+ }
+ phn_lchild_set(phn0, phn1, offset);
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) {
+ void *result;
+ if (phn0 == NULL) {
+ result = phn1;
+ } else if (phn1 == NULL) {
+ result = phn0;
+ } else if (cmp(phn0, phn1) < 0) {
+ phn_merge_ordered(phn0, phn1, offset, cmp);
+ result = phn0;
+ } else {
+ phn_merge_ordered(phn1, phn0, offset, cmp);
+ result = phn1;
+ }
+ return result;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) {
+ void *head = NULL;
+ void *tail = NULL;
+ void *phn0 = phn;
+ void *phn1 = phn_next_get(phn0, offset);
+
+ /*
+ * Multipass merge, wherein the first two elements of a FIFO
+ * are repeatedly merged, and each result is appended to the
+ * singly linked FIFO, until the FIFO contains only a single
+ * element. We start with a sibling list but no reference to
+ * its tail, so we do a single pass over the sibling list to
+ * populate the FIFO.
+ */
+ if (phn1 != NULL) {
+ void *phnrest = phn_next_get(phn1, offset);
+ if (phnrest != NULL) {
+ phn_prev_set(phnrest, NULL, offset);
+ }
+ phn_prev_set(phn0, NULL, offset);
+ phn_next_set(phn0, NULL, offset);
+ phn_prev_set(phn1, NULL, offset);
+ phn_next_set(phn1, NULL, offset);
+ phn0 = phn_merge(phn0, phn1, offset, cmp);
+ head = tail = phn0;
+ phn0 = phnrest;
+ while (phn0 != NULL) {
+ phn1 = phn_next_get(phn0, offset);
+ if (phn1 != NULL) {
+ phnrest = phn_next_get(phn1, offset);
+ if (phnrest != NULL) {
+ phn_prev_set(phnrest, NULL, offset);
+ }
+ phn_prev_set(phn0, NULL, offset);
+ phn_next_set(phn0, NULL, offset);
+ phn_prev_set(phn1, NULL, offset);
+ phn_next_set(phn1, NULL, offset);
+ phn0 = phn_merge(phn0, phn1, offset, cmp);
+ phn_next_set(tail, phn0, offset);
+ tail = phn0;
+ phn0 = phnrest;
+ } else {
+ phn_next_set(tail, phn0, offset);
+ tail = phn0;
+ phn0 = NULL;
+ }
+ }
+ phn0 = head;
+ phn1 = phn_next_get(phn0, offset);
+ if (phn1 != NULL) {
+ while (true) {
+ head = phn_next_get(phn1, offset);
+ assert(phn_prev_get(phn0, offset) == NULL);
+ phn_next_set(phn0, NULL, offset);
+ assert(phn_prev_get(phn1, offset) == NULL);
+ phn_next_set(phn1, NULL, offset);
+ phn0 = phn_merge(phn0, phn1, offset, cmp);
+ if (head == NULL) {
+ break;
+ }
+ phn_next_set(tail, phn0, offset);
+ tail = phn0;
+ phn0 = head;
+ phn1 = phn_next_get(phn0, offset);
+ }
+ }
+ }
+ return phn0;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) {
+ ph->auxcount = 0;
+ void *phn = phn_next_get(ph->root, offset);
+ if (phn != NULL) {
+ phn_prev_set(ph->root, NULL, offset);
+ phn_next_set(ph->root, NULL, offset);
+ phn_prev_set(phn, NULL, offset);
+ phn = phn_merge_siblings(phn, offset, cmp);
+ assert(phn_next_get(phn, offset) == NULL);
+ ph->root = phn_merge(ph->root, phn, offset, cmp);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) {
+ void *result;
+ void *lchild = phn_lchild_get(phn, offset);
+ if (lchild == NULL) {
+ result = NULL;
+ } else {
+ result = phn_merge_siblings(lchild, offset, cmp);
+ }
+ return result;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+ph_new(ph_t *ph) {
+ ph->root = NULL;
+ ph->auxcount = 0;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+ph_empty(ph_t *ph) {
+ return ph->root == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
+ if (ph->root == NULL) {
+ return NULL;
+ }
+ ph_merge_aux(ph, offset, cmp);
+ return ph->root;
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_any(ph_t *ph, size_t offset) {
+ if (ph->root == NULL) {
+ return NULL;
+ }
+ void *aux = phn_next_get(ph->root, offset);
+ if (aux != NULL) {
+ return aux;
+ }
+ return ph->root;
+}
+
+/* Returns true if we should stop trying to merge. */
+JEMALLOC_ALWAYS_INLINE bool
+ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) {
+ assert(ph->root != NULL);
+ void *phn0 = phn_next_get(ph->root, offset);
+ if (phn0 == NULL) {
+ return true;
+ }
+ void *phn1 = phn_next_get(phn0, offset);
+ if (phn1 == NULL) {
+ return true;
+ }
+ void *next_phn1 = phn_next_get(phn1, offset);
+ phn_next_set(phn0, NULL, offset);
+ phn_prev_set(phn0, NULL, offset);
+ phn_next_set(phn1, NULL, offset);
+ phn_prev_set(phn1, NULL, offset);
+ phn0 = phn_merge(phn0, phn1, offset, cmp);
+ phn_next_set(phn0, next_phn1, offset);
+ if (next_phn1 != NULL) {
+ phn_prev_set(next_phn1, phn0, offset);
+ }
+ phn_next_set(ph->root, phn0, offset);
+ phn_prev_set(phn0, ph->root, offset);
+ return next_phn1 == NULL;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
+ phn_link_init(phn, offset);
-#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \
- a_type *head = NULL; \
- a_type *tail = NULL; \
- a_type *phn0 = a_phn; \
- a_type *phn1 = phn_next_get(a_type, a_field, phn0); \
+ /*
+ * Treat the root as an aux list during insertion, and lazily merge
+ * during a_prefix##remove_first(). For elements that are inserted,
+ * then removed via a_prefix##remove() before the aux list is ever
+ * processed, this makes insert/remove constant-time, whereas eager
+ * merging would make insert O(log n).
+ */
+ if (ph->root == NULL) {
+ ph->root = phn;
+ } else {
+ /*
+ * As a special case, check to see if we can replace the root.
+ * This is practically common in some important cases, and lets
+ * us defer some insertions (hopefully, until the point where
+ * some of the items in the aux list have been removed, savings
+ * us from linking them at all).
+ */
+ if (cmp(phn, ph->root) < 0) {
+ phn_lchild_set(phn, ph->root, offset);
+ phn_prev_set(ph->root, phn, offset);
+ ph->root = phn;
+ ph->auxcount = 0;
+ return;
+ }
+ ph->auxcount++;
+ phn_next_set(phn, phn_next_get(ph->root, offset), offset);
+ if (phn_next_get(ph->root, offset) != NULL) {
+ phn_prev_set(phn_next_get(ph->root, offset), phn,
+ offset);
+ }
+ phn_prev_set(phn, ph->root, offset);
+ phn_next_set(ph->root, phn, offset);
+ }
+ if (ph->auxcount > 1) {
+ unsigned nmerges = ffs_zu(ph->auxcount - 1);
+ bool done = false;
+ for (unsigned i = 0; i < nmerges && !done; i++) {
+ done = ph_try_aux_merge_pair(ph, offset, cmp);
+ }
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void *
+ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) {
+ void *ret;
+
+ if (ph->root == NULL) {
+ return NULL;
+ }
+ ph_merge_aux(ph, offset, cmp);
+ ret = ph->root;
+ ph->root = ph_merge_children(ph->root, offset, cmp);
+
+ return ret;
+
+}
+
+JEMALLOC_ALWAYS_INLINE void
+ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) {
+ void *replace;
+ void *parent;
+
+ if (ph->root == phn) {
+ /*
+ * We can delete from aux list without merging it, but we need
+ * to merge if we are dealing with the root node and it has
+ * children.
+ */
+ if (phn_lchild_get(phn, offset) == NULL) {
+ ph->root = phn_next_get(phn, offset);
+ if (ph->root != NULL) {
+ phn_prev_set(ph->root, NULL, offset);
+ }
+ return;
+ }
+ ph_merge_aux(ph, offset, cmp);
+ if (ph->root == phn) {
+ ph->root = ph_merge_children(ph->root, offset, cmp);
+ return;
+ }
+ }
+
+ /* Get parent (if phn is leftmost child) before mutating. */
+ if ((parent = phn_prev_get(phn, offset)) != NULL) {
+ if (phn_lchild_get(parent, offset) != phn) {
+ parent = NULL;
+ }
+ }
+ /* Find a possible replacement node, and link to parent. */
+ replace = ph_merge_children(phn, offset, cmp);
+ /* Set next/prev for sibling linked list. */
+ if (replace != NULL) {
+ if (parent != NULL) {
+ phn_prev_set(replace, parent, offset);
+ phn_lchild_set(parent, replace, offset);
+ } else {
+ phn_prev_set(replace, phn_prev_get(phn, offset),
+ offset);
+ if (phn_prev_get(phn, offset) != NULL) {
+ phn_next_set(phn_prev_get(phn, offset), replace,
+ offset);
+ }
+ }
+ phn_next_set(replace, phn_next_get(phn, offset), offset);
+ if (phn_next_get(phn, offset) != NULL) {
+ phn_prev_set(phn_next_get(phn, offset), replace,
+ offset);
+ }
+ } else {
+ if (parent != NULL) {
+ void *next = phn_next_get(phn, offset);
+ phn_lchild_set(parent, next, offset);
+ if (next != NULL) {
+ phn_prev_set(next, parent, offset);
+ }
+ } else {
+ assert(phn_prev_get(phn, offset) != NULL);
+ phn_next_set(
+ phn_prev_get(phn, offset),
+ phn_next_get(phn, offset), offset);
+ }
+ if (phn_next_get(phn, offset) != NULL) {
+ phn_prev_set(
+ phn_next_get(phn, offset),
+ phn_prev_get(phn, offset), offset);
+ }
+ }
+}
+
+#define ph_structs(a_prefix, a_type) \
+typedef struct { \
+ phn_link_t link; \
+} a_prefix##_link_t; \
\
- /* \
- * Multipass merge, wherein the first two elements of a FIFO \
- * are repeatedly merged, and each result is appended to the \
- * singly linked FIFO, until the FIFO contains only a single \
- * element. We start with a sibling list but no reference to \
- * its tail, so we do a single pass over the sibling list to \
- * populate the FIFO. \
- */ \
- if (phn1 != NULL) { \
- a_type *phnrest = phn_next_get(a_type, a_field, phn1); \
- if (phnrest != NULL) { \
- phn_prev_set(a_type, a_field, phnrest, NULL); \
- } \
- phn_prev_set(a_type, a_field, phn0, NULL); \
- phn_next_set(a_type, a_field, phn0, NULL); \
- phn_prev_set(a_type, a_field, phn1, NULL); \
- phn_next_set(a_type, a_field, phn1, NULL); \
- phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \
- head = tail = phn0; \
- phn0 = phnrest; \
- while (phn0 != NULL) { \
- phn1 = phn_next_get(a_type, a_field, phn0); \
- if (phn1 != NULL) { \
- phnrest = phn_next_get(a_type, a_field, \
- phn1); \
- if (phnrest != NULL) { \
- phn_prev_set(a_type, a_field, \
- phnrest, NULL); \
- } \
- phn_prev_set(a_type, a_field, phn0, \
- NULL); \
- phn_next_set(a_type, a_field, phn0, \
- NULL); \
- phn_prev_set(a_type, a_field, phn1, \
- NULL); \
- phn_next_set(a_type, a_field, phn1, \
- NULL); \
- phn_merge(a_type, a_field, phn0, phn1, \
- a_cmp, phn0); \
- phn_next_set(a_type, a_field, tail, \
- phn0); \
- tail = phn0; \
- phn0 = phnrest; \
- } else { \
- phn_next_set(a_type, a_field, tail, \
- phn0); \
- tail = phn0; \
- phn0 = NULL; \
- } \
- } \
- phn0 = head; \
- phn1 = phn_next_get(a_type, a_field, phn0); \
- if (phn1 != NULL) { \
- while (true) { \
- head = phn_next_get(a_type, a_field, \
- phn1); \
- assert(phn_prev_get(a_type, a_field, \
- phn0) == NULL); \
- phn_next_set(a_type, a_field, phn0, \
- NULL); \
- assert(phn_prev_get(a_type, a_field, \
- phn1) == NULL); \
- phn_next_set(a_type, a_field, phn1, \
- NULL); \
- phn_merge(a_type, a_field, phn0, phn1, \
- a_cmp, phn0); \
- if (head == NULL) { \
- break; \
- } \
- phn_next_set(a_type, a_field, tail, \
- phn0); \
- tail = phn0; \
- phn0 = head; \
- phn1 = phn_next_get(a_type, a_field, \
- phn0); \
- } \
- } \
- } \
- r_phn = phn0; \
-} while (0)
-
-#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \
- a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \
- if (phn != NULL) { \
- phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \
- phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \
- phn_prev_set(a_type, a_field, phn, NULL); \
- ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \
- assert(phn_next_get(a_type, a_field, phn) == NULL); \
- phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \
- a_ph->ph_root); \
- } \
-} while (0)
-
-#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \
- a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \
- if (lchild == NULL) { \
- r_phn = NULL; \
- } else { \
- ph_merge_siblings(a_type, a_field, lchild, a_cmp, \
- r_phn); \
- } \
-} while (0)
+typedef struct { \
+ ph_t ph; \
+} a_prefix##_t;
/*
* The ph_proto() macro generates function prototypes that correspond to the
* functions generated by an equivalently parameterized call to ph_gen().
*/
-#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \
-a_attr void a_prefix##new(a_ph_type *ph); \
-a_attr bool a_prefix##empty(a_ph_type *ph); \
-a_attr a_type *a_prefix##first(a_ph_type *ph); \
-a_attr a_type *a_prefix##any(a_ph_type *ph); \
-a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \
-a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \
-a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \
-a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn);
+#define ph_proto(a_attr, a_prefix, a_type) \
+ \
+a_attr void a_prefix##_new(a_prefix##_t *ph); \
+a_attr bool a_prefix##_empty(a_prefix##_t *ph); \
+a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \
+a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \
+a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \
+a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \
+a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \
+a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph);
-/*
- * The ph_gen() macro generates a type-specific pairing heap implementation,
- * based on the above cpp macros.
- */
-#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \
+/* The ph_gen() macro generates a type-specific pairing heap implementation. */
+#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \
+JEMALLOC_ALWAYS_INLINE int \
+a_prefix##_ph_cmp(void *a, void *b) { \
+ return a_cmp((a_type *)a, (a_type *)b); \
+} \
+ \
a_attr void \
-a_prefix##new(a_ph_type *ph) { \
- memset(ph, 0, sizeof(ph(a_type))); \
+a_prefix##_new(a_prefix##_t *ph) { \
+ ph_new(&ph->ph); \
} \
+ \
a_attr bool \
-a_prefix##empty(a_ph_type *ph) { \
- return (ph->ph_root == NULL); \
+a_prefix##_empty(a_prefix##_t *ph) { \
+ return ph_empty(&ph->ph); \
} \
+ \
a_attr a_type * \
-a_prefix##first(a_ph_type *ph) { \
- if (ph->ph_root == NULL) { \
- return NULL; \
- } \
- ph_merge_aux(a_type, a_field, ph, a_cmp); \
- return ph->ph_root; \
+a_prefix##_first(a_prefix##_t *ph) { \
+ return ph_first(&ph->ph, offsetof(a_type, a_field), \
+ &a_prefix##_ph_cmp); \
} \
+ \
a_attr a_type * \
-a_prefix##any(a_ph_type *ph) { \
- if (ph->ph_root == NULL) { \
- return NULL; \
- } \
- a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \
- if (aux != NULL) { \
- return aux; \
- } \
- return ph->ph_root; \
+a_prefix##_any(a_prefix##_t *ph) { \
+ return ph_any(&ph->ph, offsetof(a_type, a_field)); \
} \
-a_attr void \
-a_prefix##insert(a_ph_type *ph, a_type *phn) { \
- memset(&phn->a_field, 0, sizeof(phn(a_type))); \
\
- /* \
- * Treat the root as an aux list during insertion, and lazily \
- * merge during a_prefix##remove_first(). For elements that \
- * are inserted, then removed via a_prefix##remove() before the \
- * aux list is ever processed, this makes insert/remove \
- * constant-time, whereas eager merging would make insert \
- * O(log n). \
- */ \
- if (ph->ph_root == NULL) { \
- ph->ph_root = phn; \
- } else { \
- phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \
- a_field, ph->ph_root)); \
- if (phn_next_get(a_type, a_field, ph->ph_root) != \
- NULL) { \
- phn_prev_set(a_type, a_field, \
- phn_next_get(a_type, a_field, ph->ph_root), \
- phn); \
- } \
- phn_prev_set(a_type, a_field, phn, ph->ph_root); \
- phn_next_set(a_type, a_field, ph->ph_root, phn); \
- } \
+a_attr void \
+a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \
+ ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \
+ a_prefix##_ph_cmp); \
} \
-a_attr a_type * \
-a_prefix##remove_first(a_ph_type *ph) { \
- a_type *ret; \
\
- if (ph->ph_root == NULL) { \
- return NULL; \
- } \
- ph_merge_aux(a_type, a_field, ph, a_cmp); \
- \
- ret = ph->ph_root; \
- \
- ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \
- ph->ph_root); \
+a_attr a_type * \
+a_prefix##_remove_first(a_prefix##_t *ph) { \
+ return ph_remove_first(&ph->ph, offsetof(a_type, a_field), \
+ a_prefix##_ph_cmp); \
+} \
\
- return ret; \
+a_attr void \
+a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \
+ ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \
+ a_prefix##_ph_cmp); \
} \
+ \
a_attr a_type * \
-a_prefix##remove_any(a_ph_type *ph) { \
- /* \
- * Remove the most recently inserted aux list element, or the \
- * root if the aux list is empty. This has the effect of \
- * behaving as a LIFO (and insertion/removal is therefore \
- * constant-time) if a_prefix##[remove_]first() are never \
- * called. \
- */ \
- if (ph->ph_root == NULL) { \
- return NULL; \
- } \
- a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \
+a_prefix##_remove_any(a_prefix##_t *ph) { \
+ a_type *ret = a_prefix##_any(ph); \
if (ret != NULL) { \
- a_type *aux = phn_next_get(a_type, a_field, ret); \
- phn_next_set(a_type, a_field, ph->ph_root, aux); \
- if (aux != NULL) { \
- phn_prev_set(a_type, a_field, aux, \
- ph->ph_root); \
- } \
- return ret; \
+ a_prefix##_remove(ph, ret); \
} \
- ret = ph->ph_root; \
- ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \
- ph->ph_root); \
return ret; \
-} \
-a_attr void \
-a_prefix##remove(a_ph_type *ph, a_type *phn) { \
- a_type *replace, *parent; \
- \
- if (ph->ph_root == phn) { \
- /* \
- * We can delete from aux list without merging it, but \
- * we need to merge if we are dealing with the root \
- * node and it has children. \
- */ \
- if (phn_lchild_get(a_type, a_field, phn) == NULL) { \
- ph->ph_root = phn_next_get(a_type, a_field, \
- phn); \
- if (ph->ph_root != NULL) { \
- phn_prev_set(a_type, a_field, \
- ph->ph_root, NULL); \
- } \
- return; \
- } \
- ph_merge_aux(a_type, a_field, ph, a_cmp); \
- if (ph->ph_root == phn) { \
- ph_merge_children(a_type, a_field, ph->ph_root, \
- a_cmp, ph->ph_root); \
- return; \
- } \
- } \
- \
- /* Get parent (if phn is leftmost child) before mutating. */ \
- if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \
- if (phn_lchild_get(a_type, a_field, parent) != phn) { \
- parent = NULL; \
- } \
- } \
- /* Find a possible replacement node, and link to parent. */ \
- ph_merge_children(a_type, a_field, phn, a_cmp, replace); \
- /* Set next/prev for sibling linked list. */ \
- if (replace != NULL) { \
- if (parent != NULL) { \
- phn_prev_set(a_type, a_field, replace, parent); \
- phn_lchild_set(a_type, a_field, parent, \
- replace); \
- } else { \
- phn_prev_set(a_type, a_field, replace, \
- phn_prev_get(a_type, a_field, phn)); \
- if (phn_prev_get(a_type, a_field, phn) != \
- NULL) { \
- phn_next_set(a_type, a_field, \
- phn_prev_get(a_type, a_field, phn), \
- replace); \
- } \
- } \
- phn_next_set(a_type, a_field, replace, \
- phn_next_get(a_type, a_field, phn)); \
- if (phn_next_get(a_type, a_field, phn) != NULL) { \
- phn_prev_set(a_type, a_field, \
- phn_next_get(a_type, a_field, phn), \
- replace); \
- } \
- } else { \
- if (parent != NULL) { \
- a_type *next = phn_next_get(a_type, a_field, \
- phn); \
- phn_lchild_set(a_type, a_field, parent, next); \
- if (next != NULL) { \
- phn_prev_set(a_type, a_field, next, \
- parent); \
- } \
- } else { \
- assert(phn_prev_get(a_type, a_field, phn) != \
- NULL); \
- phn_next_set(a_type, a_field, \
- phn_prev_get(a_type, a_field, phn), \
- phn_next_get(a_type, a_field, phn)); \
- } \
- if (phn_next_get(a_type, a_field, phn) != NULL) { \
- phn_prev_set(a_type, a_field, \
- phn_next_get(a_type, a_field, phn), \
- phn_prev_get(a_type, a_field, phn)); \
- } \
- } \
}
-#endif /* PH_H_ */
+#endif /* JEMALLOC_INTERNAL_PH_H */
diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h
index 15cc2d1..14542aa 100644
--- a/include/jemalloc/internal/prng.h
+++ b/include/jemalloc/internal/prng.h
@@ -1,7 +1,6 @@
#ifndef JEMALLOC_INTERNAL_PRNG_H
#define JEMALLOC_INTERNAL_PRNG_H
-#include "jemalloc/internal/atomic.h"
#include "jemalloc/internal/bit_util.h"
/*
@@ -59,66 +58,38 @@ prng_state_next_zu(size_t state) {
/*
* The prng_lg_range functions give a uniform int in the half-open range [0,
- * 2**lg_range). If atomic is true, they do so safely from multiple threads.
- * Multithreaded 64-bit prngs aren't supported.
+ * 2**lg_range).
*/
JEMALLOC_ALWAYS_INLINE uint32_t
-prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) {
- uint32_t ret, state0, state1;
-
+prng_lg_range_u32(uint32_t *state, unsigned lg_range) {
assert(lg_range > 0);
assert(lg_range <= 32);
- state0 = atomic_load_u32(state, ATOMIC_RELAXED);
-
- if (atomic) {
- do {
- state1 = prng_state_next_u32(state0);
- } while (!atomic_compare_exchange_weak_u32(state, &state0,
- state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
- } else {
- state1 = prng_state_next_u32(state0);
- atomic_store_u32(state, state1, ATOMIC_RELAXED);
- }
- ret = state1 >> (32 - lg_range);
+ *state = prng_state_next_u32(*state);
+ uint32_t ret = *state >> (32 - lg_range);
return ret;
}
JEMALLOC_ALWAYS_INLINE uint64_t
prng_lg_range_u64(uint64_t *state, unsigned lg_range) {
- uint64_t ret, state1;
-
assert(lg_range > 0);
assert(lg_range <= 64);
- state1 = prng_state_next_u64(*state);
- *state = state1;
- ret = state1 >> (64 - lg_range);
+ *state = prng_state_next_u64(*state);
+ uint64_t ret = *state >> (64 - lg_range);
return ret;
}
JEMALLOC_ALWAYS_INLINE size_t
-prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
- size_t ret, state0, state1;
-
+prng_lg_range_zu(size_t *state, unsigned lg_range) {
assert(lg_range > 0);
assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR));
- state0 = atomic_load_zu(state, ATOMIC_RELAXED);
-
- if (atomic) {
- do {
- state1 = prng_state_next_zu(state0);
- } while (atomic_compare_exchange_weak_zu(state, &state0,
- state1, ATOMIC_RELAXED, ATOMIC_RELAXED));
- } else {
- state1 = prng_state_next_zu(state0);
- atomic_store_zu(state, state1, ATOMIC_RELAXED);
- }
- ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
+ *state = prng_state_next_zu(*state);
+ size_t ret = *state >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range);
return ret;
}
@@ -129,18 +100,24 @@ prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) {
*/
JEMALLOC_ALWAYS_INLINE uint32_t
-prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
- uint32_t ret;
- unsigned lg_range;
-
- assert(range > 1);
+prng_range_u32(uint32_t *state, uint32_t range) {
+ assert(range != 0);
+ /*
+ * If range were 1, lg_range would be 0, so the shift in
+ * prng_lg_range_u32 would be a shift of a 32-bit variable by 32 bits,
+ * which is UB. Just handle this case as a one-off.
+ */
+ if (range == 1) {
+ return 0;
+ }
/* Compute the ceiling of lg(range). */
- lg_range = ffs_u32(pow2_ceil_u32(range)) - 1;
+ unsigned lg_range = ffs_u32(pow2_ceil_u32(range));
/* Generate a result in [0..range) via repeated trial. */
+ uint32_t ret;
do {
- ret = prng_lg_range_u32(state, lg_range, atomic);
+ ret = prng_lg_range_u32(state, lg_range);
} while (ret >= range);
return ret;
@@ -148,15 +125,18 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) {
JEMALLOC_ALWAYS_INLINE uint64_t
prng_range_u64(uint64_t *state, uint64_t range) {
- uint64_t ret;
- unsigned lg_range;
+ assert(range != 0);
- assert(range > 1);
+ /* See the note in prng_range_u32. */
+ if (range == 1) {
+ return 0;
+ }
/* Compute the ceiling of lg(range). */
- lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+ unsigned lg_range = ffs_u64(pow2_ceil_u64(range));
/* Generate a result in [0..range) via repeated trial. */
+ uint64_t ret;
do {
ret = prng_lg_range_u64(state, lg_range);
} while (ret >= range);
@@ -165,18 +145,21 @@ prng_range_u64(uint64_t *state, uint64_t range) {
}
JEMALLOC_ALWAYS_INLINE size_t
-prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) {
- size_t ret;
- unsigned lg_range;
+prng_range_zu(size_t *state, size_t range) {
+ assert(range != 0);
- assert(range > 1);
+ /* See the note in prng_range_u32. */
+ if (range == 1) {
+ return 0;
+ }
/* Compute the ceiling of lg(range). */
- lg_range = ffs_u64(pow2_ceil_u64(range)) - 1;
+ unsigned lg_range = ffs_u64(pow2_ceil_u64(range));
/* Generate a result in [0..range) via repeated trial. */
+ size_t ret;
do {
- ret = prng_lg_range_zu(state, lg_range, atomic);
+ ret = prng_lg_range_zu(state, lg_range);
} while (ret >= range);
return ret;
diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h
new file mode 100644
index 0000000..4c8e22c
--- /dev/null
+++ b/include/jemalloc/internal/prof_data.h
@@ -0,0 +1,37 @@
+#ifndef JEMALLOC_INTERNAL_PROF_DATA_H
+#define JEMALLOC_INTERNAL_PROF_DATA_H
+
+#include "jemalloc/internal/mutex.h"
+
+extern malloc_mutex_t bt2gctx_mtx;
+extern malloc_mutex_t tdatas_mtx;
+extern malloc_mutex_t prof_dump_mtx;
+
+extern malloc_mutex_t *gctx_locks;
+extern malloc_mutex_t *tdata_locks;
+
+extern size_t prof_unbiased_sz[PROF_SC_NSIZES];
+extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES];
+
+void prof_bt_hash(const void *key, size_t r_hash[2]);
+bool prof_bt_keycomp(const void *k1, const void *k2);
+
+bool prof_data_init(tsd_t *tsd);
+prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
+char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name);
+int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name);
+void prof_unbias_map_init();
+void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque,
+ prof_tdata_t *tdata, bool leakcheck);
+prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid,
+ uint64_t thr_discrim, char *thread_name, bool active);
+void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata);
+void prof_reset(tsd_t *tsd, size_t lg_sample);
+void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx);
+
+/* Used in unit tests. */
+size_t prof_tdata_count(void);
+size_t prof_bt_count(void);
+void prof_cnt_all(prof_cnt_t *cnt_all);
+
+#endif /* JEMALLOC_INTERNAL_PROF_DATA_H */
diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h
index 094f3e1..bdff134 100644
--- a/include/jemalloc/internal/prof_externs.h
+++ b/include/jemalloc/internal/prof_externs.h
@@ -2,75 +2,72 @@
#define JEMALLOC_INTERNAL_PROF_EXTERNS_H
#include "jemalloc/internal/mutex.h"
+#include "jemalloc/internal/prof_hook.h"
-extern malloc_mutex_t bt2gctx_mtx;
-
-extern bool opt_prof;
-extern bool opt_prof_active;
-extern bool opt_prof_thread_active_init;
-extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
-extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
-extern bool opt_prof_gdump; /* High-water memory dumping. */
-extern bool opt_prof_final; /* Final profile dumping. */
-extern bool opt_prof_leak; /* Dump leak summary at exit. */
-extern bool opt_prof_accum; /* Report cumulative bytes. */
-extern bool opt_prof_log; /* Turn logging on at boot. */
-extern char opt_prof_prefix[
+extern bool opt_prof;
+extern bool opt_prof_active;
+extern bool opt_prof_thread_active_init;
+extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */
+extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool opt_prof_gdump; /* High-water memory dumping. */
+extern bool opt_prof_final; /* Final profile dumping. */
+extern bool opt_prof_leak; /* Dump leak summary at exit. */
+extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */
+extern bool opt_prof_accum; /* Report cumulative bytes. */
+extern bool opt_prof_log; /* Turn logging on at boot. */
+extern char opt_prof_prefix[
/* Minimize memory bloat for non-prof builds. */
#ifdef JEMALLOC_PROF
PATH_MAX +
#endif
1];
+extern bool opt_prof_unbias;
+
+/* For recording recent allocations */
+extern ssize_t opt_prof_recent_alloc_max;
+
+/* Whether to use thread name provided by the system or by mallctl. */
+extern bool opt_prof_sys_thread_name;
+
+/* Whether to record per size class counts and request size totals. */
+extern bool opt_prof_stats;
/* Accessed via prof_active_[gs]et{_unlocked,}(). */
-extern bool prof_active;
+extern bool prof_active_state;
/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */
-extern bool prof_gdump_val;
+extern bool prof_gdump_val;
-/*
- * Profile dump interval, measured in bytes allocated. Each arena triggers a
- * profile dump when it reaches this threshold. The effect is that the
- * interval between profile dumps averages prof_interval, though the actual
- * interval between dumps will tend to be sporadic, and the interval will be a
- * maximum of approximately (prof_interval * narenas).
- */
-extern uint64_t prof_interval;
+/* Profile dump interval, measured in bytes allocated. */
+extern uint64_t prof_interval;
/*
* Initialized as opt_lg_prof_sample, and potentially modified during profiling
* resets.
*/
-extern size_t lg_prof_sample;
-
-void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated);
-void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
- prof_tctx_t *tctx);
-void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
- prof_tctx_t *tctx);
-void bt_init(prof_bt_t *bt, void **vec);
-void prof_backtrace(prof_bt_t *bt);
-prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt);
-#ifdef JEMALLOC_JET
-size_t prof_tdata_count(void);
-size_t prof_bt_count(void);
-#endif
-typedef int (prof_dump_open_t)(bool, const char *);
-extern prof_dump_open_t *JET_MUTABLE prof_dump_open;
-
-typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *);
-extern prof_dump_header_t *JET_MUTABLE prof_dump_header;
-#ifdef JEMALLOC_JET
-void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
- uint64_t *accumbytes);
-#endif
-bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum);
+extern size_t lg_prof_sample;
+
+extern bool prof_booted;
+
+void prof_backtrace_hook_set(prof_backtrace_hook_t hook);
+prof_backtrace_hook_t prof_backtrace_hook_get();
+
+void prof_dump_hook_set(prof_dump_hook_t hook);
+prof_dump_hook_t prof_dump_hook_get();
+
+/* Functions only accessed in prof_inlines.h */
+prof_tdata_t *prof_tdata_init(tsd_t *tsd);
+prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
+
+void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx);
+void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size,
+ size_t usize, prof_tctx_t *tctx);
+void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
+prof_tctx_t *prof_tctx_create(tsd_t *tsd);
void prof_idump(tsdn_t *tsdn);
bool prof_mdump(tsd_t *tsd, const char *filename);
void prof_gdump(tsdn_t *tsdn);
-prof_tdata_t *prof_tdata_init(tsd_t *tsd);
-prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata);
-void prof_reset(tsd_t *tsd, size_t lg_sample);
+
void prof_tdata_cleanup(tsd_t *tsd);
bool prof_active_get(tsdn_t *tsdn);
bool prof_active_set(tsdn_t *tsdn, bool active);
@@ -84,22 +81,15 @@ bool prof_gdump_get(tsdn_t *tsdn);
bool prof_gdump_set(tsdn_t *tsdn, bool active);
void prof_boot0(void);
void prof_boot1(void);
-bool prof_boot2(tsd_t *tsd);
+bool prof_boot2(tsd_t *tsd, base_t *base);
void prof_prefork0(tsdn_t *tsdn);
void prof_prefork1(tsdn_t *tsdn);
void prof_postfork_parent(tsdn_t *tsdn);
void prof_postfork_child(tsdn_t *tsdn);
-void prof_sample_threshold_update(prof_tdata_t *tdata);
-
-bool prof_log_start(tsdn_t *tsdn, const char *filename);
-bool prof_log_stop(tsdn_t *tsdn);
-#ifdef JEMALLOC_JET
-size_t prof_log_bt_count(void);
-size_t prof_log_alloc_count(void);
-size_t prof_log_thr_count(void);
-bool prof_log_is_logging(void);
-bool prof_log_rep_check(void);
-void prof_log_dummy_set(bool new_value);
-#endif
+
+/* Only accessed by thread event. */
+uint64_t prof_sample_new_event_wait(tsd_t *tsd);
+uint64_t prof_sample_postponed_event_wait(tsd_t *tsd);
+void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed);
#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */
diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h
new file mode 100644
index 0000000..150d19d
--- /dev/null
+++ b/include/jemalloc/internal/prof_hook.h
@@ -0,0 +1,21 @@
+#ifndef JEMALLOC_INTERNAL_PROF_HOOK_H
+#define JEMALLOC_INTERNAL_PROF_HOOK_H
+
+/*
+ * The hooks types of which are declared in this file are experimental and
+ * undocumented, thus the typedefs are located in an 'internal' header.
+ */
+
+/*
+ * A hook to mock out backtrace functionality. This can be handy, since it's
+ * otherwise difficult to guarantee that two allocations are reported as coming
+ * from the exact same stack trace in the presence of an optimizing compiler.
+ */
+typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned);
+
+/*
+ * A callback hook that notifies about recently dumped heap profile.
+ */
+typedef void (*prof_dump_hook_t)(const char *filename);
+
+#endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */
diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h
new file mode 100644
index 0000000..a8e7e7f
--- /dev/null
+++ b/include/jemalloc/internal/prof_inlines.h
@@ -0,0 +1,261 @@
+#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H
+#define JEMALLOC_INTERNAL_PROF_INLINES_H
+
+#include "jemalloc/internal/safety_check.h"
+#include "jemalloc/internal/sz.h"
+#include "jemalloc/internal/thread_event.h"
+
+JEMALLOC_ALWAYS_INLINE void
+prof_active_assert() {
+ cassert(config_prof);
+ /*
+ * If opt_prof is off, then prof_active must always be off, regardless
+ * of whether prof_active_mtx is in effect or not.
+ */
+ assert(opt_prof || !prof_active_state);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_active_get_unlocked(void) {
+ prof_active_assert();
+ /*
+ * Even if opt_prof is true, sampling can be temporarily disabled by
+ * setting prof_active to false. No locking is used when reading
+ * prof_active in the fast path, so there are no guarantees regarding
+ * how long it will take for all threads to notice state changes.
+ */
+ return prof_active_state;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_gdump_get_unlocked(void) {
+ /*
+ * No locking is used when reading prof_gdump_val in the fast path, so
+ * there are no guarantees regarding how long it will take for all
+ * threads to notice state changes.
+ */
+ return prof_gdump_val;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tdata_t *
+prof_tdata_get(tsd_t *tsd, bool create) {
+ prof_tdata_t *tdata;
+
+ cassert(config_prof);
+
+ tdata = tsd_prof_tdata_get(tsd);
+ if (create) {
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+ if (unlikely(tdata == NULL)) {
+ if (tsd_nominal(tsd)) {
+ tdata = prof_tdata_init(tsd);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ } else if (unlikely(tdata->expired)) {
+ tdata = prof_tdata_reinit(tsd, tdata);
+ tsd_prof_tdata_set(tsd, tdata);
+ }
+ assert(tdata == NULL || tdata->attached);
+ }
+
+ return tdata;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx,
+ prof_info_t *prof_info) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+ assert(prof_info != NULL);
+
+ arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr,
+ emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+ assert(prof_info != NULL);
+
+ arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_tctx_reset(tsd, ptr, alloc_ctx);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+
+ arena_prof_tctx_reset_sampled(tsd, ptr);
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) {
+ cassert(config_prof);
+ assert(edata != NULL);
+ assert((uintptr_t)tctx > (uintptr_t)1U);
+
+ arena_prof_info_set(tsd, edata, tctx, size);
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_should_skip(tsd_t *tsd, bool sample_event) {
+ cassert(config_prof);
+
+ /* Fastpath: no need to load tdata */
+ if (likely(!sample_event)) {
+ return true;
+ }
+
+ /*
+ * sample_event is always obtained from the thread event module, and
+ * whenever it's true, it means that the thread event module has
+ * already checked the reentrancy level.
+ */
+ assert(tsd_reentrancy_level_get(tsd) == 0);
+
+ prof_tdata_t *tdata = prof_tdata_get(tsd, true);
+ if (unlikely(tdata == NULL)) {
+ return true;
+ }
+
+ return !tdata->active;
+}
+
+JEMALLOC_ALWAYS_INLINE prof_tctx_t *
+prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) {
+ prof_tctx_t *ret;
+
+ if (!prof_active ||
+ likely(prof_sample_should_skip(tsd, sample_event))) {
+ ret = (prof_tctx_t *)(uintptr_t)1U;
+ } else {
+ ret = prof_tctx_create(tsd);
+ }
+
+ return ret;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
+ emap_alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
+ cassert(config_prof);
+ assert(ptr != NULL);
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+
+ if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
+ prof_malloc_sample_object(tsd, ptr, size, usize, tctx);
+ } else {
+ prof_tctx_reset(tsd, ptr, alloc_ctx);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize,
+ prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize,
+ prof_info_t *old_prof_info, bool sample_event) {
+ bool sampled, old_sampled, moved;
+
+ cassert(config_prof);
+ assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
+
+ if (prof_active && ptr != NULL) {
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+ if (prof_sample_should_skip(tsd, sample_event)) {
+ /*
+ * Don't sample. The usize passed to prof_alloc_prep()
+ * was larger than what actually got allocated, so a
+ * backtrace was captured for this allocation, even
+ * though its actual usize was insufficient to cross the
+ * sample threshold.
+ */
+ prof_alloc_rollback(tsd, tctx);
+ tctx = (prof_tctx_t *)(uintptr_t)1U;
+ }
+ }
+
+ sampled = ((uintptr_t)tctx > (uintptr_t)1U);
+ old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U);
+ moved = (ptr != old_ptr);
+
+ if (unlikely(sampled)) {
+ prof_malloc_sample_object(tsd, ptr, size, usize, tctx);
+ } else if (moved) {
+ prof_tctx_reset(tsd, ptr, NULL);
+ } else if (unlikely(old_sampled)) {
+ /*
+ * prof_tctx_reset() would work for the !moved case as well,
+ * but prof_tctx_reset_sampled() is slightly cheaper, and the
+ * proper thing to do here in the presence of explicit
+ * knowledge re: moved state.
+ */
+ prof_tctx_reset_sampled(tsd, ptr);
+ } else {
+ prof_info_t prof_info;
+ prof_info_get(tsd, ptr, NULL, &prof_info);
+ assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U);
+ }
+
+ /*
+ * The prof_free_sampled_object() call must come after the
+ * prof_malloc_sample_object() call, because tctx and old_tctx may be
+ * the same, in which case reversing the call order could cause the tctx
+ * to be prematurely destroyed as a side effect of momentarily zeroed
+ * counters.
+ */
+ if (unlikely(old_sampled)) {
+ prof_free_sampled_object(tsd, old_usize, old_prof_info);
+ }
+}
+
+JEMALLOC_ALWAYS_INLINE size_t
+prof_sample_align(size_t orig_align) {
+ /*
+ * Enforce page alignment, so that sampled allocations can be identified
+ * w/o metadata lookup.
+ */
+ assert(opt_prof);
+ return (opt_cache_oblivious && orig_align < PAGE) ? PAGE :
+ orig_align;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sample_aligned(const void *ptr) {
+ return ((uintptr_t)ptr & PAGE_MASK) == 0;
+}
+
+JEMALLOC_ALWAYS_INLINE bool
+prof_sampled(tsd_t *tsd, const void *ptr) {
+ prof_info_t prof_info;
+ prof_info_get(tsd, ptr, NULL, &prof_info);
+ bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U;
+ if (sampled) {
+ assert(prof_sample_aligned(ptr));
+ }
+ return sampled;
+}
+
+JEMALLOC_ALWAYS_INLINE void
+prof_free(tsd_t *tsd, const void *ptr, size_t usize,
+ emap_alloc_ctx_t *alloc_ctx) {
+ prof_info_t prof_info;
+ prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info);
+
+ cassert(config_prof);
+ assert(usize == isalloc(tsd_tsdn(tsd), ptr));
+
+ if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) {
+ assert(prof_sample_aligned(ptr));
+ prof_free_sampled_object(tsd, usize, &prof_info);
+ }
+}
+
+#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */
diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h
deleted file mode 100644
index 471d985..0000000
--- a/include/jemalloc/internal/prof_inlines_a.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H
-#define JEMALLOC_INTERNAL_PROF_INLINES_A_H
-
-#include "jemalloc/internal/mutex.h"
-
-static inline bool
-prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum,
- uint64_t accumbytes) {
- cassert(config_prof);
-
- bool overflow;
- uint64_t a0, a1;
-
- /*
- * If the application allocates fast enough (and/or if idump is slow
- * enough), extreme overflow here (a1 >= prof_interval * 2) can cause
- * idump trigger coalescing. This is an intentional mechanism that
- * avoids rate-limiting allocation.
- */
-#ifdef JEMALLOC_ATOMIC_U64
- a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
- do {
- a1 = a0 + accumbytes;
- assert(a1 >= a0);
- overflow = (a1 >= prof_interval);
- if (overflow) {
- a1 %= prof_interval;
- }
- } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
- a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
- malloc_mutex_lock(tsdn, &prof_accum->mtx);
- a0 = prof_accum->accumbytes;
- a1 = a0 + accumbytes;
- overflow = (a1 >= prof_interval);
- if (overflow) {
- a1 %= prof_interval;
- }
- prof_accum->accumbytes = a1;
- malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
- return overflow;
-}
-
-static inline void
-prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum,
- size_t usize) {
- cassert(config_prof);
-
- /*
- * Cancel out as much of the excessive prof_accumbytes increase as
- * possible without underflowing. Interval-triggered dumps occur
- * slightly more often than intended as a result of incomplete
- * canceling.
- */
- uint64_t a0, a1;
-#ifdef JEMALLOC_ATOMIC_U64
- a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED);
- do {
- a1 = (a0 >= SC_LARGE_MINCLASS - usize)
- ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
- } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0,
- a1, ATOMIC_RELAXED, ATOMIC_RELAXED));
-#else
- malloc_mutex_lock(tsdn, &prof_accum->mtx);
- a0 = prof_accum->accumbytes;
- a1 = (a0 >= SC_LARGE_MINCLASS - usize)
- ? a0 - (SC_LARGE_MINCLASS - usize) : 0;
- prof_accum->accumbytes = a1;
- malloc_mutex_unlock(tsdn, &prof_accum->mtx);
-#endif
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_active_get_unlocked(void) {
- /*
- * Even if opt_prof is true, sampling can be temporarily disabled by
- * setting prof_active to false. No locking is used when reading
- * prof_active in the fast path, so there are no guarantees regarding
- * how long it will take for all threads to notice state changes.
- */
- return prof_active;
-}
-
-#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */
diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h
deleted file mode 100644
index 8ba8a1e..0000000
--- a/include/jemalloc/internal/prof_inlines_b.h
+++ /dev/null
@@ -1,250 +0,0 @@
-#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H
-#define JEMALLOC_INTERNAL_PROF_INLINES_B_H
-
-#include "jemalloc/internal/safety_check.h"
-#include "jemalloc/internal/sz.h"
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_gdump_get_unlocked(void) {
- /*
- * No locking is used when reading prof_gdump_val in the fast path, so
- * there are no guarantees regarding how long it will take for all
- * threads to notice state changes.
- */
- return prof_gdump_val;
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tdata_t *
-prof_tdata_get(tsd_t *tsd, bool create) {
- prof_tdata_t *tdata;
-
- cassert(config_prof);
-
- tdata = tsd_prof_tdata_get(tsd);
- if (create) {
- if (unlikely(tdata == NULL)) {
- if (tsd_nominal(tsd)) {
- tdata = prof_tdata_init(tsd);
- tsd_prof_tdata_set(tsd, tdata);
- }
- } else if (unlikely(tdata->expired)) {
- tdata = prof_tdata_reinit(tsd, tdata);
- tsd_prof_tdata_set(tsd, tdata);
- }
- assert(tdata == NULL || tdata->attached);
- }
-
- return tdata;
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- return arena_prof_tctx_get(tsdn, ptr, alloc_ctx);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize,
- alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- arena_prof_tctx_reset(tsdn, ptr, tctx);
-}
-
-JEMALLOC_ALWAYS_INLINE nstime_t
-prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx);
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx,
- nstime_t t) {
- cassert(config_prof);
- assert(ptr != NULL);
-
- arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t);
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_sample_check(tsd_t *tsd, size_t usize, bool update) {
- ssize_t check = update ? 0 : usize;
-
- int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd);
- if (update) {
- bytes_until_sample -= usize;
- if (tsd_nominal(tsd)) {
- tsd_bytes_until_sample_set(tsd, bytes_until_sample);
- }
- }
- if (likely(bytes_until_sample >= check)) {
- return true;
- }
-
- return false;
-}
-
-JEMALLOC_ALWAYS_INLINE bool
-prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update,
- prof_tdata_t **tdata_out) {
- prof_tdata_t *tdata;
-
- cassert(config_prof);
-
- /* Fastpath: no need to load tdata */
- if (likely(prof_sample_check(tsd, usize, update))) {
- return true;
- }
-
- bool booted = tsd_prof_tdata_get(tsd);
- tdata = prof_tdata_get(tsd, true);
- if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) {
- tdata = NULL;
- }
-
- if (tdata_out != NULL) {
- *tdata_out = tdata;
- }
-
- if (unlikely(tdata == NULL)) {
- return true;
- }
-
- /*
- * If this was the first creation of tdata, then
- * prof_tdata_get() reset bytes_until_sample, so decrement and
- * check it again
- */
- if (!booted && prof_sample_check(tsd, usize, update)) {
- return true;
- }
-
- if (tsd_reentrancy_level_get(tsd) > 0) {
- return true;
- }
- /* Compute new sample threshold. */
- if (update) {
- prof_sample_threshold_update(tdata);
- }
- return !tdata->active;
-}
-
-JEMALLOC_ALWAYS_INLINE prof_tctx_t *
-prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) {
- prof_tctx_t *ret;
- prof_tdata_t *tdata;
- prof_bt_t bt;
-
- assert(usize == sz_s2u(usize));
-
- if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update,
- &tdata))) {
- ret = (prof_tctx_t *)(uintptr_t)1U;
- } else {
- bt_init(&bt, tdata->vec);
- prof_backtrace(&bt);
- ret = prof_lookup(tsd, &bt);
- }
-
- return ret;
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx,
- prof_tctx_t *tctx) {
- cassert(config_prof);
- assert(ptr != NULL);
- assert(usize == isalloc(tsdn, ptr));
-
- if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
- prof_malloc_sample_object(tsdn, ptr, usize, tctx);
- } else {
- prof_tctx_set(tsdn, ptr, usize, alloc_ctx,
- (prof_tctx_t *)(uintptr_t)1U);
- }
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx,
- bool prof_active, bool updated, const void *old_ptr, size_t old_usize,
- prof_tctx_t *old_tctx) {
- bool sampled, old_sampled, moved;
-
- cassert(config_prof);
- assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U);
-
- if (prof_active && !updated && ptr != NULL) {
- assert(usize == isalloc(tsd_tsdn(tsd), ptr));
- if (prof_sample_accum_update(tsd, usize, true, NULL)) {
- /*
- * Don't sample. The usize passed to prof_alloc_prep()
- * was larger than what actually got allocated, so a
- * backtrace was captured for this allocation, even
- * though its actual usize was insufficient to cross the
- * sample threshold.
- */
- prof_alloc_rollback(tsd, tctx, true);
- tctx = (prof_tctx_t *)(uintptr_t)1U;
- }
- }
-
- sampled = ((uintptr_t)tctx > (uintptr_t)1U);
- old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U);
- moved = (ptr != old_ptr);
-
- if (unlikely(sampled)) {
- prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx);
- } else if (moved) {
- prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL,
- (prof_tctx_t *)(uintptr_t)1U);
- } else if (unlikely(old_sampled)) {
- /*
- * prof_tctx_set() would work for the !moved case as well, but
- * prof_tctx_reset() is slightly cheaper, and the proper thing
- * to do here in the presence of explicit knowledge re: moved
- * state.
- */
- prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx);
- } else {
- assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) ==
- (uintptr_t)1U);
- }
-
- /*
- * The prof_free_sampled_object() call must come after the
- * prof_malloc_sample_object() call, because tctx and old_tctx may be
- * the same, in which case reversing the call order could cause the tctx
- * to be prematurely destroyed as a side effect of momentarily zeroed
- * counters.
- */
- if (unlikely(old_sampled)) {
- prof_free_sampled_object(tsd, ptr, old_usize, old_tctx);
- }
-}
-
-JEMALLOC_ALWAYS_INLINE void
-prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) {
- prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx);
-
- cassert(config_prof);
- assert(usize == isalloc(tsd_tsdn(tsd), ptr));
-
- if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) {
- prof_free_sampled_object(tsd, ptr, usize, tctx);
- }
-}
-
-#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */
diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h
new file mode 100644
index 0000000..ccb557d
--- /dev/null
+++ b/include/jemalloc/internal/prof_log.h
@@ -0,0 +1,22 @@
+#ifndef JEMALLOC_INTERNAL_PROF_LOG_H
+#define JEMALLOC_INTERNAL_PROF_LOG_H
+
+#include "jemalloc/internal/mutex.h"
+
+extern malloc_mutex_t log_mtx;
+
+void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info);
+bool prof_log_init(tsd_t *tsdn);
+
+/* Used in unit tests. */
+size_t prof_log_bt_count(void);
+size_t prof_log_alloc_count(void);
+size_t prof_log_thr_count(void);
+bool prof_log_is_logging(void);
+bool prof_log_rep_check(void);
+void prof_log_dummy_set(bool new_value);
+
+bool prof_log_start(tsdn_t *tsdn, const char *filename);
+bool prof_log_stop(tsdn_t *tsdn);
+
+#endif /* JEMALLOC_INTERNAL_PROF_LOG_H */
diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h
new file mode 100644
index 0000000..df41023
--- /dev/null
+++ b/include/jemalloc/internal/prof_recent.h
@@ -0,0 +1,23 @@
+#ifndef JEMALLOC_INTERNAL_PROF_RECENT_H
+#define JEMALLOC_INTERNAL_PROF_RECENT_H
+
+extern malloc_mutex_t prof_recent_alloc_mtx;
+extern malloc_mutex_t prof_recent_dump_mtx;
+
+bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx);
+void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize);
+void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata);
+bool prof_recent_init();
+void edata_prof_recent_alloc_init(edata_t *edata);
+
+/* Used in unit tests. */
+typedef ql_head(prof_recent_t) prof_recent_list_t;
+extern prof_recent_list_t prof_recent_alloc_list;
+edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node);
+prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata);
+
+ssize_t prof_recent_alloc_max_ctl_read();
+ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max);
+void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque);
+
+#endif /* JEMALLOC_INTERNAL_PROF_RECENT_H */
diff --git a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h
new file mode 100644
index 0000000..7954e82
--- /dev/null
+++ b/include/jemalloc/internal/prof_stats.h
@@ -0,0 +1,17 @@
+#ifndef JEMALLOC_INTERNAL_PROF_STATS_H
+#define JEMALLOC_INTERNAL_PROF_STATS_H
+
+typedef struct prof_stats_s prof_stats_t;
+struct prof_stats_s {
+ uint64_t req_sum;
+ uint64_t count;
+};
+
+extern malloc_mutex_t prof_stats_mtx;
+
+void prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size);
+void prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size);
+void prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats);
+void prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats);
+
+#endif /* JEMALLOC_INTERNAL_PROF_STATS_H */
diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h
index 34ed482..dd22115 100644
--- a/include/jemalloc/internal/prof_structs.h
+++ b/include/jemalloc/internal/prof_structs.h
@@ -2,6 +2,7 @@
#define JEMALLOC_INTERNAL_PROF_STRUCTS_H
#include "jemalloc/internal/ckh.h"
+#include "jemalloc/internal/edata.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/prng.h"
#include "jemalloc/internal/rb.h"
@@ -15,26 +16,22 @@ struct prof_bt_s {
#ifdef JEMALLOC_PROF_LIBGCC
/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */
typedef struct {
- prof_bt_t *bt;
+ void **vec;
+ unsigned *len;
unsigned max;
} prof_unwind_data_t;
#endif
-struct prof_accum_s {
-#ifndef JEMALLOC_ATOMIC_U64
- malloc_mutex_t mtx;
- uint64_t accumbytes;
-#else
- atomic_u64_t accumbytes;
-#endif
-};
-
struct prof_cnt_s {
/* Profiling counters. */
uint64_t curobjs;
+ uint64_t curobjs_shifted_unbiased;
uint64_t curbytes;
+ uint64_t curbytes_unbiased;
uint64_t accumobjs;
+ uint64_t accumobjs_shifted_unbiased;
uint64_t accumbytes;
+ uint64_t accumbytes_unbiased;
};
typedef enum {
@@ -55,6 +52,12 @@ struct prof_tctx_s {
uint64_t thr_uid;
uint64_t thr_discrim;
+ /*
+ * Reference count of how many times this tctx object is referenced in
+ * recent allocation / deallocation records, protected by tdata->lock.
+ */
+ uint64_t recent_count;
+
/* Profiling counters, protected by tdata->lock. */
prof_cnt_t cnts;
@@ -96,6 +99,15 @@ struct prof_tctx_s {
};
typedef rb_tree(prof_tctx_t) prof_tctx_tree_t;
+struct prof_info_s {
+ /* Time when the allocation was made. */
+ nstime_t alloc_time;
+ /* Points to the prof_tctx_t corresponding to the allocation. */
+ prof_tctx_t *alloc_tctx;
+ /* Allocation request size. */
+ size_t alloc_size;
+};
+
struct prof_gctx_s {
/* Protects nlimbo, cnt_summed, and tctxs. */
malloc_mutex_t *lock;
@@ -167,9 +179,6 @@ struct prof_tdata_s {
*/
ckh_t bt2tctx;
- /* Sampling state. */
- uint64_t prng_state;
-
/* State used to avoid dumping while operating on prof internals. */
bool enq;
bool enq_idump;
@@ -197,4 +206,16 @@ struct prof_tdata_s {
};
typedef rb_tree(prof_tdata_t) prof_tdata_tree_t;
+struct prof_recent_s {
+ nstime_t alloc_time;
+ nstime_t dalloc_time;
+
+ ql_elm(prof_recent_t) link;
+ size_t size;
+ size_t usize;
+ atomic_p_t alloc_edata; /* NULL means allocation has been freed. */
+ prof_tctx_t *alloc_tctx;
+ prof_tctx_t *dalloc_tctx;
+};
+
#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */
diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h
new file mode 100644
index 0000000..3d25a42
--- /dev/null
+++ b/include/jemalloc/internal/prof_sys.h
@@ -0,0 +1,30 @@
+#ifndef JEMALLOC_INTERNAL_PROF_SYS_H
+#define JEMALLOC_INTERNAL_PROF_SYS_H
+
+extern malloc_mutex_t prof_dump_filename_mtx;
+extern base_t *prof_base;
+
+void bt_init(prof_bt_t *bt, void **vec);
+void prof_backtrace(tsd_t *tsd, prof_bt_t *bt);
+void prof_hooks_init();
+void prof_unwind_init();
+void prof_sys_thread_name_fetch(tsd_t *tsd);
+int prof_getpid(void);
+void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind);
+bool prof_prefix_set(tsdn_t *tsdn, const char *prefix);
+void prof_fdump_impl(tsd_t *tsd);
+void prof_idump_impl(tsd_t *tsd);
+bool prof_mdump_impl(tsd_t *tsd, const char *filename);
+void prof_gdump_impl(tsd_t *tsd);
+
+/* Used in unit tests. */
+typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit);
+extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read;
+typedef int (prof_dump_open_file_t)(const char *, int);
+extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file;
+typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t);
+extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file;
+typedef int (prof_dump_open_maps_t)();
+extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps;
+
+#endif /* JEMALLOC_INTERNAL_PROF_SYS_H */
diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h
index 1eff995..ba62865 100644
--- a/include/jemalloc/internal/prof_types.h
+++ b/include/jemalloc/internal/prof_types.h
@@ -2,11 +2,12 @@
#define JEMALLOC_INTERNAL_PROF_TYPES_H
typedef struct prof_bt_s prof_bt_t;
-typedef struct prof_accum_s prof_accum_t;
typedef struct prof_cnt_s prof_cnt_t;
typedef struct prof_tctx_s prof_tctx_t;
+typedef struct prof_info_s prof_info_t;
typedef struct prof_gctx_s prof_gctx_t;
typedef struct prof_tdata_s prof_tdata_t;
+typedef struct prof_recent_s prof_recent_t;
/* Option defaults. */
#ifdef JEMALLOC_PROF
@@ -28,7 +29,23 @@ typedef struct prof_tdata_s prof_tdata_t;
#define PROF_CKH_MINITEMS 64
/* Size of memory buffer to use when writing dump files. */
-#define PROF_DUMP_BUFSIZE 65536
+#ifndef JEMALLOC_PROF
+/* Minimize memory bloat for non-prof builds. */
+# define PROF_DUMP_BUFSIZE 1
+#elif defined(JEMALLOC_DEBUG)
+/* Use a small buffer size in debug build, mainly to facilitate testing. */
+# define PROF_DUMP_BUFSIZE 16
+#else
+# define PROF_DUMP_BUFSIZE 65536
+#endif
+
+/* Size of size class related tables */
+#ifdef JEMALLOC_PROF
+# define PROF_SC_NSIZES SC_NSIZES
+#else
+/* Minimize memory bloat for non-prof builds. */
+# define PROF_SC_NSIZES 1
+#endif
/* Size of stack-allocated buffer used by prof_printf(). */
#define PROF_PRINTF_BUFSIZE 128
@@ -45,12 +62,14 @@ typedef struct prof_tdata_s prof_tdata_t;
*/
#define PROF_NTDATA_LOCKS 256
-/*
- * prof_tdata pointers close to NULL are used to encode state information that
- * is used for cleaning up during thread shutdown.
- */
-#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1)
-#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2)
-#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY
+/* Minimize memory bloat for non-prof builds. */
+#ifdef JEMALLOC_PROF
+#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1)
+#else
+#define PROF_DUMP_FILENAME_LEN 1
+#endif
+
+/* Default number of recent allocations to record. */
+#define PROF_RECENT_ALLOC_MAX_DEFAULT 0
#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */
diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h
new file mode 100644
index 0000000..e1d6497
--- /dev/null
+++ b/include/jemalloc/internal/psset.h
@@ -0,0 +1,131 @@
+#ifndef JEMALLOC_INTERNAL_PSSET_H
+#define JEMALLOC_INTERNAL_PSSET_H
+
+#include "jemalloc/internal/hpdata.h"
+
+/*
+ * A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains
+ * a collection of page-slabs (the intent being that they are backed by
+ * hugepages, or at least could be), and handles allocation and deallocation
+ * requests.
+ */
+
+/*
+ * One more than the maximum pszind_t we will serve out of the HPA.
+ * Practically, we expect only the first few to be actually used. This
+ * corresponds to a maximum size of of 512MB on systems with 4k pages and
+ * SC_NGROUP == 4, which is already an unreasonably large maximum. Morally, you
+ * can think of this as being SC_NPSIZES, but there's no sense in wasting that
+ * much space in the arena, making bitmaps that much larger, etc.
+ */
+#define PSSET_NPSIZES 64
+
+/*
+ * We keep two purge lists per page size class; one for hugified hpdatas (at
+ * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind +
+ * 1). This lets us implement a preference for purging non-hugified hpdatas
+ * among similarly-dirty ones.
+ * We reserve the last two indices for empty slabs, in that case purging
+ * hugified ones (which are definitionally all waste) before non-hugified ones
+ * (i.e. reversing the order).
+ */
+#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES)
+
+typedef struct psset_bin_stats_s psset_bin_stats_t;
+struct psset_bin_stats_s {
+ /* How many pageslabs are in this bin? */
+ size_t npageslabs;
+ /* Of them, how many pages are active? */
+ size_t nactive;
+ /* And how many are dirty? */
+ size_t ndirty;
+};
+
+typedef struct psset_stats_s psset_stats_t;
+struct psset_stats_s {
+ /*
+ * The second index is huge stats; nonfull_slabs[pszind][0] contains
+ * stats for the non-huge slabs in bucket pszind, while
+ * nonfull_slabs[pszind][1] contains stats for the huge slabs.
+ */
+ psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2];
+
+ /*
+ * Full slabs don't live in any edata heap, but we still track their
+ * stats.
+ */
+ psset_bin_stats_t full_slabs[2];
+
+ /* Empty slabs are similar. */
+ psset_bin_stats_t empty_slabs[2];
+};
+
+typedef struct psset_s psset_t;
+struct psset_s {
+ /*
+ * The pageslabs, quantized by the size class of the largest contiguous
+ * free run of pages in a pageslab.
+ */
+ hpdata_age_heap_t pageslabs[PSSET_NPSIZES];
+ /* Bitmap for which set bits correspond to non-empty heaps. */
+ fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)];
+ /*
+ * The sum of all bin stats in stats. This lets us quickly answer
+ * queries for the number of dirty, active, and retained pages in the
+ * entire set.
+ */
+ psset_bin_stats_t merged_stats;
+ psset_stats_t stats;
+ /*
+ * Slabs with no active allocations, but which are allowed to serve new
+ * allocations.
+ */
+ hpdata_empty_list_t empty;
+ /*
+ * Slabs which are available to be purged, ordered by how much we want
+ * to purge them (with later indices indicating slabs we want to purge
+ * more).
+ */
+ hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS];
+ /* Bitmap for which set bits correspond to non-empty purge lists. */
+ fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)];
+ /* Slabs which are available to be hugified. */
+ hpdata_hugify_list_t to_hugify;
+};
+
+void psset_init(psset_t *psset);
+void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src);
+
+/*
+ * Begin or end updating the given pageslab's metadata. While the pageslab is
+ * being updated, it won't be returned from psset_fit calls.
+ */
+void psset_update_begin(psset_t *psset, hpdata_t *ps);
+void psset_update_end(psset_t *psset, hpdata_t *ps);
+
+/* Analogous to the eset_fit; pick a hpdata to serve the request. */
+hpdata_t *psset_pick_alloc(psset_t *psset, size_t size);
+/* Pick one to purge. */
+hpdata_t *psset_pick_purge(psset_t *psset);
+/* Pick one to hugify. */
+hpdata_t *psset_pick_hugify(psset_t *psset);
+
+void psset_insert(psset_t *psset, hpdata_t *ps);
+void psset_remove(psset_t *psset, hpdata_t *ps);
+
+static inline size_t
+psset_npageslabs(psset_t *psset) {
+ return psset->merged_stats.npageslabs;
+}
+
+static inline size_t
+psset_nactive(psset_t *psset) {
+ return psset->merged_stats.nactive;
+}
+
+static inline size_t
+psset_ndirty(psset_t *psset) {
+ return psset->merged_stats.ndirty;
+}
+
+#endif /* JEMALLOC_INTERNAL_PSSET_H */
diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h
index 8029040..c7f52f8 100644
--- a/include/jemalloc/internal/ql.h
+++ b/include/jemalloc/internal/ql.h
@@ -3,37 +3,85 @@
#include "jemalloc/internal/qr.h"
+/*
+ * A linked-list implementation.
+ *
+ * This is built on top of the ring implementation, but that can be viewed as an
+ * implementation detail (i.e. trying to advance past the tail of the list
+ * doesn't wrap around).
+ *
+ * You define a struct like so:
+ * typedef strucy my_s my_t;
+ * struct my_s {
+ * int data;
+ * ql_elm(my_t) my_link;
+ * };
+ *
+ * // We wobble between "list" and "head" for this type; we're now mostly
+ * // heading towards "list".
+ * typedef ql_head(my_t) my_list_t;
+ *
+ * You then pass a my_list_t * for a_head arguments, a my_t * for a_elm
+ * arguments, the token "my_link" for a_field arguments, and the token "my_t"
+ * for a_type arguments.
+ */
+
/* List definitions. */
#define ql_head(a_type) \
struct { \
a_type *qlh_first; \
}
+/* Static initializer for an empty list. */
#define ql_head_initializer(a_head) {NULL}
+/* The field definition. */
#define ql_elm(a_type) qr(a_type)
-/* List functions. */
+/* A pointer to the first element in the list, or NULL if the list is empty. */
+#define ql_first(a_head) ((a_head)->qlh_first)
+
+/* Dynamically initializes a list. */
#define ql_new(a_head) do { \
- (a_head)->qlh_first = NULL; \
+ ql_first(a_head) = NULL; \
} while (0)
-#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
+/*
+ * Sets dest to be the contents of src (overwriting any elements there), leaving
+ * src empty.
+ */
+#define ql_move(a_head_dest, a_head_src) do { \
+ ql_first(a_head_dest) = ql_first(a_head_src); \
+ ql_new(a_head_src); \
+} while (0)
-#define ql_first(a_head) ((a_head)->qlh_first)
+/* True if the list is empty, otherwise false. */
+#define ql_empty(a_head) (ql_first(a_head) == NULL)
+
+/*
+ * Initializes a ql_elm. Must be called even if the field is about to be
+ * overwritten.
+ */
+#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field)
+/*
+ * Obtains the last item in the list.
+ */
#define ql_last(a_head, a_field) \
- ((ql_first(a_head) != NULL) \
- ? qr_prev(ql_first(a_head), a_field) : NULL)
+ (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field))
+/*
+ * Gets a pointer to the next/prev element in the list. Trying to advance past
+ * the end or retreat before the beginning of the list returns NULL.
+ */
#define ql_next(a_head, a_elm, a_field) \
((ql_last(a_head, a_field) != (a_elm)) \
? qr_next((a_elm), a_field) : NULL)
-
#define ql_prev(a_head, a_elm, a_field) \
((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \
: NULL)
+/* Inserts a_elm before a_qlelm in the list. */
#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \
qr_before_insert((a_qlelm), (a_elm), a_field); \
if (ql_first(a_head) == (a_qlelm)) { \
@@ -41,23 +89,41 @@ struct { \
} \
} while (0)
+/* Inserts a_elm after a_qlelm in the list. */
#define ql_after_insert(a_qlelm, a_elm, a_field) \
qr_after_insert((a_qlelm), (a_elm), a_field)
+/* Inserts a_elm as the first item in the list. */
#define ql_head_insert(a_head, a_elm, a_field) do { \
- if (ql_first(a_head) != NULL) { \
+ if (!ql_empty(a_head)) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = (a_elm); \
} while (0)
+/* Inserts a_elm as the last item in the list. */
#define ql_tail_insert(a_head, a_elm, a_field) do { \
- if (ql_first(a_head) != NULL) { \
+ if (!ql_empty(a_head)) { \
qr_before_insert(ql_first(a_head), (a_elm), a_field); \
} \
ql_first(a_head) = qr_next((a_elm), a_field); \
} while (0)
+/*
+ * Given lists a = [a_1, ..., a_n] and [b_1, ..., b_n], results in:
+ * a = [a1, ..., a_n, b_1, ..., b_n] and b = [].
+ */
+#define ql_concat(a_head_a, a_head_b, a_field) do { \
+ if (ql_empty(a_head_a)) { \
+ ql_move(a_head_a, a_head_b); \
+ } else if (!ql_empty(a_head_b)) { \
+ qr_meld(ql_first(a_head_a), ql_first(a_head_b), \
+ a_field); \
+ ql_new(a_head_b); \
+ } \
+} while (0)
+
+/* Removes a_elm from the list. */
#define ql_remove(a_head, a_elm, a_field) do { \
if (ql_first(a_head) == (a_elm)) { \
ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
@@ -65,20 +131,63 @@ struct { \
if (ql_first(a_head) != (a_elm)) { \
qr_remove((a_elm), a_field); \
} else { \
- ql_first(a_head) = NULL; \
+ ql_new(a_head); \
} \
} while (0)
+/* Removes the first item in the list. */
#define ql_head_remove(a_head, a_type, a_field) do { \
a_type *t = ql_first(a_head); \
ql_remove((a_head), t, a_field); \
} while (0)
+/* Removes the last item in the list. */
#define ql_tail_remove(a_head, a_type, a_field) do { \
a_type *t = ql_last(a_head, a_field); \
ql_remove((a_head), t, a_field); \
} while (0)
+/*
+ * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...],
+ * ql_split(a, a_n, b, some_field) results in
+ * a = [a_1, a_2, ..., a_n-1]
+ * and replaces b's contents with:
+ * b = [a_n, a_n+1, ...]
+ */
+#define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \
+ if (ql_first(a_head_a) == (a_elm)) { \
+ ql_move(a_head_b, a_head_a); \
+ } else { \
+ qr_split(ql_first(a_head_a), (a_elm), a_field); \
+ ql_first(a_head_b) = (a_elm); \
+ } \
+} while (0)
+
+/*
+ * An optimized version of:
+ * a_type *t = ql_first(a_head);
+ * ql_remove((a_head), t, a_field);
+ * ql_tail_insert((a_head), t, a_field);
+ */
+#define ql_rotate(a_head, a_field) do { \
+ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \
+} while (0)
+
+/*
+ * Helper macro to iterate over each element in a list in order, starting from
+ * the head (or in reverse order, starting from the tail). The usage is
+ * (assuming my_t and my_list_t defined as above).
+ *
+ * int sum(my_list_t *list) {
+ * int sum = 0;
+ * my_t *iter;
+ * ql_foreach(iter, list, link) {
+ * sum += iter->data;
+ * }
+ * return sum;
+ * }
+ */
+
#define ql_foreach(a_var, a_head, a_field) \
qr_foreach((a_var), ql_first(a_head), a_field)
diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h
index 1e1056b..ece4f55 100644
--- a/include/jemalloc/internal/qr.h
+++ b/include/jemalloc/internal/qr.h
@@ -1,6 +1,21 @@
#ifndef JEMALLOC_INTERNAL_QR_H
#define JEMALLOC_INTERNAL_QR_H
+/*
+ * A ring implementation based on an embedded circular doubly-linked list.
+ *
+ * You define your struct like so:
+ *
+ * typedef struct my_s my_t;
+ * struct my_s {
+ * int data;
+ * qr(my_t) my_link;
+ * };
+ *
+ * And then pass a my_t * into macros for a_qr arguments, and the token
+ * "my_link" into a_field fields.
+ */
+
/* Ring definitions. */
#define qr(a_type) \
struct { \
@@ -8,61 +23,114 @@ struct { \
a_type *qre_prev; \
}
-/* Ring functions. */
+/*
+ * Initialize a qr link. Every link must be initialized before being used, even
+ * if that initialization is going to be immediately overwritten (say, by being
+ * passed into an insertion macro).
+ */
#define qr_new(a_qr, a_field) do { \
(a_qr)->a_field.qre_next = (a_qr); \
(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)
+/*
+ * Go forwards or backwards in the ring. Note that (the ring being circular), this
+ * always succeeds -- you just keep looping around and around the ring if you
+ * chase pointers without end.
+ */
#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
-
#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
-#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
- (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
- (a_qr)->a_field.qre_next = (a_qrelm); \
- (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
- (a_qrelm)->a_field.qre_prev = (a_qr); \
+/*
+ * Given two rings:
+ * a -> a_1 -> ... -> a_n --
+ * ^ |
+ * |------------------------
+ *
+ * b -> b_1 -> ... -> b_n --
+ * ^ |
+ * |------------------------
+ *
+ * Results in the ring:
+ * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n --
+ * ^ |
+ * |-------------------------------------------------|
+ *
+ * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot.
+ */
+#define qr_meld(a_qr_a, a_qr_b, a_field) do { \
+ (a_qr_b)->a_field.qre_prev->a_field.qre_next = \
+ (a_qr_a)->a_field.qre_prev; \
+ (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
+ (a_qr_b)->a_field.qre_prev = \
+ (a_qr_b)->a_field.qre_prev->a_field.qre_next; \
+ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
+ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
} while (0)
-#define qr_after_insert(a_qrelm, a_qr, a_field) do { \
- (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
- (a_qr)->a_field.qre_prev = (a_qrelm); \
- (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
- (a_qrelm)->a_field.qre_next = (a_qr); \
-} while (0)
+/*
+ * Logically, this is just a meld. The intent, though, is that a_qrelm is a
+ * single-element ring, so that "before" has a more obvious interpretation than
+ * meld.
+ */
+#define qr_before_insert(a_qrelm, a_qr, a_field) \
+ qr_meld((a_qrelm), (a_qr), a_field)
-#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \
- a_type *t; \
- (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
- (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
- t = (a_qr_a)->a_field.qre_prev; \
- (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
- (a_qr_b)->a_field.qre_prev = t; \
-} while (0)
+/* Ditto, but inserting after rather than before. */
+#define qr_after_insert(a_qrelm, a_qr, a_field) \
+ qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field)
/*
+ * Inverts meld; given the ring:
+ * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n --
+ * ^ |
+ * |-------------------------------------------------|
+ *
+ * Results in two rings:
+ * a -> a_1 -> ... -> a_n --
+ * ^ |
+ * |------------------------
+ *
+ * b -> b_1 -> ... -> b_n --
+ * ^ |
+ * |------------------------
+ *
* qr_meld() and qr_split() are functionally equivalent, so there's no need to
* have two copies of the code.
*/
-#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
- qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
+#define qr_split(a_qr_a, a_qr_b, a_field) \
+ qr_meld((a_qr_a), (a_qr_b), a_field)
-#define qr_remove(a_qr, a_field) do { \
- (a_qr)->a_field.qre_prev->a_field.qre_next \
- = (a_qr)->a_field.qre_next; \
- (a_qr)->a_field.qre_next->a_field.qre_prev \
- = (a_qr)->a_field.qre_prev; \
- (a_qr)->a_field.qre_next = (a_qr); \
- (a_qr)->a_field.qre_prev = (a_qr); \
-} while (0)
+/*
+ * Splits off a_qr from the rest of its ring, so that it becomes a
+ * single-element ring.
+ */
+#define qr_remove(a_qr, a_field) \
+ qr_split(qr_next(a_qr, a_field), (a_qr), a_field)
+/*
+ * Helper macro to iterate over each element in a ring exactly once, starting
+ * with a_qr. The usage is (assuming my_t defined as above):
+ *
+ * int sum(my_t *item) {
+ * int sum = 0;
+ * my_t *iter;
+ * qr_foreach(iter, item, link) {
+ * sum += iter->data;
+ * }
+ * return sum;
+ * }
+ */
#define qr_foreach(var, a_qr, a_field) \
for ((var) = (a_qr); \
(var) != NULL; \
(var) = (((var)->a_field.qre_next != (a_qr)) \
? (var)->a_field.qre_next : NULL))
+/*
+ * The same (and with the same usage) as qr_foreach, but in the opposite order,
+ * ending with a_qr.
+ */
#define qr_reverse_foreach(var, a_qr, a_field) \
for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
(var) != NULL; \
diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h
index 821086e..c22d753 100644
--- a/include/jemalloc/internal/quantum.h
+++ b/include/jemalloc/internal/quantum.h
@@ -30,11 +30,18 @@
# ifdef __hppa__
# define LG_QUANTUM 4
# endif
+# ifdef __loongarch__
+# define LG_QUANTUM 4
+# endif
# ifdef __m68k__
# define LG_QUANTUM 3
# endif
# ifdef __mips__
-# define LG_QUANTUM 3
+# if defined(__mips_n32) || defined(__mips_n64)
+# define LG_QUANTUM 4
+# else
+# define LG_QUANTUM 3
+# endif
# endif
# ifdef __nios2__
# define LG_QUANTUM 3
@@ -61,6 +68,9 @@
# ifdef __le32__
# define LG_QUANTUM 4
# endif
+# ifdef __arc__
+# define LG_QUANTUM 3
+# endif
# ifndef LG_QUANTUM
# error "Unknown minimum alignment for architecture; specify via "
"--with-lg-quantum"
diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h
index 47fa5ca..a9a51cb 100644
--- a/include/jemalloc/internal/rb.h
+++ b/include/jemalloc/internal/rb.h
@@ -1,3 +1,6 @@
+#ifndef JEMALLOC_INTERNAL_RB_H
+#define JEMALLOC_INTERNAL_RB_H
+
/*-
*******************************************************************************
*
@@ -19,13 +22,19 @@
*******************************************************************************
*/
-#ifndef RB_H_
-#define RB_H_
-
#ifndef __PGI
#define RB_COMPACT
#endif
+/*
+ * Each node in the RB tree consumes at least 1 byte of space (for the linkage
+ * if nothing else, so there are a maximum of sizeof(void *) << 3 rb tree nodes
+ * in any process (and thus, at most sizeof(void *) << 3 nodes in any rb tree).
+ * The choice of algorithm bounds the depth of a tree to twice the binary log of
+ * the number of elements in the tree; the following bound follows.
+ */
+#define RB_MAX_DEPTH (sizeof(void *) << 4)
+
#ifdef RB_COMPACT
/* Node structure. */
#define rb_node(a_type) \
@@ -159,12 +168,22 @@ struct { \
rbtn_right_set(a_type, a_field, (r_node), (a_node)); \
} while (0)
+#define rb_summarized_only_false(...)
+#define rb_summarized_only_true(...) __VA_ARGS__
+#define rb_empty_summarize(a_node, a_lchild, a_rchild) false
+
/*
- * The rb_proto() macro generates function prototypes that correspond to the
- * functions generated by an equivalently parameterized call to rb_gen().
+ * The rb_proto() and rb_summarized_proto() macros generate function prototypes
+ * that correspond to the functions generated by an equivalently parameterized
+ * call to rb_gen() or rb_summarized_gen(), respectively.
*/
#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \
+ rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, false)
+#define rb_summarized_proto(a_attr, a_prefix, a_rbt_type, a_type) \
+ rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, true)
+#define rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, \
+ a_is_summarized) \
a_attr void \
a_prefix##new(a_rbt_type *rbtree); \
a_attr bool \
@@ -195,31 +214,94 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \
a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \
a_attr void \
a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \
- void *arg);
+ void *arg); \
+/* Extended API */ \
+rb_summarized_only_##a_is_summarized( \
+a_attr void \
+a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node); \
+a_attr bool \
+a_prefix##empty_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##first_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##last_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+a_attr a_type * \
+a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx); \
+)
/*
* The rb_gen() macro generates a type-specific red-black tree implementation,
* based on the above cpp macros.
- *
* Arguments:
*
- * a_attr : Function attribute for generated functions (ex: static).
- * a_prefix : Prefix for generated functions (ex: ex_).
- * a_rb_type : Type for red-black tree data structure (ex: ex_t).
- * a_type : Type for red-black tree node data structure (ex: ex_node_t).
- * a_field : Name of red-black tree node linkage (ex: ex_link).
- * a_cmp : Node comparison function name, with the following prototype:
- * int (a_cmp *)(a_type *a_node, a_type *a_other);
- * ^^^^^^
- * or a_key
- * Interpretation of comparison function return values:
- * -1 : a_node < a_other
- * 0 : a_node == a_other
- * 1 : a_node > a_other
- * In all cases, the a_node or a_key macro argument is the first
- * argument to the comparison function, which makes it possible
- * to write comparison functions that treat the first argument
- * specially.
+ * a_attr:
+ * Function attribute for generated functions (ex: static).
+ * a_prefix:
+ * Prefix for generated functions (ex: ex_).
+ * a_rb_type:
+ * Type for red-black tree data structure (ex: ex_t).
+ * a_type:
+ * Type for red-black tree node data structure (ex: ex_node_t).
+ * a_field:
+ * Name of red-black tree node linkage (ex: ex_link).
+ * a_cmp:
+ * Node comparison function name, with the following prototype:
+ *
+ * int a_cmp(a_type *a_node, a_type *a_other);
+ * ^^^^^^
+ * or a_key
+ * Interpretation of comparison function return values:
+ * -1 : a_node < a_other
+ * 0 : a_node == a_other
+ * 1 : a_node > a_other
+ * In all cases, the a_node or a_key macro argument is the first argument to
+ * the comparison function, which makes it possible to write comparison
+ * functions that treat the first argument specially. a_cmp must be a total
+ * order on values inserted into the tree -- duplicates are not allowed.
*
* Assuming the following setup:
*
@@ -338,8 +420,193 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \
* during iteration. There is no way to stop iteration once it
* has begun.
* arg : Opaque pointer passed to cb().
+ *
+ * The rb_summarized_gen() macro generates all the functions above, but has an
+ * expanded interface. In introduces the notion of summarizing subtrees, and of
+ * filtering searches in the tree according to the information contained in
+ * those summaries.
+ * The extra macro argument is:
+ * a_summarize:
+ * Tree summarization function name, with the following prototype:
+ *
+ * bool a_summarize(a_type *a_node, const a_type *a_left_child,
+ * const a_type *a_right_child);
+ *
+ * This function should update a_node with the summary of the subtree rooted
+ * there, using the data contained in it and the summaries in a_left_child
+ * and a_right_child. One or both of them may be NULL. When the tree
+ * changes due to an insertion or removal, it updates the summaries of all
+ * nodes whose subtrees have changed (always updating the summaries of
+ * children before their parents). If the user alters a node in the tree in
+ * a way that may change its summary, they can call the generated
+ * update_summaries function to bubble up the summary changes to the root.
+ * It should return true if the summary changed (or may have changed), and
+ * false if it didn't (which will allow the implementation to terminate
+ * "bubbling up" the summaries early).
+ * As the parameter names indicate, the children are ordered as they are in
+ * the tree, a_left_child, if it is not NULL, compares less than a_node,
+ * which in turn compares less than a_right_child (if a_right_child is not
+ * NULL).
+ *
+ * Using the same setup as above but replacing the macro with
+ * rb_summarized_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp,
+ * ex_summarize)
+ *
+ * Generates all the previous functions, but adds some more:
+ *
+ * static void
+ * ex_update_summaries(ex_t *tree, ex_node_t *node);
+ * Description: Recompute all summaries of ancestors of node.
+ * Args:
+ * tree: Pointer to an initialized red-black tree object.
+ * node: The element of the tree whose summary may have changed.
+ *
+ * For each of ex_empty, ex_first, ex_last, ex_next, ex_prev, ex_search,
+ * ex_nsearch, ex_psearch, ex_iter, and ex_reverse_iter, an additional function
+ * is generated as well, with the suffix _filtered (e.g. ex_empty_filtered,
+ * ex_first_filtered, etc.). These use the concept of a "filter"; a binary
+ * property some node either satisfies or does not satisfy. Clever use of the
+ * a_summary argument to rb_summarized_gen can allow efficient computation of
+ * these predicates across whole subtrees of the tree.
+ * The extended API functions accept three additional arguments after the
+ * arguments to the corresponding non-extended equivalent.
+ *
+ * ex_fn(..., bool (*filter_node)(void *, ex_node_t *),
+ * bool (*filter_subtree)(void *, ex_node_t *), void *filter_ctx);
+ * filter_node : Returns true if the node passes the filter.
+ * filter_subtree : Returns true if some node in the subtree rooted at
+ * node passes the filter.
+ * filter_ctx : A context argument passed to the filters.
+ *
+ * For a more concrete example of summarizing and filtering, suppose we're using
+ * the red-black tree to track a set of integers:
+ *
+ * struct ex_node_s {
+ * rb_node(ex_node_t) ex_link;
+ * unsigned data;
+ * };
+ *
+ * Suppose, for some application-specific reason, we want to be able to quickly
+ * find numbers in the set which are divisible by large powers of 2 (say, for
+ * aligned allocation purposes). We augment the node with a summary field:
+ *
+ * struct ex_node_s {
+ * rb_node(ex_node_t) ex_link;
+ * unsigned data;
+ * unsigned max_subtree_ffs;
+ * }
+ *
+ * and define our summarization function as follows:
+ *
+ * bool
+ * ex_summarize(ex_node_t *node, const ex_node_t *lchild,
+ * const ex_node_t *rchild) {
+ * unsigned new_max_subtree_ffs = ffs(node->data);
+ * if (lchild != NULL && lchild->max_subtree_ffs > new_max_subtree_ffs) {
+ * new_max_subtree_ffs = lchild->max_subtree_ffs;
+ * }
+ * if (rchild != NULL && rchild->max_subtree_ffs > new_max_subtree_ffs) {
+ * new_max_subtree_ffs = rchild->max_subtree_ffs;
+ * }
+ * bool changed = (node->max_subtree_ffs != new_max_subtree_ffs)
+ * node->max_subtree_ffs = new_max_subtree_ffs;
+ * // This could be "return true" without any correctness or big-O
+ * // performance changes; but practically, precisely reporting summary
+ * // changes reduces the amount of work that has to be done when "bubbling
+ * // up" summary changes.
+ * return changed;
+ * }
+ *
+ * We can now implement our filter functions as follows:
+ * bool
+ * ex_filter_node(void *filter_ctx, ex_node_t *node) {
+ * unsigned required_ffs = *(unsigned *)filter_ctx;
+ * return ffs(node->data) >= required_ffs;
+ * }
+ * bool
+ * ex_filter_subtree(void *filter_ctx, ex_node_t *node) {
+ * unsigned required_ffs = *(unsigned *)filter_ctx;
+ * return node->max_subtree_ffs >= required_ffs;
+ * }
+ *
+ * We can now easily search for, e.g., the smallest integer in the set that's
+ * divisible by 128:
+ * ex_node_t *
+ * find_div_128(ex_tree_t *tree) {
+ * unsigned min_ffs = 7;
+ * return ex_first_filtered(tree, &ex_filter_node, &ex_filter_subtree,
+ * &min_ffs);
+ * }
+ *
+ * We could with similar ease:
+ * - Fnd the next multiple of 128 in the set that's larger than 12345 (with
+ * ex_nsearch_filtered)
+ * - Iterate over just those multiples of 64 that are in the set (with
+ * ex_iter_filtered)
+ * - Determine if the set contains any multiples of 1024 (with
+ * ex_empty_filtered).
+ *
+ * Some possibly subtle API notes:
+ * - The node argument to ex_next_filtered and ex_prev_filtered need not pass
+ * the filter; it will find the next/prev node that passes the filter.
+ * - ex_search_filtered will fail even for a node in the tree, if that node does
+ * not pass the filter. ex_psearch_filtered and ex_nsearch_filtered behave
+ * similarly; they may return a node larger/smaller than the key, even if a
+ * node equivalent to the key is in the tree (but does not pass the filter).
+ * - Similarly, if the start argument to a filtered iteration function does not
+ * pass the filter, the callback won't be invoked on it.
+ *
+ * These should make sense after a moment's reflection; each post-condition is
+ * the same as with the unfiltered version, with the added constraint that the
+ * returned node must pass the filter.
*/
#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \
+ rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \
+ rb_empty_summarize, false)
+#define rb_summarized_gen(a_attr, a_prefix, a_rbt_type, a_type, \
+ a_field, a_cmp, a_summarize) \
+ rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \
+ a_summarize, true)
+
+#define rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, \
+ a_field, a_cmp, a_summarize, a_is_summarized) \
+typedef struct { \
+ a_type *node; \
+ int cmp; \
+} a_prefix##path_entry_t; \
+static inline void \
+a_prefix##summarize_range(a_prefix##path_entry_t *rfirst, \
+ a_prefix##path_entry_t *rlast) { \
+ while ((uintptr_t)rlast >= (uintptr_t)rfirst) { \
+ a_type *node = rlast->node; \
+ /* Avoid a warning when a_summarize is rb_empty_summarize. */ \
+ (void)node; \
+ bool changed = a_summarize(node, rbtn_left_get(a_type, a_field, \
+ node), rbtn_right_get(a_type, a_field, node)); \
+ if (!changed) { \
+ break; \
+ } \
+ rlast--; \
+ } \
+} \
+/* On the remove pathways, we sometimes swap the node being removed */\
+/* and its first successor; in such cases we need to do two range */\
+/* updates; one from the node to its (former) swapped successor, the */\
+/* next from that successor to the root (with either allowed to */\
+/* bail out early if appropriate. */\
+static inline void \
+a_prefix##summarize_swapped_range(a_prefix##path_entry_t *rfirst, \
+ a_prefix##path_entry_t *rlast, a_prefix##path_entry_t *swap_loc) { \
+ if (swap_loc == NULL || rlast <= swap_loc) { \
+ a_prefix##summarize_range(rfirst, rlast); \
+ } else { \
+ a_prefix##summarize_range(swap_loc + 1, rlast); \
+ (void)a_summarize(swap_loc->node, \
+ rbtn_left_get(a_type, a_field, swap_loc->node), \
+ rbtn_right_get(a_type, a_field, swap_loc->node)); \
+ a_prefix##summarize_range(rfirst, swap_loc - 1); \
+ } \
+} \
a_attr void \
a_prefix##new(a_rbt_type *rbtree) { \
rb_new(a_type, a_field, rbtree); \
@@ -465,10 +732,8 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \
} \
a_attr void \
a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
- struct { \
- a_type *node; \
- int cmp; \
- } path[sizeof(void *) << 4], *pathp; \
+ a_prefix##path_entry_t path[RB_MAX_DEPTH]; \
+ a_prefix##path_entry_t *pathp; \
rbt_node_new(a_type, a_field, rbtree, node); \
/* Wind. */ \
path->node = rbtree->rbt_root; \
@@ -484,6 +749,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
} \
} \
pathp->node = node; \
+ /* A loop invariant we maintain is that all nodes with */\
+ /* out-of-date summaries live in path[0], path[1], ..., *pathp. */\
+ /* To maintain this, we have to summarize node, since we */\
+ /* decrement pathp before the first iteration. */\
+ assert(rbtn_left_get(a_type, a_field, node) == NULL); \
+ assert(rbtn_right_get(a_type, a_field, node) == NULL); \
+ (void)a_summarize(node, NULL, NULL); \
/* Unwind. */ \
for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \
a_type *cnode = pathp->node; \
@@ -498,9 +770,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
a_type *tnode; \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, cnode, tnode); \
+ (void)a_summarize(cnode, \
+ rbtn_left_get(a_type, a_field, cnode), \
+ rbtn_right_get(a_type, a_field, cnode)); \
cnode = tnode; \
} \
} else { \
+ a_prefix##summarize_range(path, pathp); \
return; \
} \
} else { \
@@ -521,13 +797,20 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
rbtn_rotate_left(a_type, a_field, cnode, tnode); \
rbtn_color_set(a_type, a_field, tnode, tred); \
rbtn_red_set(a_type, a_field, cnode); \
+ (void)a_summarize(cnode, \
+ rbtn_left_get(a_type, a_field, cnode), \
+ rbtn_right_get(a_type, a_field, cnode)); \
cnode = tnode; \
} \
} else { \
+ a_prefix##summarize_range(path, pathp); \
return; \
} \
} \
pathp->node = cnode; \
+ (void)a_summarize(cnode, \
+ rbtn_left_get(a_type, a_field, cnode), \
+ rbtn_right_get(a_type, a_field, cnode)); \
} \
/* Set root, and make it black. */ \
rbtree->rbt_root = path->node; \
@@ -535,12 +818,18 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \
} \
a_attr void \
a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
- struct { \
- a_type *node; \
- int cmp; \
- } *pathp, *nodep, path[sizeof(void *) << 4]; \
+ a_prefix##path_entry_t path[RB_MAX_DEPTH]; \
+ a_prefix##path_entry_t *pathp; \
+ a_prefix##path_entry_t *nodep; \
+ a_prefix##path_entry_t *swap_loc; \
+ /* This is a "real" sentinel -- NULL means we didn't swap the */\
+ /* node to be pruned with one of its successors, and so */\
+ /* summarization can terminate early whenever some summary */\
+ /* doesn't change. */\
+ swap_loc = NULL; \
+ /* This is just to silence a compiler warning. */ \
+ nodep = NULL; \
/* Wind. */ \
- nodep = NULL; /* Silence compiler warning. */ \
path->node = rbtree->rbt_root; \
for (pathp = path; pathp->node != NULL; pathp++) { \
int cmp = pathp->cmp = a_cmp(node, pathp->node); \
@@ -567,6 +856,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
pathp--; \
if (pathp->node != node) { \
/* Swap node with its successor. */ \
+ swap_loc = nodep; \
bool tred = rbtn_red_get(a_type, a_field, pathp->node); \
rbtn_color_set(a_type, a_field, pathp->node, \
rbtn_red_get(a_type, a_field, node)); \
@@ -604,6 +894,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_black_set(a_type, a_field, left); \
if (pathp == path) { \
rbtree->rbt_root = left; \
+ /* Nothing to summarize -- the subtree rooted at the */\
+ /* node's left child hasn't changed, and it's now the */\
+ /* root. */\
} else { \
if (pathp[-1].cmp < 0) { \
rbtn_left_set(a_type, a_field, pathp[-1].node, \
@@ -612,6 +905,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
left); \
} \
+ a_prefix##summarize_swapped_range(path, &pathp[-1], \
+ swap_loc); \
} \
return; \
} else if (pathp == path) { \
@@ -620,10 +915,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
return; \
} \
} \
+ /* We've now established the invariant that the node has no right */\
+ /* child (well, morally; we didn't bother nulling it out if we */\
+ /* swapped it with its successor), and that the only nodes with */\
+ /* out-of-date summaries live in path[0], path[1], ..., pathp[-1].*/\
if (rbtn_red_get(a_type, a_field, pathp->node)) { \
/* Prune red node, which requires no fixup. */ \
assert(pathp[-1].cmp < 0); \
rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \
+ a_prefix##summarize_swapped_range(path, &pathp[-1], swap_loc); \
return; \
} \
/* The node to be pruned is black, so unwind until balance is */\
@@ -657,6 +957,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(right, \
+ rbtn_left_get(a_type, a_field, right), \
+ rbtn_right_get(a_type, a_field, right)); \
} else { \
/* || */\
/* pathp(r) */\
@@ -667,7 +973,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
/* */\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
} \
+ (void)a_summarize(tnode, rbtn_left_get(a_type, a_field, \
+ tnode), rbtn_right_get(a_type, a_field, tnode)); \
/* Balance restored, but rotation modified subtree */\
/* root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
@@ -678,6 +989,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
+ a_prefix##summarize_swapped_range(path, &pathp[-1], \
+ swap_loc); \
return; \
} else { \
a_type *right = rbtn_right_get(a_type, a_field, \
@@ -698,6 +1011,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp->node, tnode);\
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(right, \
+ rbtn_left_get(a_type, a_field, right), \
+ rbtn_right_get(a_type, a_field, right)); \
+ (void)a_summarize(tnode, \
+ rbtn_left_get(a_type, a_field, tnode), \
+ rbtn_right_get(a_type, a_field, tnode)); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
@@ -712,6 +1034,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
+ a_prefix##summarize_swapped_range(path, \
+ &pathp[-1], swap_loc); \
} \
return; \
} else { \
@@ -725,6 +1049,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_red_set(a_type, a_field, pathp->node); \
rbtn_rotate_left(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(tnode, \
+ rbtn_left_get(a_type, a_field, tnode), \
+ rbtn_right_get(a_type, a_field, tnode)); \
pathp->node = tnode; \
} \
} \
@@ -757,6 +1087,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
tnode); \
rbtn_right_set(a_type, a_field, unode, tnode); \
rbtn_rotate_left(a_type, a_field, unode, tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(unode, \
+ rbtn_left_get(a_type, a_field, unode), \
+ rbtn_right_get(a_type, a_field, unode)); \
} else { \
/* || */\
/* pathp(b) */\
@@ -771,7 +1107,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
rbtn_black_set(a_type, a_field, tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
} \
+ (void)a_summarize(tnode, \
+ rbtn_left_get(a_type, a_field, tnode), \
+ rbtn_right_get(a_type, a_field, tnode)); \
/* Balance restored, but rotation modified subtree */\
/* root, which may actually be the tree root. */\
if (pathp == path) { \
@@ -785,6 +1127,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
+ a_prefix##summarize_swapped_range(path, &pathp[-1], \
+ swap_loc); \
} \
return; \
} else if (rbtn_red_get(a_type, a_field, pathp->node)) { \
@@ -803,6 +1147,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(tnode, \
+ rbtn_left_get(a_type, a_field, tnode), \
+ rbtn_right_get(a_type, a_field, tnode)); \
/* Balance restored, but rotation modified */\
/* subtree root. */\
assert((uintptr_t)pathp > (uintptr_t)path); \
@@ -813,6 +1163,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, pathp[-1].node, \
tnode); \
} \
+ a_prefix##summarize_swapped_range(path, &pathp[-1], \
+ swap_loc); \
return; \
} else { \
/* || */\
@@ -824,6 +1176,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_red_set(a_type, a_field, left); \
rbtn_black_set(a_type, a_field, pathp->node); \
/* Balance restored. */ \
+ a_prefix##summarize_swapped_range(path, pathp, \
+ swap_loc); \
return; \
} \
} else { \
@@ -840,6 +1194,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_black_set(a_type, a_field, leftleft); \
rbtn_rotate_right(a_type, a_field, pathp->node, \
tnode); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
+ (void)a_summarize(tnode, \
+ rbtn_left_get(a_type, a_field, tnode), \
+ rbtn_right_get(a_type, a_field, tnode)); \
/* Balance restored, but rotation modified */\
/* subtree root, which may actually be the tree */\
/* root. */\
@@ -854,6 +1214,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
rbtn_right_set(a_type, a_field, \
pathp[-1].node, tnode); \
} \
+ a_prefix##summarize_swapped_range(path, \
+ &pathp[-1], swap_loc); \
} \
return; \
} else { \
@@ -864,6 +1226,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \
/* / */\
/* (b) */\
rbtn_red_set(a_type, a_field, left); \
+ (void)a_summarize(pathp->node, \
+ rbtn_left_get(a_type, a_field, pathp->node), \
+ rbtn_right_get(a_type, a_field, pathp->node)); \
} \
} \
} \
@@ -1001,6 +1366,491 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \
void *arg) { \
a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \
rbtree->rbt_root = NULL; \
-}
+} \
+/* BEGIN SUMMARIZED-ONLY IMPLEMENTATION */ \
+rb_summarized_only_##a_is_summarized( \
+static inline a_prefix##path_entry_t * \
+a_prefix##wind(a_rbt_type *rbtree, \
+ a_prefix##path_entry_t path[RB_MAX_DEPTH], a_type *node) { \
+ a_prefix##path_entry_t *pathp; \
+ path->node = rbtree->rbt_root; \
+ for (pathp = path; ; pathp++) { \
+ assert((size_t)(pathp - path) < RB_MAX_DEPTH); \
+ pathp->cmp = a_cmp(node, pathp->node); \
+ if (pathp->cmp < 0) { \
+ pathp[1].node = rbtn_left_get(a_type, a_field, \
+ pathp->node); \
+ } else if (pathp->cmp == 0) { \
+ return pathp; \
+ } else { \
+ pathp[1].node = rbtn_right_get(a_type, a_field, \
+ pathp->node); \
+ } \
+ } \
+ unreachable(); \
+} \
+a_attr void \
+a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node) { \
+ a_prefix##path_entry_t path[RB_MAX_DEPTH]; \
+ a_prefix##path_entry_t *pathp = a_prefix##wind(rbtree, path, node); \
+ a_prefix##summarize_range(path, pathp); \
+} \
+a_attr bool \
+a_prefix##empty_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *node = rbtree->rbt_root; \
+ return node == NULL || !filter_subtree(filter_ctx, node); \
+} \
+static inline a_type * \
+a_prefix##first_filtered_from_node(a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ assert(node != NULL && filter_subtree(filter_ctx, node)); \
+ while (true) { \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ if (left != NULL && filter_subtree(filter_ctx, left)) { \
+ node = left; \
+ } else if (filter_node(filter_ctx, node)) { \
+ return node; \
+ } else { \
+ assert(right != NULL \
+ && filter_subtree(filter_ctx, right)); \
+ node = right; \
+ } \
+ } \
+ unreachable(); \
+} \
+a_attr a_type * \
+a_prefix##first_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *node = rbtree->rbt_root; \
+ if (node == NULL || !filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ return a_prefix##first_filtered_from_node(node, filter_node, \
+ filter_subtree, filter_ctx); \
+} \
+static inline a_type * \
+a_prefix##last_filtered_from_node(a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ assert(node != NULL && filter_subtree(filter_ctx, node)); \
+ while (true) { \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ if (right != NULL && filter_subtree(filter_ctx, right)) { \
+ node = right; \
+ } else if (filter_node(filter_ctx, node)) { \
+ return node; \
+ } else { \
+ assert(left != NULL \
+ && filter_subtree(filter_ctx, left)); \
+ node = left; \
+ } \
+ } \
+ unreachable(); \
+} \
+a_attr a_type * \
+a_prefix##last_filtered(a_rbt_type *rbtree, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *node = rbtree->rbt_root; \
+ if (node == NULL || !filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ return a_prefix##last_filtered_from_node(node, filter_node, \
+ filter_subtree, filter_ctx); \
+} \
+/* Internal implementation function. Search for a node comparing */\
+/* equal to key matching the filter. If such a node is in the tree, */\
+/* return it. Additionally, the caller has the option to ask for */\
+/* bounds on the next / prev node in the tree passing the filter. */\
+/* If nextbound is true, then this function will do one of the */\
+/* following: */\
+/* - Fill in *nextbound_node with the smallest node in the tree */\
+/* greater than key passing the filter, and NULL-out */\
+/* *nextbound_subtree. */\
+/* - Fill in *nextbound_subtree with a parent of that node which is */\
+/* not a parent of the searched-for node, and NULL-out */\
+/* *nextbound_node. */\
+/* - NULL-out both *nextbound_node and *nextbound_subtree, in which */\
+/* case no node greater than key but passing the filter is in the */\
+/* tree. */\
+/* The prevbound case is similar. If the caller knows that key is in */\
+/* the tree and that the subtree rooted at key does not contain a */\
+/* node satisfying the bound being searched for, then they can pass */\
+/* false for include_subtree, in which case we won't bother searching */\
+/* there (risking a cache miss). */\
+/* */\
+/* This API is unfortunately complex; but the logic for filtered */\
+/* searches is very subtle, and otherwise we would have to repeat it */\
+/* multiple times for filtered search, nsearch, psearch, next, and */\
+/* prev. */\
+static inline a_type * \
+a_prefix##search_with_filter_bounds(a_rbt_type *rbtree, \
+ const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx, \
+ bool include_subtree, \
+ bool nextbound, a_type **nextbound_node, a_type **nextbound_subtree, \
+ bool prevbound, a_type **prevbound_node, a_type **prevbound_subtree) {\
+ if (nextbound) { \
+ *nextbound_node = NULL; \
+ *nextbound_subtree = NULL; \
+ } \
+ if (prevbound) { \
+ *prevbound_node = NULL; \
+ *prevbound_subtree = NULL; \
+ } \
+ a_type *tnode = rbtree->rbt_root; \
+ while (tnode != NULL && filter_subtree(filter_ctx, tnode)) { \
+ int cmp = a_cmp(key, tnode); \
+ a_type *tleft = rbtn_left_get(a_type, a_field, tnode); \
+ a_type *tright = rbtn_right_get(a_type, a_field, tnode); \
+ if (cmp < 0) { \
+ if (nextbound) { \
+ if (filter_node(filter_ctx, tnode)) { \
+ *nextbound_node = tnode; \
+ *nextbound_subtree = NULL; \
+ } else if (tright != NULL && filter_subtree( \
+ filter_ctx, tright)) { \
+ *nextbound_node = NULL; \
+ *nextbound_subtree = tright; \
+ } \
+ } \
+ tnode = tleft; \
+ } else if (cmp > 0) { \
+ if (prevbound) { \
+ if (filter_node(filter_ctx, tnode)) { \
+ *prevbound_node = tnode; \
+ *prevbound_subtree = NULL; \
+ } else if (tleft != NULL && filter_subtree( \
+ filter_ctx, tleft)) { \
+ *prevbound_node = NULL; \
+ *prevbound_subtree = tleft; \
+ } \
+ } \
+ tnode = tright; \
+ } else { \
+ if (filter_node(filter_ctx, tnode)) { \
+ return tnode; \
+ } \
+ if (include_subtree) { \
+ if (prevbound && tleft != NULL && filter_subtree( \
+ filter_ctx, tleft)) { \
+ *prevbound_node = NULL; \
+ *prevbound_subtree = tleft; \
+ } \
+ if (nextbound && tright != NULL && filter_subtree( \
+ filter_ctx, tright)) { \
+ *nextbound_node = NULL; \
+ *nextbound_subtree = tright; \
+ } \
+ } \
+ return NULL; \
+ } \
+ } \
+ return NULL; \
+} \
+a_attr a_type * \
+a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *nright = rbtn_right_get(a_type, a_field, node); \
+ if (nright != NULL && filter_subtree(filter_ctx, nright)) { \
+ return a_prefix##first_filtered_from_node(nright, filter_node, \
+ filter_subtree, filter_ctx); \
+ } \
+ a_type *node_candidate; \
+ a_type *subtree_candidate; \
+ a_type *search_result = a_prefix##search_with_filter_bounds( \
+ rbtree, node, filter_node, filter_subtree, filter_ctx, \
+ /* include_subtree */ false, \
+ /* nextbound */ true, &node_candidate, &subtree_candidate, \
+ /* prevbound */ false, NULL, NULL); \
+ assert(node == search_result \
+ || !filter_node(filter_ctx, node)); \
+ if (node_candidate != NULL) { \
+ return node_candidate; \
+ } \
+ if (subtree_candidate != NULL) { \
+ return a_prefix##first_filtered_from_node( \
+ subtree_candidate, filter_node, filter_subtree, \
+ filter_ctx); \
+ } \
+ return NULL; \
+} \
+a_attr a_type * \
+a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *nleft = rbtn_left_get(a_type, a_field, node); \
+ if (nleft != NULL && filter_subtree(filter_ctx, nleft)) { \
+ return a_prefix##last_filtered_from_node(nleft, filter_node, \
+ filter_subtree, filter_ctx); \
+ } \
+ a_type *node_candidate; \
+ a_type *subtree_candidate; \
+ a_type *search_result = a_prefix##search_with_filter_bounds( \
+ rbtree, node, filter_node, filter_subtree, filter_ctx, \
+ /* include_subtree */ false, \
+ /* nextbound */ false, NULL, NULL, \
+ /* prevbound */ true, &node_candidate, &subtree_candidate); \
+ assert(node == search_result \
+ || !filter_node(filter_ctx, node)); \
+ if (node_candidate != NULL) { \
+ return node_candidate; \
+ } \
+ if (subtree_candidate != NULL) { \
+ return a_prefix##last_filtered_from_node( \
+ subtree_candidate, filter_node, filter_subtree, \
+ filter_ctx); \
+ } \
+ return NULL; \
+} \
+a_attr a_type * \
+a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \
+ filter_node, filter_subtree, filter_ctx, \
+ /* include_subtree */ false, \
+ /* nextbound */ false, NULL, NULL, \
+ /* prevbound */ false, NULL, NULL); \
+ return result; \
+} \
+a_attr a_type * \
+a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *node_candidate; \
+ a_type *subtree_candidate; \
+ a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \
+ filter_node, filter_subtree, filter_ctx, \
+ /* include_subtree */ true, \
+ /* nextbound */ true, &node_candidate, &subtree_candidate, \
+ /* prevbound */ false, NULL, NULL); \
+ if (result != NULL) { \
+ return result; \
+ } \
+ if (node_candidate != NULL) { \
+ return node_candidate; \
+ } \
+ if (subtree_candidate != NULL) { \
+ return a_prefix##first_filtered_from_node( \
+ subtree_candidate, filter_node, filter_subtree, \
+ filter_ctx); \
+ } \
+ return NULL; \
+} \
+a_attr a_type * \
+a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *node_candidate; \
+ a_type *subtree_candidate; \
+ a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \
+ filter_node, filter_subtree, filter_ctx, \
+ /* include_subtree */ true, \
+ /* nextbound */ false, NULL, NULL, \
+ /* prevbound */ true, &node_candidate, &subtree_candidate); \
+ if (result != NULL) { \
+ return result; \
+ } \
+ if (node_candidate != NULL) { \
+ return node_candidate; \
+ } \
+ if (subtree_candidate != NULL) { \
+ return a_prefix##last_filtered_from_node( \
+ subtree_candidate, filter_node, filter_subtree, \
+ filter_ctx); \
+ } \
+ return NULL; \
+} \
+a_attr a_type * \
+a_prefix##iter_recurse_filtered(a_rbt_type *rbtree, a_type *node, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ if (node == NULL || !filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ a_type *ret; \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ ret = a_prefix##iter_recurse_filtered(rbtree, left, cb, arg, \
+ filter_node, filter_subtree, filter_ctx); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ } \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \
+ filter_node, filter_subtree, filter_ctx); \
+} \
+a_attr a_type * \
+a_prefix##iter_start_filtered(a_rbt_type *rbtree, a_type *start, \
+ a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
+ void *arg, bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ if (!filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ int cmp = a_cmp(start, node); \
+ a_type *ret; \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ if (cmp < 0) { \
+ ret = a_prefix##iter_start_filtered(rbtree, start, left, cb, \
+ arg, filter_node, filter_subtree, filter_ctx); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ } \
+ return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \
+ filter_node, filter_subtree, filter_ctx); \
+ } else if (cmp > 0) { \
+ return a_prefix##iter_start_filtered(rbtree, start, right, \
+ cb, arg, filter_node, filter_subtree, filter_ctx); \
+ } else { \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ } \
+ return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \
+ filter_node, filter_subtree, filter_ctx); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##iter_start_filtered(rbtree, start, \
+ rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \
+ filter_ctx); \
+ } else { \
+ ret = a_prefix##iter_recurse_filtered(rbtree, rbtree->rbt_root, \
+ cb, arg, filter_node, filter_subtree, filter_ctx); \
+ } \
+ return ret; \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_recurse_filtered(a_rbt_type *rbtree, \
+ a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
+ void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ if (node == NULL || !filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ a_type *ret; \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ ret = a_prefix##reverse_iter_recurse_filtered(rbtree, right, cb, \
+ arg, filter_node, filter_subtree, filter_ctx); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ } \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb, \
+ arg, filter_node, filter_subtree, filter_ctx); \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_start_filtered(a_rbt_type *rbtree, a_type *start,\
+ a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \
+ void *arg, bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ if (!filter_subtree(filter_ctx, node)) { \
+ return NULL; \
+ } \
+ int cmp = a_cmp(start, node); \
+ a_type *ret; \
+ a_type *left = rbtn_left_get(a_type, a_field, node); \
+ a_type *right = rbtn_right_get(a_type, a_field, node); \
+ if (cmp > 0) { \
+ ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \
+ right, cb, arg, filter_node, filter_subtree, filter_ctx); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ } \
+ return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\
+ arg, filter_node, filter_subtree, filter_ctx); \
+ } else if (cmp < 0) { \
+ return a_prefix##reverse_iter_start_filtered(rbtree, start, \
+ left, cb, arg, filter_node, filter_subtree, filter_ctx); \
+ } else { \
+ if (filter_node(filter_ctx, node)) { \
+ ret = cb(rbtree, node, arg); \
+ if (ret != NULL) { \
+ return ret; \
+ } \
+ } \
+ return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\
+ arg, filter_node, filter_subtree, filter_ctx); \
+ } \
+} \
+a_attr a_type * \
+a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \
+ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \
+ bool (*filter_node)(void *, a_type *), \
+ bool (*filter_subtree)(void *, a_type *), \
+ void *filter_ctx) { \
+ a_type *ret; \
+ if (start != NULL) { \
+ ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \
+ rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \
+ filter_ctx); \
+ } else { \
+ ret = a_prefix##reverse_iter_recurse_filtered(rbtree, \
+ rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \
+ filter_ctx); \
+ } \
+ return ret; \
+} \
+) /* end rb_summarized_only */
-#endif /* RB_H_ */
+#endif /* JEMALLOC_INTERNAL_RB_H */
diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h