From 7427525c28d58c423a68930160e3b0fe577fe953 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2011 20:36:17 -0700 Subject: Move repo contents in jemalloc/ to top level. --- COPYING | 51 + ChangeLog | 213 + INSTALL | 251 + Makefile.in | 259 ++ README | 16 + autogen.sh | 17 + bin/pprof | 4893 ++++++++++++++++++++ config.guess | 1456 ++++++ config.stamp.in | 0 config.sub | 1549 +++++++ configure.ac | 927 ++++ doc/html.xsl.in | 4 + doc/jemalloc.xml.in | 2280 +++++++++ doc/manpages.xsl.in | 4 + doc/stylesheet.xsl | 7 + include/jemalloc/internal/arena.h | 743 +++ include/jemalloc/internal/atomic.h | 169 + include/jemalloc/internal/base.h | 24 + include/jemalloc/internal/bitmap.h | 184 + include/jemalloc/internal/chunk.h | 65 + include/jemalloc/internal/chunk_dss.h | 30 + include/jemalloc/internal/chunk_mmap.h | 23 + include/jemalloc/internal/chunk_swap.h | 34 + include/jemalloc/internal/ckh.h | 95 + include/jemalloc/internal/ctl.h | 118 + include/jemalloc/internal/extent.h | 49 + include/jemalloc/internal/hash.h | 70 + include/jemalloc/internal/huge.h | 41 + include/jemalloc/internal/jemalloc_internal.h.in | 786 ++++ include/jemalloc/internal/mb.h | 108 + include/jemalloc/internal/mutex.h | 86 + include/jemalloc/internal/prn.h | 60 + include/jemalloc/internal/prof.h | 561 +++ include/jemalloc/internal/ql.h | 83 + include/jemalloc/internal/qr.h | 67 + include/jemalloc/internal/rb.h | 973 ++++ include/jemalloc/internal/rtree.h | 161 + include/jemalloc/internal/stats.h | 207 + include/jemalloc/internal/tcache.h | 431 ++ include/jemalloc/internal/zone.h | 23 + include/jemalloc/jemalloc.h.in | 66 + include/jemalloc/jemalloc_defs.h.in | 158 + install-sh | 250 + jemalloc/COPYING | 51 - jemalloc/ChangeLog | 213 - jemalloc/INSTALL | 251 - jemalloc/Makefile.in | 259 -- jemalloc/README | 16 - jemalloc/autogen.sh | 17 - jemalloc/bin/pprof | 4893 -------------------- jemalloc/config.guess | 1456 ------ jemalloc/config.stamp.in | 0 jemalloc/config.sub | 1549 ------- 
jemalloc/configure.ac | 927 ---- jemalloc/doc/html.xsl.in | 4 - jemalloc/doc/jemalloc.xml.in | 2280 --------- jemalloc/doc/manpages.xsl.in | 4 - jemalloc/doc/stylesheet.xsl | 7 - jemalloc/include/jemalloc/internal/arena.h | 743 --- jemalloc/include/jemalloc/internal/atomic.h | 169 - jemalloc/include/jemalloc/internal/base.h | 24 - jemalloc/include/jemalloc/internal/bitmap.h | 184 - jemalloc/include/jemalloc/internal/chunk.h | 65 - jemalloc/include/jemalloc/internal/chunk_dss.h | 30 - jemalloc/include/jemalloc/internal/chunk_mmap.h | 23 - jemalloc/include/jemalloc/internal/chunk_swap.h | 34 - jemalloc/include/jemalloc/internal/ckh.h | 95 - jemalloc/include/jemalloc/internal/ctl.h | 118 - jemalloc/include/jemalloc/internal/extent.h | 49 - jemalloc/include/jemalloc/internal/hash.h | 70 - jemalloc/include/jemalloc/internal/huge.h | 41 - .../jemalloc/internal/jemalloc_internal.h.in | 786 ---- jemalloc/include/jemalloc/internal/mb.h | 108 - jemalloc/include/jemalloc/internal/mutex.h | 86 - jemalloc/include/jemalloc/internal/prn.h | 60 - jemalloc/include/jemalloc/internal/prof.h | 561 --- jemalloc/include/jemalloc/internal/ql.h | 83 - jemalloc/include/jemalloc/internal/qr.h | 67 - jemalloc/include/jemalloc/internal/rb.h | 973 ---- jemalloc/include/jemalloc/internal/rtree.h | 161 - jemalloc/include/jemalloc/internal/stats.h | 207 - jemalloc/include/jemalloc/internal/tcache.h | 431 -- jemalloc/include/jemalloc/internal/zone.h | 23 - jemalloc/include/jemalloc/jemalloc.h.in | 66 - jemalloc/include/jemalloc/jemalloc_defs.h.in | 158 - jemalloc/install-sh | 250 - jemalloc/src/arena.c | 2703 ----------- jemalloc/src/atomic.c | 2 - jemalloc/src/base.c | 106 - jemalloc/src/bitmap.c | 90 - jemalloc/src/chunk.c | 171 - jemalloc/src/chunk_dss.c | 284 -- jemalloc/src/chunk_mmap.c | 239 - jemalloc/src/chunk_swap.c | 402 -- jemalloc/src/ckh.c | 619 --- jemalloc/src/ctl.c | 1670 ------- jemalloc/src/extent.c | 41 - jemalloc/src/hash.c | 2 - jemalloc/src/huge.c | 379 -- 
jemalloc/src/jemalloc.c | 1847 -------- jemalloc/src/mb.c | 2 - jemalloc/src/mutex.c | 90 - jemalloc/src/prof.c | 1243 ----- jemalloc/src/rtree.c | 46 - jemalloc/src/stats.c | 790 ---- jemalloc/src/tcache.c | 480 -- jemalloc/src/zone.c | 354 -- jemalloc/test/allocated.c | 142 - jemalloc/test/allocated.exp | 2 - jemalloc/test/allocm.c | 133 - jemalloc/test/allocm.exp | 25 - jemalloc/test/bitmap.c | 157 - jemalloc/test/bitmap.exp | 2 - jemalloc/test/jemalloc_test.h.in | 6 - jemalloc/test/mremap.c | 67 - jemalloc/test/mremap.exp | 2 - jemalloc/test/posix_memalign.c | 121 - jemalloc/test/posix_memalign.exp | 25 - jemalloc/test/rallocm.c | 117 - jemalloc/test/rallocm.exp | 2 - jemalloc/test/thread_arena.c | 92 - jemalloc/test/thread_arena.exp | 2 - src/arena.c | 2703 +++++++++++ src/atomic.c | 2 + src/base.c | 106 + src/bitmap.c | 90 + src/chunk.c | 171 + src/chunk_dss.c | 284 ++ src/chunk_mmap.c | 239 + src/chunk_swap.c | 402 ++ src/ckh.c | 619 +++ src/ctl.c | 1670 +++++++ src/extent.c | 41 + src/hash.c | 2 + src/huge.c | 379 ++ src/jemalloc.c | 1847 ++++++++ src/mb.c | 2 + src/mutex.c | 90 + src/prof.c | 1243 +++++ src/rtree.c | 46 + src/stats.c | 790 ++++ src/tcache.c | 480 ++ src/zone.c | 354 ++ test/allocated.c | 142 + test/allocated.exp | 2 + test/allocm.c | 133 + test/allocm.exp | 25 + test/bitmap.c | 157 + test/bitmap.exp | 2 + test/jemalloc_test.h.in | 6 + test/mremap.c | 67 + test/mremap.exp | 2 + test/posix_memalign.c | 121 + test/posix_memalign.exp | 25 + test/rallocm.c | 117 + test/rallocm.exp | 2 + test/thread_arena.c | 92 + test/thread_arena.exp | 2 + 158 files changed, 30047 insertions(+), 30047 deletions(-) create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 INSTALL create mode 100644 Makefile.in create mode 100644 README create mode 100755 autogen.sh create mode 100755 bin/pprof create mode 100755 config.guess create mode 100644 config.stamp.in create mode 100755 config.sub create mode 100644 configure.ac create mode 100644 
doc/html.xsl.in create mode 100644 doc/jemalloc.xml.in create mode 100644 doc/manpages.xsl.in create mode 100644 doc/stylesheet.xsl create mode 100644 include/jemalloc/internal/arena.h create mode 100644 include/jemalloc/internal/atomic.h create mode 100644 include/jemalloc/internal/base.h create mode 100644 include/jemalloc/internal/bitmap.h create mode 100644 include/jemalloc/internal/chunk.h create mode 100644 include/jemalloc/internal/chunk_dss.h create mode 100644 include/jemalloc/internal/chunk_mmap.h create mode 100644 include/jemalloc/internal/chunk_swap.h create mode 100644 include/jemalloc/internal/ckh.h create mode 100644 include/jemalloc/internal/ctl.h create mode 100644 include/jemalloc/internal/extent.h create mode 100644 include/jemalloc/internal/hash.h create mode 100644 include/jemalloc/internal/huge.h create mode 100644 include/jemalloc/internal/jemalloc_internal.h.in create mode 100644 include/jemalloc/internal/mb.h create mode 100644 include/jemalloc/internal/mutex.h create mode 100644 include/jemalloc/internal/prn.h create mode 100644 include/jemalloc/internal/prof.h create mode 100644 include/jemalloc/internal/ql.h create mode 100644 include/jemalloc/internal/qr.h create mode 100644 include/jemalloc/internal/rb.h create mode 100644 include/jemalloc/internal/rtree.h create mode 100644 include/jemalloc/internal/stats.h create mode 100644 include/jemalloc/internal/tcache.h create mode 100644 include/jemalloc/internal/zone.h create mode 100644 include/jemalloc/jemalloc.h.in create mode 100644 include/jemalloc/jemalloc_defs.h.in create mode 100755 install-sh delete mode 100644 jemalloc/COPYING delete mode 100644 jemalloc/ChangeLog delete mode 100644 jemalloc/INSTALL delete mode 100644 jemalloc/Makefile.in delete mode 100644 jemalloc/README delete mode 100755 jemalloc/autogen.sh delete mode 100755 jemalloc/bin/pprof delete mode 100755 jemalloc/config.guess delete mode 100644 jemalloc/config.stamp.in delete mode 100755 jemalloc/config.sub delete mode 
100644 jemalloc/configure.ac delete mode 100644 jemalloc/doc/html.xsl.in delete mode 100644 jemalloc/doc/jemalloc.xml.in delete mode 100644 jemalloc/doc/manpages.xsl.in delete mode 100644 jemalloc/doc/stylesheet.xsl delete mode 100644 jemalloc/include/jemalloc/internal/arena.h delete mode 100644 jemalloc/include/jemalloc/internal/atomic.h delete mode 100644 jemalloc/include/jemalloc/internal/base.h delete mode 100644 jemalloc/include/jemalloc/internal/bitmap.h delete mode 100644 jemalloc/include/jemalloc/internal/chunk.h delete mode 100644 jemalloc/include/jemalloc/internal/chunk_dss.h delete mode 100644 jemalloc/include/jemalloc/internal/chunk_mmap.h delete mode 100644 jemalloc/include/jemalloc/internal/chunk_swap.h delete mode 100644 jemalloc/include/jemalloc/internal/ckh.h delete mode 100644 jemalloc/include/jemalloc/internal/ctl.h delete mode 100644 jemalloc/include/jemalloc/internal/extent.h delete mode 100644 jemalloc/include/jemalloc/internal/hash.h delete mode 100644 jemalloc/include/jemalloc/internal/huge.h delete mode 100644 jemalloc/include/jemalloc/internal/jemalloc_internal.h.in delete mode 100644 jemalloc/include/jemalloc/internal/mb.h delete mode 100644 jemalloc/include/jemalloc/internal/mutex.h delete mode 100644 jemalloc/include/jemalloc/internal/prn.h delete mode 100644 jemalloc/include/jemalloc/internal/prof.h delete mode 100644 jemalloc/include/jemalloc/internal/ql.h delete mode 100644 jemalloc/include/jemalloc/internal/qr.h delete mode 100644 jemalloc/include/jemalloc/internal/rb.h delete mode 100644 jemalloc/include/jemalloc/internal/rtree.h delete mode 100644 jemalloc/include/jemalloc/internal/stats.h delete mode 100644 jemalloc/include/jemalloc/internal/tcache.h delete mode 100644 jemalloc/include/jemalloc/internal/zone.h delete mode 100644 jemalloc/include/jemalloc/jemalloc.h.in delete mode 100644 jemalloc/include/jemalloc/jemalloc_defs.h.in delete mode 100755 jemalloc/install-sh delete mode 100644 jemalloc/src/arena.c delete mode 100644 
jemalloc/src/atomic.c delete mode 100644 jemalloc/src/base.c delete mode 100644 jemalloc/src/bitmap.c delete mode 100644 jemalloc/src/chunk.c delete mode 100644 jemalloc/src/chunk_dss.c delete mode 100644 jemalloc/src/chunk_mmap.c delete mode 100644 jemalloc/src/chunk_swap.c delete mode 100644 jemalloc/src/ckh.c delete mode 100644 jemalloc/src/ctl.c delete mode 100644 jemalloc/src/extent.c delete mode 100644 jemalloc/src/hash.c delete mode 100644 jemalloc/src/huge.c delete mode 100644 jemalloc/src/jemalloc.c delete mode 100644 jemalloc/src/mb.c delete mode 100644 jemalloc/src/mutex.c delete mode 100644 jemalloc/src/prof.c delete mode 100644 jemalloc/src/rtree.c delete mode 100644 jemalloc/src/stats.c delete mode 100644 jemalloc/src/tcache.c delete mode 100644 jemalloc/src/zone.c delete mode 100644 jemalloc/test/allocated.c delete mode 100644 jemalloc/test/allocated.exp delete mode 100644 jemalloc/test/allocm.c delete mode 100644 jemalloc/test/allocm.exp delete mode 100644 jemalloc/test/bitmap.c delete mode 100644 jemalloc/test/bitmap.exp delete mode 100644 jemalloc/test/jemalloc_test.h.in delete mode 100644 jemalloc/test/mremap.c delete mode 100644 jemalloc/test/mremap.exp delete mode 100644 jemalloc/test/posix_memalign.c delete mode 100644 jemalloc/test/posix_memalign.exp delete mode 100644 jemalloc/test/rallocm.c delete mode 100644 jemalloc/test/rallocm.exp delete mode 100644 jemalloc/test/thread_arena.c delete mode 100644 jemalloc/test/thread_arena.exp create mode 100644 src/arena.c create mode 100644 src/atomic.c create mode 100644 src/base.c create mode 100644 src/bitmap.c create mode 100644 src/chunk.c create mode 100644 src/chunk_dss.c create mode 100644 src/chunk_mmap.c create mode 100644 src/chunk_swap.c create mode 100644 src/ckh.c create mode 100644 src/ctl.c create mode 100644 src/extent.c create mode 100644 src/hash.c create mode 100644 src/huge.c create mode 100644 src/jemalloc.c create mode 100644 src/mb.c create mode 100644 src/mutex.c create mode 
100644 src/prof.c create mode 100644 src/rtree.c create mode 100644 src/stats.c create mode 100644 src/tcache.c create mode 100644 src/zone.c create mode 100644 test/allocated.c create mode 100644 test/allocated.exp create mode 100644 test/allocm.c create mode 100644 test/allocm.exp create mode 100644 test/bitmap.c create mode 100644 test/bitmap.exp create mode 100644 test/jemalloc_test.h.in create mode 100644 test/mremap.c create mode 100644 test/mremap.exp create mode 100644 test/posix_memalign.c create mode 100644 test/posix_memalign.exp create mode 100644 test/rallocm.c create mode 100644 test/rallocm.exp create mode 100644 test/thread_arena.c create mode 100644 test/thread_arena.exp diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..10ade12 --- /dev/null +++ b/COPYING @@ -0,0 +1,51 @@ +Unless otherwise specified, files in the jemalloc source distribution are +subject to the following licenses: +-------------------------------------------------------------------------------- +Copyright (C) 2002-2010 Jason Evans . +All rights reserved. +Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- +Copyright (C) 2009-2010 Facebook, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. +* Neither the name of Facebook, Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..7b262c9 --- /dev/null +++ b/ChangeLog @@ -0,0 +1,213 @@ +Following are change highlights associated with official releases. Important +bug fixes are all mentioned, but internal enhancements are omitted here for +brevity (even though they are more fun to write about). Much more detail can be +found in the git revision history: + + http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git + git://canonware.com/jemalloc.git + +* 2.2.1 (March 30, 2011) + + Bug fixes: + - Implement atomic operations for x86/x64. This fixes compilation failures + for versions of gcc that are still in wide use. + - Fix an assertion in arena_purge(). + +* 2.2.0 (March 22, 2011) + + This version incorporates several improvements to algorithms and data + structures that tend to reduce fragmentation and increase speed. + + New features: + - Add the "stats.cactive" mallctl. + - Update pprof (from google-perftools 1.7). + - Improve backtracing-related configuration logic, and add the + --disable-prof-libgcc option. + + Bug fixes: + - Change default symbol visibility from "internal", to "hidden", which + decreases the overhead of library-internal function calls. + - Fix symbol visibility so that it is also set on OS X. + - Fix a build dependency regression caused by the introduction of the .pic.o + suffix for PIC object files. 
+ - Add missing checks for mutex initialization failures. + - Don't use libgcc-based backtracing except on x64, where it is known to work. + - Fix deadlocks on OS X that were due to memory allocation in + pthread_mutex_lock(). + - Heap profiling-specific fixes: + + Fix memory corruption due to integer overflow in small region index + computation, when using a small enough sample interval that profiling + context pointers are stored in small run headers. + + Fix a bootstrap ordering bug that only occurred with TLS disabled. + + Fix a rallocm() rsize bug. + + Fix error detection bugs for aligned memory allocation. + +* 2.1.3 (March 14, 2011) + + Bug fixes: + - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix + for OS X in 2.1.2). + - Fix a "thread.arena" mallctl bug. + - Fix a thread cache stats merging bug. + +* 2.1.2 (March 2, 2011) + + Bug fixes: + - Fix "thread.{de,}allocatedp" mallctl for OS X. + - Add missing jemalloc.a to build system. + +* 2.1.1 (January 31, 2011) + + Bug fixes: + - Fix aligned huge reallocation (affected allocm()). + - Fix the ALLOCM_LG_ALIGN macro definition. + - Fix a heap dumping deadlock. + - Fix a "thread.arena" mallctl bug. + +* 2.1.0 (December 3, 2010) + + This version incorporates some optimizations that can't quite be considered + bug fixes. + + New features: + - Use Linux's mremap(2) for huge object reallocation when possible. + - Avoid locking in mallctl*() when possible. + - Add the "thread.[de]allocatedp" mallctl's. + - Convert the manual page source from roff to DocBook, and generate both roff + and HTML manuals. + + Bug fixes: + - Fix a crash due to incorrect bootstrap ordering. This only impacted + --enable-debug --enable-dss configurations. + - Fix a minor statistics bug for mallctl("swap.avail", ...). + +* 2.0.1 (October 29, 2010) + + Bug fixes: + - Fix a race condition in heap profiling that could cause undefined behavior + if "opt.prof_accum" were disabled. 
+ - Add missing mutex unlocks for some OOM error paths in the heap profiling + code. + - Fix a compilation error for non-C99 builds. + +* 2.0.0 (October 24, 2010) + + This version focuses on the experimental *allocm() API, and on improved + run-time configuration/introspection. Nonetheless, numerous performance + improvements are also included. + + New features: + - Implement the experimental {,r,s,d}allocm() API, which provides a superset + of the functionality available via malloc(), calloc(), posix_memalign(), + realloc(), malloc_usable_size(), and free(). These functions can be used to + allocate/reallocate aligned zeroed memory, ask for optional extra memory + during reallocation, prevent object movement during reallocation, etc. + - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is + more human-readable, and more flexible. For example: + JEMALLOC_OPTIONS=AJP + is now: + MALLOC_CONF=abort:true,fill:true,stats_print:true + - Port to Apple OS X. Sponsored by Mozilla. + - Make it possible for the application to control thread-->arena mappings via + the "thread.arena" mallctl. + - Add compile-time support for all TLS-related functionality via pthreads TSD. + This is mainly of interest for OS X, which does not support TLS, but has a + TSD implementation with similar performance. + - Override memalign() and valloc() if they are provided by the system. + - Add the "arenas.purge" mallctl, which can be used to synchronously purge all + dirty unused pages. + - Make cumulative heap profiling data optional, so that it is possible to + limit the amount of memory consumed by heap profiling data structures. + - Add per thread allocation counters that can be accessed via the + "thread.allocated" and "thread.deallocated" mallctls. + + Incompatible changes: + - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). + - Increase default backtrace depth from 4 to 128 for heap profiling. + - Disable interval-based profile dumps by default. 
+ + Bug fixes: + - Remove bad assertions in fork handler functions. These assertions could + cause aborts for some combinations of configure settings. + - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. + - Fix leak context reporting. This bug tended to cause the number of contexts + to be underreported (though the reported number of objects and bytes were + correct). + - Fix a realloc() bug for large in-place growing reallocation. This bug could + cause memory corruption, but it was hard to trigger. + - Fix an allocation bug for small allocations that could be triggered if + multiple threads raced to create a new run of backing pages. + - Enhance the heap profiler to trigger samples based on usable size, rather + than request size. + - Fix a heap profiling bug due to sometimes losing track of requested object + size for sampled objects. + +* 1.0.3 (August 12, 2010) + + Bug fixes: + - Fix the libunwind-based implementation of stack backtracing (used for heap + profiling). This bug could cause zero-length backtraces to be reported. + - Add a missing mutex unlock in library initialization code. If multiple + threads raced to initialize malloc, some of them could end up permanently + blocked. + +* 1.0.2 (May 11, 2010) + + Bug fixes: + - Fix junk filling of large objects, which could cause memory corruption. + - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual + memory limits could cause swap file configuration to fail. Contributed by + Jordan DeLong. + +* 1.0.1 (April 14, 2010) + + Bug fixes: + - Fix compilation when --enable-fill is specified. + - Fix threads-related profiling bugs that affected accuracy and caused memory + to be leaked during thread exit. + - Fix dirty page purging race conditions that could cause crashes. + - Fix crash in tcache flushing code during thread destruction. + +* 1.0.0 (April 11, 2010) + + This release focuses on speed and run-time introspection. 
Numerous + algorithmic improvements make this release substantially faster than its + predecessors. + + New features: + - Implement autoconf-based configuration system. + - Add mallctl*(), for the purposes of introspection and run-time + configuration. + - Make it possible for the application to manually flush a thread's cache, via + the "tcache.flush" mallctl. + - Base maximum dirty page count on proportion of active memory. + - Compute various additional run-time statistics, including per size class + statistics for large objects. + - Expose malloc_stats_print(), which can be called repeatedly by the + application. + - Simplify the malloc_message() signature to only take one string argument, + and incorporate an opaque data pointer argument for use by the application + in combination with malloc_stats_print(). + - Add support for allocation backed by one or more swap files, and allow the + application to disable over-commit if swap files are in use. + - Implement allocation profiling and leak checking. + + Removed features: + - Remove the dynamic arena rebalancing code, since thread-specific caching + reduces its utility. + + Bug fixes: + - Modify chunk allocation to work when address space layout randomization + (ASLR) is in use. + - Fix thread cleanup bugs related to TLS destruction. + - Handle 0-size allocation requests in posix_memalign(). + - Fix a chunk leak. The leaked chunks were never touched, so this impacted + virtual memory usage, but not physical memory usage. + +* linux_2008082[78]a (August 27/28, 2008) + + These snapshot releases are the simple result of incorporating Linux-specific + support into the FreeBSD malloc sources. 
+ +-------------------------------------------------------------------------------- +vim:filetype=text:textwidth=80 diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..11a457a --- /dev/null +++ b/INSTALL @@ -0,0 +1,251 @@ +Building and installing jemalloc can be as simple as typing the following while +in the root directory of the source tree: + + ./configure + make + make install + +=== Advanced configuration ===================================================== + +The 'configure' script supports numerous options that allow control of which +functionality is enabled, where jemalloc is installed, etc. Optionally, pass +any of the following arguments (not a definitive list) to 'configure': + +--help + Print a definitive list of options. + +--prefix=<install-root-dir> + Set the base directory in which to install. For example: + + ./configure --prefix=/usr/local + + will cause files to be installed into /usr/local/include, /usr/local/lib, + and /usr/local/man. + +--with-rpath=<colon-separated-rpath> + Embed one or more library paths, so that libjemalloc can find the libraries + it is linked to. This works only on ELF-based systems. + +--with-jemalloc-prefix=<prefix> + Prefix all public APIs with <prefix>. For example, if <prefix> is + "prefix_", API changes like the following occur: + + malloc() --> prefix_malloc() + malloc_conf --> prefix_malloc_conf + /etc/malloc.conf --> /etc/prefix_malloc.conf + MALLOC_CONF --> PREFIX_MALLOC_CONF + + This makes it possible to use jemalloc at the same time as the system + allocator, or even to use multiple copies of jemalloc simultaneously. + + By default, the prefix is "", except on OS X, where it is "je_". On OS X, + jemalloc overlays the default malloc zone, but makes no attempt to actually + replace the "malloc", "calloc", etc. symbols. + +--with-install-suffix=<suffix> + Append <suffix> to the base name of all installed files, such that multiple + versions of jemalloc can coexist in the same installation directory. For + example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0. 
+ +--enable-cc-silence + Enable code that silences non-useful compiler warnings. This is helpful + when trying to tell serious warnings from those due to compiler + limitations, but it potentially incurs a performance penalty. + +--enable-debug + Enable assertions and validation code. This incurs a substantial + performance hit, but is very useful during application development. + +--enable-stats + Enable statistics gathering functionality. See the "opt.stats_print" + option documentation for usage details. + +--enable-prof + Enable heap profiling and leak detection functionality. See the "opt.prof" + option documentation for usage details. When enabled, there are several + approaches to backtracing, and the configure script chooses the first one + in the following list that appears to function correctly: + + + libunwind (requires --enable-prof-libunwind) + + libgcc (unless --disable-prof-libgcc) + + gcc intrinsics (unless --disable-prof-gcc) + +--enable-prof-libunwind + Use the libunwind library (http://www.nongnu.org/libunwind/) for stack + backtracing. + +--disable-prof-libgcc + Disable the use of libgcc's backtracing functionality. + +--disable-prof-gcc + Disable the use of gcc intrinsics for backtracing. + +--with-static-libunwind=<libunwind.a> + Statically link against the specified libunwind.a rather than dynamically + linking with -lunwind. + +--disable-tiny + Disable tiny (sub-quantum-sized) object support. Technically it is not + legal for a malloc implementation to allocate objects with less than + quantum alignment (8 or 16 bytes, depending on architecture), but in + practice it never causes any problems if, for example, 4-byte allocations + are 4-byte-aligned. + +--disable-tcache + Disable thread-specific caches for small objects. Objects are cached and + released in bulk, thus reducing the total number of mutex operations. See + the "opt.tcache" option for usage details. + +--enable-swap + Enable mmap()ed swap file support. 
When this feature is built in, it is + possible to specify one or more files that act as backing store. This + effectively allows for per application swap files. + +--enable-dss + Enable support for page allocation/deallocation via sbrk(2), in addition to + mmap(2). + +--enable-fill + Enable support for junk/zero filling of memory. See the "opt.junk"/ + "opt.zero" option documentation for usage details. + +--enable-xmalloc + Enable support for optional immediate termination due to out-of-memory + errors, as is commonly implemented by "xmalloc" wrapper function for malloc. + See the "opt.xmalloc" option documentation for usage details. + +--enable-sysv + Enable support for System V semantics, wherein malloc(0) returns NULL + rather than a minimal allocation. See the "opt.sysv" option documentation + for usage details. + +--enable-dynamic-page-shift + Under most conditions, the system page size never changes (usually 4KiB or + 8KiB, depending on architecture and configuration), and unless this option + is enabled, jemalloc assumes that page size can safely be determined during + configuration and hard-coded. Enabling dynamic page size determination has + a measurable impact on performance, since the compiler is forced to load + the page size from memory rather than embedding immediate values. + +--disable-lazy-lock + Disable code that wraps pthread_create() to detect when an application + switches from single-threaded to multi-threaded mode, so that it can avoid + mutex locking/unlocking operations while in single-threaded mode. In + practice, this feature usually has little impact on performance unless + thread-specific caching is disabled. + +--disable-tls + Disable thread-local storage (TLS), which allows for fast access to + thread-local variables via the __thread keyword. If TLS is available, + jemalloc uses it for several purposes. + +--with-xslroot=<path> + Specify where to find DocBook XSL stylesheets when building the + documentation. 
+ +The following environment variables (not a definitive list) impact configure's +behavior: + +CFLAGS="?" + Pass these flags to the compiler. You probably shouldn't define this unless + you know what you are doing. (Use EXTRA_CFLAGS instead.) + +EXTRA_CFLAGS="?" + Append these flags to CFLAGS. This makes it possible to add flags such as + -Werror, while allowing the configure script to determine what other flags + are appropriate for the specified configuration. + + The configure script specifically checks whether an optimization flag (-O*) + is specified in EXTRA_CFLAGS, and refrains from specifying an optimization + level if it finds that one has already been specified. + +CPPFLAGS="?" + Pass these flags to the C preprocessor. Note that CFLAGS is not passed to + 'cpp' when 'configure' is looking for include files, so you must use + CPPFLAGS instead if you need to help 'configure' find header files. + +LD_LIBRARY_PATH="?" + 'ld' uses this colon-separated list to find libraries. + +LDFLAGS="?" + Pass these flags when linking. + +PATH="?" + 'configure' uses this to find programs. + +=== Advanced compilation ======================================================= + +To install only parts of jemalloc, use the following targets: + + install_bin + install_include + install_lib + install_doc + +To clean up build results to varying degrees, use the following make targets: + + clean + distclean + relclean + +=== Advanced installation ====================================================== + +Optionally, define make variables when invoking make, including (not +exclusively): + +INCLUDEDIR="?" + Use this as the installation prefix for header files. + +LIBDIR="?" + Use this as the installation prefix for libraries. + +MANDIR="?" + Use this as the installation prefix for man pages. + +DESTDIR="?" + Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful + when installing to a different path than was specified via --prefix. + +CC="?" 
+ Use this to invoke the C compiler. + +CFLAGS="?" + Pass these flags to the compiler. + +CPPFLAGS="?" + Pass these flags to the C preprocessor. + +LDFLAGS="?" + Pass these flags when linking. + +PATH="?" + Use this to search for programs used during configuration and building. + +=== Development ================================================================ + +If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' +script rather than 'configure'. This re-generates 'configure', enables +configuration dependency rules, and enables re-generation of automatically +generated source files. + +The build system supports using an object directory separate from the source +tree. For example, you can create an 'obj' directory, and from within that +directory, issue configuration and build commands: + + autoconf + mkdir obj + cd obj + ../configure --enable-autogen + make + +=== Documentation ============================================================== + +The manual page is generated in both html and roff formats. Any web browser +can be used to view the html manual. The roff manual page can be formatted +prior to installation via any of the following commands: + + nroff -man -t doc/jemalloc.3 + + groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf + + (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html) diff --git a/Makefile.in b/Makefile.in new file mode 100644 index 0000000..26da0e2 --- /dev/null +++ b/Makefile.in @@ -0,0 +1,259 @@ +# Clear out all vpaths, then set just one (default vpath) for the main build +# directory. +vpath +vpath % . + +# Clear the default suffixes, so that built-in rules are not used. +.SUFFIXES : + +SHELL := /bin/sh + +CC := @CC@ + +# Configuration parameters. +DESTDIR = +BINDIR := $(DESTDIR)@BINDIR@ +INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ +LIBDIR := $(DESTDIR)@LIBDIR@ +DATADIR := $(DESTDIR)@DATADIR@ +MANDIR := $(DESTDIR)@MANDIR@ + +# Build parameters. 
+CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include +CFLAGS := @CFLAGS@ +ifeq (macho, @abi@) +CFLAGS += -dynamic +endif +LDFLAGS := @LDFLAGS@ +LIBS := @LIBS@ +RPATH_EXTRA := @RPATH_EXTRA@ +ifeq (macho, @abi@) +SO := dylib +WL_SONAME := dylib_install_name +else +SO := so +WL_SONAME := soname +endif +REV := 1 +ifeq (macho, @abi@) +TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib +else +TEST_LIBRARY_PATH := +endif + +# Lists of files. +BINS := @srcroot@bin/pprof +CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ + @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h +CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ + @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ + @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ + @srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \ + @srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \ + @srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \ + @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c +ifeq (macho, @abi@) +CSRCS += @srcroot@src/zone.c +endif +STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a +DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ + @objroot@lib/libjemalloc@install_suffix@.$(SO) \ + @objroot@lib/libjemalloc@install_suffix@_pic.a +MAN3 := @objroot@doc/jemalloc@install_suffix@.3 +DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml +DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) +DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) +DOCS := $(DOCS_HTML) $(DOCS_MAN3) +CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ + @srcroot@test/bitmap.c @srcroot@test/mremap.c \ + @srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \ + @srcroot@test/thread_arena.c + +.PHONY: all dist doc_html doc_man doc +.PHONY: install_bin install_include install_lib +.PHONY: install_html install_man install_doc install +.PHONY: tests check clean distclean relclean 
+ +.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) + +# Default target. +all: $(DSOS) $(STATIC_LIBS) + +dist: doc + +@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl + @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< + +@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl + @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< + +doc_html: $(DOCS_HTML) +doc_man: $(DOCS_MAN3) +doc: $(DOCS) + +# +# Include generated dependency files. +# +-include $(CSRCS:@srcroot@%.c=@objroot@%.d) +-include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) +-include $(CTESTS:@srcroot@%.c=@objroot@%.d) + +@objroot@src/%.o: @srcroot@src/%.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< + @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" + +@objroot@src/%.pic.o: @srcroot@src/%.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< + @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)" + +%.$(SO) : %.$(SO).$(REV) + @mkdir -p $(@D) + ln -sf $( $(@:%.o=%.d)" + +# Automatic dependency generation misses #include "*.c". 
+@objroot@test/bitmap.o : @objroot@src/bitmap.o + +@objroot@test/%: @objroot@test/%.o \ + @objroot@lib/libjemalloc@install_suffix@.$(SO) + @mkdir -p $(@D) +ifneq (@RPATH@, ) + $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ +else + $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ +endif + +install_bin: + install -d $(BINDIR) + @for b in $(BINS); do \ + echo "install -m 755 $$b $(BINDIR)"; \ + install -m 755 $$b $(BINDIR); \ +done + +install_include: + install -d $(INCLUDEDIR)/jemalloc + @for h in $(CHDRS); do \ + echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ +done + +install_lib: $(DSOS) $(STATIC_LIBS) + install -d $(LIBDIR) + install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR) + ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO) + install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR) + install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR) + +install_html: + install -d $(DATADIR)/doc/jemalloc@install_suffix@ + @for d in $(DOCS_HTML); do \ + echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \ + install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \ +done + +install_man: + install -d $(MANDIR)/man3 + @for d in $(DOCS_MAN3); do \ + echo "install -m 644 $$d $(MANDIR)/man3"; \ + install -m 644 $$d $(MANDIR)/man3; \ +done + +install_doc: install_html install_man + +install: install_bin install_include install_lib install_doc + +tests: $(CTESTS:@srcroot@%.c=@objroot@%) + +check: tests + @mkdir -p @objroot@test + @$(SHELL) -c 'total=0; \ + failures=0; \ + echo "========================================="; \ + for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \ + total=`expr $$total + 1`; \ + /bin/echo -n "$${t} ... 
"; \ + $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \ + > @objroot@$${t}.out 2>&1; \ + if test -e "@srcroot@$${t}.exp"; then \ + diff -u @srcroot@$${t}.exp \ + @objroot@$${t}.out >/dev/null 2>&1; \ + fail=$$?; \ + if test "$${fail}" -eq "1" ; then \ + failures=`expr $${failures} + 1`; \ + echo "*** FAIL ***"; \ + else \ + echo "pass"; \ + fi; \ + else \ + echo "*** FAIL *** (.exp file is missing)"; \ + failures=`expr $${failures} + 1`; \ + fi; \ + done; \ + echo "========================================="; \ + echo "Failures: $${failures}/$${total}"' + +clean: + rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o) + rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o) + rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d) + rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) + rm -f $(CTESTS:@srcroot@%.c=@objroot@%) + rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o) + rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d) + rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out) + rm -f $(DSOS) $(STATIC_LIBS) + +distclean: clean + rm -rf @objroot@autom4te.cache + rm -f @objroot@config.log + rm -f @objroot@config.status + rm -f @objroot@config.stamp + rm -f @cfghdrs_out@ + rm -f @cfgoutputs_out@ + +relclean: distclean + rm -f @objroot@configure + rm -f @srcroot@VERSION + rm -f $(DOCS_HTML) + rm -f $(DOCS_MAN3) + +#=============================================================================== +# Re-configuration rules. + +ifeq (@enable_autogen@, 1) +@srcroot@configure : @srcroot@configure.ac + cd ./@srcroot@ && @AUTOCONF@ + +@objroot@config.status : @srcroot@configure + ./@objroot@config.status --recheck + +@srcroot@config.stamp.in : @srcroot@configure.ac + echo stamp > @srcroot@config.stamp.in + +@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure + ./@objroot@config.status + @touch $@ + +# There must be some action in order for make to re-read Makefile when it is +# out of date. 
+@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp + @true +endif diff --git a/README b/README new file mode 100644 index 0000000..4d7b552 --- /dev/null +++ b/README @@ -0,0 +1,16 @@ +jemalloc is a general-purpose scalable concurrent malloc(3) implementation. +This distribution is a stand-alone "portable" implementation that currently +targets Linux and Apple OS X. jemalloc is included as the default allocator in +the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox +web browser on Microsoft Windows-related platforms. Depending on your needs, +one of the other divergent versions may suit your needs better than this +distribution. + +The COPYING file contains copyright and licensing information. + +The INSTALL file contains information on how to configure, build, and install +jemalloc. + +The ChangeLog file contains a brief summary of changes for each release. + +URL: http://www.canonware.com/jemalloc/ diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..75f32da --- /dev/null +++ b/autogen.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +for i in autoconf; do + echo "$i" + $i + if [ $? -ne 0 ]; then + echo "Error $? in $i" + exit 1 + fi +done + +echo "./configure --enable-autogen $@" +./configure --enable-autogen $@ +if [ $? -ne 0 ]; then + echo "Error $? in ./configure" + exit 1 +fi diff --git a/bin/pprof b/bin/pprof new file mode 100755 index 0000000..280ddcc --- /dev/null +++ b/bin/pprof @@ -0,0 +1,4893 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# +# This program parses the profile, and generates user-readable +# output. 
+# +# Examples: +# +# % tools/pprof "program" "profile" +# Enters "interactive" mode +# +# % tools/pprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/pprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/pprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/pprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list= pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm= pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; + +my $PPROF_VERSION = "1.7"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the PPROF_TOOLS +# environment variable, or from the environment. 
+my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local +my $GV = "gv"; +my $EVINCE = "evince"; # could also be xpdf or perhaps acroread +my $KCACHEGRIND = "kcachegrind"; +my $PS2PDF = "ps2pdf"; +# These are used for dynamic profiles +my $URL_FETCHER = "curl -s"; + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#" +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. 
To err on the safe side, default to 64-bit here: +my $address_length = 16; + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + +##### Argument parsing ##### + +sub usage_string { + return < + is a space separated list of profile names. +pprof [options] + is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +pprof [options] + is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/] - a location of a service to get profile from + + The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: "pprof http://myserver.com:80$HEAP_PAGE". + If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +pprof --symbols + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base= Subtract from before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds= Length of time for dynamic profiles [default=30 secs] + --add_lib= Read additional symbols and line info from the given library + --lib_prefix= Comma separated list of library path prefixes + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list= Generate source listing of matching routines + --disasm= Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized pprof data (useful with remote fetch) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount= Show at most so many nodes [default=80] + --nodefraction= Hide nodes below *total 
[default=.005] + --edgefraction= Hide edges below *total [default=.001] + --maxdegree= Max incoming/outgoing edges per node [default=8] + --focus= Focus on nodes matching + --ignore= Ignore nodes matching + --scale= Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. direct leak generators) more visible + +Miscellaneous: + --tools=[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + +Environment Variables: + PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof + PPROF_TOOLS Prefix for object tools pathnames + +Examples: + +pprof /bin/ls ls.prof + Enters "interactive" mode +pprof --text /bin/ls ls.prof + Outputs one line per procedure +pprof --web /bin/ls ls.prof + Displays annotated call-graph in web browser +pprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +pprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +pprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +pprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +pprof http://localhost:1234/ + Enters "interactive" mode +pprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +pprof --raw localhost:1234 > ./local.raw +pprof --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return < \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" 
=> \$main::opt_text, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" 
=> \$main::opt_debug, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 
0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... 
+ scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! + ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info than most. 
+ if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Get total data in profile + my $total = TotalProfile($profile); + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + # Remove uninteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? 
+ if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); + } elsif ($main::opt_list) { + PrintListing($libs, $flat, $cumulative, $main::opt_list); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total: %s %s\n", Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, $total, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. 
+ delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system("$GV --version >/dev/null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR "$GV -scale $main::opt_scale\n"; + system("$GV -scale $main::opt_scale " . $fname . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system("$EVINCE " . $fname . $bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. 
+ my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; + system("$KCACHEGRIND " . $fname . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to pprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'pprof'; + while ( defined ($_ = $term->readline('(pprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(pprof) "; + $_ = ; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. 
+# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $total, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*list\s*(.+)/) { + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my 
$routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine, $total); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + my $total_count = TotalProfile($profile); + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, $profile, $focus); + my $focus_count = TotalProfile($profile); + 
printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. 
+ for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while () { + print $_; + } + close(SRC); + } else { + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $total = shift; + my $line_limit = shift; + + # Which profile to sort by? + my $s = $main::opt_cum ? $cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines > $line_limit); + } +} + +# Print the call graph in a way that's suiteable for callgrind. 
+sub PrintCallgrind {
+  # Args:
+  #   $calls    - hash ref whose keys are either "file:line:function"
+  #               (a node) or "file:line:function -> file:line:function"
+  #               (a call edge); values are counts.
+  #   $filename - output path.  Only consumed in interactive mode; in
+  #               batch mode the output is written to STDOUT.
+  my $calls = shift;
+  my $filename;
+  if ($main::opt_interactive) {
+    $filename = shift;
+    print STDERR "Writing callgrind file to '$filename'.\n"
+  } else {
+    $filename = "&STDOUT";
+  }
+  if (!open(CG, ">" . $filename)) {
+    print STDERR "$filename: $!\n";
+    return;
+  }
+
+  # NOTE: everything below is emitted with print, not printf.  The text
+  # contains file and function names taken from the profile, and a '%'
+  # in one of them (e.g. "operator%") must not be read as a format
+  # directive.
+  print CG "events: Hits\n\n";
+
+  my $call_re = qr/([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
+
+  # Order the entries by caller file name, then by caller line number.
+  foreach my $call ( map { $_->[0] }
+                     sort { $a->[1] cmp $b->[1] ||
+                            $a->[2] <=> $b->[2] }
+                     map { /$call_re/;
+                           [$_, $1, $2] }
+                     keys %$calls ) {
+    my $count = int($calls->{$call});
+    $call =~ /$call_re/;
+    my ( $caller_file, $caller_line, $caller_function,
+         $callee_file, $callee_line, $callee_function ) =
+       ( $1, $2, $3, $5, $6, $7 );
+
+    print CG "fl=$caller_file\nfn=$caller_function\n";
+    if (defined $callee_line) {
+      # The key had a " -> callee" part: describe the call edge.
+      print CG "cfl=$callee_file\n";
+      print CG "cfn=$callee_function\n";
+      print CG "calls=$count $callee_line\n";
+    }
+    print CG "$caller_line $count\n\n";
+  }
+
+  # Flush and release the handle so that a viewer started right after
+  # this call sees the complete file.
+  close(CG);
+}
+
+# Print disassembly for all routines selected by the $disasm_opts
+# argument (handed through to GetProcedureBoundaries) that contain at
+# least one sample.
+sub PrintDisassembly {
+  my $libs = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $disasm_opts = shift;
+  my $total = shift;
+
+  foreach my $lib (@{$libs}) {
+    my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
+    my $offset = AddressSub($lib->[1], $lib->[3]);
+    foreach my $routine (sort ByName keys(%{$symbol_table})) {
+      my $start_addr = $symbol_table->{$routine}->[0];
+      my $end_addr = $symbol_table->{$routine}->[1];
+      # See if there are any samples in this routine
+      my $length = hex(AddressSub($end_addr, $start_addr));
+      my $addr = AddressAdd($start_addr, $offset);
+      for (my $i = 0; $i < $length; $i++) {
+        if (defined($cumulative->{$addr})) {
+          # One sampled address is enough: print the routine once.
+          PrintDisassembledFunction($lib->[0], $offset,
+                                    $routine, $flat, $cumulative,
+                                    $start_addr, $end_addr, $total);
+          last;
+        }
+        $addr = AddressInc($addr);
+      }
+    }
+  }
+}
+
+# Return a list of array references (one per disassembled instruction)
+# of the form:
+#       [start_address, filename, linenumber, instruction, limit_address]
+# E.g.,
+#       ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
+sub Disassemble {
+  my $prog = shift;
+  my $offset = shift;
+  my $start_addr = shift;
+  my $end_addr = shift;
+
+  my $objdump = $obj_tool_map{"objdump"};
+  # Plain concatenation (no sprintf): the command contains the program
+  # path, and a '%' in it must not be treated as a format directive.
+  my $cmd = "$objdump -C -d -l --no-show-raw-insn " .
+            "--start-address=0x$start_addr " .
+            "--stop-address=0x$end_addr $prog";
+  open(OBJDUMP, "$cmd |") || error("$objdump: $!\n");
+  my @result = ();
+  my $filename = "";
+  my $linenumber = -1;
+  my $last = ["", "", "", ""];
+  while (<OBJDUMP>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    chomp;           # only strip a newline, never a payload character
+    if (m|\s*([^:\s]+):(\d+)\s*$|) {
+      # Location line of the form:
+      #   <filename>:<linenumber>
+      $filename = $1;
+      $linenumber = $2;
+    } elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
+      # Disassembly line -- zero-extend address to full length
+      my $addr = HexExtend($1);
+      my $k = AddressAdd($addr, $offset);
+      $last->[4] = $k;   # Store ending address for previous instruction
+      $last = [$k, $filename, $linenumber, $2, $end_addr];
+      push(@result, $last);
+    }
+  }
+  close(OBJDUMP);
+  return @result;
+}
+
+# The input file should contain lines of the form /proc/maps-like
+# output (same format as expected from the profiles) or that looks
+# like hex addresses (like "0xDEADBEEF").  We will parse all
+# /proc/maps output, and for all the hex addresses, we will output
+# "short" symbol names, one per line, in the same order as the input.
+sub PrintSymbols {
+  # The argument is read with the <...> operator, i.e. it is used as a
+  # filehandle.
+  my $maps_and_symbols_file = shift;
+
+  # ParseLibraries expects pcs to be in a set.  Fine by us...
+  my @pclist = ();   # pcs in the order they appear in the input
+  my $pcs = {};
+  my $map = "";
+  foreach my $line (<$maps_and_symbols_file>) {
+    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
+    if ($line =~ /\b(0x[0-9a-f]+)\b/i) {
+      push(@pclist, HexExtend($1));
+      $pcs->{$pclist[-1]} = 1;
+    } else {
+      # Anything without a hex address is treated as mapping text.
+      $map .= $line;
+    }
+  }
+
+  my $libs = ParseLibraries($main::prog, $map, $pcs);
+  my $symbols = ExtractSymbols($libs, $pcs);
+
+  foreach my $pc (@pclist) {
+    # ->[0] is the shortname, ->[2] is the full name
+    print(($symbols->{$pc}->[0] || "??") . "\n");
+  }
+}
+
+
+# For sorting functions by name
+sub ByName {
+  return ShortFunctionName($a) cmp ShortFunctionName($b);
+}
+
+# Print source-listing for all routines selected by the $list_opts
+# argument (handed through to GetProcedureBoundaries) that contain at
+# least one sample.
+sub PrintListing {
+  my $libs = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $list_opts = shift;
+
+  foreach my $lib (@{$libs}) {
+    my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
+    my $offset = AddressSub($lib->[1], $lib->[3]);
+    foreach my $routine (sort ByName keys(%{$symbol_table})) {
+      # Print if there are any samples in this routine
+      my $start_addr = $symbol_table->{$routine}->[0];
+      my $end_addr = $symbol_table->{$routine}->[1];
+      my $length = hex(AddressSub($end_addr, $start_addr));
+      my $addr = AddressAdd($start_addr, $offset);
+      for (my $i = 0; $i < $length; $i++) {
+        if (defined($cumulative->{$addr})) {
+          # One sampled address is enough: print the routine once.
+          PrintSource($lib->[0], $offset,
+                      $routine, $flat, $cumulative,
+                      $start_addr, $end_addr);
+          last;
+        }
+        $addr = AddressInc($addr);
+      }
+    }
+  }
+}
+
+# Returns the indentation of the line, if it has any non-whitespace
+# characters.  Otherwise, returns -1.
+sub Indentation {
+  # Arg: the text of one line.  The match is made against this argument
+  # rather than against the caller's $_.
+  my $line = shift;
+  if ($line =~ m/^(\s*)\S/) {
+    return length($1);
+  } else {
+    return -1;
+  }
+}
+
+# Print source-listing for one routine
+#
+# Args:
+#   $prog, $offset, $start_addr, $end_addr - handed to Disassemble() to
+#       obtain the instruction list for the routine.
+#   $routine    - routine name, used in the header and in diagnostics.
+#   $flat       - per-address flat counts (looked up with GetEntry).
+#   $cumulative - per-address cumulative counts (looked up with GetEntry).
+sub PrintSource {
+  my $prog = shift;
+  my $offset = shift;
+  my $routine = shift;
+  my $flat = shift;
+  my $cumulative = shift;
+  my $start_addr = shift;
+  my $end_addr = shift;
+
+  # Disassemble all instructions (just to get line numbers)
+  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
+
+  # Hack 1: assume that the first source file encountered in the
+  # disassembly contains the routine
+  my $filename = undef;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    if ($instructions[$i]->[2] >= 0) {
+      $filename = $instructions[$i]->[1];
+      last;
+    }
+  }
+  if (!defined($filename)) {
+    print STDERR "no filename found in $routine\n";
+    return;
+  }
+
+  # Hack 2: assume that the largest line number from $filename is the
+  # end of the procedure.  This is typically safe since if P1 contains
+  # an inlined call to P2, then P2 usually occurs earlier in the
+  # source file.  If this does not work, we might have to compute a
+  # density profile or just print all regions we find.
+  my $lastline = 0;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    my $f = $instructions[$i]->[1];
+    my $l = $instructions[$i]->[2];
+    if (($f eq $filename) && ($l > $lastline)) {
+      $lastline = $l;
+    }
+  }
+
+  # Hack 3: assume the first source location from "filename" is the start of
+  # the source code.
+  my $firstline = 1;
+  for (my $i = 0; $i <= $#instructions; $i++) {
+    if ($instructions[$i]->[1] eq $filename) {
+      $firstline = $instructions[$i]->[2];
+      last;
+    }
+  }
+
+  # Hack 4: Extend last line forward until its indentation is less than
+  # the indentation we saw on $firstline
+  my $oldlastline = $lastline;
+  {
+    if (!open(FILE, "<$filename")) {
+      print STDERR "$filename: $!\n";
+      return;
+    }
+    my $l = 0;
+    my $first_indentation = -1;
+    while (<FILE>) {
+      s/\r//g;         # turn windows-looking lines into unix-looking lines
+      $l++;
+      my $indent = Indentation($_);
+      if ($l >= $firstline) {
+        if ($first_indentation < 0 && $indent >= 0) {
+          $first_indentation = $indent;
+          # A routine starting in column 0 gives nothing to compare
+          # against, so leave $lastline as computed above.
+          last if ($first_indentation == 0);
+        }
+      }
+      if ($l >= $lastline && $indent >= 0) {
+        if ($indent >= $first_indentation) {
+          $lastline = $l+1;
+        } else {
+          last;
+        }
+      }
+    }
+    close(FILE);
+  }
+
+  # Assign all samples to the range $firstline,$lastline,
+  # Hack 5: If an instruction does not occur in the range, its samples
+  # are moved to the next instruction that occurs in the range.
+  my $samples1 = {};
+  my $samples2 = {};
+  my $running1 = 0;  # Unassigned flat counts
+  my $running2 = 0;  # Unassigned cumulative counts
+  my $total1 = 0;    # Total flat counts
+  my $total2 = 0;    # Total cumulative counts
+  foreach my $e (@instructions) {
+    # Add up counts for all address that fall inside this instruction
+    # ($e->[0] is its start address, $e->[4] the address it ends at).
+    my $c1 = 0;
+    my $c2 = 0;
+    for (my $pc = $e->[0]; $pc lt $e->[4]; $pc = AddressInc($pc)) {
+      $c1 += GetEntry($flat, $pc);
+      $c2 += GetEntry($cumulative, $pc);
+    }
+    $running1 += $c1;
+    $running2 += $c2;
+    $total1 += $c1;
+    $total2 += $c2;
+    my $file = $e->[1];
+    my $line = $e->[2];
+    if (($file eq $filename) &&
+        ($line >= $firstline) &&
+        ($line <= $lastline)) {
+      # Assign all accumulated samples to this line
+      AddEntry($samples1, $line, $running1);
+      AddEntry($samples2, $line, $running2);
+      $running1 = 0;
+      $running2 = 0;
+    }
+  }
+
+  # Assign any leftover samples to $lastline
+  AddEntry($samples1, $lastline, $running1);
+  AddEntry($samples2, $lastline, $running2);
+
+  # Header: flat total, cumulative total, then the units -- the same
+  # column order as the per-line output below and as the
+  # "(flat / cumulative)" label states.
+  printf("ROUTINE ====================== %s in %s\n" .
+         "%6s %6s Total %s (flat / cumulative)\n",
+         ShortFunctionName($routine),
+         $filename,
+         Unparse($total1),
+         Unparse($total2),
+         Units());
+  if (!open(FILE, "<$filename")) {
+    print STDERR "$filename: $!\n";
+    return;
+  }
+  my $l = 0;
+  while (<FILE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    $l++;
+    # Show five lines of context before the routine, and after it up to
+    # whichever of the original / extended last line is further.
+    if ($l >= $firstline - 5 &&
+        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+      chomp;         # only strip a newline, never a payload character
+      my $text = $_;
+      if ($l == $firstline) { printf("---\n"); }
+      printf("%6s %6s %4d: %s\n",
+             UnparseAlt(GetEntry($samples1, $l)),
+             UnparseAlt(GetEntry($samples2, $l)),
+             $l,
+             $text);
+      if ($l == $lastline)  { printf("---\n"); }
+    }
+  }
+  close(FILE);
+}
+
+# Return the source line for the specified file/linenumber.
+# Returns undef if not found.
+sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while () { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . 
+ "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. + my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + my $address = $e->[0]; + $address = AddressSub($address, $offset); # Make relative to section + $address =~ s/^0x//; + $address =~ s/^0*//; + + # Trim symbols + my $d = $e->[3]; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ 
s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + $address, + $d); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + if ($main::opt_gv) { + $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); + } elsif ($main::opt_evince) { + $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf"); + } elsif ($main::opt_ps) { + $output = "| $DOT -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $DOT -Tps2 | $PS2PDF - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. + $output = "| $DOT -Tsvg >" . 
TempName($main::next_tmpfile, "svg"); + } elsif ($main::opt_gif) { + $output = "| $DOT -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . + "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $overall_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . 
+ "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $overall_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } + + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + + # Compute line width based on edge count + my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. 
+ $w = 1; + } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + + return 1; +} + +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = ; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # + # + # ... + # + # + # + # Change it to + # + # + # $svg_javascript + # + # + # ... + # + # + # + + # Fix width, height; drop viewBox. + $svg =~ s/(?s) above first + my $svg_javascript = SvgJavascript(); + my $viewport = "\n"; + $svg =~ s/ above . + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; + +EOF +} + +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. 
+sub ShortIdFor {
+  my ($key) = @_;
+
+  # Already numbered: hand back the id recorded in %main::uniqueid.
+  my $known = $main::uniqueid{$key};
+  return $known if (defined($known));
+
+  # First sighting: the next id is one more than the number of keys
+  # recorded so far.
+  my $fresh = keys(%main::uniqueid) + 1;
+  $main::uniqueid{$key} = $fresh;
+  return $fresh;
+}
+
+# Translate a stack of addresses into a stack of symbols
+#
+# Args:
+#   $symbols - hash ref keyed by address; each value is an array ref read
+#              in groups of three: function name, file:line, full name.
+#   $k       - the stack, as newline-separated addresses.
+# Returns the list of translated frames.
+sub TranslateStack {
+  my ($symbols, $k) = @_;
+
+  my @frames = split(/\n/, $k);
+  my @translated = ();
+  foreach my $idx (0 .. $#frames) {
+    my $pc = $frames[$idx];
+
+    # Skip large addresses since they sometimes show up as fake entries on RH9
+    next if (length($pc) > 8 && $pc gt "7fffffffffffffff");
+
+    # We want just the address for the key
+    if ($main::opt_disasm || $main::opt_list) {
+      push(@translated, $pc);
+      next;
+    }
+
+    # Addresses without symbol information stand in for themselves.
+    my $symlist = $symbols->{$pc};
+    $symlist = [$pc, "", $pc] if (!defined($symlist));
+
+    # We can have a sequence of symbols for a particular entry
+    # (more than one symbol in the case of inlining).  Callers
+    # come before callees in symlist, so walk backwards since
+    # the translated stack should contain callees before callers.
+    for (my $top = $#{$symlist}; $top >= 2; $top -= 3) {
+      my ($func, $fileline) = @{$symlist}[$top - 2, $top - 1];
+      $func = "$func (inline)" if ($top > 2);
+
+      # Do not merge nodes corresponding to Callback::Run since that
+      # causes confusing cycles in dot display.  Instead, we synthesize
+      # a unique name for this frame per caller.
+      if ($func =~ m/Callback.*::Run$/) {
+        my $caller = ($idx > 0) ? $frames[$idx - 1] : 0;
+        $func = "Run#" . ShortIdFor($caller);
+      }
+
+      # Pick the label for this frame according to the granularity flags.
+      my $label;
+      my $address_only = 0;
+      if ($main::opt_addresses) {
+        $label = "$pc $func $fileline";
+      } elsif ($main::opt_lines) {
+        $label = ($func eq '??' && $fileline eq '??:0')
+            ? "$pc"
+            : "$func $fileline";
+      } elsif ($main::opt_functions) {
+        $label = ($func eq '??') ? "$pc" : $func;
+      } elsif ($main::opt_files) {
+        if ($fileline eq '??:0' || $fileline eq '') {
+          $label = "$pc";
+        } else {
+          ($label = $fileline) =~ s/:\d+$//;
+        }
+      } else {
+        $label = $pc;
+        $address_only = 1;
+      }
+
+      push(@translated, $label);
+      last if ($address_only);   # Do not print inlined info
+    }
+  }
+
+  # print join(",", @frames), " => ", join(",", @translated), "\n";
+  return @translated;
+}
+
+# Generate percent string for a number and a total
+sub Percent {
+  my ($num, $tot) = @_;
+
+  return sprintf("%.1f%%", $num * 100.0 / $tot) if ($tot != 0);
+
+  # Zero total: describe the ratio symbolically.
+  return "nan" if ($num == 0);
+  return ($num > 0) ? "+inf" : "-inf";
+}
+
+# Generate pretty-printed form of number
+sub Unparse {
+  my ($num) = @_;
+  my $type = $main::profile_type;
+
+  if ($type eq 'heap' || $type eq 'growth') {
+    # Object counts and raw byte counts are printed as integers;
+    # otherwise the value is divided by 1048576 and shown with one
+    # decimal.
+    my $as_integer = $main::opt_inuse_objects ||
+                     $main::opt_alloc_objects ||
+                     $main::opt_show_bytes;
+    return sprintf("%d", $num) if ($as_integer);
+    return sprintf("%.1f", $num / 1048576.0);
+  }
+
+  if ($type eq 'contention' && !$main::opt_contentions) {
+    return sprintf("%.3f", $num / 1e9);  # Convert nanoseconds to seconds
+  }
+
+  return sprintf("%d", $num);
+}
+
+# Alternate pretty-printed form: 0 maps to "."
# Same formatting as Unparse(), except that an exact zero is rendered as "."
sub UnparseAlt {
  my $num = shift;
  return ($num == 0) ? "." : Unparse($num);
}

# Return the unit label that matches the numbers produced by Unparse():
# "objects", "B" or "MB" for heap/growth profiles, "seconds" for contention
# profiles measured in time, and "samples" for everything else.
sub Units {
  my $type = $main::profile_type;
  if ($type eq 'heap' || $type eq 'growth') {
    return "objects" if ($main::opt_inuse_objects || $main::opt_alloc_objects);
    return $main::opt_show_bytes ? "B" : "MB";
  }
  return "seconds" if ($type eq 'contention' && !$main::opt_contentions);
  return "samples";
}

##### Profile manipulation code #####

# Generate flattened profile:
#   If count is charged to stack [a,b,c,d], in generated profile,
#   it will be charged to [a]
# Profile keys are stacks encoded as newline-joined addresses; stacks with
# no frames are dropped.
sub FlatProfile {
  my $profile = shift;
  my $flat = {};
  for my $stack (keys %$profile) {
    my @frames = split(/\n/, $stack);
    next unless @frames;
    AddEntry($flat, $frames[0], $profile->{$stack});
  }
  return $flat;
}

# Generate cumulative profile:
#   If count is charged to stack [a,b,c,d], in generated profile,
#   it will be charged to [a], [b], [c], [d]
sub CumulativeProfile {
  my $profile = shift;
  my $cumulative = {};
  for my $stack (keys %$profile) {
    my $count = $profile->{$stack};
    for my $frame (split(/\n/, $stack)) {
      AddEntry($cumulative, $frame, $count);
    }
  }
  return $cumulative;
}

# If the second-youngest PC on the stack is always the same, returns
# that pc.  Otherwise (including when any stack has fewer than two
# frames), returns undef.
sub IsSecondPcAlwaysTheSame {
  my $profile = shift;

  my $common;
  for my $stack (keys %$profile) {
    my @frames = split(/\n/, $stack);
    return undef if (@frames < 2);
    $common = $frames[1] unless defined($common);
    return undef if ($common ne $frames[1]);
  }
  return $common;
}

# Returns "<file:line>:<function>" for $address, using entries [1] and [0]
# of its record in $symbols.
sub ExtractSymbolLocation {
  my $symbols = shift;
  my $address = shift;
  # 'addr2line' outputs "??:0" for unknown locations; we do the
  # same to be consistent.
  return "??:0:unknown" unless exists $symbols->{$address};

  my $entry = $symbols->{$address};
  my $file = $entry->[1];
  $file = "??:0" if ($file eq "?");
  return $file . ":" . $entry->[0];
}

# Extracts a graph of calls.  The result maps each leaf location, and each
# "caller -> callee" edge found along a stack, to its accumulated count.
sub ExtractCalls {
  my $symbols = shift;
  my $profile = shift;

  my $calls = {};
  for my $stack_trace (keys %$profile) {
    my $count = $profile->{$stack_trace};
    my @frames = split(/\n/, $stack_trace);

    # The youngest frame is charged on its own ...
    my $callee = ExtractSymbolLocation($symbols, $frames[0]);
    AddEntry($calls, $callee, $count);

    # ... and every older frame contributes one caller -> callee edge.
    for my $frame (@frames[1 .. $#frames]) {
      my $caller = ExtractSymbolLocation($symbols, $frame);
      AddEntry($calls, "$caller -> $callee", $count);
      $callee = $caller;
    }
  }

  return $calls;
}

# Returns a copy of $profile with profiler/allocator bookkeeping frames
# removed from every stack.  Which functions count as bookkeeping depends
# on $main::profile_type.
sub RemoveUninterestingFrames {
  my $symbols = shift;
  my $profile = shift;

  my $type = $main::profile_type;

  # Exact function names to skip, plus a regexp for whole families.
  my %skip = ();
  my $skip_regexp = 'NOMATCH';
  if ($type eq 'heap' || $type eq 'growth') {
    my @allocator_frames = (
      'calloc',
      'cfree',
      'malloc',
      'free',
      'memalign',
      'posix_memalign',
      'pvalloc',
      'valloc',
      'realloc',
      'tc_calloc',
      'tc_cfree',
      'tc_malloc',
      'tc_free',
      'tc_memalign',
      'tc_posix_memalign',
      'tc_pvalloc',
      'tc_valloc',
      'tc_realloc',
      'tc_new',
      'tc_delete',
      'tc_newarray',
      'tc_deletearray',
      'tc_new_nothrow',
      'tc_newarray_nothrow',
      'do_malloc',
      '::do_malloc',   # new name -- got moved to an unnamed ns
      '::do_malloc_or_cpp_alloc',
      'DoSampledAllocation',
      'simple_alloc::allocate',
      '__malloc_alloc_template::allocate',
      '__builtin_delete',
      '__builtin_new',
      '__builtin_vec_delete',
      '__builtin_vec_new',
      'operator new',
      'operator new[]',
      # These mark the beginning/end of our custom sections
      '__start_google_malloc',
      '__stop_google_malloc',
      '__start_malloc_hook',
      '__stop_malloc_hook',
    );
    for my $name (@allocator_frames) {
      # Mach (OS X) adds a _ prefix to everything
      @skip{$name, "_" . $name} = (1, 1);
    }
    # TODO: Remove TCMalloc once everything has been
    # moved into the tcmalloc:: namespace and we have flushed
    # old code out of the system.
    $skip_regexp = "TCMalloc|^tcmalloc::";
  } elsif ($type eq 'contention') {
    my @lock_frames = (
      'base::RecordLockProfileData',
      'base::SubmitMutexProfileData',
      'base::SubmitSpinLockProfileData',
      'Mutex::Unlock',
      'Mutex::UnlockSlow',
      'Mutex::ReaderUnlock',
      'MutexLock::~MutexLock',
      'SpinLock::Unlock',
      'SpinLock::SlowUnlock',
      'SpinLockHolder::~SpinLockHolder',
    );
    $skip{$_} = 1 for @lock_frames;
  } elsif ($type eq 'cpu') {
    # Drop signal handlers used for CPU profile collection
    # TODO(dpeng): this should not be necessary; it's taken
    # care of by the general 2nd-pc mechanism below.
    my @handler_frames = (
      'ProfileData::Add',           # historical
      'ProfileData::prof_handler',  # historical
      'CpuProfiler::prof_handler',
      '__FRAME_END__',
      '__pthread_sighandler',
      '__restore',
    );
    $skip{$_} = 1 for @handler_frames;
  } else {
    # Nothing skipped for unknown types
  }

  if ($type eq 'cpu') {
    # If all the second-youngest program counters are the same,
    # this STRONGLY suggests that it is an artifact of measurement,
    # i.e., stack frames pushed by the CPU profiler signal handler.
    # Hence, we delete them.
    # (The topmost PC is read from the signal structure, not from
    # the stack, so it does not get involved.)
    while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) {
      # Prefer the symbol name over the raw address in the notice.
      my $label = exists($symbols->{$second_pc})
          ? $symbols->{$second_pc}->[0]
          : $second_pc;
      print STDERR "Removing $label from all stack traces.\n";

      my $trimmed = {};
      for my $stack (keys %$profile) {
        my @frames = split(/\n/, $stack);
        splice(@frames, 1, 1);
        AddEntry($trimmed, join("\n", @frames), $profile->{$stack});
      }
      $profile = $trimmed;
    }
  }

  my $result = {};
  for my $stack (keys %$profile) {
    my @kept = ();
    for my $pc (split(/\n/, $stack)) {
      if (exists($symbols->{$pc})) {
        my $func = $symbols->{$pc}->[0];
        next if ($skip{$func} || ($func =~ m/$skip_regexp/));
      }
      push(@kept, $pc);
    }
    AddEntry($result, join("\n", @kept), $profile->{$stack});
  }
  return $result;
}

# Reduce profile to granularity given by user
sub ReduceProfile {
  my $symbols = shift;
  my $profile = shift;
  my $reduced = {};
  for my $stack (keys %$profile) {
    # Pre-marking '' as seen makes empty translated entries disappear.
    my %seen = ('' => 1);
    # To avoid double-counting due to recursion, keep only the first
    # occurrence of each translated stack-trace entry.
    my @path = grep { !$seen{$_}++ } TranslateStack($symbols, $stack);
    AddEntry($reduced, join("\n", @path), $profile->{$stack});
  }
  return $reduced;
}

# Does the specified symbol array match the regexp?
# $sym is a flat array of (shortname, fileline, fullname) triples, or undef
# when the address has no symbol record.  Returns 1 if the shortname or the
# fileline of any triple matches $re, and 0 otherwise.
sub SymbolMatches {
  my $sym = shift;
  my $re = shift;
  if (defined($sym)) {
    for (my $i = 0; $i < $#{$sym}; $i += 3) {
      if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) {
        return 1;
      }
    }
  }
  return 0;
}

# Focus only on paths involving specified regexps
sub FocusProfile {
  my $symbols = shift;
  my $profile = shift;
  my $focus = shift;
  my $result = {};
  foreach my $k (keys(%{$profile})) {
    my $count = $profile->{$k};
    my @addrs = split(/\n/, $k);
    foreach my $a (@addrs) {
      # Keep the stack if the regexp matches the address/shortname/fileline
      if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) {
        AddEntry($result, $k, $count);
        last;
      }
    }
  }
  return $result;
}

# Focus only on paths not involving specified regexps
sub IgnoreProfile {
  my $symbols = shift;
  my $profile = shift;
  my $ignore = shift;
  my $result = {};
  foreach my $k (keys(%{$profile})) {
    my $count = $profile->{$k};
    my @addrs = split(/\n/, $k);
    my $matched = 0;
    foreach my $a (@addrs) {
      # Drop the stack if the regexp matches the address/shortname/fileline
      if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) {
        $matched = 1;
        last;
      }
    }
    if (!$matched) {
      AddEntry($result, $k, $count);
    }
  }
  return $result;
}

# Get total count in profile
sub TotalProfile {
  my $profile = shift;
  my $result = 0;
  foreach my $k (keys(%{$profile})) {
    $result += $profile->{$k};
  }
  return $result;
}

# Add A to B: returns a new profile holding the per-key sum of both inputs
sub AddProfile {
  my $A = shift;
  my $B = shift;

  my $R = {};
  # add all keys in A
  foreach my $k (keys(%{$A})) {
    my $v = $A->{$k};
    AddEntry($R, $k, $v);
  }
  # add all keys in B
  foreach my $k (keys(%{$B})) {
    my $v = $B->{$k};
    AddEntry($R, $k, $v);
  }
  return $R;
}

# Merges symbol maps.  B may be undef; where both maps define a key, the
# entry from B wins.
sub MergeSymbols {
  my $A = shift;
  my $B = shift;

  my $R = {};
  foreach my $k (keys(%{$A})) {
    $R->{$k} = $A->{$k};
  }
  if (defined($B)) {
    foreach my $k (keys(%{$B})) {
      $R->{$k} = $B->{$k};
    }
  }
  return $R;
}


# Union of two PC sets: every key of A or B maps to 1 in the result
sub AddPcs {
  my $A = shift;
  my $B = shift;

  my $R = {};
  # add all keys in A
  foreach my $k (keys(%{$A})) {
    $R->{$k} = 1
  }
  # add all keys in B
  foreach my $k (keys(%{$B})) {
    $R->{$k} = 1
  }
  return $R;
}

# Subtract B from A.  With $main::opt_drop_negative set, negative results
# are clamped to 0 and keys that appear only in B are omitted.
sub SubtractProfile {
  my $A = shift;
  my $B = shift;

  my $R = {};
  foreach my $k (keys(%{$A})) {
    my $v = $A->{$k} - GetEntry($B, $k);
    if ($v < 0 && $main::opt_drop_negative) {
      $v = 0;
    }
    AddEntry($R, $k, $v);
  }
  if (!$main::opt_drop_negative) {
    # Take care of when subtracted profile has more entries
    foreach my $k (keys(%{$B})) {
      if (!exists($A->{$k})) {
        AddEntry($R, $k, 0 - $B->{$k});
      }
    }
  }
  return $R;
}

# Get entry from profile; zero if not present
sub GetEntry {
  my $profile = shift;
  my $k = shift;
  if (exists($profile->{$k})) {
    return $profile->{$k};
  } else {
    return 0;
  }
}

# Add entry to specified profile
sub AddEntry {
  my $profile = shift;
  my $k = shift;
  my $n = shift;
  if (!exists($profile->{$k})) {
    $profile->{$k} = 0;
  }
  $profile->{$k} += $n;
}

# Add a stack of entries to specified profile, and add them to the $pcs
# list.  $stack is a whitespace-separated list of addresses; each one is
# passed through HexExtend() before being recorded.
sub AddEntries {
  my $profile = shift;
  my $pcs = shift;
  my $stack = shift;
  my $count = shift;
  my @k = ();

  foreach my $e (split(/\s+/, $stack)) {
    my $pc = HexExtend($e);
    $pcs->{$pc} = 1;
    push @k, $pc;
  }
  AddEntry($profile, (join "\n", @k), $count);
}

##### Code to profile a server dynamically #####

# Fetches the first line of the symbol page and calls error() unless it
# reports a non-zero "num_symbols:" count.
sub CheckSymbolPage {
  my $url = SymbolPageURL();
  my $command_line = "$URL_FETCHER '$url'";
  open(SYMBOL, "$command_line |") or error($command_line);
  my $line = <SYMBOL>;
  close(SYMBOL);
  # Test for "no output" before touching the line, so that a missing page
  # is reported as such instead of tripping over an undefined value.
  unless (defined($line)) {
    error("$url doesn't exist\n");
  }
  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines

  if ($line =~ /^num_symbols:\s+(\d+)$/) {
    if ($1 == 0) {
      error("Stripped binary. No symbols available.\n");
    }
  } else {
    error("Failed to get the number of symbols from $url\n");
  }
}

# Returns 0 (after a notice on stderr) when $profile_name is an existing
# local file, 1 otherwise.
sub IsProfileURL {
  my $profile_name = shift;
  if (-f $profile_name) {
    # print, not printf: the file name must not be treated as a format string
    print STDERR "Using local file $profile_name.\n";
    return 0;
  }
  return 1;
}

# Splits a profile URL into (host, base URL, profile path).  Returns an
# empty list for an undefined or empty name.  The protocol defaults to
# "http://" and the profile path to "/".
sub ParseProfileURL {
  my $profile_name = shift;

  if (!defined($profile_name) || $profile_name eq "") {
    return ();
  }

  # Split profile URL - matches all non-empty strings, so no test.
  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;

  my $proto = $1 || "http://";
  my $hostport = $2;
  my $prefix = $3;
  my $profile = $4 || "/";

  my $host = $hostport;
  $host =~ s/:.*//;

  my $baseurl = "$proto$hostport$prefix";
  return ($host, $baseurl, $profile);
}

# We fetch symbols from the first profile argument.
sub SymbolPageURL {
  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
  return "$baseURL$SYMBOL_PAGE";
}

# Fetches the program-name page of the first profile argument and returns
# its first line with everything from the first NUL onward (argv[1] and
# later arguments) and any line feeds removed.
sub FetchProgramName() {
  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
  my $url = "$baseURL$PROGRAM_NAME_PAGE";
  my $command_line = "$URL_FETCHER '$url'";
  open(CMDLINE, "$command_line |") or error($command_line);
  my $cmdline = <CMDLINE>;
  close(CMDLINE);
  error("Failed to get program name from $url\n") unless defined($cmdline);
  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
  $cmdline =~ s/\x00.+//;  # Remove argv[1] and latters.
  $cmdline =~ s!\n!!g;  # Remove LFs.
  return $cmdline;
}

# Gee, curl's -L (--location) option isn't reliable at least
# with its 7.12.3 version.  Curl will forget to post data if
# there is a redirection.  This function is a workaround for
# curl.  Redirection happens on borg hosts.
# Issues a "--head" request for $url and returns the target of the last
# "Location:" header seen, or $url unchanged when there is none.
sub ResolveRedirectionForCurl {
  my $url = shift;
  my $command_line = "$URL_FETCHER --head '$url'";
  open(CMDLINE, "$command_line |") or error($command_line);
  # Read the response headers line by line; an empty loop condition here
  # would never terminate and never look at the fetcher's output.
  while (<CMDLINE>) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    if (/^Location: (.*)/) {
      $url = $1;
    }
  }
  close(CMDLINE);
  return $url;
}

# Add a timeout flag to URL_FETCHER.
# Returns $fetcher with "--max-time" (curl -s) or "--deadline=" (rpcget)
# appended.  The command is returned unchanged when $timeout is undef or
# the fetcher is neither of those two tools.
sub AddFetchTimeout {
  my $fetcher = shift;
  my $timeout = shift;
  if (defined($timeout)) {
    if ($fetcher =~ m/\bcurl -s/) {
      $fetcher .= sprintf(" --max-time %d", $timeout);
    } elsif ($fetcher =~ m/\brpcget\b/) {
      $fetcher .= sprintf(" --deadline=%d", $timeout);
    }
  }
  return $fetcher;
}

# Reads a symbol map from the file handle name given as $1, returning
# the resulting symbol map.  Also processes variables relating to symbols.
# Currently, the only variable processed is 'binary=' which updates
# $main::prog to have the correct program name.
# Reading stops at end of input or at the first line starting with "---".
sub ReadSymbols {
  my $in = shift;
  my $map = {};
  while (<$in>) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    # Removes all the leading zeroes from the symbols, see comment below.
    if (m/^0x0*([0-9a-f]+)\s+(.+)/) {
      $map->{$1} = $2;
    } elsif (m/^---/) {
      last;
    } elsif (m/^([a-z][^=]*)=(.*)$/ ) {
      my ($variable, $value) = ($1, $2);
      # Trim surrounding whitespace from both the name and the value.
      for ($variable, $value) {
        s/^\s+//;
        s/\s+$//;
      }
      if ($variable eq "binary") {
        if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) {
          printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n",
                         $main::prog, $value);
        }
        $main::prog = $value;
      } else {
        printf STDERR ("Ignoring unknown variable in symbols list: " .
            "'%s' = '%s'\n", $variable, $value);
      }
    }
  }
  return $map;
}

# Fetches and processes symbols to prepare them for use in the profile output
# code.  If the optional 'symbol_map' arg is not given, fetches symbols from
# $SYMBOL_PAGE for all PC values found in profile.
# Otherwise, the raw symbols
# are assumed to have already been fetched into 'symbol_map' and are simply
# extracted and processed.
#
# Returns a hash mapping each PC in $pcset to a flat array of
# (shortname, "?", fullname) triples, one triple per "--"-separated name.
sub FetchSymbols {
  my $pcset = shift;
  my $symbol_map = shift;

  my %seen = ();
  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq

  if (!defined($symbol_map)) {
    my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));

    # Fail loudly if the POST body cannot be written; otherwise the fetcher
    # below would silently send an empty or stale request.
    open(POSTFILE, ">$main::tmpfile_sym")
        or error("$main::tmpfile_sym: $!\n");
    print POSTFILE $post_data;
    close(POSTFILE);

    my $url = SymbolPageURL();

    my $command_line;
    if ($URL_FETCHER =~ m/\bcurl -s/) {
      $url = ResolveRedirectionForCurl($url);
      $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'";
    } else {
      $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'";
    }
    # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
    my $cppfilt = $obj_tool_map{"c++filt"};
    open(SYMBOL, "$command_line | $cppfilt |") or error($command_line);
    $symbol_map = ReadSymbols(*SYMBOL{IO});
    close(SYMBOL);
  }

  my $symbols = {};
  foreach my $pc (@pcs) {
    # For 64 bits binaries, symbols are extracted with 8 leading zeroes.
    # Then /symbol reads the long symbols in as uint64, and outputs
    # the result with a "0x%08llx" format which get rid of the zeroes.
    # By removing all the leading zeroes in both $pc and the symbols from
    # /symbol, the symbols match and are retrievable from the map.
    my $shortpc = $pc;
    $shortpc =~ s/^0*//;
    # Each line may have a list of names, which includes the function
    # and also other functions it has inlined.  They are separated
    # (in PrintSymbolizedFile), by --, which is illegal in function names.
    my $fullnames;
    if (defined($symbol_map->{$shortpc})) {
      $fullnames = $symbol_map->{$shortpc};
    } else {
      $fullnames = "0x" . $pc;  # Just use addresses
    }
    my $sym = [];
    $symbols->{$pc} = $sym;
    foreach my $fullname (split("--", $fullnames)) {
      my $name = ShortFunctionName($fullname);
      push(@{$sym}, $name, "?", $fullname);
    }
  }
  return $symbols;
}

# Returns $file_name with everything up to and including the last "/"
# removed.
sub BaseName {
  my $file_name = shift;
  $file_name =~ s!^.*/!!;  # Remove directory name
  return $file_name;
}

# Builds "<binary basename>.<$main::op_time>.<host>" for a profile URL.
sub MakeProfileBaseName {
  my ($binary_name, $profile_name) = @_;
  my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
  my $binary_shortname = BaseName($binary_name);
  return sprintf("%s.%s.%s",
                 $binary_shortname, $main::op_time, $host);
}

# Resolves one profile argument to a local file name.
#   $binary_name         program being profiled (used to name the file)
#   $profile_name        local file name or profile URL
#   $fetch_name_only     if > 0, compute and return the name without fetching
#   $encourage_patience  if true, print "Be patient..." for CPU profiles
# Local files are returned as-is.  URLs are fetched into $PPROF_TMPDIR (or
# $HOME/pprof) via a temporary file that is renamed into place on success.
sub FetchDynamicProfile {
  my $binary_name = shift;
  my $profile_name = shift;
  my $fetch_name_only = shift;
  my $encourage_patience = shift;

  if (!IsProfileURL($profile_name)) {
    return $profile_name;
  } else {
    my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
    if ($path eq "" || $path eq "/") {
      # Missing type specifier defaults to cpu-profile
      $path = $PROFILE_PAGE;
    }

    my $profile_file = MakeProfileBaseName($binary_name, $profile_name);

    my $url = "$baseURL$path";
    my $fetch_timeout = undef;
    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
      if ($path =~ m/[?]/) {
        $url .= "&";
      } else {
        $url .= "?";
      }
      $url .= sprintf("seconds=%d", $main::opt_seconds);
      $fetch_timeout = $main::opt_seconds * 1.01 + 60;
    } else {
      # For non-CPU profiles, we add a type-extension to
      # the target profile file name.
      my $suffix = $path;
      $suffix =~ s,/,.,g;
      $profile_file .= $suffix;
    }

    my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof");
    if (! -d $profile_dir) {
      mkdir($profile_dir)
          || die("Unable to create profile directory $profile_dir: $!\n");
    }
    my $tmp_profile = "$profile_dir/.tmp.$profile_file";
    my $real_profile = "$profile_dir/$profile_file";

    if ($fetch_name_only > 0) {
      return $real_profile;
    }

    my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout);
    my $cmd = "$fetcher '$url' > '$tmp_profile'";
    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
      print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n";
      if ($encourage_patience) {
        print STDERR "Be patient...\n";
      }
    } else {
      print STDERR "Fetching $path profile from $url to\n ${real_profile}\n";
    }

    (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
    # Both names live in $profile_dir, so the builtin rename is enough and,
    # unlike shelling out to mv, is safe for paths containing spaces or
    # shell metacharacters.
    rename($tmp_profile, $real_profile) || error("Unable to rename profile\n");
    print STDERR "Wrote profile to $real_profile\n";
    $main::collected_profile = $real_profile;
    return $main::collected_profile;
  }
}

# Collect profiles in parallel
sub FetchDynamicProfiles {
  my $items = scalar(@main::pfile_args);
  my $levels = log($items) / log(2);

  if ($items == 1) {
    $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1);
  } else {
    # math rounding issues
    if ((2 ** $levels) < $items) {
      $levels++;
    }
    my $count = scalar(@main::pfile_args);
    # First pass only computes the destination file names (fetch_name_only).
    for (my $i = 0; $i < $count; $i++) {
      $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0);
    }
    print STDERR "Fetching $count profiles, Be patient...\n";
    FetchDynamicProfilesRecurse($levels, 0, 0);
    $main::collected_profile = join(" \\\n ", @main::profile_files);
  }
}

# Recursively fork a process to get enough processes
# collecting profiles
sub FetchDynamicProfilesRecurse {
  my $maxlevel = shift;
  my $level = shift;
  my $position = shift;

  # Parent and child each take one half of the remaining position space:
  # the parent appends a 0 bit to $position, the child a 1 bit.
  if (my $pid = fork()) {
    $position = 0 | ($position << 1);
    TryCollectProfile($maxlevel, $level, $position);
    wait;
  } else {
    $position = 1 | ($position << 1);
    TryCollectProfile($maxlevel, $level, $position);
    cleanup();
    exit(0);
  }
}

# Collect a single profile
sub TryCollectProfile {
  my $maxlevel = shift;
  my $level = shift;
  my $position = shift;

  if ($level >= ($maxlevel - 1)) {
    if ($position < scalar(@main::pfile_args)) {
      FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0);
    }
  } else {
    FetchDynamicProfilesRecurse($maxlevel, $level+1, $position);
  }
}

##### Parsing code #####

# Provide a small streaming-read module to handle very large
# cpu-profile files.  Stream in chunks along a sliding window.
# Provides an interface to get one 'slot', correctly handling
# endian-ness differences.  A slot is one 32-bit or 64-bit word
# (depending on the input profile).  We tell endianness and bit-size
# for the profile by looking at the first 8 bytes: in cpu profiles,
# the second slot is always 3 (we'll accept anything that's not 0).
BEGIN {
  package CpuProfileStream;

  # Creates a stream over the already-open handle $file; $fname is used
  # only in error messages.  Reads the first two slots and, as a side
  # effect, sets the file-level $address_length (8 or 16 hex digits).
  sub new {
    my ($class, $file, $fname) = @_;
    my $self = { file        => $file,
                 base        => 0,
                 stride      => 512 * 1024,   # must be a multiple of bitsize/8
                 slots       => [],
                 unpack_code => "",           # N for big-endian, V for little
                 perl_is_64bit => 1,          # matters if profile is 64-bit
    };
    bless $self, $class;
    # Let unittests adjust the stride
    if ($main::opt_test_stride > 0) {
      $self->{stride} = $main::opt_test_stride;
    }
    # Read the first two slots to figure out bitsize and endianness.
    my $slots = $self->{slots};
    my $str;
    read($self->{file}, $str, 8);
    # Set the global $address_length based on what we see here.
    # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars).
    $address_length = ($str eq (chr(0)x8)) ? 16 : 8;
    if ($address_length == 8) {
      if (substr($str, 6, 2) eq chr(0)x2) {
        $self->{unpack_code} = 'V';  # Little-endian.
      } elsif (substr($str, 4, 2) eq chr(0)x2) {
        $self->{unpack_code} = 'N';  # Big-endian
      } else {
        ::error("$fname: header size >= 2**16\n");
      }
      @$slots = unpack($self->{unpack_code} . "*", $str);
    } else {
      # If we're a 64-bit profile, check if we're a 64-bit-capable
      # perl.  Otherwise, each slot will be represented as a float
      # instead of an int64, losing precision and making all the
      # 64-bit addresses wrong.  We won't complain yet, but will
      # later if we ever see a value that doesn't fit in 32 bits.
      my $has_q = 0;
      eval { $has_q = pack("Q", "1") ? 1 : 1; };
      if (!$has_q) {
        $self->{perl_is_64bit} = 0;
      }
      read($self->{file}, $str, 8);
      if (substr($str, 4, 4) eq chr(0)x4) {
        # We'd love to use 'Q', but it's a) not universal, b) not endian-proof.
        $self->{unpack_code} = 'V';  # Little-endian.
      } elsif (substr($str, 0, 4) eq chr(0)x4) {
        $self->{unpack_code} = 'N';  # Big-endian
      } else {
        ::error("$fname: header size >= 2**32\n");
      }
      my @pair = unpack($self->{unpack_code} . "*", $str);
      # Since we know one of the pair is 0, it's fine to just add them.
      @$slots = (0, $pair[0] + $pair[1]);
    }
    return $self;
  }

  # Load more data when we access slots->get(X) which is not yet in memory.
  sub overflow {
    my ($self) = @_;
    my $slots = $self->{slots};
    $self->{base} += $#$slots + 1;   # skip over data we're replacing
    my $str;
    read($self->{file}, $str, $self->{stride});
    if ($address_length == 8) {      # the 32-bit case
      # This is the easy case: unpack provides 32-bit unpacking primitives.
      @$slots = unpack($self->{unpack_code} . "*", $str);
    } else {
      # We need to unpack 32 bits at a time and combine.
      my @b32_values = unpack($self->{unpack_code} . "*", $str);
      my @b64_values = ();
      for (my $i = 0; $i < $#b32_values; $i += 2) {
        # TODO(csilvers): if this is a 32-bit perl, the math below
        #    could end up in a too-large int, which perl will promote
        #    to a double, losing necessary precision.  Deal with that.
        #    Right now, we just die.
        my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
        if ($self->{unpack_code} eq 'N') {    # big-endian
          ($lo, $hi) = ($hi, $lo);
        }
        my $value = $lo + $hi * (2**32);
        if (!$self->{perl_is_64bit} &&   # check value is exactly represented
            (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
          ::error("Need a 64-bit perl to process this 64-bit profile.\n");
        }
        push(@b64_values, $value);
      }
      @$slots = @b64_values;
    }
  }

  # Access the i-th long in the file (logically), or -1 at EOF.
  sub get {
    my ($self, $idx) = @_;
    my $slots = $self->{slots};
    while ($#$slots >= 0) {
      if ($idx < $self->{base}) {
        # The only time we expect a reference to $slots[$i - something]
        # after referencing $slots[$i] is reading the very first header.
        # Since $stride > |header|, that shouldn't cause any lookback
        # errors.  And everything after the header is sequential.
        print STDERR "Unexpected look-back reading CPU profile\n";
        return -1;   # shrug, don't know what better to return
      } elsif ($idx > $self->{base} + $#$slots) {
        $self->overflow();
      } else {
        return $slots->[$idx - $self->{base}];
      }
    }
    # If we get here, $slots is [], which means we've reached EOF
    return -1;  # unique since slots is supposed to hold unsigned numbers
  }
}

# Reads the top, 'header' section of a profile, and returns the last
# line of the header, commonly called a 'header line'.  The header
# section of a profile consists of zero or more 'command' lines that
# are instructions to pprof, which pprof executes when reading the
# header.  All 'command' lines start with a %.  After the command
# lines is the 'header line', which is a profile-specific line that
# indicates what type of profile it is, and perhaps other global
# information about the profile.
# For instance, here's a header line
# for a heap profile:
#   heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile
# For historical reasons, the CPU profile does not contain a text-
# readable header line.  If the profile looks like a CPU profile,
# this function returns "".  If no header line could be found, this
# function returns undef.
#
# The following commands are recognized:
#   %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:'
#
# The input file should be in binmode.
sub ReadProfileHeader {
  local *PROFILE = shift;
  my $firstchar = "";
  my $line = "";
  # Peek at the first character to tell binary data from a text header.
  read(PROFILE, $firstchar, 1);
  seek(PROFILE, -1, 1);                    # unread the firstchar
  if ($firstchar !~ /[[:print:]]/) {       # is not a text character
    return "";
  }
  while (defined($line = <PROFILE>)) {
    $line =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
    if ($line =~ /^%warn\s+(.*)/) {        # 'warn' command
      # Note this matches both '%warn blah\n' and '%warn\n'.
      print STDERR "WARNING: $1\n";        # print the rest of the line
    } elsif ($line =~ /^%/) {
      print STDERR "Ignoring unknown command from profile header: $line";
    } else {
      # End of commands, must be the header line.
      return $line;
    }
  }
  return undef;     # got to EOF without seeing a header line
}

# Returns true if $file_name is a readable file whose header line is the
# symbol-section marker (the last path component of $SYMBOL_PAGE).
sub IsSymbolizedProfileFile {
  my $file_name = shift;
  if (!(-e $file_name) || !(-r $file_name)) {
    return 0;
  }
  # Check if the file contains a symbol-section marker.
  # A file that cannot be opened is simply "not symbolized".
  open(TFILE, "<$file_name") or return 0;
  binmode TFILE;
  my $firstline = ReadProfileHeader(*TFILE);
  close(TFILE);
  if (!$firstline) {
    return 0;
  }
  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
  my $symbol_marker = $&;
  return $firstline =~ /^--- *$symbol_marker/;
}

# Parse profile generated by common/profiler.cc and return a reference
# to a map:
#      $result->{version}     Version number of profile file
#      $result->{period}      Sampling period (in microseconds)
#      $result->{profile}     Profile object
#      $result->{map}         Memory map info from profile
#      $result->{pcs}         Hash of all PC values seen, key is hex address
# When the file starts with a symbol section, the parsed symbol map is also
# returned as $result->{symbols}.  Sets $main::profile_type as a side effect.
sub ReadProfile {
  my $prog = shift;
  my $fname = shift;
  my $result;            # return value

  $CONTENTION_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
  my $contention_marker = $&;
  $GROWTH_PAGE  =~ m,[^/]+$,;    # matches everything after the last slash
  my $growth_marker = $&;
  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
  my $symbol_marker = $&;
  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
  my $profile_marker = $&;

  # Look at first line to see if it is a heap or a CPU profile.
  # CPU profile may start with no header at all, and just binary data
  # (starting with \0\0\0\0) -- in that case, don't try to read the
  # whole firstline, since it may be gigabytes(!) of data.
  open(PROFILE, "<$fname") || error("$fname: $!\n");
  binmode PROFILE;      # New perls do UTF-8 processing
  my $header = ReadProfileHeader(*PROFILE);
  if (!defined($header)) {   # means "at EOF"
    error("Profile is empty.\n");
  }

  my $symbols;
  if ($header =~ m/^--- *$symbol_marker/o) {
    # Verify that the user asked for a symbolized profile
    if (!$main::use_symbolized_profile) {
      # we have both a binary and symbolized profiles, abort
      error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " .
            "a binary arg. Try again without passing\n $prog\n");
    }
    # Read the symbol section of the symbolized profile file.
    $symbols = ReadSymbols(*PROFILE{IO});
    # Read the next line to get the header for the remaining profile.
    $header = ReadProfileHeader(*PROFILE) || "";
  }

  # Dispatch on the header line to the matching reader.
  $main::profile_type = '';
  if ($header =~ m/^heap profile:.*$growth_marker/o) {
    $main::profile_type = 'growth';
    $result =  ReadHeapProfile($prog, *PROFILE, $header);
  } elsif ($header =~ m/^heap profile:/) {
    $main::profile_type = 'heap';
    $result =  ReadHeapProfile($prog, *PROFILE, $header);
  } elsif ($header =~ m/^--- *$contention_marker/o) {
    $main::profile_type = 'contention';
    $result = ReadSynchProfile($prog, *PROFILE);
  } elsif ($header =~ m/^--- *Stacks:/) {
    print STDERR
      "Old format contention profile: mistakenly reports " .
      "condition variable signals as lock contentions.\n";
    $main::profile_type = 'contention';
    $result = ReadSynchProfile($prog, *PROFILE);
  } elsif ($header =~ m/^--- *$profile_marker/) {
    # the binary cpu profile data starts immediately after this line
    $main::profile_type = 'cpu';
    $result = ReadCPUProfile($prog, $fname, *PROFILE);
  } else {
    if (defined($symbols)) {
      # a symbolized profile contains a format we don't recognize, bail out
      error("$fname: Cannot recognize profile section after symbols.\n");
    }
    # no ascii header present -- must be a CPU profile
    $main::profile_type = 'cpu';
    $result = ReadCPUProfile($prog, $fname, *PROFILE);
  }

  close(PROFILE);

  # if we got symbols along with the profile, return those as well
  if (defined($symbols)) {
    $result->{symbols} = $symbols;
  }

  return $result;
}

# Subtract one from caller pc so we map back to call instr.
# However, don't do this if we're reading a symbolized profile
# file, in which case the subtract-one was done when the file
# was written.
#
# We apply the same logic to all readers, though ReadCPUProfile uses an
# independent implementation.
# Subtract one from every caller address in a whitespace-separated stack
# string so that each return address maps back to its call instruction.
# The first entry is copied unchanged.  When reading a symbolized profile
# the stack is returned as-is.  The result is re-joined with the first
# whitespace character found in $stack.
sub FixCallerAddresses {
  my $stack = shift;
  if ($main::use_symbolized_profile) {
    return $stack;
  } else {
    # Fall back to a single space when $stack holds no whitespace (a
    # one-address stack); reading $1 after a failed match is unreliable.
    my $delimiter = ($stack =~ /(\s)/) ? $1 : ' ';
    my @addrs = split(' ', $stack);
    my @fixedaddrs;
    $#fixedaddrs = $#addrs;
    if ($#addrs >= 0) {
      $fixedaddrs[0] = $addrs[0];
    }
    for (my $i = 1; $i <= $#addrs; $i++) {
      $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1");
    }
    return join $delimiter, @fixedaddrs;
  }
}

# CPU profile reader
#
# Arguments: program name, profile file name (logging only), and the
# profile file handle.  Returns a hash ref with keys version, period,
# profile, libs and pcs.
sub ReadCPUProfile {
  my $prog = shift;
  my $fname = shift;  # just used for logging
  local *PROFILE = shift;
  my $version;
  my $period;
  my $i;
  my $profile = {};
  my $pcs = {};

  # Parse string into array of slots.
  my $slots = CpuProfileStream->new(*PROFILE, $fname);

  # Read header.  The current header version is a 5-element structure
  # containing:
  #   0: header count (always 0)
  #   1: header "words" (after this one: 3)
  #   2: format version (0)
  #   3: sampling period (usec)
  #   4: unused padding (always 0)
  if ($slots->get(0) != 0 ) {
    error("$fname: not a profile file, or old format profile file\n");
  }
  $i = 2 + $slots->get(1);
  $version = $slots->get(2);
  $period = $slots->get(3);
  # Do some sanity checking on these header values.
  if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) {
    error("$fname: not a profile file, or corrupted profile file\n");
  }

  # Parse profile
  while ($slots->get($i) != -1) {
    my $n = $slots->get($i++);
    my $d = $slots->get($i++);
    if ($d > (2**16)) {  # TODO(csilvers): what's a reasonable max-stack-depth?
      my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8));
      print STDERR "At index $i (address $addr):\n";
      # The message now states the limit that is actually tested above.
      error("$fname: stack trace depth > 2**16\n");
    }
    if ($slots->get($i) == 0) {
      # End of profile data marker
      $i += $d;
      last;
    }

    # Make key out of the stack entries
    my @k = ();
    for (my $j = 0; $j < $d; $j++) {
      my $pc = $slots->get($i+$j);
      # Subtract one from caller pc so we map back to call instr.
      # However, don't do this if we're reading a symbolized profile
      # file, in which case the subtract-one was done when the file
      # was written.
      if ($j > 0 && !$main::use_symbolized_profile) {
        $pc--;
      }
      $pc = sprintf("%0*x", $address_length, $pc);
      $pcs->{$pc} = 1;
      push @k, $pc;
    }

    AddEntry($profile, (join "\n", @k), $n);
    $i += $d;
  }

  # Parse map
  my $map = '';
  seek(PROFILE, $i * 4, 0);
  read(PROFILE, $map, (stat PROFILE)[7]);

  my $r = {};
  $r->{version} = $version;
  $r->{period} = $period;
  $r->{profile} = $profile;
  $r->{libs} = ParseLibraries($prog, $map, $pcs);
  $r->{pcs} = $pcs;

  return $r;
}

# Heap profile reader
#
# Arguments: program name, the profile file handle, and the already-read
# header line.  Which of the four per-entry counters is recorded depends on
# the --inuse_space/--inuse_objects/--alloc_space/--alloc_objects options.
# Returns a hash ref with keys version, period, profile, libs and pcs.
sub ReadHeapProfile {
  my $prog = shift;
  local *PROFILE = shift;
  my $header = shift;

  my $index = 1;
  if ($main::opt_inuse_space) {
    $index = 1;
  } elsif ($main::opt_inuse_objects) {
    $index = 0;
  } elsif ($main::opt_alloc_space) {
    $index = 3;
  } elsif ($main::opt_alloc_objects) {
    $index = 2;
  }

  # Find the type of this profile.  The header line looks like:
  #    heap profile:   1246:  8800744 [  1246:  8800744] @ TYPE/266053
  # There are two COUNT: SIZE pairs, the first inuse objects/space, and the
  # second allocated objects/space.  This is followed optionally by a profile
  # type, and if that is present, optionally by a sampling frequency.
  # For remote heap profiles (v1):
  # The interpretation of the sampling frequency is that the profiler, for
  # each sample, calculates a uniformly distributed random integer less than
  # the given value, and records the next sample after that many bytes have
  # been allocated.  Therefore, the expected sample interval is half of the
  # given frequency.  By default, if not specified, the expected sample
  # interval is 128KB.  Only remote-heap-page profiles are adjusted for
  # sample size.
  # For remote heap profiles (v2):
  # The sampling frequency is the rate of a Poisson process. This means that
  # the probability of sampling an allocation of size X with sampling rate Y
  # is 1 - exp(-X/Y)
  # For version 2, a typical header line might look like this:
  #    heap profile:   1922: 127792360 [  1922: 127792360] @ TYPE_v2/524288
  # the trailing number (524288) is the sampling rate. (Version 1 showed
  # double the 'rate' here)
  my $sampling_algorithm = 0;
  my $sample_adjustment = 0;
  chomp($header);
  my $type = "unknown";
  if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
    if (defined($6) && ($6 ne '')) {
      $type = $6;
      my $sample_period = $8;
      # $type is "heapprofile" for profiles generated by the
      # heap-profiler, and either "heap" or "heap_v2" for profiles
      # generated by sampling directly within tcmalloc.  It can also
      # be "growth" for heap-growth profiles.  The first is typically
      # found for profiles generated locally, and the others for
      # remote profiles.
      if (($type eq "heapprofile") || ($type !~ /heap/) ) {
        # No need to adjust for the sampling rate with heap-profiler-derived data
        $sampling_algorithm = 0;
      } elsif ($type =~ /_v2/) {
        $sampling_algorithm = 2;     # version 2 sampling
        if (defined($sample_period) && ($sample_period ne '')) {
          $sample_adjustment = int($sample_period);
        }
      } else {
        $sampling_algorithm = 1;     # version 1 sampling
        if (defined($sample_period) && ($sample_period ne '')) {
          $sample_adjustment = int($sample_period)/2;
        }
      }
    } else {
      # We detect whether or not this is a remote-heap profile by checking
      # that the total-allocated stats ($n2,$s2) are exactly the
      # same as the in-use stats ($n1,$s1).  It is remotely conceivable
      # that a non-remote-heap profile may pass this check, but it is hard
      # to imagine how that could happen.
      # In this case it's so old it's guaranteed to be remote-heap version 1.
      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
      if (($n1 == $n2) && ($s1 == $s2)) {
        # This is likely to be a remote-heap based sample profile
        $sampling_algorithm = 1;
      }
    }
  }

  if ($sampling_algorithm > 0) {
    # For remote-heap generated profiles, adjust the counts and sizes to
    # account for the sample rate (we sample once every 128KB by default).
    if ($sample_adjustment == 0) {
      # Turn on profile adjustment.
      $sample_adjustment = 128*1024;
      print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
    } else {
      printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
                     $sample_adjustment);
    }
    if ($sampling_algorithm > 1) {
      # We don't bother printing anything for the original version (version 1)
      print STDERR "Heap version $sampling_algorithm\n";
    }
  }

  my $profile = {};
  my $pcs = {};
  my $map = "";

  while (defined($_ = readline(*PROFILE))) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    if (/^MAPPED_LIBRARIES:/) {
      # Read the /proc/self/maps data
      while (defined($_ = readline(*PROFILE))) {
        s/\r//g;         # turn windows-looking lines into unix-looking lines
        $map .= $_;
      }
      last;
    }

    if (/^--- Memory map:/) {
      # Read /proc/self/maps data as formatted by DumpAddressMap()
      my $buildvar = "";
      while (defined($_ = readline(*PROFILE))) {
        s/\r//g;         # turn windows-looking lines into unix-looking lines
        # Parse "build=" specification if supplied
        if (m/^\s*build=(.*)\n/) {
          $buildvar = $1;
        }

        # Expand "$build" variable if available
        $_ =~ s/\$build\b/$buildvar/g;

        $map .= $_;
      }
      last;
    }

    # Read entry of the form:
    #  COUNT1: SIZE1 [COUNT2: SIZE2] @ a1 a2 a3 ... an
    s/^\s*//;
    s/\s*$//;
    if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
      my $stack = $5;
      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);

      if ($sample_adjustment) {
        if ($sampling_algorithm == 2) {
          # Remote-heap version 2
          # The sampling frequency is the rate of a Poisson process.
          # This means that the probability of sampling an allocation of
          # size X with sampling rate Y is 1 - exp(-X/Y)
          if ($n1 != 0) {
            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
            my $scale_factor = 1/(1 - exp(-$ratio));
            $n1 *= $scale_factor;
            $s1 *= $scale_factor;
          }
          if ($n2 != 0) {
            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
            my $scale_factor = 1/(1 - exp(-$ratio));
            $n2 *= $scale_factor;
            $s2 *= $scale_factor;
          }
        } else {
          # Remote-heap version 1
          # Buckets with zero objects or zero bytes are left unscaled:
          # computing or dividing by their ratio would divide by zero.
          if ($n1 != 0 && $s1 != 0) {
            my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
            if ($ratio < 1) {
              $n1 /= $ratio;
              $s1 /= $ratio;
            }
          }
          if ($n2 != 0 && $s2 != 0) {
            my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
            if ($ratio < 1) {
              $n2 /= $ratio;
              $s2 /= $ratio;
            }
          }
        }
      }

      my @counts = ($n1, $s1, $n2, $s2);
      AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
    }
  }

  my $r = {};
  $r->{version} = "heap";
  $r->{period} = 1;
  $r->{profile} = $profile;
  $r->{libs} = ParseLibraries($prog, $map, $pcs);
  $r->{pcs} = $pcs;
  return $r;
}

# Lock-contention ("synch") profile reader
#
# Arguments: program name, the profile file handle, and the header line.
# The value recorded per stack is total delay, contention count or mean
# delay, selected by --total_delay/--contentions/--mean_delay.
# Returns a hash ref with keys version, period, profile, libs and pcs.
sub ReadSynchProfile {
  my $prog = shift;
  local *PROFILE = shift;
  my $header = shift;

  my $map = '';
  my $profile = {};
  my $pcs = {};
  my $sampling_period = 1;
  my $cyclespernanosec = 2.8;   # Default assumption for old binaries
  my $seen_clockrate = 0;
  my $line;

  my $index = 0;
  if ($main::opt_total_delay) {
    $index = 0;
  } elsif ($main::opt_contentions) {
    $index = 1;
  } elsif ($main::opt_mean_delay) {
    $index = 2;
  }

  while (defined($line = readline(*PROFILE))) {
    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
      my ($cycles, $count, $stack) = ($1, $2, $3);

      # Convert cycles to nanoseconds
      $cycles /= $cyclespernanosec;

      # Adjust for sampling done by application
      $cycles *= $sampling_period;
      $count *= $sampling_period;

      # A zero count has no meaningful mean; report 0 instead of dying
      # on a division by zero.
      my $mean = ($count != 0) ? $cycles / $count : 0;
      my @values = ($cycles, $count, $mean);
      AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);

    } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ ||
              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
      my ($cycles, $stack) = ($1, $2);
      if ($cycles !~ /^\d+$/) {
        next;
      }

      # Convert cycles to nanoseconds
      $cycles /= $cyclespernanosec;

      # Adjust for sampling done by application
      $cycles *= $sampling_period;

      AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);

    } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
      my ($variable, $value) = ($1,$2);
      for ($variable, $value) {
        s/^\s+//;
        s/\s+$//;
      }
      if ($variable eq "cycles/second") {
        $cyclespernanosec = $value / 1e9;
        $seen_clockrate = 1;
      } elsif ($variable eq "sampling period") {
        $sampling_period = $value;
      } elsif ($variable eq "ms since reset") {
        # Currently nothing is done with this value in pprof
        # So we just silently ignore it for now
      } elsif ($variable eq "discarded samples") {
        # Currently nothing is done with this value in pprof
        # So we just silently ignore it for now
      } else {
        printf STDERR ("Ignoring unnknown variable in /contention output: " .
                       "'%s' = '%s'\n",$variable,$value);
      }
    } else {
      # Memory map entry
      $map .= $line;
    }
  }

  if (!$seen_clockrate) {
    printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
                   $cyclespernanosec);
  }

  my $r = {};
  $r->{version} = 0;
  $r->{period} = $sampling_period;
  $r->{profile} = $profile;
  $r->{libs} = ParseLibraries($prog, $map, $pcs);
  $r->{pcs} = $pcs;
  return $r;
}

# Given a hex value in the form "0x1abcd" return "0001abcd" or
# "000000000001abcd", depending on the current address length.
# There's probably a more idiomatic (or faster) way to do this...
sub HexExtend {
  # Strips an optional leading "0x" and left-pads the remaining digits with
  # zeros to exactly $address_length characters.  Longer inputs produce a
  # warning and keep only their low-order $address_length digits.
  my $addr = shift;

  $addr =~ s/^0x//;

  if (length $addr > $address_length) {
    # print, not printf: $addr is data and must not be used as a format.
    print STDERR "Warning: address $addr is longer than address length $address_length\n";
  }

  # Pad with a full $address_length zeros so that even an empty input
  # yields a string of the right width.
  return substr(("0" x $address_length) . $addr, -$address_length);
}

##### Symbol extraction #####

# Aggressively search the lib_prefix values for the given library
# If all else fails, just return the name of the library unmodified.
# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so"
# it will search the following locations in this order, until it finds a file:
#   /my/path/lib/dir/mylib.so
#   /other/path/lib/dir/mylib.so
#   /my/path/dir/mylib.so
#   /other/path/dir/mylib.so
#   /my/path/mylib.so
#   /other/path/mylib.so
#   /lib/dir/mylib.so              (returned as last resort)
sub FindLibrary {
  my $file = shift;
  my $suffix = $file;

  # Search for the library as described above
  do {
    foreach my $prefix (@prefix_list) {
      my $fullpath = $prefix . $suffix;
      if (-e $fullpath) {
        return $fullpath;
      }
    }
    # Drop the leading directory component and try again.
  } while ($suffix =~ s|^/[^/]+/|/|);
  return $file;
}

# Return path to library with debugging symbols.
# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
# Returns undef when $file is not absolute or no such copy exists.
sub DebuggingLibrary {
  my $file = shift;
  if ($file =~ m|^/| && -f "/usr/lib/debug$file") {
    return "/usr/lib/debug$file";
  }
  return undef;
}

# Parse text section header of a library using objdump
# Returns a hash ref with keys size, vma and file_offset (hex strings as
# printed by objdump), or undef if no .text section line was found.
sub ParseTextSectionHeaderFromObjdump {
  my $lib = shift;

  my $size = undef;
  my $vma;
  my $file_offset;
  # Get objdump output from the library file to figure out how to
  # map between mapped addresses and addresses in the library.
  my $objdump = $obj_tool_map{"objdump"};
  open(OBJDUMP, "$objdump -h $lib |")
                || error("$objdump $lib: $!\n");
  while (defined($_ = readline(*OBJDUMP))) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    # Idx Name          Size      VMA       LMA       File off  Algn
    #  10 .text         00104b2c  420156f0  420156f0  000156f0  2**4
    # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
    # offset may still be 8.  But AddressSub below will still handle that.
    my @x = split;
    if (($#x >= 6) && ($x[1] eq '.text')) {
      $size = $x[2];
      $vma = $x[3];
      $file_offset = $x[5];
      last;
    }
  }
  close(OBJDUMP);

  if (!defined($size)) {
    return undef;
  }

  my $r = {};
  $r->{size} = $size;
  $r->{vma} = $vma;
  $r->{file_offset} = $file_offset;

  return $r;
}

# Parse text section header of a library using otool (on OS X)
# Returns a hash ref with keys size, vma and file_offset (hex strings
# without a "0x" prefix), or undef unless all three were found.
sub ParseTextSectionHeaderFromOtool {
  my $lib = shift;

  my $size = undef;
  my $vma = undef;
  my $file_offset = undef;
  # Get otool output from the library file to figure out how to
  # map between mapped addresses and addresses in the library.
  my $otool = $obj_tool_map{"otool"};
  open(OTOOL, "$otool -l $lib |")
                || error("$otool $lib: $!\n");
  my $cmd = "";
  my $sectname = "";
  my $segname = "";
  foreach my $line (readline(*OTOOL)) {
    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
    # Load command <#>
    #       cmd LC_SEGMENT
    # [...]
    # Section
    #   sectname __text
    #    segname __TEXT
    #       addr 0x000009f8
    #       size 0x00018b9e
    #     offset 2552
    #      align 2^2 (4)
    # We will need to strip off the leading 0x from the hex addresses,
    # and convert the offset into hex.
    if ($line =~ /Load command/) {
      $cmd = "";
      $sectname = "";
      $segname = "";
    } elsif ($line =~ /Section/) {
      $sectname = "";
      $segname = "";
    } elsif ($line =~ /cmd (\w+)/) {
      $cmd = $1;
    } elsif ($line =~ /sectname (\w+)/) {
      $sectname = $1;
    } elsif ($line =~ /segname (\w+)/) {
      $segname = $1;
    } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") &&
               $sectname eq "__text" &&
               $segname eq "__TEXT")) {
      # Not inside the __TEXT,__text section: ignore addr/size/offset lines.
      next;
    } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) {
      $vma = $1;
    } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) {
      $size = $1;
    } elsif ($line =~ /\boffset ([0-9]+)/) {
      $file_offset = sprintf("%016x", $1);
    }
    if (defined($vma) && defined($size) && defined($file_offset)) {
      last;
    }
  }
  close(OTOOL);

  if (!defined($vma) || !defined($size) || !defined($file_offset)) {
    return undef;
  }

  my $r = {};
  $r->{size} = $size;
  $r->{vma} = $vma;
  $r->{file_offset} = $file_offset;

  return $r;
}

# Parse the text section header of a library, preferring otool when it is
# configured and falling back to objdump.  Arguments are passed through.
sub ParseTextSectionHeader {
  # obj_tool_map("otool") is only defined if we're in a Mach-O environment
  if (defined($obj_tool_map{"otool"})) {
    my $r = ParseTextSectionHeaderFromOtool(@_);
    if (defined($r)){
      return $r;
    }
  }
  # If otool doesn't work, or we don't have it, fall back to objdump
  return ParseTextSectionHeaderFromObjdump(@_);
}

# Split /proc/pid/maps dump into a list of libraries
# Arguments: program name, the map text, and the hash of sampled pcs.
# Returns an array ref of [library, start, finish, offset] entries, ending
# with an entry for the main program.  Returns nothing when symbols come
# from a symbol page.
sub ParseLibraries {
  return if $main::use_symbol_page;  # We don't need libraries info.
  my $prog = shift;
  my $map = shift;
  my $pcs = shift;

  my $result = [];
  my $h = "[a-f0-9]+";
  my $zero_offset = HexExtend("0");

  my $buildvar = "";
  foreach my $l (split("\n", $map)) {
    if ($l =~ m/^\s*build=(.*)$/) {
      $buildvar = $1;
    }

    my $start;
    my $finish;
    my $offset;
    my $lib;
    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) {
      # Full line from /proc/self/maps.  Example:
      #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
      $start = HexExtend($1);
      $finish = HexExtend($2);
      $offset = HexExtend($3);
      $lib = $4;
      $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
      # Cooked line from DumpAddressMap.  Example:
      #   40000000-40015000: /lib/ld-2.3.2.so
      $start = HexExtend($1);
      $finish = HexExtend($2);
      $offset = $zero_offset;
      $lib = $3;
    } else {
      next;
    }

    # Expand "$build" variable if available
    $lib =~ s/\$build\b/$buildvar/g;

    $lib = FindLibrary($lib);

    # Check for pre-relocated libraries, which use pre-relocated symbol tables
    # and thus require adjusting the offset that we'll use to translate
    # VM addresses into symbol table addresses.
    # Only do this if we're not going to fetch the symbol table from a
    # debugging copy of the library.
    if (!DebuggingLibrary($lib)) {
      my $text = ParseTextSectionHeader($lib);
      if (defined($text)) {
         my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
         $offset = AddressAdd($offset, $vma_offset);
      }
    }

    push(@{$result}, [$lib, $start, $finish, $offset]);
  }

  # Append special entry for additional library (not relocated)
  if ($main::opt_lib ne "") {
    my $text = ParseTextSectionHeader($main::opt_lib);
    if (defined($text)) {
       my $start = $text->{vma};
       my $finish = AddressAdd($start, $text->{size});

       push(@{$result}, [$main::opt_lib, $start, $finish, $start]);
    }
  }

  # Append special entry for the main program.  This covers
  # 0..max_pc_value_seen, so that we assume pc values not found in one
  # of the library ranges will be treated as coming from the main
  # program binary.
  my $min_pc = HexExtend("0");
  my $max_pc = $min_pc;          # find the maximal PC value in any sample
  foreach my $pc (keys(%{$pcs})) {
    if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); }
  }
  push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]);

  return $result;
}

# Add two hex addresses of length $address_length.
# Run pprof --test for unit test if this is changed.
sub AddressAdd {
  my $addr1 = shift;
  my $addr2 = shift;
  my $sum;

  if ($address_length == 8) {
    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
    $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16);
    return sprintf("%08x", $sum);

  } else {
    # Do the addition in 7-nibble chunks to trivialize carry handling.

    if ($main::opt_debug and $main::opt_test) {
      print STDERR "AddressAdd $addr1 + $addr2 = ";
    }

    # Low 7 nibbles.
    my $a1 = substr($addr1,-7);
    $addr1 = substr($addr1,0,-7);
    my $a2 = substr($addr2,-7);
    $addr2 = substr($addr2,0,-7);
    $sum = hex($a1) + hex($a2);
    my $c = 0;
    if ($sum > 0xfffffff) {
      $c = 1;
      $sum -= 0x10000000;
    }
    my $r = sprintf("%07x", $sum);

    # Middle 7 nibbles, plus the carry from the low chunk.
    $a1 = substr($addr1,-7);
    $addr1 = substr($addr1,0,-7);
    $a2 = substr($addr2,-7);
    $addr2 = substr($addr2,0,-7);
    $sum = hex($a1) + hex($a2) + $c;
    $c = 0;
    if ($sum > 0xfffffff) {
      $c = 1;
      $sum -= 0x10000000;
    }
    $r = sprintf("%07x", $sum) . $r;

    # Top 2 nibbles; overflow wraps around.
    $sum = hex($addr1) + hex($addr2) + $c;
    if ($sum > 0xff) { $sum -= 0x100; }
    $r = sprintf("%02x", $sum) . $r;

    if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; }

    return $r;
  }
}


# Subtract two hex addresses of length $address_length.
# Run pprof --test for unit test if this is changed.
sub AddressSub {
  # Returns $addr1 - $addr2 as a zero-padded hex string of $address_length
  # digits; a negative difference wraps around.
  my $addr1 = shift;
  my $addr2 = shift;
  my $diff;

  if ($address_length == 8) {
    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
    $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16);
    return sprintf("%08x", $diff);

  } else {
    # Do the subtraction in 7-nibble chunks to trivialize borrow handling.
    # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; }

    # Low 7 nibbles.
    my $a1 = hex(substr($addr1,-7));
    $addr1 = substr($addr1,0,-7);
    my $a2 = hex(substr($addr2,-7));
    $addr2 = substr($addr2,0,-7);
    my $b = 0;
    if ($a2 > $a1) {
      $b = 1;
      $a1 += 0x10000000;
    }
    $diff = $a1 - $a2;
    my $r = sprintf("%07x", $diff);

    # Middle 7 nibbles, minus the borrow from the low chunk.
    $a1 = hex(substr($addr1,-7));
    $addr1 = substr($addr1,0,-7);
    $a2 = hex(substr($addr2,-7)) + $b;
    $addr2 = substr($addr2,0,-7);
    $b = 0;
    if ($a2 > $a1) {
      $b = 1;
      $a1 += 0x10000000;
    }
    $diff = $a1 - $a2;
    $r = sprintf("%07x", $diff) . $r;

    # Top 2 nibbles; underflow wraps around.
    $a1 = hex($addr1);
    $a2 = hex($addr2) + $b;
    if ($a2 > $a1) { $a1 += 0x100; }
    $diff = $a1 - $a2;
    $r = sprintf("%02x", $diff) . $r;

    # if ($main::opt_debug) { print STDERR "$r\n"; }

    return $r;
  }
}

# Increment a hex address of length $address_length.
# Run pprof --test for unit test if this is changed.
sub AddressInc {
  my $addr = shift;
  my $sum;

  if ($address_length == 8) {
    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
    $sum = (hex($addr)+1) % (0x10000000 * 16);
    return sprintf("%08x", $sum);

  } else {
    # Do the addition in 7-nibble chunks to trivialize carry handling.
    # We are always doing this to step through the addresses in a function,
    # and will almost never overflow the first chunk, so we check for this
    # case and exit early.

    # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; }

    my $a1 = substr($addr,-7);
    $addr = substr($addr,0,-7);
    $sum = hex($a1) + 1;
    my $r = sprintf("%07x", $sum);
    if ($sum <= 0xfffffff) {
      $r = $addr . $r;
      # if ($main::opt_debug) { print STDERR "$r\n"; }
      return HexExtend($r);
    } else {
      # Low chunk overflowed: it becomes zero and the carry moves up.
      $r = "0000000";
    }

    $a1 = substr($addr,-7);
    $addr = substr($addr,0,-7);
    $sum = hex($a1) + 1;
    $r = sprintf("%07x", $sum) . $r;
    if ($sum <= 0xfffffff) {
      $r = $addr . $r;
      # if ($main::opt_debug) { print STDERR "$r\n"; }
      return HexExtend($r);
    } else {
      # Middle chunk overflowed as well.
      $r = "00000000000000";
    }

    $sum = hex($addr) + 1;
    if ($sum > 0xff) { $sum -= 0x100; }
    $r = sprintf("%02x", $sum) . $r;

    # if ($main::opt_debug) { print STDERR "$r\n"; }
    return $r;
  }
}

# Extract symbols for all PC values found in profile
# Arguments: the library list (entries of [name, start, finish, offset])
# and the hash of pcs.  Returns a hash ref filled in by MapToSymbols.
sub ExtractSymbols {
  my $libs = shift;
  my $pcset = shift;

  my $symbols = {};

  # Map each PC value to the containing library.  To make this faster,
  # we sort libraries by their starting pc value (highest first), and
  # advance through the libraries as we advance the pc.  Sometimes the
  # addresses of libraries may overlap with the addresses of the main
  # binary, so to make sure the libraries 'win', we iterate over the
  # libraries in reverse order (which assumes the binary doesn't start
  # in the middle of a library, which seems a fair assumption).
  my @pcs = (sort { $a cmp $b } keys(%{$pcset}));  # pcset is 0-extended strings
  foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
    my $libname = $lib->[0];
    my $start = $lib->[1];
    my $finish = $lib->[2];
    my $offset = $lib->[3];

    # Get list of pcs that belong in this library.
    my $contained = [];
    my ($start_pc_index, $finish_pc_index);
    # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
    for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
         $finish_pc_index--) {
      last if $pcs[$finish_pc_index - 1] le $finish;
    }
    # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
    for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
         $start_pc_index--) {
      last if $pcs[$start_pc_index - 1] lt $start;
    }
    # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
    # in case there are overlaps in libraries and the main binary.
    @{$contained} = splice(@pcs, $start_pc_index,
                           $finish_pc_index - $start_pc_index);
    # Map to symbols
    MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
  }

  return $symbols;
}

# Map list of PC values to symbols for a given image
# Arguments: image path, mapping offset, array ref of pcs, and the symbol
# hash to fill.  Each pc maps to a flat list of
# (function, file:line, full function) triples.
sub MapToSymbols {
  my $image = shift;
  my $offset = shift;
  my $pclist = shift;
  my $symbols = shift;

  my $debug = 0;

  # Ignore empty binaries
  if ($#{$pclist} < 0) { return; }

  # Figure out the addr2line command to use
  my $addr2line = $obj_tool_map{"addr2line"};
  my $cmd = "$addr2line -f -C -e $image";
  if (exists $obj_tool_map{"addr2line_pdb"}) {
    $addr2line = $obj_tool_map{"addr2line_pdb"};
    $cmd = "$addr2line --demangle -f -C -e $image";
  }

  # If "addr2line" isn't installed on the system at all, just use
  # nm to get what info we can (function names, but not line numbers).
  if (system("$addr2line --help >/dev/null 2>&1") != 0) {
    MapSymbolsWithNM($image, $offset, $pclist, $symbols);
    return;
  }

  # "addr2line -i" can produce a variable number of lines per input
  # address, with no separator that allows us to tell when data for
  # the next address starts.  So we find the address for a special
  # symbol (_fini) and interleave this address between all real
  # addresses passed to addr2line.  The name of this special symbol
  # can then be used as a separator.
  $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
  my $nm_symbols = {};
  MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
  # TODO(csilvers): only add '-i' if addr2line supports it.
  if (defined($sep_address)) {
    # Only add " -i" to addr2line if the binary supports it.
    # addr2line --help returns 0, but not if it sees an unknown flag first.
    if (system("$cmd -i --help >/dev/null 2>&1") == 0) {
      $cmd .= " -i";
    } else {
      $sep_address = undef;   # no need for sep_address if we don't support -i
    }
  }

  # Make file with all PC values with intervening 'sep_address' so
  # that we can reliably detect the end of inlined function list
  open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
  if ($debug) { print("---- $image ---\n"); }
  for (my $i = 0; $i <= $#{$pclist}; $i++) {
    # addr2line always reads hex addresses, and does not need '0x' prefix.
    if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); }
    printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
    if (defined($sep_address)) {
      printf ADDRESSES ("%s\n", $sep_address);
    }
  }
  close(ADDRESSES);
  if ($debug) {
    print("----\n");
    system("cat $main::tmpfile_sym");
    print("----\n");
    system("$cmd <$main::tmpfile_sym");
    print("----\n");
  }

  open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n");
  my $count = 0;   # Index in pclist
  while (defined($_ = readline(*SYMBOLS))) {
    # Read fullfunction and filelineinfo from next pair of lines
    s/\r?\n$//g;
    my $fullfunction = $_;
    $_ = readline(*SYMBOLS);
    # Output that ends after a function line has no file:line partner;
    # treat the missing line as empty rather than operating on undef.
    $_ = "" if !defined($_);
    s/\r?\n$//g;
    my $filelinenum = $_;

    if (defined($sep_address) && $fullfunction eq $sep_symbol) {
      # Terminating marker for data for this address
      $count++;
      next;
    }

    $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths

    my $pcstr = $pclist->[$count];
    my $function = ShortFunctionName($fullfunction);
    if ($fullfunction eq '??') {
      # See if nm found a symbol
      my $nms = $nm_symbols->{$pcstr};
      if (defined($nms)) {
        $function = $nms->[0];
        $fullfunction = $nms->[2];
      }
    }

    # Prepend to accumulated symbols for pcstr
    # (so that caller comes before callee)
    my $sym = $symbols->{$pcstr};
    if (!defined($sym)) {
      $sym = [];
      $symbols->{$pcstr} = $sym;
    }
    unshift(@{$sym}, $function, $filelinenum, $fullfunction);
    if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
    if (!defined($sep_address)) {
      # Inlining is off, so this entry ends immediately
      $count++;
    }
  }
  close(SYMBOLS);
}

# Use nm to map the list of referenced PCs to symbols.  Return true iff we
# are able to read procedure information via nm.
sub MapSymbolsWithNM {
  my $image = shift;
  my $offset = shift;
  my $pclist = shift;
  my $symbols = shift;

  # Get nm output sorted by increasing address
  my $symbol_table = GetProcedureBoundaries($image, ".");
  if (!%{$symbol_table}) {
    return 0;
  }
  # Start addresses are already the right length (8 or 16 hex digits).
  my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
    keys(%{$symbol_table});

  if ($#names < 0) {
    # No symbols: just use addresses
    foreach my $pc (@{$pclist}) {
      my $pcstr = "0x" . $pc;
      $symbols->{$pc} = [$pcstr, "?", $pcstr];
    }
    return 0;
  }

  # Sort addresses so we can do a join against nm output
  my $index = 0;
  my $fullname = $names[0];
  my $name = ShortFunctionName($fullname);
  foreach my $pc (sort { $a cmp $b } @{$pclist}) {
    # Adjust for mapped offset
    my $mpc = AddressSub($pc, $offset);
    # Advance to the first symbol whose end address lies beyond $mpc.
    while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
      $index++;
      $fullname = $names[$index];
      $name = ShortFunctionName($fullname);
    }
    if ($mpc lt $symbol_table->{$fullname}->[1]) {
      $symbols->{$pc} = [$name, "?", $fullname];
    } else {
      my $pcstr = "0x" . $pc;
      $symbols->{$pc} = [$pcstr, "?", $pcstr];
    }
  }
  return 1;
}

# Shorten a demangled function name by removing argument lists (with any
# trailing "const"), template arguments, and a leading return type.
sub ShortFunctionName {
  my $function = shift;
  while ($function =~ s/\([^()]*\)(\s*const)?//g) { }   # Argument types
  while ($function =~ s/<[^<>]*>//g)  { }    # Remove template arguments
  $function =~ s/^.*\s+(\w+::)/$1/;          # Remove leading type
  return $function;
}

##### Miscellaneous #####

# Find the right versions of the above object tools to use.
# The argument is the program file being analyzed, and should be an ELF
# 32-bit or ELF 64-bit executable file.  The location of the tools
# is determined by considering the following options in this order:
#   1) --tools option, if set
#   2) PPROF_TOOLS environment variable, if set
#   3) the environment
sub ConfigureObjTools {
  my $prog_file = shift;

  # Check for the existence of $prog_file because /usr/bin/file does not
  # predictably return error status in prod.
  (-e $prog_file)  || error("$prog_file does not exist.\n");

  # Follow symlinks (at least for systems where "file" supports that)
  my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`;
  if ($file_type =~ /64-bit/) {
    # Change $address_length to 16 if the program file is ELF 64-bit.
    # We can't detect this from many (most?) heap or lock contention
    # profiles, since the actual addresses referenced are generally in low
    # memory even for 64-bit programs.
    $address_length = 16;
  }

  if ($file_type =~ /MS Windows/) {
    # For windows, we provide a version of nm and addr2line as part of
    # the opensource release, which is capable of parsing
    # Windows-style PDB executables.  It should live in the path, or
    # in the same directory as pprof.
    $obj_tool_map{"nm_pdb"} = "nm-pdb";
    $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
  }

  if ($file_type =~ /Mach-O/) {
    # OS X uses otool to examine Mach-O files, rather than objdump.
    $obj_tool_map{"otool"} = "otool";
    $obj_tool_map{"addr2line"} = "false";  # no addr2line
    $obj_tool_map{"objdump"} = "false";  # no objdump
  }

  # Go fill in %obj_tool_map with the pathnames to use:
  foreach my $tool (keys %obj_tool_map) {
    $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool});
  }
}

# Returns the path of a caller-specified object tool.  If --tools or
# PPROF_TOOLS are specified, then returns the full path to the tool
# with that prefix.  Otherwise, returns the path unmodified (which
# means we will look for it on PATH).
sub ConfigureTool {
  my $tool = shift;
  my $path;

  # --tools (or $PPROF_TOOLS) is a comma separated list, where each
  # item is either a) a pathname prefix, or b) a map of the form
  # TOOL:PATH.  First we look for an entry of type (b) for our
  # tool.  If one is found, we use it.  Otherwise, we consider all the
  # pathname prefixes in turn, until one yields an existing file.  If
  # none does, we use a default path.
  my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || "";
  if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
    $path = $2;
    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
  } elsif ($tools ne '') {
    foreach my $prefix (split(',', $tools)) {
      next if ($prefix =~ /:/);    # ignore "tool:fullpath" entries in the list
      if (-x $prefix . $tool) {
        $path = $prefix . $tool;
        last;
      }
    }
    if (!$path) {
      error("No '$tool' found with prefix specified by " .
            "--tools (or \$PPROF_TOOLS) '$tools'\n");
    }
  } else {
    # ... otherwise use the version that exists in the same directory as
    # pprof.  If there's nothing there, use $PATH.
    $0 =~ m,[^/]*$,;     # this is everything after the last slash
    my $dirname = $`;    # this is everything up to and including the last slash
    if (-x "$dirname$tool") {
      $path = "$dirname$tool";
    } else {
      $path = $tool;
    }
  }
  if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; }
  return $path;
}

# Remove the temporary symbol file and all registered temporary files, and
# tell the user where a dynamically collected profile was left.
sub cleanup {
  unlink($main::tmpfile_sym);
  unlink(keys %main::tempnames);

  # We leave any collected profiles in $HOME/pprof in case the user wants
  # to look at them later.  We print a message informing them of this.
  if ((scalar(@main::profile_files) > 0) &&
      defined($main::collected_profile)) {
    if (scalar(@main::profile_files) == 1) {
      print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
    }
    print STDERR "If you want to investigate this profile further, you can do:\n";
    print STDERR "\n";
    print STDERR " pprof \\\n";
    print STDERR " $main::prog \\\n";
    print STDERR " $main::collected_profile\n";
    print STDERR "\n";
  }
}

# Clean up and exit with status 1.
sub sighandler {
  cleanup();
  exit(1);
}

# Print $msg to STDERR, clean up, and exit with status 1.
sub error {
  my $msg = shift;
  print STDERR $msg;
  cleanup();
  exit(1);
}


# Run $nm_command and get all the resulting procedure boundaries whose
# names match "$regexp" and returns them in a hashtable mapping from
# procedure name to a two-element vector of [start address, end address]
sub GetProcedureBoundariesViaNm {
  my $nm_command = shift;
  my $regexp = shift;

  my $symbol_table = {};
  open(NM, "$nm_command |") || error("$nm_command: $!\n");
  my $last_start = "0";
  my $routine = "";
  while (defined($_ = readline(*NM))) {
    s/\r//g;         # turn windows-looking lines into unix-looking lines
    if (m/^\s*([0-9a-f]+) (.) (..*)/) {
      my $start_val = $1;
      my $type = $2;
      my $this_routine = $3;

      # It's possible for two symbols to share the same address, if
      # one is a zero-length variable (like __start_google_malloc) or
      # one symbol is a weak alias to another (like __libc_malloc).
      # In such cases, we want to ignore all values except for the
      # actual symbol, which in nm-speak has type "T".  The logic
      # below does this, though it's a bit tricky: what happens when
      # we have a series of lines with the same address, is the first
      # one gets queued up to be processed.  However, it won't
      # *actually* be processed until later, when we read a line with
      # a different address.  That means that as long as we're reading
      # lines with the same address, we have a chance to replace that
      # item in the queue, which we do whenever we see a 'T' entry --
      # that is, a line with type 'T'.  If we never see a 'T' entry,
      # we'll just go ahead and process the first entry (which never
      # got touched in the queue), and ignore the others.
      if ($start_val eq $last_start && $type =~ /t/i) {
        # We are the 'T' symbol at this address, replace previous symbol.
        $routine = $this_routine;
        next;
      } elsif ($start_val eq $last_start) {
        # We're not the 'T' symbol at this address, so ignore us.
        next;
      }

      if ($this_routine eq $sep_symbol) {
        $sep_address = HexExtend($start_val);
      }

      # Tag this routine with the starting address in case the image
      # has multiple occurrences of this routine.  We use a syntax
      # that resembles template parameters that are automatically
      # stripped out by ShortFunctionName()
      $this_routine .= "<$start_val>";

      if (defined($routine) && $routine =~ m/$regexp/) {
        $symbol_table->{$routine} = [HexExtend($last_start),
                                     HexExtend($start_val)];
      }
      $last_start = $start_val;
      $routine = $this_routine;
    } elsif (m/^Loaded image name: (.+)/) {
      # The win32 nm workalike emits information about the binary it is using.
      if ($main::opt_debug) { print STDERR "Using Image $1\n"; }
    } elsif (m/^PDB file name: (.+)/) {
      # The win32 nm workalike emits information about the pdb it is using.
      if ($main::opt_debug) { print STDERR "Using PDB $1\n"; }
    }
  }
  close(NM);
  # Handle the last line in the nm output.  Unfortunately, we don't know
  # how big this last symbol is, because we don't know how big the file
  # is.  For now, we just give it a size of 0.
  # TODO(csilvers): do better here.
  if (defined($routine) && $routine =~ m/$regexp/) {
    $symbol_table->{$routine} = [HexExtend($last_start),
                                 HexExtend($last_start)];
  }
  return $symbol_table;
}

# Gets the procedure boundaries for all routines in "$image" whose names
# match "$regexp" and returns them in a hashtable mapping from procedure
# name to a two-element vector of [start address, end address].
# Will return an empty map if nm is not installed or not working properly.
sub GetProcedureBoundaries {
  my $image = shift;
  my $regexp = shift;

  # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
  my $debugging = DebuggingLibrary($image);
  if ($debugging) {
    $image = $debugging;
  }

  my $nm = $obj_tool_map{"nm"};
  my $cppfilt = $obj_tool_map{"c++filt"};

  # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
  # binary doesn't support --demangle.  In addition, for OS X we need
  # to use the -f flag to get 'flat' nm output (otherwise we don't sort
  # properly and get incorrect results).  Unfortunately, GNU nm uses -f
  # in an incompatible way.  So first we test whether our nm supports
  # --demangle and -f.
  my $demangle_flag = "";
  my $cppfilt_flag = "";
  if (system("$nm --demangle $image >/dev/null 2>&1") == 0) {
    # In this mode, we do "nm --demangle IMAGE"
    $demangle_flag = "--demangle";
    $cppfilt_flag = "";
  } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) {
    # In this mode, we do "nm IMAGE | c++filt"
    $cppfilt_flag = " | $cppfilt";
  }
  my $flatten_flag = "";
  if (system("$nm -f $image >/dev/null 2>&1") == 0) {
    $flatten_flag = "-f";
  }

  # Finally, in the case $image isn't a debug library, we try again with
  # -D to at least get *exported* symbols.  If we can't use --demangle,
  # we use c++filt instead, if it exists on this system.
  my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" .
                     " $image 2>/dev/null $cppfilt_flag",
                     "$nm -D -n $flatten_flag $demangle_flag" .
                     " $image 2>/dev/null $cppfilt_flag",
                     # 6nm is for Go binaries
                     "6nm $image 2>/dev/null | sort",
                     );

  # If the executable is an MS Windows PDB-format executable, we'll
  # have set up obj_tool_map("nm_pdb").  In this case, we actually
  # want to use both unix nm and windows-specific nm_pdb, since
  # PDB-format executables can apparently include dwarf .o files.
  if (exists $obj_tool_map{"nm_pdb"}) {
    my $nm_pdb = $obj_tool_map{"nm_pdb"};
    push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null");
  }

  # Use the first command that yields a non-empty symbol table.
  foreach my $nm_command (@nm_commands) {
    my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp);
    return $symbol_table if (%{$symbol_table});
  }
  my $symbol_table = {};
  return $symbol_table;
}


# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
# To make them more readable, we add underscores at interesting places.
# This routine removes the underscores, producing the canonical representation
# used by pprof to represent addresses, particularly in the tested routines.
sub CanonicalHex {
  my $arg = shift;
  return join '', (split '_',$arg);
}


# Unit test for AddressAdd:
sub AddressAddUnitTest {
  my $test_data_8 = shift;
  my $test_data_16 = shift;
  my $error_count = 0;
  my $fail_count = 0;
  my $pass_count = 0;
  # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n";

  # First a few 8-nibble addresses.
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. 
Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
+ [qw(aaaaaaaa 50505050 + 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], + [qw(50505050 aaaaaaaa + 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], + [qw(ffffffff aaaaaaaa + 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], + [qw(00000001 ffffffff + 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], + [qw(00000001 fffffff0 + 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], + + [qw(00_a00000a_aaaaaaa 50505050 + 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], + [qw(0f_fff0005_0505050 aaaaaaaa + 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], + [qw(00_000000f_fffffff 01_800000a_aaaaaaa + 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], + [qw(00_0000000_0000001 ff_fffffff_fffffff + 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], + [qw(00_0000000_0000001 ff_fffffff_ffffff0 + ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], + ]; + + $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); + $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); + if ($error_count > 0) { + print STDERR $error_count, " errors: FAILED\n"; + } else { + print STDERR "PASS\n"; + } + exit ($error_count); +} diff --git a/config.guess b/config.guess new file mode 100755 index 0000000..0773d0f --- /dev/null +++ b/config.guess @@ -0,0 +1,1456 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + +timestamp='2004-03-03' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Originally written by Per Bothner <per@bothner.com>. +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information."
+ +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit 0 ;; + --version | -v ) + echo "$version" ; exit 0 ;; + --help | --h* | -h ) + echo "$usage"; exit 0 ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac 
;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? 
+ os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit 0 ;; + amd64:OpenBSD:*:*) + echo x86_64-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + amiga:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + arc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + cats:OpenBSD:*:*) + echo arm-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + hp300:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mac68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + macppc:OpenBSD:*:*) + echo powerpc-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvme68k:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvme88k:OpenBSD:*:*) + echo m88k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + mvmeppc:OpenBSD:*:*) + echo powerpc-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + pegasos:OpenBSD:*:*) + echo powerpc-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + pmax:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + sgi:OpenBSD:*:*) + echo mipseb-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + sun3:OpenBSD:*:*) + echo m68k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + wgrisc:OpenBSD:*:*) + echo mipsel-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + *:OpenBSD:*:*) + echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; + *:ekkoBSD:*:*) + echo 
${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit 0 ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit 0 ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit 0 ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r.
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit 0 ;; + Alpha*:OpenVMS:*:*) + echo alpha-hp-vms + exit 0 ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit 0 ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit 0 ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit 0;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit 0 ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit 0 ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit 0 ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit 0 ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit 0;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit 0;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit 0 ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit 0 ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit 0 ;; + DRS?6000:UNIX_SV:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7 && exit 0 ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + i86pc:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. 
Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. + echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit 0 ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit 0 ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit 0 ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit 0 ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit 0 ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit 0 ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit 0 ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit 0 ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit 0 ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit 0 ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit 0 ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit 0 ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo clipper-intergraph-clix${UNAME_RELEASE} + exit 0 ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c \ + && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ + && exit 0 + echo mips-mips-riscos${UNAME_RELEASE} + exit 0 ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit 0 ;; +
Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit 0 ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit 0 ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit 0 ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit 0 ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit 0 ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit 0 ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit 0 ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit 0 ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit 0 ;; + XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit 0 ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit 0 ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit 0 ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit 0 ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit 0 ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 + echo rs6000-ibm-aix3.2.5 + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit 0 ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit 0 ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit 0 ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit 0 ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit 0 ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit 0 ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit 0 ;; + 9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit 0 ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit 0 ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]??
) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + # avoid double evaluation of $set_cc_for_build + test -n "$CC_FOR_BUILD" || eval $set_cc_for_build + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit 0 ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit 0 ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K
erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 + echo unknown-hitachi-hiuxwe2 + exit 0 ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit 0 ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit 0 ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit 0 ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit 0 ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit 0 ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit 0 ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit 0 ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit 0 ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit 0 ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit 0 ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit 0 ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*TS:*:*:*) + echo 
t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + *:UNICOS/mp:*:*) + echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit 0 ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit 0 ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit 0 ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit 0 ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit 0 ;; + *:FreeBSD:*:*) + # Determine whether the default compiler uses glibc. + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <features.h> + #if __GLIBC__ >= 2 + LIBC=gnu + #else + LIBC= + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` + # GNU/KFreeBSD systems have a "k" prefix to indicate we are using + # FreeBSD's kernel, but not the complete OS.
+ case ${LIBC} in gnu) kernel_only='k' ;; esac + echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} + exit 0 ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit 0 ;; + i*:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit 0 ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit 0 ;; + x86:Interix*:[34]*) + echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' + exit 0 ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit 0 ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit 0 ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit 0 ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit 0 ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit 0 ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit 0 ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit 0 ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit 0 ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit 0 ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if 
defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` + test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 + ;; + mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` + test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 + ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit 0 ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit 0 ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit 0 ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit 0 ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit 0 ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit 0 ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit 0 ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit 0 ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit 0 ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit 0 ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. 
+ echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit 0 ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <features.h> + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #ifdef __INTEL_COMPILER + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` + test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 + test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit 0 ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit 0 ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed.
+ echo ${UNAME_MACHINE}-pc-os2-emx + exit 0 ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit 0 ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit 0 ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit 0 ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit 0 ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit 0 ;; + i*86:*:5:[78]*) + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit 0 ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo ${UNAME_MACHINE}-pc-isc$UNAME_REL + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit 0 ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the processor, so we play safe by assuming i386.
+ echo i386-pc-msdosdjgpp + exit 0 ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit 0 ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit 0 ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit 0 ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit 0 ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit 0 ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit 0 ;; + M68*:*:R3V[567]*:*) + test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4.3${OS_REL} && exit 0 + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && echo i486-ncr-sysv4 && exit 0 ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit 0 ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit 0 ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit 0 ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit 0 ;; + 
*:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit 0 ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit 0 ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit 0 ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit 0 ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit 0 ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit 0 ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit 0 ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit 0 ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit 0 ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit 0 ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible.
+ echo i586-pc-beos + exit 0 ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit 0 ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit 0 ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit 0 ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit 0 ;; + *:Darwin:*:*) + case `uname -p` in + *86) UNAME_PROCESSOR=i686 ;; + powerpc) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit 0 ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit 0 ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit 0 ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit 0 ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit 0 ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit 0 ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit 0 ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit 0 ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit 0 ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit 0 ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit 0 ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit 0 ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit 0 ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit 0 ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit 0 ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit 0 ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c <<EOF +#ifdef _SEQUENT_ +# include <sys/types.h> +# include <sys/utsname.h> +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know....
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include <sys/param.h> + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct?
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit 0 ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit 0 ;; + c34*) + echo c34-convex-bsd + exit 0 ;; + c38*) + echo c38-convex-bsd + exit 0 ;; + c4*) + echo c4-convex-bsd + exit 0 ;; + esac +fi + +cat >&2 < in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.stamp.in b/config.stamp.in new file mode 100644 index 0000000..e69de29 diff --git a/config.sub b/config.sub new file mode 100755 index 0000000..264f820 --- /dev/null +++ b/config.sub @@ -0,0 +1,1549 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + +timestamp='2004-02-23' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. + +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name.
+ +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit 0 ;; + --version | -v ) + echo "$version" ; exit 0 ;; + --help | --h* | -h ) + echo "$usage"; exit 0 ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit 0;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. +maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ + kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work.
We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | m32r | m68000 | m68k | m88k | mcore \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64vr | mips64vrel \ + | mips64orion | mips64orionel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | msp430 \ + | ns16k | ns32k \ + | openrisc | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | 
powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \ + | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xscale | xstormy16 | xtensa \ + | z8k) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. 
+ 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* \ + | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | m32r-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | mcore-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | msp430-* \ + | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ + | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ + | xtensa-* \ + 
| ymp-* \ + | z8k-*) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. + 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + cr16c) + basic_machine=cr16c-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) 
+ basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + 
hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + mmix*) + basic_machine=mmix-knuth + os=-mmixware + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + 
basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nv1) + basic_machine=nv1-cray + os=-unicosmp + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + or32 | or32-*) + basic_machine=or32-unknown + os=-coff + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) 
basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + 
os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. 
+ w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparc | sparcv9 | sparcv9b) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. 
+ # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. 
+# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure.ac b/configure.ac new file mode 100644 index 0000000..412d3d1 --- /dev/null +++ b/configure.ac @@ -0,0 +1,927 @@ +dnl Process this file with autoconf to produce a configure script. +AC_INIT([Makefile.in]) + +dnl ============================================================================ +dnl Custom macro definitions. 
+
+dnl JE_CFLAGS_APPEND(cflag): append cflag to CFLAGS if a trivial program still compiles with it, else restore CFLAGS.
+AC_DEFUN([JE_CFLAGS_APPEND],
+[
+AC_MSG_CHECKING([whether compiler supports $1])
+TCFLAGS="${CFLAGS}"
+if test "x${CFLAGS}" = "x" ; then
+  CFLAGS="$1"
+else
+  CFLAGS="${CFLAGS} $1"
+fi
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[[
+]], [[
+    return 0;
+]])],
+              AC_MSG_RESULT([yes]),
+              AC_MSG_RESULT([no])
+              [CFLAGS="${TCFLAGS}"]
+)
+])
+
+dnl JE_COMPILABLE(label, hcode, mcode, rvar): set rvar to "yes" if prologue hcode plus main() body mcode compiles and links, else "no" (never runs the program, so it works when cross compiling).
+AC_DEFUN([JE_COMPILABLE],
+[
+AC_MSG_CHECKING([whether $1 is compilable])
+AC_LINK_IFELSE([AC_LANG_PROGRAM(
+[$2], [$3])],
+              AC_MSG_RESULT([yes])
+              [$4="yes"],
+              AC_MSG_RESULT([no])
+              [$4="no"]
+)
+])
+
+dnl ============================================================================
+
+srcroot=$srcdir
+if test "x${srcroot}" = "x." ; then
+  srcroot=""
+else
+  srcroot="${srcroot}/"
+fi
+AC_SUBST([srcroot])
+abs_srcroot="`cd \"${srcdir}\"; pwd`/"
+AC_SUBST([abs_srcroot])
+
+objroot=""
+AC_SUBST([objroot])
+abs_objroot="`pwd`/"
+AC_SUBST([abs_objroot])
+
+dnl Munge install path variables; each directory is eval'ed twice so nested ${prefix}-style references are expanded.
+if test "x$prefix" = "xNONE" ; then
+  prefix="/usr/local"
+fi
+if test "x$exec_prefix" = "xNONE" ; then
+  exec_prefix=$prefix
+fi
+PREFIX=$prefix
+AC_SUBST([PREFIX])
+BINDIR=`eval echo $bindir`
+BINDIR=`eval echo $BINDIR`
+AC_SUBST([BINDIR])
+INCLUDEDIR=`eval echo $includedir`
+INCLUDEDIR=`eval echo $INCLUDEDIR`
+AC_SUBST([INCLUDEDIR])
+LIBDIR=`eval echo $libdir`
+LIBDIR=`eval echo $LIBDIR`
+AC_SUBST([LIBDIR])
+DATADIR=`eval echo $datadir`
+DATADIR=`eval echo $DATADIR`
+AC_SUBST([DATADIR])
+MANDIR=`eval echo $mandir`
+MANDIR=`eval echo $MANDIR`
+AC_SUBST([MANDIR])
+
+dnl Support for building documentation.
+AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH])
+AC_ARG_WITH([xslroot],
+  [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])],
+if test "x$with_xslroot" = "xno" -o "x$with_xslroot" = "xyes" ; then
+  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+else
+  XSLROOT="${with_xslroot}"
+fi,
+  XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl"
+)
+AC_SUBST([XSLROOT])
+
+dnl If CFLAGS isn't defined, set CFLAGS to something reasonable.  Otherwise,
+dnl just prevent autoconf from molesting CFLAGS (no_CFLAGS records that the user supplied none).
+CFLAGS=$CFLAGS
+AC_PROG_CC
+if test "x$CFLAGS" = "x" ; then
+  no_CFLAGS="yes"
+  if test "x$GCC" = "xyes" ; then
+    JE_CFLAGS_APPEND([-std=gnu99])
+    JE_CFLAGS_APPEND([-Wall])
+    JE_CFLAGS_APPEND([-pipe])
+    JE_CFLAGS_APPEND([-g3])
+  fi
+fi
+dnl Append EXTRA_CFLAGS to CFLAGS, if defined.
+if test "x$EXTRA_CFLAGS" != "x" ; then
+  JE_CFLAGS_APPEND([$EXTRA_CFLAGS])
+fi
+AC_PROG_CPP
+
+AC_CHECK_SIZEOF([void *])
+if test "x${ac_cv_sizeof_void_p}" = "x8" ; then
+  LG_SIZEOF_PTR=3
+elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then
+  LG_SIZEOF_PTR=2
+else
+  AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR])
+
+AC_CHECK_SIZEOF([int])
+if test "x${ac_cv_sizeof_int}" = "x8" ; then
+  LG_SIZEOF_INT=3
+elif test "x${ac_cv_sizeof_int}" = "x4" ; then
+  LG_SIZEOF_INT=2
+else
+  AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT])
+
+AC_CHECK_SIZEOF([long])
+if test "x${ac_cv_sizeof_long}" = "x8" ; then
+  LG_SIZEOF_LONG=3
+elif test "x${ac_cv_sizeof_long}" = "x4" ; then
+  LG_SIZEOF_LONG=2
+else
+  AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}])
+fi
+AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG])
+
+AC_CANONICAL_HOST
+dnl CPU-specific settings.
+CPU_SPINWAIT=""
+case "${host_cpu}" in
+  i[[345]]86)
+    ;;
+  i686)
+    JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]],
+                  [asm])
+    if test "x${asm}" = "xyes" ; then
+        CPU_SPINWAIT='__asm__ volatile("pause")'
+    fi
+    ;;
+  x86_64)
+    JE_COMPILABLE([__asm__ syntax], [],
+                  [[__asm__ volatile("pause"); return 0;]], [asm])
+    if test "x${asm}" = "xyes" ; then
+        CPU_SPINWAIT='__asm__ volatile("pause")'
+    fi
+    ;;
+  *)
+    ;;
+esac
+AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT])
+
+dnl Platform-specific settings.  abi and RPATH can probably be determined
+dnl programmatically, but doing so is error-prone, which makes it generally
+dnl not worth the trouble.
+dnl
+dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the
+dnl definitions need to be seen before any headers are included, which is a pain
+dnl to make happen otherwise.
+case "${host}" in
+  *-*-darwin*)
+    CFLAGS="$CFLAGS -fno-common -no-cpp-precomp"
+    abi="macho"
+    AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+    RPATH=""
+    ;;
+  *-*-freebsd*)
+    CFLAGS="$CFLAGS"
+    abi="elf"
+    AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+    RPATH="-Wl,-rpath,"
+    ;;
+  *-*-linux*)
+    CFLAGS="$CFLAGS"
+    CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE"
+    abi="elf"
+    AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED])
+    RPATH="-Wl,-rpath,"
+    ;;
+  *-*-netbsd*)
+    AC_MSG_CHECKING([ABI])
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+[[#ifdef __ELF__
+/* ELF */
+#else
+#error aout
+#endif
+]])],
+                      [CFLAGS="$CFLAGS"; abi="elf"],
+                      [abi="aout"])
+    AC_MSG_RESULT([$abi])
+    AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE])
+    RPATH="-Wl,-rpath,"
+    ;;
+  *-*-solaris2*)
+    CFLAGS="$CFLAGS"
+    abi="elf"
+    RPATH="-Wl,-R,"
+    dnl Solaris needs this for sigwait().
+    CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS"
+    LIBS="$LIBS -lposix4 -lsocket -lnsl"
+    ;;
+  *)
+    AC_MSG_RESULT([Unsupported operating system: ${host}])
+    abi="elf"
+    RPATH="-Wl,-rpath,"
+    ;;
+esac
+AC_SUBST([abi])
+AC_SUBST([RPATH])
+
+JE_COMPILABLE([__attribute__ syntax],
+              [static __attribute__((unused)) void foo(void){}],
+              [],
+              [attribute])
+if test "x${attribute}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ])
+  if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then
+    JE_CFLAGS_APPEND([-fvisibility=hidden])
+  fi
+fi
+
+JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [
+#define _GNU_SOURCE
+#include <sys/mman.h>
+], [
+void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0);
+], [mremap_fixed])
+if test "x${mremap_fixed}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_MREMAP_FIXED])
+fi
+
+dnl Support optional additions to rpath; a bare --with-rpath (value "yes") is treated like --without-rpath.
+AC_ARG_WITH([rpath],
+  [AS_HELP_STRING([--with-rpath=<rpath>], [Colon-separated rpath (ELF systems only)])],
+if test "x$with_rpath" = "xno" -o "x$with_rpath" = "xyes" ; then
+  RPATH_EXTRA=
+else
+  RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`"
+fi,
+  RPATH_EXTRA=
+)
+AC_SUBST([RPATH_EXTRA])
+
+dnl Disable rules that do automatic regeneration of configure output by default.
+AC_ARG_ENABLE([autogen],
+  [AS_HELP_STRING([--enable-autogen], [Automatically regenerate configure output])],
+if test "x$enable_autogen" = "xno" ; then
+  enable_autogen="0"
+else
+  enable_autogen="1"
+fi
+,
+enable_autogen="0"
+)
+AC_SUBST([enable_autogen])
+
+AC_PROG_INSTALL
+AC_PROG_RANLIB
+AC_PATH_PROG([AR], [ar], , [$PATH])
+AC_PATH_PROG([LD], [ld], , [$PATH])
+AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH])
+
+dnl Do not prefix public APIs by default.
+AC_ARG_WITH([jemalloc_prefix],
+  [AS_HELP_STRING([--with-jemalloc-prefix=<prefix>], [Prefix to prepend to all public APIs])],
+  [JEMALLOC_PREFIX="$with_jemalloc_prefix"],
+  [if test "x$abi" != "xmacho" ; then
+  JEMALLOC_PREFIX=""
+else
+  JEMALLOC_PREFIX="je_"
+fi]
+)
+if test "x$JEMALLOC_PREFIX" != "x" ; then
+  JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"`
+  AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"])
+  AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"])
+  jemalloc_prefix="$JEMALLOC_PREFIX"
+  jemalloc_cprefix="$JEMALLOC_CPREFIX"
+  AC_SUBST([jemalloc_prefix])
+  AC_SUBST([jemalloc_cprefix])
+  AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix])
+fi
+
+dnl Do not add suffix to installed files by default.
+AC_ARG_WITH([install_suffix],
+  [AS_HELP_STRING([--with-install-suffix=<suffix>], [Suffix to append to all installed files])],
+  [INSTALL_SUFFIX="$with_install_suffix"],
+  [INSTALL_SUFFIX=]
+)
+install_suffix="$INSTALL_SUFFIX"
+AC_SUBST([install_suffix])
+
+cfgoutputs_in="${srcroot}Makefile.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in"
+cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in"
+
+cfgoutputs_out="Makefile"
+cfgoutputs_out="${cfgoutputs_out} doc/html.xsl"
+cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl"
+cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h"
+cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_out="${cfgoutputs_out} test/jemalloc_test.h"
+
+cfgoutputs_tup="Makefile"
+cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in"
+cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in"
+cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in"
+cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h"
+cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in"
+
+cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in"
+
+cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h"
+
+cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in"
+
+dnl Do not silence irrelevant compiler warnings by default, since enabling this
+dnl option incurs a performance penalty.
+AC_ARG_ENABLE([cc-silence],
+  [AS_HELP_STRING([--enable-cc-silence],
+                  [Silence irrelevant compiler warnings])],
+[if test "x$enable_cc_silence" = "xno" ; then
+  enable_cc_silence="0"
+else
+  enable_cc_silence="1"
+fi
+],
+[enable_cc_silence="0"]
+)
+if test "x$enable_cc_silence" = "x1" ; then
+  AC_DEFINE([JEMALLOC_CC_SILENCE])
+fi
+
+dnl Do not compile with debugging by default; --enable-debug also defines JEMALLOC_IVSALLOC.
+AC_ARG_ENABLE([debug],
+  [AS_HELP_STRING([--enable-debug], [Build debugging code])],
+[if test "x$enable_debug" = "xno" ; then
+  enable_debug="0"
+else
+  enable_debug="1"
+fi
+],
+[enable_debug="0"]
+)
+if test "x$enable_debug" = "x1" ; then
+  AC_DEFINE([JEMALLOC_DEBUG], [ ])
+  AC_DEFINE([JEMALLOC_IVSALLOC], [ ])
+fi
+AC_SUBST([enable_debug])
+
+dnl Only optimize if not debugging and the user did not supply CFLAGS.
+if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then
+  dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS.
+  optimize="no"
+  echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes"
+  if test "x${optimize}" = "xyes" ; then
+    if test "x$GCC" = "xyes" ; then
+      JE_CFLAGS_APPEND([-O3])
+      JE_CFLAGS_APPEND([-funroll-loops])
+    else
+      JE_CFLAGS_APPEND([-O])
+    fi
+  fi
+fi
+
+dnl Do not enable statistics calculation by default.
+AC_ARG_ENABLE([stats],
+  [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])],
+[if test "x$enable_stats" = "xno" ; then
+  enable_stats="0"
+else
+  enable_stats="1"
+fi
+],
+[enable_stats="0"]
+)
+if test "x$enable_stats" = "x1" ; then
+  AC_DEFINE([JEMALLOC_STATS], [ ])
+fi
+AC_SUBST([enable_stats])
+
+dnl Do not enable profiling by default.  An empty backtrace_method means "still to be chosen" by the checks below.
+AC_ARG_ENABLE([prof],
+  [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])],
+[if test "x$enable_prof" = "xno" ; then
+  enable_prof="0"
+else
+  enable_prof="1"
+fi
+],
+[enable_prof="0"]
+)
+if test "x$enable_prof" = "x1" ; then
+  backtrace_method=""
+else
+  backtrace_method="N/A"
+fi
+
+AC_ARG_ENABLE([prof-libunwind],
+  [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])],
+[if test "x$enable_prof_libunwind" = "xno" ; then
+  enable_prof_libunwind="0"
+else
+  enable_prof_libunwind="1"
+fi
+],
+[enable_prof_libunwind="0"]
+)
+AC_ARG_WITH([static_libunwind],
+  [AS_HELP_STRING([--with-static-libunwind=<libunwind.a>],
+  [Path to static libunwind library; use rather than dynamically linking])],
+if test "x$with_static_libunwind" = "xno" ; then
+  LUNWIND="-lunwind"
+else
+  if test ! -f "$with_static_libunwind" ; then
+    AC_MSG_ERROR([Static libunwind not found: $with_static_libunwind])
+  fi
+  LUNWIND="$with_static_libunwind"
+fi,
+  LUNWIND="-lunwind"
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then
+  AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"])
+  if test "x$LUNWIND" = "x-lunwind" ; then
+    AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"],
+                 [enable_prof_libunwind="0"])
+  else
+    LIBS="$LIBS $LUNWIND"
+  fi
+  if test "x${enable_prof_libunwind}" = "x1" ; then
+    backtrace_method="libunwind"
+    AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ])
+  fi
+fi
+
+AC_ARG_ENABLE([prof-libgcc],
+  [AS_HELP_STRING([--disable-prof-libgcc],
+  [Do not use libgcc for backtracing])],
+[if test "x$enable_prof_libgcc" = "xno" ; then
+  enable_prof_libgcc="0"
+else
+  enable_prof_libgcc="1"
+fi
+],
+[enable_prof_libgcc="1"]
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \
+     -a "x$GCC" = "xyes" ; then
+  AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"])
+  AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"])
+  dnl The following is conservative, in that it only has entries for CPUs on
+  dnl which jemalloc has been tested.
+  AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}])
+  case "${host_cpu}" in
+    i[[3456]]86)
+      AC_MSG_RESULT([unreliable])
+      enable_prof_libgcc="0";
+      ;;
+    x86_64)
+      AC_MSG_RESULT([reliable])
+      ;;
+    *)
+      AC_MSG_RESULT([unreliable])
+      enable_prof_libgcc="0";
+      ;;
+  esac
+  if test "x${enable_prof_libgcc}" = "x1" ; then
+    backtrace_method="libgcc"
+    AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ])
+  fi
+else
+  enable_prof_libgcc="0"
+fi
+
+AC_ARG_ENABLE([prof-gcc],
+  [AS_HELP_STRING([--disable-prof-gcc],
+  [Do not use gcc intrinsics for backtracing])],
+[if test "x$enable_prof_gcc" = "xno" ; then
+  enable_prof_gcc="0"
+else
+  enable_prof_gcc="1"
+fi
+],
+[enable_prof_gcc="1"]
+)
+if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \
+     -a "x$GCC" = "xyes" ; then
+  backtrace_method="gcc intrinsics"
+  AC_DEFINE([JEMALLOC_PROF_GCC], [ ])
+else
+  enable_prof_gcc="0"
+fi
+
+if test "x$backtrace_method" = "x" ; then
+  backtrace_method="none (disabling profiling)"
+  enable_prof="0"
+fi
+AC_MSG_CHECKING([configured backtracing method])
+AC_MSG_RESULT([$backtrace_method])
+if test "x$enable_prof" = "x1" ; then
+  LIBS="$LIBS -lm"
+  AC_DEFINE([JEMALLOC_PROF], [ ])
+fi
+AC_SUBST([enable_prof])
+
+dnl Enable tiny allocations by default.
+AC_ARG_ENABLE([tiny],
+  [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])],
+[if test "x$enable_tiny" = "xno" ; then
+  enable_tiny="0"
+else
+  enable_tiny="1"
+fi
+],
+[enable_tiny="1"]
+)
+if test "x$enable_tiny" = "x1" ; then
+  AC_DEFINE([JEMALLOC_TINY], [ ])
+fi
+AC_SUBST([enable_tiny])
+
+dnl Enable thread-specific caching by default.
+AC_ARG_ENABLE([tcache], + [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], +[if test "x$enable_tcache" = "xno" ; then + enable_tcache="0" +else + enable_tcache="1" +fi +], +[enable_tcache="1"] +) +if test "x$enable_tcache" = "x1" ; then + AC_DEFINE([JEMALLOC_TCACHE], [ ]) +fi +AC_SUBST([enable_tcache]) + +dnl Do not enable mmap()ped swap files by default. +AC_ARG_ENABLE([swap], + [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])], +[if test "x$enable_swap" = "xno" ; then + enable_swap="0" +else + enable_swap="1" +fi +], +[enable_swap="0"] +) +if test "x$enable_swap" = "x1" ; then + AC_DEFINE([JEMALLOC_SWAP], [ ]) +fi +AC_SUBST([enable_swap]) + +dnl Do not enable allocation from DSS by default. +AC_ARG_ENABLE([dss], + [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], +[if test "x$enable_dss" = "xno" ; then + enable_dss="0" +else + enable_dss="1" +fi +], +[enable_dss="0"] +) +if test "x$enable_dss" = "x1" ; then + AC_DEFINE([JEMALLOC_DSS], [ ]) +fi +AC_SUBST([enable_dss]) + +dnl Do not support the junk/zero filling option by default. +AC_ARG_ENABLE([fill], + [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])], +[if test "x$enable_fill" = "xno" ; then + enable_fill="0" +else + enable_fill="1" +fi +], +[enable_fill="0"] +) +if test "x$enable_fill" = "x1" ; then + AC_DEFINE([JEMALLOC_FILL], [ ]) +fi +AC_SUBST([enable_fill]) + +dnl Do not support the xmalloc option by default. +AC_ARG_ENABLE([xmalloc], + [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], +[if test "x$enable_xmalloc" = "xno" ; then + enable_xmalloc="0" +else + enable_xmalloc="1" +fi +], +[enable_xmalloc="0"] +) +if test "x$enable_xmalloc" = "x1" ; then + AC_DEFINE([JEMALLOC_XMALLOC], [ ]) +fi +AC_SUBST([enable_xmalloc]) + +dnl Do not support the SYSV option by default. 
+AC_ARG_ENABLE([sysv],
+  [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])],
+[if test "x$enable_sysv" = "xno" ; then
+  enable_sysv="0"
+else
+  enable_sysv="1"
+fi
+],
+[enable_sysv="0"]
+)
+if test "x$enable_sysv" = "x1" ; then
+  AC_DEFINE([JEMALLOC_SYSV], [ ])
+fi
+AC_SUBST([enable_sysv])
+
+dnl Do not determine page shift at run time by default.
+AC_ARG_ENABLE([dynamic_page_shift],
+  [AS_HELP_STRING([--enable-dynamic-page-shift],
+  [Determine page size at run time (don't trust configure result)])],
+[if test "x$enable_dynamic_page_shift" = "xno" ; then
+  enable_dynamic_page_shift="0"
+else
+  enable_dynamic_page_shift="1"
+fi
+],
+[enable_dynamic_page_shift="0"]
+)
+if test "x$enable_dynamic_page_shift" = "x1" ; then
+  AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ])
+fi
+AC_SUBST([enable_dynamic_page_shift])
+
+AC_MSG_CHECKING([STATIC_PAGE_SHIFT])
+AC_RUN_IFELSE([AC_LANG_PROGRAM(
+[[#include <stdio.h>
+#include <unistd.h>
+#include <strings.h>
+]], [[
+    long result;
+    FILE *f;
+
+    result = sysconf(_SC_PAGESIZE);
+    if (result == -1) {
+        return 1;
+    }
+    f = fopen("conftest.out", "w");
+    if (f == NULL) {
+        return 1;
+    }
+    fprintf(f, "%u\n", ffs((int)result) - 1);  /* lowest set bit, i.e. log2 of a power-of-two page size */
+    fclose(f);  /* f is a stdio stream: fclose flushes it before configure reads conftest.out */
+
+    return 0;
+]])],
+              [STATIC_PAGE_SHIFT=`cat conftest.out`]
+              AC_MSG_RESULT([$STATIC_PAGE_SHIFT])
+              AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]),
+              AC_MSG_RESULT([error]))
+
+dnl ============================================================================
+dnl jemalloc configuration.
+dnl
+
+dnl Set VERSION if source directory has an embedded git repository.
+if test -d "${srcroot}.git" ; then
+  git describe --long --abbrev=40 > ${srcroot}VERSION
+fi
+jemalloc_version=`cat ${srcroot}VERSION`
+jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'`
+jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'`
+jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'`
+jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'`
+jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'`
+AC_SUBST([jemalloc_version])
+AC_SUBST([jemalloc_version_major])
+AC_SUBST([jemalloc_version_minor])
+AC_SUBST([jemalloc_version_bugfix])
+AC_SUBST([jemalloc_version_nrev])
+AC_SUBST([jemalloc_version_gid])
+
+dnl ============================================================================
+dnl Configure pthreads.
+
+AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])])
+AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"],
+             [AC_MSG_ERROR([libpthread is missing])])
+
+CPPFLAGS="$CPPFLAGS -D_REENTRANT"
+
+dnl Enable lazy locking by default.
+AC_ARG_ENABLE([lazy_lock], + [AS_HELP_STRING([--disable-lazy-lock], + [Disable lazy locking (always lock, even when single-threaded)])], +[if test "x$enable_lazy_lock" = "xno" ; then + enable_lazy_lock="0" +else + enable_lazy_lock="1" +fi +], +[enable_lazy_lock="1"] +) +if test "x$enable_lazy_lock" = "x1" ; then + AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) + AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"], + [AC_MSG_ERROR([libdl is missing])]) + AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) +fi +AC_SUBST([enable_lazy_lock]) + +AC_ARG_ENABLE([tls], + [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])], +if test "x$enable_tls" = "xno" ; then + enable_tls="0" +else + enable_tls="1" +fi +, +enable_tls="1" +) +if test "x${enable_tls}" = "x1" ; then +AC_MSG_CHECKING([for TLS]) +AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ + __thread int x; +]], [[ + x = 42; + + return 0; +]])], + AC_MSG_RESULT([yes]), + AC_MSG_RESULT([no]) + enable_tls="0") +fi +AC_SUBST([enable_tls]) +if test "x${enable_tls}" = "x0" ; then + AC_DEFINE_UNQUOTED([NO_TLS], [ ]) +fi + +dnl ============================================================================ +dnl Check for ffsl(3), and fail if not found. This function exists on all +dnl platforms that jemalloc currently has a chance of functioning on without +dnl modification. + +AC_CHECK_FUNC([ffsl], [], + [AC_MSG_ERROR([Cannot build without ffsl(3)])]) + +dnl ============================================================================ +dnl Check for atomic(3) operations as provided on Darwin. 
+
+JE_COMPILABLE([Darwin OSAtomic*()], [
+#include <libkern/OSAtomic.h>
+#include <inttypes.h>
+], [
+        {
+                int32_t x32 = 0;
+                volatile int32_t *x32p = &x32;
+                OSAtomicAdd32(1, x32p);
+        }
+        {
+                int64_t x64 = 0;
+                volatile int64_t *x64p = &x64;
+                OSAtomicAdd64(1, x64p);
+        }
+], [osatomic])
+if test "x${osatomic}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSATOMIC])
+fi
+
+dnl ============================================================================
+dnl Check for spinlock(3) operations as provided on Darwin.
+
+JE_COMPILABLE([Darwin OSSpin*()], [
+#include <libkern/OSAtomic.h>
+#include <inttypes.h>
+], [
+        OSSpinLock lock = 0;
+        OSSpinLockLock(&lock);
+        OSSpinLockUnlock(&lock);
+], [osspin])
+if test "x${osspin}" = "xyes" ; then
+  AC_DEFINE([JEMALLOC_OSSPIN])
+fi
+
+dnl ============================================================================
+dnl Check for allocator-related functions that should be wrapped.
+
+AC_CHECK_FUNC([memalign],
+              [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])])
+AC_CHECK_FUNC([valloc],
+              [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])])
+
+dnl ============================================================================
+dnl Darwin-related configuration.
+
+if test "x${abi}" = "xmacho" ; then
+  AC_DEFINE([JEMALLOC_IVSALLOC])
+  AC_DEFINE([JEMALLOC_ZONE])
+
+  dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6
+  dnl releases.  malloc_zone_t and malloc_introspection_t have new fields in
+  dnl 10.6, which is the only source-level indication of the change.
+  AC_MSG_CHECKING([malloc zone version])
+  AC_TRY_COMPILE([#include <stdlib.h>
+#include <malloc/malloc.h>], [
+        static malloc_zone_t zone;
+        static struct malloc_introspection_t zone_introspect;
+
+        zone.size = NULL;
+        zone.malloc = NULL;
+        zone.calloc = NULL;
+        zone.valloc = NULL;
+        zone.free = NULL;
+        zone.realloc = NULL;
+        zone.destroy = NULL;
+        zone.zone_name = "jemalloc_zone";
+        zone.batch_malloc = NULL;
+        zone.batch_free = NULL;
+        zone.introspect = &zone_introspect;
+        zone.version = 6;
+        zone.memalign = NULL;
+        zone.free_definite_size = NULL;
+
+        zone_introspect.enumerator = NULL;
+        zone_introspect.good_size = NULL;
+        zone_introspect.check = NULL;
+        zone_introspect.print = NULL;
+        zone_introspect.log = NULL;
+        zone_introspect.force_lock = NULL;
+        zone_introspect.force_unlock = NULL;
+        zone_introspect.statistics = NULL;
+        zone_introspect.zone_locked = NULL;
+], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6])
+    AC_MSG_RESULT([6])],
+   [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3])
+   AC_MSG_RESULT([3])])
+fi
+
+dnl ============================================================================
+dnl Check for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+
+dnl Process .in files.
+AC_SUBST([cfghdrs_in])
+AC_SUBST([cfghdrs_out])
+AC_CONFIG_HEADERS([$cfghdrs_tup])
+
+dnl ============================================================================
+dnl Generate outputs.
+AC_CONFIG_FILES([$cfgoutputs_tup config.stamp])
+AC_SUBST([cfgoutputs_in])
+AC_SUBST([cfgoutputs_out])
+AC_OUTPUT
+
+dnl ============================================================================
+dnl Print out the results of configuration.
+AC_MSG_RESULT([===============================================================================]) +AC_MSG_RESULT([jemalloc version : $jemalloc_version]) +AC_MSG_RESULT([]) +AC_MSG_RESULT([CC : ${CC}]) +AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) +AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) +AC_MSG_RESULT([LIBS : ${LIBS}]) +AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) +AC_MSG_RESULT([]) +AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) +AC_MSG_RESULT([XSLROOT : ${XSLROOT}]) +AC_MSG_RESULT([]) +AC_MSG_RESULT([PREFIX : ${PREFIX}]) +AC_MSG_RESULT([BINDIR : ${BINDIR}]) +AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) +AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) +AC_MSG_RESULT([DATADIR : ${DATADIR}]) +AC_MSG_RESULT([MANDIR : ${MANDIR}]) +AC_MSG_RESULT([]) +AC_MSG_RESULT([srcroot : ${srcroot}]) +AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}]) +AC_MSG_RESULT([objroot : ${objroot}]) +AC_MSG_RESULT([abs_objroot : ${abs_objroot}]) +AC_MSG_RESULT([]) +AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) +AC_MSG_RESULT([install_suffix : ${install_suffix}]) +AC_MSG_RESULT([autogen : ${enable_autogen}]) +AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) +AC_MSG_RESULT([debug : ${enable_debug}]) +AC_MSG_RESULT([stats : ${enable_stats}]) +AC_MSG_RESULT([prof : ${enable_prof}]) +AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) +AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) +AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) +AC_MSG_RESULT([tiny : ${enable_tiny}]) +AC_MSG_RESULT([tcache : ${enable_tcache}]) +AC_MSG_RESULT([fill : ${enable_fill}]) +AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([sysv : ${enable_sysv}]) +AC_MSG_RESULT([swap : ${enable_swap}]) +AC_MSG_RESULT([dss : ${enable_dss}]) +AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) +AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) +AC_MSG_RESULT([tls : ${enable_tls}]) +AC_MSG_RESULT([===============================================================================]) diff 
--git a/doc/html.xsl.in b/doc/html.xsl.in new file mode 100644 index 0000000..a91d974 --- /dev/null +++ b/doc/html.xsl.in @@ -0,0 +1,4 @@ + + + + diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in new file mode 100644 index 0000000..13f3aae --- /dev/null +++ b/doc/jemalloc.xml.in @@ -0,0 +1,2280 @@ + + + + + + + User Manual + jemalloc + @jemalloc_version@ + + + Jason + Evans + Author + + + + + JEMALLOC + 3 + + + jemalloc + jemalloc + + general purpose memory allocation functions + + + LIBRARY + This manual describes jemalloc @jemalloc_version@. More information + can be found at the jemalloc website. + + + SYNOPSIS + + #include <stdlib.h> +#include <jemalloc/jemalloc.h> + + Standard API + + void *malloc + size_t size + + + void *calloc + size_t number + size_t size + + + int posix_memalign + void **ptr + size_t alignment + size_t size + + + void *realloc + void *ptr + size_t size + + + void free + void *ptr + + + + Non-standard API + + size_t malloc_usable_size + const void *ptr + + + void malloc_stats_print + void (*write_cb) + void *, const char * + + void *cbopaque + const char *opts + + + int mallctl + const char *name + void *oldp + size_t *oldlenp + void *newp + size_t newlen + + + int mallctlnametomib + const char *name + size_t *mibp + size_t *miblenp + + + int mallctlbymib + const size_t *mib + size_t miblen + void *oldp + size_t *oldlenp + void *newp + size_t newlen + + + void (*malloc_message) + void *cbopaque + const char *s + + const char *malloc_conf; + + + Experimental API + + int allocm + void **ptr + size_t *rsize + size_t size + int flags + + + int rallocm + void **ptr + size_t *rsize + size_t size + size_t extra + int flags + + + int sallocm + const void *ptr + size_t *rsize + int flags + + + int dallocm + void *ptr + int flags + + + + + + DESCRIPTION + + Standard API + + The malloc function allocates + size bytes of uninitialized memory. 
The allocated + space is suitably aligned (after possible pointer coercion) for storage + of any type of object. + + The calloc function allocates + space for number objects, each + size bytes in length. The result is identical to + calling malloc with an argument of + number * size, with the + exception that the allocated memory is explicitly initialized to zero + bytes. + + The posix_memalign function + allocates size bytes of memory such that the + allocation's base address is an even multiple of + alignment, and returns the allocation in the value + pointed to by ptr. The requested + alignment must be a power of 2 at least as large + as sizeof(void *). + + The realloc function changes the + size of the previously allocated memory referenced by + ptr to size bytes. The + contents of the memory are unchanged up to the lesser of the new and old + sizes. If the new size is larger, the contents of the newly allocated + portion of the memory are undefined. Upon success, the memory referenced + by ptr is freed and a pointer to the newly + allocated memory is returned. Note that + realloc may move the memory allocation, + resulting in a different return value than ptr. + If ptr is NULL, the + realloc function behaves identically to + malloc for the specified size. + + The free function causes the + allocated memory referenced by ptr to be made + available for future allocations. If ptr is + NULL, no action occurs. + + + Non-standard API + + The malloc_usable_size function + returns the usable size of the allocation pointed to by + ptr. The return value may be larger than the size + that was requested during allocation. The + malloc_usable_size function is not a + mechanism for in-place realloc; rather + it is provided solely as a tool for introspection purposes. Any + discrepancy between the requested allocation size and the size reported + by malloc_usable_size should not be + depended on, since such behavior is entirely implementation-dependent. 
+ + + The malloc_stats_print function + writes human-readable summary statistics via the + write_cb callback function pointer and + cbopaque data passed to + write_cb, or + malloc_message if + write_cb is NULL. This + function can be called repeatedly. General information that never + changes during execution can be omitted by specifying "g" as a character + within the opts string. Note that + malloc_stats_print uses the + mallctl* functions internally, so + inconsistent statistics can be reported if multiple threads use these + functions simultaneously. If --enable-stats is + specified during configuration, “m” and “a” can + be specified to omit merged arena and per arena statistics, respectively; + “b” and “l” can be specified to omit per size + class statistics for bins and large objects, respectively. Unrecognized + characters are silently ignored. Note that thread caching may prevent + some statistics from being completely up to date, since extra locking + would be required to merge counters that track thread cache operations. + + + The mallctl function provides a + general interface for introspecting the memory allocator, as well as + setting modifiable parameters and triggering actions. The + period-separated name argument specifies a + location in a tree-structured namespace; see the MALLCTL NAMESPACE section for + documentation on the tree contents. To read a value, pass a pointer via + oldp to adequate space to contain the value, and a + pointer to its length via oldlenp; otherwise pass + NULL and NULL. Similarly, to + write a value, pass a pointer to the value via + newp, and its length via + newlen; otherwise pass NULL + and 0. + + The mallctlnametomib function + provides a way to avoid repeated name lookups for applications that + repeatedly query the same portion of the namespace, by translating a name + to a “Management Information Base” (MIB) that can be passed + repeatedly to mallctlbymib. 
Upon + successful return from mallctlnametomib, + mibp contains an array of + *miblenp integers, where + *miblenp is the lesser of the number of components + in name and the input value of + *miblenp. Thus it is possible to pass a + *miblenp that is smaller than the number of + period-separated name components, which results in a partial MIB that can + be used as the basis for constructing a complete MIB. For name + components that are integers (e.g. the 2 in + arenas.bin.2.size), + the corresponding MIB component will always be that integer. Therefore, + it is legitimate to construct code like the following: + + + Experimental API + The experimental API is subject to change or removal without regard + for backward compatibility. + + The allocm, + rallocm, + sallocm, and + dallocm functions all have a + flags argument that can be used to specify + options. The functions only check the options that are contextually + relevant. Use bitwise or (|) operations to + specify one or more of the following: + + + ALLOCM_LG_ALIGN(la) + + + Align the memory allocation to start at an address + that is a multiple of (1 << + la). This macro does not validate + that la is within the valid + range. + + + ALLOCM_ALIGN(a) + + + Align the memory allocation to start at an address + that is a multiple of a, where + a is a power of two. This macro does not + validate that a is a power of 2. + + + + ALLOCM_ZERO + + Initialize newly allocated memory to contain zero + bytes. In the growing reallocation case, the real size prior to + reallocation defines the boundary between untouched bytes and those + that are initialized to contain zero bytes. If this option is + absent, newly allocated memory is uninitialized. + + + ALLOCM_NO_MOVE + + For reallocation, fail rather than moving the + object. This constraint can apply to both growth and + shrinkage. 
+ + + + + The allocm function allocates at + least size bytes of memory, sets + *ptr to the base address of the allocation, and + sets *rsize to the real size of the allocation if + rsize is not NULL. + + The rallocm function resizes the + allocation at *ptr to be at least + size bytes, sets *ptr to + the base address of the allocation if it moved, and sets + *rsize to the real size of the allocation if + rsize is not NULL. If + extra is non-zero, an attempt is made to resize + the allocation to be at least (size + + extra) bytes, though inability to allocate + the extra byte(s) will not by itself result in failure. Behavior is + undefined if (size + + extra > + SIZE_T_MAX). + + The sallocm function sets + *rsize to the real size of the allocation. + + The dallocm function causes the + memory referenced by ptr to be made available for + future allocations. + + + + TUNING + Once, when the first call is made to one of the memory allocation + routines, the allocator initializes its internals based in part on various + options that can be specified at compile- or run-time. + + The string pointed to by the global variable + malloc_conf, the “name” of the file + referenced by the symbolic link named /etc/malloc.conf, and the value of the + environment variable MALLOC_CONF, will be interpreted, in + that order, from left to right as options. + + An options string is a comma-separated list of option:value pairs. + There is one key corresponding to each opt.* mallctl (see the MALLCTL NAMESPACE section for options + documentation). For example, abort:true,narenas:1 sets + the opt.abort and opt.narenas options. Some + options have boolean values (true/false), others have integer values (base + 8, 10, or 16, depending on prefix), and yet others have raw string + values. 
+ + + IMPLEMENTATION NOTES + Traditionally, allocators have used + sbrk + (2) to obtain memory, which is + suboptimal for several reasons, including race conditions, increased + fragmentation, and artificial limitations on maximum usable memory. If + --enable-dss is specified during configuration, this + allocator uses both sbrk + (2) and + mmap + (2), in that order of preference; + otherwise only mmap + (2) is used. + + This allocator uses multiple arenas in order to reduce lock + contention for threaded programs on multi-processor systems. This works + well with regard to threading scalability, but incurs some costs. There is + a small fixed per-arena overhead, and additionally, arenas manage memory + completely independently of each other, which means a small fixed increase + in overall memory fragmentation. These overheads are not generally an + issue, given the number of arenas normally used. Note that using + substantially more arenas than the default is not likely to improve + performance, mainly due to reduced cache performance. However, it may make + sense to reduce the number of arenas if an application does not make much + use of the allocation functions. + + In addition to multiple arenas, unless + --disable-tcache is specified during configuration, this + allocator supports thread-specific caching for small and large objects, in + order to make it possible to completely avoid synchronization for most + allocation requests. Such caching allows very fast allocation in the + common case, but it increases memory usage and fragmentation, since a + bounded number of objects can remain allocated in each thread cache. + + Memory is conceptually broken into equal-sized chunks, where the + chunk size is a power of two that is greater than the page size. Chunks + are always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. + + User objects are broken into three categories according to size: + small, large, and huge. 
Small objects are smaller than one page. Large + objects are smaller than the chunk size. Huge objects are a multiple of + the chunk size. Small and large objects are managed by arenas; huge + objects are managed separately in a single data structure that is shared by + all threads. Huge objects are used by applications infrequently enough + that this single data structure is not a scalability issue. + + Each chunk that is managed by an arena tracks its contents as runs of + contiguous pages (unused, backing a set of small objects, or backing one + large object). The combination of chunk alignment and chunk page maps + makes it possible to determine all metadata regarding small and large + allocations in constant time. + + Small objects are managed in groups by page runs. Each run maintains + a frontier and free list to track which regions are in use. Unless + is specified during configuration, + allocation requests that are no more than half the quantum (8 or 16, + depending on architecture) are rounded up to the nearest power of two that + is at least sizeof(void *). + Allocation requests that are more than half the quantum, but no more than + the minimum cacheline-multiple size class (see the opt.lg_qspace_max + option) are rounded up to the nearest multiple of the quantum. Allocation + requests that are more than the minimum cacheline-multiple size class, but + no more than the minimum subpage-multiple size class (see the opt.lg_cspace_max + option) are rounded up to the nearest multiple of the cacheline size (64). + Allocation requests that are more than the minimum subpage-multiple size + class, but no more than the maximum subpage-multiple size class are rounded + up to the nearest multiple of the subpage size (256). Allocation requests + that are more than the maximum subpage-multiple size class, but small + enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are + rounded up to the nearest run size. 
Allocation requests that are too large + to fit in an arena-managed chunk are rounded up to the nearest multiple of + the chunk size. + + Allocations are packed tightly together, which can be an issue for + multi-threaded applications. If you need to assure that allocations do not + suffer from cacheline sharing, round your allocation requests up to the + nearest multiple of the cacheline size, or specify cacheline alignment when + allocating. + + Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit + system, the size classes in each category are as shown in . + + + Size classes + + + + + + + Category + Subcategory + Size + + + + + Small + Tiny + [8] + + + Quantum-spaced + [16, 32, 48, ..., 128] + + + Cacheline-spaced + [192, 256, 320, ..., 512] + + + Subpage-spaced + [768, 1024, 1280, ..., 3840] + + + Large + [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] + + + Huge + [4 MiB, 8 MiB, 12 MiB, ...] + + + +
+
+ + MALLCTL NAMESPACE + The following names are defined in the namespace accessible via the + mallctl* functions. Value types are + specified in parentheses, their readable/writable statuses are encoded as + rw, r-, -w, or + --, and required build configuration flags follow, if + any. A name element encoded as <i> or + <j> indicates an integer component, where the + integer varies from 0 to some upper value that must be determined via + introspection. In the case of stats.arenas.<i>.*, + <i> equal to arenas.narenas can be + used to access the summation of statistics from all arenas. Take special + note of the epoch mallctl, + which controls refreshing of cached dynamic statistics. + + + + + version + (const char *) + r- + + Return the jemalloc version string. + + + + + epoch + (uint64_t) + rw + + If a value is passed in, refresh the data from which + the mallctl* functions report values, + and increment the epoch. Return the current epoch. This is useful for + detecting whether another thread caused a refresh. + + + + + config.debug + (bool) + r- + + was specified during + build configuration. + + + + + config.dss + (bool) + r- + + was specified during + build configuration. + + + + + config.dynamic_page_shift + (bool) + r- + + was + specified during build configuration. + + + + + config.fill + (bool) + r- + + was specified during + build configuration. + + + + + config.lazy_lock + (bool) + r- + + was specified + during build configuration. + + + + + config.prof + (bool) + r- + + was specified during + build configuration. + + + + + config.prof_libgcc + (bool) + r- + + was not + specified during build configuration. + + + + + config.prof_libunwind + (bool) + r- + + was specified + during build configuration. + + + + + config.stats + (bool) + r- + + was specified during + build configuration. + + + + + config.swap + (bool) + r- + + was specified during + build configuration. + + + + + config.sysv + (bool) + r- + + was specified during + build configuration. 
+ + + + + config.tcache + (bool) + r- + + was not specified + during build configuration. + + + + + config.tiny + (bool) + r- + + was not specified + during build configuration. + + + + + config.tls + (bool) + r- + + was not specified during + build configuration. + + + + + config.xmalloc + (bool) + r- + + was specified during + build configuration. + + + + + opt.abort + (bool) + r- + + Abort-on-warning enabled/disabled. If true, most + warnings are fatal. The process will call + abort + 3 in these cases. This option is + disabled by default unless is + specified during configuration, in which case it is enabled by default. + + + + + + opt.lg_qspace_max + (size_t) + r- + + Size (log base 2) of the maximum size class that is a + multiple of the quantum (8 or 16 bytes, depending on architecture). + Above this size, cacheline spacing is used for size classes. The + default value is 128 bytes (2^7). + + + + + opt.lg_cspace_max + (size_t) + r- + + Size (log base 2) of the maximum size class that is a + multiple of the cacheline size (64). Above this size, subpage spacing + (256 bytes) is used for size classes. The default value is 512 bytes + (2^9). + + + + + opt.lg_chunk + (size_t) + r- + + Virtual memory chunk size (log base 2). The default + chunk size is 4 MiB (2^22). + + + + + opt.narenas + (size_t) + r- + + Maximum number of arenas to use. The default maximum + number of arenas is four times the number of CPUs, or one if there is a + single CPU. + + + + + opt.lg_dirty_mult + (ssize_t) + r- + + Per-arena minimum ratio (log base 2) of active to dirty + pages. Some dirty unused pages may be allowed to accumulate, within + the limit set by the ratio (or one chunk worth of dirty pages, + whichever is greater), before informing the kernel about some of those + pages via madvise + 2 or a similar system call. This + provides the kernel with sufficient information to recycle dirty pages + if physical memory becomes scarce and the pages remain unused. 
The + default minimum ratio is 32:1 (2^5:1); an option value of -1 will + disable dirty page purging. + + + + + opt.stats_print + (bool) + r- + + Enable/disable statistics printing at exit. If + enabled, the malloc_stats_print + function is called at program exit via an + atexit + 3 function. If + is specified during configuration, this + has the potential to cause deadlock for a multi-threaded process that + exits while one or more threads are executing in the memory allocation + functions. Therefore, this option should only be used with care; it is + primarily intended as a performance tuning aid during application + development. This option is disabled by default. + + + + + opt.junk + (bool) + r- + [] + + Junk filling enabled/disabled. If enabled, each byte + of uninitialized allocated memory will be initialized to + 0xa5. All deallocated memory will be initialized to + 0x5a. This is intended for debugging and will + impact performance negatively. This option is disabled by default + unless is specified during + configuration, in which case it is enabled by default. + + + + + opt.zero + (bool) + r- + [] + + Zero filling enabled/disabled. If enabled, each byte + of uninitialized allocated memory will be initialized to 0. Note that + this initialization only happens once for each byte, so + realloc and + rallocm calls do not zero memory that + was previously allocated. This is intended for debugging and will + impact performance negatively. This option is disabled by default. + + + + + + opt.sysv + (bool) + r- + [] + + If enabled, attempting to allocate zero bytes will + return a NULL pointer instead of a valid pointer. + (The default behavior is to make a minimal allocation and return a + pointer to it.) This option is provided for System V compatibility. + This option is incompatible with the opt.xmalloc option. + This option is disabled by default. + + + + + opt.xmalloc + (bool) + r- + [] + + Abort-on-out-of-memory enabled/disabled. 
If enabled, + rather than returning failure for any allocation function, display a + diagnostic message on STDERR_FILENO and cause the + program to drop core (using + abort + 3). If an application is + designed to depend on this behavior, set the option at compile time by + including the following in the source code: + + This option is disabled by default. + + + + + opt.tcache + (bool) + r- + [] + + Thread-specific caching enabled/disabled. When there + are multiple threads, each thread uses a thread-specific cache for + objects up to a certain size. Thread-specific caching allows many + allocations to be satisfied without performing any thread + synchronization, at the cost of increased memory use. See the + opt.lg_tcache_gc_sweep + and opt.lg_tcache_max + options for related tuning information. This option is enabled by + default. + + + + + opt.lg_tcache_gc_sweep + (ssize_t) + r- + [] + + Approximate interval (log base 2) between full + thread-specific cache garbage collection sweeps, counted in terms of + thread-specific cache allocation/deallocation events. Garbage + collection is actually performed incrementally, one size class at a + time, in order to avoid large collection pauses. The default sweep + interval is 8192 (2^13); setting this option to -1 will disable garbage + collection. + + + + + opt.lg_tcache_max + (size_t) + r- + [] + + Maximum size class (log base 2) to cache in the + thread-specific cache. At a minimum, all small size classes are + cached, and at a maximum all large size classes are cached. The + default maximum is 32 KiB (2^15). + + + + + opt.prof + (bool) + r- + [] + + Memory profiling enabled/disabled. If enabled, profile + memory allocation activity, and use an + atexit + 3 function to dump final memory + usage to a file named according to the pattern + <prefix>.<pid>.<seq>.f.heap, + where <prefix> is controlled by the opt.prof_prefix + option. See the opt.lg_prof_bt_max + option for backtrace depth control. 
See the opt.prof_active + option for on-the-fly activation/deactivation. See the opt.lg_prof_sample + option for probabilistic sampling control. See the opt.prof_accum + option for control of cumulative sample reporting. See the opt.lg_prof_tcmax + option for control of per thread backtrace caching. See the opt.lg_prof_interval + option for information on interval-triggered profile dumping, and the + opt.prof_gdump + option for information on high-water-triggered profile dumping. + Profile output is compatible with the included pprof + Perl script, which originates from the google-perftools + package. + + + + + opt.prof_prefix + (const char *) + r- + [] + + Filename prefix for profile dumps. If the prefix is + set to the empty string, no automatic dumps will occur; this is + primarily useful for disabling the automatic final heap dump (which + also disables leak reporting, if enabled). The default prefix is + jeprof. + + + + + opt.lg_prof_bt_max + (size_t) + r- + [] + + Maximum backtrace depth (log base 2) when profiling + memory allocation activity. The default is 128 (2^7). + + + + + opt.prof_active + (bool) + r- + [] + + Profiling activated/deactivated. This is a secondary + control mechanism that makes it possible to start the application with + profiling enabled (see the opt.prof option) but + inactive, then toggle profiling at any time during program execution + with the prof.active mallctl. + This option is enabled by default. + + + + + opt.lg_prof_sample + (ssize_t) + r- + [] + + Average interval (log base 2) between allocation + samples, as measured in bytes of allocation activity. Increasing the + sampling interval decreases profile fidelity, but also decreases the + computational overhead. The default sample interval is 1 (2^0) (i.e. + all allocations are sampled). + + + + + opt.prof_accum + (bool) + r- + [] + + Reporting of cumulative object/byte counts in profile + dumps enabled/disabled. 
If this option is enabled, every unique + backtrace must be stored for the duration of execution. Depending on + the application, this can impose a large memory overhead, and the + cumulative counts are not always of interest. See the + opt.lg_prof_tcmax + option for control of per thread backtrace caching, which has important + interactions. This option is enabled by default. + + + + + opt.lg_prof_tcmax + (ssize_t) + r- + [] + + Maximum per thread backtrace cache (log base 2) used + for heap profiling. A backtrace can only be discarded if the + opt.prof_accum + option is disabled, and no thread caches currently refer to the + backtrace. Therefore, a backtrace cache limit should be imposed if the + intention is to limit how much memory is used by backtraces. By + default, no limit is imposed (encoded as -1). + + + + + + opt.lg_prof_interval + (ssize_t) + r- + [] + + Average interval (log base 2) between memory profile + dumps, as measured in bytes of allocation activity. The actual + interval between dumps may be sporadic because decentralized allocation + counters are used to avoid synchronization bottlenecks. Profiles are + dumped to files named according to the pattern + <prefix>.<pid>.<seq>.i<iseq>.heap, + where <prefix> is controlled by the + opt.prof_prefix + option. By default, interval-triggered profile dumping is disabled + (encoded as -1). + + + + + + opt.prof_gdump + (bool) + r- + [] + + Trigger a memory profile dump every time the total + virtual memory exceeds the previous maximum. Profiles are dumped to + files named according to the pattern + <prefix>.<pid>.<seq>.u<useq>.heap, + where <prefix> is controlled by the opt.prof_prefix + option. This option is disabled by default. + + + + + opt.prof_leak + (bool) + r- + [] + + Leak reporting enabled/disabled. If enabled, use an + atexit + 3 function to report memory leaks + detected by allocation sampling. See the + opt.lg_prof_bt_max + option for backtrace depth control. 
See the + opt.prof option for + information on analyzing heap profile output. This option is disabled + by default. + + + + + opt.overcommit + (bool) + r- + [] + + Over-commit enabled/disabled. If enabled, over-commit + memory as a side effect of using anonymous + mmap + 2 or + sbrk + 2 for virtual memory allocation. + In order for overcommit to be disabled, the swap.fds mallctl must have + been successfully written to. This option is enabled by + default. + + + + + tcache.flush + (void) + -- + [] + + Flush calling thread's tcache. This interface releases + all cached objects and internal data structures associated with the + calling thread's thread-specific cache. Ordinarily, this interface + need not be called, since automatic periodic incremental garbage + collection occurs, and the thread cache is automatically discarded when + a thread exits. However, garbage collection is triggered by allocation + activity, so it is possible for a thread that stops + allocating/deallocating to retain its cache indefinitely, in which case + the developer may find manual flushing useful. + + + + + thread.arena + (unsigned) + rw + + Get or set the arena associated with the calling + thread. The arena index must be less than the maximum number of arenas + (see the arenas.narenas + mallctl). If the specified arena was not initialized beforehand (see + the arenas.initialized + mallctl), it will be automatically initialized as a side effect of + calling this interface. + + + + + thread.allocated + (uint64_t) + r- + [] + + Get the total number of bytes ever allocated by the + calling thread. This counter has the potential to wrap around; it is + up to the application to appropriately interpret the counter in such + cases. + + + + + thread.allocatedp + (uint64_t *) + r- + [] + + Get a pointer to the value that is returned by the + thread.allocated + mallctl. This is useful for avoiding the overhead of repeated + mallctl* calls.
+ + + + + thread.deallocated + (uint64_t) + r- + [] + + Get the total number of bytes ever deallocated by the + calling thread. This counter has the potential to wrap around; it is + up to the application to appropriately interpret the counter in such + cases. + + + + + thread.deallocatedp + (uint64_t *) + r- + [] + + Get a pointer to the value that is returned by the + thread.deallocated + mallctl. This is useful for avoiding the overhead of repeated + mallctl* calls. + + + + + arenas.narenas + (unsigned) + r- + + Maximum number of arenas. + + + + + arenas.initialized + (bool *) + r- + + An array of arenas.narenas + booleans. Each boolean indicates whether the corresponding arena is + initialized. + + + + + arenas.quantum + (size_t) + r- + + Quantum size. + + + + + arenas.cacheline + (size_t) + r- + + Assumed cacheline size. + + + + + arenas.subpage + (size_t) + r- + + Subpage size class interval. + + + + + arenas.pagesize + (size_t) + r- + + Page size. + + + + + arenas.chunksize + (size_t) + r- + + Chunk size. + + + + + arenas.tspace_min + (size_t) + r- + + Minimum tiny size class. Tiny size classes are powers + of two. + + + + + arenas.tspace_max + (size_t) + r- + + Maximum tiny size class. Tiny size classes are powers + of two. + + + + + arenas.qspace_min + (size_t) + r- + + Minimum quantum-spaced size class. + + + + + arenas.qspace_max + (size_t) + r- + + Maximum quantum-spaced size class. + + + + + arenas.cspace_min + (size_t) + r- + + Minimum cacheline-spaced size class. + + + + + arenas.cspace_max + (size_t) + r- + + Maximum cacheline-spaced size class. + + + + + arenas.sspace_min + (size_t) + r- + + Minimum subpage-spaced size class. + + + + + arenas.sspace_max + (size_t) + r- + + Maximum subpage-spaced size class. + + + + + arenas.tcache_max + (size_t) + r- + [] + + Maximum thread-cached size class. + + + + + arenas.ntbins + (unsigned) + r- + + Number of tiny bin size classes.
+ + + + + arenas.nqbins + (unsigned) + r- + + Number of quantum-spaced bin size + classes. + + + + + arenas.ncbins + (unsigned) + r- + + Number of cacheline-spaced bin size + classes. + + + + + arenas.nsbins + (unsigned) + r- + + Number of subpage-spaced bin size + classes. + + + + + arenas.nbins + (unsigned) + r- + + Total number of bin size classes. + + + + + arenas.nhbins + (unsigned) + r- + [] + + Total number of thread cache bin size + classes. + + + + + arenas.bin.<i>.size + (size_t) + r- + + Maximum size supported by size class. + + + + + arenas.bin.<i>.nregs + (uint32_t) + r- + + Number of regions per page run. + + + + + arenas.bin.<i>.run_size + (size_t) + r- + + Number of bytes per page run. + + + + + arenas.nlruns + (size_t) + r- + + Total number of large size classes. + + + + + arenas.lrun.<i>.size + (size_t) + r- + + Maximum size supported by this large size + class. + + + + + arenas.purge + (unsigned) + -w + + Purge unused dirty pages for the specified arena, or + for all arenas if none is specified. + + + + + prof.active + (bool) + rw + [] + + Control whether sampling is currently active. See the + opt.prof_active + option for additional information. + + + + + + prof.dump + (const char *) + -w + [] + + Dump a memory profile to the specified file, or if NULL + is specified, to a file according to the pattern + <prefix>.<pid>.<seq>.m<mseq>.heap, + where <prefix> is controlled by the + opt.prof_prefix + option. + + + + + prof.interval + (uint64_t) + r- + [] + + Average number of bytes allocated between + interval-based profile dumps. See the + opt.lg_prof_interval + option for additional information. + + + + + stats.cactive + (size_t *) + r- + [] + + Pointer to a counter that contains an approximate count + of the current number of bytes in active pages. The estimate may be + high, but never low, because each arena rounds up to the nearest + multiple of the chunk size when computing its contribution to the + counter.
Note that the epoch mallctl has no bearing + on this counter. Furthermore, counter consistency is maintained via + atomic operations, so it is necessary to use an atomic operation in + order to guarantee a consistent read when dereferencing the pointer. + + + + + + stats.allocated + (size_t) + r- + [] + + Total number of bytes allocated by the + application. + + + + + stats.active + (size_t) + r- + [] + + Total number of bytes in active pages allocated by the + application. This is a multiple of the page size, and greater than or + equal to stats.allocated. + + + + + + stats.mapped + (size_t) + r- + [] + + Total number of bytes in chunks mapped on behalf of the + application. This is a multiple of the chunk size, and is at least as + large as stats.active. This + does not include inactive chunks backed by swap files. This does not + include inactive chunks embedded in the DSS. + + + + + stats.chunks.current + (size_t) + r- + [] + + Total number of chunks actively mapped on behalf of the + application. This does not include inactive chunks backed by swap + files. This does not include inactive chunks embedded in the DSS. + + + + + + stats.chunks.total + (uint64_t) + r- + [] + + Cumulative number of chunks allocated. + + + + + stats.chunks.high + (size_t) + r- + [] + + Maximum number of active chunks at any time thus far. + + + + + + stats.huge.allocated + (size_t) + r- + [] + + Number of bytes currently allocated by huge objects. + + + + + + stats.huge.nmalloc + (uint64_t) + r- + [] + + Cumulative number of huge allocation requests. + + + + + + stats.huge.ndalloc + (uint64_t) + r- + [] + + Cumulative number of huge deallocation requests. + + + + + + stats.arenas.<i>.nthreads + (unsigned) + r- + + Number of threads currently assigned to + arena. + + + + + stats.arenas.<i>.pactive + (size_t) + r- + + Number of pages in active runs.
+ + + + + stats.arenas.<i>.pdirty + (size_t) + r- + + Number of pages within unused runs that are potentially + dirty, and for which madvise... + MADV_DONTNEED or + similar has not been called. + + + + + stats.arenas.<i>.mapped + (size_t) + r- + [] + + Number of mapped bytes. + + + + + stats.arenas.<i>.npurge + (uint64_t) + r- + [] + + Number of dirty page purge sweeps performed. + + + + + + stats.arenas.<i>.nmadvise + (uint64_t) + r- + [] + + Number of madvise... + MADV_DONTNEED or + similar calls made to purge dirty pages. + + + + + stats.arenas.<i>.npurged + (uint64_t) + r- + [] + + Number of pages purged. + + + + + stats.arenas.<i>.small.allocated + (size_t) + r- + [] + + Number of bytes currently allocated by small objects. + + + + + + stats.arenas.<i>.small.nmalloc + (uint64_t) + r- + [] + + Cumulative number of allocation requests served by + small bins. + + + + + stats.arenas.<i>.small.ndalloc + (uint64_t) + r- + [] + + Cumulative number of small objects returned to bins. + + + + + + stats.arenas.<i>.small.nrequests + (uint64_t) + r- + [] + + Cumulative number of small allocation requests. + + + + + + stats.arenas.<i>.large.allocated + (size_t) + r- + [] + + Number of bytes currently allocated by large objects. + + + + + + stats.arenas.<i>.large.nmalloc + (uint64_t) + r- + [] + + Cumulative number of large allocation requests served + directly by the arena. + + + + + stats.arenas.<i>.large.ndalloc + (uint64_t) + r- + [] + + Cumulative number of large deallocation requests served + directly by the arena. + + + + + stats.arenas.<i>.large.nrequests + (uint64_t) + r- + [] + + Cumulative number of large allocation requests. + + + + + + stats.arenas.<i>.bins.<j>.allocated + (size_t) + r- + [] + + Current number of bytes allocated by + bin. + + + + + stats.arenas.<i>.bins.<j>.nmalloc + (uint64_t) + r- + [] + + Cumulative number of allocations served by bin. 
+ + + + + + stats.arenas.<i>.bins.<j>.ndalloc + (uint64_t) + r- + [] + + Cumulative number of allocations returned to bin. + + + + + + stats.arenas.<i>.bins.<j>.nrequests + (uint64_t) + r- + [] + + Cumulative number of allocation + requests. + + + + + stats.arenas.<i>.bins.<j>.nfills + (uint64_t) + r- + [ ] + + Cumulative number of tcache fills. + + + + + stats.arenas.<i>.bins.<j>.nflushes + (uint64_t) + r- + [ ] + + Cumulative number of tcache flushes. + + + + + stats.arenas.<i>.bins.<j>.nruns + (uint64_t) + r- + [] + + Cumulative number of runs created. + + + + + stats.arenas.<i>.bins.<j>.nreruns + (uint64_t) + r- + [] + + Cumulative number of times the current run from which + to allocate changed. + + + + + stats.arenas.<i>.bins.<j>.highruns + (size_t) + r- + [] + + Maximum number of runs at any time thus far. + + + + + + stats.arenas.<i>.bins.<j>.curruns + (size_t) + r- + [] + + Current number of runs. + + + + + stats.arenas.<i>.lruns.<j>.nmalloc + (uint64_t) + r- + [] + + Cumulative number of allocation requests for this size + class served directly by the arena. + + + + + stats.arenas.<i>.lruns.<j>.ndalloc + (uint64_t) + r- + [] + + Cumulative number of deallocation requests for this + size class served directly by the arena. + + + + + stats.arenas.<i>.lruns.<j>.nrequests + (uint64_t) + r- + [] + + Cumulative number of allocation requests for this size + class. + + + + + stats.arenas.<i>.lruns.<j>.highruns + (size_t) + r- + [] + + Maximum number of runs at any time thus far for this + size class. + + + + + stats.arenas.<i>.lruns.<j>.curruns + (size_t) + r- + [] + + Current number of runs for this size class. + + + + + + swap.avail + (size_t) + r- + [] + + Number of swap file bytes that are currently not + associated with any chunk (i.e. mapped, but otherwise completely + unmanaged). + + + + + swap.prezeroed + (bool) + rw + [] + + If true, the allocator assumes that the swap file(s) + contain nothing but nil bytes. 
If this assumption is violated, + allocator behavior is undefined. This value becomes read-only after + swap.fds is + successfully written to. + + + + + swap.nfds + (size_t) + r- + [] + + Number of file descriptors in use for swap. + + + + + + swap.fds + (int *) + r- + [] + + When written to, the files associated with the + specified file descriptors are contiguously mapped via + mmap + 2. The resulting virtual memory + region is preferred over anonymous + mmap + 2 and + sbrk + 2 memory. Note that if a file's + size is not a multiple of the page size, it is automatically truncated + to the nearest page size multiple. See the + swap.prezeroed + mallctl for specifying that the files are pre-zeroed. + + + + + DEBUGGING MALLOC PROBLEMS + When debugging, it is a good idea to configure/build jemalloc with + the and + options, and recompile the program with suitable options and symbols for + debugger support. When so configured, jemalloc incorporates a wide variety + of run-time assertions that catch application errors such as double-free, + write-after-free, etc. + + Programs often accidentally depend on “uninitialized” + memory actually being filled with zero bytes. Junk filling + (see the opt.junk + option) tends to expose such bugs in the form of obviously incorrect + results and/or coredumps. Conversely, zero + filling (see the opt.zero option) eliminates + the symptoms of such bugs. Between these two options, it is usually + possible to quickly detect, diagnose, and eliminate such bugs. + + This implementation does not provide much detail about the problems + it detects, because the performance impact for storing such information + would be prohibitive. There are a number of allocator implementations + available on the Internet which focus on detecting and pinpointing problems + by trading performance for extra sanity checks and detailed + diagnostics. 
+ + + DIAGNOSTIC MESSAGES + If any of the memory allocation/deallocation functions detect an + error or warning condition, a message will be printed to file descriptor + STDERR_FILENO. Errors will result in the process + dumping core. If the opt.abort option is set, most + warnings are treated as errors. + + The malloc_message variable allows the programmer + to override the function which emits the text strings forming the errors + and warnings if for some reason the STDERR_FILENO file + descriptor is not suitable for this. + malloc_message takes the + cbopaque pointer argument that is + NULL unless overridden by the arguments in a call to + malloc_stats_print, followed by a string + pointer. Please note that doing anything which tries to allocate memory in + this function is likely to result in a crash or deadlock. + + All messages are prefixed by + “<jemalloc>: ”. + + + RETURN VALUES + + Standard API + The malloc and + calloc functions return a pointer to the + allocated memory if successful; otherwise a NULL + pointer is returned and errno is set to + ENOMEM. + + The posix_memalign function + returns the value 0 if successful; otherwise it returns an error value. + The posix_memalign function will fail + if: + + + EINVAL + + The alignment parameter is + not a power of 2 at least as large as + sizeof(void *). + + + + ENOMEM + + Memory allocation error. + + + + + The realloc function returns a + pointer, possibly identical to ptr, to the + allocated memory if successful; otherwise a NULL + pointer is returned, and errno is set to + ENOMEM if the error was the result of an + allocation failure. The realloc + function always leaves the original buffer intact when an error occurs. + + + The free function returns no + value. + + + Non-standard API + The malloc_usable_size function + returns the usable size of the allocation pointed to by + ptr. 
+ + The mallctl, + mallctlnametomib, and + mallctlbymib functions return 0 on + success; otherwise they return an error value. The functions will fail + if: + + + EINVAL + + newp is not + NULL, and newlen is too + large or too small. Alternatively, *oldlenp + is too large or too small; in this case as much data as possible + are read despite the error. + + + ENOMEM + + *oldlenp is too short to + hold the requested value. + + + ENOENT + + name or + mib specifies an unknown/invalid + value. + + + EPERM + + Attempt to read or write void value, or attempt to + write read-only value. + + + EAGAIN + + A memory allocation failure + occurred. + + + EFAULT + + An interface with side effects failed in some way + not directly related to mallctl* + read/write processing. + + + + + + Experimental API + The allocm, + rallocm, + sallocm, and + dallocm functions return + ALLOCM_SUCCESS on success; otherwise they return an + error value. The allocm and + rallocm functions will fail if: + + + ALLOCM_ERR_OOM + + Out of memory. Insufficient contiguous memory was + available to service the allocation request. The + allocm function additionally sets + *ptr to NULL, whereas + the rallocm function leaves + *ptr unmodified. + + + The rallocm function will also + fail if: + + + ALLOCM_ERR_NOT_MOVED + + ALLOCM_NO_MOVE was specified, + but the reallocation request could not be serviced without moving + the object. + + + + + + + ENVIRONMENT + The following environment variable affects the execution of the + allocation functions: + + + MALLOC_CONF + + If the environment variable + MALLOC_CONF is set, the characters it contains + will be interpreted as options. 
+ + + + + + EXAMPLES + To dump core whenever a problem occurs: + ln -s 'abort:true' /etc/malloc.conf + + To specify in the source a chunk size that is 16 MiB: + + + + SEE ALSO + madvise + 2, + mmap + 2, + sbrk + 2, + alloca + 3, + atexit + 3, + getpagesize + 3 + + + STANDARDS + The malloc, + calloc, + realloc, and + free functions conform to ISO/IEC + 9899:1990 (“ISO C90”). + + The posix_memalign function conforms + to IEEE Std 1003.1-2001 (“POSIX.1”). + + diff --git a/doc/manpages.xsl.in b/doc/manpages.xsl.in new file mode 100644 index 0000000..88b2626 --- /dev/null +++ b/doc/manpages.xsl.in @@ -0,0 +1,4 @@ + + + + diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl new file mode 100644 index 0000000..4e334a8 --- /dev/null +++ b/doc/stylesheet.xsl @@ -0,0 +1,7 @@ + + ansi + + + "" + + diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h new file mode 100644 index 0000000..b80c118 --- /dev/null +++ b/include/jemalloc/internal/arena.h @@ -0,0 +1,743 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Subpages are an artificially designated partitioning of pages. Their only + * purpose is to support subpage-spaced size classes. + * + * There must be at least 4 subpages per page, due to the way size classes are + * handled. + */ +#define LG_SUBPAGE 8 +#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) +#define SUBPAGE_MASK (SUBPAGE - 1) + +/* Return the smallest subpage multiple that is >= s. */ +#define SUBPAGE_CEILING(s) \ + (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) + +#ifdef JEMALLOC_TINY + /* Smallest size class to support. */ +# define LG_TINY_MIN LG_SIZEOF_PTR +# define TINY_MIN (1U << LG_TINY_MIN) +#endif + +/* + * Maximum size class that is a multiple of the quantum, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. 
+ */ +#define LG_QSPACE_MAX_DEFAULT 7 + +/* + * Maximum size class that is a multiple of the cacheline, but not (necessarily) + * a power of 2. Above this size, allocations are rounded up to the nearest + * power of 2. + */ +#define LG_CSPACE_MAX_DEFAULT 9 + +/* + * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized + * as small as possible such that this setting is still honored, without + * violating other constraints. The goal is to make runs as small as possible + * without exceeding a per run external fragmentation threshold. + * + * We use binary fixed point math for overhead computations, where the binary + * point is implicitly RUN_BFP bits to the left. + * + * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be + * honored for some/all object sizes, since when heap profiling is enabled + * there is one pointer of header overhead per object (plus a constant). This + * constraint is relaxed (ignored) for runs that are so small that the + * per-region overhead is greater than: + * + * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) + */ +#define RUN_BFP 12 +/* \/ Implicit binary fixed point. */ +#define RUN_MAX_OVRHD 0x0000003dU +#define RUN_MAX_OVRHD_RELAX 0x00001800U + +/* Maximum number of regions in one run. */ +#define LG_RUN_MAXREGS 11 +#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) + +/* + * The minimum ratio of active:dirty pages per arena is computed as: + * + * (nactive >> opt_lg_dirty_mult) >= ndirty + * + * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 + * times as many active pages as dirty pages. 
+ */ +#define LG_DIRTY_MULT_DEFAULT 5 + +typedef struct arena_chunk_map_s arena_chunk_map_t; +typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_run_s arena_run_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Each element of the chunk map corresponds to one page within the chunk. */ +struct arena_chunk_map_s { + union { + /* + * Linkage for run trees. There are two disjoint uses: + * + * 1) arena_t's runs_avail_{clean,dirty} trees. + * 2) arena_run_t conceptually uses this linkage for in-use + * non-full runs, rather than directly embedding linkage. + */ + rb_node(arena_chunk_map_t) rb_link; + /* + * List of runs currently in purgatory. arena_chunk_purge() + * temporarily allocates runs that contain dirty pages while + * purging, so that other threads cannot use the runs while the + * purging thread is operating without the arena lock held. + */ + ql_elm(arena_chunk_map_t) ql_link; + } u; + +#ifdef JEMALLOC_PROF + /* Profile counters, used for large object runs. */ + prof_ctx_t *prof_ctx; +#endif + + /* + * Run address (or size) and various flags are stored together. The bit + * layout looks like (assuming 32-bit system): + * + * ???????? ???????? ????---- ----dula + * + * ? : Unallocated: Run address for first/last pages, unset for internal + * pages. + * Small: Run page offset. + * Large: Run size for first page, unset for trailing pages. + * - : Unused. + * d : dirty? + * u : unzeroed? + * l : large? + * a : allocated? + * + * Following are example bit patterns for the three types of runs. 
+ * + * p : run page offset + * s : run size + * c : (binind+1) for size class (used only if prof_promote is true) + * x : don't care + * - : 0 + * + : 1 + * [DULA] : bit set + * [dula] : bit unset + * + * Unallocated (clean): + * ssssssss ssssssss ssss---- ----du-a + * xxxxxxxx xxxxxxxx xxxx---- -----Uxx + * ssssssss ssssssss ssss---- ----dU-a + * + * Unallocated (dirty): + * ssssssss ssssssss ssss---- ----D--a + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * ssssssss ssssssss ssss---- ----D--a + * + * Small: + * pppppppp pppppppp pppp---- ----d--A + * pppppppp pppppppp pppp---- -------A + * pppppppp pppppppp pppp---- ----d--A + * + * Large: + * ssssssss ssssssss ssss---- ----D-LA + * xxxxxxxx xxxxxxxx xxxx---- ----xxxx + * -------- -------- -------- ----D-LA + * + * Large (sampled, size <= PAGE_SIZE): + * ssssssss ssssssss sssscccc ccccD-LA + * + * Large (not sampled, size == PAGE_SIZE): + * ssssssss ssssssss ssss---- ----D-LA + */ + size_t bits; +#ifdef JEMALLOC_PROF +#define CHUNK_MAP_CLASS_SHIFT 4 +#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) +#endif +#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) +#define CHUNK_MAP_DIRTY ((size_t)0x8U) +#define CHUNK_MAP_UNZEROED ((size_t)0x4U) +#define CHUNK_MAP_LARGE ((size_t)0x2U) +#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) +#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED +}; +typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; +typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; + +/* Arena chunk header. */ +struct arena_chunk_s { + /* Arena that owns the chunk. */ + arena_t *arena; + + /* Linkage for the arena's chunks_dirty list. */ + ql_elm(arena_chunk_t) link_dirty; + + /* + * True if the chunk is currently in the chunks_dirty list, due to + * having at some point contained one or more dirty pages. Removal + * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. + */ + bool dirtied; + + /* Number of dirty pages. */ + size_t ndirty; + + /* + * Map of pages within chunk that keeps track of free/large/small. 
The + * first map_bias entries are omitted, since the chunk header does not + * need to be tracked in the map. This omission saves a header page + * for common chunk sizes (e.g. 4 MiB). + */ + arena_chunk_map_t map[1]; /* Dynamically sized. */ +}; +typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; + +struct arena_run_s { +#ifdef JEMALLOC_DEBUG + uint32_t magic; +# define ARENA_RUN_MAGIC 0x384adf93 +#endif + + /* Bin this run is associated with. */ + arena_bin_t *bin; + + /* Index of next region that has never been allocated, or nregs. */ + uint32_t nextind; + + /* Number of free regions in run. */ + unsigned nfree; +}; + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + */ +struct arena_bin_info_s { + /* Size of regions in a run for this bin's size class. */ + size_t reg_size; + + /* Total size of a run for this bin's size class. */ + size_t run_size; + + /* Total number of regions in a run for this bin's size class. */ + uint32_t nregs; + + /* + * Offset of first bitmap_t element in a run header for this bin's size + * class. + */ + uint32_t bitmap_offset; + + /* + * Metadata used to manipulate bitmaps for runs associated with this + * bin. + */ + bitmap_info_t bitmap_info; + +#ifdef JEMALLOC_PROF + /* + * Offset of first (prof_ctx_t *) in a run header for this bin's size + * class, or 0 if (opt_prof == false). + */ + uint32_t ctx0_offset; +#endif + + /* Offset of first region in a run for this bin's size class. */ + uint32_t reg0_offset; +}; + +struct arena_bin_s { + /* + * All operations on runcur, runs, and stats require that lock be + * locked. Run allocation/deallocation are protected by the arena lock, + * which may be acquired while holding one or more bin locks, but not + * vice versa.
+ */ + malloc_mutex_t lock; + + /* + * Current run being used to service allocations of this bin's size + * class. + */ + arena_run_t *runcur; + + /* + * Tree of non-full runs. This tree is used when looking for an + * existing run when runcur is no longer usable. We choose the + * non-full run that is lowest in memory; this policy tends to keep + * objects packed well, and it can also help reduce the number of + * almost-empty chunks. + */ + arena_run_tree_t runs; + +#ifdef JEMALLOC_STATS + /* Bin statistics. */ + malloc_bin_stats_t stats; +#endif +}; + +struct arena_s { +#ifdef JEMALLOC_DEBUG + uint32_t magic; +# define ARENA_MAGIC 0x947d3d24 +#endif + + /* This arena's index within the arenas array. */ + unsigned ind; + + /* + * Number of threads currently assigned to this arena. This field is + * protected by arenas_lock. + */ + unsigned nthreads; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread assignment (modifies nthreads) is protected by + * arenas_lock. + * 2) Bin-related operations are protected by bin locks. + * 3) Chunk- and run-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + +#ifdef JEMALLOC_STATS + arena_stats_t stats; +# ifdef JEMALLOC_TCACHE + /* + * List of tcaches for extant threads associated with this arena. + * Stats from these are merged incrementally, and at exit. + */ + ql_head(tcache_t) tcache_ql; +# endif +#endif + +#ifdef JEMALLOC_PROF + uint64_t prof_accumbytes; +#endif + + /* List of dirty-page-containing chunks this arena manages. */ + ql_head(arena_chunk_t) chunks_dirty; + + /* + * In order to avoid rapid chunk allocation/deallocation when an arena + * oscillates right on the cusp of needing a new chunk, cache the most + * recently freed chunk. The spare is left in the arena's chunk trees + * until it is deleted.
+ * + * There is one spare chunk per arena, rather than one spare total, in + * order to avoid interactions between multiple threads that could make + * a single spare inadequate. + */ + arena_chunk_t *spare; + + /* Number of pages in active runs. */ + size_t nactive; + + /* + * Current count of pages within unused runs that are potentially + * dirty, and for which madvise(... MADV_DONTNEED) has not been called. + * By tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* + * Approximate number of pages being purged. It is possible for + * multiple threads to purge dirty pages concurrently, and they use + * npurgatory to indicate the total number of pages all threads are + * attempting to purge. + */ + size_t npurgatory; + + /* + * Size/address-ordered trees of this arena's available runs. The trees + * are used for first-best-fit run allocation. The dirty tree contains + * runs with dirty pages (i.e. very likely to have been touched and + * therefore have associated physical pages), whereas the clean tree + * contains runs with pages that either have no associated physical + * pages, or have pages that the kernel may recycle at any time due to + * previous madvise(2) calls. The dirty tree is used in preference to + * the clean tree for allocations, because using dirty pages reduces + * the amount of dirty purging necessary to keep the active:dirty page + * ratio below the purge threshold. + */ + arena_avail_tree_t runs_avail_clean; + arena_avail_tree_t runs_avail_dirty; + + /* + * bins is used to store trees of free regions of the following sizes, + * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and + * default MALLOC_CONF. 
+ * + * bins[i] | size | + * --------+--------+ + * 0 | 8 | + * --------+--------+ + * 1 | 16 | + * 2 | 32 | + * 3 | 48 | + * : : + * 6 | 96 | + * 7 | 112 | + * 8 | 128 | + * --------+--------+ + * 9 | 192 | + * 10 | 256 | + * 11 | 320 | + * 12 | 384 | + * 13 | 448 | + * 14 | 512 | + * --------+--------+ + * 15 | 768 | + * 16 | 1024 | + * 17 | 1280 | + * : : + * 25 | 3328 | + * 26 | 3584 | + * 27 | 3840 | + * --------+--------+ + */ + arena_bin_t bins[1]; /* Dynamically sized. */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_qspace_max; +extern size_t opt_lg_cspace_max; +extern ssize_t opt_lg_dirty_mult; +/* + * small_size2bin is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via the SMALL_SIZE2BIN macro. + */ +extern uint8_t const *small_size2bin; +#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) + +extern arena_bin_info_t *arena_bin_info; + +/* Various bin-related settings. */ +#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ +# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) +#else +# define ntbins 0 +#endif +extern unsigned nqbins; /* Number of quantum-spaced bins. */ +extern unsigned ncbins; /* Number of cacheline-spaced bins. */ +extern unsigned nsbins; /* Number of subpage-spaced bins. 
*/ +extern unsigned nbins; +#ifdef JEMALLOC_TINY +# define tspace_max ((size_t)(QUANTUM >> 1)) +#endif +#define qspace_min QUANTUM +extern size_t qspace_max; +extern size_t cspace_min; +extern size_t cspace_max; +extern size_t sspace_min; +extern size_t sspace_max; +#define small_maxclass sspace_max + +#define nlclasses (chunk_npages - map_bias) + +void arena_purge_all(arena_t *arena); +#ifdef JEMALLOC_PROF +void arena_prof_accum(arena_t *arena, uint64_t accumbytes); +#endif +#ifdef JEMALLOC_TCACHE +void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, + size_t binind +# ifdef JEMALLOC_PROF + , uint64_t prof_accumbytes +# endif + ); +#endif +void *arena_malloc_small(arena_t *arena, size_t size, bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_malloc(size_t size, bool zero); +void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, + size_t alignment, bool zero); +size_t arena_salloc(const void *ptr); +#ifdef JEMALLOC_PROF +void arena_prof_promoted(const void *ptr, size_t size); +size_t arena_salloc_demote(const void *ptr); +#endif +void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm); +void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +#ifdef JEMALLOC_STATS +void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +#endif +void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); +bool arena_new(arena_t *arena, unsigned ind); +bool arena_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +unsigned 
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info,
+    const void *ptr);
+#  ifdef JEMALLOC_PROF
+prof_ctx_t	*arena_prof_ctx_get(const void *ptr);
+void	arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx);
+#  endif
+void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
+#endif
+
+#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_))
+/*
+ * Return the index of bin within arena->bins.  The result is asserted to be
+ * less than nbins.
+ */
+JEMALLOC_INLINE size_t
+arena_bin_index(arena_t *arena, arena_bin_t *bin)
+{
+	size_t binind = bin - arena->bins;
+	assert(binind < nbins);
+	return (binind);
+}
+
+/*
+ * Return the index of the region that ptr addresses within run, i.e.
+ * (ptr - run - bin_info->reg0_offset) / bin_info->reg_size.  ptr must not be
+ * below region zero.  The quotient is computed with shifts and a reciprocal
+ * multiplication where possible instead of a hardware divide.
+ */
+JEMALLOC_INLINE unsigned
+arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr)
+{
+	unsigned shift, diff, regind;
+	size_t size;
+
+	dassert(run->magic == ARENA_RUN_MAGIC);
+	/*
+	 * Freeing a pointer lower than region zero can cause assertion
+	 * failure.
+	 */
+	assert((uintptr_t)ptr >= (uintptr_t)run +
+	    (uintptr_t)bin_info->reg0_offset);
+
+	/*
+	 * Avoid doing division with a variable divisor if possible.  Using
+	 * actual division here can reduce allocator throughput by over 20%!
+	 */
+	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run -
+	    bin_info->reg0_offset);
+
+	/* Rescale (factor powers of 2 out of the numerator and denominator). */
+	size = bin_info->reg_size;
+	/* ffs() is 1-based, so shift is the count of trailing zero bits. */
+	shift = ffs(size) - 1;
+	diff >>= shift;
+	size >>= shift;
+
+	if (size == 1) {
+		/* The divisor was a power of 2. */
+		regind = diff;
+	} else {
+		/*
+		 * To divide by a number D that is not a power of two we
+		 * multiply by (2^21 / D) and then right shift by 21 positions.
+		 *
+		 *   X / D
+		 *
+		 * becomes
+		 *
+		 *   (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
+		 *
+		 * We can omit the first three elements, because we never
+		 * divide by 0, and 1 and 2 are both powers of two, which are
+		 * handled above.
+		 */
+		/* size_invs[i] is the fixed-point reciprocal of (i + 3). */
+#define	SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS)
+#define	SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
+		static const unsigned size_invs[] = {
+		    SIZE_INV(3),
+		    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
+		    SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
+		    SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
+		    SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
+		    SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
+		    SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
+		    SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
+		};
+
+		/*
+		 * The table covers divisors 3..31; anything larger falls back
+		 * to an actual division.
+		 */
+		if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
+			regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
+		else
+			regind = diff / size;
+#undef SIZE_INV
+#undef SIZE_INV_SHIFT
+	}
+	/* ptr must address the start of a region, and that region must exist. */
+	assert(diff == regind * size);
+	assert(regind < bin_info->nregs);
+
+	return (regind);
+}
+
+#ifdef JEMALLOC_PROF
+/*
+ * Return the profiling context recorded for the allocation at ptr.  ptr must
+ * be non-NULL, must not be a chunk base address, and must refer to a page that
+ * is marked allocated in its chunk's map.
+ */
+JEMALLOC_INLINE prof_ctx_t *
+arena_prof_ctx_get(const void *ptr)
+{
+	prof_ctx_t *ret;
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		/* Small allocation. */
+		if (prof_promote)
+			/* No per-region slot is read; return placeholder 1. */
+			ret = (prof_ctx_t *)(uintptr_t)1U;
+		else {
+			/*
+			 * The high map bits give this page's offset (in pages)
+			 * from the start of its run.
+			 */
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			size_t binind = arena_bin_index(chunk->arena, run->bin);
+			arena_bin_info_t *bin_info = &arena_bin_info[binind];
+			unsigned regind;
+
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			regind = arena_run_regind(run, bin_info, ptr);
+			/* Contexts are an array at ctx0_offset, one per region. */
+			ret = *(prof_ctx_t **)((uintptr_t)run +
+			    bin_info->ctx0_offset + (regind *
+			    sizeof(prof_ctx_t *)));
+		}
+	} else
+		/* Large allocation: the context lives in the chunk map. */
+		ret = chunk->map[pageind-map_bias].prof_ctx;
+
+	return (ret);
+}
+
+JEMALLOC_INLINE void
+arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx)
+{
+	/*
+	 * Record ctx as the profiling context of the allocation at ptr.  This
+	 * mirrors the lookup in arena_prof_ctx_get(): small allocations use
+	 * the per-region slot in their run (unless prof_promote is set), large
+	 * allocations use the chunk map element.
+	 */
+	arena_chunk_t *chunk;
+	size_t pageind, mapbits;
+
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapbits = chunk->map[pageind-map_bias].bits;
+	assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapbits & CHUNK_MAP_LARGE) == 0) {
+		if (prof_promote == false) {
+			arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) <<
+			    PAGE_SHIFT));
+			arena_bin_t *bin = run->bin;
+			size_t binind;
+			arena_bin_info_t *bin_info;
+			unsigned regind;
+
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			binind = arena_bin_index(chunk->arena, bin);
+			bin_info = &arena_bin_info[binind];
+			regind = arena_run_regind(run, bin_info, ptr);
+
+			*((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset
+			    + (regind * sizeof(prof_ctx_t *)))) = ctx;
+		} else
+			/* Nothing is stored; only the placeholder is accepted. */
+			assert((uintptr_t)ctx == (uintptr_t)1U);
+	} else
+		chunk->map[pageind-map_bias].prof_ctx = ctx;
+}
+#endif
+
+/*
+ * Free ptr, which must lie within chunk, and chunk must belong to arena.  The
+ * chunk map element for ptr's page decides whether the small or the large
+ * deallocation path is taken.
+ */
+JEMALLOC_INLINE void
+arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
+{
+	size_t pageind;
+	arena_chunk_map_t *mapelm;
+
+	assert(arena != NULL);
+	dassert(arena->magic == ARENA_MAGIC);
+	assert(chunk->arena == arena);
+	assert(ptr != NULL);
+	assert(CHUNK_ADDR2BASE(ptr) != ptr);
+
+	pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
+	mapelm = &chunk->map[pageind-map_bias];
+	assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
+	if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
+		/* Small allocation. 
*/
+#ifdef JEMALLOC_TCACHE
+		tcache_t *tcache;
+
+		/* Prefer handing the region to the thread cache, if any. */
+		if ((tcache = tcache_get()) != NULL)
+			tcache_dalloc_small(tcache, ptr);
+		else {
+#endif
+			arena_run_t *run;
+			arena_bin_t *bin;
+
+			/* Locate the run header from the page's run offset. */
+			run = (arena_run_t *)((uintptr_t)chunk +
+			    (uintptr_t)((pageind - (mapelm->bits >>
+			    PAGE_SHIFT)) << PAGE_SHIFT));
+			dassert(run->magic == ARENA_RUN_MAGIC);
+			bin = run->bin;
+#ifdef JEMALLOC_DEBUG
+			{
+				/* ptr must address the start of a region. */
+				size_t binind = arena_bin_index(arena, bin);
+				arena_bin_info_t *bin_info =
+				    &arena_bin_info[binind];
+				assert(((uintptr_t)ptr - ((uintptr_t)run +
+				    (uintptr_t)bin_info->reg0_offset)) %
+				    bin_info->reg_size == 0);
+			}
+#endif
+			/* arena_dalloc_bin() is called with the bin lock held. */
+			malloc_mutex_lock(&bin->lock);
+			arena_dalloc_bin(arena, chunk, ptr, mapelm);
+			malloc_mutex_unlock(&bin->lock);
+#ifdef JEMALLOC_TCACHE
+		}
+#endif
+	} else {
+		/* Large allocation; ptr must be page-aligned. */
+#ifdef JEMALLOC_TCACHE
+		/* The size is kept in the map bits above the page mask. */
+		size_t size = mapelm->bits & ~PAGE_MASK;
+
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		if (size <= tcache_maxclass) {
+			tcache_t *tcache;
+
+			if ((tcache = tcache_get()) != NULL)
+				tcache_dalloc_large(tcache, ptr, size);
+			else {
+				malloc_mutex_lock(&arena->lock);
+				arena_dalloc_large(arena, chunk, ptr);
+				malloc_mutex_unlock(&arena->lock);
+			}
+		} else {
+			/* Too large for the thread cache. */
+			malloc_mutex_lock(&arena->lock);
+			arena_dalloc_large(arena, chunk, ptr);
+			malloc_mutex_unlock(&arena->lock);
+		}
+#else
+		assert(((uintptr_t)ptr & PAGE_MASK) == 0);
+		malloc_mutex_lock(&arena->lock);
+		arena_dalloc_large(arena, chunk, ptr);
+		malloc_mutex_unlock(&arena->lock);
+#endif
+	}
+}
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h
new file mode 100644
index 0000000..9a29862
--- /dev/null
+++ b/include/jemalloc/internal/atomic.h
@@ -0,0 +1,169 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+#endif /* JEMALLOC_H_TYPES */
+/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#define atomic_read_uint64(p) atomic_add_uint64(p, 0) +#define atomic_read_uint32(p) atomic_add_uint32(p, 0) + +#if (LG_SIZEOF_PTR == 3) +# define atomic_read_z(p) \ + (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) +# define atomic_add_z(p, x) \ + (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) +# define atomic_sub_z(p, x) \ + (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) +#elif (LG_SIZEOF_PTR == 2) +# define atomic_read_z(p) \ + (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) +# define atomic_add_z(p, x) \ + (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) +# define atomic_sub_z(p, x) \ + (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); +uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) +/******************************************************************************/ +/* 64-bit operations. 
*/ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); +} +#elif (defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint64_t +atomic_add_uint64(uint64_t *p, uint64_t x) +{ + + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint64_t +atomic_sub_uint64(uint64_t *p, uint64_t x) +{ + + x = (uint64_t)(-(int64_t)x); + asm volatile ( + "lock; xaddq %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} +#else +# if (LG_SIZEOF_PTR == 3) +# error "Missing implementation for 64-bit atomic operations" +# endif +#endif + +/******************************************************************************/ +/* 32-bit operations. 
*/ +#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_add_and_fetch(p, x)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (__sync_sub_and_fetch(p, x)); +} +#elif (defined(JEMALLOC_OSATOMIC)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); +} +#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) +JEMALLOC_INLINE uint32_t +atomic_add_uint32(uint32_t *p, uint32_t x) +{ + + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. */ + ); + + return (x); +} + +JEMALLOC_INLINE uint32_t +atomic_sub_uint32(uint32_t *p, uint32_t x) +{ + + x = (uint32_t)(-(int32_t)x); + asm volatile ( + "lock; xaddl %0, %1;" + : "+r" (x), "=m" (*p) /* Outputs. */ + : "m" (*p) /* Inputs. 
*/ + ); + + return (x); +} +#else +# error "Missing implementation for 32-bit atomic operations" +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h new file mode 100644 index 0000000..e353f30 --- /dev/null +++ b/include/jemalloc/internal/base.h @@ -0,0 +1,24 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern malloc_mutex_t base_mtx; + +void *base_alloc(size_t size); +extent_node_t *base_node_alloc(void); +void base_node_dealloc(extent_node_t *node); +bool base_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h new file mode 100644 index 0000000..605ebac --- /dev/null +++ b/include/jemalloc/internal/bitmap.h @@ -0,0 +1,184 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. 
*/ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* Maximum number of levels possible. */ +#define BITMAP_MAX_LEVELS \ + (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +size_t bitmap_info_ngroups(const bitmap_info_t *binfo); +size_t bitmap_size(size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool 
+bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	/* The root group is the word just before levels[nlevels]'s offset. */
+	unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1;
+	bitmap_t rg = bitmap[rgoff];
+	/* The bitmap is full iff the root group is 0. */
+	return (rg == 0);
+}
+
+/*
+ * Return true if bit is set.  Groups store each logical bit inverted: a
+ * stored 1 means "unset", so a group equal to 0 has every bit set.
+ */
+JEMALLOC_INLINE bool
+bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	size_t goff;
+	bitmap_t g;
+
+	assert(bit < binfo->nbits);
+	goff = bit >> LG_BITMAP_GROUP_NBITS;
+	g = bitmap[goff];
+	return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))));
+}
+
+/*
+ * Mark bit as set; bit must currently be unset.  When this makes a group
+ * become 0, the group's own bit in the next level up is cleared as well,
+ * continuing upward until a group stays nonzero.
+ */
+JEMALLOC_INLINE void
+bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	size_t goff;
+	bitmap_t *gp;
+	bitmap_t g;
+
+	assert(bit < binfo->nbits);
+	assert(bitmap_get(bitmap, binfo, bit) == false);
+	goff = bit >> LG_BITMAP_GROUP_NBITS;
+	gp = &bitmap[goff];
+	g = *gp;
+	assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+	/* The stored bit is known to be 1, so XOR clears it. */
+	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	*gp = g;
+	assert(bitmap_get(bitmap, binfo, bit));
+	/* Propagate group state transitions up the tree. */
+	if (g == 0) {
+		unsigned i;
+		for (i = 1; i < binfo->nlevels; i++) {
+			/* The group index at level i-1 is the bit at level i. */
+			bit = goff;
+			goff = bit >> LG_BITMAP_GROUP_NBITS;
+			gp = &bitmap[binfo->levels[i].group_offset + goff];
+			g = *gp;
+			assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)));
+			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			*gp = g;
+			if (g != 0)
+				break;
+		}
+	}
+}
+
+/* sfu: set first unset. 
*/
+/*
+ * Find an unset bit by walking from the top level down, taking the least
+ * significant stored 1 (ffsl) of each group visited, then set that bit and
+ * return its index.  The bitmap must not be full.
+ */
+JEMALLOC_INLINE size_t
+bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo)
+{
+	size_t bit;
+	bitmap_t g;
+	unsigned i;
+
+	assert(bitmap_full(bitmap, binfo) == false);
+
+	i = binfo->nlevels - 1;
+	g = bitmap[binfo->levels[i].group_offset];
+	/* ffsl() is 1-based. */
+	bit = ffsl(g) - 1;
+	while (i > 0) {
+		i--;
+		/* bit found one level up is the group index at this level. */
+		g = bitmap[binfo->levels[i].group_offset + bit];
+		bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1);
+	}
+
+	bitmap_set(bitmap, binfo, bit);
+	return (bit);
+}
+
+/*
+ * Mark bit as unset; bit must currently be set.  When the group was 0 before
+ * the change, the group's own bit in the next level up is turned back on,
+ * continuing upward for as long as the parent group was itself 0.
+ */
+JEMALLOC_INLINE void
+bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit)
+{
+	size_t goff;
+	bitmap_t *gp;
+	bitmap_t g;
+	bool propagate;
+
+	assert(bit < binfo->nbits);
+	assert(bitmap_get(bitmap, binfo, bit));
+	goff = bit >> LG_BITMAP_GROUP_NBITS;
+	gp = &bitmap[goff];
+	g = *gp;
+	/* Remember whether the group was 0 before this bit is turned on. */
+	propagate = (g == 0);
+	assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0);
+	/* The stored bit is known to be 0, so XOR sets it. */
+	g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+	*gp = g;
+	assert(bitmap_get(bitmap, binfo, bit) == false);
+	/* Propagate group state transitions up the tree. */
+	if (propagate) {
+		unsigned i;
+		for (i = 1; i < binfo->nlevels; i++) {
+			bit = goff;
+			goff = bit >> LG_BITMAP_GROUP_NBITS;
+			gp = &bitmap[binfo->levels[i].group_offset + goff];
+			g = *gp;
+			propagate = (g == 0);
+			assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))
+			    == 0);
+			g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK);
+			*gp = g;
+			if (propagate == false)
+				break;
+		}
+	}
+}
+
+#endif
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h
new file mode 100644
index 0000000..a60f0ad
--- /dev/null
+++ b/include/jemalloc/internal/chunk.h
@@ -0,0 +1,65 @@
+/******************************************************************************/
+#ifdef JEMALLOC_H_TYPES
+
+/*
+ * Size and alignment of memory chunks that are allocated by the OS's virtual
+ * memory system.
+ */ +#define LG_CHUNK_DEFAULT 22 + +/* Return the chunk address for allocation address a. */ +#define CHUNK_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~chunksize_mask)) + +/* Return the chunk offset of address a. */ +#define CHUNK_ADDR2OFFSET(a) \ + ((size_t)((uintptr_t)(a) & chunksize_mask)) + +/* Return the smallest chunk multiple that is >= s. */ +#define CHUNK_CEILING(s) \ + (((s) + chunksize_mask) & ~chunksize_mask) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern size_t opt_lg_chunk; +#ifdef JEMALLOC_SWAP +extern bool opt_overcommit; +#endif + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +/* Protects stats_chunks; currently not used for any other purpose. */ +extern malloc_mutex_t chunks_mtx; +/* Chunk statistics. */ +extern chunk_stats_t stats_chunks; +#endif + +#ifdef JEMALLOC_IVSALLOC +extern rtree_t *chunks_rtree; +#endif + +extern size_t chunksize; +extern size_t chunksize_mask; /* (chunksize - 1). */ +extern size_t chunk_npages; +extern size_t map_bias; /* Number of arena chunk header pages. */ +extern size_t arena_maxclass; /* Max size class for arenas. 
*/ + +void *chunk_alloc(size_t size, bool base, bool *zero); +void chunk_dealloc(void *chunk, size_t size); +bool chunk_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/chunk_swap.h" +#include "jemalloc/internal/chunk_dss.h" +#include "jemalloc/internal/chunk_mmap.h" diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h new file mode 100644 index 0000000..6f00522 --- /dev/null +++ b/include/jemalloc/internal/chunk_dss.h @@ -0,0 +1,30 @@ +#ifdef JEMALLOC_DSS +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +/* + * Protects sbrk() calls. This avoids malloc races among threads, though it + * does not protect against races with threads that call sbrk() directly. 
+ */ +extern malloc_mutex_t dss_mtx; + +void *chunk_alloc_dss(size_t size, bool *zero); +bool chunk_in_dss(void *chunk); +bool chunk_dealloc_dss(void *chunk, size_t size); +bool chunk_dss_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_DSS */ diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h new file mode 100644 index 0000000..07b50a4 --- /dev/null +++ b/include/jemalloc/internal/chunk_mmap.h @@ -0,0 +1,23 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *chunk_alloc_mmap(size_t size); +void *chunk_alloc_mmap_noreserve(size_t size); +void chunk_dealloc_mmap(void *chunk, size_t size); + +bool chunk_mmap_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_swap.h b/include/jemalloc/internal/chunk_swap.h new file mode 100644 index 0000000..9faa739 --- /dev/null +++ b/include/jemalloc/internal/chunk_swap.h @@ -0,0 +1,34 @@ +#ifdef JEMALLOC_SWAP +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef 
JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern malloc_mutex_t swap_mtx; +extern bool swap_enabled; +extern bool swap_prezeroed; +extern size_t swap_nfds; +extern int *swap_fds; +#ifdef JEMALLOC_STATS +extern size_t swap_avail; +#endif + +void *chunk_alloc_swap(size_t size, bool *zero); +bool chunk_in_swap(void *chunk); +bool chunk_dealloc_swap(void *chunk, size_t size); +bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); +bool chunk_swap_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_SWAP */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h new file mode 100644 index 0000000..3e4ad4c --- /dev/null +++ b/include/jemalloc/internal/ckh.h @@ -0,0 +1,95 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. + */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Hash table cell. 
*/
+struct ckhc_s {
+	const void	*key;
+	const void	*data;
+};
+
+/* Hash table state; tab holds 2^lg_curbuckets buckets of ckhc_t cells. */
+struct ckh_s {
+#ifdef JEMALLOC_DEBUG
+#define	CKH_MAGIC	0x3af2489d
+	uint32_t	magic;
+#endif
+
+#ifdef CKH_COUNT
+	/* Counters used to get an idea of performance. */
+	uint64_t	ngrows;
+	uint64_t	nshrinks;
+	uint64_t	nshrinkfails;
+	uint64_t	ninserts;
+	uint64_t	nrelocs;
+#endif
+
+	/* Used for pseudo-random number generation. */
+#define	CKH_A		1103515241
+#define	CKH_C		12347
+	uint32_t	prn_state;
+
+	/* Total number of items. */
+	size_t		count;
+
+	/*
+	 * Minimum and current number of hash table buckets.  There are
+	 * 2^LG_CKH_BUCKET_CELLS cells per bucket.
+	 */
+	unsigned	lg_minbuckets;
+	unsigned	lg_curbuckets;
+
+	/* Hash and comparison functions. */
+	ckh_hash_t	*hash;
+	ckh_keycomp_t	*keycomp;
+
+	/* Hash table with 2^lg_curbuckets buckets. */
+	ckhc_t		*tab;
+};
+
+#endif /* JEMALLOC_H_STRUCTS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_EXTERNS
+
+bool	ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash,
+    ckh_keycomp_t *keycomp);
+void	ckh_delete(ckh_t *ckh);
+size_t	ckh_count(ckh_t *ckh);
+bool	ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data);
+bool	ckh_insert(ckh_t *ckh, const void *key, const void *data);
+bool	ckh_remove(ckh_t *ckh, const void *searchkey, void **key,
+    void **data);
+/* The lookup key parameter is named searchkey, matching ckh_remove(). */
+bool	ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data);
+void	ckh_string_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2);
+bool	ckh_string_keycomp(const void *k1, const void *k2);
+void	ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1,
+    size_t *hash2);
+bool	ckh_pointer_keycomp(const void *k1, const void *k2);
+
+#endif /* JEMALLOC_H_EXTERNS */
+/******************************************************************************/
+#ifdef JEMALLOC_H_INLINES
+
+#endif /* JEMALLOC_H_INLINES */
+/******************************************************************************/
diff
--git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h new file mode 100644 index 0000000..f1f5eb7 --- /dev/null +++ b/include/jemalloc/internal/ctl.h @@ -0,0 +1,118 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ctl_node_s { + bool named; + union { + struct { + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + unsigned nchildren; + const ctl_node_t *children; + } named; + struct { + const ctl_node_t *(*index)(const size_t *, size_t, + size_t); + } indexed; + } u; + int (*ctl)(const size_t *, size_t, void *, size_t *, void *, + size_t); +}; + +struct ctl_arena_stats_s { + bool initialized; + unsigned nthreads; + size_t pactive; + size_t pdirty; +#ifdef JEMALLOC_STATS + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t *bstats; /* nbins elements. */ + malloc_large_stats_t *lstats; /* nlclasses elements. */ +#endif +}; + +struct ctl_stats_s { +#ifdef JEMALLOC_STATS + size_t allocated; + size_t active; + size_t mapped; + struct { + size_t current; /* stats_chunks.curchunks */ + uint64_t total; /* stats_chunks.nchunks */ + size_t high; /* stats_chunks.highchunks */ + } chunks; + struct { + size_t allocated; /* huge_allocated */ + uint64_t nmalloc; /* huge_nmalloc */ + uint64_t ndalloc; /* huge_ndalloc */ + } huge; +#endif + ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ +#ifdef JEMALLOC_SWAP + size_t swap_avail; +#endif +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); +int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); + +int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +bool ctl_boot(void); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write(": Failure in xmallctl(\""); \ + malloc_write(name); \ + malloc_write("\", ...)\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ + malloc_write( \ + ": Failure in xmallctlnametomib(\""); \ + malloc_write(name); \ + malloc_write("\", ...)\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h new file mode 100644 index 0000000..6fe9702 --- /dev/null +++ b/include/jemalloc/internal/extent.h @@ -0,0 +1,49 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct extent_node_s extent_node_t; + +#endif /* JEMALLOC_H_TYPES */ 
+/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* Tree of extents. */ +struct extent_node_s { +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) + /* Linkage for the size/address-ordered tree. */ + rb_node(extent_node_t) link_szad; +#endif + + /* Linkage for the address-ordered tree. */ + rb_node(extent_node_t) link_ad; + +#ifdef JEMALLOC_PROF + /* Profile counters, used for huge objects. */ + prof_ctx_t *prof_ctx; +#endif + + /* Pointer to the extent that this tree node is responsible for. */ + void *addr; + + /* Total region size. */ + size_t size; +}; +typedef rb_tree(extent_node_t) extent_tree_t; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) +rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) +#endif + +rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h new file mode 100644 index 0000000..93905bf --- /dev/null +++ b/include/jemalloc/internal/hash.h @@ -0,0 +1,70 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef 
JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t hash(const void *key, size_t len, uint64_t seed); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) +/* + * The following hash function is based on MurmurHash64A(), placed into the + * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for + * details. + */ +JEMALLOC_INLINE uint64_t +hash(const void *key, size_t len, uint64_t seed) +{ + const uint64_t m = 0xc6a4a7935bd1e995; + const int r = 47; + uint64_t h = seed ^ (len * m); + const uint64_t *data = (const uint64_t *)key; + const uint64_t *end = data + (len/8); + const unsigned char *data2; + + assert(((uintptr_t)key & 0x7) == 0); + + while(data != end) { + uint64_t k = *data++; + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } + + data2 = (const unsigned char *)data; + switch(len & 7) { + case 7: h ^= ((uint64_t)(data2[6])) << 48; + case 6: h ^= ((uint64_t)(data2[5])) << 40; + case 5: h ^= ((uint64_t)(data2[4])) << 32; + case 4: h ^= ((uint64_t)(data2[3])) << 24; + case 3: h ^= ((uint64_t)(data2[2])) << 16; + case 2: h ^= ((uint64_t)(data2[1])) << 8; + case 1: h ^= ((uint64_t)(data2[0])); + h *= m; + } + + h ^= h >> r; + h *= m; + h ^= h >> r; + + return (h); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h new file mode 100644 index 0000000..66544cf --- /dev/null +++ b/include/jemalloc/internal/huge.h @@ -0,0 +1,41 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_STATS +/* 
Huge allocation statistics. */ +extern uint64_t huge_nmalloc; +extern uint64_t huge_ndalloc; +extern size_t huge_allocated; +#endif + +/* Protects chunk-related data structures. */ +extern malloc_mutex_t huge_mtx; + +void *huge_malloc(size_t size, bool zero); +void *huge_palloc(size_t size, size_t alignment, bool zero); +void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, + size_t extra); +void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); +void huge_dalloc(void *ptr, bool unmap); +size_t huge_salloc(const void *ptr); +#ifdef JEMALLOC_PROF +prof_ctx_t *huge_prof_ctx_get(const void *ptr); +void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +#endif +bool huge_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in new file mode 100644 index 0000000..254adb6 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -0,0 +1,786 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#ifndef SIZE_T_MAX +# define SIZE_T_MAX SIZE_MAX +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef offsetof +# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "../jemalloc@install_suffix@.h" + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include +#endif + +#ifdef JEMALLOC_ZONE +#include +#include +#include +#include +#endif + +#ifdef JEMALLOC_LAZY_LOCK +#include +#endif + +#define RB_COMPACT +#include "jemalloc/internal/rb.h" +#include 
"jemalloc/internal/qr.h" +#include "jemalloc/internal/ql.h" + +extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); + +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +# ifdef JEMALLOC_DEBUG +# define assert(e) do { \ + if (!(e)) { \ + char line_buf[UMAX2S_BUFSIZE]; \ + malloc_write(": "); \ + malloc_write(__FILE__); \ + malloc_write(":"); \ + malloc_write(u2s(__LINE__, 10, line_buf)); \ + malloc_write(": Failed assertion: "); \ + malloc_write("\""); \ + malloc_write(#e); \ + malloc_write("\"\n"); \ + abort(); \ + } \ +} while (0) +# else +# define assert(e) +# endif +#endif + +#ifdef JEMALLOC_DEBUG +# define dassert(e) assert(e) +#else +# define dassert(e) +#endif + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. In order to reduce the effect on + * visual code flow, read the header files in multiple passes, with one of the + * following cpp variables defined during each pass: + * + * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. + */ +/******************************************************************************/ +#define JEMALLOC_H_TYPES + +#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) + +/* Cast z to size_t; z is parenthesized so the cast covers the whole argument. */ +#define ZU(z) ((size_t)(z)) + +#ifndef __DECONST +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +#endif + +#ifdef JEMALLOC_DEBUG + /* Disable inlining to make debugging easier. */ +# define JEMALLOC_INLINE +# define inline +#else +# define JEMALLOC_ENABLE_INLINE +# define JEMALLOC_INLINE static inline +#endif + +/* Size of stack-allocated buffer passed to buferror().
*/ +#define BUFERROR_BUF 64 + +/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ +#ifdef __i386__ +# define LG_QUANTUM 4 +#endif +#ifdef __ia64__ +# define LG_QUANTUM 4 +#endif +#ifdef __alpha__ +# define LG_QUANTUM 4 +#endif +#ifdef __sparc64__ +# define LG_QUANTUM 4 +#endif +#if (defined(__amd64__) || defined(__x86_64__)) +# define LG_QUANTUM 4 +#endif +#ifdef __arm__ +# define LG_QUANTUM 3 +#endif +#ifdef __mips__ +# define LG_QUANTUM 3 +#endif +#ifdef __powerpc__ +# define LG_QUANTUM 4 +#endif +#ifdef __s390x__ +# define LG_QUANTUM 4 +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + */ +#define LG_CACHELINE 6 +#define CACHELINE ((size_t)(1U << LG_CACHELINE)) +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* + * Page size. STATIC_PAGE_SHIFT is determined by the configure script. If + * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where + * compile-time values are required for the purposes of defining data + * structures. 
+ */ +#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) +#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) + +#ifdef PAGE_SHIFT +# undef PAGE_SHIFT +#endif +#ifdef PAGE_SIZE +# undef PAGE_SIZE +#endif +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif + +#ifdef DYNAMIC_PAGE_SHIFT +# define PAGE_SHIFT lg_pagesize +# define PAGE_SIZE pagesize +# define PAGE_MASK pagesize_mask +#else +# define PAGE_SHIFT STATIC_PAGE_SHIFT +# define PAGE_SIZE STATIC_PAGE_SIZE +# define PAGE_MASK STATIC_PAGE_MASK +#endif + +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_TYPES +/******************************************************************************/ +#define JEMALLOC_H_STRUCTS + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include 
"jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif +#include "jemalloc/internal/prof.h" + +#ifdef JEMALLOC_STATS +typedef struct { + uint64_t allocated; + uint64_t deallocated; +} thread_allocated_t; +#endif + +#undef JEMALLOC_H_STRUCTS +/******************************************************************************/ +#define JEMALLOC_H_EXTERNS + +extern bool opt_abort; +#ifdef JEMALLOC_FILL +extern bool opt_junk; +#endif +#ifdef JEMALLOC_SYSV +extern bool opt_sysv; +#endif +#ifdef JEMALLOC_XMALLOC +extern bool opt_xmalloc; +#endif +#ifdef JEMALLOC_FILL +extern bool opt_zero; +#endif +extern size_t opt_narenas; + +#ifdef DYNAMIC_PAGE_SHIFT +extern size_t pagesize; +extern size_t pagesize_mask; +extern size_t lg_pagesize; +#endif + +/* Number of CPUs. */ +extern unsigned ncpus; + +extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ +extern pthread_key_t arenas_tsd; +#ifndef NO_TLS +/* + * Map of pthread_self() --> arenas[???], used for selecting an arena to use + * for allocations. + */ +extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +# define ARENA_GET() arenas_tls +# define ARENA_SET(v) do { \ + arenas_tls = (v); \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) +#else +# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) +# define ARENA_SET(v) do { \ + pthread_setspecific(arenas_tsd, (void *)(v)); \ +} while (0) +#endif + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. 
+ */ +extern arena_t **arenas; +extern unsigned narenas; + +#ifdef JEMALLOC_STATS +# ifndef NO_TLS +extern __thread thread_allocated_t thread_allocated_tls; +# define ALLOCATED_GET() (thread_allocated_tls.allocated) +# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) +# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) +# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) +/* Macro arguments are parenthesized, matching the NO_TLS variant below. */ +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_tls.allocated += (a); \ + thread_allocated_tls.deallocated += (d); \ +} while (0) +# else +extern pthread_key_t thread_allocated_tsd; +thread_allocated_t *thread_allocated_get_hard(void); + +# define ALLOCATED_GET() (thread_allocated_get()->allocated) +# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) +# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) +# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) +# define ALLOCATED_ADD(a, d) do { \ + thread_allocated_t *thread_allocated = thread_allocated_get(); \ + thread_allocated->allocated += (a); \ + thread_allocated->deallocated += (d); \ +} while (0) +# endif +#endif + +arena_t *arenas_extend(unsigned ind); +arena_t *choose_arena_hard(void); +int buferror(int errnum, char *buf, size_t buflen); +void jemalloc_prefork(void); +void jemalloc_postfork(void); + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif +#include
"jemalloc/internal/prof.h" + +#undef JEMALLOC_H_EXTERNS +/******************************************************************************/ +#define JEMALLOC_H_INLINES + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/prn.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mb.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/chunk.h" +#include "jemalloc/internal/huge.h" + +#ifndef JEMALLOC_ENABLE_INLINE +size_t pow2_ceil(size_t x); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); +void malloc_write(const char *s); +arena_t *choose_arena(void); +# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +thread_allocated_t *thread_allocated_get(void); +# endif +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil(size_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +#if (LG_SIZEOF_PTR == 3) + x |= x >> 32; +#endif + x++; + return (x); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_INLINE size_t +s2u(size_t size) +{ + + if (size <= small_maxclass) + return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); + if (size <= arena_maxclass) + return (PAGE_CEILING(size)); + return (CHUNK_CEILING(size)); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_INLINE size_t +sa2u(size_t size, size_t alignment, size_t *run_size_p) +{ + size_t usize; + + /* + * Round size up to the nearest multiple of alignment. 
+ * + * This done, we can take advantage of the fact that for each small + * size class, every object is aligned at the smallest power of two + * that is non-zero in the base two representation of the size. For + * example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + * + * Depending on runtime settings, it is possible that arena_malloc() + * will further round up to a power of two, but that never causes + * correctness issues. + */ + usize = (size + (alignment - 1)) & (-alignment); + /* + * (usize < size) protects against the combination of maximal + * alignment and size greater than maximal alignment. + */ + if (usize < size) { + /* size_t overflow. */ + return (0); + } + + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { + if (usize <= small_maxclass) + return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); + return (PAGE_CEILING(usize)); + } else { + size_t run_size; + + /* + * We can't achieve subpage alignment, so round up alignment + * permanently; it makes later calculations simpler. + */ + alignment = PAGE_CEILING(alignment); + usize = PAGE_CEILING(size); + /* + * (usize < size) protects against very large sizes within + * PAGE_SIZE of SIZE_T_MAX. + * + * (usize + alignment < usize) protects against the + * combination of maximal alignment and usize large enough + * to cause overflow. This is similar to the first overflow + * check above, but it needs to be repeated due to the new + * usize value, which may now be *equal* to maximal + * alignment, whereas before we only detected overflow if the + * original size was *greater* than maximal alignment. + */ + if (usize < size || usize + alignment < usize) { + /* size_t overflow. */ + return (0); + } + + /* + * Calculate the size of the over-size run that arena_palloc() + * would need to allocate in order to guarantee the alignment. 
+ */ + if (usize >= alignment) + run_size = usize + alignment - PAGE_SIZE; + else { + /* + * It is possible that (alignment << 1) will cause + * overflow, but it doesn't matter because we also + * subtract PAGE_SIZE, which in the case of overflow + * leaves us with a very large run_size. That causes + * the first conditional below to fail, which means + * that the bogus run_size value never gets used for + * anything important. + */ + run_size = (alignment << 1) - PAGE_SIZE; + } + if (run_size_p != NULL) + *run_size_p = run_size; + + if (run_size <= arena_maxclass) + return (PAGE_CEILING(usize)); + return (CHUNK_CEILING(usize)); + } +} + +/* + * Wrapper around malloc_message() that avoids the need for + * JEMALLOC_P(malloc_message)(...) throughout the code. + */ +JEMALLOC_INLINE void +malloc_write(const char *s) +{ + + JEMALLOC_P(malloc_message)(NULL, s); +} + +/* + * Choose an arena based on a per-thread value (fast-path code, calls slow-path + * code if necessary). + */ +JEMALLOC_INLINE arena_t * +choose_arena(void) +{ + arena_t *ret; + + ret = ARENA_GET(); + if (ret == NULL) { + ret = choose_arena_hard(); + assert(ret != NULL); + } + + return (ret); +} + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +JEMALLOC_INLINE thread_allocated_t * +thread_allocated_get(void) +{ + thread_allocated_t *thread_allocated = (thread_allocated_t *) + pthread_getspecific(thread_allocated_tsd); + + if (thread_allocated == NULL) + return (thread_allocated_get_hard()); + return (thread_allocated); +} +#endif +#endif + +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/tcache.h" +#include "jemalloc/internal/arena.h" +#include "jemalloc/internal/hash.h" +#ifdef JEMALLOC_ZONE +#include "jemalloc/internal/zone.h" +#endif + +#ifndef JEMALLOC_ENABLE_INLINE +void *imalloc(size_t size); +void *icalloc(size_t size); +void *ipalloc(size_t usize, size_t alignment, bool zero); +size_t isalloc(const void *ptr); +# ifdef JEMALLOC_IVSALLOC 
+size_t ivsalloc(const void *ptr); +# endif +void idalloc(void *ptr); +void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, + bool zero, bool no_move); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE void * +imalloc(size_t size) +{ + + assert(size != 0); + + if (size <= arena_maxclass) + return (arena_malloc(size, false)); + else + return (huge_malloc(size, false)); +} + +JEMALLOC_INLINE void * +icalloc(size_t size) +{ + + if (size <= arena_maxclass) + return (arena_malloc(size, true)); + else + return (huge_malloc(size, true)); +} + +JEMALLOC_INLINE void * +ipalloc(size_t usize, size_t alignment, bool zero) +{ + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment, NULL)); + + if (usize <= arena_maxclass && alignment <= PAGE_SIZE) + ret = arena_malloc(usize, zero); + else { + size_t run_size +#ifdef JEMALLOC_CC_SILENCE + = 0 +#endif + ; + + /* + * Ideally we would only ever call sa2u() once per aligned + * allocation request, and the caller of this function has + * already done so once. However, it's rather burdensome to + * require every caller to pass in run_size, especially given + * that it's only relevant to large allocations. Therefore, + * just call it again here in order to get run_size. + */ + sa2u(usize, alignment, &run_size); + if (run_size <= arena_maxclass) { + ret = arena_palloc(choose_arena(), usize, run_size, + alignment, zero); + } else if (alignment <= chunksize) + ret = huge_malloc(usize, zero); + else + ret = huge_palloc(usize, alignment, zero); + } + + assert(((uintptr_t)ret & (alignment - 1)) == 0); + return (ret); +} + +JEMALLOC_INLINE size_t +isalloc(const void *ptr) +{ + size_t ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. 
*/ + dassert(chunk->arena->magic == ARENA_MAGIC); + +#ifdef JEMALLOC_PROF + ret = arena_salloc_demote(ptr); +#else + ret = arena_salloc(ptr); +#endif + } else + ret = huge_salloc(ptr); + + return (ret); +} + +#ifdef JEMALLOC_IVSALLOC +JEMALLOC_INLINE size_t +ivsalloc(const void *ptr) +{ + + /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) + return (0); + + return (isalloc(ptr)); +} +#endif + +JEMALLOC_INLINE void +idalloc(void *ptr) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) + arena_dalloc(chunk->arena, chunk, ptr); + else + huge_dalloc(ptr, true); +} + +JEMALLOC_INLINE void * +iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, + bool no_move) +{ + void *ret; + size_t oldsize; + + assert(ptr != NULL); + assert(size != 0); + + oldsize = isalloc(ptr); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + size_t usize, copysize; + + /* + * Existing object alignment is inadequate; allocate new space + * and copy. + */ + if (no_move) + return (NULL); + usize = sa2u(size + extra, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, without extra this time. */ + usize = sa2u(size, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + if (ret == NULL) + return (NULL); + } + /* + * Copy at most size bytes (not size+extra), since the caller + * has no expectation that the extra bytes will be reliably + * preserved. + */ + copysize = (size < oldsize) ?
size : oldsize; + memcpy(ret, ptr, copysize); + idalloc(ptr); + return (ret); + } + + if (no_move) { + if (size <= arena_maxclass) { + return (arena_ralloc_no_move(ptr, oldsize, size, + extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, size, + extra)); + } + } else { + if (size + extra <= arena_maxclass) { + return (arena_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } else { + return (huge_ralloc(ptr, oldsize, size, extra, + alignment, zero)); + } + } +} +#endif + +#include "jemalloc/internal/prof.h" + +#undef JEMALLOC_H_INLINES +/******************************************************************************/ diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h new file mode 100644 index 0000000..dc9f2a5 --- /dev/null +++ b/include/jemalloc/internal/mb.h @@ -0,0 +1,108 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void mb_write(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) +#ifdef __i386__ +/* + * According to the Intel Architecture Software Developer's Manual, current + * processors execute instructions in order from the perspective of other + * processors in a multiprocessor system, but 1) Intel reserves the right to + * change that, and 2) the compiler's optimizer could re-order instructions if + * there weren't some form of barrier. 
Therefore, even if running on an + * architecture that does not need memory barriers (everything through at least + * i686), an "optimizer barrier" is necessary. + */ +JEMALLOC_INLINE void +mb_write(void) +{ + +# if 0 + /* This is a true memory barrier. */ + asm volatile ("pusha;" + "xor %%eax,%%eax;" + "cpuid;" + "popa;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#else + /* + * This is hopefully enough to keep the compiler from reordering + * instructions around this one. + */ + asm volatile ("nop;" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +#endif +} +#elif (defined(__amd64__) || defined(__x86_64__)) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("sfence" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__powerpc__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("eieio" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#elif defined(__sparc64__) +JEMALLOC_INLINE void +mb_write(void) +{ + + asm volatile ("membar #StoreStore" + : /* Outputs. */ + : /* Inputs. */ + : "memory" /* Clobbers. */ + ); +} +#else +/* + * This is much slower than a simple memory barrier, but the semantics of mutex + * unlock make this work.
+ */ +JEMALLOC_INLINE void +mb_write(void) +{ + malloc_mutex_t mtx; + + malloc_mutex_init(&mtx); + malloc_mutex_lock(&mtx); + malloc_mutex_unlock(&mtx); +} +#endif +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h new file mode 100644 index 0000000..62947ce --- /dev/null +++ b/include/jemalloc/internal/mutex.h @@ -0,0 +1,86 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#ifdef JEMALLOC_OSSPIN +typedef OSSpinLock malloc_mutex_t; +#else +typedef pthread_mutex_t malloc_mutex_t; +#endif + +#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP +#else +# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex); +void malloc_mutex_destroy(malloc_mutex_t *mutex); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(malloc_mutex_t *mutex); +bool malloc_mutex_trylock(malloc_mutex_t *mutex); +void malloc_mutex_unlock(malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockLock(mutex); +#else + pthread_mutex_lock(mutex); +#endif + } +} + 
+JEMALLOC_INLINE bool +malloc_mutex_trylock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + return (OSSpinLockTry(mutex) == false); +#else + return (pthread_mutex_trylock(mutex) != 0); +#endif + } else + return (false); +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(malloc_mutex_t *mutex) +{ + + if (isthreaded) { +#ifdef JEMALLOC_OSSPIN + OSSpinLockUnlock(mutex); +#else + pthread_mutex_unlock(mutex); +#endif + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/prn.h b/include/jemalloc/internal/prn.h new file mode 100644 index 0000000..0709d70 --- /dev/null +++ b/include/jemalloc/internal/prn.h @@ -0,0 +1,60 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * Simple linear congruential pseudo-random number generator: + * + * prn(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example, the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + * + * Macro parameters: + * uint32_t r : Result. + * unsigned lg_range : (0..32], number of bits to return (the upper bits of + * the updated state, shifted down). + * uint32_t state : Seed value. + * const uint32_t a, c : See above discussion. + */ +#define prn32(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 32); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (32 - lg_range); \ +} while (false) + +/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t.
*/ +#define prn64(r, lg_range, state, a, c) do { \ + assert(lg_range > 0); \ + assert(lg_range <= 64); \ + \ + r = (state * (a)) + (c); \ + state = r; \ + r >>= (64 - lg_range); \ +} while (false) + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h new file mode 100644 index 0000000..f943873 --- /dev/null +++ b/include/jemalloc/internal/prof.h @@ -0,0 +1,561 @@ +#ifdef JEMALLOC_PROF +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_thr_cnt_s prof_thr_cnt_t; +typedef struct prof_ctx_s prof_ctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#define PROF_PREFIX_DEFAULT "jeprof" +#define LG_PROF_BT_MAX_DEFAULT 7 +#define LG_PROF_SAMPLE_DEFAULT 0 +#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_TCMAX_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. Note that the version of + * prof_backtrace() that is based on __builtin_return_address() necessarily has + * a hard-coded number of backtrace frame handlers. + */ +#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) +# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) +#else +# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ +#endif +#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) + +/* Initial hash table size. 
*/ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUF_SIZE 65536 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned nignore; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* + * Profiling counters. An allocation/deallocation pair can operate on + * different prof_thr_cnt_t objects that are linked into the same + * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go + * negative. In principle it is possible for the *bytes counters to + * overflow/underflow, but a general solution would require something + * like 128-bit counters; this implementation doesn't bother to solve + * that problem. + */ + int64_t curobjs; + int64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +struct prof_thr_cnt_s { + /* Linkage into prof_ctx_t's cnts_ql. */ + ql_elm(prof_thr_cnt_t) cnts_link; + + /* Linkage into thread's LRU. */ + ql_elm(prof_thr_cnt_t) lru_link; + + /* + * Associated context. If a thread frees an object that it did not + * allocate, it is possible that the context is not cached in the + * thread's hash table, in which case it must be able to look up the + * context, insert a new prof_thr_cnt_t into the thread's hash table, + * and link it into the prof_ctx_t's cnts_ql. + */ + prof_ctx_t *ctx; + + /* + * Threads use memory barriers to update the counters. Since there is + * only ever one writer, the only challenge is for the reader to get a + * consistent read of the counters. + * + * The writer uses this series of operations: + * + * 1) Increment epoch to an odd number. + * 2) Update counters. 
+ * 3) Increment epoch to an even number. + * + * The reader must assure 1) that the epoch is even while it reads the + * counters, and 2) that the epoch doesn't change between the time it + * starts and finishes reading the counters. + */ + unsigned epoch; + + /* Profiling counters. */ + prof_cnt_t cnts; +}; + +struct prof_ctx_s { + /* Associated backtrace. */ + prof_bt_t *bt; + + /* Protects cnt_merged and cnts_ql. */ + malloc_mutex_t lock; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* When threads exit, they merge their stats into cnt_merged. */ + prof_cnt_t cnt_merged; + + /* + * List of profile counters, one for each thread that has allocated in + * this context. + */ + ql_head(prof_thr_cnt_t) cnts_ql; +}; + +struct prof_tdata_s { + /* + * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a + * cache of backtraces, with associated thread-specific prof_thr_cnt_t + * objects. Other threads may read the prof_thr_cnt_t contents, but no + * others will ever write them. + * + * Upon thread exit, the thread must merge all the prof_thr_cnt_t + * counter data into the associated prof_ctx_t objects, and unlink/free + * the prof_thr_cnt_t objects. + */ + ckh_t bt2cnt; + + /* LRU for contents of bt2cnt. */ + ql_head(prof_thr_cnt_t) lru_ql; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void **vec; + + /* Sampling state. */ + uint64_t prn_state; + uint64_t threshold; + uint64_t accum; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_prof; +/* + * Even if opt_prof is true, sampling can be temporarily disabled by setting + * opt_prof_active to false. No locking is used when updating opt_prof_active, + * so there are no guarantees regarding how long it will take for all threads + * to notice state changes. 
+ */
+extern bool	opt_prof_active;
+extern size_t	opt_lg_prof_bt_max;   /* lg(maximum backtrace depth). */
+extern size_t	opt_lg_prof_sample;   /* lg(mean bytes between samples). */
+extern ssize_t	opt_lg_prof_interval; /* lg(prof_interval). */
+extern bool	opt_prof_gdump;       /* High-water memory dumping. */
+extern bool	opt_prof_leak;        /* Dump leak summary at exit. */
+extern bool	opt_prof_accum;       /* Report cumulative bytes. */
+extern ssize_t	opt_lg_prof_tcmax;    /* lg(max per thread backtrace cache) */
+extern char	opt_prof_prefix[PATH_MAX + 1];
+
+/*
+ * Profile dump interval, measured in bytes allocated.  Each arena triggers a
+ * profile dump when it reaches this threshold.  The effect is that the
+ * interval between profile dumps averages prof_interval, though the actual
+ * interval between dumps will tend to be sporadic, and the interval will be a
+ * maximum of approximately (prof_interval * narenas).
+ */
+extern uint64_t	prof_interval;
+
+/*
+ * If true, promote small sampled objects to large objects, since small run
+ * headers do not have embedded profile context pointers.
+ */
+extern bool	prof_promote;
+
+/* (1U << opt_lg_prof_bt_max). */
+extern unsigned	prof_bt_max;
+
+/*
+ * Thread-specific backtrace cache, used to reduce bt2ctx contention.
+ *
+ * PROF_TCACHE_GET() yields the calling thread's prof_tdata_t pointer and
+ * PROF_TCACHE_SET(v) stores it.  With TLS available the pointer is read from
+ * prof_tdata_tls, and every store is mirrored into prof_tdata_tsd; without
+ * TLS (NO_TLS) both macros go through prof_tdata_tsd only.
+ */
+#ifndef NO_TLS
+extern __thread prof_tdata_t	*prof_tdata_tls
+    JEMALLOC_ATTR(tls_model("initial-exec"));
+#  define PROF_TCACHE_GET()	prof_tdata_tls
+#  define PROF_TCACHE_SET(v)	do {					\
+	prof_tdata_tls = (v);						\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
+} while (0)
+#else
+#  define PROF_TCACHE_GET()						\
+	((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd))
+#  define PROF_TCACHE_SET(v)	do {					\
+	pthread_setspecific(prof_tdata_tsd, (void *)(v));		\
+} while (0)
+#endif
+/*
+ * Same contents as prof_tdata_tls, but initialized such that the TSD
+ * destructor is called when a thread exits, so that prof_tdata_tls contents
+ * can be merged, unlinked, and deallocated.
+ */ +extern pthread_key_t prof_tdata_tsd; + +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); +prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); +void prof_idump(void); +bool prof_mdump(const char *filename); +void prof_gdump(void); +prof_tdata_t *prof_tdata_init(void); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void prof_sample_threshold_update(prof_tdata_t *prof_tdata); +prof_thr_cnt_t *prof_alloc_prep(size_t size); +prof_ctx_t *prof_ctx_get(const void *ptr); +void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); +bool prof_sample_accum_update(size_t size); +void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); +void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx); +void prof_free(const void *ptr, size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE void +prof_sample_threshold_update(prof_tdata_t *prof_tdata) +{ + uint64_t r; + double u; + + /* + * Compute sample threshold as a geometrically distributed random + * variable with mean (2^opt_lg_prof_sample). 
+ * + * __ __ + * | log(u) | 1 + * prof_tdata->threshold = | -------- |, where p = ------------------- + * | log(1-p) | opt_lg_prof_sample + * 2 + * + * For more information on the math, see: + * + * Non-Uniform Random Variate Generation + * Luc Devroye + * Springer-Verlag, New York, 1986 + * pp 500 + * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) + */ + prn64(r, 53, prof_tdata->prn_state, + (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); + u = (double)r * (1.0/9007199254740992.0L); + prof_tdata->threshold = (uint64_t)(log(u) / + log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) + + (uint64_t)1U; +} + +JEMALLOC_INLINE prof_thr_cnt_t * +prof_alloc_prep(size_t size) +{ +#ifdef JEMALLOC_ENABLE_INLINE + /* This function does not have its own stack frame, because it is inlined. */ +# define NIGNORE 1 +#else +# define NIGNORE 2 +#endif + prof_thr_cnt_t *ret; + prof_tdata_t *prof_tdata; + prof_bt_t bt; + + assert(size == s2u(size)); + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (opt_prof_active == false) { + /* Sampling is currently inactive, so avoid sampling. */ + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } else if (opt_lg_prof_sample == 0) { + /* + * Don't bother with sampling logic, since sampling interval is + * 1. + */ + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else { + if (prof_tdata->threshold == 0) { + /* + * Initialize. Seed the prng differently for each + * thread. + */ + prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; + prof_sample_threshold_update(prof_tdata); + } + + /* + * Determine whether to capture a backtrace based on whether + * size is enough for prof_accum to reach + * prof_tdata->threshold. However, delay updating these + * variables until prof_{m,re}alloc(), because we don't know + * for sure that the allocation will succeed. 
+ * + * Use subtraction rather than addition to avoid potential + * integer overflow. + */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + bt_init(&bt, prof_tdata->vec); + prof_backtrace(&bt, NIGNORE, prof_bt_max); + ret = prof_lookup(&bt); + } else + ret = (prof_thr_cnt_t *)(uintptr_t)1U; + } + + return (ret); +#undef NIGNORE +} + +JEMALLOC_INLINE prof_ctx_t * +prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + dassert(chunk->arena->magic == ARENA_MAGIC); + + ret = arena_prof_ctx_get(ptr); + } else + ret = huge_prof_ctx_get(ptr); + + return (ret); +} + +JEMALLOC_INLINE void +prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + arena_chunk_t *chunk; + + assert(ptr != NULL); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk != ptr) { + /* Region. */ + dassert(chunk->arena->magic == ARENA_MAGIC); + + arena_prof_ctx_set(ptr, ctx); + } else + huge_prof_ctx_set(ptr, ctx); +} + +JEMALLOC_INLINE bool +prof_sample_accum_update(size_t size) +{ + prof_tdata_t *prof_tdata; + + /* Sampling logic is unnecessary if the interval is 1. */ + assert(opt_lg_prof_sample != 0); + + prof_tdata = PROF_TCACHE_GET(); + assert(prof_tdata != NULL); + + /* Take care to avoid integer overflow. */ + if (size >= prof_tdata->threshold - prof_tdata->accum) { + prof_tdata->accum -= (prof_tdata->threshold - size); + /* Compute new sample threshold. 
*/ + prof_sample_threshold_update(prof_tdata); + while (prof_tdata->accum >= prof_tdata->threshold) { + prof_tdata->accum -= prof_tdata->threshold; + prof_sample_threshold_update(prof_tdata); + } + return (false); + } else { + prof_tdata->accum += size; + return (true); + } +} + +JEMALLOC_INLINE void +prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) +{ + + assert(ptr != NULL); + assert(size == isalloc(ptr)); + + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. For malloc()-like allocation, it is + * always possible to tell in advance how large an + * object's usable size will be, so there should never + * be a difference between the size passed to + * prof_alloc_prep() and prof_malloc(). + */ + assert((uintptr_t)cnt == (uintptr_t)1U); + } + } + + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + /*********/ + mb_write(); + /*********/ + cnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, + size_t old_size, prof_ctx_t *old_ctx) +{ + prof_thr_cnt_t *told_cnt; + + assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); + + if (ptr != NULL) { + assert(size == isalloc(ptr)); + if (opt_lg_prof_sample != 0) { + if (prof_sample_accum_update(size)) { + /* + * Don't sample. The size passed to + * prof_alloc_prep() was larger than what + * actually got allocated, so a backtrace was + * captured for this allocation, even though + * its actual size was insufficient to cross + * the sample threshold. 
+ */ + cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } + } + + if ((uintptr_t)old_ctx > (uintptr_t)1U) { + told_cnt = prof_lookup(old_ctx->bt); + if (told_cnt == NULL) { + /* + * It's too late to propagate OOM for this realloc(), + * so operate directly on old_cnt->ctx->cnt_merged. + */ + malloc_mutex_lock(&old_ctx->lock); + old_ctx->cnt_merged.curobjs--; + old_ctx->cnt_merged.curbytes -= old_size; + malloc_mutex_unlock(&old_ctx->lock); + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + } + } else + told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; + + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) { + prof_ctx_set(ptr, cnt->ctx); + cnt->epoch++; + } else + prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) { + told_cnt->cnts.curobjs--; + told_cnt->cnts.curbytes -= old_size; + } + if ((uintptr_t)cnt > (uintptr_t)1U) { + cnt->cnts.curobjs++; + cnt->cnts.curbytes += size; + if (opt_prof_accum) { + cnt->cnts.accumobjs++; + cnt->cnts.accumbytes += size; + } + } + /*********/ + mb_write(); + /*********/ + if ((uintptr_t)told_cnt > (uintptr_t)1U) + told_cnt->epoch++; + if ((uintptr_t)cnt > (uintptr_t)1U) + cnt->epoch++; + /*********/ + mb_write(); /* Not strictly necessary. */ +} + +JEMALLOC_INLINE void +prof_free(const void *ptr, size_t size) +{ + prof_ctx_t *ctx = prof_ctx_get(ptr); + + if ((uintptr_t)ctx > (uintptr_t)1) { + assert(size == isalloc(ptr)); + prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); + + if (tcnt != NULL) { + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + tcnt->cnts.curobjs--; + tcnt->cnts.curbytes -= size; + /*********/ + mb_write(); + /*********/ + tcnt->epoch++; + /*********/ + mb_write(); + /*********/ + } else { + /* + * OOM during free() cannot be propagated, so operate + * directly on cnt->ctx->cnt_merged. 
+ */ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs--; + ctx->cnt_merged.curbytes -= size; + malloc_mutex_unlock(&ctx->lock); + } + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_PROF */ diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h new file mode 100644 index 0000000..a9ed239 --- /dev/null +++ b/include/jemalloc/internal/ql.h @@ -0,0 +1,83 @@ +/* + * List definitions. + */ +#define ql_head(a_type) \ +struct { \ + a_type *qlh_first; \ +} + +#define ql_head_initializer(a_head) {NULL} + +#define ql_elm(a_type) qr(a_type) + +/* List functions. */ +#define ql_new(a_head) do { \ + (a_head)->qlh_first = NULL; \ +} while (0) + +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) + +#define ql_first(a_head) ((a_head)->qlh_first) + +#define ql_last(a_head, a_field) \ + ((ql_first(a_head) != NULL) \ + ? qr_prev(ql_first(a_head), a_field) : NULL) + +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) \ + ? qr_next((a_elm), a_field) : NULL) + +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ + : NULL) + +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ +} while (0) + +#define ql_after_insert(a_qlelm, a_elm, a_field) \ + qr_after_insert((a_qlelm), (a_elm), a_field) + +#define ql_head_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ +} while (0) + +#define ql_tail_insert(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) != NULL) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ +} while (0) + +#define ql_remove(a_head, a_elm, a_field) do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_first(a_head) = NULL; \ + } \ +} while (0) + +#define ql_head_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_tail_remove(a_head, a_type, a_field) do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ +} while (0) + +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach((a_var), ql_first(a_head), a_field) + +#define ql_reverse_foreach(a_var, a_head, a_field) \ + qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h new file mode 100644 index 0000000..fe22352 --- /dev/null +++ b/include/jemalloc/internal/qr.h @@ -0,0 +1,67 @@ +/* Ring definitions. */ +#define qr(a_type) \ +struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ +} + +/* Ring functions. 
*/ +#define qr_new(a_qr, a_field) do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) + +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) + +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qrelm); \ + (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ + (a_qrelm)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + do \ + { \ + (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ + (a_qr)->a_field.qre_prev = (a_qrelm); \ + (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ + (a_qrelm)->a_field.qre_next = (a_qr); \ + } while (0) + +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + void *t; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + t = (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = t; \ +} while (0) + +/* qr_meld() and qr_split() are functionally equivalent, so there's no need to + * have two copies of the code. */ +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) + +#define qr_remove(a_qr, a_field) do { \ + (a_qr)->a_field.qre_prev->a_field.qre_next \ + = (a_qr)->a_field.qre_next; \ + (a_qr)->a_field.qre_next->a_field.qre_prev \ + = (a_qr)->a_field.qre_prev; \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ +} while (0) + +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); \ + (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next : NULL)) + +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) \ + ? 
(var)->a_field.qre_prev : NULL))
diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h
new file mode 100644
index 0000000..ee9b009
--- /dev/null
+++ b/include/jemalloc/internal/rb.h
@@ -0,0 +1,973 @@
+/*-
+ *******************************************************************************
+ *
+ * cpp macro implementation of left-leaning 2-3 red-black trees.  Parent
+ * pointers are not used, and color bits are stored in the least significant
+ * bit of right-child pointers (if RB_COMPACT is defined), thus making node
+ * linkage as compact as is possible for red-black trees.
+ *
+ * Usage:
+ *
+ *   #include <stdint.h>
+ *   #include <stdbool.h>
+ *   #define NDEBUG // (Optional, see assert(3).)
+ *   #include <assert.h>
+ *   #define RB_COMPACT // (Optional, embed color bits in right-child pointers.)
+ *   #include <rb.h>
+ *   ...
+ *
+ *******************************************************************************
+ */
+
+#ifndef RB_H_
+#define	RB_H_
+
+#if 0
+__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $");
+#endif
+
+#ifdef RB_COMPACT
+/*
+ * Node structure.  In the compact layout the color shares storage with the
+ * right-child pointer (rbn_right_red); otherwise it is a separate bool.
+ */
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right_red;						\
+}
+#else
+#define	rb_node(a_type)							\
+struct {								\
+    a_type *rbn_left;							\
+    a_type *rbn_right;							\
+    bool rbn_red;							\
+}
+#endif
+
+/* Root structure: root pointer plus an embedded sentinel node (rbt_nil). */
+#define	rb_tree(a_type)							\
+struct {								\
+    a_type *rbt_root;							\
+    a_type rbt_nil;							\
+}
+
+/* Left accessors. */
+#define	rbtn_left_get(a_type, a_field, a_node)				\
+    ((a_node)->a_field.rbn_left)
+#define	rbtn_left_set(a_type, a_field, a_node, a_left) do {		\
+    (a_node)->a_field.rbn_left = a_left;				\
+} while (0)
+
+#ifdef RB_COMPACT
+/* Right accessors.
*/ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ + & ((ssize_t)-2))) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ + | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ + & ((size_t)1))) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ + | ((ssize_t)a_red)); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ + (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ + (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ +} while (0) +#else +/* Right accessors. */ +#define rbtn_right_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_right) +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ + (a_node)->a_field.rbn_right = a_right; \ +} while (0) + +/* Color accessors. */ +#define rbtn_red_get(a_type, a_field, a_node) \ + ((a_node)->a_field.rbn_red) +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ + (a_node)->a_field.rbn_red = (a_red); \ +} while (0) +#define rbtn_red_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = true; \ +} while (0) +#define rbtn_black_set(a_type, a_field, a_node) do { \ + (a_node)->a_field.rbn_red = false; \ +} while (0) +#endif + +/* Node initializer. 
*/ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) + +/* Tree initializer. */ +#define rb_new(a_type, a_field, a_rbt) do { \ + (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ + rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ + rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ +} while (0) + +/* Internal utility macros. */ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; \ + rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ + (r_node) = (a_root); \ + if ((r_node) != &(a_rbt)->rbt_nil) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != \ + &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ + (r_node))) { \ + } \ + } \ +} while (0) + +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ + rbtn_right_set(a_type, a_field, (a_node), \ + rbtn_left_get(a_type, a_field, (r_node))); \ + rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ + (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ + rbtn_left_set(a_type, a_field, (a_node), \ + rbtn_right_get(a_type, a_field, (r_node))); \ + rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ +} while (0) + +/* + * The rb_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to rb_gen(). 
+ */
+
+#define	rb_proto(a_attr, a_prefix, a_rbt_type, a_type)			\
+a_attr void								\
+a_prefix##new(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##first(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##last(a_rbt_type *rbtree);					\
+a_attr a_type *								\
+a_prefix##next(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##prev(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##search(a_rbt_type *rbtree, a_type *key);			\
+a_attr a_type *								\
+a_prefix##nsearch(a_rbt_type *rbtree, a_type *key);			\
+a_attr a_type *								\
+a_prefix##psearch(a_rbt_type *rbtree, a_type *key);			\
+a_attr void								\
+a_prefix##insert(a_rbt_type *rbtree, a_type *node);			\
+a_attr void								\
+a_prefix##remove(a_rbt_type *rbtree, a_type *node);			\
+a_attr a_type *								\
+a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)(	\
+  a_rbt_type *, a_type *, void *), void *arg);				\
+a_attr a_type *								\
+a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start,		\
+  a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg);
+
+/*
+ * The rb_gen() macro generates a type-specific red-black tree implementation,
+ * based on the above cpp macros.
+ *
+ * Arguments:
+ *
+ *   a_attr     : Function attribute for generated functions (ex: static).
+ *   a_prefix   : Prefix for generated functions (ex: ex_).
+ *   a_rbt_type : Type for red-black tree data structure (ex: ex_t).
+ *   a_type     : Type for red-black tree node data structure (ex: ex_node_t).
+ *   a_field    : Name of red-black tree node linkage (ex: ex_link).
+ *   a_cmp      : Node comparison function name, with the following prototype:
+ *                  int (*a_cmp)(a_type *a_node, a_type *a_other);
+ *                                       ^^^^^^
+ *                                    or a_key
+ *                Interpretation of comparison function return values:
+ *                  -1 : a_node <  a_other
+ *                   0 : a_node == a_other
+ *                   1 : a_node >  a_other
+ *                In all cases, the a_node or a_key macro argument is the first
+ *                argument to the comparison function, which makes it possible
+ *                to write comparison functions that treat the first argument
+ *                specially.
+ * + * Assuming the following setup: + * + * typedef struct ex_node_s ex_node_t; + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * }; + * typedef rb_tree(ex_node_t) ex_t; + * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) + * + * The following API is generated: + * + * static void + * ex_new(ex_t *extree); + * Description: Initialize a red-black tree structure. + * Args: + * extree: Pointer to an uninitialized red-black tree object. + * + * static ex_node_t * + * ex_first(ex_t *extree); + * static ex_node_t * + * ex_last(ex_t *extree); + * Description: Get the first/last node in extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * Ret: First/last node in extree, or NULL if extree is empty. + * + * static ex_node_t * + * ex_next(ex_t *extree, ex_node_t *node); + * static ex_node_t * + * ex_prev(ex_t *extree, ex_node_t *node); + * Description: Get node's successor/predecessor. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : A node in extree. + * Ret: node's successor/predecessor in extree, or NULL if node is + * last/first. + * + * static ex_node_t * + * ex_search(ex_t *extree, ex_node_t *key); + * Description: Search for node that matches key. + * Args: + * extree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in extree that matches key, or NULL if no match. + * + * static ex_node_t * + * ex_nsearch(ex_t *extree, ex_node_t *key); + * static ex_node_t * + * ex_psearch(ex_t *extree, ex_node_t *key); + * Description: Search for node that matches key. If no match is found, + * return what would be key's successor/predecessor, were + * key in extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * key : Search key. + * Ret: Node in extree that matches key, or if no match, hypothetical + * node's successor/predecessor (NULL if no successor/predecessor). 
+ * + * static void + * ex_insert(ex_t *extree, ex_node_t *node); + * Description: Insert node into extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : Node to be inserted into extree. + * + * static void + * ex_remove(ex_t *extree, ex_node_t *node); + * Description: Remove node from extree. + * Args: + * extree: Pointer to an initialized red-black tree object. + * node : Node in extree to be removed. + * + * static ex_node_t * + * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * static ex_node_t * + * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, + * ex_node_t *, void *), void *arg); + * Description: Iterate forward/backward over extree, starting at node. + * If extree is modified, iteration must be immediately + * terminated by the callback function that causes the + * modification. + * Args: + * extree: Pointer to an initialized red-black tree object. + * start : Node at which to start iteration, or NULL to start at + * first/last node. + * cb : Callback function, which is called for each node during + * iteration. Under normal circumstances the callback function + * should return NULL, which causes iteration to continue. If a + * callback function returns non-NULL, iteration is immediately + * terminated and the non-NULL return value is returned by the + * iterator. This is useful for re-starting iteration after + * modifying extree. + * arg : Opaque pointer passed to cb(). + * Ret: NULL if iteration completed, or the non-NULL callback return value + * that caused termination of the iteration. 
+ */ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_rbt_type *rbtree) { \ + rb_new(a_type, a_field, rbtree); \ +} \ +a_attr a_type * \ +a_prefix##first(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##last(a_rbt_type *rbtree) { \ + a_type *ret; \ + rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != &rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ + a_type *ret; \ + if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ + a_field, node), ret); \ + } else { \ + a_type *tnode = rbtree->rbt_root; \ + assert(tnode != &rbtree->rbt_nil); \ + ret = &rbtree->rbt_nil; \ + while (true) { \ + int cmp = (a_cmp)(node, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + break; \ + } \ + assert(tnode != 
&rbtree->rbt_nil); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + int cmp; \ + ret = rbtree->rbt_root; \ + while (ret != &rbtree->rbt_nil \ + && (cmp = (a_cmp)(key, ret)) != 0) { \ + if (cmp < 0) { \ + ret = rbtn_left_get(a_type, a_field, ret); \ + } else { \ + ret = rbtn_right_get(a_type, a_field, ret); \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + ret = tnode; \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ + a_type *ret; \ + a_type *tnode = rbtree->rbt_root; \ + ret = &rbtree->rbt_nil; \ + while (tnode != &rbtree->rbt_nil) { \ + int cmp = (a_cmp)(key, tnode); \ + if (cmp < 0) { \ + tnode = rbtn_left_get(a_type, a_field, tnode); \ + } else if (cmp > 0) { \ + ret = tnode; \ + tnode = rbtn_right_get(a_type, a_field, tnode); \ + } else { \ + ret = tnode; \ + break; \ + } \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = (NULL); \ + } \ + return (ret); \ +} \ +a_attr void \ +a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } path[sizeof(void *) << 4], *pathp; \ + rbt_node_new(a_type, a_field, rbtree, node); \ + /* Wind. 
*/ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + assert(cmp != 0); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + pathp->node = node; \ + /* Unwind. */ \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + a_type *cnode = pathp->node; \ + if (pathp->cmp < 0) { \ + a_type *left = pathp[1].node; \ + rbtn_left_set(a_type, a_field, cnode, left); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* Fix up 4-node. */ \ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } else { \ + a_type *right = pathp[1].node; \ + rbtn_right_set(a_type, a_field, cnode, right); \ + if (rbtn_red_get(a_type, a_field, right)) { \ + a_type *left = rbtn_left_get(a_type, a_field, cnode); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + /* Split 4-node. */ \ + rbtn_black_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, right); \ + rbtn_red_set(a_type, a_field, cnode); \ + } else { \ + /* Lean left. */ \ + a_type *tnode; \ + bool tred = rbtn_red_get(a_type, a_field, cnode); \ + rbtn_rotate_left(a_type, a_field, cnode, tnode); \ + rbtn_color_set(a_type, a_field, tnode, tred); \ + rbtn_red_set(a_type, a_field, cnode); \ + cnode = tnode; \ + } \ + } else { \ + return; \ + } \ + } \ + pathp->node = cnode; \ + } \ + /* Set root, and make it black. 
*/ \ + rbtree->rbt_root = path->node; \ + rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ +} \ +a_attr void \ +a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ + struct { \ + a_type *node; \ + int cmp; \ + } *pathp, *nodep, path[sizeof(void *) << 4]; \ + /* Wind. */ \ + nodep = NULL; /* Silence compiler warning. */ \ + path->node = rbtree->rbt_root; \ + for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + int cmp = pathp->cmp = a_cmp(node, pathp->node); \ + if (cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + if (cmp == 0) { \ + /* Find node's successor, in preparation for swap. */ \ + pathp->cmp = 1; \ + nodep = pathp; \ + for (pathp++; pathp->node != &rbtree->rbt_nil; \ + pathp++) { \ + pathp->cmp = -1; \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } \ + break; \ + } \ + } \ + } \ + assert(nodep->node == node); \ + pathp--; \ + if (pathp->node != node) { \ + /* Swap node with its successor. */ \ + bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ + rbtn_color_set(a_type, a_field, pathp->node, \ + rbtn_red_get(a_type, a_field, node)); \ + rbtn_left_set(a_type, a_field, pathp->node, \ + rbtn_left_get(a_type, a_field, node)); \ + /* If node's successor is its right child, the following code */\ + /* will do the wrong thing for the right child pointer. */\ + /* However, it doesn't matter, because the pointer will be */\ + /* properly set when the successor is pruned. */\ + rbtn_right_set(a_type, a_field, pathp->node, \ + rbtn_right_get(a_type, a_field, node)); \ + rbtn_color_set(a_type, a_field, node, tred); \ + /* The pruned leaf node's child pointers are never accessed */\ + /* again, so don't bother setting them to nil. 
*/\ + nodep->node = pathp->node; \ + pathp->node = node; \ + if (nodep == path) { \ + rbtree->rbt_root = nodep->node; \ + } else { \ + if (nodep[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } else { \ + rbtn_right_set(a_type, a_field, nodep[-1].node, \ + nodep->node); \ + } \ + } \ + } else { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + if (left != &rbtree->rbt_nil) { \ + /* node has no successor, but it has a left child. */\ + /* Splice node out, without losing the left child. */\ + assert(rbtn_red_get(a_type, a_field, node) == false); \ + assert(rbtn_red_get(a_type, a_field, left)); \ + rbtn_black_set(a_type, a_field, left); \ + if (pathp == path) { \ + rbtree->rbt_root = left; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + left); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + left); \ + } \ + } \ + return; \ + } else if (pathp == path) { \ + /* The tree only contained one node. */ \ + rbtree->rbt_root = &rbtree->rbt_nil; \ + return; \ + } \ + } \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + /* Prune red node, which requires no fixup. */ \ + assert(pathp[-1].cmp < 0); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + &rbtree->rbt_nil); \ + return; \ + } \ + /* The node to be pruned is black, so unwind until balance is */\ + /* restored. 
*/\ + pathp->node = &rbtree->rbt_nil; \ + for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ + assert(pathp->cmp != 0); \ + if (pathp->cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ + == false); \ + if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + a_type *tnode; \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* In the following diagrams, ||, //, and \\ */\ + /* indicate the path to the removed node. */\ + /* */\ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + /* */\ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + /* */\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root. 
*/\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + a_type *right = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + a_type *rightleft = rbtn_left_get(a_type, a_field, \ + right); \ + if (rbtn_red_get(a_type, a_field, rightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, rightleft); \ + rbtn_rotate_right(a_type, a_field, right, tnode); \ + rbtn_right_set(a_type, a_field, pathp->node, tnode);\ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* // \ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + a_type *tnode; \ + rbtn_red_set(a_type, a_field, pathp->node); \ + rbtn_rotate_left(a_type, a_field, pathp->node, \ + tnode); \ + pathp->node = tnode; \ + } \ + } \ + } else { \ + a_type *left; \ + rbtn_right_set(a_type, a_field, pathp->node, \ + pathp[1].node); \ + left = rbtn_left_get(a_type, a_field, pathp->node); \ + if (rbtn_red_get(a_type, a_field, left)) { \ + a_type *tnode; \ + a_type *leftright = rbtn_right_get(a_type, a_field, \ + left); \ + a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ + leftright); \ + if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (r) */\ + a_type *unode; \ + 
rbtn_black_set(a_type, a_field, leftrightleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + unode); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_right_set(a_type, a_field, unode, tnode); \ + rbtn_rotate_left(a_type, a_field, unode, tnode); \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (r) (b) */\ + /* \ */\ + /* (b) */\ + /* / */\ + /* (b) */\ + assert(leftright != &rbtree->rbt_nil); \ + rbtn_red_set(a_type, a_field, leftright); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + rbtn_black_set(a_type, a_field, tnode); \ + } \ + /* Balance restored, but rotation modified subtree */\ + /* root, which may actually be the tree root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + } \ + return; \ + } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, pathp->node); \ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root. */\ + assert((uintptr_t)pathp > (uintptr_t)path); \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, pathp[-1].node, \ + tnode); \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(r) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + rbtn_black_set(a_type, a_field, pathp->node); \ + /* Balance restored. 
*/ \ + return; \ + } \ + } else { \ + a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ + if (rbtn_red_get(a_type, a_field, leftleft)) { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (r) */\ + a_type *tnode; \ + rbtn_black_set(a_type, a_field, leftleft); \ + rbtn_rotate_right(a_type, a_field, pathp->node, \ + tnode); \ + /* Balance restored, but rotation modified */\ + /* subtree root, which may actually be the tree */\ + /* root. */\ + if (pathp == path) { \ + /* Set root. */ \ + rbtree->rbt_root = tnode; \ + } else { \ + if (pathp[-1].cmp < 0) { \ + rbtn_left_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } else { \ + rbtn_right_set(a_type, a_field, \ + pathp[-1].node, tnode); \ + } \ + } \ + return; \ + } else { \ + /* || */\ + /* pathp(b) */\ + /* / \\ */\ + /* (b) (b) */\ + /* / */\ + /* (b) */\ + rbtn_red_set(a_type, a_field, left); \ + } \ + } \ + } \ + } \ + /* Set root. */ \ + rbtree->rbt_root = path->node; \ + assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ + a_field, node), cb, arg)) != &rbtree->rbt_nil \ + || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp < 0) { \ + a_type *ret; \ + if ((ret = a_prefix##iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return 
(ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } else if (cmp > 0) { \ + return (a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ + a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ + cb, arg); \ + } else { \ + ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return (&rbtree->rbt_nil); \ + } else { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg) { \ + int cmp = a_cmp(start, node); \ + if (cmp > 0) { \ + a_type *ret; \ + if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != \ + &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else if (cmp 
< 0) { \ + return (a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } else { \ + a_type *ret; \ + if ((ret = cb(rbtree, node, arg)) != NULL) { \ + return (ret); \ + } \ + return (a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg)); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start(rbtree, start, \ + rbtree->rbt_root, cb, arg); \ + } else { \ + ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ + cb, arg); \ + } \ + if (ret == &rbtree->rbt_nil) { \ + ret = NULL; \ + } \ + return (ret); \ +} + +#endif /* RB_H_ */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h new file mode 100644 index 0000000..95d6355 --- /dev/null +++ b/include/jemalloc/internal/rtree.h @@ -0,0 +1,161 @@ +/* + * This radix tree implementation is tailored to the singular purpose of + * tracking which chunks are currently owned by jemalloc. This functionality + * is mandatory for OS X, where jemalloc must be able to respond to object + * ownership queries. + * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct rtree_s rtree_t; + +/* + * Size of each radix tree node (must be a power of 2). This impacts tree + * depth. + */ +#if (LG_SIZEOF_PTR == 2) +# define RTREE_NODESIZE (1U << 14) +#else +# define RTREE_NODESIZE CACHELINE +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct rtree_s { + malloc_mutex_t mutex; + void **root; + unsigned height; + unsigned level2bits[1]; /* Dynamically sized. 
*/ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +rtree_t *rtree_new(unsigned bits); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +#ifndef JEMALLOC_DEBUG +void *rtree_get_locked(rtree_t *rtree, uintptr_t key); +#endif +void *rtree_get(rtree_t *rtree, uintptr_t key); +bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) +#define RTREE_GET_GENERATE(f) \ +/* The least significant bits of the key are ignored. */ \ +JEMALLOC_INLINE void * \ +f(rtree_t *rtree, uintptr_t key) \ +{ \ + void *ret; \ + uintptr_t subkey; \ + unsigned i, lshift, height, bits; \ + void **node, **child; \ + \ + RTREE_LOCK(&rtree->mutex); \ + for (i = lshift = 0, height = rtree->height, node = rtree->root;\ + i < height - 1; \ + i++, lshift += bits, node = child) { \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ + 3)) - bits); \ + child = (void**)node[subkey]; \ + if (child == NULL) { \ + RTREE_UNLOCK(&rtree->mutex); \ + return (NULL); \ + } \ + } \ + \ + /* \ + * node is a leaf, so it contains values rather than node \ + * pointers. 
\ + */ \ + bits = rtree->level2bits[i]; \ + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ + bits); \ + ret = node[subkey]; \ + RTREE_UNLOCK(&rtree->mutex); \ + \ + RTREE_GET_VALIDATE \ + return (ret); \ +} + +#ifdef JEMALLOC_DEBUG +# define RTREE_LOCK(l) malloc_mutex_lock(l) +# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) +# define RTREE_GET_VALIDATE +RTREE_GET_GENERATE(rtree_get_locked) +# undef RTREE_LOCK +# undef RTREE_UNLOCK +# undef RTREE_GET_VALIDATE +#endif + +#define RTREE_LOCK(l) +#define RTREE_UNLOCK(l) +#ifdef JEMALLOC_DEBUG + /* + * Suppose that it were possible for a jemalloc-allocated chunk to be + * munmap()ped, followed by a different allocator in another thread re-using + * overlapping virtual memory, all without invalidating the cached rtree + * value. The result would be a false positive (the rtree would claim that + * jemalloc owns memory that it had actually discarded). This scenario + * seems impossible, but the following assertion is a prudent sanity check. 
+ */ +# define RTREE_GET_VALIDATE \ + assert(rtree_get_locked(rtree, key) == ret); +#else +# define RTREE_GET_VALIDATE +#endif +RTREE_GET_GENERATE(rtree_get) +#undef RTREE_LOCK +#undef RTREE_UNLOCK +#undef RTREE_GET_VALIDATE + +JEMALLOC_INLINE bool +rtree_set(rtree_t *rtree, uintptr_t key, void *val) +{ + uintptr_t subkey; + unsigned i, lshift, height, bits; + void **node, **child; + + malloc_mutex_lock(&rtree->mutex); + for (i = lshift = 0, height = rtree->height, node = rtree->root; + i < height - 1; + i++, lshift += bits, node = child) { + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - + bits); + child = (void**)node[subkey]; + if (child == NULL) { + child = (void**)base_alloc(sizeof(void *) << + rtree->level2bits[i+1]); + if (child == NULL) { + malloc_mutex_unlock(&rtree->mutex); + return (true); + } + memset(child, 0, sizeof(void *) << + rtree->level2bits[i+1]); + node[subkey] = child; + } + } + + /* node is a leaf, so it contains values rather than node pointers. 
*/ + bits = rtree->level2bits[i]; + subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); + node[subkey] = val; + malloc_mutex_unlock(&rtree->mutex); + + return (false); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h new file mode 100644 index 0000000..2a9b31d --- /dev/null +++ b/include/jemalloc/internal/stats.h @@ -0,0 +1,207 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define UMAX2S_BUFSIZE 65 + +#ifdef JEMALLOC_STATS +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; +#endif +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +typedef struct chunk_stats_s chunk_stats_t; +#endif + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#ifdef JEMALLOC_STATS + +#ifdef JEMALLOC_TCACHE +struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; +#endif + +struct malloc_bin_stats_s { + /* + * Current number of bytes allocated, including objects currently + * cached by tcache. + */ + size_t allocated; + + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting in many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. 
+ */ + uint64_t nrequests; + +#ifdef JEMALLOC_TCACHE + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; +#endif + + /* Total number of runs created for this bin's size class. */ + uint64_t nruns; + + /* + * Total number of runs reused by extracting them from the runs tree for + * this bin's size class. + */ + uint64_t reruns; + + /* High-water mark for this bin. */ + size_t highruns; + + /* Current number of runs in this bin. */ + size_t curruns; +}; + +struct malloc_large_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. Note that tcache may allocate an object, then recycle it + * many times, resulting in many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* High-water mark for this size class. */ + size_t highruns; + + /* Current number of runs of this size class. */ + size_t curruns; +}; + +struct arena_stats_s { + /* Number of bytes currently mapped. */ + size_t mapped; + + /* + * Total number of purge sweeps, total number of madvise calls made, + * and total pages purged in order to keep dirty unused memory under + * control. + */ + uint64_t npurge; + uint64_t nmadvise; + uint64_t purged; + + /* Per-size-category statistics. */ + size_t allocated_large; + uint64_t nmalloc_large; + uint64_t ndalloc_large; + uint64_t nrequests_large; + + /* + * One element for each possible size class, including sizes that + * overlap with bin size classes. This is necessary because ipalloc() + * sometimes has to use such large objects in order to assure proper + * alignment. 
+ */ + malloc_large_stats_t *lstats; +}; +#endif /* JEMALLOC_STATS */ + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +struct chunk_stats_s { +# ifdef JEMALLOC_STATS + /* Number of chunks that were allocated. */ + uint64_t nchunks; +# endif + + /* High-water mark for number of chunks allocated. */ + size_t highchunks; + + /* + * Current number of chunks allocated. This value isn't maintained for + * any other purpose, so keep track of it in order to be able to set + * highchunks. + */ + size_t curchunks; +}; +#endif /* JEMALLOC_STATS || JEMALLOC_PROF */ + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_stats_print; + +#ifdef JEMALLOC_STATS +extern size_t stats_cactive; +#endif + +char *u2s(uint64_t x, unsigned base, char *s); +#ifdef JEMALLOC_STATS +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); +void malloc_printf(const char *format, ...) 
+ JEMALLOC_ATTR(format(printf, 1, 2)); +#endif +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES +#ifdef JEMALLOC_STATS + +#ifndef JEMALLOC_ENABLE_INLINE +size_t stats_cactive_get(void); +void stats_cactive_add(size_t size); +void stats_cactive_sub(size_t size); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) +JEMALLOC_INLINE size_t +stats_cactive_get(void) +{ + + return (atomic_read_z(&stats_cactive)); +} + +JEMALLOC_INLINE void +stats_cactive_add(size_t size) +{ + + atomic_add_z(&stats_cactive, size); +} + +JEMALLOC_INLINE void +stats_cactive_sub(size_t size) +{ + + atomic_sub_z(&stats_cactive, size); +} +#endif + +#endif /* JEMALLOC_STATS */ +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h new file mode 100644 index 0000000..da3c68c --- /dev/null +++ b/include/jemalloc/internal/tcache.h @@ -0,0 +1,431 @@ +#ifdef JEMALLOC_TCACHE +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per run for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. 
*/ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation + * events between full GC sweeps (-1: disabled). Integer rounding may cause + * the actual number to be slightly higher, since GC is performed + * incrementally. + */ +#define LG_TCACHE_GC_SWEEP_DEFAULT 13 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + +struct tcache_bin_s { +# ifdef JEMALLOC_STATS + tcache_bin_stats_t tstats; +# endif + int low_water; /* Min # cached since last GC. */ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned ncached; /* # of cached objects. */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { +# ifdef JEMALLOC_STATS + ql_elm(tcache_t) link; /* Used for aggregating stats. */ +# endif +# ifdef JEMALLOC_PROF + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ +# endif + arena_t *arena; /* This thread's arena. */ + unsigned ev_cnt; /* Event count since incremental GC. */ + unsigned next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; +extern ssize_t opt_lg_tcache_gc_sweep; + +extern tcache_bin_info_t *tcache_bin_info; + +/* Map of thread-specific caches. 
*/ +#ifndef NO_TLS +extern __thread tcache_t *tcache_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +# define TCACHE_GET() tcache_tls +# define TCACHE_SET(v) do { \ + tcache_tls = (tcache_t *)(v); \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#else +# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) +# define TCACHE_SET(v) do { \ + pthread_setspecific(tcache_tsd, (void *)(v)); \ +} while (0) +#endif +extern pthread_key_t tcache_tsd; + +/* + * Number of tcache bins. There are nbins small-object bins, plus 0 or more + * large-object bins. + */ +extern size_t nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* Number of tcache allocation/deallocation events between incremental GCs. */ +extern unsigned tcache_gc_incr; + +void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ); +void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ); +tcache_t *tcache_create(arena_t *arena); +void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, + size_t binind); +void tcache_destroy(tcache_t *tcache); +#ifdef JEMALLOC_STATS +void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +#endif +bool tcache_boot(void); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void tcache_event(tcache_t *tcache); +tcache_t *tcache_get(void); +void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); +void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); +void tcache_dalloc_small(tcache_t *tcache, void *ptr); +void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); +#endif + +#if 
(defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) +JEMALLOC_INLINE tcache_t * +tcache_get(void) +{ + tcache_t *tcache; + + if ((isthreaded & opt_tcache) == false) + return (NULL); + + tcache = TCACHE_GET(); + if ((uintptr_t)tcache <= (uintptr_t)2) { + if (tcache == NULL) { + tcache = tcache_create(choose_arena()); + if (tcache == NULL) + return (NULL); + } else { + if (tcache == (void *)(uintptr_t)1) { + /* + * Make a note that an allocator function was + * called after the tcache_thread_cleanup() was + * called. + */ + TCACHE_SET((uintptr_t)2); + } + return (NULL); + } + } + + return (tcache); +} + +JEMALLOC_INLINE void +tcache_event(tcache_t *tcache) +{ + + if (tcache_gc_incr == 0) + return; + + tcache->ev_cnt++; + assert(tcache->ev_cnt <= tcache_gc_incr); + if (tcache->ev_cnt == tcache_gc_incr) { + size_t binind = tcache->next_gc_bin; + tcache_bin_t *tbin = &tcache->tbins[binind]; + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + + if (tbin->low_water > 0) { + /* + * Flush (ceiling) 3/4 of the objects below the low + * water mark. + */ + if (binind < nbins) { + tcache_bin_flush_small(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } else { + tcache_bin_flush_large(tbin, binind, + tbin->ncached - tbin->low_water + + (tbin->low_water >> 2) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) + >= 1) + tbin->lg_fill_div++; + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X. Make sure lg_fill_div + * stays greater than 0. 
+ */ + if (tbin->lg_fill_div > 1) + tbin->lg_fill_div--; + } + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) + tcache->next_gc_bin = 0; + tcache->ev_cnt = 0; + } +} + +JEMALLOC_INLINE void * +tcache_alloc_easy(tcache_bin_t *tbin) +{ + void *ret; + + if (tbin->ncached == 0) { + tbin->low_water = -1; + return (NULL); + } + tbin->ncached--; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; + ret = tbin->avail[tbin->ncached]; + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + binind = SMALL_SIZE2BIN(size); + assert(binind < nbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + ret = tcache_alloc_small_hard(tcache, tbin, binind); + if (ret == NULL) + return (NULL); + } + assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); + + if (zero == false) { +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); +#endif + } else + memset(ret, 0, size); + +#ifdef JEMALLOC_STATS + tbin->tstats.nrequests++; +#endif +#ifdef JEMALLOC_PROF + tcache->prof_accumbytes += arena_bin_info[binind].reg_size; +#endif + tcache_event(tcache); + return (ret); +} + +JEMALLOC_INLINE void * +tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) +{ + void *ret; + size_t binind; + tcache_bin_t *tbin; + + size = PAGE_CEILING(size); + assert(size <= tcache_maxclass); + binind = nbins + (size >> PAGE_SHIFT) - 1; + assert(binind < nhbins); + tbin = &tcache->tbins[binind]; + ret = tcache_alloc_easy(tbin); + if (ret == NULL) { + /* + * Only allocate one large object at a time, because it's quite + * expensive to create one and not use it. 
+ */ + ret = arena_malloc_large(tcache->arena, size, zero); + if (ret == NULL) + return (NULL); + } else { +#ifdef JEMALLOC_PROF + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> + PAGE_SHIFT); + chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; +#endif + if (zero == false) { +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); +#endif + } else + memset(ret, 0, size); + +#ifdef JEMALLOC_STATS + tbin->tstats.nrequests++; +#endif +#ifdef JEMALLOC_PROF + tcache->prof_accumbytes += size; +#endif + } + + tcache_event(tcache); + return (ret); +} + +JEMALLOC_INLINE void +tcache_dalloc_small(tcache_t *tcache, void *ptr) +{ + arena_t *arena; + arena_chunk_t *chunk; + arena_run_t *run; + arena_bin_t *bin; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + size_t pageind, binind; + arena_chunk_map_t *mapelm; + + assert(arena_salloc(ptr) <= small_maxclass); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapelm = &chunk->map[pageind-map_bias]; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); + dassert(run->magic == ARENA_RUN_MAGIC); + bin = run->bin; + binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / + sizeof(arena_bin_t); + assert(binind < nbins); + +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ptr, 0x5a, arena_bin_info[binind].reg_size); +#endif + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> + 1) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} + +JEMALLOC_INLINE 
void +tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) +{ + arena_t *arena; + arena_chunk_t *chunk; + size_t pageind, binind; + tcache_bin_t *tbin; + tcache_bin_info_t *tbin_info; + + assert((size & PAGE_MASK) == 0); + assert(arena_salloc(ptr) > small_maxclass); + assert(arena_salloc(ptr) <= tcache_maxclass); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + binind = nbins + (size >> PAGE_SHIFT) - 1; + +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ptr, 0x5a, size); +#endif + + tbin = &tcache->tbins[binind]; + tbin_info = &tcache_bin_info[binind]; + if (tbin->ncached == tbin_info->ncached_max) { + tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> + 1) +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + } + assert(tbin->ncached < tbin_info->ncached_max); + tbin->avail[tbin->ncached] = ptr; + tbin->ncached++; + + tcache_event(tcache); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ +#endif /* JEMALLOC_TCACHE */ diff --git a/include/jemalloc/internal/zone.h b/include/jemalloc/internal/zone.h new file mode 100644 index 0000000..859b529 --- /dev/null +++ b/include/jemalloc/internal/zone.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." 
+#endif +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +malloc_zone_t *create_zone(void); +void szone2ozone(malloc_zone_t *zone); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/jemalloc.h.in b/include/jemalloc/jemalloc.h.in new file mode 100644 index 0000000..580a5ec --- /dev/null +++ b/include/jemalloc/jemalloc.h.in @@ -0,0 +1,66 @@ +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ +#ifdef __cplusplus +extern "C" { +#endif + +#include <limits.h> +#include <strings.h> + +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" + +#include "jemalloc_defs@install_suffix@.h" +#ifndef JEMALLOC_P +# define JEMALLOC_P(s) s +#endif + +#define ALLOCM_LG_ALIGN(la) (la) +#if LG_SIZEOF_PTR == 2 +#define ALLOCM_ALIGN(a) (ffs(a)-1) +#else +#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? 
ffs(a)-1 : ffs(a>>32)+31) +#endif +#define ALLOCM_ZERO ((int)0x40) +#define ALLOCM_NO_MOVE ((int)0x80) + +#define ALLOCM_SUCCESS 0 +#define ALLOCM_ERR_OOM 1 +#define ALLOCM_ERR_NOT_MOVED 2 + +extern const char *JEMALLOC_P(malloc_conf); +extern void (*JEMALLOC_P(malloc_message))(void *, const char *); + +void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); +void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); +int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) + JEMALLOC_ATTR(nonnull(1)); +void *JEMALLOC_P(realloc)(void *ptr, size_t size); +void JEMALLOC_P(free)(void *ptr); + +size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); +void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), + void *cbopaque, const char *opts); +int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, + size_t *miblenp); +int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); + +int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, + size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) + JEMALLOC_ATTR(nonnull(1)); +int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); + +#ifdef __cplusplus +}; +#endif +#endif /* JEMALLOC_H_ */ diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in new file mode 100644 index 0000000..d8c81d7 --- /dev/null +++ b/include/jemalloc/jemalloc_defs.h.in @@ -0,0 +1,158 @@ +#ifndef JEMALLOC_DEFS_H_ +#define JEMALLOC_DEFS_H_ + +/* + * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. 
+ * This makes it possible, with some care, to use multiple allocators + * simultaneously. + * + * In many cases it is more convenient to manually prefix allocator function + * calls than to let macros do it automatically, particularly when using + * multiple allocators simultaneously. Define JEMALLOC_MANGLE before + * #include'ing jemalloc.h in order to cause name mangling that corresponds to + * the API prefixing. + */ +#undef JEMALLOC_PREFIX +#undef JEMALLOC_CPREFIX +#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) +#undef JEMALLOC_P +#endif + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#undef CPU_SPINWAIT + +/* + * Defined if OSAtomic*() functions are available, as provided by Darwin, and + * documented in the atomic(3) manual page. + */ +#undef JEMALLOC_OSATOMIC + +/* + * Defined if OSSpin*() functions are available, as provided by Darwin, and + * documented in the spinlock(3) manual page. + */ +#undef JEMALLOC_OSSPIN + +/* Defined if __attribute__((...)) syntax is supported. */ +#undef JEMALLOC_HAVE_ATTR +#ifdef JEMALLOC_HAVE_ATTR +# define JEMALLOC_ATTR(s) __attribute__((s)) +#else +# define JEMALLOC_ATTR(s) +#endif + +/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ +#undef JEMALLOC_CC_SILENCE + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +#undef JEMALLOC_DEBUG + +/* JEMALLOC_STATS enables statistics calculation. */ +#undef JEMALLOC_STATS + +/* JEMALLOC_PROF enables allocation profiling. */ +#undef JEMALLOC_PROF + +/* Use libunwind for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBUNWIND + +/* Use libgcc for profile backtracing if defined. */ +#undef JEMALLOC_PROF_LIBGCC + +/* Use gcc intrinsics for profile backtracing if defined. */ +#undef JEMALLOC_PROF_GCC + +/* + * JEMALLOC_TINY enables support for tiny objects, which are smaller than one + * quantum. 
+ */ +#undef JEMALLOC_TINY + +/* + * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. + * This makes it possible to allocate/deallocate objects without any locking + * when the cache is in the steady state. + */ +#undef JEMALLOC_TCACHE + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * segment (DSS). + */ +#undef JEMALLOC_DSS + +/* JEMALLOC_SWAP enables mmap()ed swap file support. */ +#undef JEMALLOC_SWAP + +/* Support memory filling (junk/zero). */ +#undef JEMALLOC_FILL + +/* Support optional abort() on OOM. */ +#undef JEMALLOC_XMALLOC + +/* Support SYSV semantics. */ +#undef JEMALLOC_SYSV + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +#undef JEMALLOC_LAZY_LOCK + +/* Determine page size at run time if defined. */ +#undef DYNAMIC_PAGE_SHIFT + +/* One page is 2^STATIC_PAGE_SHIFT bytes. */ +#undef STATIC_PAGE_SHIFT + +/* TLS is used to map arenas and magazine caches to threads. */ +#undef NO_TLS + +/* + * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside + * within jemalloc-owned chunks before dereferencing them. + */ +#undef JEMALLOC_IVSALLOC + +/* + * Define overrides for non-standard allocator-related functions if they + * are present on the system. + */ +#undef JEMALLOC_OVERRIDE_MEMALIGN +#undef JEMALLOC_OVERRIDE_VALLOC + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +#undef JEMALLOC_ZONE +#undef JEMALLOC_ZONE_VERSION + +/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ +#undef JEMALLOC_MREMAP_FIXED + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched. 
+ * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being + * unused, such that they will be discarded rather + * than swapped out. + */ +#undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_FREE + +/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ +#undef LG_SIZEOF_PTR + +/* sizeof(int) == 2^LG_SIZEOF_INT. */ +#undef LG_SIZEOF_INT + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#undef LG_SIZEOF_LONG + +#endif /* JEMALLOC_DEFS_H_ */ diff --git a/install-sh b/install-sh new file mode 100755 index 0000000..ebc6691 --- /dev/null +++ b/install-sh @@ -0,0 +1,250 @@ +#! /bin/sh +# +# install - install a program, script, or datafile +# This comes from X11R5 (mit/util/scripts/install.sh). +# +# Copyright 1991 by the Massachusetts Institute of Technology +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of M.I.T. not be used in advertising or +# publicity pertaining to distribution of the software without specific, +# written prior permission. M.I.T. makes no representations about the +# suitability of this software for any purpose. It is provided "as is" +# without express or implied warranty. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. 
+ +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. 
+ + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. 
+ + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/jemalloc/COPYING b/jemalloc/COPYING deleted file mode 100644 index 10ade12..0000000 --- a/jemalloc/COPYING +++ /dev/null @@ -1,51 +0,0 @@ -Unless otherwise specified, files in the jemalloc source distribution are -subject to the following licenses: --------------------------------------------------------------------------------- -Copyright (C) 2002-2010 Jason Evans . -All rights reserved. -Copyright (C) 2007-2010 Mozilla Foundation. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -1. Redistributions of source code must retain the above copyright notice(s), - this list of conditions and the following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice(s), - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE -OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF -ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- -Copyright (C) 2009-2010 Facebook, Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. -* Neither the name of Facebook, Inc. nor the names of its contributors may be - used to endorse or promote products derived from this software without - specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. --------------------------------------------------------------------------------- diff --git a/jemalloc/ChangeLog b/jemalloc/ChangeLog deleted file mode 100644 index 7b262c9..0000000 --- a/jemalloc/ChangeLog +++ /dev/null @@ -1,213 +0,0 @@ -Following are change highlights associated with official releases. Important -bug fixes are all mentioned, but internal enhancements are omitted here for -brevity (even though they are more fun to write about). Much more detail can be -found in the git revision history: - - http://www.canonware.com/cgi-bin/gitweb.cgi?p=jemalloc.git - git://canonware.com/jemalloc.git - -* 2.2.1 (March 30, 2011) - - Bug fixes: - - Implement atomic operations for x86/x64. This fixes compilation failures - for versions of gcc that are still in wide use. - - Fix an assertion in arena_purge(). - -* 2.2.0 (March 22, 2011) - - This version incorporates several improvements to algorithms and data - structures that tend to reduce fragmentation and increase speed. - - New features: - - Add the "stats.cactive" mallctl. - - Update pprof (from google-perftools 1.7). - - Improve backtracing-related configuration logic, and add the - --disable-prof-libgcc option. - - Bug fixes: - - Change default symbol visibility from "internal", to "hidden", which - decreases the overhead of library-internal function calls. - - Fix symbol visibility so that it is also set on OS X. 
- - Fix a build dependency regression caused by the introduction of the .pic.o - suffix for PIC object files. - - Add missing checks for mutex initialization failures. - - Don't use libgcc-based backtracing except on x64, where it is known to work. - - Fix deadlocks on OS X that were due to memory allocation in - pthread_mutex_lock(). - - Heap profiling-specific fixes: - + Fix memory corruption due to integer overflow in small region index - computation, when using a small enough sample interval that profiling - context pointers are stored in small run headers. - + Fix a bootstrap ordering bug that only occurred with TLS disabled. - + Fix a rallocm() rsize bug. - + Fix error detection bugs for aligned memory allocation. - -* 2.1.3 (March 14, 2011) - - Bug fixes: - - Fix a cpp logic regression (due to the "thread.{de,}allocatedp" mallctl fix - for OS X in 2.1.2). - - Fix a "thread.arena" mallctl bug. - - Fix a thread cache stats merging bug. - -* 2.1.2 (March 2, 2011) - - Bug fixes: - - Fix "thread.{de,}allocatedp" mallctl for OS X. - - Add missing jemalloc.a to build system. - -* 2.1.1 (January 31, 2011) - - Bug fixes: - - Fix aligned huge reallocation (affected allocm()). - - Fix the ALLOCM_LG_ALIGN macro definition. - - Fix a heap dumping deadlock. - - Fix a "thread.arena" mallctl bug. - -* 2.1.0 (December 3, 2010) - - This version incorporates some optimizations that can't quite be considered - bug fixes. - - New features: - - Use Linux's mremap(2) for huge object reallocation when possible. - - Avoid locking in mallctl*() when possible. - - Add the "thread.[de]allocatedp" mallctl's. - - Convert the manual page source from roff to DocBook, and generate both roff - and HTML manuals. - - Bug fixes: - - Fix a crash due to incorrect bootstrap ordering. This only impacted - --enable-debug --enable-dss configurations. - - Fix a minor statistics bug for mallctl("swap.avail", ...). 
- -* 2.0.1 (October 29, 2010) - - Bug fixes: - - Fix a race condition in heap profiling that could cause undefined behavior - if "opt.prof_accum" were disabled. - - Add missing mutex unlocks for some OOM error paths in the heap profiling - code. - - Fix a compilation error for non-C99 builds. - -* 2.0.0 (October 24, 2010) - - This version focuses on the experimental *allocm() API, and on improved - run-time configuration/introspection. Nonetheless, numerous performance - improvements are also included. - - New features: - - Implement the experimental {,r,s,d}allocm() API, which provides a superset - of the functionality available via malloc(), calloc(), posix_memalign(), - realloc(), malloc_usable_size(), and free(). These functions can be used to - allocate/reallocate aligned zeroed memory, ask for optional extra memory - during reallocation, prevent object movement during reallocation, etc. - - Replace JEMALLOC_OPTIONS/JEMALLOC_PROF_PREFIX with MALLOC_CONF, which is - more human-readable, and more flexible. For example: - JEMALLOC_OPTIONS=AJP - is now: - MALLOC_CONF=abort:true,fill:true,stats_print:true - - Port to Apple OS X. Sponsored by Mozilla. - - Make it possible for the application to control thread-->arena mappings via - the "thread.arena" mallctl. - - Add compile-time support for all TLS-related functionality via pthreads TSD. - This is mainly of interest for OS X, which does not support TLS, but has a - TSD implementation with similar performance. - - Override memalign() and valloc() if they are provided by the system. - - Add the "arenas.purge" mallctl, which can be used to synchronously purge all - dirty unused pages. - - Make cumulative heap profiling data optional, so that it is possible to - limit the amount of memory consumed by heap profiling data structures. - - Add per thread allocation counters that can be accessed via the - "thread.allocated" and "thread.deallocated" mallctls. 
- - Incompatible changes: - - Remove JEMALLOC_OPTIONS and malloc_options (see MALLOC_CONF above). - - Increase default backtrace depth from 4 to 128 for heap profiling. - - Disable interval-based profile dumps by default. - - Bug fixes: - - Remove bad assertions in fork handler functions. These assertions could - cause aborts for some combinations of configure settings. - - Fix strerror_r() usage to deal with non-standard semantics in GNU libc. - - Fix leak context reporting. This bug tended to cause the number of contexts - to be underreported (though the reported number of objects and bytes were - correct). - - Fix a realloc() bug for large in-place growing reallocation. This bug could - cause memory corruption, but it was hard to trigger. - - Fix an allocation bug for small allocations that could be triggered if - multiple threads raced to create a new run of backing pages. - - Enhance the heap profiler to trigger samples based on usable size, rather - than request size. - - Fix a heap profiling bug due to sometimes losing track of requested object - size for sampled objects. - -* 1.0.3 (August 12, 2010) - - Bug fixes: - - Fix the libunwind-based implementation of stack backtracing (used for heap - profiling). This bug could cause zero-length backtraces to be reported. - - Add a missing mutex unlock in library initialization code. If multiple - threads raced to initialize malloc, some of them could end up permanently - blocked. - -* 1.0.2 (May 11, 2010) - - Bug fixes: - - Fix junk filling of large objects, which could cause memory corruption. - - Add MAP_NORESERVE support for chunk mapping, because otherwise virtual - memory limits could cause swap file configuration to fail. Contributed by - Jordan DeLong. - -* 1.0.1 (April 14, 2010) - - Bug fixes: - - Fix compilation when --enable-fill is specified. - - Fix threads-related profiling bugs that affected accuracy and caused memory - to be leaked during thread exit. 
- - Fix dirty page purging race conditions that could cause crashes. - - Fix crash in tcache flushing code during thread destruction. - -* 1.0.0 (April 11, 2010) - - This release focuses on speed and run-time introspection. Numerous - algorithmic improvements make this release substantially faster than its - predecessors. - - New features: - - Implement autoconf-based configuration system. - - Add mallctl*(), for the purposes of introspection and run-time - configuration. - - Make it possible for the application to manually flush a thread's cache, via - the "tcache.flush" mallctl. - - Base maximum dirty page count on proportion of active memory. - - Compute various additional run-time statistics, including per size class - statistics for large objects. - - Expose malloc_stats_print(), which can be called repeatedly by the - application. - - Simplify the malloc_message() signature to only take one string argument, - and incorporate an opaque data pointer argument for use by the application - in combination with malloc_stats_print(). - - Add support for allocation backed by one or more swap files, and allow the - application to disable over-commit if swap files are in use. - - Implement allocation profiling and leak checking. - - Removed features: - - Remove the dynamic arena rebalancing code, since thread-specific caching - reduces its utility. - - Bug fixes: - - Modify chunk allocation to work when address space layout randomization - (ASLR) is in use. - - Fix thread cleanup bugs related to TLS destruction. - - Handle 0-size allocation requests in posix_memalign(). - - Fix a chunk leak. The leaked chunks were never touched, so this impacted - virtual memory usage, but not physical memory usage. - -* linux_2008082[78]a (August 27/28, 2008) - - These snapshot releases are the simple result of incorporating Linux-specific - support into the FreeBSD malloc sources.
- --------------------------------------------------------------------------------- -vim:filetype=text:textwidth=80 diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL deleted file mode 100644 index 11a457a..0000000 --- a/jemalloc/INSTALL +++ /dev/null @@ -1,251 +0,0 @@ -Building and installing jemalloc can be as simple as typing the following while -in the root directory of the source tree: - - ./configure - make - make install - -=== Advanced configuration ===================================================== - -The 'configure' script supports numerous options that allow control of which -functionality is enabled, where jemalloc is installed, etc. Optionally, pass -any of the following arguments (not a definitive list) to 'configure': - ---help - Print a definitive list of options. - ---prefix=<install-root-dir> - Set the base directory in which to install. For example: - - ./configure --prefix=/usr/local - - will cause files to be installed into /usr/local/include, /usr/local/lib, - and /usr/local/man. - ---with-rpath=<colon-separated-rpath> - Embed one or more library paths, so that libjemalloc can find the libraries - it is linked to. This works only on ELF-based systems. - ---with-jemalloc-prefix=<prefix> - Prefix all public APIs with <prefix>. For example, if <prefix> is - "prefix_", API changes like the following occur: - - malloc() --> prefix_malloc() - malloc_conf --> prefix_malloc_conf - /etc/malloc.conf --> /etc/prefix_malloc.conf - MALLOC_CONF --> PREFIX_MALLOC_CONF - - This makes it possible to use jemalloc at the same time as the system - allocator, or even to use multiple copies of jemalloc simultaneously. - - By default, the prefix is "", except on OS X, where it is "je_". On OS X, - jemalloc overlays the default malloc zone, but makes no attempt to actually - replace the "malloc", "calloc", etc. symbols. - ---with-install-suffix=<suffix> - Append <suffix> to the base name of all installed files, such that multiple - versions of jemalloc can coexist in the same installation directory.
For - example, libjemalloc.so.0 becomes libjemalloc<suffix>.so.0. - ---enable-cc-silence - Enable code that silences non-useful compiler warnings. This is helpful - when trying to tell serious warnings from those due to compiler - limitations, but it potentially incurs a performance penalty. - ---enable-debug - Enable assertions and validation code. This incurs a substantial - performance hit, but is very useful during application development. - ---enable-stats - Enable statistics gathering functionality. See the "opt.stats_print" - option documentation for usage details. - ---enable-prof - Enable heap profiling and leak detection functionality. See the "opt.prof" - option documentation for usage details. When enabled, there are several - approaches to backtracing, and the configure script chooses the first one - in the following list that appears to function correctly: - - + libunwind (requires --enable-prof-libunwind) - + libgcc (unless --disable-prof-libgcc) - + gcc intrinsics (unless --disable-prof-gcc) - ---enable-prof-libunwind - Use the libunwind library (http://www.nongnu.org/libunwind/) for stack - backtracing. - ---disable-prof-libgcc - Disable the use of libgcc's backtracing functionality. - ---disable-prof-gcc - Disable the use of gcc intrinsics for backtracing. - ---with-static-libunwind=<libunwind.a> - Statically link against the specified libunwind.a rather than dynamically - linking with -lunwind. - ---disable-tiny - Disable tiny (sub-quantum-sized) object support. Technically it is not - legal for a malloc implementation to allocate objects with less than - quantum alignment (8 or 16 bytes, depending on architecture), but in - practice it never causes any problems if, for example, 4-byte allocations - are 4-byte-aligned. - ---disable-tcache - Disable thread-specific caches for small objects. Objects are cached and - released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for usage details.
- ---enable-swap - Enable mmap()ed swap file support. When this feature is built in, it is - possible to specify one or more files that act as backing store. This - effectively allows for per-application swap files. - ---enable-dss - Enable support for page allocation/deallocation via sbrk(2), in addition to - mmap(2). - ---enable-fill - Enable support for junk/zero filling of memory. See the "opt.junk"/ - "opt.zero" option documentation for usage details. - ---enable-xmalloc - Enable support for optional immediate termination due to out-of-memory - errors, as is commonly implemented by an "xmalloc" wrapper function for malloc. - See the "opt.xmalloc" option documentation for usage details. - ---enable-sysv - Enable support for System V semantics, wherein malloc(0) returns NULL - rather than a minimal allocation. See the "opt.sysv" option documentation - for usage details. - ---enable-dynamic-page-shift - Under most conditions, the system page size never changes (usually 4KiB or - 8KiB, depending on architecture and configuration), and unless this option - is enabled, jemalloc assumes that page size can safely be determined during - configuration and hard-coded. Enabling dynamic page size determination has - a measurable impact on performance, since the compiler is forced to load - the page size from memory rather than embedding immediate values. - ---disable-lazy-lock - Disable code that wraps pthread_create() to detect when an application - switches from single-threaded to multi-threaded mode, so that it can avoid - mutex locking/unlocking operations while in single-threaded mode. In - practice, this feature usually has little impact on performance unless - thread-specific caching is disabled. - ---disable-tls - Disable thread-local storage (TLS), which allows for fast access to - thread-local variables via the __thread keyword. If TLS is available, - jemalloc uses it for several purposes.
- ---with-xslroot= - Specify where to find DocBook XSL stylesheets when building the - documentation. - -The following environment variables (not a definitive list) impact configure's -behavior: - -CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) - -EXTRA_CFLAGS="?" - Append these flags to CFLAGS. This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. - - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. Note that CFLAGS is not passed to - 'cpp' when 'configure' is looking for include files, so you must use - CPPFLAGS instead if you need to help 'configure' find header files. - -LD_LIBRARY_PATH="?" - 'ld' uses this colon-separated list to find libraries. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - 'configure' uses this to find programs. - -=== Advanced compilation ======================================================= - -To install only parts of jemalloc, use the following targets: - - install_bin - install_include - install_lib - install_doc - -To clean up build results to varying degrees, use the following make targets: - - clean - distclean - relclean - -=== Advanced installation ====================================================== - -Optionally, define make variables when invoking make, including (not -exclusively): - -INCLUDEDIR="?" - Use this as the installation prefix for header files. - -LIBDIR="?" - Use this as the installation prefix for libraries. - -MANDIR="?" - Use this as the installation prefix for man pages. - -DESTDIR="?" - Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. 
This is useful - when installing to a different path than was specified via --prefix. - -CC="?" - Use this to invoke the C compiler. - -CFLAGS="?" - Pass these flags to the compiler. - -CPPFLAGS="?" - Pass these flags to the C preprocessor. - -LDFLAGS="?" - Pass these flags when linking. - -PATH="?" - Use this to search for programs used during configuration and building. - -=== Development ================================================================ - -If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' -script rather than 'configure'. This re-generates 'configure', enables -configuration dependency rules, and enables re-generation of automatically -generated source files. - -The build system supports using an object directory separate from the source -tree. For example, you can create an 'obj' directory, and from within that -directory, issue configuration and build commands: - - autoconf - mkdir obj - cd obj - ../configure --enable-autogen - make - -=== Documentation ============================================================== - -The manual page is generated in both html and roff formats. Any web browser -can be used to view the html manual. The roff manual page can be formatted -prior to installation via any of the following commands: - - nroff -man -t doc/jemalloc.3 - - groff -man -t -Tps doc/jemalloc.3 | ps2pdf - doc/jemalloc.3.pdf - - (cd doc; groff -man -man-ext -t -Thtml jemalloc.3 > jemalloc.3.html) diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in deleted file mode 100644 index 26da0e2..0000000 --- a/jemalloc/Makefile.in +++ /dev/null @@ -1,259 +0,0 @@ -# Clear out all vpaths, then set just one (default vpath) for the main build -# directory. -vpath -vpath % . - -# Clear the default suffixes, so that built-in rules are not used. -.SUFFIXES : - -SHELL := /bin/sh - -CC := @CC@ - -# Configuration parameters. 
-DESTDIR = -BINDIR := $(DESTDIR)@BINDIR@ -INCLUDEDIR := $(DESTDIR)@INCLUDEDIR@ -LIBDIR := $(DESTDIR)@LIBDIR@ -DATADIR := $(DESTDIR)@DATADIR@ -MANDIR := $(DESTDIR)@MANDIR@ - -# Build parameters. -CPPFLAGS := @CPPFLAGS@ -I@srcroot@include -I@objroot@include -CFLAGS := @CFLAGS@ -ifeq (macho, @abi@) -CFLAGS += -dynamic -endif -LDFLAGS := @LDFLAGS@ -LIBS := @LIBS@ -RPATH_EXTRA := @RPATH_EXTRA@ -ifeq (macho, @abi@) -SO := dylib -WL_SONAME := dylib_install_name -else -SO := so -WL_SONAME := soname -endif -REV := 1 -ifeq (macho, @abi@) -TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH=@objroot@lib -else -TEST_LIBRARY_PATH := -endif - -# Lists of files. -BINS := @srcroot@bin/pprof -CHDRS := @objroot@include/jemalloc/jemalloc@install_suffix@.h \ - @objroot@include/jemalloc/jemalloc_defs@install_suffix@.h -CSRCS := @srcroot@src/jemalloc.c @srcroot@src/arena.c @srcroot@src/atomic.c \ - @srcroot@src/base.c @srcroot@src/bitmap.c @srcroot@src/chunk.c \ - @srcroot@src/chunk_dss.c @srcroot@src/chunk_mmap.c \ - @srcroot@src/chunk_swap.c @srcroot@src/ckh.c @srcroot@src/ctl.c \ - @srcroot@src/extent.c @srcroot@src/hash.c @srcroot@src/huge.c \ - @srcroot@src/mb.c @srcroot@src/mutex.c @srcroot@src/prof.c \ - @srcroot@src/rtree.c @srcroot@src/stats.c @srcroot@src/tcache.c -ifeq (macho, @abi@) -CSRCS += @srcroot@src/zone.c -endif -STATIC_LIBS := @objroot@lib/libjemalloc@install_suffix@.a -DSOS := @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) \ - @objroot@lib/libjemalloc@install_suffix@.$(SO) \ - @objroot@lib/libjemalloc@install_suffix@_pic.a -MAN3 := @objroot@doc/jemalloc@install_suffix@.3 -DOCS_XML := @objroot@doc/jemalloc@install_suffix@.xml -DOCS_HTML := $(DOCS_XML:@objroot@%.xml=@srcroot@%.html) -DOCS_MAN3 := $(DOCS_XML:@objroot@%.xml=@srcroot@%.3) -DOCS := $(DOCS_HTML) $(DOCS_MAN3) -CTESTS := @srcroot@test/allocated.c @srcroot@test/allocm.c \ - @srcroot@test/bitmap.c @srcroot@test/mremap.c \ - @srcroot@test/posix_memalign.c @srcroot@test/rallocm.c \ - 
@srcroot@test/thread_arena.c - -.PHONY: all dist doc_html doc_man doc -.PHONY: install_bin install_include install_lib -.PHONY: install_html install_man install_doc install -.PHONY: tests check clean distclean relclean - -.SECONDARY : $(CTESTS:@srcroot@%.c=@objroot@%.o) - -# Default target. -all: $(DSOS) $(STATIC_LIBS) - -dist: doc - -@srcroot@doc/%.html : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/html.xsl - @XSLTPROC@ -o $@ @objroot@doc/html.xsl $< - -@srcroot@doc/%.3 : @objroot@doc/%.xml @srcroot@doc/stylesheet.xsl @objroot@doc/manpages.xsl - @XSLTPROC@ -o $@ @objroot@doc/manpages.xsl $< - -doc_html: $(DOCS_HTML) -doc_man: $(DOCS_MAN3) -doc: $(DOCS) - -# -# Include generated dependency files. -# --include $(CSRCS:@srcroot@%.c=@objroot@%.d) --include $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) --include $(CTESTS:@srcroot@%.c=@objroot@%.d) - -@objroot@src/%.o: @srcroot@src/%.c - @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $@)))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.o \2/g\" > $(@:%.o=%.d)" - -@objroot@src/%.pic.o: @srcroot@src/%.c - @mkdir -p $(@D) - $(CC) $(CFLAGS) -fPIC -DPIC -c $(CPPFLAGS) -o $@ $< - @$(SHELL) -ec "$(CC) -MM $(CPPFLAGS) $< | sed \"s/\($(subst /,\/,$(notdir $(basename $(basename $@))))\)\.o\([ :]*\)/$(subst /,\/,$(strip $(dir $@)))\1.pic.o \2/g\" > $(@:%.o=%.d)" - -%.$(SO) : %.$(SO).$(REV) - @mkdir -p $(@D) - ln -sf $( $(@:%.o=%.d)" - -# Automatic dependency generation misses #include "*.c". 
-@objroot@test/bitmap.o : @objroot@src/bitmap.o - -@objroot@test/%: @objroot@test/%.o \ - @objroot@lib/libjemalloc@install_suffix@.$(SO) - @mkdir -p $(@D) -ifneq (@RPATH@, ) - $(CC) -o $@ $< @RPATH@@objroot@lib -L@objroot@lib -ljemalloc@install_suffix@ -else - $(CC) -o $@ $< -L@objroot@lib -ljemalloc@install_suffix@ -endif - -install_bin: - install -d $(BINDIR) - @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ -done - -install_include: - install -d $(INCLUDEDIR)/jemalloc - @for h in $(CHDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ -done - -install_lib: $(DSOS) $(STATIC_LIBS) - install -d $(LIBDIR) - install -m 755 @objroot@lib/libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR) - ln -sf libjemalloc@install_suffix@.$(SO).$(REV) $(LIBDIR)/libjemalloc@install_suffix@.$(SO) - install -m 755 @objroot@lib/libjemalloc@install_suffix@_pic.a $(LIBDIR) - install -m 755 @objroot@lib/libjemalloc@install_suffix@.a $(LIBDIR) - -install_html: - install -d $(DATADIR)/doc/jemalloc@install_suffix@ - @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc@install_suffix@; \ -done - -install_man: - install -d $(MANDIR)/man3 - @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ -done - -install_doc: install_html install_man - -install: install_bin install_include install_lib install_doc - -tests: $(CTESTS:@srcroot@%.c=@objroot@%) - -check: tests - @mkdir -p @objroot@test - @$(SHELL) -c 'total=0; \ - failures=0; \ - echo "========================================="; \ - for t in $(CTESTS:@srcroot@%.c=@objroot@%); do \ - total=`expr $$total + 1`; \ - /bin/echo -n "$${t} ... 
"; \ - $(TEST_LIBRARY_PATH) $${t} @abs_srcroot@ @abs_objroot@ \ - > @objroot@$${t}.out 2>&1; \ - if test -e "@srcroot@$${t}.exp"; then \ - diff -u @srcroot@$${t}.exp \ - @objroot@$${t}.out >/dev/null 2>&1; \ - fail=$$?; \ - if test "$${fail}" -eq "1" ; then \ - failures=`expr $${failures} + 1`; \ - echo "*** FAIL ***"; \ - else \ - echo "pass"; \ - fi; \ - else \ - echo "*** FAIL *** (.exp file is missing)"; \ - failures=`expr $${failures} + 1`; \ - fi; \ - done; \ - echo "========================================="; \ - echo "Failures: $${failures}/$${total}"' - -clean: - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.o) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.d) - rm -f $(CSRCS:@srcroot@%.c=@objroot@%.pic.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.o) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.d) - rm -f $(CTESTS:@srcroot@%.c=@objroot@%.out) - rm -f $(DSOS) $(STATIC_LIBS) - -distclean: clean - rm -rf @objroot@autom4te.cache - rm -f @objroot@config.log - rm -f @objroot@config.status - rm -f @objroot@config.stamp - rm -f @cfghdrs_out@ - rm -f @cfgoutputs_out@ - -relclean: distclean - rm -f @objroot@configure - rm -f @srcroot@VERSION - rm -f $(DOCS_HTML) - rm -f $(DOCS_MAN3) - -#=============================================================================== -# Re-configuration rules. - -ifeq (@enable_autogen@, 1) -@srcroot@configure : @srcroot@configure.ac - cd ./@srcroot@ && @AUTOCONF@ - -@objroot@config.status : @srcroot@configure - ./@objroot@config.status --recheck - -@srcroot@config.stamp.in : @srcroot@configure.ac - echo stamp > @srcroot@config.stamp.in - -@objroot@config.stamp : @cfgoutputs_in@ @cfghdrs_in@ @srcroot@configure - ./@objroot@config.status - @touch $@ - -# There must be some action in order for make to re-read Makefile when it is -# out of date. 
-@cfgoutputs_out@ @cfghdrs_out@ : @objroot@config.stamp - @true -endif diff --git a/jemalloc/README b/jemalloc/README deleted file mode 100644 index 4d7b552..0000000 --- a/jemalloc/README +++ /dev/null @@ -1,16 +0,0 @@ -jemalloc is a general-purpose scalable concurrent malloc(3) implementation. -This distribution is a stand-alone "portable" implementation that currently -targets Linux and Apple OS X. jemalloc is included as the default allocator in -the FreeBSD and NetBSD operating systems, and it is used by the Mozilla Firefox -web browser on Microsoft Windows-related platforms. Depending on your needs, -one of the other divergent versions may suit your needs better than this -distribution. - -The COPYING file contains copyright and licensing information. - -The INSTALL file contains information on how to configure, build, and install -jemalloc. - -The ChangeLog file contains a brief summary of changes for each release. - -URL: http://www.canonware.com/jemalloc/ diff --git a/jemalloc/autogen.sh b/jemalloc/autogen.sh deleted file mode 100755 index 75f32da..0000000 --- a/jemalloc/autogen.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -for i in autoconf; do - echo "$i" - $i - if [ $? -ne 0 ]; then - echo "Error $? in $i" - exit 1 - fi -done - -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ -if [ $? -ne 0 ]; then - echo "Error $? in ./configure" - exit 1 -fi diff --git a/jemalloc/bin/pprof b/jemalloc/bin/pprof deleted file mode 100755 index 280ddcc..0000000 --- a/jemalloc/bin/pprof +++ /dev/null @@ -1,4893 +0,0 @@ -#! /usr/bin/env perl - -# Copyright (c) 1998-2007, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# --- -# Program for printing the profile generated by common/profiler.cc, -# or by the heap profiler (common/debugallocation.cc) -# -# The profile contains a sequence of entries of the form: -# -# This program parses the profile, and generates user-readable -# output. 
-# -# Examples: -# -# % tools/pprof "program" "profile" -# Enters "interactive" mode -# -# % tools/pprof --text "program" "profile" -# Generates one line per procedure -# -# % tools/pprof --gv "program" "profile" -# Generates annotated call-graph and displays via "gv" -# -# % tools/pprof --gv --focus=Mutex "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# -# % tools/pprof --gv --focus=Mutex --ignore=string "program" "profile" -# Restrict to code paths that involve an entry that matches "Mutex" -# and does not match "string" -# -# % tools/pprof --list=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --list= pattern. The listing is -# annotated with the flat and cumulative sample counts at each line. -# -# % tools/pprof --disasm=IBF_CheckDocid "program" "profile" -# Generates disassembly listing of all routines with at least one -# sample that match the --disasm= pattern. The listing is -# annotated with the flat and cumulative sample counts at each PC value. -# -# TODO: Use color to indicate files? - -use strict; -use warnings; -use Getopt::Long; - -my $PPROF_VERSION = "1.7"; - -# These are the object tools we use which can come from a -# user-specified location using --tools, from the PPROF_TOOLS -# environment variable, or from the environment. 
-my %obj_tool_map = ( - "objdump" => "objdump", - "nm" => "nm", - "addr2line" => "addr2line", - "c++filt" => "c++filt", - ## ConfigureObjTools may add architecture-specific entries: - #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables - #"addr2line_pdb" => "addr2line-pdb", # ditto - #"otool" => "otool", # equivalent of objdump on OS X -); -my $DOT = "dot"; # leave non-absolute, since it may be in /usr/local -my $GV = "gv"; -my $EVINCE = "evince"; # could also be xpdf or perhaps acroread -my $KCACHEGRIND = "kcachegrind"; -my $PS2PDF = "ps2pdf"; -# These are used for dynamic profiles -my $URL_FETCHER = "curl -s"; - -# These are the web pages that servers need to support for dynamic profiles -my $HEAP_PAGE = "/pprof/heap"; -my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" -my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param - # ?seconds=#&event=x&period=n -my $GROWTH_PAGE = "/pprof/growth"; -my $CONTENTION_PAGE = "/pprof/contention"; -my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter -my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; -my $CENSUSPROFILE_PAGE = "/pprof/censusprofile"; # must support "?seconds=#" -my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST -my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; - -# These are the web pages that can be named on the command line. -# All the alternatives must begin with /. -my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . - "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . - "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; - -# default binary name -my $UNKNOWN_BINARY = "(unknown)"; - -# There is a pervasive dependency on the length (in hex characters, -# i.e., nibbles) of an address, distinguishing between 32-bit and -# 64-bit profiles. 
To err on the safe size, default to 64-bit here: -my $address_length = 16; - -# A list of paths to search for shared object files -my @prefix_list = (); - -# Special routine name that should not have any symbols. -# Used as separator to parse "addr2line -i" output. -my $sep_symbol = '_fini'; -my $sep_address = undef; - -##### Argument parsing ##### - -sub usage_string { - return < - is a space separated list of profile names. -pprof [options] - is a list of profile files where each file contains - the necessary symbol mappings as well as profile data (likely generated - with --raw). -pprof [options] - is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE - - Each name can be: - /path/to/profile - a path to a profile file - host:port[/] - a location of a service to get profile from - - The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, - $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, - $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. - For instance: "pprof http://myserver.com:80$HEAP_PAGE". - If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). -pprof --symbols - Maps addresses to symbol names. In this mode, stdin should be a - list of library mappings, in the same format as is found in the heap- - and cpu-profile files (this loosely matches that of /proc/self/maps - on linux), followed by a list of hex addresses to map, one per line. 
- - For more help with querying remote servers, including how to add the - necessary server-side support code, see this filename (or one like it): - - /usr/doc/google-perftools-$PPROF_VERSION/pprof_remote_servers.html - -Options: - --cum Sort by cumulative data - --base= Subtract from before display - --interactive Run in interactive mode (interactive "help" gives help) [default] - --seconds= Length of time for dynamic profiles [default=30 secs] - --add_lib= Read additional symbols and line info from the given library - --lib_prefix= Comma separated list of library path prefixes - -Reporting Granularity: - --addresses Report at address level - --lines Report at source line level - --functions Report at function level [default] - --files Report at source file level - -Output type: - --text Generate text report - --callgrind Generate callgrind format to stdout - --gv Generate Postscript and display - --evince Generate PDF and display - --web Generate SVG and display - --list= Generate source listing of matching routines - --disasm= Generate disassembly of matching routines - --symbols Print demangled symbol names found at given addresses - --dot Generate DOT file to stdout - --ps Generate Postcript to stdout - --pdf Generate PDF to stdout - --svg Generate SVG to stdout - --gif Generate GIF to stdout - --raw Generate symbolized pprof data (useful with remote fetch) - -Heap-Profile Options: - --inuse_space Display in-use (mega)bytes [default] - --inuse_objects Display in-use objects - --alloc_space Display allocated (mega)bytes - --alloc_objects Display allocated objects - --show_bytes Display space in bytes - --drop_negative Ignore negative differences - -Contention-profile options: - --total_delay Display total delay at each region [default] - --contentions Display number of delays at each region - --mean_delay Display mean delay at each region - -Call-graph Options: - --nodecount= Show at most so many nodes [default=80] - --nodefraction= Hide nodes below *total 
[default=.005] - --edgefraction= Hide edges below *total [default=.001] - --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching - --ignore= Ignore nodes matching - --scale= Set GV scaling [default=0] - --heapcheck Make nodes with non-0 object counts - (i.e. direct leak generators) more visible - -Miscellaneous: - --tools=[,...] \$PATH for object tool pathnames - --test Run unit tests - --help This message - --version Version information - -Environment Variables: - PPROF_TMPDIR Profiles directory. Defaults to \$HOME/pprof - PPROF_TOOLS Prefix for object tools pathnames - -Examples: - -pprof /bin/ls ls.prof - Enters "interactive" mode -pprof --text /bin/ls ls.prof - Outputs one line per procedure -pprof --web /bin/ls ls.prof - Displays annotated call-graph in web browser -pprof --gv /bin/ls ls.prof - Displays annotated call-graph via 'gv' -pprof --gv --focus=Mutex /bin/ls ls.prof - Restricts to code paths including a .*Mutex.* entry -pprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof - Code paths including Mutex but not string -pprof --list=getdir /bin/ls ls.prof - (Per-line) annotated source listing for getdir() -pprof --disasm=getdir /bin/ls ls.prof - (Per-PC) annotated disassembly for getdir() - -pprof http://localhost:1234/ - Enters "interactive" mode -pprof --text localhost:1234 - Outputs one line per procedure for localhost:1234 -pprof --raw localhost:1234 > ./local.raw -pprof --text ./local.raw - Fetches a remote profile for later analysis and then - analyzes it in text mode. -EOF -} - -sub version_string { - return < \$main::opt_help, - "version!" => \$main::opt_version, - "cum!" => \$main::opt_cum, - "base=s" => \$main::opt_base, - "seconds=i" => \$main::opt_seconds, - "add_lib=s" => \$main::opt_lib, - "lib_prefix=s" => \$main::opt_lib_prefix, - "functions!" => \$main::opt_functions, - "lines!" => \$main::opt_lines, - "addresses!" => \$main::opt_addresses, - "files!" => \$main::opt_files, - "text!" 
=> \$main::opt_text, - "callgrind!" => \$main::opt_callgrind, - "list=s" => \$main::opt_list, - "disasm=s" => \$main::opt_disasm, - "symbols!" => \$main::opt_symbols, - "gv!" => \$main::opt_gv, - "evince!" => \$main::opt_evince, - "web!" => \$main::opt_web, - "dot!" => \$main::opt_dot, - "ps!" => \$main::opt_ps, - "pdf!" => \$main::opt_pdf, - "svg!" => \$main::opt_svg, - "gif!" => \$main::opt_gif, - "raw!" => \$main::opt_raw, - "interactive!" => \$main::opt_interactive, - "nodecount=i" => \$main::opt_nodecount, - "nodefraction=f" => \$main::opt_nodefraction, - "edgefraction=f" => \$main::opt_edgefraction, - "maxdegree=i" => \$main::opt_maxdegree, - "focus=s" => \$main::opt_focus, - "ignore=s" => \$main::opt_ignore, - "scale=i" => \$main::opt_scale, - "heapcheck" => \$main::opt_heapcheck, - "inuse_space!" => \$main::opt_inuse_space, - "inuse_objects!" => \$main::opt_inuse_objects, - "alloc_space!" => \$main::opt_alloc_space, - "alloc_objects!" => \$main::opt_alloc_objects, - "show_bytes!" => \$main::opt_show_bytes, - "drop_negative!" => \$main::opt_drop_negative, - "total_delay!" => \$main::opt_total_delay, - "contentions!" => \$main::opt_contentions, - "mean_delay!" => \$main::opt_mean_delay, - "tools=s" => \$main::opt_tools, - "test!" => \$main::opt_test, - "debug!" 
=> \$main::opt_debug, - # Undocumented flags used only by unittests: - "test_stride=i" => \$main::opt_test_stride, - ) || usage("Invalid option(s)"); - - # Deal with the standard --help and --version - if ($main::opt_help) { - print usage_string(); - exit(0); - } - - if ($main::opt_version) { - print version_string(); - exit(0); - } - - # Disassembly/listing/symbols mode requires address-level info - if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { - $main::opt_functions = 0; - $main::opt_lines = 0; - $main::opt_addresses = 1; - $main::opt_files = 0; - } - - # Check heap-profiling flags - if ($main::opt_inuse_space + - $main::opt_inuse_objects + - $main::opt_alloc_space + - $main::opt_alloc_objects > 1) { - usage("Specify at most on of --inuse/--alloc options"); - } - - # Check output granularities - my $grains = - $main::opt_functions + - $main::opt_lines + - $main::opt_addresses + - $main::opt_files + - 0; - if ($grains > 1) { - usage("Only specify one output granularity option"); - } - if ($grains == 0) { - $main::opt_functions = 1; - } - - # Check output modes - my $modes = - $main::opt_text + - $main::opt_callgrind + - ($main::opt_list eq '' ? 0 : 1) + - ($main::opt_disasm eq '' ? 0 : 1) + - ($main::opt_symbols == 0 ? 
0 : 1) + - $main::opt_gv + - $main::opt_evince + - $main::opt_web + - $main::opt_dot + - $main::opt_ps + - $main::opt_pdf + - $main::opt_svg + - $main::opt_gif + - $main::opt_raw + - $main::opt_interactive + - 0; - if ($modes > 1) { - usage("Only specify one output mode"); - } - if ($modes == 0) { - if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode - $main::opt_interactive = 1; - } else { - $main::opt_text = 1; - } - } - - if ($main::opt_test) { - RunUnitTests(); - # Should not return - exit(1); - } - - # Binary name and profile arguments list - $main::prog = ""; - @main::pfile_args = (); - - # Remote profiling without a binary (using $SYMBOL_PAGE instead) - if (IsProfileURL($ARGV[0])) { - $main::use_symbol_page = 1; - } elsif (IsSymbolizedProfileFile($ARGV[0])) { - $main::use_symbolized_profile = 1; - $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file - } - - if ($main::use_symbol_page || $main::use_symbolized_profile) { - # We don't need a binary! - my %disabled = ('--lines' => $main::opt_lines, - '--disasm' => $main::opt_disasm); - for my $option (keys %disabled) { - usage("$option cannot be used without a binary") if $disabled{$option}; - } - # Set $main::prog later... 
- scalar(@ARGV) || usage("Did not specify profile file"); - } elsif ($main::opt_symbols) { - # --symbols needs a binary-name (to run nm on, etc) but not profiles - $main::prog = shift(@ARGV) || usage("Did not specify program"); - } else { - $main::prog = shift(@ARGV) || usage("Did not specify program"); - scalar(@ARGV) || usage("Did not specify profile file"); - } - - # Parse profile file/location arguments - foreach my $farg (@ARGV) { - if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { - my $machine = $1; - my $num_machines = $2; - my $path = $3; - for (my $i = 0; $i < $num_machines; $i++) { - unshift(@main::pfile_args, "$i.$machine$path"); - } - } else { - unshift(@main::pfile_args, $farg); - } - } - - if ($main::use_symbol_page) { - unless (IsProfileURL($main::pfile_args[0])) { - error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); - } - CheckSymbolPage(); - $main::prog = FetchProgramName(); - } elsif (!$main::use_symbolized_profile) { # may not need objtools! - ConfigureObjTools($main::prog) - } - - # Break the opt_list_prefix into the prefix_list array - @prefix_list = split (',', $main::opt_lib_prefix); - - # Remove trailing / from the prefixes, in the list to prevent - # searching things like /my/path//lib/mylib.so - foreach (@prefix_list) { - s|/+$||; - } -} - -sub Main() { - Init(); - $main::collected_profile = undef; - @main::profile_files = (); - $main::op_time = time(); - - # Printing symbols is special and requires a lot less info that most. 
- if ($main::opt_symbols) { - PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin - return; - } - - # Fetch all profile data - FetchDynamicProfiles(); - - # this will hold symbols that we read from the profile files - my $symbol_map = {}; - - # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); - my $profile = $data->{profile}; - my $pcs = $data->{pcs}; - my $libs = $data->{libs}; # Info about main program and shared libraries - $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); - - # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { - my $data2 = ReadProfile($main::prog, $pname); - $profile = AddProfile($profile, $data2->{profile}); - $pcs = AddPcs($pcs, $data2->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); - } - } - - # Subtract base from profile, if specified - if ($main::opt_base ne '') { - my $base = ReadProfile($main::prog, $main::opt_base); - $profile = SubtractProfile($profile, $base->{profile}); - $pcs = AddPcs($pcs, $base->{pcs}); - $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); - } - - # Get total data in profile - my $total = TotalProfile($profile); - - # Collect symbols - my $symbols; - if ($main::use_symbolized_profile) { - $symbols = FetchSymbols($pcs, $symbol_map); - } elsif ($main::use_symbol_page) { - $symbols = FetchSymbols($pcs); - } else { - # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, - # which may differ from the data from subsequent profiles, especially - # if they were run on different machines. Use appropriate libs for - # each pc somehow. - $symbols = ExtractSymbols($libs, $pcs); - } - - # Remove uniniteresting stack items - $profile = RemoveUninterestingFrames($symbols, $profile); - - # Focus? - if ($main::opt_focus ne '') { - $profile = FocusProfile($symbols, $profile, $main::opt_focus); - } - - # Ignore? 
- if ($main::opt_ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); - } - - my $calls = ExtractCalls($symbols, $profile); - - # Reduce profiles to required output granularity, and also clean - # each stack trace so a given entry exists at most once. - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - # Print - if (!$main::opt_interactive) { - if ($main::opt_disasm) { - PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm, $total); - } elsif ($main::opt_list) { - PrintListing($libs, $flat, $cumulative, $main::opt_list); - } elsif ($main::opt_text) { - # Make sure the output is empty when have nothing to report - # (only matters when --heapcheck is given but we must be - # compatible with old branches that did not pass --heapcheck always): - if ($total != 0) { - printf("Total: %s %s\n", Unparse($total), Units()); - } - PrintText($symbols, $flat, $cumulative, $total, -1); - } elsif ($main::opt_raw) { - PrintSymbolizedProfile($symbols, $profile, $main::prog); - } elsif ($main::opt_callgrind) { - PrintCallgrind($calls); - } else { - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), ""); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), ""); - } elsif ($main::opt_web) { - my $tmp = TempName($main::next_tmpfile, "svg"); - RunWeb($tmp); - # The command we run might hand the file name off - # to an already running browser instance and then exit. - # Normally, we'd remove $tmp on exit (right now), - # but fork a child to remove $tmp a little later, so that the - # browser has time to load it first. 
- delete $main::tempnames{$tmp}; - if (fork() == 0) { - sleep 5; - unlink($tmp); - exit(0); - } - } - } else { - cleanup(); - exit(1); - } - } - } else { - InteractiveMode($profile, $symbols, $libs, $total); - } - - cleanup(); - exit(0); -} - -##### Entry Point ##### - -Main(); - -# Temporary code to detect if we're running on a Goobuntu system. -# These systems don't have the right stuff installed for the special -# Readline libraries to work, so as a temporary workaround, we default -# to using the normal stdio code, rather than the fancier readline-based -# code -sub ReadlineMightFail { - if (-e '/lib/libtermcap.so.2') { - return 0; # libtermcap exists, so readline should be okay - } else { - return 1; - } -} - -sub RunGV { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - if (!system("$GV --version >/dev/null 2>&1")) { - # Options using double dash are supported by this gv version. - # Also, turn on noantialias to better handle bug in gv for - # postscript files with large dimensions. - # TODO: Maybe we should not pass the --noantialias flag - # if the gv version is known to work properly without the flag. - system("$GV --scale=$main::opt_scale --noantialias " . $fname . $bg); - } else { - # Old gv version - only supports options that use single dash. - print STDERR "$GV -scale $main::opt_scale\n"; - system("$GV -scale $main::opt_scale " . $fname . $bg); - } -} - -sub RunEvince { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - system("$EVINCE " . $fname . $bg); -} - -sub RunWeb { - my $fname = shift; - print STDERR "Loading web page file:///$fname\n"; - - if (`uname` =~ /Darwin/) { - # OS X: open will use standard preference for SVG files. - system("/usr/bin/open", $fname); - return; - } - - # Some kind of Unix; try generic symlinks, then specific browsers. - # (Stop once we find one.) - # Works best if the browser is already running. 
- my @alt = ( - "/etc/alternatives/gnome-www-browser", - "/etc/alternatives/x-www-browser", - "google-chrome", - "firefox", - ); - foreach my $b (@alt) { - if (system($b, $fname) == 0) { - return; - } - } - - print STDERR "Could not load web browser.\n"; -} - -sub RunKcachegrind { - my $fname = shift; - my $bg = shift; # "" or " &" if we should run in background - print STDERR "Starting '$KCACHEGRIND " . $fname . $bg . "'\n"; - system("$KCACHEGRIND " . $fname . $bg); -} - - -##### Interactive helper routines ##### - -sub InteractiveMode { - $| = 1; # Make output unbuffered for interactive mode - my ($orig_profile, $symbols, $libs, $total) = @_; - - print STDERR "Welcome to pprof! For help, type 'help'.\n"; - - # Use ReadLine if it's installed and input comes from a console. - if ( -t STDIN && - !ReadlineMightFail() && - defined(eval {require Term::ReadLine}) ) { - my $term = new Term::ReadLine 'pprof'; - while ( defined ($_ = $term->readline('(pprof) '))) { - $term->addhistory($_) if /\S/; - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - } - } else { # don't have readline - while (1) { - print STDERR "(pprof) "; - $_ = ; - last if ! defined $_ ; - s/\r//g; # turn windows-looking lines into unix-looking lines - - # Save some flags that might be reset by InteractiveCommand() - my $save_opt_lines = $main::opt_lines; - - if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { - last; # exit when we get an interactive command to quit - } - - # Restore flags - $main::opt_lines = $save_opt_lines; - } - } -} - -# Takes two args: orig profile, and command to run. 
-# Returns 1 if we should keep going, or 0 if we were asked to quit -sub InteractiveCommand { - my($orig_profile, $symbols, $libs, $total, $command) = @_; - $_ = $command; # just to make future m//'s easier - if (!defined($_)) { - print STDERR "\n"; - return 0; - } - if (m/^\s*quit/) { - return 0; - } - if (m/^\s*help/) { - InteractiveHelpMessage(); - return 1; - } - # Clear all the mode options -- mode is controlled by "$command" - $main::opt_text = 0; - $main::opt_callgrind = 0; - $main::opt_disasm = 0; - $main::opt_list = 0; - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_cum = 0; - - if (m/^\s*(text|top)(\d*)\s*(.*)/) { - $main::opt_text = 1; - - my $line_limit = ($2 ne "") ? int($2) : 10; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($3); - - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintText($symbols, $flat, $cumulative, $total, $line_limit); - return 1; - } - if (m/^\s*callgrind\s*([^ \n]*)/) { - $main::opt_callgrind = 1; - - # Get derived profiles - my $calls = ExtractCalls($symbols, $orig_profile); - my $filename = $1; - if ( $1 eq '' ) { - $filename = TempName($main::next_tmpfile, "callgrind"); - } - PrintCallgrind($calls, $filename); - if ( $1 eq '' ) { - RunKcachegrind($filename, " & "); - $main::next_tmpfile++; - } - - return 1; - } - if (m/^\s*list\s*(.+)/) { - $main::opt_list = 1; - - my $routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); - - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintListing($libs, $flat, $cumulative, $routine); - return 1; - } - if (m/^\s*disasm\s*(.+)/) { - $main::opt_disasm = 1; - - my 
$routine; - my $ignore; - ($routine, $ignore) = ParseInteractiveArgs($1); - - # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, "", $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - PrintDisassembly($libs, $flat, $cumulative, $routine, $total); - return 1; - } - if (m/^\s*(gv|web|evince)\s*(.*)/) { - $main::opt_gv = 0; - $main::opt_evince = 0; - $main::opt_web = 0; - if ($1 eq "gv") { - $main::opt_gv = 1; - } elsif ($1 eq "evince") { - $main::opt_evince = 1; - } elsif ($1 eq "web") { - $main::opt_web = 1; - } - - my $focus; - my $ignore; - ($focus, $ignore) = ParseInteractiveArgs($2); - - # Process current profile to account for various settings - my $profile = ProcessProfile($orig_profile, $symbols, $focus, $ignore); - my $reduced = ReduceProfile($symbols, $profile); - - # Get derived profiles - my $flat = FlatProfile($reduced); - my $cumulative = CumulativeProfile($reduced); - - if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { - if ($main::opt_gv) { - RunGV(TempName($main::next_tmpfile, "ps"), " &"); - } elsif ($main::opt_evince) { - RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); - } elsif ($main::opt_web) { - RunWeb(TempName($main::next_tmpfile, "svg")); - } - $main::next_tmpfile++; - } - return 1; - } - if (m/^\s*$/) { - return 1; - } - print STDERR "Unknown command: try 'help'.\n"; - return 1; -} - - -sub ProcessProfile { - my $orig_profile = shift; - my $symbols = shift; - my $focus = shift; - my $ignore = shift; - - # Process current profile to account for various settings - my $profile = $orig_profile; - my $total_count = TotalProfile($profile); - printf("Total: %s %s\n", Unparse($total_count), Units()); - if ($focus ne '') { - $profile = FocusProfile($symbols, $profile, $focus); - my $focus_count = TotalProfile($profile); - 
printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", - $focus, - Unparse($focus_count), Units(), - Unparse($total_count), ($focus_count*100.0) / $total_count); - } - if ($ignore ne '') { - $profile = IgnoreProfile($symbols, $profile, $ignore); - my $ignore_count = TotalProfile($profile); - printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", - $ignore, - Unparse($ignore_count), Units(), - Unparse($total_count), - ($ignore_count*100.0) / $total_count); - } - - return $profile; -} - -sub InteractiveHelpMessage { - print STDERR <{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - my $depth = $#addrs + 1; - # int(foo / 2**32) is the only reliable way to get rid of bottom - # 32 bits on both 32- and 64-bit systems. - print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); - print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); - - foreach my $full_addr (@addrs) { - my $addr = $full_addr; - $addr =~ s/0x0*//; # strip off leading 0x, zeroes - if (length($addr) > 16) { - print STDERR "Invalid address in profile: $full_addr\n"; - next; - } - my $low_addr = substr($addr, -8); # get last 8 hex chars - my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars - print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); - } - } - } -} - -# Print symbols and profile data -sub PrintSymbolizedProfile { - my $symbols = shift; - my $profile = shift; - my $prog = shift; - - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - - print '--- ', $symbol_marker, "\n"; - if (defined($prog)) { - print 'binary=', $prog, "\n"; - } - while (my ($pc, $name) = each(%{$symbols})) { - my $sep = ' '; - print '0x', $pc; - # We have a list of function names, which include the inlined - # calls. They are separated (and terminated) by --, which is - # illegal in function names. 
- for (my $j = 2; $j <= $#{$name}; $j += 3) { - print $sep, $name->[$j]; - $sep = '--'; - } - print "\n"; - } - print '---', "\n"; - - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - print '--- ', $profile_marker, "\n"; - if (defined($main::collected_profile)) { - # if used with remote fetch, simply dump the collected profile to output. - open(SRC, "<$main::collected_profile"); - while () { - print $_; - } - close(SRC); - } else { - # dump a cpu-format profile to standard out - PrintProfileData($profile); - } -} - -# Print text output -sub PrintText { - my $symbols = shift; - my $flat = shift; - my $cumulative = shift; - my $total = shift; - my $line_limit = shift; - - # Which profile to sort by? - my $s = $main::opt_cum ? $cumulative : $flat; - - my $running_sum = 0; - my $lines = 0; - foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } - keys(%{$cumulative})) { - my $f = GetEntry($flat, $k); - my $c = GetEntry($cumulative, $k); - $running_sum += $f; - - my $sym = $k; - if (exists($symbols->{$k})) { - $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; - if ($main::opt_addresses) { - $sym = $k . " " . $sym; - } - } - - if ($f != 0 || $c != 0) { - printf("%8s %6s %6s %8s %6s %s\n", - Unparse($f), - Percent($f, $total), - Percent($running_sum, $total), - Unparse($c), - Percent($c, $total), - $sym); - } - $lines++; - last if ($line_limit >= 0 && $lines > $line_limit); - } -} - -# Print the call graph in a way that's suiteable for callgrind. 
-sub PrintCallgrind { - my $calls = shift; - my $filename; - if ($main::opt_interactive) { - $filename = shift; - print STDERR "Writing callgrind file to '$filename'.\n" - } else { - $filename = "&STDOUT"; - } - open(CG, ">".$filename ); - printf CG ("events: Hits\n\n"); - foreach my $call ( map { $_->[0] } - sort { $a->[1] cmp $b ->[1] || - $a->[2] <=> $b->[2] } - map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - [$_, $1, $2] } - keys %$calls ) { - my $count = int($calls->{$call}); - $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; - my ( $caller_file, $caller_line, $caller_function, - $callee_file, $callee_line, $callee_function ) = - ( $1, $2, $3, $5, $6, $7 ); - - - printf CG ("fl=$caller_file\nfn=$caller_function\n"); - if (defined $6) { - printf CG ("cfl=$callee_file\n"); - printf CG ("cfn=$callee_function\n"); - printf CG ("calls=$count $callee_line\n"); - } - printf CG ("$caller_line $count\n\n"); - } -} - -# Print disassembly for all all routines that match $main::opt_disasm -sub PrintDisassembly { - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $disasm_opts = shift; - my $total = shift; - - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - # See if there are any samples in this routine - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - PrintDisassembledFunction($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr, $total); - last; - } - $addr = AddressInc($addr); - } - } - } -} - -# Return reference to array of tuples of the form: -# [start_address, filename, linenumber, instruction, limit_address] -# 
E.g., -# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] -sub Disassemble { - my $prog = shift; - my $offset = shift; - my $start_addr = shift; - my $end_addr = shift; - - my $objdump = $obj_tool_map{"objdump"}; - my $cmd = sprintf("$objdump -C -d -l --no-show-raw-insn " . - "--start-address=0x$start_addr " . - "--stop-address=0x$end_addr $prog"); - open(OBJDUMP, "$cmd |") || error("$objdump: $!\n"); - my @result = (); - my $filename = ""; - my $linenumber = -1; - my $last = ["", "", "", ""]; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - chop; - if (m|\s*([^:\s]+):(\d+)\s*$|) { - # Location line of the form: - # : - $filename = $1; - $linenumber = $2; - } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { - # Disassembly line -- zero-extend address to full length - my $addr = HexExtend($1); - my $k = AddressAdd($addr, $offset); - $last->[4] = $k; # Store ending address for previous instruction - $last = [$k, $filename, $linenumber, $2, $end_addr]; - push(@result, $last); - } - } - close(OBJDUMP); - return @result; -} - -# The input file should contain lines of the form /proc/maps-like -# output (same format as expected from the profiles) or that looks -# like hex addresses (like "0xDEADBEEF"). We will parse all -# /proc/maps output, and for all the hex addresses, we will output -# "short" symbol names, one per line, in the same order as the input. -sub PrintSymbols { - my $maps_and_symbols_file = shift; - - # ParseLibraries expects pcs to be in a set. Fine by us... 
- my @pclist = (); # pcs in sorted order - my $pcs = {}; - my $map = ""; - foreach my $line (<$maps_and_symbols_file>) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /\b(0x[0-9a-f]+)\b/i) { - push(@pclist, HexExtend($1)); - $pcs->{$pclist[-1]} = 1; - } else { - $map .= $line; - } - } - - my $libs = ParseLibraries($main::prog, $map, $pcs); - my $symbols = ExtractSymbols($libs, $pcs); - - foreach my $pc (@pclist) { - # ->[0] is the shortname, ->[2] is the full name - print(($symbols->{$pc}->[0] || "??") . "\n"); - } -} - - -# For sorting functions by name -sub ByName { - return ShortFunctionName($a) cmp ShortFunctionName($b); -} - -# Print source-listing for all all routines that match $main::opt_list -sub PrintListing { - my $libs = shift; - my $flat = shift; - my $cumulative = shift; - my $list_opts = shift; - - foreach my $lib (@{$libs}) { - my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); - my $offset = AddressSub($lib->[1], $lib->[3]); - foreach my $routine (sort ByName keys(%{$symbol_table})) { - # Print if there are any samples in this routine - my $start_addr = $symbol_table->{$routine}->[0]; - my $end_addr = $symbol_table->{$routine}->[1]; - my $length = hex(AddressSub($end_addr, $start_addr)); - my $addr = AddressAdd($start_addr, $offset); - for (my $i = 0; $i < $length; $i++) { - if (defined($cumulative->{$addr})) { - PrintSource($lib->[0], $offset, - $routine, $flat, $cumulative, - $start_addr, $end_addr); - last; - } - $addr = AddressInc($addr); - } - } - } -} - -# Returns the indentation of the line, if it has any non-whitespace -# characters. Otherwise, returns -1. 
-sub Indentation { - my $line = shift; - if (m/^(\s*)\S/) { - return length($1); - } else { - return -1; - } -} - -# Print source-listing for one routine -sub PrintSource { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - - # Disassemble all instructions (just to get line numbers) - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - - # Hack 1: assume that the first source file encountered in the - # disassembly contains the routine - my $filename = undef; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[2] >= 0) { - $filename = $instructions[$i]->[1]; - last; - } - } - if (!defined($filename)) { - print STDERR "no filename found in $routine\n"; - return; - } - - # Hack 2: assume that the largest line number from $filename is the - # end of the procedure. This is typically safe since if P1 contains - # an inlined call to P2, then P2 usually occurs earlier in the - # source file. If this does not work, we might have to compute a - # density profile or just print all regions we find. - my $lastline = 0; - for (my $i = 0; $i <= $#instructions; $i++) { - my $f = $instructions[$i]->[1]; - my $l = $instructions[$i]->[2]; - if (($f eq $filename) && ($l > $lastline)) { - $lastline = $l; - } - } - - # Hack 3: assume the first source location from "filename" is the start of - # the source code. 
- my $firstline = 1; - for (my $i = 0; $i <= $#instructions; $i++) { - if ($instructions[$i]->[1] eq $filename) { - $firstline = $instructions[$i]->[2]; - last; - } - } - - # Hack 4: Extend last line forward until its indentation is less than - # the indentation we saw on $firstline - my $oldlastline = $lastline; - { - if (!open(FILE, "<$filename")) { - print STDERR "$filename: $!\n"; - return; - } - my $l = 0; - my $first_indentation = -1; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $l++; - my $indent = Indentation($_); - if ($l >= $firstline) { - if ($first_indentation < 0 && $indent >= 0) { - $first_indentation = $indent; - last if ($first_indentation == 0); - } - } - if ($l >= $lastline && $indent >= 0) { - if ($indent >= $first_indentation) { - $lastline = $l+1; - } else { - last; - } - } - } - close(FILE); - } - - # Assign all samples to the range $firstline,$lastline, - # Hack 4: If an instruction does not occur in the range, its samples - # are moved to the next instruction that occurs in the range. 
- my $samples1 = {}; - my $samples2 = {}; - my $running1 = 0; # Unassigned flat counts - my $running2 = 0; # Unassigned cumulative counts - my $total1 = 0; # Total flat counts - my $total2 = 0; # Total cumulative counts - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - $running1 += $c1; - $running2 += $c2; - $total1 += $c1; - $total2 += $c2; - my $file = $e->[1]; - my $line = $e->[2]; - if (($file eq $filename) && - ($line >= $firstline) && - ($line <= $lastline)) { - # Assign all accumulated samples to this line - AddEntry($samples1, $line, $running1); - AddEntry($samples2, $line, $running2); - $running1 = 0; - $running2 = 0; - } - } - - # Assign any leftover samples to $lastline - AddEntry($samples1, $lastline, $running1); - AddEntry($samples2, $lastline, $running2); - - printf("ROUTINE ====================== %s in %s\n" . - "%6s %6s Total %s (flat / cumulative)\n", - ShortFunctionName($routine), - $filename, - Units(), - Unparse($total1), - Unparse($total2)); - if (!open(FILE, "<$filename")) { - print STDERR "$filename: $!\n"; - return; - } - my $l = 0; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $l++; - if ($l >= $firstline - 5 && - (($l <= $oldlastline + 5) || ($l <= $lastline))) { - chop; - my $text = $_; - if ($l == $firstline) { printf("---\n"); } - printf("%6s %6s %4d: %s\n", - UnparseAlt(GetEntry($samples1, $l)), - UnparseAlt(GetEntry($samples2, $l)), - $l, - $text); - if ($l == $lastline) { printf("---\n"); } - }; - } - close(FILE); -} - -# Return the source line for the specified file/linenumber. -# Returns undef if not found. 
-sub SourceLine { - my $file = shift; - my $line = shift; - - # Look in cache - if (!defined($main::source_cache{$file})) { - if (100 < scalar keys(%main::source_cache)) { - # Clear the cache when it gets too big - $main::source_cache = (); - } - - # Read all lines from the file - if (!open(FILE, "<$file")) { - print STDERR "$file: $!\n"; - $main::source_cache{$file} = []; # Cache the negative result - return undef; - } - my $lines = []; - push(@{$lines}, ""); # So we can use 1-based line numbers as indices - while () { - push(@{$lines}, $_); - } - close(FILE); - - # Save the lines in the cache - $main::source_cache{$file} = $lines; - } - - my $lines = $main::source_cache{$file}; - if (($line < 0) || ($line > $#{$lines})) { - return undef; - } else { - return $lines->[$line]; - } -} - -# Print disassembly for one routine with interspersed source if available -sub PrintDisassembledFunction { - my $prog = shift; - my $offset = shift; - my $routine = shift; - my $flat = shift; - my $cumulative = shift; - my $start_addr = shift; - my $end_addr = shift; - my $total = shift; - - # Disassemble all instructions - my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); - - # Make array of counts per instruction - my @flat_count = (); - my @cum_count = (); - my $flat_total = 0; - my $cum_total = 0; - foreach my $e (@instructions) { - # Add up counts for all address that fall inside this instruction - my $c1 = 0; - my $c2 = 0; - for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { - $c1 += GetEntry($flat, $a); - $c2 += GetEntry($cumulative, $a); - } - push(@flat_count, $c1); - push(@cum_count, $c2); - $flat_total += $c1; - $cum_total += $c2; - } - - # Print header with total counts - printf("ROUTINE ====================== %s\n" . 
- "%6s %6s %s (flat, cumulative) %.1f%% of total\n", - ShortFunctionName($routine), - Unparse($flat_total), - Unparse($cum_total), - Units(), - ($cum_total * 100.0) / $total); - - # Process instructions in order - my $current_file = ""; - for (my $i = 0; $i <= $#instructions; ) { - my $e = $instructions[$i]; - - # Print the new file name whenever we switch files - if ($e->[1] ne $current_file) { - $current_file = $e->[1]; - my $fname = $current_file; - $fname =~ s|^\./||; # Trim leading "./" - - # Shorten long file names - if (length($fname) >= 58) { - $fname = "..." . substr($fname, -55); - } - printf("-------------------- %s\n", $fname); - } - - # TODO: Compute range of lines to print together to deal with - # small reorderings. - my $first_line = $e->[2]; - my $last_line = $first_line; - my %flat_sum = (); - my %cum_sum = (); - for (my $l = $first_line; $l <= $last_line; $l++) { - $flat_sum{$l} = 0; - $cum_sum{$l} = 0; - } - - # Find run of instructions for this range of source lines - my $first_inst = $i; - while (($i <= $#instructions) && - ($instructions[$i]->[2] >= $first_line) && - ($instructions[$i]->[2] <= $last_line)) { - $e = $instructions[$i]; - $flat_sum{$e->[2]} += $flat_count[$i]; - $cum_sum{$e->[2]} += $cum_count[$i]; - $i++; - } - my $last_inst = $i - 1; - - # Print source lines - for (my $l = $first_line; $l <= $last_line; $l++) { - my $line = SourceLine($current_file, $l); - if (!defined($line)) { - $line = "?\n"; - next; - } else { - $line =~ s/^\s+//; - } - printf("%6s %6s %5d: %s", - UnparseAlt($flat_sum{$l}), - UnparseAlt($cum_sum{$l}), - $l, - $line); - } - - # Print disassembly - for (my $x = $first_inst; $x <= $last_inst; $x++) { - my $e = $instructions[$x]; - my $address = $e->[0]; - $address = AddressSub($address, $offset); # Make relative to section - $address =~ s/^0x//; - $address =~ s/^0*//; - - # Trim symbols - my $d = $e->[3]; - while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) - while ($d =~ 
s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments - - printf("%6s %6s %8s: %6s\n", - UnparseAlt($flat_count[$x]), - UnparseAlt($cum_count[$x]), - $address, - $d); - } - } -} - -# Print DOT graph -sub PrintDot { - my $prog = shift; - my $symbols = shift; - my $raw = shift; - my $flat = shift; - my $cumulative = shift; - my $overall_total = shift; - - # Get total - my $local_total = TotalProfile($flat); - my $nodelimit = int($main::opt_nodefraction * $local_total); - my $edgelimit = int($main::opt_edgefraction * $local_total); - my $nodecount = $main::opt_nodecount; - - # Find nodes to include - my @list = (sort { abs(GetEntry($cumulative, $b)) <=> - abs(GetEntry($cumulative, $a)) - || $a cmp $b } - keys(%{$cumulative})); - my $last = $nodecount - 1; - if ($last > $#list) { - $last = $#list; - } - while (($last >= 0) && - (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { - $last--; - } - if ($last < 0) { - print STDERR "No nodes to print\n"; - return 0; - } - - if ($nodelimit > 0 || $edgelimit > 0) { - printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", - Unparse($nodelimit), Units(), - Unparse($edgelimit), Units()); - } - - # Open DOT output file - my $output; - if ($main::opt_gv) { - $output = "| $DOT -Tps2 >" . TempName($main::next_tmpfile, "ps"); - } elsif ($main::opt_evince) { - $output = "| $DOT -Tps2 | $PS2PDF - " . TempName($main::next_tmpfile, "pdf"); - } elsif ($main::opt_ps) { - $output = "| $DOT -Tps2"; - } elsif ($main::opt_pdf) { - $output = "| $DOT -Tps2 | $PS2PDF - -"; - } elsif ($main::opt_web || $main::opt_svg) { - # We need to post-process the SVG, so write to a temporary file always. - $output = "| $DOT -Tsvg >" . 
TempName($main::next_tmpfile, "svg"); - } elsif ($main::opt_gif) { - $output = "| $DOT -Tgif"; - } else { - $output = ">&STDOUT"; - } - open(DOT, $output) || error("$output: $!\n"); - - # Title - printf DOT ("digraph \"%s; %s %s\" {\n", - $prog, - Unparse($overall_total), - Units()); - if ($main::opt_pdf) { - # The output is more printable if we set the page size for dot. - printf DOT ("size=\"8,11\"\n"); - } - printf DOT ("node [width=0.375,height=0.25];\n"); - - # Print legend - printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . - "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", - $prog, - sprintf("Total %s: %s", Units(), Unparse($overall_total)), - sprintf("Focusing on: %s", Unparse($local_total)), - sprintf("Dropped nodes with <= %s abs(%s)", - Unparse($nodelimit), Units()), - sprintf("Dropped edges with <= %s %s", - Unparse($edgelimit), Units()) - ); - - # Print nodes - my %node = (); - my $nextnode = 1; - foreach my $a (@list[0..$last]) { - # Pick font size - my $f = GetEntry($flat, $a); - my $c = GetEntry($cumulative, $a); - - my $fs = 8; - if ($local_total > 0) { - $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); - } - - $node{$a} = $nextnode++; - my $sym = $a; - $sym =~ s/\s+/\\n/g; - $sym =~ s/::/\\n/g; - - # Extra cumulative info to print for non-leaves - my $extra = ""; - if ($f != $c) { - $extra = sprintf("\\rof %s (%s)", - Unparse($c), - Percent($c, $overall_total)); - } - my $style = ""; - if ($main::opt_heapcheck) { - if ($f > 0) { - # make leak-causing nodes more visible (add a background) - $style = ",style=filled,fillcolor=gray" - } elsif ($f < 0) { - # make anti-leak-causing nodes (which almost never occur) - # stand out as well (triple border) - $style = ",peripheries=3" - } - } - - printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . 
- "\",shape=box,fontsize=%.1f%s];\n", - $node{$a}, - $sym, - Unparse($f), - Percent($f, $overall_total), - $extra, - $fs, - $style, - ); - } - - # Get edges and counts per edge - my %edge = (); - my $n; - foreach my $k (keys(%{$raw})) { - # TODO: omit low %age edges - $n = $raw->{$k}; - my @translated = TranslateStack($symbols, $k); - for (my $i = 1; $i <= $#translated; $i++) { - my $src = $translated[$i]; - my $dst = $translated[$i-1]; - #next if ($src eq $dst); # Avoid self-edges? - if (exists($node{$src}) && exists($node{$dst})) { - my $edge_label = "$src\001$dst"; - if (!exists($edge{$edge_label})) { - $edge{$edge_label} = 0; - } - $edge{$edge_label} += $n; - } - } - } - - # Print edges (process in order of decreasing counts) - my %indegree = (); # Number of incoming edges added per node so far - my %outdegree = (); # Number of outgoing edges added per node so far - foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { - my @x = split(/\001/, $e); - $n = $edge{$e}; - - # Initialize degree of kept incoming and outgoing edges if necessary - my $src = $x[0]; - my $dst = $x[1]; - if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } - if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } - - my $keep; - if ($indegree{$dst} == 0) { - # Keep edge if needed for reachability - $keep = 1; - } elsif (abs($n) <= $edgelimit) { - # Drop if we are below --edgefraction - $keep = 0; - } elsif ($outdegree{$src} >= $main::opt_maxdegree || - $indegree{$dst} >= $main::opt_maxdegree) { - # Keep limited number of in/out edges per node - $keep = 0; - } else { - $keep = 1; - } - - if ($keep) { - $outdegree{$src}++; - $indegree{$dst}++; - - # Compute line width based on edge count - my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); - if ($fraction > 1) { $fraction = 1; } - my $w = $fraction * 2; - if ($w < 1 && ($main::opt_web || $main::opt_svg)) { - # SVG output treats line widths < 1 poorly. 
- $w = 1; - } - - # Dot sometimes segfaults if given edge weights that are too large, so - # we cap the weights at a large value - my $edgeweight = abs($n) ** 0.7; - if ($edgeweight > 100000) { $edgeweight = 100000; } - $edgeweight = int($edgeweight); - - my $style = sprintf("setlinewidth(%f)", $w); - if ($x[1] =~ m/\(inline\)/) { - $style .= ",dashed"; - } - - # Use a slightly squashed function of the edge count as the weight - printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", - $node{$x[0]}, - $node{$x[1]}, - Unparse($n), - $edgeweight, - $style); - } - } - - print DOT ("}\n"); - close(DOT); - - if ($main::opt_web || $main::opt_svg) { - # Rewrite SVG to be more usable inside web browser. - RewriteSvg(TempName($main::next_tmpfile, "svg")); - } - - return 1; -} - -sub RewriteSvg { - my $svgfile = shift; - - open(SVG, $svgfile) || die "open temp svg: $!"; - my @svg = ; - close(SVG); - unlink $svgfile; - my $svg = join('', @svg); - - # Dot's SVG output is - # - # - # - # ... - # - # - # - # Change it to - # - # - # $svg_javascript - # - # - # ... - # - # - # - - # Fix width, height; drop viewBox. - $svg =~ s/(?s) above first - my $svg_javascript = SvgJavascript(); - my $viewport = "\n"; - $svg =~ s/ above . - $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; - $svg =~ s/$svgfile") || die "open $svgfile: $!"; - print SVG $svg; - close(SVG); - } -} - -sub SvgJavascript { - return <<'EOF'; - -EOF -} - -# Return a small number that identifies the argument. -# Multiple calls with the same argument will return the same number. -# Calls with different arguments will return different numbers. 
-sub ShortIdFor { - my $key = shift; - my $id = $main::uniqueid{$key}; - if (!defined($id)) { - $id = keys(%main::uniqueid) + 1; - $main::uniqueid{$key} = $id; - } - return $id; -} - -# Translate a stack of addresses into a stack of symbols -sub TranslateStack { - my $symbols = shift; - my $k = shift; - - my @addrs = split(/\n/, $k); - my @result = (); - for (my $i = 0; $i <= $#addrs; $i++) { - my $a = $addrs[$i]; - - # Skip large addresses since they sometimes show up as fake entries on RH9 - if (length($a) > 8 && $a gt "7fffffffffffffff") { - next; - } - - if ($main::opt_disasm || $main::opt_list) { - # We want just the address for the key - push(@result, $a); - next; - } - - my $symlist = $symbols->{$a}; - if (!defined($symlist)) { - $symlist = [$a, "", $a]; - } - - # We can have a sequence of symbols for a particular entry - # (more than one symbol in the case of inlining). Callers - # come before callees in symlist, so walk backwards since - # the translated stack should contain callees before callers. - for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { - my $func = $symlist->[$j-2]; - my $fileline = $symlist->[$j-1]; - my $fullfunc = $symlist->[$j]; - if ($j > 2) { - $func = "$func (inline)"; - } - - # Do not merge nodes corresponding to Callback::Run since that - # causes confusing cycles in dot display. Instead, we synthesize - # a unique name for this frame per caller. - if ($func =~ m/Callback.*::Run$/) { - my $caller = ($i > 0) ? $addrs[$i-1] : 0; - $func = "Run#" . ShortIdFor($caller); - } - - if ($main::opt_addresses) { - push(@result, "$a $func $fileline"); - } elsif ($main::opt_lines) { - if ($func eq '??' 
&& $fileline eq '??:0') { - push(@result, "$a"); - } else { - push(@result, "$func $fileline"); - } - } elsif ($main::opt_functions) { - if ($func eq '??') { - push(@result, "$a"); - } else { - push(@result, $func); - } - } elsif ($main::opt_files) { - if ($fileline eq '??:0' || $fileline eq '') { - push(@result, "$a"); - } else { - my $f = $fileline; - $f =~ s/:\d+$//; - push(@result, $f); - } - } else { - push(@result, $a); - last; # Do not print inlined info - } - } - } - - # print join(",", @addrs), " => ", join(",", @result), "\n"; - return @result; -} - -# Generate percent string for a number and a total -sub Percent { - my $num = shift; - my $tot = shift; - if ($tot != 0) { - return sprintf("%.1f%%", $num * 100.0 / $tot); - } else { - return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); - } -} - -# Generate pretty-printed form of number -sub Unparse { - my $num = shift; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return sprintf("%d", $num); - } else { - if ($main::opt_show_bytes) { - return sprintf("%d", $num); - } else { - return sprintf("%.1f", $num / 1048576.0); - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds - } else { - return sprintf("%d", $num); - } -} - -# Alternate pretty-printed form: 0 maps to "." 
-sub UnparseAlt { - my $num = shift; - if ($num == 0) { - return "."; - } else { - return Unparse($num); - } -} - -# Return output units -sub Units { - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - if ($main::opt_inuse_objects || $main::opt_alloc_objects) { - return "objects"; - } else { - if ($main::opt_show_bytes) { - return "B"; - } else { - return "MB"; - } - } - } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { - return "seconds"; - } else { - return "samples"; - } -} - -##### Profile manipulation code ##### - -# Generate flattened profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a] -sub FlatProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - if ($#addrs >= 0) { - AddEntry($result, $addrs[0], $count); - } - } - return $result; -} - -# Generate cumulative profile: -# If count is charged to stack [a,b,c,d], in generated profile, -# it will be charged to [a], [b], [c], [d] -sub CumulativeProfile { - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - AddEntry($result, $a, $count); - } - } - return $result; -} - -# If the second-youngest PC on the stack is always the same, returns -# that pc. Otherwise, returns undef. -sub IsSecondPcAlwaysTheSame { - my $profile = shift; - - my $second_pc = undef; - foreach my $k (keys(%{$profile})) { - my @addrs = split(/\n/, $k); - if ($#addrs < 1) { - return undef; - } - if (not defined $second_pc) { - $second_pc = $addrs[1]; - } else { - if ($second_pc ne $addrs[1]) { - return undef; - } - } - } - return $second_pc; -} - -sub ExtractSymbolLocation { - my $symbols = shift; - my $address = shift; - # 'addr2line' outputs "??:0" for unknown locations; we do the - # same to be consistent. 
- my $location = "??:0:unknown"; - if (exists $symbols->{$address}) { - my $file = $symbols->{$address}->[1]; - if ($file eq "?") { - $file = "??:0" - } - $location = $file . ":" . $symbols->{$address}->[0]; - } - return $location; -} - -# Extracts a graph of calls. -sub ExtractCalls { - my $symbols = shift; - my $profile = shift; - - my $calls = {}; - while( my ($stack_trace, $count) = each %$profile ) { - my @address = split(/\n/, $stack_trace); - my $destination = ExtractSymbolLocation($symbols, $address[0]); - AddEntry($calls, $destination, $count); - for (my $i = 1; $i <= $#address; $i++) { - my $source = ExtractSymbolLocation($symbols, $address[$i]); - my $call = "$source -> $destination"; - AddEntry($calls, $call, $count); - $destination = $source; - } - } - - return $calls; -} - -sub RemoveUninterestingFrames { - my $symbols = shift; - my $profile = shift; - - # List of function names to skip - my %skip = (); - my $skip_regexp = 'NOMATCH'; - if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - foreach my $name ('calloc', - 'cfree', - 'malloc', - 'free', - 'memalign', - 'posix_memalign', - 'pvalloc', - 'valloc', - 'realloc', - 'tc_calloc', - 'tc_cfree', - 'tc_malloc', - 'tc_free', - 'tc_memalign', - 'tc_posix_memalign', - 'tc_pvalloc', - 'tc_valloc', - 'tc_realloc', - 'tc_new', - 'tc_delete', - 'tc_newarray', - 'tc_deletearray', - 'tc_new_nothrow', - 'tc_newarray_nothrow', - 'do_malloc', - '::do_malloc', # new name -- got moved to an unnamed ns - '::do_malloc_or_cpp_alloc', - 'DoSampledAllocation', - 'simple_alloc::allocate', - '__malloc_alloc_template::allocate', - '__builtin_delete', - '__builtin_new', - '__builtin_vec_delete', - '__builtin_vec_new', - 'operator new', - 'operator new[]', - # These mark the beginning/end of our custom sections - '__start_google_malloc', - '__stop_google_malloc', - '__start_malloc_hook', - '__stop_malloc_hook') { - $skip{$name} = 1; - $skip{"_" . 
$name} = 1; # Mach (OS X) adds a _ prefix to everything - } - # TODO: Remove TCMalloc once everything has been - # moved into the tcmalloc:: namespace and we have flushed - # old code out of the system. - $skip_regexp = "TCMalloc|^tcmalloc::"; - } elsif ($main::profile_type eq 'contention') { - foreach my $vname ('base::RecordLockProfileData', - 'base::SubmitMutexProfileData', - 'base::SubmitSpinLockProfileData', - 'Mutex::Unlock', - 'Mutex::UnlockSlow', - 'Mutex::ReaderUnlock', - 'MutexLock::~MutexLock', - 'SpinLock::Unlock', - 'SpinLock::SlowUnlock', - 'SpinLockHolder::~SpinLockHolder') { - $skip{$vname} = 1; - } - } elsif ($main::profile_type eq 'cpu') { - # Drop signal handlers used for CPU profile collection - # TODO(dpeng): this should not be necessary; it's taken - # care of by the general 2nd-pc mechanism below. - foreach my $name ('ProfileData::Add', # historical - 'ProfileData::prof_handler', # historical - 'CpuProfiler::prof_handler', - '__FRAME_END__', - '__pthread_sighandler', - '__restore') { - $skip{$name} = 1; - } - } else { - # Nothing skipped for unknown types - } - - if ($main::profile_type eq 'cpu') { - # If all the second-youngest program counters are the same, - # this STRONGLY suggests that it is an artifact of measurement, - # i.e., stack frames pushed by the CPU profiler signal handler. - # Hence, we delete them. - # (The topmost PC is read from the signal structure, not from - # the stack, so it does not get involved.) 
- while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { - my $result = {}; - my $func = ''; - if (exists($symbols->{$second_pc})) { - $second_pc = $symbols->{$second_pc}->[0]; - } - print STDERR "Removing $second_pc from all stack traces.\n"; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - splice @addrs, 1, 1; - my $reduced_path = join("\n", @addrs); - AddEntry($result, $reduced_path, $count); - } - $profile = $result; - } - } - - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my @path = (); - foreach my $a (@addrs) { - if (exists($symbols->{$a})) { - my $func = $symbols->{$a}->[0]; - if ($skip{$func} || ($func =~ m/$skip_regexp/)) { - next; - } - } - push(@path, $a); - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - return $result; -} - -# Reduce profile to granularity given by user -sub ReduceProfile { - my $symbols = shift; - my $profile = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @translated = TranslateStack($symbols, $k); - my @path = (); - my %seen = (); - $seen{''} = 1; # So that empty keys are skipped - foreach my $e (@translated) { - # To avoid double-counting due to recursion, skip a stack-trace - # entry if it has already been seen - if (!$seen{$e}) { - $seen{$e} = 1; - push(@path, $e); - } - } - my $reduced_path = join("\n", @path); - AddEntry($result, $reduced_path, $count); - } - return $result; -} - -# Does the specified symbol array match the regexp? 
-sub SymbolMatches { - my $sym = shift; - my $re = shift; - if (defined($sym)) { - for (my $i = 0; $i < $#{$sym}; $i += 3) { - if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { - return 1; - } - } - } - return 0; -} - -# Focus only on paths involving specified regexps -sub FocusProfile { - my $symbols = shift; - my $profile = shift; - my $focus = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { - AddEntry($result, $k, $count); - last; - } - } - } - return $result; -} - -# Focus only on paths not involving specified regexps -sub IgnoreProfile { - my $symbols = shift; - my $profile = shift; - my $ignore = shift; - my $result = {}; - foreach my $k (keys(%{$profile})) { - my $count = $profile->{$k}; - my @addrs = split(/\n/, $k); - my $matched = 0; - foreach my $a (@addrs) { - # Reply if it matches either the address/shortname/fileline - if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { - $matched = 1; - last; - } - } - if (!$matched) { - AddEntry($result, $k, $count); - } - } - return $result; -} - -# Get total count in profile -sub TotalProfile { - my $profile = shift; - my $result = 0; - foreach my $k (keys(%{$profile})) { - $result += $profile->{$k}; - } - return $result; -} - -# Add A to B -sub AddProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - my $v = $A->{$k}; - AddEntry($R, $k, $v); - } - # add all keys in B - foreach my $k (keys(%{$B})) { - my $v = $B->{$k}; - AddEntry($R, $k, $v); - } - return $R; -} - -# Merges symbol maps -sub MergeSymbols { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - $R->{$k} = $A->{$k}; - } - if (defined($B)) { - foreach my $k (keys(%{$B})) { - $R->{$k} = $B->{$k}; - } - } - return 
$R; -} - - -# Add A to B -sub AddPcs { - my $A = shift; - my $B = shift; - - my $R = {}; - # add all keys in A - foreach my $k (keys(%{$A})) { - $R->{$k} = 1 - } - # add all keys in B - foreach my $k (keys(%{$B})) { - $R->{$k} = 1 - } - return $R; -} - -# Subtract B from A -sub SubtractProfile { - my $A = shift; - my $B = shift; - - my $R = {}; - foreach my $k (keys(%{$A})) { - my $v = $A->{$k} - GetEntry($B, $k); - if ($v < 0 && $main::opt_drop_negative) { - $v = 0; - } - AddEntry($R, $k, $v); - } - if (!$main::opt_drop_negative) { - # Take care of when subtracted profile has more entries - foreach my $k (keys(%{$B})) { - if (!exists($A->{$k})) { - AddEntry($R, $k, 0 - $B->{$k}); - } - } - } - return $R; -} - -# Get entry from profile; zero if not present -sub GetEntry { - my $profile = shift; - my $k = shift; - if (exists($profile->{$k})) { - return $profile->{$k}; - } else { - return 0; - } -} - -# Add entry to specified profile -sub AddEntry { - my $profile = shift; - my $k = shift; - my $n = shift; - if (!exists($profile->{$k})) { - $profile->{$k} = 0; - } - $profile->{$k} += $n; -} - -# Add a stack of entries to specified profile, and add them to the $pcs -# list. -sub AddEntries { - my $profile = shift; - my $pcs = shift; - my $stack = shift; - my $count = shift; - my @k = (); - - foreach my $e (split(/\s+/, $stack)) { - my $pc = HexExtend($e); - $pcs->{$pc} = 1; - push @k, $pc; - } - AddEntry($profile, (join "\n", @k), $count); -} - -##### Code to profile a server dynamically ##### - -sub CheckSymbolPage { - my $url = SymbolPageURL(); - open(SYMBOL, "$URL_FETCHER '$url' |"); - my $line = ; - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(SYMBOL); - unless (defined($line)) { - error("$url doesn't exist\n"); - } - - if ($line =~ /^num_symbols:\s+(\d+)$/) { - if ($1 == 0) { - error("Stripped binary. 
No symbols available.\n"); - } - } else { - error("Failed to get the number of symbols from $url\n"); - } -} - -sub IsProfileURL { - my $profile_name = shift; - if (-f $profile_name) { - printf STDERR "Using local file $profile_name.\n"; - return 0; - } - return 1; -} - -sub ParseProfileURL { - my $profile_name = shift; - - if (!defined($profile_name) || $profile_name eq "") { - return (); - } - - # Split profile URL - matches all non-empty strings, so no test. - $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; - - my $proto = $1 || "http://"; - my $hostport = $2; - my $prefix = $3; - my $profile = $4 || "/"; - - my $host = $hostport; - $host =~ s/:.*//; - - my $baseurl = "$proto$hostport$prefix"; - return ($host, $baseurl, $profile); -} - -# We fetch symbols from the first profile argument. -sub SymbolPageURL { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - return "$baseURL$SYMBOL_PAGE"; -} - -sub FetchProgramName() { - my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); - my $url = "$baseURL$PROGRAM_NAME_PAGE"; - my $command_line = "$URL_FETCHER '$url'"; - open(CMDLINE, "$command_line |") or error($command_line); - my $cmdline = ; - $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines - close(CMDLINE); - error("Failed to get program name from $url\n") unless defined($cmdline); - $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. - $cmdline =~ s!\n!!g; # Remove LFs. - return $cmdline; -} - -# Gee, curl's -L (--location) option isn't reliable at least -# with its 7.12.3 version. Curl will forget to post data if -# there is a redirection. This function is a workaround for -# curl. Redirection happens on borg hosts. 
-sub ResolveRedirectionForCurl { - my $url = shift; - my $command_line = "$URL_FETCHER --head '$url'"; - open(CMDLINE, "$command_line |") or error($command_line); - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^Location: (.*)/) { - $url = $1; - } - } - close(CMDLINE); - return $url; -} - -# Add a timeout flat to URL_FETCHER -sub AddFetchTimeout { - my $fetcher = shift; - my $timeout = shift; - if (defined($timeout)) { - if ($fetcher =~ m/\bcurl -s/) { - $fetcher .= sprintf(" --max-time %d", $timeout); - } elsif ($fetcher =~ m/\brpcget\b/) { - $fetcher .= sprintf(" --deadline=%d", $timeout); - } - } - return $fetcher; -} - -# Reads a symbol map from the file handle name given as $1, returning -# the resulting symbol map. Also processes variables relating to symbols. -# Currently, the only variable processed is 'binary=' which updates -# $main::prog to have the correct program name. -sub ReadSymbols { - my $in = shift; - my $map = {}; - while (<$in>) { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Removes all the leading zeroes from the symbols, see comment below. - if (m/^0x0*([0-9a-f]+)\s+(.+)/) { - $map->{$1} = $2; - } elsif (m/^---/) { - last; - } elsif (m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1, $2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "binary") { - if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { - printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", - $main::prog, $value); - } - $main::prog = $value; - } else { - printf STDERR ("Ignoring unknown variable in symbols list: " . - "'%s' = '%s'\n", $variable, $value); - } - } - } - return $map; -} - -# Fetches and processes symbols to prepare them for use in the profile output -# code. If the optional 'symbol_map' arg is not given, fetches symbols from -# $SYMBOL_PAGE for all PC values found in profile. 
Otherwise, the raw symbols -# are assumed to have already been fetched into 'symbol_map' and are simply -# extracted and processed. -sub FetchSymbols { - my $pcset = shift; - my $symbol_map = shift; - - my %seen = (); - my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq - - if (!defined($symbol_map)) { - my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); - - open(POSTFILE, ">$main::tmpfile_sym"); - print POSTFILE $post_data; - close(POSTFILE); - - my $url = SymbolPageURL(); - - my $command_line; - if ($URL_FETCHER =~ m/\bcurl -s/) { - $url = ResolveRedirectionForCurl($url); - $command_line = "$URL_FETCHER -d '\@$main::tmpfile_sym' '$url'"; - } else { - $command_line = "$URL_FETCHER --post '$url' < '$main::tmpfile_sym'"; - } - # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. - my $cppfilt = $obj_tool_map{"c++filt"}; - open(SYMBOL, "$command_line | $cppfilt |") or error($command_line); - $symbol_map = ReadSymbols(*SYMBOL{IO}); - close(SYMBOL); - } - - my $symbols = {}; - foreach my $pc (@pcs) { - my $fullname; - # For 64 bits binaries, symbols are extracted with 8 leading zeroes. - # Then /symbol reads the long symbols in as uint64, and outputs - # the result with a "0x%08llx" format which get rid of the zeroes. - # By removing all the leading zeroes in both $pc and the symbols from - # /symbol, the symbols match and are retrievable from the map. - my $shortpc = $pc; - $shortpc =~ s/^0*//; - # Each line may have a list of names, which includes the function - # and also other functions it has inlined. They are separated - # (in PrintSymbolizedFile), by --, which is illegal in function names. - my $fullnames; - if (defined($symbol_map->{$shortpc})) { - $fullnames = $symbol_map->{$shortpc}; - } else { - $fullnames = "0x" . 
$pc; # Just use addresses - } - my $sym = []; - $symbols->{$pc} = $sym; - foreach my $fullname (split("--", $fullnames)) { - my $name = ShortFunctionName($fullname); - push(@{$sym}, $name, "?", $fullname); - } - } - return $symbols; -} - -sub BaseName { - my $file_name = shift; - $file_name =~ s!^.*/!!; # Remove directory name - return $file_name; -} - -sub MakeProfileBaseName { - my ($binary_name, $profile_name) = @_; - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - my $binary_shortname = BaseName($binary_name); - return sprintf("%s.%s.%s", - $binary_shortname, $main::op_time, $host); -} - -sub FetchDynamicProfile { - my $binary_name = shift; - my $profile_name = shift; - my $fetch_name_only = shift; - my $encourage_patience = shift; - - if (!IsProfileURL($profile_name)) { - return $profile_name; - } else { - my ($host, $baseURL, $path) = ParseProfileURL($profile_name); - if ($path eq "" || $path eq "/") { - # Missing type specifier defaults to cpu-profile - $path = $PROFILE_PAGE; - } - - my $profile_file = MakeProfileBaseName($binary_name, $profile_name); - - my $url = "$baseURL$path"; - my $fetch_timeout = undef; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { - if ($path =~ m/[?]/) { - $url .= "&"; - } else { - $url .= "?"; - } - $url .= sprintf("seconds=%d", $main::opt_seconds); - $fetch_timeout = $main::opt_seconds * 1.01 + 60; - } else { - # For non-CPU profiles, we add a type-extension to - # the target profile file name. - my $suffix = $path; - $suffix =~ s,/,.,g; - $profile_file .= $suffix; - } - - my $profile_dir = $ENV{"PPROF_TMPDIR"} || ($ENV{HOME} . "/pprof"); - if (! 
-d $profile_dir) { - mkdir($profile_dir) - || die("Unable to create profile directory $profile_dir: $!\n"); - } - my $tmp_profile = "$profile_dir/.tmp.$profile_file"; - my $real_profile = "$profile_dir/$profile_file"; - - if ($fetch_name_only > 0) { - return $real_profile; - } - - my $fetcher = AddFetchTimeout($URL_FETCHER, $fetch_timeout); - my $cmd = "$fetcher '$url' > '$tmp_profile'"; - if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ - print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; - if ($encourage_patience) { - print STDERR "Be patient...\n"; - } - } else { - print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; - } - - (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); - (system("mv $tmp_profile $real_profile") == 0) || error("Unable to rename profile\n"); - print STDERR "Wrote profile to $real_profile\n"; - $main::collected_profile = $real_profile; - return $main::collected_profile; - } -} - -# Collect profiles in parallel -sub FetchDynamicProfiles { - my $items = scalar(@main::pfile_args); - my $levels = log($items) / log(2); - - if ($items == 1) { - $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); - } else { - # math rounding issues - if ((2 ** $levels) < $items) { - $levels++; - } - my $count = scalar(@main::pfile_args); - for (my $i = 0; $i < $count; $i++) { - $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); - } - print STDERR "Fetching $count profiles, Be patient...\n"; - FetchDynamicProfilesRecurse($levels, 0, 0); - $main::collected_profile = join(" \\\n ", @main::profile_files); - } -} - -# Recursively fork a process to get enough processes -# collecting profiles -sub FetchDynamicProfilesRecurse { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if (my $pid = fork()) { - $position = 0 | ($position << 1); - TryCollectProfile($maxlevel, $level, 
$position); - wait; - } else { - $position = 1 | ($position << 1); - TryCollectProfile($maxlevel, $level, $position); - cleanup(); - exit(0); - } -} - -# Collect a single profile -sub TryCollectProfile { - my $maxlevel = shift; - my $level = shift; - my $position = shift; - - if ($level >= ($maxlevel - 1)) { - if ($position < scalar(@main::pfile_args)) { - FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); - } - } else { - FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); - } -} - -##### Parsing code ##### - -# Provide a small streaming-read module to handle very large -# cpu-profile files. Stream in chunks along a sliding window. -# Provides an interface to get one 'slot', correctly handling -# endian-ness differences. A slot is one 32-bit or 64-bit word -# (depending on the input profile). We tell endianness and bit-size -# for the profile by looking at the first 8 bytes: in cpu profiles, -# the second slot is always 3 (we'll accept anything that's not 0). -BEGIN { - package CpuProfileStream; - - sub new { - my ($class, $file, $fname) = @_; - my $self = { file => $file, - base => 0, - stride => 512 * 1024, # must be a multiple of bitsize/8 - slots => [], - unpack_code => "", # N for big-endian, V for little - perl_is_64bit => 1, # matters if profile is 64-bit - }; - bless $self, $class; - # Let unittests adjust the stride - if ($main::opt_test_stride > 0) { - $self->{stride} = $main::opt_test_stride; - } - # Read the first two slots to figure out bitsize and endianness. - my $slots = $self->{slots}; - my $str; - read($self->{file}, $str, 8); - # Set the global $address_length based on what we see here. - # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). - $address_length = ($str eq (chr(0)x8)) ? 16 : 8; - if ($address_length == 8) { - if (substr($str, 6, 2) eq chr(0)x2) { - $self->{unpack_code} = 'V'; # Little-endian. 
- } elsif (substr($str, 4, 2) eq chr(0)x2) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**16\n"); - } - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # If we're a 64-bit profile, check if we're a 64-bit-capable - # perl. Otherwise, each slot will be represented as a float - # instead of an int64, losing precision and making all the - # 64-bit addresses wrong. We won't complain yet, but will - # later if we ever see a value that doesn't fit in 32 bits. - my $has_q = 0; - eval { $has_q = pack("Q", "1") ? 1 : 1; }; - if (!$has_q) { - $self->{perl_is_64bit} = 0; - } - read($self->{file}, $str, 8); - if (substr($str, 4, 4) eq chr(0)x4) { - # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. - $self->{unpack_code} = 'V'; # Little-endian. - } elsif (substr($str, 0, 4) eq chr(0)x4) { - $self->{unpack_code} = 'N'; # Big-endian - } else { - ::error("$fname: header size >= 2**32\n"); - } - my @pair = unpack($self->{unpack_code} . "*", $str); - # Since we know one of the pair is 0, it's fine to just add them. - @$slots = (0, $pair[0] + $pair[1]); - } - return $self; - } - - # Load more data when we access slots->get(X) which is not yet in memory. - sub overflow { - my ($self) = @_; - my $slots = $self->{slots}; - $self->{base} += $#$slots + 1; # skip over data we're replacing - my $str; - read($self->{file}, $str, $self->{stride}); - if ($address_length == 8) { # the 32-bit case - # This is the easy case: unpack provides 32-bit unpacking primitives. - @$slots = unpack($self->{unpack_code} . "*", $str); - } else { - # We need to unpack 32 bits at a time and combine. - my @b32_values = unpack($self->{unpack_code} . "*", $str); - my @b64_values = (); - for (my $i = 0; $i < $#b32_values; $i += 2) { - # TODO(csilvers): if this is a 32-bit perl, the math below - # could end up in a too-large int, which perl will promote - # to a double, losing necessary precision. Deal with that. 
- # Right now, we just die. - my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); - if ($self->{unpack_code} eq 'N') { # big-endian - ($lo, $hi) = ($hi, $lo); - } - my $value = $lo + $hi * (2**32); - if (!$self->{perl_is_64bit} && # check value is exactly represented - (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { - ::error("Need a 64-bit perl to process this 64-bit profile.\n"); - } - push(@b64_values, $value); - } - @$slots = @b64_values; - } - } - - # Access the i-th long in the file (logically), or -1 at EOF. - sub get { - my ($self, $idx) = @_; - my $slots = $self->{slots}; - while ($#$slots >= 0) { - if ($idx < $self->{base}) { - # The only time we expect a reference to $slots[$i - something] - # after referencing $slots[$i] is reading the very first header. - # Since $stride > |header|, that shouldn't cause any lookback - # errors. And everything after the header is sequential. - print STDERR "Unexpected look-back reading CPU profile"; - return -1; # shrug, don't know what better to return - } elsif ($idx > $self->{base} + $#$slots) { - $self->overflow(); - } else { - return $slots->[$idx - $self->{base}]; - } - } - # If we get here, $slots is [], which means we've reached EOF - return -1; # unique since slots is supposed to hold unsigned numbers - } -} - -# Reads the top, 'header' section of a profile, and returns the last -# line of the header, commonly called a 'header line'. The header -# section of a profile consists of zero or more 'command' lines that -# are instructions to pprof, which pprof executes when reading the -# header. All 'command' lines start with a %. After the command -# lines is the 'header line', which is a profile-specific line that -# indicates what type of profile it is, and perhaps other global -# information about the profile. 
For instance, here's a header line -# for a heap profile: -# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile -# For historical reasons, the CPU profile does not contain a text- -# readable header line. If the profile looks like a CPU profile, -# this function returns "". If no header line could be found, this -# function returns undef. -# -# The following commands are recognized: -# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' -# -# The input file should be in binmode. -sub ReadProfileHeader { - local *PROFILE = shift; - my $firstchar = ""; - my $line = ""; - read(PROFILE, $firstchar, 1); - seek(PROFILE, -1, 1); # unread the firstchar - if ($firstchar !~ /[[:print:]]/) { # is not a text character - return ""; - } - while (defined($line = )) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ($line =~ /^%warn\s+(.*)/) { # 'warn' command - # Note this matches both '%warn blah\n' and '%warn\n'. - print STDERR "WARNING: $1\n"; # print the rest of the line - } elsif ($line =~ /^%/) { - print STDERR "Ignoring unknown command from profile header: $line"; - } else { - # End of commands, must be the header line. - return $line; - } - } - return undef; # got to EOF without seeing a header line -} - -sub IsSymbolizedProfileFile { - my $file_name = shift; - if (!(-e $file_name) || !(-r $file_name)) { - return 0; - } - # Check if the file contains a symbol-section marker. 
- open(TFILE, "<$file_name"); - binmode TFILE; - my $firstline = ReadProfileHeader(*TFILE); - close(TFILE); - if (!$firstline) { - return 0; - } - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - return $firstline =~ /^--- *$symbol_marker/; -} - -# Parse profile generated by common/profiler.cc and return a reference -# to a map: -# $result->{version} Version number of profile file -# $result->{period} Sampling period (in microseconds) -# $result->{profile} Profile object -# $result->{map} Memory map info from profile -# $result->{pcs} Hash of all PC values seen, key is hex address -sub ReadProfile { - my $prog = shift; - my $fname = shift; - my $result; # return value - - $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $contention_marker = $&; - $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $growth_marker = $&; - $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $symbol_marker = $&; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; - - # Look at first line to see if it is a heap or a CPU profile. - # CPU profile may start with no header at all, and just binary data - # (starting with \0\0\0\0) -- in that case, don't try to read the - # whole firstline, since it may be gigabytes(!) of data. - open(PROFILE, "<$fname") || error("$fname: $!\n"); - binmode PROFILE; # New perls do UTF-8 processing - my $header = ReadProfileHeader(*PROFILE); - if (!defined($header)) { # means "at EOF" - error("Profile is empty.\n"); - } - - my $symbols; - if ($header =~ m/^--- *$symbol_marker/o) { - # Verify that the user asked for a symbolized profile - if (!$main::use_symbolized_profile) { - # we have both a binary and symbolized profiles, abort - error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . - "a binary arg. 
Try again without passing\n $prog\n"); - } - # Read the symbol section of the symbolized profile file. - $symbols = ReadSymbols(*PROFILE{IO}); - # Read the next line to get the header for the remaining profile. - $header = ReadProfileHeader(*PROFILE) || ""; - } - - $main::profile_type = ''; - if ($header =~ m/^heap profile:.*$growth_marker/o) { - $main::profile_type = 'growth'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^heap profile:/) { - $main::profile_type = 'heap'; - $result = ReadHeapProfile($prog, *PROFILE, $header); - } elsif ($header =~ m/^--- *$contention_marker/o) { - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *Stacks:/) { - print STDERR - "Old format contention profile: mistakenly reports " . - "condition variable signals as lock contentions.\n"; - $main::profile_type = 'contention'; - $result = ReadSynchProfile($prog, *PROFILE); - } elsif ($header =~ m/^--- *$profile_marker/) { - # the binary cpu profile data starts immediately after this line - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } else { - if (defined($symbols)) { - # a symbolized profile contains a format we don't recognize, bail out - error("$fname: Cannot recognize profile section after symbols.\n"); - } - # no ascii header present -- must be a CPU profile - $main::profile_type = 'cpu'; - $result = ReadCPUProfile($prog, $fname, *PROFILE); - } - - close(PROFILE); - - # if we got symbols along with the profile, return those as well - if (defined($symbols)) { - $result->{symbols} = $symbols; - } - - return $result; -} - -# Subtract one from caller pc so we map back to call instr. -# However, don't do this if we're reading a symbolized profile -# file, in which case the subtract-one was done when the file -# was written. -# -# We apply the same logic to all readers, though ReadCPUProfile uses an -# independent implementation. 
-sub FixCallerAddresses { - my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { - $stack =~ /(\s)/; - my $delimiter = $1; - my @addrs = split(' ', $stack); - my @fixedaddrs; - $#fixedaddrs = $#addrs; - if ($#addrs >= 0) { - $fixedaddrs[0] = $addrs[0]; - } - for (my $i = 1; $i <= $#addrs; $i++) { - $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); - } - return join $delimiter, @fixedaddrs; - } -} - -# CPU profile reader -sub ReadCPUProfile { - my $prog = shift; - my $fname = shift; # just used for logging - local *PROFILE = shift; - my $version; - my $period; - my $i; - my $profile = {}; - my $pcs = {}; - - # Parse string into array of slots. - my $slots = CpuProfileStream->new(*PROFILE, $fname); - - # Read header. The current header version is a 5-element structure - # containing: - # 0: header count (always 0) - # 1: header "words" (after this one: 3) - # 2: format version (0) - # 3: sampling period (usec) - # 4: unused padding (always 0) - if ($slots->get(0) != 0 ) { - error("$fname: not a profile file, or old format profile file\n"); - } - $i = 2 + $slots->get(1); - $version = $slots->get(2); - $period = $slots->get(3); - # Do some sanity checking on these header values. - if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { - error("$fname: not a profile file, or corrupted profile file\n"); - } - - # Parse profile - while ($slots->get($i) != -1) { - my $n = $slots->get($i++); - my $d = $slots->get($i++); - if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? - my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); - print STDERR "At index $i (address $addr):\n"; - error("$fname: stack trace depth >= 2**32\n"); - } - if ($slots->get($i) == 0) { - # End of profile data marker - $i += $d; - last; - } - - # Make key out of the stack entries - my @k = (); - for (my $j = 0; $j < $d; $j++) { - my $pc = $slots->get($i+$j); - # Subtract one from caller pc so we map back to call instr. 
- # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; - } - $pc = sprintf("%0*x", $address_length, $pc); - $pcs->{$pc} = 1; - push @k, $pc; - } - - AddEntry($profile, (join "\n", @k), $n); - $i += $d; - } - - # Parse map - my $map = ''; - seek(PROFILE, $i * 4, 0); - read(PROFILE, $map, (stat PROFILE)[7]); - - my $r = {}; - $r->{version} = $version; - $r->{period} = $period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - - return $r; -} - -sub ReadHeapProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $index = 1; - if ($main::opt_inuse_space) { - $index = 1; - } elsif ($main::opt_inuse_objects) { - $index = 0; - } elsif ($main::opt_alloc_space) { - $index = 3; - } elsif ($main::opt_alloc_objects) { - $index = 2; - } - - # Find the type of this profile. The header line looks like: - # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 - # There are two pairs , the first inuse objects/space, and the - # second allocated objects/space. This is followed optionally by a profile - # type, and if that is present, optionally by a sampling frequency. - # For remote heap profiles (v1): - # The interpretation of the sampling frequency is that the profiler, for - # each sample, calculates a uniformly distributed random integer less than - # the given value, and records the next sample after that many bytes have - # been allocated. Therefore, the expected sample interval is half of the - # given frequency. By default, if not specified, the expected sample - # interval is 128KB. Only remote-heap-page profiles are adjusted for - # sample size. - # For remote heap profiles (v2): - # The sampling frequency is the rate of a Poisson process. 
This means that - # the probability of sampling an allocation of size X with sampling rate Y - # is 1 - exp(-X/Y) - # For version 2, a typical header line might look like this: - # heap profile: 1922: 127792360 [ 1922: 127792360] @ _v2/524288 - # the trailing number (524288) is the sampling rate. (Version 1 showed - # double the 'rate' here) - my $sampling_algorithm = 0; - my $sample_adjustment = 0; - chomp($header); - my $type = "unknown"; - if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { - if (defined($6) && ($6 ne '')) { - $type = $6; - my $sample_period = $8; - # $type is "heapprofile" for profiles generated by the - # heap-profiler, and either "heap" or "heap_v2" for profiles - # generated by sampling directly within tcmalloc. It can also - # be "growth" for heap-growth profiles. The first is typically - # found for profiles generated locally, and the others for - # remote profiles. - if (($type eq "heapprofile") || ($type !~ /heap/) ) { - # No need to adjust for the sampling rate with heap-profiler-derived data - $sampling_algorithm = 0; - } elsif ($type =~ /_v2/) { - $sampling_algorithm = 2; # version 2 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period); - } - } else { - $sampling_algorithm = 1; # version 1 sampling - if (defined($sample_period) && ($sample_period ne '')) { - $sample_adjustment = int($sample_period)/2; - } - } - } else { - # We detect whether or not this is a remote-heap profile by checking - # that the total-allocated stats ($n2,$s2) are exactly the - # same as the in-use stats ($n1,$s1). It is remotely conceivable - # that a non-remote-heap profile may pass this check, but it is hard - # to imagine how that could happen. - # In this case it's so old it's guaranteed to be remote-heap version 1. 
- my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - if (($n1 == $n2) && ($s1 == $s2)) { - # This is likely to be a remote-heap based sample profile - $sampling_algorithm = 1; - } - } - } - - if ($sampling_algorithm > 0) { - # For remote-heap generated profiles, adjust the counts and sizes to - # account for the sample rate (we sample once every 128KB by default). - if ($sample_adjustment == 0) { - # Turn on profile adjustment. - $sample_adjustment = 128*1024; - print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; - } else { - printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", - $sample_adjustment); - } - if ($sampling_algorithm > 1) { - # We don't bother printing anything for the original version (version 1) - printf STDERR "Heap version $sampling_algorithm\n"; - } - } - - my $profile = {}; - my $pcs = {}; - my $map = ""; - - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (/^MAPPED_LIBRARIES:/) { - # Read the /proc/self/maps data - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - $map .= $_; - } - last; - } - - if (/^--- Memory map:/) { - # Read /proc/self/maps data as formatted by DumpAddressMap() - my $buildvar = ""; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Parse "build=" specification if supplied - if (m/^\s*build=(.*)\n/) { - $buildvar = $1; - } - - # Expand "$build" variable if available - $_ =~ s/\$build\b/$buildvar/g; - - $map .= $_; - } - last; - } - - # Read entry of the form: - # : [: ] @ a1 a2 a3 ... an - s/^\s*//; - s/\s*$//; - if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { - my $stack = $5; - my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); - - if ($sample_adjustment) { - if ($sampling_algorithm == 2) { - # Remote-heap version 2 - # The sampling frequency is the rate of a Poisson process. 
- # This means that the probability of sampling an allocation of - # size X with sampling rate Y is 1 - exp(-X/Y) - if ($n1 != 0) { - my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n1 *= $scale_factor; - $s1 *= $scale_factor; - } - if ($n2 != 0) { - my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - my $scale_factor = 1/(1 - exp(-$ratio)); - $n2 *= $scale_factor; - $s2 *= $scale_factor; - } - } else { - # Remote-heap version 1 - my $ratio; - $ratio = (($s1*1.0)/$n1)/($sample_adjustment); - if ($ratio < 1) { - $n1 /= $ratio; - $s1 /= $ratio; - } - $ratio = (($s2*1.0)/$n2)/($sample_adjustment); - if ($ratio < 1) { - $n2 /= $ratio; - $s2 /= $ratio; - } - } - } - - my @counts = ($n1, $s1, $n2, $s2); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); - } - } - - my $r = {}; - $r->{version} = "heap"; - $r->{period} = 1; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -sub ReadSynchProfile { - my $prog = shift; - local *PROFILE = shift; - my $header = shift; - - my $map = ''; - my $profile = {}; - my $pcs = {}; - my $sampling_period = 1; - my $cyclespernanosec = 2.8; # Default assumption for old binaries - my $seen_clockrate = 0; - my $line; - - my $index = 0; - if ($main::opt_total_delay) { - $index = 0; - } elsif ($main::opt_contentions) { - $index = 1; - } elsif ($main::opt_mean_delay) { - $index = 2; - } - - while ( $line = ) { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $count, $stack) = ($1, $2, $3); - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - $count *= $sampling_period; - - my @values = ($cycles, $count, $cycles / $count); - AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); - - } elsif ( $line =~ /^(slow 
release).*thread \d+ \@\s*(.*?)\s*$/ || - $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { - my ($cycles, $stack) = ($1, $2); - if ($cycles !~ /^\d+$/) { - next; - } - - # Convert cycles to nanoseconds - $cycles /= $cyclespernanosec; - - # Adjust for sampling done by application - $cycles *= $sampling_period; - - AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); - - } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { - my ($variable, $value) = ($1,$2); - for ($variable, $value) { - s/^\s+//; - s/\s+$//; - } - if ($variable eq "cycles/second") { - $cyclespernanosec = $value / 1e9; - $seen_clockrate = 1; - } elsif ($variable eq "sampling period") { - $sampling_period = $value; - } elsif ($variable eq "ms since reset") { - # Currently nothing is done with this value in pprof - # So we just silently ignore it for now - } elsif ($variable eq "discarded samples") { - # Currently nothing is done with this value in pprof - # So we just silently ignore it for now - } else { - printf STDERR ("Ignoring unnknown variable in /contention output: " . - "'%s' = '%s'\n",$variable,$value); - } - } else { - # Memory map entry - $map .= $line; - } - } - - if (!$seen_clockrate) { - printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", - $cyclespernanosec); - } - - my $r = {}; - $r->{version} = 0; - $r->{period} = $sampling_period; - $r->{profile} = $profile; - $r->{libs} = ParseLibraries($prog, $map, $pcs); - $r->{pcs} = $pcs; - return $r; -} - -# Given a hex value in the form "0x1abcd" return "0001abcd" or -# "000000000001abcd", depending on the current address length. -# There's probably a more idiomatic (or faster) way to do this... 
-sub HexExtend { - my $addr = shift; - - $addr =~ s/^0x//; - - if (length $addr > $address_length) { - printf STDERR "Warning: address $addr is longer than address length $address_length\n"; - } - - return substr("000000000000000".$addr, -$address_length); -} - -##### Symbol extraction ##### - -# Aggressively search the lib_prefix values for the given library -# If all else fails, just return the name of the library unmodified. -# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" -# it will search the following locations in this order, until it finds a file: -# /my/path/lib/dir/mylib.so -# /other/path/lib/dir/mylib.so -# /my/path/dir/mylib.so -# /other/path/dir/mylib.so -# /my/path/mylib.so -# /other/path/mylib.so -# /lib/dir/mylib.so (returned as last resort) -sub FindLibrary { - my $file = shift; - my $suffix = $file; - - # Search for the library as described above - do { - foreach my $prefix (@prefix_list) { - my $fullpath = $prefix . $suffix; - if (-e $fullpath) { - return $fullpath; - } - } - } while ($suffix =~ s|^/[^/]+/|/|); - return $file; -} - -# Return path to library with debugging symbols. -# For libc libraries, the copy in /usr/lib/debug contains debugging symbols -sub DebuggingLibrary { - my $file = shift; - if ($file =~ m|^/| && -f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; - } - return undef; -} - -# Parse text section header of a library using objdump -sub ParseTextSectionHeaderFromObjdump { - my $lib = shift; - - my $size = undef; - my $vma; - my $file_offset; - # Get objdump output from the library file to figure out how to - # map between mapped addresses and addresses in the library. 
- my $objdump = $obj_tool_map{"objdump"}; - open(OBJDUMP, "$objdump -h $lib |") - || error("$objdump $lib: $!\n"); - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - # Idx Name Size VMA LMA File off Algn - # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 - # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file - # offset may still be 8. But AddressSub below will still handle that. - my @x = split; - if (($#x >= 6) && ($x[1] eq '.text')) { - $size = $x[2]; - $vma = $x[3]; - $file_offset = $x[5]; - last; - } - } - close(OBJDUMP); - - if (!defined($size)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -# Parse text section header of a library using otool (on OS X) -sub ParseTextSectionHeaderFromOtool { - my $lib = shift; - - my $size = undef; - my $vma = undef; - my $file_offset = undef; - # Get otool output from the library file to figure out how to - # map between mapped addresses and addresses in the library. - my $otool = $obj_tool_map{"otool"}; - open(OTOOL, "$otool -l $lib |") - || error("$otool $lib: $!\n"); - my $cmd = ""; - my $sectname = ""; - my $segname = ""; - foreach my $line () { - $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines - # Load command <#> - # cmd LC_SEGMENT - # [...] - # Section - # sectname __text - # segname __TEXT - # addr 0x000009f8 - # size 0x00018b9e - # offset 2552 - # align 2^2 (4) - # We will need to strip off the leading 0x from the hex addresses, - # and convert the offset into hex. 
- if ($line =~ /Load command/) { - $cmd = ""; - $sectname = ""; - $segname = ""; - } elsif ($line =~ /Section/) { - $sectname = ""; - $segname = ""; - } elsif ($line =~ /cmd (\w+)/) { - $cmd = $1; - } elsif ($line =~ /sectname (\w+)/) { - $sectname = $1; - } elsif ($line =~ /segname (\w+)/) { - $segname = $1; - } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && - $sectname eq "__text" && - $segname eq "__TEXT")) { - next; - } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { - $vma = $1; - } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { - $size = $1; - } elsif ($line =~ /\boffset ([0-9]+)/) { - $file_offset = sprintf("%016x", $1); - } - if (defined($vma) && defined($size) && defined($file_offset)) { - last; - } - } - close(OTOOL); - - if (!defined($vma) || !defined($size) || !defined($file_offset)) { - return undef; - } - - my $r = {}; - $r->{size} = $size; - $r->{vma} = $vma; - $r->{file_offset} = $file_offset; - - return $r; -} - -sub ParseTextSectionHeader { - # obj_tool_map("otool") is only defined if we're in a Mach-O environment - if (defined($obj_tool_map{"otool"})) { - my $r = ParseTextSectionHeaderFromOtool(@_); - if (defined($r)){ - return $r; - } - } - # If otool doesn't work, or we don't have it, fall back to objdump - return ParseTextSectionHeaderFromObjdump(@_); -} - -# Split /proc/pid/maps dump into a list of libraries -sub ParseLibraries { - return if $main::use_symbol_page; # We don't need libraries info. - my $prog = shift; - my $map = shift; - my $pcs = shift; - - my $result = []; - my $h = "[a-f0-9]+"; - my $zero_offset = HexExtend("0"); - - my $buildvar = ""; - foreach my $l (split("\n", $map)) { - if ($l =~ m/^\s*build=(.*)$/) { - $buildvar = $1; - } - - my $start; - my $finish; - my $offset; - my $lib; - if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { - # Full line from /proc/self/maps. 
Example: - # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = HexExtend($3); - $lib = $4; - $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths - } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { - # Cooked line from DumpAddressMap. Example: - # 40000000-40015000: /lib/ld-2.3.2.so - $start = HexExtend($1); - $finish = HexExtend($2); - $offset = $zero_offset; - $lib = $3; - } else { - next; - } - - # Expand "$build" variable if available - $lib =~ s/\$build\b/$buildvar/g; - - $lib = FindLibrary($lib); - - # Check for pre-relocated libraries, which use pre-relocated symbol tables - # and thus require adjusting the offset that we'll use to translate - # VM addresses into symbol table addresses. - # Only do this if we're not going to fetch the symbol table from a - # debugging copy of the library. - if (!DebuggingLibrary($lib)) { - my $text = ParseTextSectionHeader($lib); - if (defined($text)) { - my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); - $offset = AddressAdd($offset, $vma_offset); - } - } - - push(@{$result}, [$lib, $start, $finish, $offset]); - } - - # Append special entry for additional library (not relocated) - if ($main::opt_lib ne "") { - my $text = ParseTextSectionHeader($main::opt_lib); - if (defined($text)) { - my $start = $text->{vma}; - my $finish = AddressAdd($start, $text->{size}); - - push(@{$result}, [$main::opt_lib, $start, $finish, $start]); - } - } - - # Append special entry for the main program. This covers - # 0..max_pc_value_seen, so that we assume pc values not found in one - # of the library ranges will be treated as coming from the main - # program binary. 
- my $min_pc = HexExtend("0"); - my $max_pc = $min_pc; # find the maximal PC value in any sample - foreach my $pc (keys(%{$pcs})) { - if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } - } - push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); - - return $result; -} - -# Add two hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. -sub AddressAdd { - my $addr1 = shift; - my $addr2 = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - - if ($main::opt_debug and $main::opt_test) { - print STDERR "AddressAdd $addr1 + $addr2 = "; - } - - my $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - my $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2); - my $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - my $r = sprintf("%07x", $sum); - - $a1 = substr($addr1,-7); - $addr1 = substr($addr1,0,-7); - $a2 = substr($addr2,-7); - $addr2 = substr($addr2,0,-7); - $sum = hex($a1) + hex($a2) + $c; - $c = 0; - if ($sum > 0xfffffff) { - $c = 1; - $sum -= 0x10000000; - } - $r = sprintf("%07x", $sum) . $r; - - $sum = hex($addr1) + hex($addr2) + $c; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } - - return $r; - } -} - - -# Subtract two hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. 
-sub AddressSub { - my $addr1 = shift; - my $addr2 = shift; - my $diff; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); - return sprintf("%08x", $diff); - - } else { - # Do the addition in 7-nibble chunks to trivialize borrow handling. - # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } - - my $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - my $a2 = hex(substr($addr2,-7)); - $addr2 = substr($addr2,0,-7); - my $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - my $r = sprintf("%07x", $diff); - - $a1 = hex(substr($addr1,-7)); - $addr1 = substr($addr1,0,-7); - $a2 = hex(substr($addr2,-7)) + $b; - $addr2 = substr($addr2,0,-7); - $b = 0; - if ($a2 > $a1) { - $b = 1; - $a1 += 0x10000000; - } - $diff = $a1 - $a2; - $r = sprintf("%07x", $diff) . $r; - - $a1 = hex($addr1); - $a2 = hex($addr2) + $b; - if ($a2 > $a1) { $a1 += 0x100; } - $diff = $a1 - $a2; - $r = sprintf("%02x", $diff) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - - return $r; - } -} - -# Increment a hex addresses of length $address_length. -# Run pprof --test for unit test if this is changed. -sub AddressInc { - my $addr = shift; - my $sum; - - if ($address_length == 8) { - # Perl doesn't cope with wraparound arithmetic, so do it explicitly: - $sum = (hex($addr)+1) % (0x10000000 * 16); - return sprintf("%08x", $sum); - - } else { - # Do the addition in 7-nibble chunks to trivialize carry handling. - # We are always doing this to step through the addresses in a function, - # and will almost never overflow the first chunk, so we check for this - # case and exit early. - - # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } - - my $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - my $r = sprintf("%07x", $sum); - if ($sum <= 0xfffffff) { - $r = $addr . 
$r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "0000000"; - } - - $a1 = substr($addr,-7); - $addr = substr($addr,0,-7); - $sum = hex($a1) + 1; - $r = sprintf("%07x", $sum) . $r; - if ($sum <= 0xfffffff) { - $r = $addr . $r; - # if ($main::opt_debug) { print STDERR "$r\n"; } - return HexExtend($r); - } else { - $r = "00000000000000"; - } - - $sum = hex($addr) + 1; - if ($sum > 0xff) { $sum -= 0x100; } - $r = sprintf("%02x", $sum) . $r; - - # if ($main::opt_debug) { print STDERR "$r\n"; } - return $r; - } -} - -# Extract symbols for all PC values found in profile -sub ExtractSymbols { - my $libs = shift; - my $pcset = shift; - - my $symbols = {}; - - # Map each PC value to the containing library. To make this faster, - # we sort libraries by their starting pc value (highest first), and - # advance through the libraries as we advance the pc. Sometimes the - # addresses of libraries may overlap with the addresses of the main - # binary, so to make sure the libraries 'win', we iterate over the - # libraries in reverse order (which assumes the binary doesn't start - # in the middle of a library, which seems a fair assumption). - my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings - foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { - my $libname = $lib->[0]; - my $start = $lib->[1]; - my $finish = $lib->[2]; - my $offset = $lib->[3]; - - # Get list of pcs that belong in this library. - my $contained = []; - my ($start_pc_index, $finish_pc_index); - # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. - for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; - $finish_pc_index--) { - last if $pcs[$finish_pc_index - 1] le $finish; - } - # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. 
- for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; - $start_pc_index--) { - last if $pcs[$start_pc_index - 1] lt $start; - } - # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, - # in case there are overlaps in libraries and the main binary. - @{$contained} = splice(@pcs, $start_pc_index, - $finish_pc_index - $start_pc_index); - # Map to symbols - MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); - } - - return $symbols; -} - -# Map list of PC values to symbols for a given image -sub MapToSymbols { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - my $debug = 0; - - # Ignore empty binaries - if ($#{$pclist} < 0) { return; } - - # Figure out the addr2line command to use - my $addr2line = $obj_tool_map{"addr2line"}; - my $cmd = "$addr2line -f -C -e $image"; - if (exists $obj_tool_map{"addr2line_pdb"}) { - $addr2line = $obj_tool_map{"addr2line_pdb"}; - $cmd = "$addr2line --demangle -f -C -e $image"; - } - - # If "addr2line" isn't installed on the system at all, just use - # nm to get what info we can (function names, but not line numbers). - if (system("$addr2line --help >/dev/null 2>&1") != 0) { - MapSymbolsWithNM($image, $offset, $pclist, $symbols); - return; - } - - # "addr2line -i" can produce a variable number of lines per input - # address, with no separator that allows us to tell when data for - # the next address starts. So we find the address for a special - # symbol (_fini) and interleave this address between all real - # addresses passed to addr2line. The name of this special symbol - # can then be used as a separator. - $sep_address = undef; # May be filled in by MapSymbolsWithNM() - my $nm_symbols = {}; - MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); - # TODO(csilvers): only add '-i' if addr2line supports it. - if (defined($sep_address)) { - # Only add " -i" to addr2line if the binary supports it. 
- # addr2line --help returns 0, but not if it sees an unknown flag first. - if (system("$cmd -i --help >/dev/null 2>&1") == 0) { - $cmd .= " -i"; - } else { - $sep_address = undef; # no need for sep_address if we don't support -i - } - } - - # Make file with all PC values with intervening 'sep_address' so - # that we can reliably detect the end of inlined function list - open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); - if ($debug) { print("---- $image ---\n"); } - for (my $i = 0; $i <= $#{$pclist}; $i++) { - # addr2line always reads hex addresses, and does not need '0x' prefix. - if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } - printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); - if (defined($sep_address)) { - printf ADDRESSES ("%s\n", $sep_address); - } - } - close(ADDRESSES); - if ($debug) { - print("----\n"); - system("cat $main::tmpfile_sym"); - print("----\n"); - system("$cmd <$main::tmpfile_sym"); - print("----\n"); - } - - open(SYMBOLS, "$cmd <$main::tmpfile_sym |") || error("$cmd: $!\n"); - my $count = 0; # Index in pclist - while () { - # Read fullfunction and filelineinfo from next pair of lines - s/\r?\n$//g; - my $fullfunction = $_; - $_ = ; - s/\r?\n$//g; - my $filelinenum = $_; - - if (defined($sep_address) && $fullfunction eq $sep_symbol) { - # Terminating marker for data for this address - $count++; - next; - } - - $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths - - my $pcstr = $pclist->[$count]; - my $function = ShortFunctionName($fullfunction); - if ($fullfunction eq '??') { - # See if nm found a symbol - my $nms = $nm_symbols->{$pcstr}; - if (defined($nms)) { - $function = $nms->[0]; - $fullfunction = $nms->[2]; - } - } - - # Prepend to accumulated symbols for pcstr - # (so that caller comes before callee) - my $sym = $symbols->{$pcstr}; - if (!defined($sym)) { - $sym = []; - $symbols->{$pcstr} = $sym; - } - unshift(@{$sym}, $function, $filelinenum, $fullfunction); - if 
($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } - if (!defined($sep_address)) { - # Inlining is off, se this entry ends immediately - $count++; - } - } - close(SYMBOLS); -} - -# Use nm to map the list of referenced PCs to symbols. Return true iff we -# are able to read procedure information via nm. -sub MapSymbolsWithNM { - my $image = shift; - my $offset = shift; - my $pclist = shift; - my $symbols = shift; - - # Get nm output sorted by increasing address - my $symbol_table = GetProcedureBoundaries($image, "."); - if (!%{$symbol_table}) { - return 0; - } - # Start addresses are already the right length (8 or 16 hex digits). - my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } - keys(%{$symbol_table}); - - if ($#names < 0) { - # No symbols: just use addresses - foreach my $pc (@{$pclist}) { - my $pcstr = "0x" . $pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - return 0; - } - - # Sort addresses so we can do a join against nm output - my $index = 0; - my $fullname = $names[0]; - my $name = ShortFunctionName($fullname); - foreach my $pc (sort { $a cmp $b } @{$pclist}) { - # Adjust for mapped offset - my $mpc = AddressSub($pc, $offset); - while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ - $index++; - $fullname = $names[$index]; - $name = ShortFunctionName($fullname); - } - if ($mpc lt $symbol_table->{$fullname}->[1]) { - $symbols->{$pc} = [$name, "?", $fullname]; - } else { - my $pcstr = "0x" . $pc; - $symbols->{$pc} = [$pcstr, "?", $pcstr]; - } - } - return 1; -} - -sub ShortFunctionName { - my $function = shift; - while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types - while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments - $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type - return $function; -} - -##### Miscellaneous ##### - -# Find the right versions of the above object tools to use. 
The -# argument is the program file being analyzed, and should be an ELF -# 32-bit or ELF 64-bit executable file. The location of the tools -# is determined by considering the following options in this order: -# 1) --tools option, if set -# 2) PPROF_TOOLS environment variable, if set -# 3) the environment -sub ConfigureObjTools { - my $prog_file = shift; - - # Check for the existence of $prog_file because /usr/bin/file does not - # predictably return error status in prod. - (-e $prog_file) || error("$prog_file does not exist.\n"); - - # Follow symlinks (at least for systems where "file" supports that) - my $file_type = `/usr/bin/file -L $prog_file 2>/dev/null || /usr/bin/file $prog_file`; - if ($file_type =~ /64-bit/) { - # Change $address_length to 16 if the program file is ELF 64-bit. - # We can't detect this from many (most?) heap or lock contention - # profiles, since the actual addresses referenced are generally in low - # memory even for 64-bit programs. - $address_length = 16; - } - - if ($file_type =~ /MS Windows/) { - # For windows, we provide a version of nm and addr2line as part of - # the opensource release, which is capable of parsing - # Windows-style PDB executables. It should live in the path, or - # in the same directory as pprof. - $obj_tool_map{"nm_pdb"} = "nm-pdb"; - $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; - } - - if ($file_type =~ /Mach-O/) { - # OS X uses otool to examine Mach-O files, rather than objdump. - $obj_tool_map{"otool"} = "otool"; - $obj_tool_map{"addr2line"} = "false"; # no addr2line - $obj_tool_map{"objdump"} = "false"; # no objdump - } - - # Go fill in %obj_tool_map with the pathnames to use: - foreach my $tool (keys %obj_tool_map) { - $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); - } -} - -# Returns the path of a caller-specified object tool. If --tools or -# PPROF_TOOLS are specified, then returns the full path to the tool -# with that prefix. 
Otherwise, returns the path unmodified (which -# means we will look for it on PATH). -sub ConfigureTool { - my $tool = shift; - my $path; - - # --tools (or $PPROF_TOOLS) is a comma separated list, where each - # item is either a) a pathname prefix, or b) a map of the form - # :. First we look for an entry of type (b) for our - # tool. If one is found, we use it. Otherwise, we consider all the - # pathname prefixes in turn, until one yields an existing file. If - # none does, we use a default path. - my $tools = $main::opt_tools || $ENV{"PPROF_TOOLS"} || ""; - if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { - $path = $2; - # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. - } elsif ($tools ne '') { - foreach my $prefix (split(',', $tools)) { - next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list - if (-x $prefix . $tool) { - $path = $prefix . $tool; - last; - } - } - if (!$path) { - error("No '$tool' found with prefix specified by " . - "--tools (or \$PPROF_TOOLS) '$tools'\n"); - } - } else { - # ... otherwise use the version that exists in the same directory as - # pprof. If there's nothing there, use $PATH. - $0 =~ m,[^/]*$,; # this is everything after the last slash - my $dirname = $`; # this is everything up to and including the last slash - if (-x "$dirname$tool") { - $path = "$dirname$tool"; - } else { - $path = $tool; - } - } - if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } - return $path; -} - -sub cleanup { - unlink($main::tmpfile_sym); - unlink(keys %main::tempnames); - - # We leave any collected profiles in $HOME/pprof in case the user wants - # to look at them later. We print a message informing them of this. 
- if ((scalar(@main::profile_files) > 0) && - defined($main::collected_profile)) { - if (scalar(@main::profile_files) == 1) { - print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; - } - print STDERR "If you want to investigate this profile further, you can do:\n"; - print STDERR "\n"; - print STDERR " pprof \\\n"; - print STDERR " $main::prog \\\n"; - print STDERR " $main::collected_profile\n"; - print STDERR "\n"; - } -} - -sub sighandler { - cleanup(); - exit(1); -} - -sub error { - my $msg = shift; - print STDERR $msg; - cleanup(); - exit(1); -} - - -# Run $nm_command and get all the resulting procedure boundaries whose -# names match "$regexp" and returns them in a hashtable mapping from -# procedure name to a two-element vector of [start address, end address] -sub GetProcedureBoundariesViaNm { - my $nm_command = shift; - my $regexp = shift; - - my $symbol_table = {}; - open(NM, "$nm_command |") || error("$nm_command: $!\n"); - my $last_start = "0"; - my $routine = ""; - while () { - s/\r//g; # turn windows-looking lines into unix-looking lines - if (m/^\s*([0-9a-f]+) (.) (..*)/) { - my $start_val = $1; - my $type = $2; - my $this_routine = $3; - - # It's possible for two symbols to share the same address, if - # one is a zero-length variable (like __start_google_malloc) or - # one symbol is a weak alias to another (like __libc_malloc). - # In such cases, we want to ignore all values except for the - # actual symbol, which in nm-speak has type "T". The logic - # below does this, though it's a bit tricky: what happens when - # we have a series of lines with the same address, is the first - # one gets queued up to be processed. However, it won't - # *actually* be processed until later, when we read a line with - # a different address. 
That means that as long as we're reading - # lines with the same address, we have a chance to replace that - # item in the queue, which we do whenever we see a 'T' entry -- - # that is, a line with type 'T'. If we never see a 'T' entry, - # we'll just go ahead and process the first entry (which never - # got touched in the queue), and ignore the others. - if ($start_val eq $last_start && $type =~ /t/i) { - # We are the 'T' symbol at this address, replace previous symbol. - $routine = $this_routine; - next; - } elsif ($start_val eq $last_start) { - # We're not the 'T' symbol at this address, so ignore us. - next; - } - - if ($this_routine eq $sep_symbol) { - $sep_address = HexExtend($start_val); - } - - # Tag this routine with the starting address in case the image - # has multiple occurrences of this routine. We use a syntax - # that resembles template paramters that are automatically - # stripped out by ShortFunctionName() - $this_routine .= "<$start_val>"; - - if (defined($routine) && $routine =~ m/$regexp/) { - $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($start_val)]; - } - $last_start = $start_val; - $routine = $this_routine; - } elsif (m/^Loaded image name: (.+)/) { - # The win32 nm workalike emits information about the binary it is using. - if ($main::opt_debug) { print STDERR "Using Image $1\n"; } - } elsif (m/^PDB file name: (.+)/) { - # The win32 nm workalike emits information about the pdb it is using. - if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } - } - } - close(NM); - # Handle the last line in the nm output. Unfortunately, we don't know - # how big this last symbol is, because we don't know how big the file - # is. For now, we just give it a size of 0. - # TODO(csilvers): do better here. 
- if (defined($routine) && $routine =~ m/$regexp/) { - $symbol_table->{$routine} = [HexExtend($last_start), - HexExtend($last_start)]; - } - return $symbol_table; -} - -# Gets the procedure boundaries for all routines in "$image" whose names -# match "$regexp" and returns them in a hashtable mapping from procedure -# name to a two-element vector of [start address, end address]. -# Will return an empty map if nm is not installed or not working properly. -sub GetProcedureBoundaries { - my $image = shift; - my $regexp = shift; - - # For libc libraries, the copy in /usr/lib/debug contains debugging symbols - my $debugging = DebuggingLibrary($image); - if ($debugging) { - $image = $debugging; - } - - my $nm = $obj_tool_map{"nm"}; - my $cppfilt = $obj_tool_map{"c++filt"}; - - # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm - # binary doesn't support --demangle. In addition, for OS X we need - # to use the -f flag to get 'flat' nm output (otherwise we don't sort - # properly and get incorrect results). Unfortunately, GNU nm uses -f - # in an incompatible way. So first we test whether our nm supports - # --demangle and -f. - my $demangle_flag = ""; - my $cppfilt_flag = ""; - if (system("$nm --demangle $image >/dev/null 2>&1") == 0) { - # In this mode, we do "nm --demangle " - $demangle_flag = "--demangle"; - $cppfilt_flag = ""; - } elsif (system("$cppfilt $image >/dev/null 2>&1") == 0) { - # In this mode, we do "nm | c++filt" - $cppfilt_flag = " | $cppfilt"; - }; - my $flatten_flag = ""; - if (system("$nm -f $image >/dev/null 2>&1") == 0) { - $flatten_flag = "-f"; - } - - # Finally, in the case $imagie isn't a debug library, we try again with - # -D to at least get *exported* symbols. If we can't use --demangle, - # we use c++filt instead, if it exists on this system. - my @nm_commands = ("$nm -n $flatten_flag $demangle_flag" . - " $image 2>/dev/null $cppfilt_flag", - "$nm -D -n $flatten_flag $demangle_flag" . 
- " $image 2>/dev/null $cppfilt_flag", - # 6nm is for Go binaries - "6nm $image 2>/dev/null | sort", - ); - - # If the executable is an MS Windows PDB-format executable, we'll - # have set up obj_tool_map("nm_pdb"). In this case, we actually - # want to use both unix nm and windows-specific nm_pdb, since - # PDB-format executables can apparently include dwarf .o files. - if (exists $obj_tool_map{"nm_pdb"}) { - my $nm_pdb = $obj_tool_map{"nm_pdb"}; - push(@nm_commands, "$nm_pdb --demangle $image 2>/dev/null"); - } - - foreach my $nm_command (@nm_commands) { - my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); - return $symbol_table if (%{$symbol_table}); - } - my $symbol_table = {}; - return $symbol_table; -} - - -# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. -# To make them more readable, we add underscores at interesting places. -# This routine removes the underscores, producing the canonical representation -# used by pprof to represent addresses, particularly in the tested routines. -sub CanonicalHex { - my $arg = shift; - return join '', (split '_',$arg); -} - - -# Unit test for AddressAdd: -sub AddressAddUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. 
Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressAdd ($row->[0], $row->[1]); - if ($sum ne $row->[2]) { - printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, - $row->[0], $row->[1], $row->[2]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); - my $expected = join '', (split '_',$row->[2]); - if ($sum ne CanonicalHex($row->[2])) { - printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, - $row->[0], $row->[1], $row->[2]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Unit test for AddressSub: -sub AddressSubUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. 
Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressSub ($row->[0], $row->[1]); - if ($sum ne $row->[3]) { - printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, - $row->[0], $row->[1], $row->[3]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); - if ($sum ne CanonicalHex($row->[3])) { - printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, - $row->[0], $row->[1], $row->[3]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Unit test for AddressInc: -sub AddressIncUnitTest { - my $test_data_8 = shift; - my $test_data_16 = shift; - my $error_count = 0; - my $fail_count = 0; - my $pass_count = 0; - # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; - - # First a few 8-nibble addresses. 
Note that this implementation uses - # plain old arithmetic, so a quick sanity check along with verifying what - # happens to overflow (we want it to wrap): - $address_length = 8; - foreach my $row (@{$test_data_8}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressInc ($row->[0]); - if ($sum ne $row->[4]) { - printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, - $row->[0], $row->[4]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count = $fail_count; - $fail_count = 0; - $pass_count = 0; - - # Now 16-nibble addresses. - $address_length = 16; - foreach my $row (@{$test_data_16}) { - if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } - my $sum = AddressInc (CanonicalHex($row->[0])); - if ($sum ne CanonicalHex($row->[4])) { - printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, - $row->[0], $row->[4]; - ++$fail_count; - } else { - ++$pass_count; - } - } - printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", - $pass_count, $fail_count; - $error_count += $fail_count; - - return $error_count; -} - - -# Driver for unit tests. -# Currently just the address add/subtract/increment routines for 64-bit. -sub RunUnitTests { - my $error_count = 0; - - # This is a list of tuples [a, b, a+b, a-b, a+1] - my $unit_test_data_8 = [ - [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], - [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], - [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], - [qw(00000001 ffffffff 00000000 00000002 00000002)], - [qw(00000001 fffffff0 fffffff1 00000011 00000002)], - ]; - my $unit_test_data_16 = [ - # The implementation handles data in 7-nibble chunks, so those are the - # interesting boundaries. 
- [qw(aaaaaaaa 50505050 - 00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)], - [qw(50505050 aaaaaaaa - 00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)], - [qw(ffffffff aaaaaaaa - 00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)], - [qw(00000001 ffffffff - 00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)], - [qw(00000001 fffffff0 - 00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)], - - [qw(00_a00000a_aaaaaaa 50505050 - 00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)], - [qw(0f_fff0005_0505050 aaaaaaaa - 0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)], - [qw(00_000000f_fffffff 01_800000a_aaaaaaa - 01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)], - [qw(00_0000000_0000001 ff_fffffff_fffffff - 00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)], - [qw(00_0000000_0000001 ff_fffffff_ffffff0 - ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)], - ]; - - $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16); - $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16); - $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16); - if ($error_count > 0) { - print STDERR $error_count, " errors: FAILED\n"; - } else { - print STDERR "PASS\n"; - } - exit ($error_count); -} diff --git a/jemalloc/config.guess b/jemalloc/config.guess deleted file mode 100755 index 0773d0f..0000000 --- a/jemalloc/config.guess +++ /dev/null @@ -1,1456 +0,0 @@ -#! /bin/sh -# Attempt to guess a canonical system name. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. - -timestamp='2004-03-03' - -# This file is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Originally written by Per Bothner . -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. -# -# This script attempts to guess a canonical system name similar to -# config.sub. If it succeeds, it prints the system name on stdout, and -# exits with 0. Otherwise, it exits with 1. -# -# The plan is that this can be called by configure scripts if you -# don't specify an explicit build system type. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] - -Output the configuration name of the system \`$me' is run on. - -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to ." - -version="\ -GNU config.guess ($timestamp) - -Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." 
- -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; - --version | -v ) - echo "$version" ; exit 0 ;; - --help | --h* | -h ) - echo "$usage"; exit 0 ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" >&2 - exit 1 ;; - * ) - break ;; - esac -done - -if test $# != 0; then - echo "$me: too many arguments$help" >&2 - exit 1 -fi - -trap 'exit 1' 1 2 15 - -# CC_FOR_BUILD -- compiler used by this script. Note that the use of a -# compiler to aid in system detection is discouraged as it requires -# temporary files to be created and, as you can see below, it is a -# headache to deal with in a portable fashion. - -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. - -# Portable tmp directory creation inspired by the Autoconf team. - -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d -q "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac 
;' - -# This is needed to find uname on a Pyramid OSx when run in the BSD universe. -# (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then - PATH=$PATH:/.attbin ; export PATH -fi - -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown - -# Note: order is significant - the case branches are not exclusive. - -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in - *:NetBSD:*:*) - # NetBSD (nbsd) targets should (where applicable) match one or - # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, - # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently - # switched to ELF, *-*-netbsd* would select the old - # object file format. This provides both forward - # compatibility and a consistent mechanism for selecting the - # object file format. - # - # Note: NetBSD doesn't particularly care about the vendor - # portion of the name. We always set it to "unknown". - sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` - case "${UNAME_MACHINE_ARCH}" in - armeb) machine=armeb-unknown ;; - arm*) machine=arm-unknown ;; - sh3el) machine=shl-unknown ;; - sh3eb) machine=sh-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; - esac - # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. - case "${UNAME_MACHINE_ARCH}" in - arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build - if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ - | grep __ELF__ >/dev/null - then - # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). - # Return netbsd for either. FIX? 
- os=netbsd - else - os=netbsdelf - fi - ;; - *) - os=netbsd - ;; - esac - # The OS release - # Debian GNU/NetBSD machines have a different userland, and - # thus, need a distinct triplet. However, they do not need - # kernel version information, so it can be replaced with a - # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in - Debian*) - release='-gnu' - ;; - *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` - ;; - esac - # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: - # contains redundant information, the shorter form: - # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" - exit 0 ;; - amd64:OpenBSD:*:*) - echo x86_64-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - amiga:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - cats:OpenBSD:*:*) - echo arm-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - hp300:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mac68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - macppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme68k:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvme88k:OpenBSD:*:*) - echo m88k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - mvmeppc:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pegasos:OpenBSD:*:*) - echo powerpc-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sgi:OpenBSD:*:*) - echo mipseb-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - sun3:OpenBSD:*:*) - echo m68k-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:OpenBSD:*:*) - echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; - *:ekkoBSD:*:*) - echo 
${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} - exit 0 ;; - macppc:MirBSD:*:*) - echo powerppc-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; - *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} - exit 0 ;; - alpha:OSF1:*:*) - case $UNAME_RELEASE in - *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - ;; - *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` - ;; - esac - # According to Compaq, /usr/sbin/psrinfo has been available on - # OSF/1 and Tru64 systems produced since 1995. I hope that - # covers most systems running today. This code pipes the CPU - # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` - case "$ALPHA_CPU_TYPE" in - "EV4 (21064)") - UNAME_MACHINE="alpha" ;; - "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; - "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; - "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; - "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; - "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; - "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; - "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; - "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; - "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; - "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; - "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; - "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; - "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; - esac - # A Pn.n version is a patched version. - # A Vn.n version is a released version. - # A Tn.n version is a released field test version. - # A Xn.n version is an unreleased experimental baselevel. - # 1.2 uses "1.2" for uname -r. 
- echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha*:OpenVMS:*:*) - echo alpha-hp-vms - exit 0 ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? - echo alpha-pc-interix - exit 0 ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit 0 ;; - Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit 0;; - *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos - exit 0 ;; - *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos - exit 0 ;; - *:OS/390:*:*) - echo i370-ibm-openedition - exit 0 ;; - *:OS400:*:*) - echo powerpc-ibm-os400 - exit 0 ;; - arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} - exit 0;; - SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit 0;; - Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) - # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit 0 ;; - NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit 0 ;; - DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) - case `/usr/bin/uname -p` in - sparc) echo sparc-icl-nx7 && exit 0 ;; - esac ;; - sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - i86pc:SunOS:5.*:*) - echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:6*:*) - # According to config.sub, this is the proper way to canonicalize - # SunOS6. 
Hard to guess exactly what SunOS6 will be like, but - # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in - Series*|S4*) - UNAME_RELEASE=`uname -v` - ;; - esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` - exit 0 ;; - sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} - exit 0 ;; - sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 - case "`/bin/arch`" in - sun3) - echo m68k-sun-sunos${UNAME_RELEASE} - ;; - sun4) - echo sparc-sun-sunos${UNAME_RELEASE} - ;; - esac - exit 0 ;; - aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} - exit 0 ;; - # The situation for MiNT is a little confusing. The machine name - # can be virtually everything (everything which is not - # "atarist" or "atariste" at least should have a processor - # > m68000). The system name ranges from "MiNT" over "FreeMiNT" - # to the lowercase version "mint" (or "freemint"). Finally - # the system name "TOS" denotes a system which is actually not - # MiNT. But MiNT is downward compatible to TOS, so this should - # be no problem. 
- atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} - exit 0 ;; - milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} - exit 0 ;; - hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} - exit 0 ;; - *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} - exit 0 ;; - m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} - exit 0 ;; - powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} - exit 0 ;; - RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit 0 ;; - RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} - exit 0 ;; - 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} - exit 0 ;; - mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c -#ifdef __cplusplus -#include /* for printf() prototype */ - int main (int argc, char *argv[]) { -#else - int main (argc, argv) int argc; char *argv[]; { -#endif - #if defined (host_mips) && defined (MIPSEB) - #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); - #endif - #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); - #endif - #endif - exit (-1); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c \ - && $dummy `echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` \ - && exit 0 - echo mips-mips-riscos${UNAME_RELEASE} - exit 0 ;; - Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit 0 ;; - 
Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit 0 ;; - Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit 0 ;; - Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit 0 ;; - m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit 0 ;; - m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit 0 ;; - m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit 0 ;; - AViiON:dgux:*:*) - # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] - then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] - then - echo m88k-dg-dgux${UNAME_RELEASE} - else - echo m88k-dg-dguxbcs${UNAME_RELEASE} - fi - else - echo i586-dg-dgux${UNAME_RELEASE} - fi - exit 0 ;; - M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit 0 ;; - M88*:*:R3*:*) - # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit 0 ;; - XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit 0 ;; - Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit 0 ;; - *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` - exit 0 ;; - ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
- echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit 0 ;; # Note that: echo "'`uname -s`'" gives 'AIX ' - i*86:AIX:*:*) - echo i386-ibm-aix - exit 0 ;; - ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:2:3) - if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - - main() - { - if (!__power_pc()) - exit(1); - puts("powerpc-ibm-aix3.2.5"); - exit(0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo rs6000-ibm-aix3.2.5 - elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 - else - echo rs6000-ibm-aix3.2 - fi - exit 0 ;; - *:AIX:*:[45]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then - IBM_ARCH=rs6000 - else - IBM_ARCH=powerpc - fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` - else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} - fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} - exit 0 ;; - *:AIX:*:*) - echo rs6000-ibm-aix - exit 0 ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) - echo romp-ibm-bsd4.4 - exit 0 ;; - ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to - exit 0 ;; # report: romp-ibm BSD 4.3 - *:BOSX:*:*) - echo rs6000-bull-bosx - exit 0 ;; - DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit 0 ;; - 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit 0 ;; - hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit 0 ;; - 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? 
) HP_ARCH=m68k ;; - 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 - 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 - esac ;; - esac - fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - - #define _HPUX_SOURCE - #include - #include - - int main () - { - #if defined(_SC_KERNEL_BITS) - long bits = sysconf(_SC_KERNEL_BITS); - #endif - long cpu = sysconf (_SC_CPU_VERSION); - - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1"); break; - case CPU_PA_RISC2_0: - #if defined(_SC_KERNEL_BITS) - switch (bits) - { - case 64: puts ("hppa2.0w"); break; - case 32: puts ("hppa2.0n"); break; - default: puts ("hppa2.0"); break; - } break; - #else /* !defined(_SC_KERNEL_BITS) */ - puts ("hppa2.0"); break; - #endif - default: puts ("hppa1.0"); break; - } - exit (0); - } -EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` - test -z "$HP_ARCH" && HP_ARCH=hppa - fi ;; - esac - if [ ${HP_ARCH} = "hppa2.0w" ] - then - # avoid double evaluation of $set_cc_for_build - test -n "$CC_FOR_BUILD" || eval $set_cc_for_build - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E -) | grep __LP64__ >/dev/null - then - HP_ARCH="hppa2.0w" - else - HP_ARCH="hppa64" - fi - fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} - exit 0 ;; - ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} - exit 0 ;; - 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - int - main () - { - long cpu = sysconf (_SC_CPU_VERSION); - /* The order matters, because CPU_IS_HP_MC68K 
erroneously returns - true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct - results, however. */ - if (CPU_IS_PA_RISC (cpu)) - { - switch (cpu) - { - case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; - case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; - case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; - default: puts ("hppa-hitachi-hiuxwe2"); break; - } - } - else if (CPU_IS_HP_MC68K (cpu)) - puts ("m68k-hitachi-hiuxwe2"); - else puts ("unknown-hitachi-hiuxwe2"); - exit (0); - } -EOF - $CC_FOR_BUILD -o $dummy $dummy.c && $dummy && exit 0 - echo unknown-hitachi-hiuxwe2 - exit 0 ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) - echo hppa1.1-hp-bsd - exit 0 ;; - 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit 0 ;; - *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit 0 ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) - echo hppa1.1-hp-osf - exit 0 ;; - hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit 0 ;; - i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk - else - echo ${UNAME_MACHINE}-unknown-osf1 - fi - exit 0 ;; - parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit 0 ;; - C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit 0 ;; - C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit 0 ;; - C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit 0 ;; - C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit 0 ;; - CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ - | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ - -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ - -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*TS:*:*:*) - echo 
t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - *:UNICOS/mp:*:*) - echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' - exit 0 ;; - F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit 0 ;; - i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} - exit 0 ;; - sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:BSD/OS:*:*) - echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} - exit 0 ;; - *:FreeBSD:*:*) - # Determine whether the default compiler uses glibc. - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #if __GLIBC__ >= 2 - LIBC=gnu - #else - LIBC= - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - # GNU/KFreeBSD systems have a "k" prefix to indicate we are using - # FreeBSD's kernel, but not the complete OS. 
- case ${LIBC} in gnu) kernel_only='k' ;; esac - echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} - exit 0 ;; - i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin - exit 0 ;; - i*:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 - exit 0 ;; - i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 - exit 0 ;; - x86:Interix*:[34]*) - echo i586-pc-interix${UNAME_RELEASE}|sed -e 's/\..*//' - exit 0 ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit 0 ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit 0 ;; - i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin - exit 0 ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin - exit 0 ;; - prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` - exit 0 ;; - *:GNU:*:*) - # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` - exit 0 ;; - *:GNU/*:*:*) - # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu - exit 0 ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix - exit 0 ;; - arm*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - cris:Linux:*:*) - echo cris-axis-linux-gnu - exit 0 ;; - ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - mips:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips - #undef mipsel - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mipsel - #else - #if 
defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #undef CPU - #undef mips64 - #undef mips64el - #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=mips64el - #else - #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=mips64 - #else - CPU= - #endif - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 - ;; - ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu - exit 0 ;; - ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu - exit 0 ;; - alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in - EV5) UNAME_MACHINE=alphaev5 ;; - EV56) UNAME_MACHINE=alphaev56 ;; - PCA56) UNAME_MACHINE=alphapca56 ;; - PCA57) UNAME_MACHINE=alphapca56 ;; - EV6) UNAME_MACHINE=alphaev6 ;; - EV67) UNAME_MACHINE=alphaev67 ;; - EV68*) UNAME_MACHINE=alphaev68 ;; - esac - objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} - exit 0 ;; - parisc:Linux:*:* | hppa:Linux:*:*) - # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; - esac - exit 0 ;; - parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu - exit 0 ;; - s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux - exit 0 ;; - sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu - exit 0 ;; - x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu - exit 0 ;; - i*86:Linux:*:*) - # The BFD linker knows what the default object file format is, so - # first see if it will tell us. cd to the root directory to prevent - # problems with other programs or directories called `ld' in the path. - # Set LC_ALL=C to ensure ld outputs messages in English. - ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ - | sed -ne '/supported targets:/!d - s/[ ][ ]*/ /g - s/.*supported targets: *// - s/ .*// - p'` - case "$ld_supported_targets" in - elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" - ;; - a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" - exit 0 ;; - coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" - exit 0 ;; - "") - # Either a pre-BFD a.out linker (linux-gnuoldld) or - # one that does not give us useful --help. 
- echo "${UNAME_MACHINE}-pc-linux-gnuoldld" - exit 0 ;; - esac - # Determine whether the default compiler is a.out or elf - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #ifdef __ELF__ - # ifdef __GLIBC__ - # if __GLIBC__ >= 2 - LIBC=gnu - # else - LIBC=gnulibc1 - # endif - # else - LIBC=gnulibc1 - # endif - #else - #ifdef __INTEL_COMPILER - LIBC=gnu - #else - LIBC=gnuaout - #endif - #endif - #ifdef __dietlibc__ - LIBC=dietlibc - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - test x"${LIBC}" != x && echo "${UNAME_MACHINE}-pc-linux-${LIBC}" && exit 0 - test x"${TENTATIVE}" != x && echo "${TENTATIVE}" && exit 0 - ;; - i*86:DYNIX/ptx:4*:*) - # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. - # earlier versions are messed up and put the nodename in both - # sysname and nodename. - echo i386-sequent-sysv4 - exit 0 ;; - i*86:UNIX_SV:4.2MP:2.*) - # Unixware is an offshoot of SVR4, but it has its own version - # number series starting with 2... - # I am not positive that other SVR4 systems won't match this, - # I just have to hope. -- rms. - # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} - exit 0 ;; - i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility - # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx - exit 0 ;; - i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop - exit 0 ;; - i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos - exit 0 ;; - i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable - exit 0 ;; - i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp - exit 0 ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` - if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} - else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} - fi - exit 0 ;; - i*86:*:5:[78]*) - case `/bin/uname -X | grep "^Machine"` in - *486*) UNAME_MACHINE=i486 ;; - *Pentium) UNAME_MACHINE=i586 ;; - *Pent*|*Celeron) UNAME_MACHINE=i686 ;; - esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} - exit 0 ;; - i*86:*:3.2:*) - if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` - (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 - (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ - && UNAME_MACHINE=i586 - (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ - && UNAME_MACHINE=i686 - (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ - && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL - else - echo ${UNAME_MACHINE}-pc-sysv32 - fi - exit 0 ;; - pc:*:*:*) - # Left here for compatibility: - # uname -m prints for DJGPP always 'pc', but it prints nothing about - # the processor, so we play safe by assuming i386. 
- echo i386-pc-msdosdjgpp - exit 0 ;; - Intel:Mach:3*:*) - echo i386-pc-mach3 - exit 0 ;; - paragon:*:*:*) - echo i860-intel-osf1 - exit 0 ;; - i860:*:4.*:*) # i860-SVR4 - if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 - else # Add other i860-SVR4 vendors below as they are discovered. - echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 - fi - exit 0 ;; - mini*:CTIX:SYS*5:*) - # "miniframe" - echo m68010-convergent-sysv - exit 0 ;; - mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit 0 ;; - M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit 0 ;; - M68*:*:R3V[567]*:*) - test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) - OS_REL='' - test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4.3${OS_REL} && exit 0 - /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && echo i586-ncr-sysv4.3${OS_REL} && exit 0 ;; - 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) - /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && echo i486-ncr-sysv4 && exit 0 ;; - m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit 0 ;; - TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} - exit 0 ;; - SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} - exit 0 ;; - RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit 0 ;; - 
*:SINIX-*:*:*) - if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 - else - echo ns32k-sni-sysv - fi - exit 0 ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort - # says - echo i586-unisys-sysv4 - exit 0 ;; - *:UNIX_System_V:4*:FTX*) - # From Gerald Hewes . - # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit 0 ;; - *:*:*:FTX*) - # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit 0 ;; - *:VOS:*:*) - # From Paul.Green@stratus.com. - echo hppa1.1-stratus-vos - exit 0 ;; - mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} - exit 0 ;; - news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit 0 ;; - R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} - else - echo mips-unknown-sysv${UNAME_RELEASE} - fi - exit 0 ;; - BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit 0 ;; - BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit 0 ;; - BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
- echo i586-pc-beos - exit 0 ;; - SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} - exit 0 ;; - SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} - exit 0 ;; - SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} - exit 0 ;; - Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} - exit 0 ;; - *:Darwin:*:*) - case `uname -p` in - *86) UNAME_PROCESSOR=i686 ;; - powerpc) UNAME_PROCESSOR=powerpc ;; - esac - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} - exit 0 ;; - *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then - UNAME_PROCESSOR=i386 - UNAME_MACHINE=pc - fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} - exit 0 ;; - *:QNX:*:4*) - echo i386-pc-qnx - exit 0 ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} - exit 0 ;; - *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit 0 ;; - BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit 0 ;; - DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} - exit 0 ;; - *:Plan9:*:*) - # "uname -m" is not consistent, so use $cputype instead. 386 - # is converted to i386 for consistency with other x86 - # operating systems. 
- if test "$cputype" = "386"; then - UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" - fi - echo ${UNAME_MACHINE}-unknown-plan9 - exit 0 ;; - *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit 0 ;; - *:TENEX:*:*) - echo pdp10-unknown-tenex - exit 0 ;; - KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit 0 ;; - XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit 0 ;; - *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit 0 ;; - *:ITS:*:*) - echo pdp10-unknown-its - exit 0 ;; - SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} - exit 0 ;; - *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` - exit 0 ;; -esac - -#echo '(No uname command or uname output not recognized.)' 1>&2 -#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 - -eval $set_cc_for_build -cat >$dummy.c < -# include -#endif -main () -{ -#if defined (sony) -#if defined (MIPSEB) - /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, - I don't know.... 
*/ - printf ("mips-sony-bsd\n"); exit (0); -#else -#include - printf ("m68k-sony-newsos%s\n", -#ifdef NEWSOS4 - "4" -#else - "" -#endif - ); exit (0); -#endif -#endif - -#if defined (__arm) && defined (__acorn) && defined (__unix) - printf ("arm-acorn-riscix"); exit (0); -#endif - -#if defined (hp300) && !defined (hpux) - printf ("m68k-hp-bsd\n"); exit (0); -#endif - -#if defined (NeXT) -#if !defined (__ARCHITECTURE__) -#define __ARCHITECTURE__ "m68k" -#endif - int version; - version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; - if (version < 4) - printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); - else - printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); - exit (0); -#endif - -#if defined (MULTIMAX) || defined (n16) -#if defined (UMAXV) - printf ("ns32k-encore-sysv\n"); exit (0); -#else -#if defined (CMU) - printf ("ns32k-encore-mach\n"); exit (0); -#else - printf ("ns32k-encore-bsd\n"); exit (0); -#endif -#endif -#endif - -#if defined (__386BSD__) - printf ("i386-pc-bsd\n"); exit (0); -#endif - -#if defined (sequent) -#if defined (i386) - printf ("i386-sequent-dynix\n"); exit (0); -#endif -#if defined (ns32000) - printf ("ns32k-sequent-dynix\n"); exit (0); -#endif -#endif - -#if defined (_SEQUENT_) - struct utsname un; - - uname(&un); - - if (strncmp(un.version, "V2", 2) == 0) { - printf ("i386-sequent-ptx2\n"); exit (0); - } - if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ - printf ("i386-sequent-ptx1\n"); exit (0); - } - printf ("i386-sequent-ptx\n"); exit (0); - -#endif - -#if defined (vax) -# if !defined (ultrix) -# include -# if defined (BSD) -# if BSD == 43 - printf ("vax-dec-bsd4.3\n"); exit (0); -# else -# if BSD == 199006 - printf ("vax-dec-bsd4.3reno\n"); exit (0); -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# endif -# else - printf ("vax-dec-bsd\n"); exit (0); -# endif -# else - printf ("vax-dec-ultrix\n"); exit (0); -# endif -#endif - -#if defined (alliant) && defined (i860) - printf ("i860-alliant-bsd\n"); exit (0); -#endif - - exit (1); -} -EOF - -$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && $dummy && exit 0 - -# Apollos put the system type in the environment. - -test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit 0; } - -# Convex versions that predate uname can use getsysinfo(1) - -if [ -x /usr/convex/getsysinfo ] -then - case `getsysinfo -f cpu_type` in - c1*) - echo c1-convex-bsd - exit 0 ;; - c2*) - if getsysinfo -f scalar_acc - then echo c32-convex-bsd - else echo c2-convex-bsd - fi - exit 0 ;; - c34*) - echo c34-convex-bsd - exit 0 ;; - c38*) - echo c38-convex-bsd - exit 0 ;; - c4*) - echo c4-convex-bsd - exit 0 ;; - esac -fi - -cat >&2 < in order to provide the needed -information to handle your system. 
- -config.guess timestamp = $timestamp - -uname -m = `(uname -m) 2>/dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` - -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` - -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` - -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} -EOF - -exit 1 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/jemalloc/config.stamp.in b/jemalloc/config.stamp.in deleted file mode 100644 index e69de29..0000000 diff --git a/jemalloc/config.sub b/jemalloc/config.sub deleted file mode 100755 index 264f820..0000000 --- a/jemalloc/config.sub +++ /dev/null @@ -1,1549 +0,0 @@ -#! /bin/sh -# Configuration validation subroutine script. -# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. - -timestamp='2004-02-23' - -# This file is (in principle) common to ALL GNU software. -# The presence of a machine in this file suggests that SOME GNU software -# can handle that machine. It does not imply ALL GNU software can. -# -# This file is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, -# Boston, MA 02111-1307, USA. - -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Please send patches to . Submit a context -# diff and a properly formatted ChangeLog entry. -# -# Configuration subroutine to validate and canonicalize a configuration type. -# Supply the specified configuration type as an argument. -# If it is invalid, we print an error message on stderr and exit with code 1. -# Otherwise, we print the canonical config type on stdout and succeed. - -# This file is supposed to be the same for all GNU packages -# and recognize all the CPU types, system types and aliases -# that are meaningful with *any* GNU software. -# Each package is responsible for reporting which valid configurations -# it does not support. The user should be able to distinguish -# a failure to support a valid configuration from a meaningless -# configuration. - -# The goal of this file is to map all the various variations of a given -# machine specification into a single specification in the form: -# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM -# or in some cases, the newer four-part form: -# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM -# It is wrong to echo any other type of specification. - -me=`echo "$0" | sed -e 's,.*/,,'` - -usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS - -Canonicalize a configuration name. 
- -Operation modes: - -h, --help print this help, then exit - -t, --time-stamp print date of last modification, then exit - -v, --version print version number, then exit - -Report bugs and patches to <config-patches@gnu.org>." - -version="\ -GNU config.sub ($timestamp) - -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 -Free Software Foundation, Inc. - -This is free software; see the source for copying conditions. There is NO -warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." - -help=" -Try \`$me --help' for more information." - -# Parse command line -while test $# -gt 0 ; do - case $1 in - --time-stamp | --time* | -t ) - echo "$timestamp" ; exit 0 ;; - --version | -v ) - echo "$version" ; exit 0 ;; - --help | --h* | -h ) - echo "$usage"; exit 0 ;; - -- ) # Stop option processing - shift; break ;; - - ) # Use stdin as input. - break ;; - -* ) - echo "$me: invalid option $1$help" - exit 1 ;; - - *local*) - # First pass through any local machine types. - echo $1 - exit 0;; - - * ) - break ;; - esac -done - -case $# in - 0) echo "$me: missing argument$help" >&2 - exit 1;; - 1) ;; - *) echo "$me: too many arguments$help" >&2 - exit 1;; -esac - -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ - kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac - -### Let's recognize common machines as not being operating systems so -### that things like config.sub decstation-3100 work. 
We also -### recognize some manufacturers as not being operating systems, so we -### can provide default operating systems below. -case $os in - -sun*os*) - # Prevent following clause from handling this invalid input. - ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis) - os= - basic_machine=$1 - ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 - ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget version if it is 3.2v4 or newer. 
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; -esac - -# Decode aliases for certain CPU-COMPANY combinations. -case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. - 1750a | 580 \ - | a29k \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr \ - | c4x | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | fr30 | frv \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | m32r | m68000 | m68k | m88k | mcore \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64vr | mips64vrel \ - | mips64orion | mips64orionel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | msp430 \ - | ns16k | ns32k \ - | openrisc | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | 
powerpc64le | powerpcle | ppcbe \ - | pyramid \ - | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \ - | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ - | v850 | v850e \ - | we32k \ - | x86 | xscale | xstormy16 | xtensa \ - | z8k) - basic_machine=$basic_machine-unknown - ;; - m6811 | m68hc11 | m6812 | m68hc12) - # Motorola 68HC11/12. - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - - # We use `pc' rather than `unknown' - # because (1) that's what they normally are, and - # (2) the word "unknown" tends to confuse beginning users. - i*86 | x86_64) - basic_machine=$basic_machine-pc - ;; - # Object if more than one company name word. - *-*-*) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* \ - | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ - | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | m32r-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipstx39-* | mipstx39el-* \ - | msp430-* \ - | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ - | pyramid-* \ - | romp-* | rs6000-* \ - | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ - | tahoe-* | thumb-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tron-* \ - | v850-* | v850e-* | vax-* \ - | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ - 
| ymp-* \ - | z8k-*) - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. - 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; - cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - cr16c) - basic_machine=cr16c-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) 
- basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - 
hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; -# I'm not sure what "Sysv32" means. Should this be sysv3.2? - i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - basic_machine=ns32k-utek - os=-sysv - ;; - mingw32) - basic_machine=i386-pc - os=-mingw32 - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - mmix*) - basic_machine=mmix-knuth - os=-mmixware - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - 
basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - nv1) - basic_machine=nv1-cray - os=-unicosmp - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - or32 | or32-*) - basic_machine=or32-unknown - os=-coff - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 - ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc - ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc - ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc - ;; - pentium4) - basic_machine=i786-pc - ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - pn) - basic_machine=pn-gould - ;; - power) basic_machine=power-ibm - ;; - ppc) 
basic_machine=powerpc-unknown - ;; - ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppcle | powerpclittle | ppc-le | powerpc-little) - basic_machine=powerpcle-unknown - ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - 
os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - tic54x | c54x*) - basic_machine=tic54x-unknown - os=-coff - ;; - tic55x | c55x*) - basic_machine=tic55x-unknown - os=-coff - ;; - tic6x | c6x*) - basic_machine=tic6x-unknown - os=-coff - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) - basic_machine=m68k-ncr - ;; - tpf) - basic_machine=s390x-ibm - os=-tpf - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none - ;; - -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. 
- w89k) - basic_machine=hppa1.1-winbond - ;; - op50n) - basic_machine=hppa1.1-oki - ;; - op60c) - basic_machine=hppa1.1-oki - ;; - romp) - basic_machine=romp-ibm - ;; - rs6000) - basic_machine=rs6000-ibm - ;; - vax) - basic_machine=vax-dec - ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown - ;; - pdp11) - basic_machine=pdp11-dec - ;; - we32k) - basic_machine=we32k-att - ;; - sh3 | sh4 | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparc | sparcv9 | sparcv9b) - basic_machine=sparc-sun - ;; - cydra) - basic_machine=cydra-cydrome - ;; - orion) - basic_machine=orion-highlevel - ;; - orion105) - basic_machine=clipper-highlevel - ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple - ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple - ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. - ;; - *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 - ;; -esac - -# Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` - ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` - ;; - *) - ;; -esac - -# Decode manufacturer-specific aliases for certain operating systems. - -if [ x"$os" != x"" ] -then -case $os in - # First match some system type aliases - # that might get confused with valid system types. - # -solaris* is a basic system type, with this one exception. - -solaris1 | -solaris1.*) - os=`echo $os | sed -e 's|solaris1|sunos4|'` - ;; - -solaris) - os=-solaris2 - ;; - -svr4*) - os=-sysv4 - ;; - -unixware*) - os=-sysv4.2uw - ;; - -gnu/linux*) - os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` - ;; - # First accept the basic system types. - # The portable systems comes first. - # Each alternative MUST END IN A *, to match a version number. 
- # -sysv* is not here because it comes later, after sysvr4. - -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ - | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ - | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ - | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* \ - | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ - | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ - | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ - | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ - | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ - | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ - | -chorusos* | -chorusrdb* \ - | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ - | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ - | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ - | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ - | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ - | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly*) - # Remember, each alternative MUST END IN *, to match a version number. 
- ;; - -qnx*) - case $basic_machine in - x86-* | i*86-*) - ;; - *) - os=-nto$os - ;; - esac - ;; - -nto-qnx*) - ;; - -nto*) - os=`echo $os | sed -e 's|nto|nto-qnx|'` - ;; - -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ - | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ - | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) - ;; - -mac*) - os=`echo $os | sed -e 's|mac|macos|'` - ;; - -linux-dietlibc) - os=-linux-dietlibc - ;; - -linux*) - os=`echo $os | sed -e 's|linux|linux-gnu|'` - ;; - -sunos5*) - os=`echo $os | sed -e 's|sunos5|solaris2|'` - ;; - -sunos6*) - os=`echo $os | sed -e 's|sunos6|solaris3|'` - ;; - -opened*) - os=-openedition - ;; - -os400*) - os=-os400 - ;; - -wince*) - os=-wince - ;; - -osfrose*) - os=-osfrose - ;; - -osf*) - os=-osf - ;; - -utek*) - os=-bsd - ;; - -dynix*) - os=-bsd - ;; - -acis*) - os=-aos - ;; - -atheos*) - os=-atheos - ;; - -syllable*) - os=-syllable - ;; - -386bsd) - os=-bsd - ;; - -ctix* | -uts*) - os=-sysv - ;; - -nova*) - os=-rtmk-nova - ;; - -ns2 ) - os=-nextstep2 - ;; - -nsk*) - os=-nsk - ;; - # Preserve the version number of sinix5. - -sinix5.*) - os=`echo $os | sed -e 's|sinix|sysv|'` - ;; - -sinix*) - os=-sysv4 - ;; - -tpf*) - os=-tpf - ;; - -triton*) - os=-sysv3 - ;; - -oss*) - os=-sysv3 - ;; - -svr4) - os=-sysv4 - ;; - -svr3) - os=-sysv3 - ;; - -sysvr4) - os=-sysv4 - ;; - # This must come after -sysvr4. - -sysv*) - ;; - -ose*) - os=-ose - ;; - -es1800*) - os=-ose - ;; - -xenix) - os=-xenix - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - os=-mint - ;; - -aros*) - os=-aros - ;; - -kaos*) - os=-kaos - ;; - -none) - ;; - *) - # Get rid of the `-' at the beginning of $os. - os=`echo $os | sed 's/[^-]*-//'` - echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 - exit 1 - ;; -esac -else - -# Here we handle the default operating systems that come with various machines. 
-# The value should be what the vendor currently ships out the door with their -# machine or put another way, the most popular os provided with the machine. - -# Note that if you're going to try to match "-MANUFACTURER" here (say, -# "-sun"), then you have to tell the case statement up towards the top -# that MANUFACTURER isn't an operating system. Otherwise, code above -# will signal an error saying that MANUFACTURER isn't an operating -# system, and we'll never get to this point. - -case $basic_machine in - *-acorn) - os=-riscix1.2 - ;; - arm*-rebel) - os=-linux - ;; - arm*-semi) - os=-aout - ;; - c4x-* | tic4x-*) - os=-coff - ;; - # This must come before the *-dec entry. - pdp10-*) - os=-tops20 - ;; - pdp11-*) - os=-none - ;; - *-dec | vax-*) - os=-ultrix4.2 - ;; - m68*-apollo) - os=-domain - ;; - i386-sun) - os=-sunos4.0.2 - ;; - m68000-sun) - os=-sunos3 - # This also exists in the configure program, but was not the - # default. - # os=-sunos4 - ;; - m68*-cisco) - os=-aout - ;; - mips*-cisco) - os=-elf - ;; - mips*-*) - os=-elf - ;; - or32-*) - os=-coff - ;; - *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 - ;; - sparc-* | *-sun) - os=-sunos4.1.1 - ;; - *-be) - os=-beos - ;; - *-ibm) - os=-aix - ;; - *-wec) - os=-proelf - ;; - *-winbond) - os=-proelf - ;; - *-oki) - os=-proelf - ;; - *-hp) - os=-hpux - ;; - *-hitachi) - os=-hiux - ;; - i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv - ;; - *-cbm) - os=-amigaos - ;; - *-dg) - os=-dgux - ;; - *-dolphin) - os=-sysv3 - ;; - m68k-ccur) - os=-rtu - ;; - m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs - ;; - *-next) - os=-nextstep3 - ;; - *-gould) - os=-sysv - ;; - *-highlevel) - os=-bsd - ;; - *-encore) - os=-bsd - ;; - *-sgi) - os=-irix - ;; - *-siemens) - os=-sysv4 - ;; - *-masscomp) - os=-rtu - ;; - f30[01]-fujitsu | f700-fujitsu) - os=-uxpv - ;; - *-rom68k) - os=-coff - ;; - *-*bug) - os=-coff - ;; - *-apple) - os=-macos - ;; - *-atari*) - os=-mint - ;; - *) - os=-none - ;; -esac -fi - -# Here we handle the case where we know the os, and the CPU type, but not the -# manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) - vendor=acorn - ;; - -sunos*) - vendor=sun - ;; - -aix*) - vendor=ibm - ;; - -beos*) - vendor=be - ;; - -hpux*) - vendor=hp - ;; - -mpeix*) - vendor=hp - ;; - -hiux*) - vendor=hitachi - ;; - -unos*) - vendor=crds - ;; - -dgux*) - vendor=dg - ;; - -luna*) - vendor=omron - ;; - -genix*) - vendor=ns - ;; - -mvs* | -opened*) - vendor=ibm - ;; - -os400*) - vendor=ibm - ;; - -ptx*) - vendor=sequent - ;; - -tpf*) - vendor=ibm - ;; - -vxsim* | -vxworks* | -windiss*) - vendor=wrs - ;; - -aux*) - vendor=apple - ;; - -hms*) - vendor=hitachi - ;; - -mpw* | -macos*) - vendor=apple - ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) - vendor=atari - ;; - -vos*) - vendor=stratus - ;; - esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` - ;; -esac - -echo $basic_machine$os -exit 0 - -# Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" -# time-stamp-end: "'" -# End: diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac deleted file mode 100644 index 412d3d1..0000000 --- a/jemalloc/configure.ac +++ /dev/null @@ -1,927 +0,0 @@ -dnl Process this file with autoconf to produce a configure script. -AC_INIT([Makefile.in]) - -dnl ============================================================================ -dnl Custom macro definitions. 
- -dnl JE_CFLAGS_APPEND(cflag) -AC_DEFUN([JE_CFLAGS_APPEND], -[ -AC_MSG_CHECKING([whether compiler supports $1]) -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="$1" -else - CFLAGS="${CFLAGS} $1" -fi -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[ -]], [[ - return 0; -]])], - AC_MSG_RESULT([yes]), - AC_MSG_RESULT([no]) - [CFLAGS="${TCFLAGS}"] -) -]) - -dnl JE_COMPILABLE(label, hcode, mcode, rvar) -AC_DEFUN([JE_COMPILABLE], -[ -AC_MSG_CHECKING([whether $1 is compilable]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[$2], [$3])], - AC_MSG_RESULT([yes]) - [$4="yes"], - AC_MSG_RESULT([no]) - [$4="no"] -) -]) - -dnl ============================================================================ - -srcroot=$srcdir -if test "x${srcroot}" = "x." ; then - srcroot="" -else - srcroot="${srcroot}/" -fi -AC_SUBST([srcroot]) -abs_srcroot="`cd \"${srcdir}\"; pwd`/" -AC_SUBST([abs_srcroot]) - -objroot="" -AC_SUBST([objroot]) -abs_objroot="`pwd`/" -AC_SUBST([abs_objroot]) - -dnl Munge install path variables. -if test "x$prefix" = "xNONE" ; then - prefix="/usr/local" -fi -if test "x$exec_prefix" = "xNONE" ; then - exec_prefix=$prefix -fi -PREFIX=$prefix -AC_SUBST([PREFIX]) -BINDIR=`eval echo $bindir` -BINDIR=`eval echo $BINDIR` -AC_SUBST([BINDIR]) -INCLUDEDIR=`eval echo $includedir` -INCLUDEDIR=`eval echo $INCLUDEDIR` -AC_SUBST([INCLUDEDIR]) -LIBDIR=`eval echo $libdir` -LIBDIR=`eval echo $LIBDIR` -AC_SUBST([LIBDIR]) -DATADIR=`eval echo $datadir` -DATADIR=`eval echo $DATADIR` -AC_SUBST([DATADIR]) -MANDIR=`eval echo $mandir` -MANDIR=`eval echo $MANDIR` -AC_SUBST([MANDIR]) - -dnl Support for building documentation. 
-AC_PATH_PROG([XSLTPROC], [xsltproc], , [$PATH]) -AC_ARG_WITH([xslroot], - [AS_HELP_STRING([--with-xslroot=<path>], [XSL stylesheet root path])], -if test "x$with_xslroot" = "xno" ; then - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" -else - XSLROOT="${with_xslroot}" -fi, - XSLROOT="/usr/share/xml/docbook/stylesheet/docbook-xsl" -) -AC_SUBST([XSLROOT]) - -dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, -dnl just prevent autoconf from molesting CFLAGS. -CFLAGS=$CFLAGS -AC_PROG_CC -if test "x$CFLAGS" = "x" ; then - no_CFLAGS="yes" - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - JE_CFLAGS_APPEND([-Wall]) - JE_CFLAGS_APPEND([-pipe]) - JE_CFLAGS_APPEND([-g3]) - fi -fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi -AC_PROG_CPP - -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 -else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) - -AC_CHECK_SIZEOF([int]) -if test "x${ac_cv_sizeof_int}" = "x8" ; then - LG_SIZEOF_INT=3 -elif test "x${ac_cv_sizeof_int}" = "x4" ; then - LG_SIZEOF_INT=2 -else - AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) - -AC_CHECK_SIZEOF([long]) -if test "x${ac_cv_sizeof_long}" = "x8" ; then - LG_SIZEOF_LONG=3 -elif test "x${ac_cv_sizeof_long}" = "x4" ; then - LG_SIZEOF_LONG=2 -else - AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}]) -fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) - -AC_CANONICAL_HOST -dnl CPU-specific settings. 
-CPU_SPINWAIT="" -case "${host_cpu}" in - i[[345]]86) - ;; - i686) - JE_COMPILABLE([__asm__], [], [[__asm__ volatile("pause"); return 0;]], - [asm]) - if test "x${asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - x86_64) - JE_COMPILABLE([__asm__ syntax], [], - [[__asm__ volatile("pause"); return 0;]], [asm]) - if test "x${asm}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' - fi - ;; - *) - ;; -esac -AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) - -dnl Platform-specific settings. abi and RPATH can probably be determined -dnl programmatically, but doing so is error-prone, which makes it generally -dnl not worth the trouble. -dnl -dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the -dnl definitions need to be seen before any headers are included, which is a pain -dnl to make happen otherwise. -case "${host}" in - *-*-darwin*) - CFLAGS="$CFLAGS -fno-common -no-cpp-precomp" - abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) - RPATH="" - ;; - *-*-freebsd*) - CFLAGS="$CFLAGS" - abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) - RPATH="-Wl,-rpath," - ;; - *-*-linux*) - CFLAGS="$CFLAGS" - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" - abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED]) - RPATH="-Wl,-rpath," - ;; - *-*-netbsd*) - AC_MSG_CHECKING([ABI]) - AC_COMPILE_IFELSE([AC_LANG_PROGRAM( -[[#ifdef __ELF__ -/* ELF */ -#else -#error aout -#endif -]])], - [CFLAGS="$CFLAGS"; abi="elf"], - [abi="aout"]) - AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE]) - RPATH="-Wl,-rpath," - ;; - *-*-solaris2*) - CFLAGS="$CFLAGS" - abi="elf" - RPATH="-Wl,-R," - dnl Solaris needs this for sigwait(). 
- CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" - LIBS="$LIBS -lposix4 -lsocket -lnsl" - ;; - *) - AC_MSG_RESULT([Unsupported operating system: ${host}]) - abi="elf" - RPATH="-Wl,-rpath," - ;; -esac -AC_SUBST([abi]) -AC_SUBST([RPATH]) - -JE_COMPILABLE([__attribute__ syntax], - [static __attribute__((unused)) void foo(void){}], - [], - [attribute]) -if test "x${attribute}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) - if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then - JE_CFLAGS_APPEND([-fvisibility=hidden]) - fi -fi - -JE_COMPILABLE([mremap(...MREMAP_FIXED...)], [ -#define _GNU_SOURCE -#include -], [ -void *p = mremap((void *)0, 0, 0, MREMAP_MAYMOVE|MREMAP_FIXED, (void *)0); -], [mremap_fixed]) -if test "x${mremap_fixed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MREMAP_FIXED]) -fi - -dnl Support optional additions to rpath. -AC_ARG_WITH([rpath], - [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], -if test "x$with_rpath" = "xno" ; then - RPATH_EXTRA= -else - RPATH_EXTRA="`echo $with_rpath | tr \":\" \" \"`" -fi, - RPATH_EXTRA= -) -AC_SUBST([RPATH_EXTRA]) - -dnl Disable rules that do automatic regeneration of configure output by default. -AC_ARG_ENABLE([autogen], - [AS_HELP_STRING([--enable-autogen], [Automatically regenerate configure output])], -if test "x$enable_autogen" = "xno" ; then - enable_autogen="0" -else - enable_autogen="1" -fi -, -enable_autogen="0" -) -AC_SUBST([enable_autogen]) - -AC_PROG_INSTALL -AC_PROG_RANLIB -AC_PATH_PROG([AR], [ar], , [$PATH]) -AC_PATH_PROG([LD], [ld], , [$PATH]) -AC_PATH_PROG([AUTOCONF], [autoconf], , [$PATH]) - -dnl Do not prefix public APIs by default. 
-AC_ARG_WITH([jemalloc_prefix], - [AS_HELP_STRING([--with-jemalloc-prefix=], [Prefix to prepend to all public APIs])], - [JEMALLOC_PREFIX="$with_jemalloc_prefix"], - [if test "x$abi" != "xmacho" ; then - JEMALLOC_PREFIX="" -else - JEMALLOC_PREFIX="je_" -fi] -) -if test "x$JEMALLOC_PREFIX" != "x" ; then - JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` - AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) - jemalloc_prefix="$JEMALLOC_PREFIX" - jemalloc_cprefix="$JEMALLOC_CPREFIX" - AC_SUBST([jemalloc_prefix]) - AC_SUBST([jemalloc_cprefix]) - AC_DEFINE_UNQUOTED([JEMALLOC_P(string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix)], [${JEMALLOC_PREFIX}##string_that_no_one_should_want_to_use_as_a_jemalloc_API_prefix]) -fi - -dnl Do not add suffix to installed files by default. -AC_ARG_WITH([install_suffix], - [AS_HELP_STRING([--with-install-suffix=], [Suffix to append to all installed files])], - [INSTALL_SUFFIX="$with_install_suffix"], - [INSTALL_SUFFIX=] -) -install_suffix="$INSTALL_SUFFIX" -AC_SUBST([install_suffix]) - -cfgoutputs_in="${srcroot}Makefile.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/html.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/manpages.xsl.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}doc/jemalloc.xml.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/jemalloc.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}include/jemalloc/internal/jemalloc_internal.h.in" -cfgoutputs_in="${cfgoutputs_in} ${srcroot}test/jemalloc_test.h.in" - -cfgoutputs_out="Makefile" -cfgoutputs_out="${cfgoutputs_out} doc/html.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/manpages.xsl" -cfgoutputs_out="${cfgoutputs_out} doc/jemalloc${install_suffix}.xml" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc${install_suffix}.h" -cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_out="${cfgoutputs_out} 
test/jemalloc_test.h" - -cfgoutputs_tup="Makefile" -cfgoutputs_tup="${cfgoutputs_tup} doc/html.xsl:doc/html.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/manpages.xsl:doc/manpages.xsl.in" -cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc${install_suffix}.xml:doc/jemalloc.xml.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc${install_suffix}.h:include/jemalloc/jemalloc.h.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" -cfgoutputs_tup="${cfgoutputs_tup} test/jemalloc_test.h:test/jemalloc_test.h.in" - -cfghdrs_in="${srcroot}include/jemalloc/jemalloc_defs.h.in" - -cfghdrs_out="include/jemalloc/jemalloc_defs${install_suffix}.h" - -cfghdrs_tup="include/jemalloc/jemalloc_defs${install_suffix}.h:include/jemalloc/jemalloc_defs.h.in" - -dnl Do not silence irrelevant compiler warnings by default, since enabling this -dnl option incurs a performance penalty. -AC_ARG_ENABLE([cc-silence], - [AS_HELP_STRING([--enable-cc-silence], - [Silence irrelevant compiler warnings])], -[if test "x$enable_cc_silence" = "xno" ; then - enable_cc_silence="0" -else - enable_cc_silence="1" -fi -], -[enable_cc_silence="0"] -) -if test "x$enable_cc_silence" = "x1" ; then - AC_DEFINE([JEMALLOC_CC_SILENCE]) -fi - -dnl Do not compile with debugging by default. -AC_ARG_ENABLE([debug], - [AS_HELP_STRING([--enable-debug], [Build debugging code])], -[if test "x$enable_debug" = "xno" ; then - enable_debug="0" -else - enable_debug="1" -fi -], -[enable_debug="0"] -) -if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) -fi -AC_SUBST([enable_debug]) - -dnl Only optimize if not debugging. -if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then - dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. 
- optimize="no" - echo "$EXTRA_CFLAGS" | grep "\-O" >/dev/null || optimize="yes" - if test "x${optimize}" = "xyes" ; then - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-O3]) - JE_CFLAGS_APPEND([-funroll-loops]) - else - JE_CFLAGS_APPEND([-O]) - fi - fi -fi - -dnl Do not enable statistics calculation by default. -AC_ARG_ENABLE([stats], - [AS_HELP_STRING([--enable-stats], [Enable statistics calculation/reporting])], -[if test "x$enable_stats" = "xno" ; then - enable_stats="0" -else - enable_stats="1" -fi -], -[enable_stats="0"] -) -if test "x$enable_stats" = "x1" ; then - AC_DEFINE([JEMALLOC_STATS], [ ]) -fi -AC_SUBST([enable_stats]) - -dnl Do not enable profiling by default. -AC_ARG_ENABLE([prof], - [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], -[if test "x$enable_prof" = "xno" ; then - enable_prof="0" -else - enable_prof="1" -fi -], -[enable_prof="0"] -) -if test "x$enable_prof" = "x1" ; then - backtrace_method="" -else - backtrace_method="N/A" -fi - -AC_ARG_ENABLE([prof-libunwind], - [AS_HELP_STRING([--enable-prof-libunwind], [Use libunwind for backtracing])], -[if test "x$enable_prof_libunwind" = "xno" ; then - enable_prof_libunwind="0" -else - enable_prof_libunwind="1" -fi -], -[enable_prof_libunwind="0"] -) -AC_ARG_WITH([static_libunwind], - [AS_HELP_STRING([--with-static-libunwind=], - [Path to static libunwind library; use rather than dynamically linking])], -if test "x$with_static_libunwind" = "xno" ; then - LUNWIND="-lunwind" -else - if test ! 
-f "$with_static_libunwind" ; then - AC_MSG_ERROR([Static libunwind not found: $with_static_libunwind]) - fi - LUNWIND="$with_static_libunwind" -fi, - LUNWIND="-lunwind" -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then - AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) - if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [backtrace], [LIBS="$LIBS $LUNWIND"], - [enable_prof_libunwind="0"]) - else - LIBS="$LIBS $LUNWIND" - fi - if test "x${enable_prof_libunwind}" = "x1" ; then - backtrace_method="libunwind" - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) - fi -fi - -AC_ARG_ENABLE([prof-libgcc], - [AS_HELP_STRING([--disable-prof-libgcc], - [Do not use libgcc for backtracing])], -[if test "x$enable_prof_libgcc" = "xno" ; then - enable_prof_libgcc="0" -else - enable_prof_libgcc="1" -fi -], -[enable_prof_libgcc="1"] -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) - dnl The following is conservative, in that it only has entries for CPUs on - dnl which jemalloc has been tested. 
- AC_MSG_CHECKING([libgcc-based backtracing reliability on ${host_cpu}]) - case "${host_cpu}" in - i[[3456]]86) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - x86_64) - AC_MSG_RESULT([reliable]) - ;; - *) - AC_MSG_RESULT([unreliable]) - enable_prof_libgcc="0"; - ;; - esac - if test "x${enable_prof_libgcc}" = "x1" ; then - backtrace_method="libgcc" - AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) - fi -else - enable_prof_libgcc="0" -fi - -AC_ARG_ENABLE([prof-gcc], - [AS_HELP_STRING([--disable-prof-gcc], - [Do not use gcc intrinsics for backtracing])], -[if test "x$enable_prof_gcc" = "xno" ; then - enable_prof_gcc="0" -else - enable_prof_gcc="1" -fi -], -[enable_prof_gcc="1"] -) -if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ - -a "x$GCC" = "xyes" ; then - backtrace_method="gcc intrinsics" - AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) -else - enable_prof_gcc="0" -fi - -if test "x$backtrace_method" = "x" ; then - backtrace_method="none (disabling profiling)" - enable_prof="0" -fi -AC_MSG_CHECKING([configured backtracing method]) -AC_MSG_RESULT([$backtrace_method]) -if test "x$enable_prof" = "x1" ; then - LIBS="$LIBS -lm" - AC_DEFINE([JEMALLOC_PROF], [ ]) -fi -AC_SUBST([enable_prof]) - -dnl Enable tiny allocations by default. -AC_ARG_ENABLE([tiny], - [AS_HELP_STRING([--disable-tiny], [Disable tiny (sub-quantum) allocations])], -[if test "x$enable_tiny" = "xno" ; then - enable_tiny="0" -else - enable_tiny="1" -fi -], -[enable_tiny="1"] -) -if test "x$enable_tiny" = "x1" ; then - AC_DEFINE([JEMALLOC_TINY], [ ]) -fi -AC_SUBST([enable_tiny]) - -dnl Enable thread-specific caching by default. 
-AC_ARG_ENABLE([tcache], - [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], -[if test "x$enable_tcache" = "xno" ; then - enable_tcache="0" -else - enable_tcache="1" -fi -], -[enable_tcache="1"] -) -if test "x$enable_tcache" = "x1" ; then - AC_DEFINE([JEMALLOC_TCACHE], [ ]) -fi -AC_SUBST([enable_tcache]) - -dnl Do not enable mmap()ped swap files by default. -AC_ARG_ENABLE([swap], - [AS_HELP_STRING([--enable-swap], [Enable mmap()ped swap files])], -[if test "x$enable_swap" = "xno" ; then - enable_swap="0" -else - enable_swap="1" -fi -], -[enable_swap="0"] -) -if test "x$enable_swap" = "x1" ; then - AC_DEFINE([JEMALLOC_SWAP], [ ]) -fi -AC_SUBST([enable_swap]) - -dnl Do not enable allocation from DSS by default. -AC_ARG_ENABLE([dss], - [AS_HELP_STRING([--enable-dss], [Enable allocation from DSS])], -[if test "x$enable_dss" = "xno" ; then - enable_dss="0" -else - enable_dss="1" -fi -], -[enable_dss="0"] -) -if test "x$enable_dss" = "x1" ; then - AC_DEFINE([JEMALLOC_DSS], [ ]) -fi -AC_SUBST([enable_dss]) - -dnl Do not support the junk/zero filling option by default. -AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--enable-fill], [Support junk/zero filling option])], -[if test "x$enable_fill" = "xno" ; then - enable_fill="0" -else - enable_fill="1" -fi -], -[enable_fill="0"] -) -if test "x$enable_fill" = "x1" ; then - AC_DEFINE([JEMALLOC_FILL], [ ]) -fi -AC_SUBST([enable_fill]) - -dnl Do not support the xmalloc option by default. -AC_ARG_ENABLE([xmalloc], - [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], -[if test "x$enable_xmalloc" = "xno" ; then - enable_xmalloc="0" -else - enable_xmalloc="1" -fi -], -[enable_xmalloc="0"] -) -if test "x$enable_xmalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_XMALLOC], [ ]) -fi -AC_SUBST([enable_xmalloc]) - -dnl Do not support the SYSV option by default. 
-AC_ARG_ENABLE([sysv], - [AS_HELP_STRING([--enable-sysv], [Support SYSV semantics option])], -[if test "x$enable_sysv" = "xno" ; then - enable_sysv="0" -else - enable_sysv="1" -fi -], -[enable_sysv="0"] -) -if test "x$enable_sysv" = "x1" ; then - AC_DEFINE([JEMALLOC_SYSV], [ ]) -fi -AC_SUBST([enable_sysv]) - -dnl Do not determine page shift at run time by default. -AC_ARG_ENABLE([dynamic_page_shift], - [AS_HELP_STRING([--enable-dynamic-page-shift], - [Determine page size at run time (don't trust configure result)])], -[if test "x$enable_dynamic_page_shift" = "xno" ; then - enable_dynamic_page_shift="0" -else - enable_dynamic_page_shift="1" -fi -], -[enable_dynamic_page_shift="0"] -) -if test "x$enable_dynamic_page_shift" = "x1" ; then - AC_DEFINE([DYNAMIC_PAGE_SHIFT], [ ]) -fi -AC_SUBST([enable_dynamic_page_shift]) - -AC_MSG_CHECKING([STATIC_PAGE_SHIFT]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[#include -#include -#include -]], [[ - long result; - FILE *f; - - result = sysconf(_SC_PAGESIZE); - if (result == -1) { - return 1; - } - f = fopen("conftest.out", "w"); - if (f == NULL) { - return 1; - } - fprintf(f, "%u\n", ffs((int)result) - 1); - close(f); - - return 0; -]])], - [STATIC_PAGE_SHIFT=`cat conftest.out`] - AC_MSG_RESULT([$STATIC_PAGE_SHIFT]) - AC_DEFINE_UNQUOTED([STATIC_PAGE_SHIFT], [$STATIC_PAGE_SHIFT]), - AC_MSG_RESULT([error])) - -dnl ============================================================================ -dnl jemalloc configuration. -dnl - -dnl Set VERSION if source directory has an embedded git repository. 
-if test -d "${srcroot}../.git" ; then - git describe --long --abbrev=40 > ${srcroot}VERSION -fi -jemalloc_version=`cat ${srcroot}VERSION` -jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` -jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` -jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` -jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'` -jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'` -AC_SUBST([jemalloc_version]) -AC_SUBST([jemalloc_version_major]) -AC_SUBST([jemalloc_version_minor]) -AC_SUBST([jemalloc_version_bugfix]) -AC_SUBST([jemalloc_version_nrev]) -AC_SUBST([jemalloc_version_gid]) - -dnl ============================================================================ -dnl Configure pthreads. - -AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) -AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], - [AC_MSG_ERROR([libpthread is missing])]) - -CPPFLAGS="$CPPFLAGS -D_REENTRANT" - -dnl Enable lazy locking by default. 
-AC_ARG_ENABLE([lazy_lock], - [AS_HELP_STRING([--disable-lazy-lock], - [Disable lazy locking (always lock, even when single-threaded)])], -[if test "x$enable_lazy_lock" = "xno" ; then - enable_lazy_lock="0" -else - enable_lazy_lock="1" -fi -], -[enable_lazy_lock="1"] -) -if test "x$enable_lazy_lock" = "x1" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_LIB([dl], [dlopen], [LIBS="$LIBS -ldl"], - [AC_MSG_ERROR([libdl is missing])]) - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -fi -AC_SUBST([enable_lazy_lock]) - -AC_ARG_ENABLE([tls], - [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])], -if test "x$enable_tls" = "xno" ; then - enable_tls="0" -else - enable_tls="1" -fi -, -enable_tls="1" -) -if test "x${enable_tls}" = "x1" ; then -AC_MSG_CHECKING([for TLS]) -AC_RUN_IFELSE([AC_LANG_PROGRAM( -[[ - __thread int x; -]], [[ - x = 42; - - return 0; -]])], - AC_MSG_RESULT([yes]), - AC_MSG_RESULT([no]) - enable_tls="0") -fi -AC_SUBST([enable_tls]) -if test "x${enable_tls}" = "x0" ; then - AC_DEFINE_UNQUOTED([NO_TLS], [ ]) -fi - -dnl ============================================================================ -dnl Check for ffsl(3), and fail if not found. This function exists on all -dnl platforms that jemalloc currently has a chance of functioning on without -dnl modification. - -AC_CHECK_FUNC([ffsl], [], - [AC_MSG_ERROR([Cannot build without ffsl(3)])]) - -dnl ============================================================================ -dnl Check for atomic(3) operations as provided on Darwin. 
- -JE_COMPILABLE([Darwin OSAtomic*()], [ -#include -#include -], [ - { - int32_t x32 = 0; - volatile int32_t *x32p = &x32; - OSAtomicAdd32(1, x32p); - } - { - int64_t x64 = 0; - volatile int64_t *x64p = &x64; - OSAtomicAdd64(1, x64p); - } -], [osatomic]) -if test "x${osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC]) -fi - -dnl ============================================================================ -dnl Check for spinlock(3) operations as provided on Darwin. - -JE_COMPILABLE([Darwin OSSpin*()], [ -#include -#include -], [ - OSSpinLock lock = 0; - OSSpinLockLock(&lock); - OSSpinLockUnlock(&lock); -], [osspin]) -if test "x${osspin}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSSPIN]) -fi - -dnl ============================================================================ -dnl Check for allocator-related functions that should be wrapped. - -AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN])]) -AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC])]) - -dnl ============================================================================ -dnl Darwin-related configuration. - -if test "x${abi}" = "xmacho" ; then - AC_DEFINE([JEMALLOC_IVSALLOC]) - AC_DEFINE([JEMALLOC_ZONE]) - - dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 - dnl releases. malloc_zone_t and malloc_introspection_t have new fields in - dnl 10.6, which is the only source-level indication of the change. 
- AC_MSG_CHECKING([malloc zone version]) - AC_TRY_COMPILE([#include -#include ], [ - static malloc_zone_t zone; - static struct malloc_introspection_t zone_introspect; - - zone.size = NULL; - zone.malloc = NULL; - zone.calloc = NULL; - zone.valloc = NULL; - zone.free = NULL; - zone.realloc = NULL; - zone.destroy = NULL; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = 6; - zone.memalign = NULL; - zone.free_definite_size = NULL; - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = NULL; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = NULL; - zone_introspect.force_unlock = NULL; - zone_introspect.statistics = NULL; - zone_introspect.zone_locked = NULL; -], [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [6]) - AC_MSG_RESULT([6])], - [AC_DEFINE_UNQUOTED([JEMALLOC_ZONE_VERSION], [3]) - AC_MSG_RESULT([3])]) -fi - -dnl ============================================================================ -dnl Check for typedefs, structures, and compiler characteristics. -AC_HEADER_STDBOOL - -dnl Process .in files. -AC_SUBST([cfghdrs_in]) -AC_SUBST([cfghdrs_out]) -AC_CONFIG_HEADERS([$cfghdrs_tup]) - -dnl ============================================================================ -dnl Generate outputs. -AC_CONFIG_FILES([$cfgoutputs_tup config.stamp]) -AC_SUBST([cfgoutputs_in]) -AC_SUBST([cfgoutputs_out]) -AC_OUTPUT - -dnl ============================================================================ -dnl Print out the results of configuration. 
-AC_MSG_RESULT([===============================================================================]) -AC_MSG_RESULT([jemalloc version : $jemalloc_version]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([CC : ${CC}]) -AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) -AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) -AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) -AC_MSG_RESULT([LIBS : ${LIBS}]) -AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) -AC_MSG_RESULT([XSLROOT : ${XSLROOT}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([PREFIX : ${PREFIX}]) -AC_MSG_RESULT([BINDIR : ${BINDIR}]) -AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) -AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) -AC_MSG_RESULT([DATADIR : ${DATADIR}]) -AC_MSG_RESULT([MANDIR : ${MANDIR}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([srcroot : ${srcroot}]) -AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}]) -AC_MSG_RESULT([objroot : ${objroot}]) -AC_MSG_RESULT([abs_objroot : ${abs_objroot}]) -AC_MSG_RESULT([]) -AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) -AC_MSG_RESULT([install_suffix : ${install_suffix}]) -AC_MSG_RESULT([autogen : ${enable_autogen}]) -AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) -AC_MSG_RESULT([debug : ${enable_debug}]) -AC_MSG_RESULT([stats : ${enable_stats}]) -AC_MSG_RESULT([prof : ${enable_prof}]) -AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) -AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) -AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([tiny : ${enable_tiny}]) -AC_MSG_RESULT([tcache : ${enable_tcache}]) -AC_MSG_RESULT([fill : ${enable_fill}]) -AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([sysv : ${enable_sysv}]) -AC_MSG_RESULT([swap : ${enable_swap}]) -AC_MSG_RESULT([dss : ${enable_dss}]) -AC_MSG_RESULT([dynamic_page_shift : ${enable_dynamic_page_shift}]) -AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) -AC_MSG_RESULT([tls : ${enable_tls}]) -AC_MSG_RESULT([===============================================================================]) diff 
--git a/jemalloc/doc/html.xsl.in b/jemalloc/doc/html.xsl.in deleted file mode 100644 index a91d974..0000000 --- a/jemalloc/doc/html.xsl.in +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/jemalloc/doc/jemalloc.xml.in b/jemalloc/doc/jemalloc.xml.in deleted file mode 100644 index 13f3aae..0000000 --- a/jemalloc/doc/jemalloc.xml.in +++ /dev/null @@ -1,2280 +0,0 @@ - - - - - - - User Manual - jemalloc - @jemalloc_version@ - - - Jason - Evans - Author - - - - - JEMALLOC - 3 - - - jemalloc - jemalloc - - general purpose memory allocation functions - - - LIBRARY - This manual describes jemalloc @jemalloc_version@. More information - can be found at the jemalloc website. - - - SYNOPSIS - - #include <stdlib.h> -#include <jemalloc/jemalloc.h> - - Standard API - - void *malloc - size_t size - - - void *calloc - size_t number - size_t size - - - int posix_memalign - void **ptr - size_t alignment - size_t size - - - void *realloc - void *ptr - size_t size - - - void free - void *ptr - - - - Non-standard API - - size_t malloc_usable_size - const void *ptr - - - void malloc_stats_print - void (*write_cb) - void *, const char * - - void *cbopaque - const char *opts - - - int mallctl - const char *name - void *oldp - size_t *oldlenp - void *newp - size_t newlen - - - int mallctlnametomib - const char *name - size_t *mibp - size_t *miblenp - - - int mallctlbymib - const size_t *mib - size_t miblen - void *oldp - size_t *oldlenp - void *newp - size_t newlen - - - void (*malloc_message) - void *cbopaque - const char *s - - const char *malloc_conf; - - - Experimental API - - int allocm - void **ptr - size_t *rsize - size_t size - int flags - - - int rallocm - void **ptr - size_t *rsize - size_t size - size_t extra - int flags - - - int sallocm - const void *ptr - size_t *rsize - int flags - - - int dallocm - void *ptr - int flags - - - - - - DESCRIPTION - - Standard API - - The malloc function allocates - size bytes of uninitialized memory. 
The allocated - space is suitably aligned (after possible pointer coercion) for storage - of any type of object. - - The calloc function allocates - space for number objects, each - size bytes in length. The result is identical to - calling malloc with an argument of - number * size, with the - exception that the allocated memory is explicitly initialized to zero - bytes. - - The posix_memalign function - allocates size bytes of memory such that the - allocation's base address is an even multiple of - alignment, and returns the allocation in the value - pointed to by ptr. The requested - alignment must be a power of 2 at least as large - as sizeof(void *). - - The realloc function changes the - size of the previously allocated memory referenced by - ptr to size bytes. The - contents of the memory are unchanged up to the lesser of the new and old - sizes. If the new size is larger, the contents of the newly allocated - portion of the memory are undefined. Upon success, the memory referenced - by ptr is freed and a pointer to the newly - allocated memory is returned. Note that - realloc may move the memory allocation, - resulting in a different return value than ptr. - If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. - - The free function causes the - allocated memory referenced by ptr to be made - available for future allocations. If ptr is - NULL, no action occurs. - - - Non-standard API - - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. The return value may be larger than the size - that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather - it is provided solely as a tool for introspection purposes. Any - discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be - depended on, since such behavior is entirely implementation-dependent. 
- - - The malloc_stats_print function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character - within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache operations. - - - The mallctl function provides a - general interface for introspecting the memory allocator, as well as - setting modifiable parameters and triggering actions. The - period-separated name argument specifies a - location in a tree-structured namespace; see the section for - documentation on the tree contents. To read a value, pass a pointer via - oldp to adequate space to contain the value, and a - pointer to its length via oldlenp; otherwise pass - NULL and NULL. Similarly, to - write a value, pass a pointer to the value via - newp, and its length via - newlen; otherwise pass NULL - and 0. - - The mallctlnametomib function - provides a way to avoid repeated name lookups for applications that - repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. 
Upon - successful return from mallctlnametomib, - mibp contains an array of - *miblenp integers, where - *miblenp is the lesser of the number of components - in name and the input value of - *miblenp. Thus it is possible to pass a - *miblenp that is smaller than the number of - period-separated name components, which results in a partial MIB that can - be used as the basis for constructing a complete MIB. For name - components that are integers (e.g. the 2 in - arenas.bin.2.size), - the corresponding MIB component will always be that integer. Therefore, - it is legitimate to construct code like the following: - - - Experimental API - The experimental API is subject to change or removal without regard - for backward compatibility. - - The allocm, - rallocm, - sallocm, and - dallocm functions all have a - flags argument that can be used to specify - options. The functions only check the options that are contextually - relevant. Use bitwise or (|) operations to - specify one or more of the following: - - - ALLOCM_LG_ALIGN(la) - - - Align the memory allocation to start at an address - that is a multiple of (1 << - la). This macro does not validate - that la is within the valid - range. - - - ALLOCM_ALIGN(a) - - - Align the memory allocation to start at an address - that is a multiple of a, where - a is a power of two. This macro does not - validate that a is a power of 2. - - - - ALLOCM_ZERO - - Initialize newly allocated memory to contain zero - bytes. In the growing reallocation case, the real size prior to - reallocation defines the boundary between untouched bytes and those - that are initialized to contain zero bytes. If this option is - absent, newly allocated memory is uninitialized. - - - ALLOCM_NO_MOVE - - For reallocation, fail rather than moving the - object. This constraint can apply to both growth and - shrinkage. 
- - - - - The allocm function allocates at - least size bytes of memory, sets - *ptr to the base address of the allocation, and - sets *rsize to the real size of the allocation if - rsize is not NULL. - - The rallocm function resizes the - allocation at *ptr to be at least - size bytes, sets *ptr to - the base address of the allocation if it moved, and sets - *rsize to the real size of the allocation if - rsize is not NULL. If - extra is non-zero, an attempt is made to resize - the allocation to be at least size + - extra) bytes, though inability to allocate - the extra byte(s) will not by itself result in failure. Behavior is - undefined if (size + - extra > - SIZE_T_MAX). - - The sallocm function sets - *rsize to the real size of the allocation. - - The dallocm function causes the - memory referenced by ptr to be made available for - future allocations. - - - - TUNING - Once, when the first call is made to one of the memory allocation - routines, the allocator initializes its internals based in part on various - options that can be specified at compile- or run-time. - - The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the - environment variable MALLOC_CONF, will be interpreted, in - that order, from left to right as options. - - An options string is a comma-separated list of option:value pairs. - There is one key corresponding to each opt.* mallctl (see the section for options - documentation). For example, abort:true,narenas:1 sets - the opt.abort and opt.narenas options. Some - options have boolean values (true/false), others have integer values (base - 8, 10, or 16, depending on prefix), and yet others have raw string - values. 
- - - IMPLEMENTATION NOTES - Traditionally, allocators have used - sbrk - 2 to obtain memory, which is - suboptimal for several reasons, including race conditions, increased - fragmentation, and artificial limitations on maximum usable memory. If - is specified during configuration, this - allocator uses both sbrk - 2 and - mmap - 2, in that order of preference; - otherwise only mmap - 2 is used. - - This allocator uses multiple arenas in order to reduce lock - contention for threaded programs on multi-processor systems. This works - well with regard to threading scalability, but incurs some costs. There is - a small fixed per-arena overhead, and additionally, arenas manage memory - completely independently of each other, which means a small fixed increase - in overall memory fragmentation. These overheads are not generally an - issue, given the number of arenas normally used. Note that using - substantially more arenas than the default is not likely to improve - performance, mainly due to reduced cache performance. However, it may make - sense to reduce the number of arenas if an application does not make much - use of the allocation functions. - - In addition to multiple arenas, unless - is specified during configuration, this - allocator supports thread-specific caching for small and large objects, in - order to make it possible to completely avoid synchronization for most - allocation requests. Such caching allows very fast allocation in the - common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache. - - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. 
Small objects are smaller than one page. Large - objects are smaller than the chunk size. Huge objects are a multiple of - the chunk size. Small and large objects are managed by arenas; huge - objects are managed separately in a single data structure that is shared by - all threads. Huge objects are used by applications infrequently enough - that this single data structure is not a scalability issue. - - Each chunk that is managed by an arena tracks its contents as runs of - contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. - - Small objects are managed in groups by page runs. Each run maintains - a frontier and free list to track which regions are in use. Unless - is specified during configuration, - allocation requests that are no more than half the quantum (8 or 16, - depending on architecture) are rounded up to the nearest power of two that - is at least sizeof(void *). - Allocation requests that are more than half the quantum, but no more than - the minimum cacheline-multiple size class (see the opt.lg_qspace_max - option) are rounded up to the nearest multiple of the quantum. Allocation - requests that are more than the minimum cacheline-multiple size class, but - no more than the minimum subpage-multiple size class (see the opt.lg_cspace_max - option) are rounded up to the nearest multiple of the cacheline size (64). - Allocation requests that are more than the minimum subpage-multiple size - class, but no more than the maximum subpage-multiple size class are rounded - up to the nearest multiple of the subpage size (256). Allocation requests - that are more than the maximum subpage-multiple size class, but small - enough to fit in an arena-managed chunk (see the opt.lg_chunk option), are - rounded up to the nearest run size. 
Allocation requests that are too large - to fit in an arena-managed chunk are rounded up to the nearest multiple of - the chunk size. - - Allocations are packed tightly together, which can be an issue for - multi-threaded applications. If you need to assure that allocations do not - suffer from cacheline sharing, round your allocation requests up to the - nearest multiple of the cacheline size, or specify cacheline alignment when - allocating. - - Assuming 4 MiB chunks, 4 KiB pages, and a 16-byte quantum on a 64-bit - system, the size classes in each category are as shown in . - - - Size classes - - - - - - - Category - Subcategory - Size - - - - - Small - Tiny - [8] - - - Quantum-spaced - [16, 32, 48, ..., 128] - - - Cacheline-spaced - [192, 256, 320, ..., 512] - - - Subpage-spaced - [768, 1024, 1280, ..., 3840] - - - Large - [4 KiB, 8 KiB, 12 KiB, ..., 4072 KiB] - - - Huge - [4 MiB, 8 MiB, 12 MiB, ...] - - - -
-
- - MALLCTL NAMESPACE - The following names are defined in the namespace accessible via the - mallctl* functions. Value types are - specified in parentheses, their readable/writable statuses are encoded as - rw, r-, -w, or - --, and required build configuration flags follow, if - any. A name element encoded as <i> or - <j> indicates an integer component, where the - integer varies from 0 to some upper value that must be determined via - introspection. In the case of stats.arenas.<i>.*, - <i> equal to arenas.narenas can be - used to access the summation of statistics from all arenas. Take special - note of the epoch mallctl, - which controls refreshing of cached dynamic statistics. - - - - - version - (const char *) - r- - - Return the jemalloc version string. - - - - - epoch - (uint64_t) - rw - - If a value is passed in, refresh the data from which - the mallctl* functions report values, - and increment the epoch. Return the current epoch. This is useful for - detecting whether another thread caused a refresh. - - - - - config.debug - (bool) - r- - - was specified during - build configuration. - - - - - config.dss - (bool) - r- - - was specified during - build configuration. - - - - - config.dynamic_page_shift - (bool) - r- - - was - specified during build configuration. - - - - - config.fill - (bool) - r- - - was specified during - build configuration. - - - - - config.lazy_lock - (bool) - r- - - was specified - during build configuration. - - - - - config.prof - (bool) - r- - - was specified during - build configuration. - - - - - config.prof_libgcc - (bool) - r- - - was not - specified during build configuration. - - - - - config.prof_libunwind - (bool) - r- - - was specified - during build configuration. - - - - - config.stats - (bool) - r- - - was specified during - build configuration. - - - - - config.swap - (bool) - r- - - was specified during - build configuration. - - - - - config.sysv - (bool) - r- - - was specified during - build configuration. 
- - - - - config.tcache - (bool) - r- - - was not specified - during build configuration. - - - - - config.tiny - (bool) - r- - - was not specified - during build configuration. - - - - - config.tls - (bool) - r- - - was not specified during - build configuration. - - - - - config.xmalloc - (bool) - r- - - was specified during - build configuration. - - - - - opt.abort - (bool) - r- - - Abort-on-warning enabled/disabled. If true, most - warnings are fatal. The process will call - abort - 3 in these cases. This option is - disabled by default unless is - specified during configuration, in which case it is enabled by default. - - - - - - opt.lg_qspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the quantum (8 or 16 bytes, depending on architecture). - Above this size, cacheline spacing is used for size classes. The - default value is 128 bytes (2^7). - - - - - opt.lg_cspace_max - (size_t) - r- - - Size (log base 2) of the maximum size class that is a - multiple of the cacheline size (64). Above this size, subpage spacing - (256 bytes) is used for size classes. The default value is 512 bytes - (2^9). - - - - - opt.lg_chunk - (size_t) - r- - - Virtual memory chunk size (log base 2). The default - chunk size is 4 MiB (2^22). - - - - - opt.narenas - (size_t) - r- - - Maximum number of arenas to use. The default maximum - number of arenas is four times the number of CPUs, or one if there is a - single CPU. - - - - - opt.lg_dirty_mult - (ssize_t) - r- - - Per-arena minimum ratio (log base 2) of active to dirty - pages. Some dirty unused pages may be allowed to accumulate, within - the limit set by the ratio (or one chunk worth of dirty pages, - whichever is greater), before informing the kernel about some of those - pages via madvise - 2 or a similar system call. This - provides the kernel with sufficient information to recycle dirty pages - if physical memory becomes scarce and the pages remain unused. 
The - default minimum ratio is 32:1 (2^5:1); an option value of -1 will - disable dirty page purging. - - - - - opt.stats_print - (bool) - r- - - Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print - function is called at program exit via an - atexit - 3 function. If - is specified during configuration, this - has the potential to cause deadlock for a multi-threaded process that - exits while one or more threads are executing in the memory allocation - functions. Therefore, this option should only be used with care; it is - primarily intended as a performance tuning aid during application - development. This option is disabled by default. - - - - - opt.junk - (bool) - r- - [] - - Junk filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to - 0xa5. All deallocated memory will be initialized to - 0x5a. This is intended for debugging and will - impact performance negatively. This option is disabled by default - unless is specified during - configuration, in which case it is enabled by default. - - - - - opt.zero - (bool) - r- - [] - - Zero filling enabled/disabled. If enabled, each byte - of uninitialized allocated memory will be initialized to 0. Note that - this initialization only happens once for each byte, so - realloc and - rallocm calls do not zero memory that - was previously allocated. This is intended for debugging and will - impact performance negatively. This option is disabled by default. - - - - - - opt.sysv - (bool) - r- - [] - - If enabled, attempting to allocate zero bytes will - return a NULL pointer instead of a valid pointer. - (The default behavior is to make a minimal allocation and return a - pointer to it.) This option is provided for System V compatibility. - This option is incompatible with the opt.xmalloc option. - This option is disabled by default. - - - - - opt.xmalloc - (bool) - r- - [] - - Abort-on-out-of-memory enabled/disabled. 
If enabled, - rather than returning failure for any allocation function, display a - diagnostic message on STDERR_FILENO and cause the - program to drop core (using - abort - 3). If an application is - designed to depend on this behavior, set the option at compile time by - including the following in the source code: - - This option is disabled by default. - - - - - opt.tcache - (bool) - r- - [] - - Thread-specific caching enabled/disabled. When there - are multiple threads, each thread uses a thread-specific cache for - objects up to a certain size. Thread-specific caching allows many - allocations to be satisfied without performing any thread - synchronization, at the cost of increased memory use. See the - opt.lg_tcache_gc_sweep - and opt.lg_tcache_max - options for related tuning information. This option is enabled by - default. - - - - - opt.lg_tcache_gc_sweep - (ssize_t) - r- - [] - - Approximate interval (log base 2) between full - thread-specific cache garbage collection sweeps, counted in terms of - thread-specific cache allocation/deallocation events. Garbage - collection is actually performed incrementally, one size class at a - time, in order to avoid large collection pauses. The default sweep - interval is 8192 (2^13); setting this option to -1 will disable garbage - collection. - - - - - opt.lg_tcache_max - (size_t) - r- - [] - - Maximum size class (log base 2) to cache in the - thread-specific cache. At a minimum, all small size classes are - cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15). - - - - - opt.prof - (bool) - r- - [] - - Memory profiling enabled/disabled. If enabled, profile - memory allocation activity, and use an - atexit - 3 function to dump final memory - usage to a file named according to the pattern - <prefix>.<pid>.<seq>.f.heap, - where <prefix> is controlled by the opt.prof_prefix - option. See the opt.lg_prof_bt_max - option for backtrace depth control. 
See the opt.prof_active - option for on-the-fly activation/deactivation. See the opt.lg_prof_sample - option for probabilistic sampling control. See the opt.prof_accum - option for control of cumulative sample reporting. See the opt.lg_prof_tcmax - option for control of per thread backtrace caching. See the opt.lg_prof_interval - option for information on interval-triggered profile dumping, and the - opt.prof_gdump - option for information on high-water-triggered profile dumping. - Profile output is compatible with the included pprof - Perl script, which originates from the google-perftools - package. - - - - - opt.prof_prefix - (const char *) - r- - [] - - Filename prefix for profile dumps. If the prefix is - set to the empty string, no automatic dumps will occur; this is - primarily useful for disabling the automatic final heap dump (which - also disables leak reporting, if enabled). The default prefix is - jeprof. - - - - - opt.lg_prof_bt_max - (size_t) - r- - [] - - Maximum backtrace depth (log base 2) when profiling - memory allocation activity. The default is 128 (2^7). - - - - - opt.prof_active - (bool) - r- - [] - - Profiling activated/deactivated. This is a secondary - control mechanism that makes it possible to start the application with - profiling enabled (see the opt.prof option) but - inactive, then toggle profiling at any time during program execution - with the prof.active mallctl. - This option is enabled by default. - - - - - opt.lg_prof_sample - (ssize_t) - r- - [] - - Average interval (log base 2) between allocation - samples, as measured in bytes of allocation activity. Increasing the - sampling interval decreases profile fidelity, but also decreases the - computational overhead. The default sample interval is 1 (2^0) (i.e. - all allocations are sampled). - - - - - opt.prof_accum - (bool) - r- - [] - - Reporting of cumulative object/byte counts in profile - dumps enabled/disabled. 
If this option is enabled, every unique - backtrace must be stored for the duration of execution. Depending on - the application, this can impose a large memory overhead, and the - cumulative counts are not always of interest. See the - opt.lg_prof_tcmax - option for control of per thread backtrace caching, which has important - interactions. This option is enabled by default. - - - - - opt.lg_prof_tcmax - (ssize_t) - r- - [] - - Maximum per thread backtrace cache (log base 2) used - for heap profiling. A backtrace can only be discarded if the - opt.prof_accum - option is disabled, and no thread caches currently refer to the - backtrace. Therefore, a backtrace cache limit should be imposed if the - intention is to limit how much memory is used by backtraces. By - default, no limit is imposed (encoded as -1). - - - - - - opt.lg_prof_interval - (ssize_t) - r- - [] - - Average interval (log base 2) between memory profile - dumps, as measured in bytes of allocation activity. The actual - interval between dumps may be sporadic because decentralized allocation - counters are used to avoid synchronization bottlenecks. Profiles are - dumped to files named according to the pattern - <prefix>.<pid>.<seq>.i<iseq>.heap, - where <prefix> is controlled by the - opt.prof_prefix - option. By default, interval-triggered profile dumping is disabled - (encoded as -1). - - - - - - opt.prof_gdump - (bool) - r- - [] - - Trigger a memory profile dump every time the total - virtual memory exceeds the previous maximum. Profiles are dumped to - files named according to the pattern - <prefix>.<pid>.<seq>.u<useq>.heap, - where <prefix> is controlled by the opt.prof_prefix - option. This option is disabled by default. - - - - - opt.prof_leak - (bool) - r- - [] - - Leak reporting enabled/disabled. If enabled, use an - atexit - 3 function to report memory leaks - detected by allocation sampling. See the - opt.lg_prof_bt_max - option for backtrace depth control. 
See the - opt.prof option for - information on analyzing heap profile output. This option is disabled - by default. - - - - - opt.overcommit - (bool) - r- - [] - - Over-commit enabled/disabled. If enabled, over-commit - memory as a side effect of using anonymous - mmap - 2 or - sbrk - 2 for virtual memory allocation. - In order for overcommit to be disabled, the swap.fds mallctl must have - been successfully written to. This option is enabled by - default. - - - - - tcache.flush - (void) - -- - [] - - Flush calling thread's tcache. This interface releases - all cached objects and internal data structures associated with the - calling thread's thread-specific cache. Ordinarily, this interface - need not be called, since automatic periodic incremental garbage - collection occurs, and the thread cache is automatically discarded when - a thread exits. However, garbage collection is triggered by allocation - activity, so it is possible for a thread that stops - allocating/deallocating to retain its cache indefinitely, in which case - the developer may find manual flushing useful. - - - - - thread.arena - (unsigned) - rw - - Get or set the arena associated with the calling - thread. The arena index must be less than the maximum number of arenas - (see the arenas.narenas - mallctl). If the specified arena was not initialized beforehand (see - the arenas.initialized - mallctl), it will be automatically initialized as a side effect of - calling this interface. - - - - - thread.allocated - (uint64_t) - r- - [] - - Get the total number of bytes ever allocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases. - - - - - thread.allocatedp - (uint64_t *) - r- - [] - - Get a pointer to the value that is returned by the - thread.allocated - mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls.
- - - - - thread.deallocated - (uint64_t) - r- - [] - - Get the total number of bytes ever deallocated by the - calling thread. This counter has the potential to wrap around; it is - up to the application to appropriately interpret the counter in such - cases. - - - - - thread.deallocatedp - (uint64_t *) - r- - [] - - Get a pointer to the value that is returned by the - thread.deallocated - mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. - - - - - arenas.narenas - (unsigned) - r- - - Maximum number of arenas. - - - - - arenas.initialized - (bool *) - r- - - An array of arenas.narenas - booleans. Each boolean indicates whether the corresponding arena is - initialized. - - - - - arenas.quantum - (size_t) - r- - - Quantum size. - - - - - arenas.cacheline - (size_t) - r- - - Assumed cacheline size. - - - - - arenas.subpage - (size_t) - r- - - Subpage size class interval. - - - - - arenas.pagesize - (size_t) - r- - - Page size. - - - - - arenas.chunksize - (size_t) - r- - - Chunk size. - - - - - arenas.tspace_min - (size_t) - r- - - Minimum tiny size class. Tiny size classes are powers - of two. - - - - - arenas.tspace_max - (size_t) - r- - - Maximum tiny size class. Tiny size classes are powers - of two. - - - - - arenas.qspace_min - (size_t) - r- - - Minimum quantum-spaced size class. - - - - - arenas.qspace_max - (size_t) - r- - - Maximum quantum-spaced size class. - - - - - arenas.cspace_min - (size_t) - r- - - Minimum cacheline-spaced size class. - - - - - arenas.cspace_max - (size_t) - r- - - Maximum cacheline-spaced size class. - - - - - arenas.sspace_min - (size_t) - r- - - Minimum subpage-spaced size class. - - - - - arenas.sspace_max - (size_t) - r- - - Maximum subpage-spaced size class. - - - - - arenas.tcache_max - (size_t) - r- - [] - - Maximum thread-cached size class. - - - - - arenas.ntbins - (unsigned) - r- - - Number of tiny bin size classes.
- - - - - arenas.nqbins - (unsigned) - r- - - Number of quantum-spaced bin size - classes. - - - - - arenas.ncbins - (unsigned) - r- - - Number of cacheline-spaced bin size - classes. - - - - - arenas.nsbins - (unsigned) - r- - - Number of subpage-spaced bin size - classes. - - - - - arenas.nbins - (unsigned) - r- - - Total number of bin size classes. - - - - - arenas.nhbins - (unsigned) - r- - [] - - Total number of thread cache bin size - classes. - - - - - arenas.bin.<i>.size - (size_t) - r- - - Maximum size supported by size class. - - - - - arenas.bin.<i>.nregs - (uint32_t) - r- - - Number of regions per page run. - - - - - arenas.bin.<i>.run_size - (size_t) - r- - - Number of bytes per page run. - - - - - arenas.nlruns - (size_t) - r- - - Total number of large size classes. - - - - - arenas.lrun.<i>.size - (size_t) - r- - - Maximum size supported by this large size - class. - - - - - arenas.purge - (unsigned) - -w - - Purge unused dirty pages for the specified arena, or - for all arenas if none is specified. - - - - - prof.active - (bool) - rw - [] - - Control whether sampling is currently active. See the - opt.prof_active - option for additional information. - - - - - - prof.dump - (const char *) - -w - [] - - Dump a memory profile to the specified file, or if NULL - is specified, to a file according to the pattern - <prefix>.<pid>.<seq>.m<mseq>.heap, - where <prefix> is controlled by the - opt.prof_prefix - option. - - - - - prof.interval - (uint64_t) - r- - [] - - Average number of bytes allocated between - interval-based profile dumps. See the - opt.lg_prof_interval - option for additional information. - - - - - stats.cactive - (size_t *) - r- - [] - - Pointer to a counter that contains an approximate count - of the current number of bytes in active pages. The estimate may be - high, but never low, because each arena rounds up to the nearest - multiple of the chunk size when computing its contribution to the - counter.
Note that the epoch mallctl has no bearing - on this counter. Furthermore, counter consistency is maintained via - atomic operations, so it is necessary to use an atomic operation in - order to guarantee a consistent read when dereferencing the pointer. - - - - - - stats.allocated - (size_t) - r- - [] - - Total number of bytes allocated by the - application. - - - - - stats.active - (size_t) - r- - [] - - Total number of bytes in active pages allocated by the - application. This is a multiple of the page size, and greater than or - equal to stats.allocated. - - - - - - stats.mapped - (size_t) - r- - [] - - Total number of bytes in chunks mapped on behalf of the - application. This is a multiple of the chunk size, and is at least as - large as stats.active. This - does not include inactive chunks backed by swap files. This does not - include inactive chunks embedded in the DSS. - - - - - stats.chunks.current - (size_t) - r- - [] - - Total number of chunks actively mapped on behalf of the - application. This does not include inactive chunks backed by swap - files. This does not include inactive chunks embedded in the DSS. - - - - - - stats.chunks.total - (uint64_t) - r- - [] - - Cumulative number of chunks allocated. - - - - - stats.chunks.high - (size_t) - r- - [] - - Maximum number of active chunks at any time thus far. - - - - - - stats.huge.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by huge objects. - - - - - - stats.huge.nmalloc - (uint64_t) - r- - [] - - Cumulative number of huge allocation requests. - - - - - - stats.huge.ndalloc - (uint64_t) - r- - [] - - Cumulative number of huge deallocation requests. - - - - - - stats.arenas.<i>.nthreads - (unsigned) - r- - - Number of threads currently assigned to - arena. - - - - - stats.arenas.<i>.pactive - (size_t) - r- - - Number of pages in active runs.
- - - - - stats.arenas.<i>.pdirty - (size_t) - r- - - Number of pages within unused runs that are potentially - dirty, and for which madvise... - MADV_DONTNEED or - similar has not been called. - - - - - stats.arenas.<i>.mapped - (size_t) - r- - [] - - Number of mapped bytes. - - - - - stats.arenas.<i>.npurge - (uint64_t) - r- - [] - - Number of dirty page purge sweeps performed. - - - - - - stats.arenas.<i>.nmadvise - (uint64_t) - r- - [] - - Number of madvise... - MADV_DONTNEED or - similar calls made to purge dirty pages. - - - - - stats.arenas.<i>.npurged - (uint64_t) - r- - [] - - Number of pages purged. - - - - - stats.arenas.<i>.small.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by small objects. - - - - - - stats.arenas.<i>.small.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocation requests served by - small bins. - - - - - stats.arenas.<i>.small.ndalloc - (uint64_t) - r- - [] - - Cumulative number of small objects returned to bins. - - - - - - stats.arenas.<i>.small.nrequests - (uint64_t) - r- - [] - - Cumulative number of small allocation requests. - - - - - - stats.arenas.<i>.large.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by large objects. - - - - - - stats.arenas.<i>.large.nmalloc - (uint64_t) - r- - [] - - Cumulative number of large allocation requests served - directly by the arena. - - - - - stats.arenas.<i>.large.ndalloc - (uint64_t) - r- - [] - - Cumulative number of large deallocation requests served - directly by the arena. - - - - - stats.arenas.<i>.large.nrequests - (uint64_t) - r- - [] - - Cumulative number of large allocation requests. - - - - - - stats.arenas.<i>.bins.<j>.allocated - (size_t) - r- - [] - - Current number of bytes allocated by - bin. - - - - - stats.arenas.<i>.bins.<j>.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocations served by bin. 
- - - - - - stats.arenas.<i>.bins.<j>.ndalloc - (uint64_t) - r- - [] - - Cumulative number of allocations returned to bin. - - - - - - stats.arenas.<i>.bins.<j>.nrequests - (uint64_t) - r- - [] - - Cumulative number of allocation - requests. - - - - - stats.arenas.<i>.bins.<j>.nfills - (uint64_t) - r- - [ ] - - Cumulative number of tcache fills. - - - - - stats.arenas.<i>.bins.<j>.nflushes - (uint64_t) - r- - [ ] - - Cumulative number of tcache flushes. - - - - - stats.arenas.<i>.bins.<j>.nruns - (uint64_t) - r- - [] - - Cumulative number of runs created. - - - - - stats.arenas.<i>.bins.<j>.nreruns - (uint64_t) - r- - [] - - Cumulative number of times the current run from which - to allocate changed. - - - - - stats.arenas.<i>.bins.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far. - - - - - - stats.arenas.<i>.bins.<j>.curruns - (size_t) - r- - [] - - Current number of runs. - - - - - stats.arenas.<i>.lruns.<j>.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class served directly by the arena. - - - - - stats.arenas.<i>.lruns.<j>.ndalloc - (uint64_t) - r- - [] - - Cumulative number of deallocation requests for this - size class served directly by the arena. - - - - - stats.arenas.<i>.lruns.<j>.nrequests - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class. - - - - - stats.arenas.<i>.lruns.<j>.highruns - (size_t) - r- - [] - - Maximum number of runs at any time thus far for this - size class. - - - - - stats.arenas.<i>.lruns.<j>.curruns - (size_t) - r- - [] - - Current number of runs for this size class. - - - - - - swap.avail - (size_t) - r- - [] - - Number of swap file bytes that are currently not - associated with any chunk (i.e. mapped, but otherwise completely - unmanaged). - - - - - swap.prezeroed - (bool) - rw - [] - - If true, the allocator assumes that the swap file(s) - contain nothing but nil bytes. 
If this assumption is violated, - allocator behavior is undefined. This value becomes read-only after - swap.fds is - successfully written to. - - - - - swap.nfds - (size_t) - r- - [] - - Number of file descriptors in use for swap. - - - - - - swap.fds - (int *) - r- - [] - - When written to, the files associated with the - specified file descriptors are contiguously mapped via - mmap - 2. The resulting virtual memory - region is preferred over anonymous - mmap - 2 and - sbrk - 2 memory. Note that if a file's - size is not a multiple of the page size, it is automatically truncated - to the nearest page size multiple. See the - swap.prezeroed - mallctl for specifying that the files are pre-zeroed. - - - - - DEBUGGING MALLOC PROBLEMS - When debugging, it is a good idea to configure/build jemalloc with - the and - options, and recompile the program with suitable options and symbols for - debugger support. When so configured, jemalloc incorporates a wide variety - of run-time assertions that catch application errors such as double-free, - write-after-free, etc. - - Programs often accidentally depend on “uninitialized” - memory actually being filled with zero bytes. Junk filling - (see the opt.junk - option) tends to expose such bugs in the form of obviously incorrect - results and/or coredumps. Conversely, zero - filling (see the opt.zero option) eliminates - the symptoms of such bugs. Between these two options, it is usually - possible to quickly detect, diagnose, and eliminate such bugs. - - This implementation does not provide much detail about the problems - it detects, because the performance impact for storing such information - would be prohibitive. There are a number of allocator implementations - available on the Internet which focus on detecting and pinpointing problems - by trading performance for extra sanity checks and detailed - diagnostics. 
- - - DIAGNOSTIC MESSAGES - If any of the memory allocation/deallocation functions detect an - error or warning condition, a message will be printed to file descriptor - STDERR_FILENO. Errors will result in the process - dumping core. If the opt.abort option is set, most - warnings are treated as errors. - - The malloc_message variable allows the programmer - to override the function which emits the text strings forming the errors - and warnings if for some reason the STDERR_FILENO file - descriptor is not suitable for this. - malloc_message takes the - cbopaque pointer argument that is - NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string - pointer. Please note that doing anything which tries to allocate memory in - this function is likely to result in a crash or deadlock. - - All messages are prefixed by - “<jemalloc>: ”. - - - RETURN VALUES - - Standard API - The malloc and - calloc functions return a pointer to the - allocated memory if successful; otherwise a NULL - pointer is returned and errno is set to - ENOMEM. - - The posix_memalign function - returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail - if: - - - EINVAL - - The alignment parameter is - not a power of 2 at least as large as - sizeof(void *). - - - - ENOMEM - - Memory allocation error. - - - - - The realloc function returns a - pointer, possibly identical to ptr, to the - allocated memory if successful; otherwise a NULL - pointer is returned, and errno is set to - ENOMEM if the error was the result of an - allocation failure. The realloc - function always leaves the original buffer intact when an error occurs. - - - The free function returns no - value. - - - Non-standard API - The malloc_usable_size function - returns the usable size of the allocation pointed to by - ptr. 
- - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on - success; otherwise they return an error value. The functions will fail - if: - - - EINVAL - - newp is not - NULL, and newlen is too - large or too small. Alternatively, *oldlenp - is too large or too small; in this case as much data as possible - are read despite the error. - - - ENOMEM - - *oldlenp is too short to - hold the requested value. - - - ENOENT - - name or - mib specifies an unknown/invalid - value. - - - EPERM - - Attempt to read or write void value, or attempt to - write read-only value. - - - EAGAIN - - A memory allocation failure - occurred. - - - EFAULT - - An interface with side effects failed in some way - not directly related to mallctl* - read/write processing. - - - - - - Experimental API - The allocm, - rallocm, - sallocm, and - dallocm functions return - ALLOCM_SUCCESS on success; otherwise they return an - error value. The allocm and - rallocm functions will fail if: - - - ALLOCM_ERR_OOM - - Out of memory. Insufficient contiguous memory was - available to service the allocation request. The - allocm function additionally sets - *ptr to NULL, whereas - the rallocm function leaves - *ptr unmodified. - - - The rallocm function will also - fail if: - - - ALLOCM_ERR_NOT_MOVED - - ALLOCM_NO_MOVE was specified, - but the reallocation request could not be serviced without moving - the object. - - - - - - - ENVIRONMENT - The following environment variable affects the execution of the - allocation functions: - - - MALLOC_CONF - - If the environment variable - MALLOC_CONF is set, the characters it contains - will be interpreted as options. 
- - - - - - EXAMPLES - To dump core whenever a problem occurs: - ln -s 'abort:true' /etc/malloc.conf - - To specify in the source a chunk size that is 16 MiB: - - - - SEE ALSO - madvise - 2, - mmap - 2, - sbrk - 2, - alloca - 3, - atexit - 3, - getpagesize - 3 - - - STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC - 9899:1990 (“ISO C90”). - - The posix_memalign function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). - - diff --git a/jemalloc/doc/manpages.xsl.in b/jemalloc/doc/manpages.xsl.in deleted file mode 100644 index 88b2626..0000000 --- a/jemalloc/doc/manpages.xsl.in +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/jemalloc/doc/stylesheet.xsl b/jemalloc/doc/stylesheet.xsl deleted file mode 100644 index 4e334a8..0000000 --- a/jemalloc/doc/stylesheet.xsl +++ /dev/null @@ -1,7 +0,0 @@ - - ansi - - - "" - - diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h deleted file mode 100644 index b80c118..0000000 --- a/jemalloc/include/jemalloc/internal/arena.h +++ /dev/null @@ -1,743 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Subpages are an artificially designated partitioning of pages. Their only - * purpose is to support subpage-spaced size classes. - * - * There must be at least 4 subpages per page, due to the way size classes are - * handled. - */ -#define LG_SUBPAGE 8 -#define SUBPAGE ((size_t)(1U << LG_SUBPAGE)) -#define SUBPAGE_MASK (SUBPAGE - 1) - -/* Return the smallest subpage multiple that is >= s. */ -#define SUBPAGE_CEILING(s) \ - (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK) - -#ifdef JEMALLOC_TINY - /* Smallest size class to support. */ -# define LG_TINY_MIN LG_SIZEOF_PTR -# define TINY_MIN (1U << LG_TINY_MIN) -#endif - -/* - * Maximum size class that is a multiple of the quantum, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. 
- */ -#define LG_QSPACE_MAX_DEFAULT 7 - -/* - * Maximum size class that is a multiple of the cacheline, but not (necessarily) - * a power of 2. Above this size, allocations are rounded up to the nearest - * power of 2. - */ -#define LG_CSPACE_MAX_DEFAULT 9 - -/* - * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized - * as small as possible such that this setting is still honored, without - * violating other constraints. The goal is to make runs as small as possible - * without exceeding a per run external fragmentation threshold. - * - * We use binary fixed point math for overhead computations, where the binary - * point is implicitly RUN_BFP bits to the left. - * - * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be - * honored for some/all object sizes, since when heap profiling is enabled - * there is one pointer of header overhead per object (plus a constant). This - * constraint is relaxed (ignored) for runs that are so small that the - * per-region overhead is greater than: - * - * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)) - */ -#define RUN_BFP 12 -/* \/ Implicit binary fixed point. */ -#define RUN_MAX_OVRHD 0x0000003dU -#define RUN_MAX_OVRHD_RELAX 0x00001800U - -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS 11 -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) - -/* - * The minimum ratio of active:dirty pages per arena is computed as: - * - * (nactive >> opt_lg_dirty_mult) >= ndirty - * - * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32 - * times as many active pages as dirty pages. 
- */ -#define LG_DIRTY_MULT_DEFAULT 5 - -typedef struct arena_chunk_map_s arena_chunk_map_t; -typedef struct arena_chunk_s arena_chunk_t; -typedef struct arena_run_s arena_run_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_s { - union { - /* - * Linkage for run trees. There are two disjoint uses: - * - * 1) arena_t's runs_avail_{clean,dirty} trees. - * 2) arena_run_t conceptually uses this linkage for in-use - * non-full runs, rather than directly embedding linkage. - */ - rb_node(arena_chunk_map_t) rb_link; - /* - * List of runs currently in purgatory. arena_chunk_purge() - * temporarily allocates runs that contain dirty pages while - * purging, so that other threads cannot use the runs while the - * purging thread is operating without the arena lock held. - */ - ql_elm(arena_chunk_map_t) ql_link; - } u; - -#ifdef JEMALLOC_PROF - /* Profile counters, used for large object runs. */ - prof_ctx_t *prof_ctx; -#endif - - /* - * Run address (or size) and various flags are stored together. The bit - * layout looks like (assuming 32-bit system): - * - * ???????? ???????? ????---- ----dula - * - * ? : Unallocated: Run address for first/last pages, unset for internal - * pages. - * Small: Run page offset. - * Large: Run size for first page, unset for trailing pages. - * - : Unused. - * d : dirty? - * u : unzeroed? - * l : large? - * a : allocated? - * - * Following are example bit patterns for the three types of runs. 
- * - * p : run page offset - * s : run size - * c : (binind+1) for size class (used only if prof_promote is true) - * x : don't care - * - : 0 - * + : 1 - * [DULA] : bit set - * [dula] : bit unset - * - * Unallocated (clean): - * ssssssss ssssssss ssss---- ----du-a - * xxxxxxxx xxxxxxxx xxxx---- -----Uxx - * ssssssss ssssssss ssss---- ----dU-a - * - * Unallocated (dirty): - * ssssssss ssssssss ssss---- ----D--a - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * ssssssss ssssssss ssss---- ----D--a - * - * Small: - * pppppppp pppppppp pppp---- ----d--A - * pppppppp pppppppp pppp---- -------A - * pppppppp pppppppp pppp---- ----d--A - * - * Large: - * ssssssss ssssssss ssss---- ----D-LA - * xxxxxxxx xxxxxxxx xxxx---- ----xxxx - * -------- -------- -------- ----D-LA - * - * Large (sampled, size <= PAGE_SIZE): - * ssssssss ssssssss sssscccc ccccD-LA - * - * Large (not sampled, size == PAGE_SIZE): - * ssssssss ssssssss ssss---- ----D-LA - */ - size_t bits; -#ifdef JEMALLOC_PROF -#define CHUNK_MAP_CLASS_SHIFT 4 -#define CHUNK_MAP_CLASS_MASK ((size_t)0xff0U) -#endif -#define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU) -#define CHUNK_MAP_DIRTY ((size_t)0x8U) -#define CHUNK_MAP_UNZEROED ((size_t)0x4U) -#define CHUNK_MAP_LARGE ((size_t)0x2U) -#define CHUNK_MAP_ALLOCATED ((size_t)0x1U) -#define CHUNK_MAP_KEY CHUNK_MAP_ALLOCATED -}; -typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t; -typedef rb_tree(arena_chunk_map_t) arena_run_tree_t; - -/* Arena chunk header. */ -struct arena_chunk_s { - /* Arena that owns the chunk. */ - arena_t *arena; - - /* Linkage for the arena's chunks_dirty list. */ - ql_elm(arena_chunk_t) link_dirty; - - /* - * True if the chunk is currently in the chunks_dirty list, due to - * having at some point contained one or more dirty pages. Removal - * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible. - */ - bool dirtied; - - /* Number of dirty pages. */ - size_t ndirty; - - /* - * Map of pages within chunk that keeps track of free/large/small. 
The - * first map_bias entries are omitted, since the chunk header does not - * need to be tracked in the map. This omission saves a header page - * for common chunk sizes (e.g. 4 MiB). - */ - arena_chunk_map_t map[1]; /* Dynamically sized. */ -}; -typedef rb_tree(arena_chunk_t) arena_chunk_tree_t; - -struct arena_run_s { -#ifdef JEMALLOC_DEBUG - uint32_t magic; -# define ARENA_RUN_MAGIC 0x384adf93 -#endif - - /* Bin this run is associated with. */ - arena_bin_t *bin; - - /* Index of next region that has never been allocated, or nregs. */ - uint32_t nextind; - - /* Number of free regions in run. */ - unsigned nfree; -}; - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - */ -struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ - size_t reg_size; - - /* Total size of a run for this bin's size class. */ - size_t run_size; - - /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; - - /* - * Offset of first bitmap_t element in a run header for this bin's size - * class. - */ - uint32_t bitmap_offset; - - /* - * Metadata used to manipulate bitmaps for runs associated with this - * bin. - */ - bitmap_info_t bitmap_info; - -#ifdef JEMALLOC_PROF - /* - * Offset of first (prof_ctx_t *) in a run header for this bin's size - * class, or 0 if (opt_prof == false). - */ - uint32_t ctx0_offset; -#endif - - /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; -}; - -struct arena_bin_s { - /* - * All operations on runcur, runs, and stats require that lock be - * locked. Run allocation/deallocation are protected by the arena lock, - * which may be acquired while holding one or more bin locks, but not - * vise versa. 
- */ - malloc_mutex_t lock; - - /* - * Current run being used to service allocations of this bin's size - * class. - */ - arena_run_t *runcur; - - /* - * Tree of non-full runs. This tree is used when looking for an - * existing run when runcur is no longer usable. We choose the - * non-full run that is lowest in memory; this policy tends to keep - * objects packed well, and it can also help reduce the number of - * almost-empty chunks. - */ - arena_run_tree_t runs; - -#ifdef JEMALLOC_STATS - /* Bin statistics. */ - malloc_bin_stats_t stats; -#endif -}; - -struct arena_s { -#ifdef JEMALLOC_DEBUG - uint32_t magic; -# define ARENA_MAGIC 0x947d3d24 -#endif - - /* This arena's index within the arenas array. */ - unsigned ind; - - /* - * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. - */ - unsigned nthreads; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread asssignment (modifies nthreads) is protected by - * arenas_lock. - * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - -#ifdef JEMALLOC_STATS - arena_stats_t stats; -# ifdef JEMALLOC_TCACHE - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit. - */ - ql_head(tcache_t) tcache_ql; -# endif -#endif - -#ifdef JEMALLOC_PROF - uint64_t prof_accumbytes; -#endif - - /* List of dirty-page-containing chunks this arena manages. */ - ql_head(arena_chunk_t) chunks_dirty; - - /* - * In order to avoid rapid chunk allocation/deallocation when an arena - * oscillates right on the cusp of needing a new chunk, cache the most - * recently freed chunk. The spare is left in the arena's chunk trees - * until it is deleted. 
- * - * There is one spare chunk per arena, rather than one spare total, in - * order to avoid interactions between multiple threads that could make - * a single spare inadequate. - */ - arena_chunk_t *spare; - - /* Number of pages in active runs. */ - size_t nactive; - - /* - * Current count of pages within unused runs that are potentially - * dirty, and for which madvise(... MADV_DONTNEED) has not been called. - * By tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* - * Approximate number of pages being purged. It is possible for - * multiple threads to purge dirty pages concurrently, and they use - * npurgatory to indicate the total number of pages all threads are - * attempting to purge. - */ - size_t npurgatory; - - /* - * Size/address-ordered trees of this arena's available runs. The trees - * are used for first-best-fit run allocation. The dirty tree contains - * runs with dirty pages (i.e. very likely to have been touched and - * therefore have associated physical pages), whereas the clean tree - * contains runs with pages that either have no associated physical - * pages, or have pages that the kernel may recycle at any time due to - * previous madvise(2) calls. The dirty tree is used in preference to - * the clean tree for allocations, because using dirty pages reduces - * the amount of dirty purging necessary to keep the active:dirty page - * ratio below the purge threshold. - */ - arena_avail_tree_t runs_avail_clean; - arena_avail_tree_t runs_avail_dirty; - - /* - * bins is used to store trees of free regions of the following sizes, - * assuming a 64-bit system with 16-byte quantum, 4 KiB page size, and - * default MALLOC_CONF. 
- * - * bins[i] | size | - * --------+--------+ - * 0 | 8 | - * --------+--------+ - * 1 | 16 | - * 2 | 32 | - * 3 | 48 | - * : : - * 6 | 96 | - * 7 | 112 | - * 8 | 128 | - * --------+--------+ - * 9 | 192 | - * 10 | 256 | - * 11 | 320 | - * 12 | 384 | - * 13 | 448 | - * 14 | 512 | - * --------+--------+ - * 15 | 768 | - * 16 | 1024 | - * 17 | 1280 | - * : : - * 25 | 3328 | - * 26 | 3584 | - * 27 | 3840 | - * --------+--------+ - */ - arena_bin_t bins[1]; /* Dynamically sized. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_qspace_max; -extern size_t opt_lg_cspace_max; -extern ssize_t opt_lg_dirty_mult; -/* - * small_size2bin is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via the SMALL_SIZE2BIN macro. - */ -extern uint8_t const *small_size2bin; -#define SMALL_SIZE2BIN(s) (small_size2bin[(s-1) >> LG_TINY_MIN]) - -extern arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ -#ifdef JEMALLOC_TINY /* Number of (2^n)-spaced tiny bins. */ -# define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN)) -#else -# define ntbins 0 -#endif -extern unsigned nqbins; /* Number of quantum-spaced bins. */ -extern unsigned ncbins; /* Number of cacheline-spaced bins. */ -extern unsigned nsbins; /* Number of subpage-spaced bins. 
*/ -extern unsigned nbins; -#ifdef JEMALLOC_TINY -# define tspace_max ((size_t)(QUANTUM >> 1)) -#endif -#define qspace_min QUANTUM -extern size_t qspace_max; -extern size_t cspace_min; -extern size_t cspace_max; -extern size_t sspace_min; -extern size_t sspace_max; -#define small_maxclass sspace_max - -#define nlclasses (chunk_npages - map_bias) - -void arena_purge_all(arena_t *arena); -#ifdef JEMALLOC_PROF -void arena_prof_accum(arena_t *arena, uint64_t accumbytes); -#endif -#ifdef JEMALLOC_TCACHE -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ); -#endif -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); -void *arena_malloc(size_t size, bool zero); -void *arena_palloc(arena_t *arena, size_t size, size_t alloc_size, - size_t alignment, bool zero); -size_t arena_salloc(const void *ptr); -#ifdef JEMALLOC_PROF -void arena_prof_promoted(const void *ptr, size_t size); -size_t arena_salloc_demote(const void *ptr); -#endif -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#ifdef JEMALLOC_STATS -void arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -#endif -void *arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -void *arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); -bool arena_new(arena_t *arena, unsigned ind); -bool arena_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -size_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned 
arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, - const void *ptr); -# ifdef JEMALLOC_PROF -prof_ctx_t *arena_prof_ctx_get(const void *ptr); -void arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -# endif -void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -JEMALLOC_INLINE size_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - size_t binind = bin - arena->bins; - assert(binind < nbins); - return (binind); -} - -JEMALLOC_INLINE unsigned -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) -{ - unsigned shift, diff, regind; - size_t size; - - dassert(run->magic == ARENA_RUN_MAGIC); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - - bin_info->reg0_offset); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - size = bin_info->reg_size; - shift = ffs(size) - 1; - diff >>= shift; - size >>= shift; - - if (size == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. 
- */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned size_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2)) - regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT; - else - regind = diff / size; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * size); - assert(regind < bin_info->nregs); - - return (regind); -} - -#ifdef JEMALLOC_PROF -JEMALLOC_INLINE prof_ctx_t * -arena_prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote) - ret = (prof_ctx_t *)(uintptr_t)1U; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind; - - dassert(run->magic == ARENA_RUN_MAGIC); - regind = arena_run_regind(run, bin_info, ptr); - ret = *(prof_ctx_t **)((uintptr_t)run + - bin_info->ctx0_offset + (regind * - sizeof(prof_ctx_t *))); - } - } else - ret = chunk->map[pageind-map_bias].prof_ctx; - - return (ret); -} - -JEMALLOC_INLINE void 
-arena_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - if (prof_promote == false) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - size_t binind; - arena_bin_info_t *bin_info; - unsigned regind; - - dassert(run->magic == ARENA_RUN_MAGIC); - binind = arena_bin_index(chunk->arena, bin); - bin_info = &arena_bin_info[binind]; - regind = arena_run_regind(run, bin_info, ptr); - - *((prof_ctx_t **)((uintptr_t)run + bin_info->ctx0_offset - + (regind * sizeof(prof_ctx_t *)))) = ctx; - } else - assert((uintptr_t)ctx == (uintptr_t)1U); - } else - chunk->map[pageind-map_bias].prof_ctx = ctx; -} -#endif - -JEMALLOC_INLINE void -arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr) -{ - size_t pageind; - arena_chunk_map_t *mapelm; - - assert(arena != NULL); - dassert(arena->magic == ARENA_MAGIC); - assert(chunk->arena == arena); - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapelm = &chunk->map[pageind-map_bias]; - assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) { - /* Small allocation. 
*/ -#ifdef JEMALLOC_TCACHE - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_small(tcache, ptr); - else { -#endif - arena_run_t *run; - arena_bin_t *bin; - - run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> - PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - bin = run->bin; -#ifdef JEMALLOC_DEBUG - { - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % - bin_info->reg_size == 0); - } -#endif - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, ptr, mapelm); - malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_TCACHE - } -#endif - } else { -#ifdef JEMALLOC_TCACHE - size_t size = mapelm->bits & ~PAGE_MASK; - - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - if (size <= tcache_maxclass) { - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - tcache_dalloc_large(tcache, ptr, size); - else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } - } else { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); - } -#else - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, ptr); - malloc_mutex_unlock(&arena->lock); -#endif - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/atomic.h b/jemalloc/include/jemalloc/internal/atomic.h deleted file mode 100644 index 9a29862..0000000 --- a/jemalloc/include/jemalloc/internal/atomic.h +++ /dev/null @@ -1,169 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) - -#if (LG_SIZEOF_PTR == 3) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint64((uint64_t *)p, (uint64_t)x) -#elif (LG_SIZEOF_PTR == 2) -# define atomic_read_z(p) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)0) -# define atomic_add_z(p, x) \ - (size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x) -# define atomic_sub_z(p, x) \ - (size_t)atomic_sub_uint32((uint32_t *)p, (uint32_t)x) -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); -} -#elif (defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) -{ - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) -{ - - x = (uint64_t)(-(int64_t)x); - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} -#else -# if (LG_SIZEOF_PTR == 3) -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. 
*/ -#ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_add_and_fetch(p, x)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (__sync_sub_and_fetch(p, x)); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); -} -#elif (defined(__i386__) || defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) -{ - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return (x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) -{ - - x = (uint32_t)(-(int32_t)x); - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (x), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return (x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/base.h b/jemalloc/include/jemalloc/internal/base.h deleted file mode 100644 index e353f30..0000000 --- a/jemalloc/include/jemalloc/internal/base.h +++ /dev/null @@ -1,24 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern malloc_mutex_t base_mtx; - -void *base_alloc(size_t size); -extent_node_t *base_node_alloc(void); -void base_node_dealloc(extent_node_t *node); -bool base_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/bitmap.h b/jemalloc/include/jemalloc/internal/bitmap.h deleted file mode 100644 index 605ebac..0000000 --- a/jemalloc/include/jemalloc/internal/bitmap.h +++ /dev/null @@ -1,184 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. 
*/ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). - */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool 
-bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. */ - return (rg == 0); -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit) == false); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -} - -/* sfu: set first unset. 
*/ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(bitmap_full(bitmap, binfo) == false); - - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffsl(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffsl(g) - 1); - } - - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit) == false); - /* Propagate group state transitions up the tree. */ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (propagate == false) - break; - } - } -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/chunk.h b/jemalloc/include/jemalloc/internal/chunk.h deleted file mode 100644 index a60f0ad..0000000 --- a/jemalloc/include/jemalloc/internal/chunk.h +++ /dev/null @@ -1,65 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Size and alignment of memory chunks that are allocated by the OS's virtual - * memory system. 
- */ -#define LG_CHUNK_DEFAULT 22 - -/* Return the chunk address for allocation address a. */ -#define CHUNK_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~chunksize_mask)) - -/* Return the chunk offset of address a. */ -#define CHUNK_ADDR2OFFSET(a) \ - ((size_t)((uintptr_t)(a) & chunksize_mask)) - -/* Return the smallest chunk multiple that is >= s. */ -#define CHUNK_CEILING(s) \ - (((s) + chunksize_mask) & ~chunksize_mask) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_chunk; -#ifdef JEMALLOC_SWAP -extern bool opt_overcommit; -#endif - -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) -/* Protects stats_chunks; currently not used for any other purpose. */ -extern malloc_mutex_t chunks_mtx; -/* Chunk statistics. */ -extern chunk_stats_t stats_chunks; -#endif - -#ifdef JEMALLOC_IVSALLOC -extern rtree_t *chunks_rtree; -#endif - -extern size_t chunksize; -extern size_t chunksize_mask; /* (chunksize - 1). */ -extern size_t chunk_npages; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t arena_maxclass; /* Max size class for arenas. 
*/ - -void *chunk_alloc(size_t size, bool base, bool *zero); -void chunk_dealloc(void *chunk, size_t size); -bool chunk_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/chunk_swap.h" -#include "jemalloc/internal/chunk_dss.h" -#include "jemalloc/internal/chunk_mmap.h" diff --git a/jemalloc/include/jemalloc/internal/chunk_dss.h b/jemalloc/include/jemalloc/internal/chunk_dss.h deleted file mode 100644 index 6f00522..0000000 --- a/jemalloc/include/jemalloc/internal/chunk_dss.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifdef JEMALLOC_DSS -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -/* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. 
- */ -extern malloc_mutex_t dss_mtx; - -void *chunk_alloc_dss(size_t size, bool *zero); -bool chunk_in_dss(void *chunk); -bool chunk_dealloc_dss(void *chunk, size_t size); -bool chunk_dss_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/jemalloc/include/jemalloc/internal/chunk_mmap.h b/jemalloc/include/jemalloc/internal/chunk_mmap.h deleted file mode 100644 index 07b50a4..0000000 --- a/jemalloc/include/jemalloc/internal/chunk_mmap.h +++ /dev/null @@ -1,23 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *chunk_alloc_mmap(size_t size); -void *chunk_alloc_mmap_noreserve(size_t size); -void chunk_dealloc_mmap(void *chunk, size_t size); - -bool chunk_mmap_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/chunk_swap.h b/jemalloc/include/jemalloc/internal/chunk_swap.h deleted file mode 100644 index 9faa739..0000000 --- a/jemalloc/include/jemalloc/internal/chunk_swap.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifdef JEMALLOC_SWAP -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern malloc_mutex_t swap_mtx; -extern bool swap_enabled; -extern bool swap_prezeroed; -extern size_t swap_nfds; -extern int *swap_fds; -#ifdef JEMALLOC_STATS -extern size_t swap_avail; -#endif - -void *chunk_alloc_swap(size_t size, bool *zero); -bool chunk_in_swap(void *chunk); -bool chunk_dealloc_swap(void *chunk, size_t size); -bool chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed); -bool chunk_swap_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/jemalloc/include/jemalloc/internal/ckh.h b/jemalloc/include/jemalloc/internal/ckh.h deleted file mode 100644 index 3e4ad4c..0000000 --- a/jemalloc/include/jemalloc/internal/ckh.h +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; - -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, unsigned, size_t *, size_t *); -typedef bool ckh_keycomp_t (const void *, const void *); - -/* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ -/* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ - -/* - * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit - * one bucket per L1 cache line. 
- */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; - -struct ckh_s { -#ifdef JEMALLOC_DEBUG -#define CKH_MAGIC 0x3af2489d - uint32_t magic; -#endif - -#ifdef CKH_COUNT - /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; -#endif - - /* Used for pseudo-random number generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prn_state; - - /* Total number of items. */ - size_t count; - - /* - * Minimum and current number of hash table buckets. There are - * 2^LG_CKH_BUCKET_CELLS cells per bucket. - */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; - - /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; - - /* Hash table with 2^lg_curbuckets buckets. 
*/ - ckhc_t *tab; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp); -void ckh_delete(ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(ckh_t *ckh, const void *searchkey, void **key, - void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); -void ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); -bool ckh_pointer_keycomp(const void *k1, const void *k2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/ctl.h b/jemalloc/include/jemalloc/internal/ctl.h deleted file mode 100644 index f1f5eb7..0000000 --- a/jemalloc/include/jemalloc/internal/ctl.h +++ /dev/null @@ -1,118 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool named; - union { - struct { - const char *name; - /* If (nchildren == 0), this is a terminal node. 
*/ - unsigned nchildren; - const ctl_node_t *children; - } named; - struct { - const ctl_node_t *(*index)(const size_t *, size_t, - size_t); - } indexed; - } u; - int (*ctl)(const size_t *, size_t, void *, size_t *, void *, - size_t); -}; - -struct ctl_arena_stats_s { - bool initialized; - unsigned nthreads; - size_t pactive; - size_t pdirty; -#ifdef JEMALLOC_STATS - arena_stats_t astats; - - /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t *bstats; /* nbins elements. */ - malloc_large_stats_t *lstats; /* nlclasses elements. */ -#endif -}; - -struct ctl_stats_s { -#ifdef JEMALLOC_STATS - size_t allocated; - size_t active; - size_t mapped; - struct { - size_t current; /* stats_chunks.curchunks */ - uint64_t total; /* stats_chunks.nchunks */ - size_t high; /* stats_chunks.highchunks */ - } chunks; - struct { - size_t allocated; /* huge_allocated */ - uint64_t nmalloc; /* huge_nmalloc */ - uint64_t ndalloc; /* huge_ndalloc */ - } huge; -#endif - ctl_arena_stats_t *arenas; /* (narenas + 1) elements. 
*/ -#ifdef JEMALLOC_SWAP - size_t swap_avail; -#endif -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -bool ctl_boot(void); - -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctl)(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_write(": Failure in xmallctl(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ - abort(); \ - } \ -} while (0) - -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (JEMALLOC_P(mallctlnametomib)(name, mibp, miblenp) != 0) { \ - malloc_write( \ - ": Failure in xmallctlnametomib(\""); \ - malloc_write(name); \ - malloc_write("\", ...)\n"); \ - abort(); \ - } \ -} while (0) - -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (JEMALLOC_P(mallctlbymib)(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/jemalloc/include/jemalloc/internal/extent.h b/jemalloc/include/jemalloc/internal/extent.h deleted file mode 100644 index 6fe9702..0000000 --- a/jemalloc/include/jemalloc/internal/extent.h +++ /dev/null @@ -1,49 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct extent_node_s extent_node_t; - -#endif /* JEMALLOC_H_TYPES */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Tree of extents. */ -struct extent_node_s { -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) link_szad; -#endif - - /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) link_ad; - -#ifdef JEMALLOC_PROF - /* Profile counters, used for huge objects. */ - prof_ctx_t *prof_ctx; -#endif - - /* Pointer to the extent that this tree node is responsible for. */ - void *addr; - - /* Total region size. */ - size_t size; -}; -typedef rb_tree(extent_node_t) extent_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) -#endif - -rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/jemalloc/include/jemalloc/internal/hash.h b/jemalloc/include/jemalloc/internal/hash.h deleted file mode 100644 index 93905bf..0000000 --- a/jemalloc/include/jemalloc/internal/hash.h +++ /dev/null @@ -1,70 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -uint64_t hash(const void *key, size_t len, uint64_t seed); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) -/* - * The following hash function is based on MurmurHash64A(), placed into the - * public domain by Austin Appleby. See http://murmurhash.googlepages.com/ for - * details. - */ -JEMALLOC_INLINE uint64_t -hash(const void *key, size_t len, uint64_t seed) -{ - const uint64_t m = 0xc6a4a7935bd1e995; - const int r = 47; - uint64_t h = seed ^ (len * m); - const uint64_t *data = (const uint64_t *)key; - const uint64_t *end = data + (len/8); - const unsigned char *data2; - - assert(((uintptr_t)key & 0x7) == 0); - - while(data != end) { - uint64_t k = *data++; - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - data2 = (const unsigned char *)data; - switch(len & 7) { - case 7: h ^= ((uint64_t)(data2[6])) << 48; - case 6: h ^= ((uint64_t)(data2[5])) << 40; - case 5: h ^= ((uint64_t)(data2[4])) << 32; - case 4: h ^= ((uint64_t)(data2[3])) << 24; - case 3: h ^= ((uint64_t)(data2[2])) << 16; - case 2: h ^= ((uint64_t)(data2[1])) << 8; - case 1: h ^= ((uint64_t)(data2[0])); - h *= m; - } - - h ^= h >> r; - h *= m; - h ^= h >> r; - - return (h); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/huge.h b/jemalloc/include/jemalloc/internal/huge.h deleted file mode 100644 index 66544cf..0000000 --- a/jemalloc/include/jemalloc/internal/huge.h +++ /dev/null @@ -1,41 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_STATS -/* Huge allocation statistics. */ -extern uint64_t huge_nmalloc; -extern uint64_t huge_ndalloc; -extern size_t huge_allocated; -#endif - -/* Protects chunk-related data structures. */ -extern malloc_mutex_t huge_mtx; - -void *huge_malloc(size_t size, bool zero); -void *huge_palloc(size_t size, size_t alignment, bool zero); -void *huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra); -void *huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); -void huge_dalloc(void *ptr, bool unmap); -size_t huge_salloc(const void *ptr); -#ifdef JEMALLOC_PROF -prof_ctx_t *huge_prof_ctx_get(const void *ptr); -void huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -#endif -bool huge_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in b/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in deleted file mode 100644 index 254adb6..0000000 --- a/jemalloc/include/jemalloc/internal/jemalloc_internal.h.in +++ /dev/null @@ -1,786 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX -#endif -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "../jemalloc@install_suffix@.h" - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include -#endif - -#ifdef JEMALLOC_ZONE 
-#include -#include -#include -#include -#endif - -#ifdef JEMALLOC_LAZY_LOCK -#include -#endif - -#define RB_COMPACT -#include "jemalloc/internal/rb.h" -#include "jemalloc/internal/qr.h" -#include "jemalloc/internal/ql.h" - -extern void (*JEMALLOC_P(malloc_message))(void *wcbopaque, const char *s); - -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -# ifdef JEMALLOC_DEBUG -# define assert(e) do { \ - if (!(e)) { \ - char line_buf[UMAX2S_BUFSIZE]; \ - malloc_write(": "); \ - malloc_write(__FILE__); \ - malloc_write(":"); \ - malloc_write(u2s(__LINE__, 10, line_buf)); \ - malloc_write(": Failed assertion: "); \ - malloc_write("\""); \ - malloc_write(#e); \ - malloc_write("\"\n"); \ - abort(); \ - } \ -} while (0) -# else -# define assert(e) -# endif -#endif - -#ifdef JEMALLOC_DEBUG -# define dassert(e) assert(e) -#else -# define dassert(e) -#endif - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: - * - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES - -#define ALLOCM_LG_ALIGN_MASK ((int)0x3f) - -#define ZU(z) ((size_t)z) - -#ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) -#endif - -#ifdef JEMALLOC_DEBUG - /* Disable inlining to make debugging easier. 
*/ -# define JEMALLOC_INLINE -# define inline -#else -# define JEMALLOC_ENABLE_INLINE -# define JEMALLOC_INLINE static inline -#endif - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* Minimum alignment of allocations is 2^LG_QUANTUM bytes. */ -#ifdef __i386__ -# define LG_QUANTUM 4 -#endif -#ifdef __ia64__ -# define LG_QUANTUM 4 -#endif -#ifdef __alpha__ -# define LG_QUANTUM 4 -#endif -#ifdef __sparc64__ -# define LG_QUANTUM 4 -#endif -#if (defined(__amd64__) || defined(__x86_64__)) -# define LG_QUANTUM 4 -#endif -#ifdef __arm__ -# define LG_QUANTUM 3 -#endif -#ifdef __mips__ -# define LG_QUANTUM 3 -#endif -#ifdef __powerpc__ -# define LG_QUANTUM 4 -#endif -#ifdef __s390x__ -# define LG_QUANTUM 4 -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) - -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - */ -#define LG_CACHELINE 6 -#define CACHELINE ((size_t)(1U << LG_CACHELINE)) -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* - * Page size. STATIC_PAGE_SHIFT is determined by the configure script. 
If - * DYNAMIC_PAGE_SHIFT is enabled, only use the STATIC_PAGE_* macros where - * compile-time values are required for the purposes of defining data - * structures. - */ -#define STATIC_PAGE_SIZE ((size_t)(1U << STATIC_PAGE_SHIFT)) -#define STATIC_PAGE_MASK ((size_t)(STATIC_PAGE_SIZE - 1)) - -#ifdef PAGE_SHIFT -# undef PAGE_SHIFT -#endif -#ifdef PAGE_SIZE -# undef PAGE_SIZE -#endif -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif - -#ifdef DYNAMIC_PAGE_SHIFT -# define PAGE_SHIFT lg_pagesize -# define PAGE_SIZE pagesize -# define PAGE_MASK pagesize_mask -#else -# define PAGE_SHIFT STATIC_PAGE_SHIFT -# define PAGE_SIZE STATIC_PAGE_SIZE -# define PAGE_MASK STATIC_PAGE_MASK -#endif - -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_TYPES -/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include 
"jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif -#include "jemalloc/internal/prof.h" - -#ifdef JEMALLOC_STATS -typedef struct { - uint64_t allocated; - uint64_t deallocated; -} thread_allocated_t; -#endif - -#undef JEMALLOC_H_STRUCTS -/******************************************************************************/ -#define JEMALLOC_H_EXTERNS - -extern bool opt_abort; -#ifdef JEMALLOC_FILL -extern bool opt_junk; -#endif -#ifdef JEMALLOC_SYSV -extern bool opt_sysv; -#endif -#ifdef JEMALLOC_XMALLOC -extern bool opt_xmalloc; -#endif -#ifdef JEMALLOC_FILL -extern bool opt_zero; -#endif -extern size_t opt_narenas; - -#ifdef DYNAMIC_PAGE_SHIFT -extern size_t pagesize; -extern size_t pagesize_mask; -extern size_t lg_pagesize; -#endif - -/* Number of CPUs. */ -extern unsigned ncpus; - -extern malloc_mutex_t arenas_lock; /* Protects arenas initialization. */ -extern pthread_key_t arenas_tsd; -#ifndef NO_TLS -/* - * Map of pthread_self() --> arenas[???], used for selecting an arena to use - * for allocations. - */ -extern __thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -# define ARENA_GET() arenas_tls -# define ARENA_SET(v) do { \ - arenas_tls = (v); \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#else -# define ARENA_GET() ((arena_t *)pthread_getspecific(arenas_tsd)) -# define ARENA_SET(v) do { \ - pthread_setspecific(arenas_tsd, (void *)(v)); \ -} while (0) -#endif - -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. 
- */ -extern arena_t **arenas; -extern unsigned narenas; - -#ifdef JEMALLOC_STATS -# ifndef NO_TLS -extern __thread thread_allocated_t thread_allocated_tls; -# define ALLOCATED_GET() (thread_allocated_tls.allocated) -# define ALLOCATEDP_GET() (&thread_allocated_tls.allocated) -# define DEALLOCATED_GET() (thread_allocated_tls.deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_tls.deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_tls.allocated += a; \ - thread_allocated_tls.deallocated += d; \ -} while (0) -# else -extern pthread_key_t thread_allocated_tsd; -thread_allocated_t *thread_allocated_get_hard(void); - -# define ALLOCATED_GET() (thread_allocated_get()->allocated) -# define ALLOCATEDP_GET() (&thread_allocated_get()->allocated) -# define DEALLOCATED_GET() (thread_allocated_get()->deallocated) -# define DEALLOCATEDP_GET() (&thread_allocated_get()->deallocated) -# define ALLOCATED_ADD(a, d) do { \ - thread_allocated_t *thread_allocated = thread_allocated_get(); \ - thread_allocated->allocated += (a); \ - thread_allocated->deallocated += (d); \ -} while (0) -# endif -#endif - -arena_t *arenas_extend(unsigned ind); -arena_t *choose_arena_hard(void); -int buferror(int errnum, char *buf, size_t buflen); -void jemalloc_prefork(void); -void jemalloc_postfork(void); - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif -#include 
"jemalloc/internal/prof.h" - -#undef JEMALLOC_H_EXTERNS -/******************************************************************************/ -#define JEMALLOC_H_INLINES - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/prn.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" - -#ifndef JEMALLOC_ENABLE_INLINE -size_t pow2_ceil(size_t x); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment, size_t *run_size_p); -void malloc_write(const char *s); -arena_t *choose_arena(void); -# if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t *thread_allocated_get(void); -# endif -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) -{ - - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) - x |= x >> 32; -#endif - x++; - return (x); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_INLINE size_t -s2u(size_t size) -{ - - if (size <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(size)].reg_size); - if (size <= arena_maxclass) - return (PAGE_CEILING(size)); - return (CHUNK_CEILING(size)); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_INLINE size_t -sa2u(size_t size, size_t alignment, size_t *run_size_p) -{ - size_t usize; - - /* - * Round size up to the nearest multiple of alignment. 
- * - * This done, we can take advantage of the fact that for each small - * size class, every object is aligned at the smallest power of two - * that is non-zero in the base two representation of the size. For - * example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - * - * Depending on runtime settings, it is possible that arena_malloc() - * will further round up to a power of two, but that never causes - * correctness issues. - */ - usize = (size + (alignment - 1)) & (-alignment); - /* - * (usize < size) protects against the combination of maximal - * alignment and size greater than maximal alignment. - */ - if (usize < size) { - /* size_t overflow. */ - return (0); - } - - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) { - if (usize <= small_maxclass) - return (arena_bin_info[SMALL_SIZE2BIN(usize)].reg_size); - return (PAGE_CEILING(usize)); - } else { - size_t run_size; - - /* - * We can't achieve subpage alignment, so round up alignment - * permanently; it makes later calculations simpler. - */ - alignment = PAGE_CEILING(alignment); - usize = PAGE_CEILING(size); - /* - * (usize < size) protects against very large sizes within - * PAGE_SIZE of SIZE_T_MAX. - * - * (usize + alignment < usize) protects against the - * combination of maximal alignment and usize large enough - * to cause overflow. This is similar to the first overflow - * check above, but it needs to be repeated due to the new - * usize value, which may now be *equal* to maximal - * alignment, whereas before we only detected overflow if the - * original size was *greater* than maximal alignment. - */ - if (usize < size || usize + alignment < usize) { - /* size_t overflow. */ - return (0); - } - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. 
- */ - if (usize >= alignment) - run_size = usize + alignment - PAGE_SIZE; - else { - /* - * It is possible that (alignment << 1) will cause - * overflow, but it doesn't matter because we also - * subtract PAGE_SIZE, which in the case of overflow - * leaves us with a very large run_size. That causes - * the first conditional below to fail, which means - * that the bogus run_size value never gets used for - * anything important. - */ - run_size = (alignment << 1) - PAGE_SIZE; - } - if (run_size_p != NULL) - *run_size_p = run_size; - - if (run_size <= arena_maxclass) - return (PAGE_CEILING(usize)); - return (CHUNK_CEILING(usize)); - } -} - -/* - * Wrapper around malloc_message() that avoids the need for - * JEMALLOC_P(malloc_message)(...) throughout the code. - */ -JEMALLOC_INLINE void -malloc_write(const char *s) -{ - - JEMALLOC_P(malloc_message)(NULL, s); -} - -/* - * Choose an arena based on a per-thread value (fast-path code, calls slow-path - * code if necessary). - */ -JEMALLOC_INLINE arena_t * -choose_arena(void) -{ - arena_t *ret; - - ret = ARENA_GET(); - if (ret == NULL) { - ret = choose_arena_hard(); - assert(ret != NULL); - } - - return (ret); -} - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -JEMALLOC_INLINE thread_allocated_t * -thread_allocated_get(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - pthread_getspecific(thread_allocated_tsd); - - if (thread_allocated == NULL) - return (thread_allocated_get_hard()); - return (thread_allocated); -} -#endif -#endif - -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/hash.h" -#ifdef JEMALLOC_ZONE -#include "jemalloc/internal/zone.h" -#endif - -#ifndef JEMALLOC_ENABLE_INLINE -void *imalloc(size_t size); -void *icalloc(size_t size); -void *ipalloc(size_t usize, size_t alignment, bool zero); -size_t isalloc(const void *ptr); -# ifdef JEMALLOC_IVSALLOC 
-size_t ivsalloc(const void *ptr); -# endif -void idalloc(void *ptr); -void *iralloc(void *ptr, size_t size, size_t extra, size_t alignment, - bool zero, bool no_move); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE void * -imalloc(size_t size) -{ - - assert(size != 0); - - if (size <= arena_maxclass) - return (arena_malloc(size, false)); - else - return (huge_malloc(size, false)); -} - -JEMALLOC_INLINE void * -icalloc(size_t size) -{ - - if (size <= arena_maxclass) - return (arena_malloc(size, true)); - else - return (huge_malloc(size, true)); -} - -JEMALLOC_INLINE void * -ipalloc(size_t usize, size_t alignment, bool zero) -{ - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment, NULL)); - - if (usize <= arena_maxclass && alignment <= PAGE_SIZE) - ret = arena_malloc(usize, zero); - else { - size_t run_size -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; - - /* - * Ideally we would only ever call sa2u() once per aligned - * allocation request, and the caller of this function has - * already done so once. However, it's rather burdensome to - * require every caller to pass in run_size, especially given - * that it's only relevant to large allocations. Therefore, - * just call it again here in order to get run_size. - */ - sa2u(usize, alignment, &run_size); - if (run_size <= arena_maxclass) { - ret = arena_palloc(choose_arena(), usize, run_size, - alignment, zero); - } else if (alignment <= chunksize) - ret = huge_malloc(usize, zero); - else - ret = huge_palloc(usize, alignment, zero); - } - - assert(((uintptr_t)ret & (alignment - 1)) == 0); - return (ret); -} - -JEMALLOC_INLINE size_t -isalloc(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. 
*/ - dassert(chunk->arena->magic == ARENA_MAGIC); - -#ifdef JEMALLOC_PROF - ret = arena_salloc_demote(ptr); -#else - ret = arena_salloc(ptr); -#endif - } else - ret = huge_salloc(ptr); - - return (ret); -} - -#ifdef JEMALLOC_IVSALLOC -JEMALLOC_INLINE size_t -ivsalloc(const void *ptr) -{ - - /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - if (rtree_get(chunks_rtree, (uintptr_t)CHUNK_ADDR2BASE(ptr)) == NULL) - return (0); - - return (isalloc(ptr)); -} -#endif - -JEMALLOC_INLINE void -idalloc(void *ptr) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) - arena_dalloc(chunk->arena, chunk, ptr); - else - huge_dalloc(ptr, true); -} - -JEMALLOC_INLINE void * -iralloc(void *ptr, size_t size, size_t extra, size_t alignment, bool zero, - bool no_move) -{ - void *ret; - size_t oldsize; - - assert(ptr != NULL); - assert(size != 0); - - oldsize = isalloc(ptr); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - size_t usize, copysize; - - /* - * Existing object alignment is inadquate; allocate new space - * and copy. - */ - if (no_move) - return (NULL); - usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, without extra this time. */ - usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - if (ret == NULL) - return (NULL); - } - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? 
size : oldsize; - memcpy(ret, ptr, copysize); - idalloc(ptr); - return (ret); - } - - if (no_move) { - if (size <= arena_maxclass) { - return (arena_ralloc_no_move(ptr, oldsize, size, - extra, zero)); - } else { - return (huge_ralloc_no_move(ptr, oldsize, size, - extra)); - } - } else { - if (size + extra <= arena_maxclass) { - return (arena_ralloc(ptr, oldsize, size, extra, - alignment, zero)); - } else { - return (huge_ralloc(ptr, oldsize, size, extra, - alignment, zero)); - } - } -} -#endif - -#include "jemalloc/internal/prof.h" - -#undef JEMALLOC_H_INLINES -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/mb.h b/jemalloc/include/jemalloc/internal/mb.h deleted file mode 100644 index dc9f2a5..0000000 --- a/jemalloc/include/jemalloc/internal/mb.h +++ /dev/null @@ -1,108 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void mb_write(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) -#ifdef __i386__ -/* - * According to the Intel Architecture Software Developer's Manual, current - * processors execute instructions in order from the perspective of other - * processors in a multiprocessor system, but 1) Intel reserves the right to - * change that, and 2) the compiler's optimizer could re-order instructions if - * there weren't some form of barrier. 
Therefore, even if running on an - * architecture that does not need memory barriers (everything through at least - * i686), an "optimizer barrier" is necessary. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - -# if 0 - /* This is a true memory barrier. */ - asm volatile ("pusha;" - "xor %%eax,%%eax;" - "cpuid;" - "popa;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#else - /* - * This is hopefully enough to keep the compiler from reordering - * instructions around this one. - */ - asm volatile ("nop;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -#endif -} -#elif (defined(__amd64_) || defined(__x86_64__)) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("sfence" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__powerpc__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("eieio" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__sparc64__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("membar #StoreStore" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#else -/* - * This is much slower than a simple memory barrier, but the semantics of mutex - * unlock make this work. 
- */ -JEMALLOC_INLINE void -mb_write(void) -{ - malloc_mutex_t mtx; - - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); -} -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/mutex.h b/jemalloc/include/jemalloc/internal/mutex.h deleted file mode 100644 index 62947ce..0000000 --- a/jemalloc/include/jemalloc/internal/mutex.h +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef JEMALLOC_OSSPIN -typedef OSSpinLock malloc_mutex_t; -#else -typedef pthread_mutex_t malloc_mutex_t; -#endif - -#ifdef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -# define MALLOC_MUTEX_INITIALIZER PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP -#else -# define MALLOC_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_LAZY_LOCK -extern bool isthreaded; -#else -# define isthreaded true -#endif - -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_destroy(malloc_mutex_t *mutex); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -bool malloc_mutex_trylock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockLock(mutex); -#else - 
pthread_mutex_lock(mutex); -#endif - } -} - -JEMALLOC_INLINE bool -malloc_mutex_trylock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - return (OSSpinLockTry(mutex) == false); -#else - return (pthread_mutex_trylock(mutex) != 0); -#endif - } else - return (false); -} - -JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) -{ - - if (isthreaded) { -#ifdef JEMALLOC_OSSPIN - OSSpinLockUnlock(mutex); -#else - pthread_mutex_unlock(mutex); -#endif - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/prn.h b/jemalloc/include/jemalloc/internal/prn.h deleted file mode 100644 index 0709d70..0000000 --- a/jemalloc/include/jemalloc/internal/prn.h +++ /dev/null @@ -1,60 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Simple linear congruential pseudo-random number generator: - * - * prn(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: - * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example. the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. - * const uint32_t a, c : See above discussion. 
- */ -#define prn32(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - lg_range); \ -} while (false) - -/* Same as prn32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prn64(r, lg_range, state, a, c) do { \ - assert(lg_range > 0); \ - assert(lg_range <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - lg_range); \ -} while (false) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/prof.h b/jemalloc/include/jemalloc/internal/prof.h deleted file mode 100644 index f943873..0000000 --- a/jemalloc/include/jemalloc/internal/prof.h +++ /dev/null @@ -1,561 +0,0 @@ -#ifdef JEMALLOC_PROF -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_thr_cnt_s prof_thr_cnt_t; -typedef struct prof_ctx_s prof_ctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#define PROF_PREFIX_DEFAULT "jeprof" -#define LG_PROF_BT_MAX_DEFAULT 7 -#define LG_PROF_SAMPLE_DEFAULT 0 -#define LG_PROF_INTERVAL_DEFAULT -1 -#define LG_PROF_TCMAX_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. Note that the version of - * prof_backtrace() that is based on __builtin_return_address() necessarily has - * a hard-coded number of backtrace frame handlers. 
- */ -#if (defined(JEMALLOC_PROF_LIBGCC) || defined(JEMALLOC_PROF_LIBUNWIND)) -# define LG_PROF_BT_MAX ((ZU(1) << (LG_SIZEOF_PTR+3)) - 1) -#else -# define LG_PROF_BT_MAX 7 /* >= LG_PROF_BT_MAX_DEFAULT */ -#endif -#define PROF_BT_MAX (1U << LG_PROF_BT_MAX) - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUF_SIZE 65536 - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned nignore; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* - * Profiling counters. An allocation/deallocation pair can operate on - * different prof_thr_cnt_t objects that are linked into the same - * prof_ctx_t cnts_ql, so it is possible for the cur* counters to go - * negative. In principle it is possible for the *bytes counters to - * overflow/underflow, but a general solution would require something - * like 128-bit counters; this implementation doesn't bother to solve - * that problem. - */ - int64_t curobjs; - int64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -struct prof_thr_cnt_s { - /* Linkage into prof_ctx_t's cnts_ql. */ - ql_elm(prof_thr_cnt_t) cnts_link; - - /* Linkage into thread's LRU. */ - ql_elm(prof_thr_cnt_t) lru_link; - - /* - * Associated context. If a thread frees an object that it did not - * allocate, it is possible that the context is not cached in the - * thread's hash table, in which case it must be able to look up the - * context, insert a new prof_thr_cnt_t into the thread's hash table, - * and link it into the prof_ctx_t's cnts_ql. 
- */ - prof_ctx_t *ctx; - - /* - * Threads use memory barriers to update the counters. Since there is - * only ever one writer, the only challenge is for the reader to get a - * consistent read of the counters. - * - * The writer uses this series of operations: - * - * 1) Increment epoch to an odd number. - * 2) Update counters. - * 3) Increment epoch to an even number. - * - * The reader must assure 1) that the epoch is even while it reads the - * counters, and 2) that the epoch doesn't change between the time it - * starts and finishes reading the counters. - */ - unsigned epoch; - - /* Profiling counters. */ - prof_cnt_t cnts; -}; - -struct prof_ctx_s { - /* Associated backtrace. */ - prof_bt_t *bt; - - /* Protects cnt_merged and cnts_ql. */ - malloc_mutex_t lock; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* When threads exit, they merge their stats into cnt_merged. */ - prof_cnt_t cnt_merged; - - /* - * List of profile counters, one for each thread that has allocated in - * this context. - */ - ql_head(prof_thr_cnt_t) cnts_ql; -}; - -struct prof_tdata_s { - /* - * Hash of (prof_bt_t *)-->(prof_thr_cnt_t *). Each thread keeps a - * cache of backtraces, with associated thread-specific prof_thr_cnt_t - * objects. Other threads may read the prof_thr_cnt_t contents, but no - * others will ever write them. - * - * Upon thread exit, the thread must merge all the prof_thr_cnt_t - * counter data into the associated prof_ctx_t objects, and unlink/free - * the prof_thr_cnt_t objects. - */ - ckh_t bt2cnt; - - /* LRU for contents of bt2cnt. */ - ql_head(prof_thr_cnt_t) lru_ql; - - /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; - - /* Sampling state. 
*/ - uint64_t prn_state; - uint64_t threshold; - uint64_t accum; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -/* - * Even if opt_prof is true, sampling can be temporarily disabled by setting - * opt_prof_active to false. No locking is used when updating opt_prof_active, - * so there are no guarantees regarding how long it will take for all threads - * to notice state changes. - */ -extern bool opt_prof_active; -extern size_t opt_lg_prof_bt_max; /* Maximum backtrace depth. */ -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern ssize_t opt_lg_prof_tcmax; /* lg(max per thread bactrace cache) */ -extern char opt_prof_prefix[PATH_MAX + 1]; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * If true, promote small sampled objects to large objects, since small run - * headers do not have embedded profile context pointers. - */ -extern bool prof_promote; - -/* (1U << opt_lg_prof_bt_max). */ -extern unsigned prof_bt_max; - -/* Thread-specific backtrace cache, used to reduce bt2ctx contention. 
*/ -#ifndef NO_TLS -extern __thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define PROF_TCACHE_GET() prof_tdata_tls -# define PROF_TCACHE_SET(v) do { \ - prof_tdata_tls = (v); \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#else -# define PROF_TCACHE_GET() \ - ((prof_tdata_t *)pthread_getspecific(prof_tdata_tsd)) -# define PROF_TCACHE_SET(v) do { \ - pthread_setspecific(prof_tdata_tsd, (void *)(v)); \ -} while (0) -#endif -/* - * Same contents as b2cnt_tls, but initialized such that the TSD destructor is - * called when a thread exits, so that prof_tdata_tls contents can be merged, - * unlinked, and deallocated. - */ -extern pthread_key_t prof_tdata_tsd; - -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max); -prof_thr_cnt_t *prof_lookup(prof_bt_t *bt); -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); -prof_tdata_t *prof_tdata_init(void); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void prof_sample_threshold_update(prof_tdata_t *prof_tdata); -prof_thr_cnt_t *prof_alloc_prep(size_t size); -prof_ctx_t *prof_ctx_get(const void *ptr); -void prof_ctx_set(const void *ptr, prof_ctx_t *ctx); -bool prof_sample_accum_update(size_t size); -void prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt); -void prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx); -void prof_free(const void *ptr, size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_INLINE void -prof_sample_threshold_update(prof_tdata_t *prof_tdata) -{ - uint64_t r; - double u; - - /* - * Compute sample threshold as a geometrically distributed random - * 
variable with mean (2^opt_lg_prof_sample). - * - * __ __ - * | log(u) | 1 - * prof_tdata->threshold = | -------- |, where p = ------------------- - * | log(1-p) | opt_lg_prof_sample - * 2 - * - * For more information on the math, see: - * - * Non-Uniform Random Variate Generation - * Luc Devroye - * Springer-Verlag, New York, 1986 - * pp 500 - * (http://cg.scs.carleton.ca/~luc/rnbookindex.html) - */ - prn64(r, 53, prof_tdata->prn_state, - (uint64_t)6364136223846793005LLU, (uint64_t)1442695040888963407LLU); - u = (double)r * (1.0/9007199254740992.0L); - prof_tdata->threshold = (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << opt_lg_prof_sample)))) - + (uint64_t)1U; -} - -JEMALLOC_INLINE prof_thr_cnt_t * -prof_alloc_prep(size_t size) -{ -#ifdef JEMALLOC_ENABLE_INLINE - /* This function does not have its own stack frame, because it is inlined. */ -# define NIGNORE 1 -#else -# define NIGNORE 2 -#endif - prof_thr_cnt_t *ret; - prof_tdata_t *prof_tdata; - prof_bt_t bt; - - assert(size == s2u(size)); - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } - - if (opt_prof_active == false) { - /* Sampling is currently inactive, so avoid sampling. */ - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } else if (opt_lg_prof_sample == 0) { - /* - * Don't bother with sampling logic, since sampling interval is - * 1. - */ - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else { - if (prof_tdata->threshold == 0) { - /* - * Initialize. Seed the prng differently for each - * thread. - */ - prof_tdata->prn_state = (uint64_t)(uintptr_t)&size; - prof_sample_threshold_update(prof_tdata); - } - - /* - * Determine whether to capture a backtrace based on whether - * size is enough for prof_accum to reach - * prof_tdata->threshold. 
However, delay updating these - * variables until prof_{m,re}alloc(), because we don't know - * for sure that the allocation will succeed. - * - * Use subtraction rather than addition to avoid potential - * integer overflow. - */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - bt_init(&bt, prof_tdata->vec); - prof_backtrace(&bt, NIGNORE, prof_bt_max); - ret = prof_lookup(&bt); - } else - ret = (prof_thr_cnt_t *)(uintptr_t)1U; - } - - return (ret); -#undef NIGNORE -} - -JEMALLOC_INLINE prof_ctx_t * -prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); - - ret = arena_prof_ctx_get(ptr); - } else - ret = huge_prof_ctx_get(ptr); - - return (ret); -} - -JEMALLOC_INLINE void -prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - arena_chunk_t *chunk; - - assert(ptr != NULL); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk != ptr) { - /* Region. */ - dassert(chunk->arena->magic == ARENA_MAGIC); - - arena_prof_ctx_set(ptr, ctx); - } else - huge_prof_ctx_set(ptr, ctx); -} - -JEMALLOC_INLINE bool -prof_sample_accum_update(size_t size) -{ - prof_tdata_t *prof_tdata; - - /* Sampling logic is unnecessary if the interval is 1. */ - assert(opt_lg_prof_sample != 0); - - prof_tdata = PROF_TCACHE_GET(); - assert(prof_tdata != NULL); - - /* Take care to avoid integer overflow. */ - if (size >= prof_tdata->threshold - prof_tdata->accum) { - prof_tdata->accum -= (prof_tdata->threshold - size); - /* Compute new sample threshold. 
*/ - prof_sample_threshold_update(prof_tdata); - while (prof_tdata->accum >= prof_tdata->threshold) { - prof_tdata->accum -= prof_tdata->threshold; - prof_sample_threshold_update(prof_tdata); - } - return (false); - } else { - prof_tdata->accum += size; - return (true); - } -} - -JEMALLOC_INLINE void -prof_malloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt) -{ - - assert(ptr != NULL); - assert(size == isalloc(ptr)); - - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { - /* - * Don't sample. For malloc()-like allocation, it is - * always possible to tell in advance how large an - * object's usable size will be, so there should never - * be a difference between the size passed to - * prof_alloc_prep() and prof_malloc(). - */ - assert((uintptr_t)cnt == (uintptr_t)1U); - } - } - - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - } - /*********/ - mb_write(); - /*********/ - cnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); -} - -JEMALLOC_INLINE void -prof_realloc(const void *ptr, size_t size, prof_thr_cnt_t *cnt, - size_t old_size, prof_ctx_t *old_ctx) -{ - prof_thr_cnt_t *told_cnt; - - assert(ptr != NULL || (uintptr_t)cnt <= (uintptr_t)1U); - - if (ptr != NULL) { - assert(size == isalloc(ptr)); - if (opt_lg_prof_sample != 0) { - if (prof_sample_accum_update(size)) { - /* - * Don't sample. The size passed to - * prof_alloc_prep() was larger than what - * actually got allocated, so a backtrace was - * captured for this allocation, even though - * its actual size was insufficient to cross - * the sample threshold. 
- */ - cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } - } - - if ((uintptr_t)old_ctx > (uintptr_t)1U) { - told_cnt = prof_lookup(old_ctx->bt); - if (told_cnt == NULL) { - /* - * It's too late to propagate OOM for this realloc(), - * so operate directly on old_cnt->ctx->cnt_merged. - */ - malloc_mutex_lock(&old_ctx->lock); - old_ctx->cnt_merged.curobjs--; - old_ctx->cnt_merged.curbytes -= old_size; - malloc_mutex_unlock(&old_ctx->lock); - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - } - } else - told_cnt = (prof_thr_cnt_t *)(uintptr_t)1U; - - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) { - prof_ctx_set(ptr, cnt->ctx); - cnt->epoch++; - } else - prof_ctx_set(ptr, (prof_ctx_t *)(uintptr_t)1U); - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) { - told_cnt->cnts.curobjs--; - told_cnt->cnts.curbytes -= old_size; - } - if ((uintptr_t)cnt > (uintptr_t)1U) { - cnt->cnts.curobjs++; - cnt->cnts.curbytes += size; - if (opt_prof_accum) { - cnt->cnts.accumobjs++; - cnt->cnts.accumbytes += size; - } - } - /*********/ - mb_write(); - /*********/ - if ((uintptr_t)told_cnt > (uintptr_t)1U) - told_cnt->epoch++; - if ((uintptr_t)cnt > (uintptr_t)1U) - cnt->epoch++; - /*********/ - mb_write(); /* Not strictly necessary. */ -} - -JEMALLOC_INLINE void -prof_free(const void *ptr, size_t size) -{ - prof_ctx_t *ctx = prof_ctx_get(ptr); - - if ((uintptr_t)ctx > (uintptr_t)1) { - assert(size == isalloc(ptr)); - prof_thr_cnt_t *tcnt = prof_lookup(ctx->bt); - - if (tcnt != NULL) { - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - tcnt->cnts.curobjs--; - tcnt->cnts.curbytes -= size; - /*********/ - mb_write(); - /*********/ - tcnt->epoch++; - /*********/ - mb_write(); - /*********/ - } else { - /* - * OOM during free() cannot be propagated, so operate - * directly on cnt->ctx->cnt_merged. 
- */ - malloc_mutex_lock(&ctx->lock); - ctx->cnt_merged.curobjs--; - ctx->cnt_merged.curbytes -= size; - malloc_mutex_unlock(&ctx->lock); - } - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/jemalloc/include/jemalloc/internal/ql.h b/jemalloc/include/jemalloc/internal/ql.h deleted file mode 100644 index a9ed239..0000000 --- a/jemalloc/include/jemalloc/internal/ql.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * List definitions. - */ -#define ql_head(a_type) \ -struct { \ - a_type *qlh_first; \ -} - -#define ql_head_initializer(a_head) {NULL} - -#define ql_elm(a_type) qr(a_type) - -/* List functions. */ -#define ql_new(a_head) do { \ - (a_head)->qlh_first = NULL; \ -} while (0) - -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) - -#define ql_first(a_head) ((a_head)->qlh_first) - -#define ql_last(a_head, a_field) \ - ((ql_first(a_head) != NULL) \ - ? qr_prev(ql_first(a_head), a_field) : NULL) - -#define ql_next(a_head, a_elm, a_field) \ - ((ql_last(a_head, a_field) != (a_elm)) \ - ? qr_next((a_elm), a_field) : NULL) - -#define ql_prev(a_head, a_elm, a_field) \ - ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ - : NULL) - -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ - qr_before_insert((a_qlelm), (a_elm), a_field); \ - if (ql_first(a_head) == (a_qlelm)) { \ - ql_first(a_head) = (a_elm); \ - } \ -} while (0) - -#define ql_after_insert(a_qlelm, a_elm, a_field) \ - qr_after_insert((a_qlelm), (a_elm), a_field) - -#define ql_head_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = (a_elm); \ -} while (0) - -#define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = qr_next((a_elm), a_field); \ -} while (0) - -#define ql_remove(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) == (a_elm)) { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ - } \ - if (ql_first(a_head) != (a_elm)) { \ - qr_remove((a_elm), a_field); \ - } else { \ - ql_first(a_head) = NULL; \ - } \ -} while (0) - -#define ql_head_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_first(a_head); \ - ql_remove((a_head), t, a_field); \ -} while (0) - -#define ql_tail_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_last(a_head, a_field); \ - ql_remove((a_head), t, a_field); \ -} while (0) - -#define ql_foreach(a_var, a_head, a_field) \ - qr_foreach((a_var), ql_first(a_head), a_field) - -#define ql_reverse_foreach(a_var, a_head, a_field) \ - qr_reverse_foreach((a_var), ql_first(a_head), a_field) diff --git a/jemalloc/include/jemalloc/internal/qr.h b/jemalloc/include/jemalloc/internal/qr.h deleted file mode 100644 index fe22352..0000000 --- a/jemalloc/include/jemalloc/internal/qr.h +++ /dev/null @@ -1,67 +0,0 @@ -/* Ring definitions. */ -#define qr(a_type) \ -struct { \ - a_type *qre_next; \ - a_type *qre_prev; \ -} - -/* Ring functions. 
*/ -#define qr_new(a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) - -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) - -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qrelm); \ - (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ - (a_qrelm)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_after_insert(a_qrelm, a_qr, a_field) \ - do \ - { \ - (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ - (a_qr)->a_field.qre_prev = (a_qrelm); \ - (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ - (a_qrelm)->a_field.qre_next = (a_qr); \ - } while (0) - -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - void *t; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - t = (a_qr_a)->a_field.qre_prev; \ - (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = t; \ -} while (0) - -/* qr_meld() and qr_split() are functionally equivalent, so there's no need to - * have two copies of the code. */ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) - -#define qr_remove(a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev->a_field.qre_next \ - = (a_qr)->a_field.qre_next; \ - (a_qr)->a_field.qre_next->a_field.qre_prev \ - = (a_qr)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_foreach(var, a_qr, a_field) \ - for ((var) = (a_qr); \ - (var) != NULL; \ - (var) = (((var)->a_field.qre_next != (a_qr)) \ - ? (var)->a_field.qre_next : NULL)) - -#define qr_reverse_foreach(var, a_qr, a_field) \ - for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ - (var) != NULL; \ - (var) = (((var) != (a_qr)) \ - ? 
(var)->a_field.qre_prev : NULL)) diff --git a/jemalloc/include/jemalloc/internal/rb.h b/jemalloc/include/jemalloc/internal/rb.h deleted file mode 100644 index ee9b009..0000000 --- a/jemalloc/include/jemalloc/internal/rb.h +++ /dev/null @@ -1,973 +0,0 @@ -/*- - ******************************************************************************* - * - * cpp macro implementation of left-leaning 2-3 red-black trees. Parent - * pointers are not used, and color bits are stored in the least significant - * bit of right-child pointers (if RB_COMPACT is defined), thus making node - * linkage as compact as is possible for red-black trees. - * - * Usage: - * - * #include - * #include - * #define NDEBUG // (Optional, see assert(3).) - * #include - * #define RB_COMPACT // (Optional, embed color bits in right-child pointers.) - * #include - * ... - * - ******************************************************************************* - */ - -#ifndef RB_H_ -#define RB_H_ - -#if 0 -__FBSDID("$FreeBSD: head/lib/libc/stdlib/rb.h 204493 2010-02-28 22:57:13Z jasone $"); -#endif - -#ifdef RB_COMPACT -/* Node structure. */ -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right_red; \ -} -#else -#define rb_node(a_type) \ -struct { \ - a_type *rbn_left; \ - a_type *rbn_right; \ - bool rbn_red; \ -} -#endif - -/* Root structure. */ -#define rb_tree(a_type) \ -struct { \ - a_type *rbt_root; \ - a_type rbt_nil; \ -} - -/* Left accessors. */ -#define rbtn_left_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_left) -#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ - (a_node)->a_field.rbn_left = a_left; \ -} while (0) - -#ifdef RB_COMPACT -/* Right accessors. 
*/ -#define rbtn_right_get(a_type, a_field, a_node) \ - ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ - & ((ssize_t)-2))) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ - | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ -} while (0) - -/* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ - ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ - & ((size_t)1))) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ - | ((ssize_t)a_red)); \ -} while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ - (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ -} while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ - (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ -} while (0) -#else -/* Right accessors. */ -#define rbtn_right_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_right) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ - (a_node)->a_field.rbn_right = a_right; \ -} while (0) - -/* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ - ((a_node)->a_field.rbn_red) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ - (a_node)->a_field.rbn_red = (a_red); \ -} while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = true; \ -} while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ - (a_node)->a_field.rbn_red = false; \ -} while (0) -#endif - -/* Node initializer. 
*/ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_red_set(a_type, a_field, (a_node)); \ -} while (0) - -/* Tree initializer. */ -#define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ -} while (0) - -/* Internal utility macros. */ -#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ - (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ - (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ - } \ - } \ -} while (0) - -#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ - (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ - } \ - } \ -} while (0) - -#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ - rbtn_right_set(a_type, a_field, (a_node), \ - rbtn_left_get(a_type, a_field, (r_node))); \ - rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ - (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ - rbtn_left_set(a_type, a_field, (a_node), \ - rbtn_right_get(a_type, a_field, (r_node))); \ - rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ -} while (0) - -/* - * The rb_proto() macro generates function prototypes that correspond to the - * functions generated by an equivalently parameterized call to rb_gen(). 
- */ - -#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ -a_attr void \ -a_prefix##new(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##first(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##last(a_rbt_type *rbtree); \ -a_attr a_type * \ -a_prefix##next(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ -a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ -a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ -a_attr void \ -a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ -a_attr void \ -a_prefix##remove(a_rbt_type *rbtree, a_type *node); \ -a_attr a_type * \ -a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ - a_rbt_type *, a_type *, void *), void *arg); \ -a_attr a_type * \ -a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); - -/* - * The rb_gen() macro generates a type-specific red-black tree implementation, - * based on the above cpp macros. - * - * Arguments: - * - * a_attr : Function attribute for generated functions (ex: static). - * a_prefix : Prefix for generated functions (ex: ex_). - * a_rb_type : Type for red-black tree data structure (ex: ex_t). - * a_type : Type for red-black tree node data structure (ex: ex_node_t). - * a_field : Name of red-black tree node linkage (ex: ex_link). - * a_cmp : Node comparison function name, with the following prototype: - * int (a_cmp *)(a_type *a_node, a_type *a_other); - * ^^^^^^ - * or a_key - * Interpretation of comparision function return values: - * -1 : a_node < a_other - * 0 : a_node == a_other - * 1 : a_node > a_other - * In all cases, the a_node or a_key macro argument is the first - * argument to the comparison function, which makes it possible - * to write comparison functions that treat the first argument - * specially. 
- * - * Assuming the following setup: - * - * typedef struct ex_node_s ex_node_t; - * struct ex_node_s { - * rb_node(ex_node_t) ex_link; - * }; - * typedef rb_tree(ex_node_t) ex_t; - * rb_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp) - * - * The following API is generated: - * - * static void - * ex_new(ex_t *extree); - * Description: Initialize a red-black tree structure. - * Args: - * extree: Pointer to an uninitialized red-black tree object. - * - * static ex_node_t * - * ex_first(ex_t *extree); - * static ex_node_t * - * ex_last(ex_t *extree); - * Description: Get the first/last node in extree. - * Args: - * extree: Pointer to an initialized red-black tree object. - * Ret: First/last node in extree, or NULL if extree is empty. - * - * static ex_node_t * - * ex_next(ex_t *extree, ex_node_t *node); - * static ex_node_t * - * ex_prev(ex_t *extree, ex_node_t *node); - * Description: Get node's successor/predecessor. - * Args: - * extree: Pointer to an initialized red-black tree object. - * node : A node in extree. - * Ret: node's successor/predecessor in extree, or NULL if node is - * last/first. - * - * static ex_node_t * - * ex_search(ex_t *extree, ex_node_t *key); - * Description: Search for node that matches key. - * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or NULL if no match. - * - * static ex_node_t * - * ex_nsearch(ex_t *extree, ex_node_t *key); - * static ex_node_t * - * ex_psearch(ex_t *extree, ex_node_t *key); - * Description: Search for node that matches key. If no match is found, - * return what would be key's successor/predecessor, were - * key in extree. - * Args: - * extree: Pointer to an initialized red-black tree object. - * key : Search key. - * Ret: Node in extree that matches key, or if no match, hypothetical - * node's successor/predecessor (NULL if no successor/predecessor). 
- * - * static void - * ex_insert(ex_t *extree, ex_node_t *node); - * Description: Insert node into extree. - * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node to be inserted into extree. - * - * static void - * ex_remove(ex_t *extree, ex_node_t *node); - * Description: Remove node from extree. - * Args: - * extree: Pointer to an initialized red-black tree object. - * node : Node in extree to be removed. - * - * static ex_node_t * - * ex_iter(ex_t *extree, ex_node_t *start, ex_node_t *(*cb)(ex_t *, - * ex_node_t *, void *), void *arg); - * static ex_node_t * - * ex_reverse_iter(ex_t *extree, ex_node_t *start, ex_node *(*cb)(ex_t *, - * ex_node_t *, void *), void *arg); - * Description: Iterate forward/backward over extree, starting at node. - * If extree is modified, iteration must be immediately - * terminated by the callback function that causes the - * modification. - * Args: - * extree: Pointer to an initialized red-black tree object. - * start : Node at which to start iteration, or NULL to start at - * first/last node. - * cb : Callback function, which is called for each node during - * iteration. Under normal circumstances the callback function - * should return NULL, which causes iteration to continue. If a - * callback function returns non-NULL, iteration is immediately - * terminated and the non-NULL return value is returned by the - * iterator. This is useful for re-starting iteration after - * modifying extree. - * arg : Opaque pointer passed to cb(). - * Ret: NULL if iteration completed, or the non-NULL callback return value - * that caused termination of the iteration. 
- */ -#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ -a_attr void \ -a_prefix##new(a_rbt_type *rbtree) { \ - rb_new(a_type, a_field, rbtree); \ -} \ -a_attr a_type * \ -a_prefix##first(a_rbt_type *rbtree) { \ - a_type *ret; \ - rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##last(a_rbt_type *rbtree) { \ - a_type *ret; \ - rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ - a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ - rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ - a_field, node), ret); \ - } else { \ - a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ - while (true) { \ - int cmp = (a_cmp)(node, tnode); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - break; \ - } \ - assert(tnode != &rbtree->rbt_nil); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ - a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ - rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ - a_field, node), ret); \ - } else { \ - a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ - while (true) { \ - int cmp = (a_cmp)(node, tnode); \ - if (cmp < 0) { \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - ret = tnode; \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - break; \ - } \ - assert(tnode != 
&rbtree->rbt_nil); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - int cmp; \ - ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ - && (cmp = (a_cmp)(key, ret)) != 0) { \ - if (cmp < 0) { \ - ret = rbtn_left_get(a_type, a_field, ret); \ - } else { \ - ret = rbtn_right_get(a_type, a_field, ret); \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ - int cmp = (a_cmp)(key, tnode); \ - if (cmp < 0) { \ - ret = tnode; \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - ret = tnode; \ - break; \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ - a_type *ret; \ - a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ - int cmp = (a_cmp)(key, tnode); \ - if (cmp < 0) { \ - tnode = rbtn_left_get(a_type, a_field, tnode); \ - } else if (cmp > 0) { \ - ret = tnode; \ - tnode = rbtn_right_get(a_type, a_field, tnode); \ - } else { \ - ret = tnode; \ - break; \ - } \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ - return (ret); \ -} \ -a_attr void \ -a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } path[sizeof(void *) << 4], *pathp; \ - rbt_node_new(a_type, a_field, rbtree, node); \ - /* Wind. 
*/ \ - path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ - int cmp = pathp->cmp = a_cmp(node, pathp->node); \ - assert(cmp != 0); \ - if (cmp < 0) { \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } else { \ - pathp[1].node = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - } \ - } \ - pathp->node = node; \ - /* Unwind. */ \ - for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ - a_type *cnode = pathp->node; \ - if (pathp->cmp < 0) { \ - a_type *left = pathp[1].node; \ - rbtn_left_set(a_type, a_field, cnode, left); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* Fix up 4-node. */ \ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, cnode, tnode); \ - cnode = tnode; \ - } \ - } else { \ - return; \ - } \ - } else { \ - a_type *right = pathp[1].node; \ - rbtn_right_set(a_type, a_field, cnode, right); \ - if (rbtn_red_get(a_type, a_field, right)) { \ - a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - /* Split 4-node. */ \ - rbtn_black_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, right); \ - rbtn_red_set(a_type, a_field, cnode); \ - } else { \ - /* Lean left. */ \ - a_type *tnode; \ - bool tred = rbtn_red_get(a_type, a_field, cnode); \ - rbtn_rotate_left(a_type, a_field, cnode, tnode); \ - rbtn_color_set(a_type, a_field, tnode, tred); \ - rbtn_red_set(a_type, a_field, cnode); \ - cnode = tnode; \ - } \ - } else { \ - return; \ - } \ - } \ - pathp->node = cnode; \ - } \ - /* Set root, and make it black. 
*/ \ - rbtree->rbt_root = path->node; \ - rbtn_black_set(a_type, a_field, rbtree->rbt_root); \ -} \ -a_attr void \ -a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } *pathp, *nodep, path[sizeof(void *) << 4]; \ - /* Wind. */ \ - nodep = NULL; /* Silence compiler warning. */ \ - path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ - int cmp = pathp->cmp = a_cmp(node, pathp->node); \ - if (cmp < 0) { \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } else { \ - pathp[1].node = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - if (cmp == 0) { \ - /* Find node's successor, in preparation for swap. */ \ - pathp->cmp = 1; \ - nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ - pathp++) { \ - pathp->cmp = -1; \ - pathp[1].node = rbtn_left_get(a_type, a_field, \ - pathp->node); \ - } \ - break; \ - } \ - } \ - } \ - assert(nodep->node == node); \ - pathp--; \ - if (pathp->node != node) { \ - /* Swap node with its successor. */ \ - bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ - rbtn_color_set(a_type, a_field, pathp->node, \ - rbtn_red_get(a_type, a_field, node)); \ - rbtn_left_set(a_type, a_field, pathp->node, \ - rbtn_left_get(a_type, a_field, node)); \ - /* If node's successor is its right child, the following code */\ - /* will do the wrong thing for the right child pointer. */\ - /* However, it doesn't matter, because the pointer will be */\ - /* properly set when the successor is pruned. */\ - rbtn_right_set(a_type, a_field, pathp->node, \ - rbtn_right_get(a_type, a_field, node)); \ - rbtn_color_set(a_type, a_field, node, tred); \ - /* The pruned leaf node's child pointers are never accessed */\ - /* again, so don't bother setting them to nil. 
*/\ - nodep->node = pathp->node; \ - pathp->node = node; \ - if (nodep == path) { \ - rbtree->rbt_root = nodep->node; \ - } else { \ - if (nodep[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, nodep[-1].node, \ - nodep->node); \ - } else { \ - rbtn_right_set(a_type, a_field, nodep[-1].node, \ - nodep->node); \ - } \ - } \ - } else { \ - a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ - /* node has no successor, but it has a left child. */\ - /* Splice node out, without losing the left child. */\ - assert(rbtn_red_get(a_type, a_field, node) == false); \ - assert(rbtn_red_get(a_type, a_field, left)); \ - rbtn_black_set(a_type, a_field, left); \ - if (pathp == path) { \ - rbtree->rbt_root = left; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - left); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - left); \ - } \ - } \ - return; \ - } else if (pathp == path) { \ - /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ - return; \ - } \ - } \ - if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - /* Prune red node, which requires no fixup. */ \ - assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ - return; \ - } \ - /* The node to be pruned is black, so unwind until balance is */\ - /* restored. 
*/\ - pathp->node = &rbtree->rbt_nil; \ - for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ - assert(pathp->cmp != 0); \ - if (pathp->cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp->node, \ - pathp[1].node); \ - assert(rbtn_red_get(a_type, a_field, pathp[1].node) \ - == false); \ - if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - a_type *right = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - a_type *rightleft = rbtn_left_get(a_type, a_field, \ - right); \ - a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ - /* In the following diagrams, ||, //, and \\ */\ - /* indicate the path to the removed node. */\ - /* */\ - /* || */\ - /* pathp(r) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - /* */\ - rbtn_black_set(a_type, a_field, pathp->node); \ - rbtn_rotate_right(a_type, a_field, right, tnode); \ - rbtn_right_set(a_type, a_field, pathp->node, tnode);\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - } else { \ - /* || */\ - /* pathp(r) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - /* */\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - } \ - /* Balance restored, but rotation modified subtree */\ - /* root. 
*/\ - assert((uintptr_t)pathp > (uintptr_t)path); \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - return; \ - } else { \ - a_type *right = rbtn_right_get(a_type, a_field, \ - pathp->node); \ - a_type *rightleft = rbtn_left_get(a_type, a_field, \ - right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ - /* || */\ - /* pathp(b) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, rightleft); \ - rbtn_rotate_right(a_type, a_field, right, tnode); \ - rbtn_right_set(a_type, a_field, pathp->node, tnode);\ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subree root, which may actually be the tree */\ - /* root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* // \ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - a_type *tnode; \ - rbtn_red_set(a_type, a_field, pathp->node); \ - rbtn_rotate_left(a_type, a_field, pathp->node, \ - tnode); \ - pathp->node = tnode; \ - } \ - } \ - } else { \ - a_type *left; \ - rbtn_right_set(a_type, a_field, pathp->node, \ - pathp[1].node); \ - left = rbtn_left_get(a_type, a_field, pathp->node); \ - if (rbtn_red_get(a_type, a_field, left)) { \ - a_type *tnode; \ - a_type *leftright = rbtn_right_get(a_type, a_field, \ - left); \ - a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ - leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (r) (b) */\ - /* \ */\ - /* (b) */\ - /* / */\ - /* (r) */\ - a_type *unode; \ - 
rbtn_black_set(a_type, a_field, leftrightleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - unode); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - rbtn_right_set(a_type, a_field, unode, tnode); \ - rbtn_rotate_left(a_type, a_field, unode, tnode); \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (r) (b) */\ - /* \ */\ - /* (b) */\ - /* / */\ - /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ - rbtn_red_set(a_type, a_field, leftright); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - rbtn_black_set(a_type, a_field, tnode); \ - } \ - /* Balance restored, but rotation modified subtree */\ - /* root, which may actually be the tree root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - } \ - return; \ - } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* || */\ - /* pathp(r) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, pathp->node); \ - rbtn_red_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subtree root. */\ - assert((uintptr_t)pathp > (uintptr_t)path); \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, pathp[-1].node, \ - tnode); \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(r) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - rbtn_red_set(a_type, a_field, left); \ - rbtn_black_set(a_type, a_field, pathp->node); \ - /* Balance restored. 
*/ \ - return; \ - } \ - } else { \ - a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (r) */\ - a_type *tnode; \ - rbtn_black_set(a_type, a_field, leftleft); \ - rbtn_rotate_right(a_type, a_field, pathp->node, \ - tnode); \ - /* Balance restored, but rotation modified */\ - /* subtree root, which may actually be the tree */\ - /* root. */\ - if (pathp == path) { \ - /* Set root. */ \ - rbtree->rbt_root = tnode; \ - } else { \ - if (pathp[-1].cmp < 0) { \ - rbtn_left_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } else { \ - rbtn_right_set(a_type, a_field, \ - pathp[-1].node, tnode); \ - } \ - } \ - return; \ - } else { \ - /* || */\ - /* pathp(b) */\ - /* / \\ */\ - /* (b) (b) */\ - /* / */\ - /* (b) */\ - rbtn_red_set(a_type, a_field, left); \ - } \ - } \ - } \ - } \ - /* Set root. */ \ - rbtree->rbt_root = path->node; \ - assert(rbtn_red_get(a_type, a_field, rbtree->rbt_root) == false); \ -} \ -a_attr a_type * \ -a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ - } else { \ - a_type *ret; \ - if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - int cmp = a_cmp(start, node); \ - if (cmp < 0) { \ - a_type *ret; \ - if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return 
(ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } else if (cmp > 0) { \ - return (a_prefix##iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)); \ - } else { \ - a_type *ret; \ - if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ - a_rbt_type *, a_type *, void *), void *arg) { \ - a_type *ret; \ - if (start != NULL) { \ - ret = a_prefix##iter_start(rbtree, start, rbtree->rbt_root, \ - cb, arg); \ - } else { \ - ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ - } else { \ - a_type *ret; \ - if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ - a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ - void *arg) { \ - int cmp = a_cmp(start, node); \ - if (cmp > 0) { \ - a_type *ret; \ - if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } else if (cmp 
< 0) { \ - return (a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } else { \ - a_type *ret; \ - if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ - } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ - } \ -} \ -a_attr a_type * \ -a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - a_type *ret; \ - if (start != NULL) { \ - ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtree->rbt_root, cb, arg); \ - } else { \ - ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ - cb, arg); \ - } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ - return (ret); \ -} - -#endif /* RB_H_ */ diff --git a/jemalloc/include/jemalloc/internal/rtree.h b/jemalloc/include/jemalloc/internal/rtree.h deleted file mode 100644 index 95d6355..0000000 --- a/jemalloc/include/jemalloc/internal/rtree.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * This radix tree implementation is tailored to the singular purpose of - * tracking which chunks are currently owned by jemalloc. This functionality - * is mandatory for OS X, where jemalloc must be able to respond to object - * ownership queries. - * - ******************************************************************************* - */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_s rtree_t; - -/* - * Size of each radix tree node (must be a power of 2). This impacts tree - * depth. - */ -#if (LG_SIZEOF_PTR == 2) -# define RTREE_NODESIZE (1U << 14) -#else -# define RTREE_NODESIZE CACHELINE -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct rtree_s { - malloc_mutex_t mutex; - void **root; - unsigned height; - unsigned level2bits[1]; /* Dynamically sized. 
*/ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -rtree_t *rtree_new(unsigned bits); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -#ifndef JEMALLOC_DEBUG -void *rtree_get_locked(rtree_t *rtree, uintptr_t key); -#endif -void *rtree_get(rtree_t *rtree, uintptr_t key); -bool rtree_set(rtree_t *rtree, uintptr_t key, void *val); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -#define RTREE_GET_GENERATE(f) \ -/* The least significant bits of the key are ignored. */ \ -JEMALLOC_INLINE void * \ -f(rtree_t *rtree, uintptr_t key) \ -{ \ - void *ret; \ - uintptr_t subkey; \ - unsigned i, lshift, height, bits; \ - void **node, **child; \ - \ - RTREE_LOCK(&rtree->mutex); \ - for (i = lshift = 0, height = rtree->height, node = rtree->root;\ - i < height - 1; \ - i++, lshift += bits, node = child) { \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR + \ - 3)) - bits); \ - child = (void**)node[subkey]; \ - if (child == NULL) { \ - RTREE_UNLOCK(&rtree->mutex); \ - return (NULL); \ - } \ - } \ - \ - /* \ - * node is a leaf, so it contains values rather than node \ - * pointers. 
\ - */ \ - bits = rtree->level2bits[i]; \ - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - \ - bits); \ - ret = node[subkey]; \ - RTREE_UNLOCK(&rtree->mutex); \ - \ - RTREE_GET_VALIDATE \ - return (ret); \ -} - -#ifdef JEMALLOC_DEBUG -# define RTREE_LOCK(l) malloc_mutex_lock(l) -# define RTREE_UNLOCK(l) malloc_mutex_unlock(l) -# define RTREE_GET_VALIDATE -RTREE_GET_GENERATE(rtree_get_locked) -# undef RTREE_LOCK -# undef RTREE_UNLOCK -# undef RTREE_GET_VALIDATE -#endif - -#define RTREE_LOCK(l) -#define RTREE_UNLOCK(l) -#ifdef JEMALLOC_DEBUG - /* - * Suppose that it were possible for a jemalloc-allocated chunk to be - * munmap()ped, followed by a different allocator in another thread re-using - * overlapping virtual memory, all without invalidating the cached rtree - * value. The result would be a false positive (the rtree would claim that - * jemalloc owns memory that it had actually discarded). This scenario - * seems impossible, but the following assertion is a prudent sanity check. 
- */ -# define RTREE_GET_VALIDATE \ - assert(rtree_get_locked(rtree, key) == ret); -#else -# define RTREE_GET_VALIDATE -#endif -RTREE_GET_GENERATE(rtree_get) -#undef RTREE_LOCK -#undef RTREE_UNLOCK -#undef RTREE_GET_VALIDATE - -JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, void *val) -{ - uintptr_t subkey; - unsigned i, lshift, height, bits; - void **node, **child; - - malloc_mutex_lock(&rtree->mutex); - for (i = lshift = 0, height = rtree->height, node = rtree->root; - i < height - 1; - i++, lshift += bits, node = child) { - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - bits); - child = (void**)node[subkey]; - if (child == NULL) { - child = (void**)base_alloc(sizeof(void *) << - rtree->level2bits[i+1]); - if (child == NULL) { - malloc_mutex_unlock(&rtree->mutex); - return (true); - } - memset(child, 0, sizeof(void *) << - rtree->level2bits[i+1]); - node[subkey] = child; - } - } - - /* node is a leaf, so it contains values rather than node pointers. 
*/ - bits = rtree->level2bits[i]; - subkey = (key << lshift) >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - bits); - node[subkey] = val; - malloc_mutex_unlock(&rtree->mutex); - - return (false); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/stats.h b/jemalloc/include/jemalloc/internal/stats.h deleted file mode 100644 index 2a9b31d..0000000 --- a/jemalloc/include/jemalloc/internal/stats.h +++ /dev/null @@ -1,207 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define UMAX2S_BUFSIZE 65 - -#ifdef JEMALLOC_STATS -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct arena_stats_s arena_stats_t; -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) -typedef struct chunk_stats_s chunk_stats_t; -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#ifdef JEMALLOC_STATS - -#ifdef JEMALLOC_TCACHE -struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -}; -#endif - -struct malloc_bin_stats_s { - /* - * Current number of bytes allocated, including objects currently - * cached by tcache. - */ - size_t allocated; - - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. 
This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - -#ifdef JEMALLOC_TCACHE - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; -#endif - - /* Total number of runs created for this bin's size class. */ - uint64_t nruns; - - /* - * Total number of runs reused by extracting them from the runs tree for - * this bin's size class. - */ - uint64_t reruns; - - /* High-water mark for this bin. */ - size_t highruns; - - /* Current number of runs in this bin. */ - size_t curruns; -}; - -struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* High-water mark for this size class. */ - size_t highruns; - - /* Current number of runs of this size class. */ - size_t curruns; -}; - -struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; - - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; - - /* Per-size-category statistics. */ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; - - /* - * One element for each possible size class, including sizes that - * overlap with bin size classes. 
This is necessary because ipalloc() - * sometimes has to use such large objects in order to assure proper - * alignment. - */ - malloc_large_stats_t *lstats; -}; -#endif /* JEMALLOC_STATS */ - -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) -struct chunk_stats_s { -# ifdef JEMALLOC_STATS - /* Number of chunks that were allocated. */ - uint64_t nchunks; -# endif - - /* High-water mark for number of chunks allocated. */ - size_t highchunks; - - /* - * Current number of chunks allocated. This value isn't maintained for - * any other purpose, so keep track of it in order to be able to set - * highchunks. - */ - size_t curchunks; -}; -#endif /* JEMALLOC_STATS */ - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -#ifdef JEMALLOC_STATS -extern size_t stats_cactive; -#endif - -char *u2s(uint64_t x, unsigned base, char *s); -#ifdef JEMALLOC_STATS -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_ATTR(format(printf, 3, 4)); -void malloc_printf(const char *format, ...) 
- JEMALLOC_ATTR(format(printf, 1, 2)); -#endif -void stats_print(void (*write)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES -#ifdef JEMALLOC_STATS - -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return (atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - - atomic_add_z(&stats_cactive, size); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - - atomic_sub_z(&stats_cactive, size); -} -#endif - -#endif /* JEMALLOC_STATS */ -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/internal/tcache.h b/jemalloc/include/jemalloc/internal/tcache.h deleted file mode 100644 index da3c68c..0000000 --- a/jemalloc/include/jemalloc/internal/tcache.h +++ /dev/null @@ -1,431 +0,0 @@ -#ifdef JEMALLOC_TCACHE -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per run for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. 
*/ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * (1U << opt_lg_tcache_gc_sweep) is the approximate number of allocation - * events between full GC sweeps (-1: disabled). Integer rounding may cause - * the actual number to be slightly higher, since GC is performed - * incrementally. - */ -#define LG_TCACHE_GC_SWEEP_DEFAULT 13 - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { -# ifdef JEMALLOC_STATS - tcache_bin_stats_t tstats; -# endif - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { -# ifdef JEMALLOC_STATS - ql_elm(tcache_t) link; /* Used for aggregating stats. */ -# endif -# ifdef JEMALLOC_PROF - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum() */ -# endif - arena_t *arena; /* This thread's arena. */ - unsigned ev_cnt; /* Event count since incremental GC. */ - unsigned next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; -extern ssize_t opt_lg_tcache_gc_sweep; - -extern tcache_bin_info_t *tcache_bin_info; - -/* Map of thread-specific caches. 
*/ -#ifndef NO_TLS -extern __thread tcache_t *tcache_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -# define TCACHE_GET() tcache_tls -# define TCACHE_SET(v) do { \ - tcache_tls = (tcache_t *)(v); \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#else -# define TCACHE_GET() ((tcache_t *)pthread_getspecific(tcache_tsd)) -# define TCACHE_SET(v) do { \ - pthread_setspecific(tcache_tsd, (void *)(v)); \ -} while (0) -#endif -extern pthread_key_t tcache_tsd; - -/* - * Number of tcache bins. There are nbins small-object bins, plus 0 or more - * large-object bins. - */ -extern size_t nhbins; - -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -extern unsigned tcache_gc_incr; - -void tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -void tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ); -tcache_t *tcache_create(arena_t *arena); -void *tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, - size_t binind); -void tcache_destroy(tcache_t *tcache); -#ifdef JEMALLOC_STATS -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); -#endif -bool tcache_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tcache_t *tcache); -tcache_t *tcache_get(void); -void *tcache_alloc_easy(tcache_bin_t *tbin); -void *tcache_alloc_small(tcache_t *tcache, size_t size, bool zero); -void *tcache_alloc_large(tcache_t *tcache, size_t size, bool zero); -void tcache_dalloc_small(tcache_t *tcache, void *ptr); -void tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size); -#endif - -#if 
(defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE tcache_t * -tcache_get(void) -{ - tcache_t *tcache; - - if ((isthreaded & opt_tcache) == false) - return (NULL); - - tcache = TCACHE_GET(); - if ((uintptr_t)tcache <= (uintptr_t)2) { - if (tcache == NULL) { - tcache = tcache_create(choose_arena()); - if (tcache == NULL) - return (NULL); - } else { - if (tcache == (void *)(uintptr_t)1) { - /* - * Make a note that an allocator function was - * called after the tcache_thread_cleanup() was - * called. - */ - TCACHE_SET((uintptr_t)2); - } - return (NULL); - } - } - - return (tcache); -} - -JEMALLOC_INLINE void -tcache_event(tcache_t *tcache) -{ - - if (tcache_gc_incr == 0) - return; - - tcache->ev_cnt++; - assert(tcache->ev_cnt <= tcache_gc_incr); - if (tcache->ev_cnt == tcache_gc_incr) { - size_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - - if (tbin->low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low - * water mark. - */ - if (binind < nbins) { - tcache_bin_flush_small(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } else { - tcache_bin_flush_large(tbin, binind, - tbin->ncached - tbin->low_water + - (tbin->low_water >> 2) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) - >= 1) - tbin->lg_fill_div++; - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X. Make sure lg_fill_div - * stays greater than 0. 
- */ - if (tbin->lg_fill_div > 1) - tbin->lg_fill_div--; - } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) - tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; - } -} - -JEMALLOC_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) -{ - void *ret; - - if (tbin->ncached == 0) { - tbin->low_water = -1; - return (NULL); - } - tbin->ncached--; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; - return (ret); -} - -JEMALLOC_INLINE void * -tcache_alloc_small(tcache_t *tcache, size_t size, bool zero) -{ - void *ret; - size_t binind; - tcache_bin_t *tbin; - - binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - ret = tcache_alloc_small_hard(tcache, tbin, binind); - if (ret == NULL) - return (NULL); - } - assert(arena_salloc(ret) == arena_bin_info[binind].reg_size); - - if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } else - memset(ret, 0, size); - -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += arena_bin_info[binind].reg_size; -#endif - tcache_event(tcache); - return (ret); -} - -JEMALLOC_INLINE void * -tcache_alloc_large(tcache_t *tcache, size_t size, bool zero) -{ - void *ret; - size_t binind; - tcache_bin_t *tbin; - - size = PAGE_CEILING(size); - assert(size <= tcache_maxclass); - binind = nbins + (size >> PAGE_SHIFT) - 1; - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (ret == NULL) { - /* - * Only allocate one large object at a time, because it's quite - * expensive to create one and not use it. 
- */ - ret = arena_malloc_large(tcache->arena, size, zero); - if (ret == NULL) - return (NULL); - } else { -#ifdef JEMALLOC_PROF - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - PAGE_SHIFT); - chunk->map[pageind-map_bias].bits &= ~CHUNK_MAP_CLASS_MASK; -#endif - if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } else - memset(ret, 0, size); - -#ifdef JEMALLOC_STATS - tbin->tstats.nrequests++; -#endif -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes += size; -#endif - } - - tcache_event(tcache); - return (ret); -} - -JEMALLOC_INLINE void -tcache_dalloc_small(tcache_t *tcache, void *ptr) -{ - arena_t *arena; - arena_chunk_t *chunk; - arena_run_t *run; - arena_bin_t *bin; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - size_t pageind, binind; - arena_chunk_map_t *mapelm; - - assert(arena_salloc(ptr) <= small_maxclass); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapelm = &chunk->map[pageind-map_bias]; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - bin = run->bin; - binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) / - sizeof(arena_bin_t); - assert(binind < nbins); - -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ptr, 0x5a, arena_bin_info[binind].reg_size); -#endif - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_small(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tcache); -} - -JEMALLOC_INLINE 
void -tcache_dalloc_large(tcache_t *tcache, void *ptr, size_t size) -{ - arena_t *arena; - arena_chunk_t *chunk; - size_t pageind, binind; - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; - - assert((size & PAGE_MASK) == 0); - assert(arena_salloc(ptr) > small_maxclass); - assert(arena_salloc(ptr) <= tcache_maxclass); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = nbins + (size >> PAGE_SHIFT) - 1; - -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ptr, 0x5a, size); -#endif - - tbin = &tcache->tbins[binind]; - tbin_info = &tcache_bin_info[binind]; - if (tbin->ncached == tbin_info->ncached_max) { - tcache_bin_flush_large(tbin, binind, (tbin_info->ncached_max >> - 1) -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; - tbin->ncached++; - - tcache_event(tcache); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ diff --git a/jemalloc/include/jemalloc/internal/zone.h b/jemalloc/include/jemalloc/internal/zone.h deleted file mode 100644 index 859b529..0000000 --- a/jemalloc/include/jemalloc/internal/zone.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." 
-#endif -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -malloc_zone_t *create_zone(void); -void szone2ozone(malloc_zone_t *zone); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/jemalloc/include/jemalloc/jemalloc.h.in b/jemalloc/include/jemalloc/jemalloc.h.in deleted file mode 100644 index 580a5ec..0000000 --- a/jemalloc/include/jemalloc/jemalloc.h.in +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef JEMALLOC_H_ -#define JEMALLOC_H_ -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" - -#include "jemalloc_defs@install_suffix@.h" -#ifndef JEMALLOC_P -# define JEMALLOC_P(s) s -#endif - -#define ALLOCM_LG_ALIGN(la) (la) -#if LG_SIZEOF_PTR == 2 -#define ALLOCM_ALIGN(a) (ffs(a)-1) -#else -#define ALLOCM_ALIGN(a) ((a < (size_t)INT_MAX) ? 
ffs(a)-1 : ffs(a>>32)+31) -#endif -#define ALLOCM_ZERO ((int)0x40) -#define ALLOCM_NO_MOVE ((int)0x80) - -#define ALLOCM_SUCCESS 0 -#define ALLOCM_ERR_OOM 1 -#define ALLOCM_ERR_NOT_MOVED 2 - -extern const char *JEMALLOC_P(malloc_conf); -extern void (*JEMALLOC_P(malloc_message))(void *, const char *); - -void *JEMALLOC_P(malloc)(size_t size) JEMALLOC_ATTR(malloc); -void *JEMALLOC_P(calloc)(size_t num, size_t size) JEMALLOC_ATTR(malloc); -int JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) - JEMALLOC_ATTR(nonnull(1)); -void *JEMALLOC_P(realloc)(void *ptr, size_t size); -void JEMALLOC_P(free)(void *ptr); - -size_t JEMALLOC_P(malloc_usable_size)(const void *ptr); -void JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts); -int JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, - size_t *miblenp); -int JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); - -int JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) - JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, - size_t extra, int flags) JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) - JEMALLOC_ATTR(nonnull(1)); -int JEMALLOC_P(dallocm)(void *ptr, int flags) JEMALLOC_ATTR(nonnull(1)); - -#ifdef __cplusplus -}; -#endif -#endif /* JEMALLOC_H_ */ diff --git a/jemalloc/include/jemalloc/jemalloc_defs.h.in b/jemalloc/include/jemalloc/jemalloc_defs.h.in deleted file mode 100644 index d8c81d7..0000000 --- a/jemalloc/include/jemalloc/jemalloc_defs.h.in +++ /dev/null @@ -1,158 +0,0 @@ -#ifndef JEMALLOC_DEFS_H_ -#define JEMALLOC_DEFS_H_ - -/* - * If JEMALLOC_PREFIX is defined, it will cause all public APIs to be prefixed. 
- * This makes it possible, with some care, to use multiple allocators - * simultaneously. - * - * In many cases it is more convenient to manually prefix allocator function - * calls than to let macros do it automatically, particularly when using - * multiple allocators simultaneously. Define JEMALLOC_MANGLE before - * #include'ing jemalloc.h in order to cause name mangling that corresponds to - * the API prefixing. - */ -#undef JEMALLOC_PREFIX -#undef JEMALLOC_CPREFIX -#if (defined(JEMALLOC_PREFIX) && defined(JEMALLOC_MANGLE)) -#undef JEMALLOC_P -#endif - -/* - * Hyper-threaded CPUs may need a special instruction inside spin loops in - * order to yield to another virtual CPU. - */ -#undef CPU_SPINWAIT - -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC - -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -#undef JEMALLOC_OSSPIN - -/* Defined if __attribute__((...)) syntax is supported. */ -#undef JEMALLOC_HAVE_ATTR -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -#else -# define JEMALLOC_ATTR(s) -#endif - -/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ -#undef JEMALLOC_CC_SILENCE - -/* - * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables - * inline functions. - */ -#undef JEMALLOC_DEBUG - -/* JEMALLOC_STATS enables statistics calculation. */ -#undef JEMALLOC_STATS - -/* JEMALLOC_PROF enables allocation profiling. */ -#undef JEMALLOC_PROF - -/* Use libunwind for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBUNWIND - -/* Use libgcc for profile backtracing if defined. */ -#undef JEMALLOC_PROF_LIBGCC - -/* Use gcc intrinsics for profile backtracing if defined. */ -#undef JEMALLOC_PROF_GCC - -/* - * JEMALLOC_TINY enables support for tiny objects, which are smaller than one - * quantum. 
- */ -#undef JEMALLOC_TINY - -/* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - -/* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage - * segment (DSS). - */ -#undef JEMALLOC_DSS - -/* JEMALLOC_SWAP enables mmap()ed swap file support. */ -#undef JEMALLOC_SWAP - -/* Support memory filling (junk/zero). */ -#undef JEMALLOC_FILL - -/* Support optional abort() on OOM. */ -#undef JEMALLOC_XMALLOC - -/* Support SYSV semantics. */ -#undef JEMALLOC_SYSV - -/* Support lazy locking (avoid locking unless a second thread is launched). */ -#undef JEMALLOC_LAZY_LOCK - -/* Determine page size at run time if defined. */ -#undef DYNAMIC_PAGE_SHIFT - -/* One page is 2^STATIC_PAGE_SHIFT bytes. */ -#undef STATIC_PAGE_SHIFT - -/* TLS is used to map arenas and magazine caches to threads. */ -#undef NO_TLS - -/* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. - */ -#undef JEMALLOC_IVSALLOC - -/* - * Define overrides for non-standard allocator-related functions if they - * are present on the system. - */ -#undef JEMALLOC_OVERRIDE_MEMALIGN -#undef JEMALLOC_OVERRIDE_VALLOC - -/* - * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. - */ -#undef JEMALLOC_ZONE -#undef JEMALLOC_ZONE_VERSION - -/* If defined, use mremap(...MREMAP_FIXED...) for huge realloc(). */ -#undef JEMALLOC_MREMAP_FIXED - -/* - * Methods for purging unused pages differ between operating systems. - * - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, - * such that new pages will be demand-zeroed if - * the address region is later touched. 
- * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. - */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED -#undef JEMALLOC_PURGE_MADVISE_FREE - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#undef LG_SIZEOF_PTR - -/* sizeof(int) == 2^LG_SIZEOF_INT. */ -#undef LG_SIZEOF_INT - -/* sizeof(long) == 2^LG_SIZEOF_LONG. */ -#undef LG_SIZEOF_LONG - -#endif /* JEMALLOC_DEFS_H_ */ diff --git a/jemalloc/install-sh b/jemalloc/install-sh deleted file mode 100755 index ebc6691..0000000 --- a/jemalloc/install-sh +++ /dev/null @@ -1,250 +0,0 @@ -#! /bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. 
-doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. - -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c deleted file mode 100644 index 9aaf47f..0000000 --- a/jemalloc/src/arena.c +++ /dev/null @@ -1,2703 +0,0 @@ -#define JEMALLOC_ARENA_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; -size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; -ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -uint8_t const *small_size2bin; -arena_bin_info_t *arena_bin_info; - -/* Various bin-related settings. */ -unsigned nqbins; -unsigned ncbins; -unsigned nsbins; -unsigned nbins; -size_t qspace_max; -size_t cspace_min; -size_t cspace_max; -size_t sspace_min; -size_t sspace_max; - -size_t lg_mspace; -size_t mspace_mask; - -/* - * const_small_size2bin is a static constant lookup table that in the common - * case can be used as-is for small_size2bin. 
- */ -#if (LG_TINY_MIN == 2) -#define S2B_4(i) i, -#define S2B_8(i) S2B_4(i) S2B_4(i) -#elif (LG_TINY_MIN == 3) -#define S2B_8(i) i, -#else -# error "Unsupported LG_TINY_MIN" -#endif -#define S2B_16(i) S2B_8(i) S2B_8(i) -#define S2B_32(i) S2B_16(i) S2B_16(i) -#define S2B_64(i) S2B_32(i) S2B_32(i) -#define S2B_128(i) S2B_64(i) S2B_64(i) -#define S2B_256(i) S2B_128(i) S2B_128(i) -/* - * The number of elements in const_small_size2bin is dependent on the - * definition for SUBPAGE. - */ -static JEMALLOC_ATTR(aligned(CACHELINE)) - const uint8_t const_small_size2bin[] = { -#if (LG_QUANTUM == 4) -/* 16-byte quantum **********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ - S2B_8(2) /* 16 */ -# define S2B_QMIN 2 -# elif (LG_TINY_MIN == 3) - S2B_8(0) /* 8 */ - S2B_8(1) /* 16 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_16(0) /* 16 */ -# define S2B_QMIN 0 -# endif - S2B_16(S2B_QMIN + 1) /* 32 */ - S2B_16(S2B_QMIN + 2) /* 48 */ - S2B_16(S2B_QMIN + 3) /* 64 */ - S2B_16(S2B_QMIN + 4) /* 80 */ - S2B_16(S2B_QMIN + 5) /* 96 */ - S2B_16(S2B_QMIN + 6) /* 112 */ - S2B_16(S2B_QMIN + 7) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 8) -#else -/* 8-byte quantum ***********************/ -# ifdef JEMALLOC_TINY -# if (LG_TINY_MIN == 2) - S2B_4(0) /* 4 */ - S2B_4(1) /* 8 */ -# define S2B_QMIN 1 -# else -# error "Unsupported LG_TINY_MIN" -# endif -# else - S2B_8(0) /* 8 */ -# define S2B_QMIN 0 -# endif - S2B_8(S2B_QMIN + 1) /* 16 */ - S2B_8(S2B_QMIN + 2) /* 24 */ - S2B_8(S2B_QMIN + 3) /* 32 */ - S2B_8(S2B_QMIN + 4) /* 40 */ - S2B_8(S2B_QMIN + 5) /* 48 */ - S2B_8(S2B_QMIN + 6) /* 56 */ - S2B_8(S2B_QMIN + 7) /* 64 */ - S2B_8(S2B_QMIN + 8) /* 72 */ - S2B_8(S2B_QMIN + 9) /* 80 */ - S2B_8(S2B_QMIN + 10) /* 88 */ - S2B_8(S2B_QMIN + 11) /* 96 */ - S2B_8(S2B_QMIN + 12) /* 104 */ - S2B_8(S2B_QMIN + 13) /* 112 */ - S2B_8(S2B_QMIN + 14) /* 120 */ - S2B_8(S2B_QMIN + 15) /* 128 */ -# define S2B_CMIN (S2B_QMIN + 
16) -#endif -/****************************************/ - S2B_64(S2B_CMIN + 0) /* 192 */ - S2B_64(S2B_CMIN + 1) /* 256 */ - S2B_64(S2B_CMIN + 2) /* 320 */ - S2B_64(S2B_CMIN + 3) /* 384 */ - S2B_64(S2B_CMIN + 4) /* 448 */ - S2B_64(S2B_CMIN + 5) /* 512 */ -# define S2B_SMIN (S2B_CMIN + 6) - S2B_256(S2B_SMIN + 0) /* 768 */ - S2B_256(S2B_SMIN + 1) /* 1024 */ - S2B_256(S2B_SMIN + 2) /* 1280 */ - S2B_256(S2B_SMIN + 3) /* 1536 */ - S2B_256(S2B_SMIN + 4) /* 1792 */ - S2B_256(S2B_SMIN + 5) /* 2048 */ - S2B_256(S2B_SMIN + 6) /* 2304 */ - S2B_256(S2B_SMIN + 7) /* 2560 */ - S2B_256(S2B_SMIN + 8) /* 2816 */ - S2B_256(S2B_SMIN + 9) /* 3072 */ - S2B_256(S2B_SMIN + 10) /* 3328 */ - S2B_256(S2B_SMIN + 11) /* 3584 */ - S2B_256(S2B_SMIN + 12) /* 3840 */ -#if (STATIC_PAGE_SHIFT == 13) - S2B_256(S2B_SMIN + 13) /* 4096 */ - S2B_256(S2B_SMIN + 14) /* 4352 */ - S2B_256(S2B_SMIN + 15) /* 4608 */ - S2B_256(S2B_SMIN + 16) /* 4864 */ - S2B_256(S2B_SMIN + 17) /* 5120 */ - S2B_256(S2B_SMIN + 18) /* 5376 */ - S2B_256(S2B_SMIN + 19) /* 5632 */ - S2B_256(S2B_SMIN + 20) /* 5888 */ - S2B_256(S2B_SMIN + 21) /* 6144 */ - S2B_256(S2B_SMIN + 22) /* 6400 */ - S2B_256(S2B_SMIN + 23) /* 6656 */ - S2B_256(S2B_SMIN + 24) /* 6912 */ - S2B_256(S2B_SMIN + 25) /* 7168 */ - S2B_256(S2B_SMIN + 26) /* 7424 */ - S2B_256(S2B_SMIN + 27) /* 7680 */ - S2B_256(S2B_SMIN + 28) /* 7936 */ -#endif -}; -#undef S2B_1 -#undef S2B_2 -#undef S2B_4 -#undef S2B_8 -#undef S2B_16 -#undef S2B_32 -#undef S2B_64 -#undef S2B_128 -#undef S2B_256 -#undef S2B_QMIN -#undef S2B_CMIN -#undef S2B_SMIN - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size, - bool large, bool zero); -static arena_chunk_t *arena_chunk_alloc(arena_t *arena); -static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk); -static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large, - bool zero); -static void arena_purge(arena_t *arena, bool all); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty); -static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize); -static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, bool dirty); -static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin); -static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin); -static void arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin); -static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); -static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size); -static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -static bool arena_ralloc_large(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); -static bool small_size2bin_init(void); -#ifdef JEMALLOC_DEBUG -static void small_size2bin_validate(void); -#endif -static bool small_size2bin_init_hard(void); -static size_t bin_info_run_size_calc(arena_bin_info_t *bin_info, - size_t min_run_size); -static bool bin_info_init(void); - -/******************************************************************************/ - -static inline int -arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t 
*b) -{ - uintptr_t a_mapelm = (uintptr_t)a; - uintptr_t b_mapelm = (uintptr_t)b; - - assert(a != NULL); - assert(b != NULL); - - return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm)); -} - -/* Generate red-black tree functions. */ -rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t, - arena_chunk_map_t, u.rb_link, arena_run_comp) - -static inline int -arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b) -{ - int ret; - size_t a_size = a->bits & ~PAGE_MASK; - size_t b_size = b->bits & ~PAGE_MASK; - - assert((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY || (a->bits & - CHUNK_MAP_DIRTY) == (b->bits & CHUNK_MAP_DIRTY)); - - ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_mapelm, b_mapelm; - - if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY) - a_mapelm = (uintptr_t)a; - else { - /* - * Treat keys as though they are lower than anything - * else. - */ - a_mapelm = 0; - } - b_mapelm = (uintptr_t)b; - - ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm); - } - - return (ret); -} - -/* Generate red-black tree functions. 
*/ -rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_t, u.rb_link, arena_avail_comp) - -static inline void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) -{ - void *ret; - unsigned regind; - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - dassert(run->magic == ARENA_RUN_MAGIC); - assert(run->nfree > 0); - assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); - - regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_size * regind)); - run->nfree--; - if (regind == run->nextind) - run->nextind++; - assert(regind < run->nextind); - return (ret); -} - -static inline void -arena_run_reg_dalloc(arena_run_t *run, void *ptr) -{ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - assert(run->nfree < bin_info->nregs); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size - == 0); - assert((uintptr_t)ptr >= (uintptr_t)run + - (uintptr_t)bin_info->reg0_offset); - /* Freeing an unallocated pointer can cause assertion failure. 
*/ - assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(bitmap, &bin_info->bitmap_info, regind); - run->nfree++; -} - -#ifdef JEMALLOC_DEBUG -static inline void -arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) -{ - size_t i; - size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); - - for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) - assert(p[i] == 0); -} -#endif - -static void -arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, - bool zero) -{ - arena_chunk_t *chunk; - size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; - size_t flag_dirty; - arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - old_ndirty = chunk->ndirty; - run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); - flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; - runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; - total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> - PAGE_SHIFT; - assert((chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty); - need_pages = (size >> PAGE_SHIFT); - assert(need_pages > 0); - assert(need_pages <= total_pages); - rem_pages = total_pages - need_pages; - - arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << - PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif - arena->nactive += need_pages; - - /* Keep track of trailing unused pages for later use. 
*/ - if (rem_pages > 0) { - if (flag_dirty != 0) { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; - } else { - chunk->map[run_ind+need_pages-map_bias].bits = - (rem_pages << PAGE_SHIFT) | - (chunk->map[run_ind+need_pages-map_bias].bits & - CHUNK_MAP_UNZEROED); - chunk->map[run_ind+total_pages-1-map_bias].bits = - (rem_pages << PAGE_SHIFT) | - (chunk->map[run_ind+total_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); - } - arena_avail_tree_insert(runs_avail, - &chunk->map[run_ind+need_pages-map_bias]); - } - - /* Update dirty page accounting. */ - if (flag_dirty != 0) { - chunk->ndirty -= need_pages; - arena->ndirty -= need_pages; - } - - /* - * Update the page map separately for large vs. small runs, since it is - * possible to avoid iteration for large mallocs. - */ - if (large) { - if (zero) { - if (flag_dirty == 0) { - /* - * The run is clean, so some pages may be - * zeroed (i.e. never before touched). - */ - for (i = 0; i < need_pages; i++) { - if ((chunk->map[run_ind+i-map_bias].bits - & CHUNK_MAP_UNZEROED) != 0) { - memset((void *)((uintptr_t) - chunk + ((run_ind+i) << - PAGE_SHIFT)), 0, - PAGE_SIZE); - } -#ifdef JEMALLOC_DEBUG - else { - arena_chunk_validate_zeroed( - chunk, run_ind+i); - } -#endif - } - } else { - /* - * The run is dirty, so all pages must be - * zeroed. - */ - memset((void *)((uintptr_t)chunk + (run_ind << - PAGE_SHIFT)), 0, (need_pages << - PAGE_SHIFT)); - } - } - - /* - * Set the last element first, in case the run only contains one - * page (i.e. both statements set the same element). 
- */ - chunk->map[run_ind+need_pages-1-map_bias].bits = - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; - chunk->map[run_ind-map_bias].bits = size | flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - } else { - assert(zero == false); - /* - * Propagate the dirty and unzeroed flags to the allocated - * small run, so that arena_dalloc_bin_run() has the ability to - * conditionally trim clean pages. - */ - chunk->map[run_ind-map_bias].bits = - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG - /* - * The first page will always be dirtied during small run - * initialization, so a validation failure here would not - * actually cause an observable failure. - */ - if (flag_dirty == 0 && - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) - == 0) - arena_chunk_validate_zeroed(chunk, run_ind); -#endif - for (i = 1; i < need_pages - 1; i++) { - chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) - | (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && - (chunk->map[run_ind+i-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) - arena_chunk_validate_zeroed(chunk, run_ind+i); -#endif - } - chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages - - 1) << PAGE_SHIFT) | - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; -#ifdef JEMALLOC_DEBUG - if (flag_dirty == 0 && - (chunk->map[run_ind+need_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) == 0) { - arena_chunk_validate_zeroed(chunk, - run_ind+need_pages-1); - } -#endif - } -} - -static arena_chunk_t * -arena_chunk_alloc(arena_t *arena) -{ - arena_chunk_t *chunk; - size_t i; - - if (arena->spare != NULL) { - arena_avail_tree_t *runs_avail; - - chunk = arena->spare; - arena->spare = NULL; - - /* Insert the run into the appropriate runs_avail_* tree. 
*/ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); - assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) - == arena_maxclass); - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == - (chunk->map[chunk_npages-1-map_bias].bits & - CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[0]); - } else { - bool zero; - size_t unzeroed; - - zero = false; - malloc_mutex_unlock(&arena->lock); - chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); - malloc_mutex_lock(&arena->lock); - if (chunk == NULL) - return (NULL); -#ifdef JEMALLOC_STATS - arena->stats.mapped += chunksize; -#endif - - chunk->arena = arena; - ql_elm_new(chunk, link_dirty); - chunk->dirtied = false; - - /* - * Claim that no pages are in use, since the header is merely - * overhead. - */ - chunk->ndirty = 0; - - /* - * Initialize the map to contain one maximal free untouched run. - * Mark the pages as zeroed iff chunk_alloc() returned a zeroed - * chunk. - */ - unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; - chunk->map[0].bits = arena_maxclass | unzeroed; - /* - * There is no need to initialize the internal page map entries - * unless the chunk is not zeroed. - */ - if (zero == false) { - for (i = map_bias+1; i < chunk_npages-1; i++) - chunk->map[i-map_bias].bits = unzeroed; - } -#ifdef JEMALLOC_DEBUG - else { - for (i = map_bias+1; i < chunk_npages-1; i++) - assert(chunk->map[i-map_bias].bits == unzeroed); - } -#endif - chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | - unzeroed; - - /* Insert the run into the runs_avail_clean tree. 
*/ - arena_avail_tree_insert(&arena->runs_avail_clean, - &chunk->map[0]); - } - - return (chunk); -} - -static void -arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) -{ - arena_avail_tree_t *runs_avail; - - /* - * Remove run from the appropriate runs_avail_* tree, so that the arena - * does not use it. - */ - if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) - runs_avail = &arena->runs_avail_clean; - else - runs_avail = &arena->runs_avail_dirty; - arena_avail_tree_remove(runs_avail, &chunk->map[0]); - - if (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - - arena->spare = chunk; - if (spare->dirtied) { - ql_remove(&chunk->arena->chunks_dirty, spare, - link_dirty); - arena->ndirty -= spare->ndirty; - } - malloc_mutex_unlock(&arena->lock); - chunk_dealloc((void *)spare, chunksize); - malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.mapped -= chunksize; -#endif - } else - arena->spare = chunk; -} - -static arena_run_t * -arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) -{ - arena_chunk_t *chunk; - arena_run_t *run; - arena_chunk_map_t *mapelm, key; - - assert(size <= arena_maxclass); - assert((size & PAGE_MASK) == 0); - - /* Search the arena's chunks for the lowest best fit. 
*/ - key.bits = size | CHUNK_MAP_KEY; - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); - arena_run_split(arena, run, size, large, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); - arena_run_split(arena, run, size, large, zero); - return (run); - } - - /* - * No usable runs. Create a new chunk from which to allocate the run. - */ - chunk = arena_chunk_alloc(arena); - if (chunk != NULL) { - run = (arena_run_t *)((uintptr_t)chunk + (map_bias << - PAGE_SHIFT)); - arena_run_split(arena, run, size, large, zero); - return (run); - } - - /* - * arena_chunk_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped arena->lock in - * arena_chunk_alloc(), so search one more time. 
- */ - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); - arena_run_split(arena, run, size, large, zero); - return (run); - } - mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); - if (mapelm != NULL) { - arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); - size_t pageind = (((uintptr_t)mapelm - - (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) - + map_bias; - - run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << - PAGE_SHIFT)); - arena_run_split(arena, run, size, large, zero); - return (run); - } - - return (NULL); -} - -static inline void -arena_maybe_purge(arena_t *arena) -{ - - /* Enforce opt_lg_dirty_mult. */ - if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && - (arena->ndirty - arena->npurgatory) > chunk_npages && - (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - arena->npurgatory)) - arena_purge(arena, false); -} - -static inline void -arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) -{ - ql_head(arena_chunk_map_t) mapelms; - arena_chunk_map_t *mapelm; - size_t pageind, flag_unzeroed; -#ifdef JEMALLOC_DEBUG - size_t ndirty; -#endif -#ifdef JEMALLOC_STATS - size_t nmadvise; -#endif - - ql_new(&mapelms); - - flag_unzeroed = -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - /* - * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous - * mappings, but not for file-backed mappings. - */ -# ifdef JEMALLOC_SWAP - swap_enabled ? 
CHUNK_MAP_UNZEROED : -# endif - 0; -#else - CHUNK_MAP_UNZEROED; -#endif - - /* - * If chunk is the spare, temporarily re-allocate it, 1) so that its - * run is reinserted into runs_avail_dirty, and 2) so that it cannot be - * completely discarded by another thread while arena->lock is dropped - * by this thread. Note that the arena_run_dalloc() call will - * implicitly deallocate the chunk, so no explicit action is required - * in this function to deallocate the chunk. - * - * Note that once a chunk contains dirty pages, it cannot again contain - * a single run unless 1) it is a dirty run, or 2) this function purges - * dirty pages and causes the transition to a single clean run. Thus - * (chunk == arena->spare) is possible, but it is not possible for - * this function to be called on the spare unless it contains a dirty - * run. - */ - if (chunk == arena->spare) { - assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); - arena_chunk_alloc(arena); - } - - /* Temporarily allocate all free dirty runs within chunk. */ - for (pageind = map_bias; pageind < chunk_npages;) { - mapelm = &chunk->map[pageind-map_bias]; - if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { - size_t npages; - - npages = mapelm->bits >> PAGE_SHIFT; - assert(pageind + npages <= chunk_npages); - if (mapelm->bits & CHUNK_MAP_DIRTY) { - size_t i; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif - - arena_avail_tree_remove( - &arena->runs_avail_dirty, mapelm); - - mapelm->bits = (npages << PAGE_SHIFT) | - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; - /* - * Update internal elements in the page map, so - * that CHUNK_MAP_UNZEROED is properly set. - */ - for (i = 1; i < npages - 1; i++) { - chunk->map[pageind+i-map_bias].bits = - flag_unzeroed; - } - if (npages > 1) { - chunk->map[ - pageind+npages-1-map_bias].bits = - flag_unzeroed | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED; - } - -#ifdef JEMALLOC_STATS - /* - * Update stats_cactive if nactive is crossing a - * chunk multiple. 
- */ - cactive_diff = CHUNK_CEILING((arena->nactive + - npages) << PAGE_SHIFT) - - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); -#endif - arena->nactive += npages; - /* Append to list for later processing. */ - ql_elm_new(mapelm, u.ql_link); - ql_tail_insert(&mapelms, mapelm, u.ql_link); - } - - pageind += npages; - } else { - /* Skip allocated run. */ - if (mapelm->bits & CHUNK_MAP_LARGE) - pageind += mapelm->bits >> PAGE_SHIFT; - else { - arena_run_t *run = (arena_run_t *)((uintptr_t) - chunk + (uintptr_t)(pageind << PAGE_SHIFT)); - - assert((mapelm->bits >> PAGE_SHIFT) == 0); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(arena, - run->bin); - arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - pageind += bin_info->run_size >> PAGE_SHIFT; - } - } - } - assert(pageind == chunk_npages); - -#ifdef JEMALLOC_DEBUG - ndirty = chunk->ndirty; -#endif -#ifdef JEMALLOC_STATS - arena->stats.purged += chunk->ndirty; -#endif - arena->ndirty -= chunk->ndirty; - chunk->ndirty = 0; - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; - - malloc_mutex_unlock(&arena->lock); -#ifdef JEMALLOC_STATS - nmadvise = 0; -#endif - ql_foreach(mapelm, &mapelms, u.ql_link) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - size_t npages = mapelm->bits >> PAGE_SHIFT; - - assert(pageind + npages <= chunk_npages); -#ifdef JEMALLOC_DEBUG - assert(ndirty >= npages); - ndirty -= npages; -#endif - -#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_DONTNEED); -#elif defined(JEMALLOC_PURGE_MADVISE_FREE) - madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), - (npages << PAGE_SHIFT), MADV_FREE); -#else -# error "No method defined for purging unused dirty pages." 
-#endif - -#ifdef JEMALLOC_STATS - nmadvise++; -#endif - } -#ifdef JEMALLOC_DEBUG - assert(ndirty == 0); -#endif - malloc_mutex_lock(&arena->lock); -#ifdef JEMALLOC_STATS - arena->stats.nmadvise += nmadvise; -#endif - - /* Deallocate runs. */ - for (mapelm = ql_first(&mapelms); mapelm != NULL; - mapelm = ql_first(&mapelms)) { - size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t)) + map_bias; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)(pageind << PAGE_SHIFT)); - - ql_remove(&mapelms, mapelm, u.ql_link); - arena_run_dalloc(arena, run, false); - } -} - -static void -arena_purge(arena_t *arena, bool all) -{ - arena_chunk_t *chunk; - size_t npurgatory; -#ifdef JEMALLOC_DEBUG - size_t ndirty = 0; - - ql_foreach(chunk, &arena->chunks_dirty, link_dirty) { - assert(chunk->dirtied); - ndirty += chunk->ndirty; - } - assert(ndirty == arena->ndirty); -#endif - assert(arena->ndirty > arena->npurgatory || all); - assert(arena->ndirty > chunk_npages || all); - assert((arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - - npurgatory) || all); - -#ifdef JEMALLOC_STATS - arena->stats.npurge++; -#endif - - /* - * Compute the minimum number of pages that this thread should try to - * purge, and add the result to arena->npurgatory. This will keep - * multiple threads from racing to reduce ndirty below the threshold. - */ - npurgatory = arena->ndirty - arena->npurgatory; - if (all == false) { - assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult); - npurgatory -= arena->nactive >> opt_lg_dirty_mult; - } - arena->npurgatory += npurgatory; - - while (npurgatory > 0) { - /* Get next chunk with dirty pages. */ - chunk = ql_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* - * This thread was unable to purge as many pages as - * originally intended, due to races with other threads - * that either did some of the purging work, or re-used - * dirty pages. 
- */ - arena->npurgatory -= npurgatory; - return; - } - while (chunk->ndirty == 0) { - ql_remove(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = false; - chunk = ql_first(&arena->chunks_dirty); - if (chunk == NULL) { - /* Same logic as for above. */ - arena->npurgatory -= npurgatory; - return; - } - } - - if (chunk->ndirty > npurgatory) { - /* - * This thread will, at a minimum, purge all the dirty - * pages in chunk, so set npurgatory to reflect this - * thread's commitment to purge the pages. This tends - * to reduce the chances of the following scenario: - * - * 1) This thread sets arena->npurgatory such that - * (arena->ndirty - arena->npurgatory) is at the - * threshold. - * 2) This thread drops arena->lock. - * 3) Another thread causes one or more pages to be - * dirtied, and immediately determines that it must - * purge dirty pages. - * - * If this scenario *does* play out, that's okay, - * because all of the purging work being done really - * needs to happen. - */ - arena->npurgatory += chunk->ndirty - npurgatory; - npurgatory = chunk->ndirty; - } - - arena->npurgatory -= chunk->ndirty; - npurgatory -= chunk->ndirty; - arena_chunk_purge(arena, chunk); - } -} - -void -arena_purge_all(arena_t *arena) -{ - - malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); - malloc_mutex_unlock(&arena->lock); -} - -static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) -{ - arena_chunk_t *chunk; - size_t size, run_ind, run_pages, flag_dirty; - arena_avail_tree_t *runs_avail; -#ifdef JEMALLOC_STATS - size_t cactive_diff; -#endif - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) - >> PAGE_SHIFT); - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { - size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; - assert(size == PAGE_SIZE || - (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & - 
~PAGE_MASK) == 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - } else { - size_t binind = arena_bin_index(arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size = bin_info->run_size; - } - run_pages = (size >> PAGE_SHIFT); -#ifdef JEMALLOC_STATS - /* Update stats_cactive if nactive is crossing a chunk multiple. */ - cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - - CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); - if (cactive_diff != 0) - stats_cactive_sub(cactive_diff); -#endif - arena->nactive -= run_pages; - - /* - * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated. - */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) - dirty = true; - flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - runs_avail = dirty ? &arena->runs_avail_dirty : - &arena->runs_avail_clean; - - /* Mark pages as unallocated in the chunk map. */ - if (dirty) { - chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - CHUNK_MAP_DIRTY; - - chunk->ndirty += run_pages; - arena->ndirty += run_pages; - } else { - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_UNZEROED); - } - - /* Try to coalesce forward. 
*/ - if (run_ind + run_pages < chunk_npages && - (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & - ~PAGE_MASK; - size_t nrun_pages = nrun_size >> PAGE_SHIFT; - - /* - * Remove successor from runs_avail; the coalesced run is - * inserted later. - */ - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & ~PAGE_MASK) == nrun_size); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0); - assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits - & CHUNK_MAP_DIRTY) == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind+run_pages-map_bias]); - - size += nrun_size; - run_pages += nrun_pages; - - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - } - - /* Try to coalesce backward. */ - if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & - CHUNK_MAP_DIRTY) == flag_dirty) { - size_t prun_size = chunk->map[run_ind-1-map_bias].bits & - ~PAGE_MASK; - size_t prun_pages = prun_size >> PAGE_SHIFT; - - run_ind -= prun_pages; - - /* - * Remove predecessor from runs_avail; the coalesced run is - * inserted later. 
- */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) - == prun_size); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) - == 0); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) - == flag_dirty); - arena_avail_tree_remove(runs_avail, - &chunk->map[run_ind-map_bias]); - - size += prun_size; - run_pages += prun_pages; - - chunk->map[run_ind-map_bias].bits = size | - (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind+run_pages-1-map_bias].bits = size | - (chunk->map[run_ind+run_pages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - } - - /* Insert into runs_avail, now that coalescing is complete. */ - assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); - assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == - (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); - arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); - - if (dirty) { - /* - * Insert into chunks_dirty before potentially calling - * arena_chunk_dealloc(), so that chunks_dirty and - * arena->ndirty are consistent. - */ - if (chunk->dirtied == false) { - ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); - chunk->dirtied = true; - } - } - - /* - * Deallocate chunk if it is now completely unused. The bit - * manipulation checks whether the first run is unallocated and extends - * to the end of the chunk. - */ - if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == - arena_maxclass) - arena_chunk_dealloc(arena, chunk); - - /* - * It is okay to do dirty page processing here even if the chunk was - * deallocated above, since in that case it is the spare. Waiting - * until after possible chunk deallocation to do dirty processing - * allows for an old spare to be fully deallocated, thus decreasing the - * chances of spuriously crossing the dirty page purging threshold. 
- */ - if (dirty) - arena_maybe_purge(arena); -} - -static void -arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize) -{ - size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * leading run as separately allocated. Set the last element of each - * run first, in case of single-page runs. - */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = (oldsize - newsize) - | flag_dirty | (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - -#ifdef JEMALLOC_DEBUG - { - size_t tail_npages = newsize >> PAGE_SHIFT; - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_DIRTY) == flag_dirty); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] - .bits & CHUNK_MAP_ALLOCATED) != 0); - } -#endif - chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - - arena_run_dalloc(arena, run, false); -} - -static void -arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize, bool dirty) -{ - size_t pageind = 
((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t head_npages = newsize >> PAGE_SHIFT; - size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; - size_t flag_dirty = chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY; - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * trailing run as separately allocated. Set the last element of each - * run first, in case of single-page runs. - */ - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | - (chunk->map[pageind+head_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind-map_bias].bits = newsize | flag_dirty | - (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - ~PAGE_MASK) == 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_LARGE) != 0); - assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_ALLOCATED) != 0); - chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = - flag_dirty | - (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | - flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & - CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - - arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), - dirty); -} - -static arena_run_t * -arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) -{ - arena_chunk_map_t *mapelm; - arena_run_t *run; - size_t binind; - arena_bin_info_t *bin_info; - - /* Look for a usable run. 
*/ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. */ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif - return (run); - } - /* No existing runs have any space available. */ - - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - - /* Allocate a new run. */ - malloc_mutex_unlock(&bin->lock); - /******************************/ - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc(arena, bin_info->run_size, false, false); - if (run != NULL) { - bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + - (uintptr_t)bin_info->bitmap_offset); - - /* Initialize run internals. */ - run->bin = bin; - run->nextind = 0; - run->nfree = bin_info->nregs; - bitmap_init(bitmap, &bin_info->bitmap_info); -#ifdef JEMALLOC_DEBUG - run->magic = ARENA_RUN_MAGIC; -#endif - } - malloc_mutex_unlock(&arena->lock); - /********************************/ - malloc_mutex_lock(&bin->lock); - if (run != NULL) { -#ifdef JEMALLOC_STATS - bin->stats.nruns++; - bin->stats.curruns++; - if (bin->stats.curruns > bin->stats.highruns) - bin->stats.highruns = bin->stats.curruns; -#endif - return (run); - } - - /* - * arena_run_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped bin->lock above, - * so search one more time. - */ - mapelm = arena_run_tree_first(&bin->runs); - if (mapelm != NULL) { - arena_chunk_t *chunk; - size_t pageind; - - /* run is guaranteed to have available space. 
*/ - arena_run_tree_remove(&bin->runs, mapelm); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); - pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / - sizeof(arena_chunk_map_t))) + map_bias; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) - << PAGE_SHIFT)); -#ifdef JEMALLOC_STATS - bin->stats.reruns++; -#endif - return (run); - } - - return (NULL); -} - -/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ -static void * -arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) -{ - void *ret; - size_t binind; - arena_bin_info_t *bin_info; - arena_run_t *run; - - binind = arena_bin_index(arena, bin); - bin_info = &arena_bin_info[binind]; - bin->runcur = NULL; - run = arena_bin_nonfull_run_get(arena, bin); - if (bin->runcur != NULL && bin->runcur->nfree > 0) { - /* - * Another thread updated runcur while this one ran without the - * bin lock in arena_bin_nonfull_run_get(). - */ - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); - assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); - if (run != NULL) { - arena_chunk_t *chunk; - - /* - * arena_run_alloc() may have allocated run, or it may - * have pulled run from the bin's run tree. Therefore - * it is unsafe to make any assumptions about how run - * has previously been used, and arena_bin_lower_run() - * must be called, as if a region were just deallocated - * from the run. 
- */ - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) - arena_dalloc_bin_run(arena, chunk, run, bin); - else - arena_bin_lower_run(arena, chunk, run, bin); - } - return (ret); - } - - if (run == NULL) - return (NULL); - - bin->runcur = run; - - dassert(bin->runcur->magic == ARENA_RUN_MAGIC); - assert(bin->runcur->nfree > 0); - - return (arena_run_reg_alloc(bin->runcur, bin_info)); -} - -#ifdef JEMALLOC_PROF -void -arena_prof_accum(arena_t *arena, uint64_t accumbytes) -{ - - if (prof_interval != 0) { - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - prof_idump(); - arena->prof_accumbytes -= prof_interval; - } - } -} -#endif - -#ifdef JEMALLOC_TCACHE -void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind -# ifdef JEMALLOC_PROF - , uint64_t prof_accumbytes -# endif - ) -{ - unsigned i, nfill; - arena_bin_t *bin; - arena_run_t *run; - void *ptr; - - assert(tbin->ncached == 0); - -#ifdef JEMALLOC_PROF - malloc_mutex_lock(&arena->lock); - arena_prof_accum(arena, prof_accumbytes); - malloc_mutex_unlock(&arena->lock); -#endif - bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { - if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); - else - ptr = arena_bin_malloc_hard(arena, bin); - if (ptr == NULL) - break; - /* Insert such that low regions get used first. 
*/ - tbin->avail[nfill - 1 - i] = ptr; - } -#ifdef JEMALLOC_STATS - bin->stats.allocated += i * arena_bin_info[binind].reg_size; - bin->stats.nmalloc += i; - bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.nfills++; - tbin->tstats.nrequests = 0; -#endif - malloc_mutex_unlock(&bin->lock); - tbin->ncached = i; -} -#endif - -void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) -{ - void *ret; - arena_bin_t *bin; - arena_run_t *run; - size_t binind; - - binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); - bin = &arena->bins[binind]; - size = arena_bin_info[binind].reg_size; - - malloc_mutex_lock(&bin->lock); - if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); - else - ret = arena_bin_malloc_hard(arena, bin); - - if (ret == NULL) { - malloc_mutex_unlock(&bin->lock); - return (NULL); - } - -#ifdef JEMALLOC_STATS - bin->stats.allocated += size; - bin->stats.nmalloc++; - bin->stats.nrequests++; -#endif - malloc_mutex_unlock(&bin->lock); -#ifdef JEMALLOC_PROF - if (isthreaded == false) { - malloc_mutex_lock(&arena->lock); - arena_prof_accum(arena, size); - malloc_mutex_unlock(&arena->lock); - } -#endif - - if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } else - memset(ret, 0, size); - - return (ret); -} - -void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) -{ - void *ret; - - /* Large allocation. 
*/ - size = PAGE_CEILING(size); - malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, size, true, zero); - if (ret == NULL) { - malloc_mutex_unlock(&arena->lock); - return (NULL); - } -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; - } -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, size); -#endif - malloc_mutex_unlock(&arena->lock); - - if (zero == false) { -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); -#endif - } - - return (ret); -} - -void * -arena_malloc(size_t size, bool zero) -{ - - assert(size != 0); - assert(QUANTUM_CEILING(size) <= arena_maxclass); - - if (size <= small_maxclass) { -#ifdef JEMALLOC_TCACHE - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - return (tcache_alloc_small(tcache, size, zero)); - else - -#endif - return (arena_malloc_small(choose_arena(), size, zero)); - } else { -#ifdef JEMALLOC_TCACHE - if (size <= tcache_maxclass) { - tcache_t *tcache; - - if ((tcache = tcache_get()) != NULL) - return (tcache_alloc_large(tcache, size, zero)); - else { - return (arena_malloc_large(choose_arena(), - size, zero)); - } - } else -#endif - return (arena_malloc_large(choose_arena(), size, zero)); - } -} - -/* Only handles large allocations that require more than page alignment. 
*/ -void * -arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, - bool zero) -{ - void *ret; - size_t offset; - arena_chunk_t *chunk; - - assert((size & PAGE_MASK) == 0); - - alignment = PAGE_CEILING(alignment); - - malloc_mutex_lock(&arena->lock); - ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); - if (ret == NULL) { - malloc_mutex_unlock(&arena->lock); - return (NULL); - } - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & PAGE_MASK) == 0); - assert(offset < alloc_size); - if (offset == 0) - arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false); - else { - size_t leadsize, trailsize; - - leadsize = alignment - offset; - if (leadsize > 0) { - arena_run_trim_head(arena, chunk, ret, alloc_size, - alloc_size - leadsize); - ret = (void *)((uintptr_t)ret + leadsize); - } - - trailsize = alloc_size - leadsize - size; - if (trailsize != 0) { - /* Trim trailing space. */ - assert(trailsize < alloc_size); - arena_run_trim_tail(arena, chunk, ret, size + trailsize, - size, false); - } - } - -#ifdef JEMALLOC_STATS - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; - } -#endif - malloc_mutex_unlock(&arena->lock); - -#ifdef JEMALLOC_FILL - if (zero == false) { - if (opt_junk) - memset(ret, 0xa5, size); - else if (opt_zero) - memset(ret, 0, size); - } -#endif - return (ret); -} - -/* Return the size of the allocation pointed to by ptr. 
*/ -size_t -arena_salloc(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - assert(ret != 0); - } - - return (ret); -} - -#ifdef JEMALLOC_PROF -void -arena_prof_promoted(const void *ptr, size_t size) -{ - arena_chunk_t *chunk; - size_t pageind, binind; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr) == PAGE_SIZE); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - binind = SMALL_SIZE2BIN(size); - assert(binind < nbins); - chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & - ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); -} - -size_t -arena_salloc_demote(const void *ptr) -{ - size_t ret; - arena_chunk_t *chunk; - size_t pageind, mapbits; - - assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - mapbits = chunk->map[pageind-map_bias].bits; - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if ((mapbits & CHUNK_MAP_LARGE) == 0) { - arena_run_t *run = (arena_run_t 
*)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - size_t binind = arena_bin_index(chunk->arena, run->bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - assert(((uintptr_t)ptr - ((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == - 0); - ret = bin_info->reg_size; - } else { - assert(((uintptr_t)ptr & PAGE_MASK) == 0); - ret = mapbits & ~PAGE_MASK; - if (prof_promote && ret == PAGE_SIZE && (mapbits & - CHUNK_MAP_CLASS_MASK) != 0) { - size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> - CHUNK_MAP_CLASS_SHIFT) - 1; - assert(binind < nbins); - ret = arena_bin_info[binind].reg_size; - } - assert(ret != 0); - } - - return (ret); -} -#endif - -static void -arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - - /* Dissociate run from bin. */ - if (run == bin->runcur) - bin->runcur = NULL; - else { - size_t binind = arena_bin_index(chunk->arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; - - if (bin_info->nregs != 1) { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. 
- */ - arena_run_tree_remove(&bin->runs, run_mapelm); - } - } -} - -static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - size_t binind; - arena_bin_info_t *bin_info; - size_t npages, run_ind, past; - - assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, &chunk->map[ - (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); - - binind = arena_bin_index(chunk->arena, run->bin); - bin_info = &arena_bin_info[binind]; - - malloc_mutex_unlock(&bin->lock); - /******************************/ - npages = bin_info->run_size >> PAGE_SHIFT; - run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); - past = (size_t)(PAGE_CEILING((uintptr_t)run + - (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * - bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); - malloc_mutex_lock(&arena->lock); - - /* - * If the run was originally clean, and some pages were never touched, - * trim the clean pages before deallocating the dirty portion of the - * run. - */ - if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past - - run_ind < npages) { - /* - * Trim clean pages. Convert to large run beforehand. Set the - * last map element first, in case this is a one-page run. 
- */ - chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | - (chunk->map[run_ind+npages-1-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - chunk->map[run_ind-map_bias].bits = bin_info->run_size | - CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & - CHUNK_MAP_FLAGS_MASK); - arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), - ((past - run_ind) << PAGE_SHIFT), false); - /* npages = past - run_ind; */ - } -#ifdef JEMALLOC_DEBUG - run->magic = 0; -#endif - arena_run_dalloc(arena, run, true); - malloc_mutex_unlock(&arena->lock); - /****************************/ - malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - bin->stats.curruns--; -#endif -} - -static void -arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) -{ - - /* - * Make sure that bin->runcur always refers to the lowest non-full run, - * if one exists. - */ - if (bin->runcur == NULL) - bin->runcur = run; - else if ((uintptr_t)run < (uintptr_t)bin->runcur) { - /* Switch runcur. */ - if (bin->runcur->nfree > 0) { - arena_chunk_t *runcur_chunk = - CHUNK_ADDR2BASE(bin->runcur); - size_t runcur_pageind = (((uintptr_t)bin->runcur - - (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *runcur_mapelm = - &runcur_chunk->map[runcur_pageind-map_bias]; - - /* Insert runcur. 
*/ - arena_run_tree_insert(&bin->runs, runcur_mapelm); - } - bin->runcur = run; - } else { - size_t run_pageind = (((uintptr_t)run - - (uintptr_t)chunk)) >> PAGE_SHIFT; - arena_chunk_map_t *run_mapelm = - &chunk->map[run_pageind-map_bias]; - - assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); - arena_run_tree_insert(&bin->runs, run_mapelm); - } -} - -void -arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_t *mapelm) -{ - size_t pageind; - arena_run_t *run; - arena_bin_t *bin; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size_t size; -#endif - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - - (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); - dassert(run->magic == ARENA_RUN_MAGIC); - bin = run->bin; - size_t binind = arena_bin_index(arena, bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size = bin_info->reg_size; -#endif - -#ifdef JEMALLOC_FILL - if (opt_junk) - memset(ptr, 0x5a, size); -#endif - - arena_run_reg_dalloc(run, ptr); - if (run->nfree == bin_info->nregs) { - arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(arena, chunk, run, bin); - } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, chunk, run, bin); - -#ifdef JEMALLOC_STATS - bin->stats.allocated -= size; - bin->stats.ndalloc++; -#endif -} - -#ifdef JEMALLOC_STATS -void -arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ - unsigned i; - - malloc_mutex_lock(&arena->lock); - *nactive += arena->nactive; - *ndirty += arena->ndirty; - - astats->mapped += arena->stats.mapped; - astats->npurge += arena->stats.npurge; - astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->allocated_large += 
arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].highruns += arena->stats.lstats[i].highruns; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - malloc_mutex_unlock(&arena->lock); - - for (i = 0; i < nbins; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(&bin->lock); - bstats[i].allocated += bin->stats.allocated; - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; -#ifdef JEMALLOC_TCACHE - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; -#endif - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].highruns += bin->stats.highruns; - bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); - } -} -#endif - -void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) -{ - - /* Large allocation. 
*/ -#ifdef JEMALLOC_FILL -# ifndef JEMALLOC_STATS - if (opt_junk) -# endif -#endif - { -#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - PAGE_SHIFT; - size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; -#endif - -#ifdef JEMALLOC_FILL -# ifdef JEMALLOC_STATS - if (opt_junk) -# endif - memset(ptr, 0x5a, size); -#endif -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; -#endif - } - - arena_run_dalloc(arena, (arena_run_t *)ptr, true); -} - -static void -arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size) -{ - - assert(size < oldsize); - - /* - * Shrink the run, and make trailing pages available for other - * allocations. - */ - malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, - true); -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; - } -#endif - malloc_mutex_unlock(&arena->lock); -} - -static bool -arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero) -{ - size_t 
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; - size_t npages = oldsize >> PAGE_SHIFT; - size_t followsize; - - assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); - - /* Try to extend the run. */ - assert(size + extra > oldsize); - malloc_mutex_lock(&arena->lock); - if (pageind + npages < chunk_npages && - (chunk->map[pageind+npages-map_bias].bits - & CHUNK_MAP_ALLOCATED) == 0 && (followsize = - chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - - oldsize) { - /* - * The next run is available and sufficiently large. Split the - * following run, then merge the first part with the existing - * allocation. - */ - size_t flag_dirty; - size_t splitsize = (oldsize + followsize <= size + extra) - ? followsize : size + extra - oldsize; - arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + - ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); - - size = oldsize + splitsize; - npages = size >> PAGE_SHIFT; - - /* - * Mark the extended run as dirty if either portion of the run - * was dirty before allocation. This is rather pedantic, - * because there's not actually any sequence of events that - * could cause the resulting run to be passed to - * arena_run_dalloc() with the dirty argument set to false - * (which is when dirty flag consistency would really matter). 
- */ - flag_dirty = (chunk->map[pageind-map_bias].bits & - CHUNK_MAP_DIRTY) | - (chunk->map[pageind+npages-1-map_bias].bits & - CHUNK_MAP_DIRTY); - chunk->map[pageind-map_bias].bits = size | flag_dirty - | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | - CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; - -#ifdef JEMALLOC_STATS - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; - arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; - if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { - arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = - arena->stats.lstats[(size >> PAGE_SHIFT) - - 1].curruns; - } -#endif - malloc_mutex_unlock(&arena->lock); - return (false); - } - malloc_mutex_unlock(&arena->lock); - - return (true); -} - -/* - * Try to resize a large allocation, in order to avoid copying. This will - * always fail if growing an object, and the following run is already in use. - */ -static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) -{ - size_t psize; - - psize = PAGE_CEILING(size + extra); - if (psize == oldsize) { - /* Same size class. 
*/ -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - - size); - } -#endif - return (false); - } else { - arena_chunk_t *chunk; - arena_t *arena; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = chunk->arena; - dassert(arena->magic == ARENA_MAGIC); - - if (psize < oldsize) { -#ifdef JEMALLOC_FILL - /* Fill before shrinking in order avoid a race. */ - if (opt_junk) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } -#endif - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - psize); - return (false); - } else { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, PAGE_CEILING(size), - psize - PAGE_CEILING(size), zero); -#ifdef JEMALLOC_FILL - if (ret == false && zero == false && opt_zero) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - size - oldsize); - } -#endif - return (ret); - } - } -} - -void * -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) -{ - - /* - * Avoid moving the allocation if the size class can be left the same. - */ - if (oldsize <= arena_maxclass) { - if (oldsize <= small_maxclass) { - assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size - == oldsize); - if ((size + extra <= small_maxclass && - SMALL_SIZE2BIN(size + extra) == - SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) { -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), - 0x5a, oldsize - size); - } -#endif - return (ptr); - } - } else { - assert(size <= arena_maxclass); - if (size + extra > small_maxclass) { - if (arena_ralloc_large(ptr, oldsize, size, - extra, zero) == false) - return (ptr); - } - } - } - - /* Reallocation would require a move. */ - return (NULL); -} - -void * -arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) -{ - void *ret; - size_t copysize; - - /* Try to avoid moving the allocation. 
*/ - ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); - if (ret != NULL) - return (ret); - - /* - * size and oldsize are different enough that we need to move the - * object. In that case, fall back to allocating new space and - * copying. - */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else - ret = arena_malloc(size + extra, zero); - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment, NULL); - if (usize == 0) - return (NULL); - ret = ipalloc(usize, alignment, zero); - } else - ret = arena_malloc(size, zero); - - if (ret == NULL) - return (NULL); - } - - /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - memcpy(ret, ptr, copysize); - idalloc(ptr); - return (ret); -} - -bool -arena_new(arena_t *arena, unsigned ind) -{ - unsigned i; - arena_bin_t *bin; - - arena->ind = ind; - arena->nthreads = 0; - - if (malloc_mutex_init(&arena->lock)) - return (true); - -#ifdef JEMALLOC_STATS - memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (arena->stats.lstats == NULL) - return (true); - memset(arena->stats.lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); -# ifdef JEMALLOC_TCACHE - ql_new(&arena->tcache_ql); -# endif -#endif - -#ifdef JEMALLOC_PROF - arena->prof_accumbytes = 0; -#endif - - /* Initialize chunks. 
*/ - ql_new(&arena->chunks_dirty); - arena->spare = NULL; - - arena->nactive = 0; - arena->ndirty = 0; - arena->npurgatory = 0; - - arena_avail_tree_new(&arena->runs_avail_clean); - arena_avail_tree_new(&arena->runs_avail_dirty); - - /* Initialize bins. */ - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) - return (true); - bin->runcur = NULL; - arena_run_tree_new(&bin->runs); -#ifdef JEMALLOC_STATS - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); -#endif - } - -#ifdef JEMALLOC_DEBUG - arena->magic = ARENA_MAGIC; -#endif - - return (false); -} - -#ifdef JEMALLOC_DEBUG -static void -small_size2bin_validate(void) -{ - size_t i, size, binind; - - i = 1; -# ifdef JEMALLOC_TINY - /* Tiny. 
*/ - for (; i < (1U << LG_TINY_MIN); i++) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } - for (; i < qspace_min; i++) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - assert(SMALL_SIZE2BIN(i) == binind); - } -# endif - /* Quantum-spaced. */ - for (; i <= qspace_max; i++) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i++) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - assert(SMALL_SIZE2BIN(i) == binind); - } - /* Sub-page. */ - for (; i <= sspace_max; i++) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) - >> LG_SUBPAGE); - assert(SMALL_SIZE2BIN(i) == binind); - } -} -#endif - -static bool -small_size2bin_init(void) -{ - - if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)) - return (small_size2bin_init_hard()); - - small_size2bin = const_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif - return (false); -} - -static bool -small_size2bin_init_hard(void) -{ - size_t i, size, binind; - uint8_t *custom_small_size2bin; -#define CUSTOM_SMALL_SIZE2BIN(s) \ - custom_small_size2bin[(s-1) >> LG_TINY_MIN] - - assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT - || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT - || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> - LG_TINY_MIN) + 1)); - - custom_small_size2bin = (uint8_t *) - base_alloc(small_maxclass >> LG_TINY_MIN); - if (custom_small_size2bin == NULL) - return (true); - - i = 1; -#ifdef JEMALLOC_TINY - /* Tiny. 
*/ - for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { - size = pow2_ceil(1U << LG_TINY_MIN); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - for (; i < qspace_min; i += TINY_MIN) { - size = pow2_ceil(i); - binind = ffs((int)(size >> (LG_TINY_MIN + 1))); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } -#endif - /* Quantum-spaced. */ - for (; i <= qspace_max; i += TINY_MIN) { - size = QUANTUM_CEILING(i); - binind = ntbins + (size >> LG_QUANTUM) - 1; - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Cacheline-spaced. */ - for (; i <= cspace_max; i += TINY_MIN) { - size = CACHELINE_CEILING(i); - binind = ntbins + nqbins + ((size - cspace_min) >> - LG_CACHELINE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - /* Sub-page. */ - for (; i <= sspace_max; i += TINY_MIN) { - size = SUBPAGE_CEILING(i); - binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> - LG_SUBPAGE); - CUSTOM_SMALL_SIZE2BIN(i) = binind; - } - - small_size2bin = custom_small_size2bin; -#ifdef JEMALLOC_DEBUG - small_size2bin_validate(); -#endif - return (false); -#undef CUSTOM_SMALL_SIZE2BIN -} - -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size >= min_run_size - * *) bin_info->run_size <= arena_maxclass - * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also - * calculated here, since these settings are all interdependent. 
- */ -static size_t -bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) -{ - size_t try_run_size, good_run_size; - uint32_t try_nregs, good_nregs; - uint32_t try_hdr_size, good_hdr_size; - uint32_t try_bitmap_offset, good_bitmap_offset; -#ifdef JEMALLOC_PROF - uint32_t try_ctx0_offset, good_ctx0_offset; -#endif - uint32_t try_reg0_offset, good_reg0_offset; - - assert(min_run_size >= PAGE_SIZE); - assert(min_run_size <= arena_maxclass); - - /* - * Calculate known-valid settings before entering the run_size - * expansion loop, so that the first part of the loop always copies - * valid settings. - * - * The do..while loop iteratively reduces the number of regions until - * the run header and the regions no longer overlap. A closed formula - * would be quite messy, since there is an interdependency between the - * header's mask length and the number of regions. - */ - try_run_size = min_run_size; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* Add space for one (prof_ctx_t *) per region. */ - try_hdr_size += try_nregs * sizeof(prof_ctx_t *); - } else - try_ctx0_offset = 0; -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - - /* run_size expansion loop. */ - do { - /* - * Copy valid settings before trying more aggressive settings. 
- */ - good_run_size = try_run_size; - good_nregs = try_nregs; - good_hdr_size = try_hdr_size; - good_bitmap_offset = try_bitmap_offset; -#ifdef JEMALLOC_PROF - good_ctx0_offset = try_ctx0_offset; -#endif - good_reg0_offset = try_reg0_offset; - - /* Try more aggressive settings. */ - try_run_size += PAGE_SIZE; - try_nregs = ((try_run_size - sizeof(arena_run_t)) / - bin_info->reg_size) - + 1; /* Counter-act try_nregs-- in loop. */ - if (try_nregs > RUN_MAXREGS) { - try_nregs = RUN_MAXREGS - + 1; /* Counter-act try_nregs-- in loop. */ - } - do { - try_nregs--; - try_hdr_size = sizeof(arena_run_t); - /* Pad to a long boundary. */ - try_hdr_size = LONG_CEILING(try_hdr_size); - try_bitmap_offset = try_hdr_size; - /* Add space for bitmap. */ - try_hdr_size += bitmap_size(try_nregs); -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote == false) { - /* Pad to a quantum boundary. */ - try_hdr_size = QUANTUM_CEILING(try_hdr_size); - try_ctx0_offset = try_hdr_size; - /* - * Add space for one (prof_ctx_t *) per region. - */ - try_hdr_size += try_nregs * - sizeof(prof_ctx_t *); - } -#endif - try_reg0_offset = try_run_size - (try_nregs * - bin_info->reg_size); - } while (try_hdr_size > try_reg0_offset); - } while (try_run_size <= arena_maxclass - && try_run_size <= arena_maxclass - && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX - && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size - && try_nregs < RUN_MAXREGS); - - assert(good_hdr_size <= good_reg0_offset); - - /* Copy final settings. 
*/ - bin_info->run_size = good_run_size; - bin_info->nregs = good_nregs; - bin_info->bitmap_offset = good_bitmap_offset; -#ifdef JEMALLOC_PROF - bin_info->ctx0_offset = good_ctx0_offset; -#endif - bin_info->reg0_offset = good_reg0_offset; - - return (good_run_size); -} - -static bool -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - unsigned i; - size_t prev_run_size; - - arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); - if (arena_bin_info == NULL) - return (true); - - prev_run_size = PAGE_SIZE; - i = 0; -#ifdef JEMALLOC_TINY - /* (2^n)-spaced tiny bins. */ - for (; i < ntbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (1U << (LG_TINY_MIN + i)); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } -#endif - - /* Quantum-spaced bins. */ - for (; i < ntbins + nqbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Cacheline-spaced bins. */ - for (; i < ntbins + nqbins + ncbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << - LG_CACHELINE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - /* Subpage-spaced bins. */ - for (; i < nbins; i++) { - bin_info = &arena_bin_info[i]; - bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + - ncbins)) << LG_SUBPAGE); - prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); - } - - return (false); -} - -bool -arena_boot(void) -{ - size_t header_size; - unsigned i; - - /* Set variables according to the value of opt_lg_[qc]space_max. 
*/ - qspace_max = (1U << opt_lg_qspace_max); - cspace_min = CACHELINE_CEILING(qspace_max); - if (cspace_min == qspace_max) - cspace_min += CACHELINE; - cspace_max = (1U << opt_lg_cspace_max); - sspace_min = SUBPAGE_CEILING(cspace_max); - if (sspace_min == cspace_max) - sspace_min += SUBPAGE; - assert(sspace_min < PAGE_SIZE); - sspace_max = PAGE_SIZE - SUBPAGE; - -#ifdef JEMALLOC_TINY - assert(LG_QUANTUM >= LG_TINY_MIN); -#endif - assert(ntbins <= LG_QUANTUM); - nqbins = qspace_max >> LG_QUANTUM; - ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; - nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1; - nbins = ntbins + nqbins + ncbins + nsbins; - - /* - * The small_size2bin lookup table uses uint8_t to encode each bin - * index, so we cannot support more than 256 small size classes. This - * limit is difficult to exceed (not even possible with 16B quantum and - * 4KiB pages), and such configurations are impractical, but - * nonetheless we need to protect against this case in order to avoid - * undefined behavior. - * - * Further constrain nbins to 255 if prof_promote is true, since all - * small size classes, plus a "not small" size class must be stored in - * 8 bits of arena_chunk_map_t's bits field. - */ -#ifdef JEMALLOC_PROF - if (opt_prof && prof_promote) { - if (nbins > 255) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 255)\n"); - abort(); - } - } else -#endif - if (nbins > 256) { - char line_buf[UMAX2S_BUFSIZE]; - malloc_write(": Too many small size classes ("); - malloc_write(u2s(nbins, 10, line_buf)); - malloc_write(" > max 256)\n"); - abort(); - } - - /* - * Compute the header size such that it is large enough to contain the - * page map. The page map is biased to omit entries for the header - * itself, so some iteration is necessary to compute the map bias. 
- * - * 1) Compute safe header_size and map_bias values that include enough - * space for an unbiased page map. - * 2) Refine map_bias based on (1) to omit the header pages in the page - * map. The resulting map_bias may be one too small. - * 3) Refine map_bias based on (2). The result will be >= the result - * from (2), and will always be correct. - */ - map_bias = 0; - for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map) - + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); - map_bias = (header_size >> PAGE_SHIFT) + ((header_size & - PAGE_MASK) != 0); - } - assert(map_bias > 0); - - arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); - - if (small_size2bin_init()) - return (true); - - if (bin_info_init()) - return (true); - - return (false); -} diff --git a/jemalloc/src/atomic.c b/jemalloc/src/atomic.c deleted file mode 100644 index 77ee313..0000000 --- a/jemalloc/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/jemalloc/src/base.c b/jemalloc/src/base.c deleted file mode 100644 index cc85e84..0000000 --- a/jemalloc/src/base.c +++ /dev/null @@ -1,106 +0,0 @@ -#define JEMALLOC_BASE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t base_mtx; - -/* - * Current pages that are being used for internal memory allocations. These - * pages are carved up in cacheline-size quanta, so that there is no chance of - * false cache line sharing. - */ -static void *base_pages; -static void *base_next_addr; -static void *base_past_addr; /* Addr immediately past base_pages. */ -static extent_node_t *base_nodes; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static bool base_pages_alloc(size_t minsize); - -/******************************************************************************/ - -static bool -base_pages_alloc(size_t minsize) -{ - size_t csize; - bool zero; - - assert(minsize != 0); - csize = CHUNK_CEILING(minsize); - zero = false; - base_pages = chunk_alloc(csize, true, &zero); - if (base_pages == NULL) - return (true); - base_next_addr = base_pages; - base_past_addr = (void *)((uintptr_t)base_pages + csize); - - return (false); -} - -void * -base_alloc(size_t size) -{ - void *ret; - size_t csize; - - /* Round size up to nearest multiple of the cacheline size. */ - csize = CACHELINE_CEILING(size); - - malloc_mutex_lock(&base_mtx); - /* Make sure there's enough space for the allocation. */ - if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { - if (base_pages_alloc(csize)) { - malloc_mutex_unlock(&base_mtx); - return (NULL); - } - } - /* Allocate. */ - ret = base_next_addr; - base_next_addr = (void *)((uintptr_t)base_next_addr + csize); - malloc_mutex_unlock(&base_mtx); - - return (ret); -} - -extent_node_t * -base_node_alloc(void) -{ - extent_node_t *ret; - - malloc_mutex_lock(&base_mtx); - if (base_nodes != NULL) { - ret = base_nodes; - base_nodes = *(extent_node_t **)ret; - malloc_mutex_unlock(&base_mtx); - } else { - malloc_mutex_unlock(&base_mtx); - ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); - } - - return (ret); -} - -void -base_node_dealloc(extent_node_t *node) -{ - - malloc_mutex_lock(&base_mtx); - *(extent_node_t **)node = base_nodes; - base_nodes = node; - malloc_mutex_unlock(&base_mtx); -} - -bool -base_boot(void) -{ - - base_nodes = NULL; - if (malloc_mutex_init(&base_mtx)) - return (true); - - return (false); -} diff --git a/jemalloc/src/bitmap.c b/jemalloc/src/bitmap.c deleted file mode 100644 index b47e262..0000000 --- a/jemalloc/src/bitmap.c +++ /dev/null @@ -1,90 +0,0 @@ -#define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - 
-/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t bits2groups(size_t nbits); - -/******************************************************************************/ - -static size_t -bits2groups(size_t nbits) -{ - - return ((nbits >> LG_BITMAP_GROUP_NBITS) + - !!(nbits & BITMAP_GROUP_NBITS_MASK)); -} - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. - */ - binfo->levels[0].group_offset = 0; - group_count = bits2groups(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = bits2groups(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - binfo->nlevels = i; - binfo->nbits = nbits; -} - -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. 
- */ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } -} diff --git a/jemalloc/src/chunk.c b/jemalloc/src/chunk.c deleted file mode 100644 index 301519e..0000000 --- a/jemalloc/src/chunk.c +++ /dev/null @@ -1,171 +0,0 @@ -#define JEMALLOC_CHUNK_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -size_t opt_lg_chunk = LG_CHUNK_DEFAULT; -#ifdef JEMALLOC_SWAP -bool opt_overcommit = true; -#endif - -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) -malloc_mutex_t chunks_mtx; -chunk_stats_t stats_chunks; -#endif - -#ifdef JEMALLOC_IVSALLOC -rtree_t *chunks_rtree; -#endif - -/* Various chunk-related settings. */ -size_t chunksize; -size_t chunksize_mask; /* (chunksize - 1). */ -size_t chunk_npages; -size_t map_bias; -size_t arena_maxclass; /* Max size class for arenas. */ - -/******************************************************************************/ - -/* - * If the caller specifies (*zero == false), it is still possible to receive - * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() - * takes advantage of this to avoid demanding zeroed chunks, but taking - * advantage of them if they are returned. 
- */ -void * -chunk_alloc(size_t size, bool base, bool *zero) -{ - void *ret; - - assert(size != 0); - assert((size & chunksize_mask) == 0); - -#ifdef JEMALLOC_SWAP - if (swap_enabled) { - ret = chunk_alloc_swap(size, zero); - if (ret != NULL) - goto RETURN; - } - - if (swap_enabled == false || opt_overcommit) { -#endif -#ifdef JEMALLOC_DSS - ret = chunk_alloc_dss(size, zero); - if (ret != NULL) - goto RETURN; -#endif - ret = chunk_alloc_mmap(size); - if (ret != NULL) { - *zero = true; - goto RETURN; - } -#ifdef JEMALLOC_SWAP - } -#endif - - /* All strategies for allocation failed. */ - ret = NULL; -RETURN: -#ifdef JEMALLOC_IVSALLOC - if (base == false && ret != NULL) { - if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { - chunk_dealloc(ret, size); - return (NULL); - } - } -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (ret != NULL) { -# ifdef JEMALLOC_PROF - bool gdump; -# endif - malloc_mutex_lock(&chunks_mtx); -# ifdef JEMALLOC_STATS - stats_chunks.nchunks += (size / chunksize); -# endif - stats_chunks.curchunks += (size / chunksize); - if (stats_chunks.curchunks > stats_chunks.highchunks) { - stats_chunks.highchunks = stats_chunks.curchunks; -# ifdef JEMALLOC_PROF - gdump = true; -# endif - } -# ifdef JEMALLOC_PROF - else - gdump = false; -# endif - malloc_mutex_unlock(&chunks_mtx); -# ifdef JEMALLOC_PROF - if (opt_prof && opt_prof_gdump && gdump) - prof_gdump(); -# endif - } -#endif - - assert(CHUNK_ADDR2BASE(ret) == ret); - return (ret); -} - -void -chunk_dealloc(void *chunk, size_t size) -{ - - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); - -#ifdef JEMALLOC_IVSALLOC - rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); -#endif -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - malloc_mutex_lock(&chunks_mtx); - stats_chunks.curchunks -= (size / chunksize); - malloc_mutex_unlock(&chunks_mtx); -#endif - -#ifdef JEMALLOC_SWAP - if (swap_enabled && 
chunk_dealloc_swap(chunk, size) == false) - return; -#endif -#ifdef JEMALLOC_DSS - if (chunk_dealloc_dss(chunk, size) == false) - return; -#endif - chunk_dealloc_mmap(chunk, size); -} - -bool -chunk_boot(void) -{ - - /* Set variables according to the value of opt_lg_chunk. */ - chunksize = (ZU(1) << opt_lg_chunk); - assert(chunksize >= PAGE_SIZE); - chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> PAGE_SHIFT); - -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - if (malloc_mutex_init(&chunks_mtx)) - return (true); - memset(&stats_chunks, 0, sizeof(chunk_stats_t)); -#endif -#ifdef JEMALLOC_SWAP - if (chunk_swap_boot()) - return (true); -#endif - if (chunk_mmap_boot()) - return (true); -#ifdef JEMALLOC_DSS - if (chunk_dss_boot()) - return (true); -#endif -#ifdef JEMALLOC_IVSALLOC - chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); - if (chunks_rtree == NULL) - return (true); -#endif - - return (false); -} diff --git a/jemalloc/src/chunk_dss.c b/jemalloc/src/chunk_dss.c deleted file mode 100644 index 5c0e290..0000000 --- a/jemalloc/src/chunk_dss.c +++ /dev/null @@ -1,284 +0,0 @@ -#define JEMALLOC_CHUNK_DSS_C_ -#include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_DSS -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t dss_mtx; - -/* Base address of the DSS. */ -static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ -static void *dss_max; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. 
- */ -static extent_tree_t dss_chunks_szad; -static extent_tree_t dss_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle_dss(size_t size, bool *zero); -static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); - -/******************************************************************************/ - -static void * -chunk_recycle_dss(size_t size, bool *zero) -{ - extent_node_t *node, key; - - key.addr = NULL; - key.size = size; - malloc_mutex_lock(&dss_mtx); - node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); - if (node != NULL) { - void *ret = node->addr; - - /* Remove node from the tree. */ - extent_tree_szad_remove(&dss_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within dss_chunks_ad - * does not change. - */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; - extent_tree_szad_insert(&dss_chunks_szad, node); - } - malloc_mutex_unlock(&dss_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); - } - malloc_mutex_unlock(&dss_mtx); - - return (NULL); -} - -void * -chunk_alloc_dss(size_t size, bool *zero) -{ - void *ret; - - ret = chunk_recycle_dss(size, zero); - if (ret != NULL) - return (ret); - - /* - * sbrk() uses a signed increment argument, so take care not to - * interpret a huge allocation request as a negative increment. - */ - if ((intptr_t)size < 0) - return (NULL); - - malloc_mutex_lock(&dss_mtx); - if (dss_prev != (void *)-1) { - intptr_t incr; - - /* - * The loop is necessary to recover from races with other - * threads that are using the DSS for something other than - * malloc. - */ - do { - /* Get the current end of the DSS. 
*/ - dss_max = sbrk(0); - - /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. - */ - incr = (intptr_t)size - - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); - if (incr == (intptr_t)size) - ret = dss_max; - else { - ret = (void *)((intptr_t)dss_max + incr); - incr += size; - } - - dss_prev = sbrk(incr); - if (dss_prev == dss_max) { - /* Success. */ - dss_max = (void *)((intptr_t)dss_prev + incr); - malloc_mutex_unlock(&dss_mtx); - *zero = true; - return (ret); - } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(&dss_mtx); - - return (NULL); -} - -static extent_node_t * -chunk_dealloc_dss_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - xnode = NULL; - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * dss_chunks_ad, so only remove/insert from/into - * dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on dss_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. - */ - malloc_mutex_unlock(&dss_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&dss_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&dss_chunks_ad, node); - extent_tree_szad_insert(&dss_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. 
*/ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&dss_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within dss_chunks_ad, so only - * remove/insert node from/into dss_chunks_szad. - */ - extent_tree_szad_remove(&dss_chunks_szad, prev); - extent_tree_ad_remove(&dss_chunks_ad, prev); - - extent_tree_szad_remove(&dss_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&dss_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - -bool -chunk_in_dss(void *chunk) -{ - bool ret; - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&dss_mtx); - - return (ret); -} - -bool -chunk_dealloc_dss(void *chunk, size_t size) -{ - bool ret; - - malloc_mutex_lock(&dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_dss_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* Get the current end of the DSS. */ - dss_max = sbrk(0); - - /* - * Try to shrink the DSS if this chunk is at the end of the - * DSS. The sbrk() call here is subject to a race condition - * with threads that use brk(2) or sbrk(2) directly, but the - * alternative would be to leak memory for the sake of poorly - * designed multi-threaded programs. - */ - if ((void *)((uintptr_t)chunk + size) == dss_max - && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { - /* Success. 
*/ - dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); - - if (node != NULL) { - extent_tree_szad_remove(&dss_chunks_szad, node); - extent_tree_ad_remove(&dss_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - - ret = false; - goto RETURN; - } - - ret = true; -RETURN: - malloc_mutex_unlock(&dss_mtx); - return (ret); -} - -bool -chunk_dss_boot(void) -{ - - if (malloc_mutex_init(&dss_mtx)) - return (true); - dss_base = sbrk(0); - dss_prev = dss_base; - dss_max = dss_base; - extent_tree_szad_new(&dss_chunks_szad); - extent_tree_ad_new(&dss_chunks_ad); - - return (false); -} - -/******************************************************************************/ -#endif /* JEMALLOC_DSS */ diff --git a/jemalloc/src/chunk_mmap.c b/jemalloc/src/chunk_mmap.c deleted file mode 100644 index 164e86e..0000000 --- a/jemalloc/src/chunk_mmap.c +++ /dev/null @@ -1,239 +0,0 @@ -#define JEMALLOC_CHUNK_MMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -/* - * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and - * potentially avoid some system calls. - */ -#ifndef NO_TLS -static __thread bool mmap_unaligned_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#define MMAP_UNALIGNED_GET() mmap_unaligned_tls -#define MMAP_UNALIGNED_SET(v) do { \ - mmap_unaligned_tls = (v); \ -} while (0) -#else -static pthread_key_t mmap_unaligned_tsd; -#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) -#define MMAP_UNALIGNED_SET(v) do { \ - pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ -} while (0) -#endif - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void *pages_map(void *addr, size_t size, bool noreserve); -static void pages_unmap(void *addr, size_t size); -static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, - bool noreserve); -static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); - -/******************************************************************************/ - -static void * -pages_map(void *addr, size_t size, bool noreserve) -{ - void *ret; - - /* - * We don't use MAP_FIXED here, because it can cause the *replacement* - * of existing mappings, and we only want to create new mappings. - */ - int flags = MAP_PRIVATE | MAP_ANON; -#ifdef MAP_NORESERVE - if (noreserve) - flags |= MAP_NORESERVE; -#endif - ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); - assert(ret != NULL); - - if (ret == MAP_FAILED) - ret = NULL; - else if (addr != NULL && ret != addr) { - /* - * We succeeded in mapping memory, but not in the right place. - */ - if (munmap(ret, size) == -1) { - char buf[BUFERROR_BUF]; - - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - } - ret = NULL; - } - - assert(ret == NULL || (addr == NULL && ret != addr) - || (addr != NULL && ret == addr)); - return (ret); -} - -static void -pages_unmap(void *addr, size_t size) -{ - - if (munmap(addr, size) == -1) { - char buf[BUFERROR_BUF]; - - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - } -} - -static void * -chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) -{ - void *ret; - size_t offset; - - /* Beware size_t wrap-around. */ - if (size + chunksize <= size) - return (NULL); - - ret = pages_map(NULL, size + chunksize, noreserve); - if (ret == NULL) - return (NULL); - - /* Clean up unneeded leading/trailing space. 
*/ - offset = CHUNK_ADDR2OFFSET(ret); - if (offset != 0) { - /* Note that mmap() returned an unaligned mapping. */ - unaligned = true; - - /* Leading space. */ - pages_unmap(ret, chunksize - offset); - - ret = (void *)((uintptr_t)ret + - (chunksize - offset)); - - /* Trailing space. */ - pages_unmap((void *)((uintptr_t)ret + size), - offset); - } else { - /* Trailing space only. */ - pages_unmap((void *)((uintptr_t)ret + size), - chunksize); - } - - /* - * If mmap() returned an aligned mapping, reset mmap_unaligned so that - * the next chunk_alloc_mmap() execution tries the fast allocation - * method. - */ - if (unaligned == false) - MMAP_UNALIGNED_SET(false); - - return (ret); -} - -static void * -chunk_alloc_mmap_internal(size_t size, bool noreserve) -{ - void *ret; - - /* - * Ideally, there would be a way to specify alignment to mmap() (like - * NetBSD has), but in the absence of such a feature, we have to work - * hard to efficiently create aligned mappings. The reliable, but - * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in at least one call to - * pages_unmap(). - * - * A more optimistic approach is to try mapping precisely the right - * amount, then try to append another mapping if alignment is off. In - * practice, this works out well as long as the application is not - * interleaving mappings via direct mmap() calls. If we do run into a - * situation where there is an interleaved mapping and we are unable to - * extend an unaligned mapping, our best option is to switch to the - * slow method until mmap() returns another aligned mapping. This will - * tend to leave a gap in the memory map that is too small to cause - * later problems for the optimistic method. - * - * Another possible confounding factor is address space layout - * randomization (ASLR), which causes mmap(2) to disregard the - * requested address. 
mmap_unaligned tracks whether the previous - * chunk_alloc_mmap() execution received any unaligned or relocated - * mappings, and if so, the current execution will immediately fall - * back to the slow method. However, we keep track of whether the fast - * method would have succeeded, and if so, we make a note to try the - * fast method next time. - */ - - if (MMAP_UNALIGNED_GET() == false) { - size_t offset; - - ret = pages_map(NULL, size, noreserve); - if (ret == NULL) - return (NULL); - - offset = CHUNK_ADDR2OFFSET(ret); - if (offset != 0) { - MMAP_UNALIGNED_SET(true); - /* Try to extend chunk boundary. */ - if (pages_map((void *)((uintptr_t)ret + size), - chunksize - offset, noreserve) == NULL) { - /* - * Extension failed. Clean up, then revert to - * the reliable-but-expensive method. - */ - pages_unmap(ret, size); - ret = chunk_alloc_mmap_slow(size, true, - noreserve); - } else { - /* Clean up unneeded leading space. */ - pages_unmap(ret, chunksize - offset); - ret = (void *)((uintptr_t)ret + (chunksize - - offset)); - } - } - } else - ret = chunk_alloc_mmap_slow(size, false, noreserve); - - return (ret); -} - -void * -chunk_alloc_mmap(size_t size) -{ - - return (chunk_alloc_mmap_internal(size, false)); -} - -void * -chunk_alloc_mmap_noreserve(size_t size) -{ - - return (chunk_alloc_mmap_internal(size, true)); -} - -void -chunk_dealloc_mmap(void *chunk, size_t size) -{ - - pages_unmap(chunk, size); -} - -bool -chunk_mmap_boot(void) -{ - -#ifdef NO_TLS - if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { - malloc_write(": Error in pthread_key_create()\n"); - return (true); - } -#endif - - return (false); -} diff --git a/jemalloc/src/chunk_swap.c b/jemalloc/src/chunk_swap.c deleted file mode 100644 index cb25ae0..0000000 --- a/jemalloc/src/chunk_swap.c +++ /dev/null @@ -1,402 +0,0 @@ -#define JEMALLOC_CHUNK_SWAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_SWAP 
-/******************************************************************************/ -/* Data. */ - -malloc_mutex_t swap_mtx; -bool swap_enabled; -bool swap_prezeroed; -size_t swap_nfds; -int *swap_fds; -#ifdef JEMALLOC_STATS -size_t swap_avail; -#endif - -/* Base address of the mmap()ed file(s). */ -static void *swap_base; -/* Current end of the space in use (<= swap_max). */ -static void *swap_end; -/* Absolute upper limit on file-backed addresses. */ -static void *swap_max; - -/* - * Trees of chunks that were previously allocated (trees differ only in node - * ordering). These are used when allocating chunks, in an attempt to re-use - * address space. Depending on function, different tree orderings are needed, - * which is why there are two trees with the same contents. - */ -static extent_tree_t swap_chunks_szad; -static extent_tree_t swap_chunks_ad; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void *chunk_recycle_swap(size_t size, bool *zero); -static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size); - -/******************************************************************************/ - -static void * -chunk_recycle_swap(size_t size, bool *zero) -{ - extent_node_t *node, key; - - key.addr = NULL; - key.size = size; - malloc_mutex_lock(&swap_mtx); - node = extent_tree_szad_nsearch(&swap_chunks_szad, &key); - if (node != NULL) { - void *ret = node->addr; - - /* Remove node from the tree. */ - extent_tree_szad_remove(&swap_chunks_szad, node); - if (node->size == size) { - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } else { - /* - * Insert the remainder of node's address range as a - * smaller chunk. Its position within swap_chunks_ad - * does not change. 
- */ - assert(node->size > size); - node->addr = (void *)((uintptr_t)node->addr + size); - node->size -= size; - extent_tree_szad_insert(&swap_chunks_szad, node); - } -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif - malloc_mutex_unlock(&swap_mtx); - - if (*zero) - memset(ret, 0, size); - return (ret); - } - malloc_mutex_unlock(&swap_mtx); - - return (NULL); -} - -void * -chunk_alloc_swap(size_t size, bool *zero) -{ - void *ret; - - assert(swap_enabled); - - ret = chunk_recycle_swap(size, zero); - if (ret != NULL) - return (ret); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { - ret = swap_end; - swap_end = (void *)((uintptr_t)swap_end + size); -#ifdef JEMALLOC_STATS - swap_avail -= size; -#endif - malloc_mutex_unlock(&swap_mtx); - - if (swap_prezeroed) - *zero = true; - else if (*zero) - memset(ret, 0, size); - } else { - malloc_mutex_unlock(&swap_mtx); - return (NULL); - } - - return (ret); -} - -static extent_node_t * -chunk_dealloc_swap_record(void *chunk, size_t size) -{ - extent_node_t *xnode, *node, *prev, key; - - xnode = NULL; - while (true) { - key.addr = (void *)((uintptr_t)chunk + size); - node = extent_tree_ad_nsearch(&swap_chunks_ad, &key); - /* Try to coalesce forward. */ - if (node != NULL && node->addr == key.addr) { - /* - * Coalesce chunk with the following address range. - * This does not change the position within - * swap_chunks_ad, so only remove/insert from/into - * swap_chunks_szad. - */ - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = chunk; - node->size += size; - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } else if (xnode == NULL) { - /* - * It is possible that base_node_alloc() will cause a - * new base chunk to be allocated, so take care not to - * deadlock on swap_mtx, and recover if another thread - * deallocates an adjacent chunk while this one is busy - * allocating xnode. 
- */ - malloc_mutex_unlock(&swap_mtx); - xnode = base_node_alloc(); - malloc_mutex_lock(&swap_mtx); - if (xnode == NULL) - return (NULL); - } else { - /* Coalescing forward failed, so insert a new node. */ - node = xnode; - xnode = NULL; - node->addr = chunk; - node->size = size; - extent_tree_ad_insert(&swap_chunks_ad, node); - extent_tree_szad_insert(&swap_chunks_szad, node); - break; - } - } - /* Discard xnode if it ended up unused do to a race. */ - if (xnode != NULL) - base_node_dealloc(xnode); - - /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(&swap_chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == - chunk) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within swap_chunks_ad, so only - * remove/insert node from/into swap_chunks_szad. - */ - extent_tree_szad_remove(&swap_chunks_szad, prev); - extent_tree_ad_remove(&swap_chunks_ad, prev); - - extent_tree_szad_remove(&swap_chunks_szad, node); - node->addr = prev->addr; - node->size += prev->size; - extent_tree_szad_insert(&swap_chunks_szad, node); - - base_node_dealloc(prev); - } - - return (node); -} - -bool -chunk_in_swap(void *chunk) -{ - bool ret; - - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(&swap_mtx); - - return (ret); -} - -bool -chunk_dealloc_swap(void *chunk, size_t size) -{ - bool ret; - - assert(swap_enabled); - - malloc_mutex_lock(&swap_mtx); - if ((uintptr_t)chunk >= (uintptr_t)swap_base - && (uintptr_t)chunk < (uintptr_t)swap_max) { - extent_node_t *node; - - /* Try to coalesce with other unused chunks. */ - node = chunk_dealloc_swap_record(chunk, size); - if (node != NULL) { - chunk = node->addr; - size = node->size; - } - - /* - * Try to shrink the in-use memory if this chunk is at the end - * of the in-use memory. 
- */ - if ((void *)((uintptr_t)chunk + size) == swap_end) { - swap_end = (void *)((uintptr_t)swap_end - size); - - if (node != NULL) { - extent_tree_szad_remove(&swap_chunks_szad, - node); - extent_tree_ad_remove(&swap_chunks_ad, node); - base_node_dealloc(node); - } - } else - madvise(chunk, size, MADV_DONTNEED); - -#ifdef JEMALLOC_STATS - swap_avail += size; -#endif - ret = false; - goto RETURN; - } - - ret = true; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) -{ - bool ret; - unsigned i; - off_t off; - void *vaddr; - size_t cumsize, voff; - size_t sizes[nfds]; - - malloc_mutex_lock(&swap_mtx); - - /* Get file sizes. */ - for (i = 0, cumsize = 0; i < nfds; i++) { - off = lseek(fds[i], 0, SEEK_END); - if (off == ((off_t)-1)) { - ret = true; - goto RETURN; - } - if (PAGE_CEILING(off) != off) { - /* Truncate to a multiple of the page size. */ - off &= ~PAGE_MASK; - if (ftruncate(fds[i], off) != 0) { - ret = true; - goto RETURN; - } - } - sizes[i] = off; - if (cumsize + off < cumsize) { - /* - * Cumulative file size is greater than the total - * address space. Bail out while it's still obvious - * what the problem is. - */ - ret = true; - goto RETURN; - } - cumsize += off; - } - - /* Round down to a multiple of the chunk size. */ - cumsize &= ~chunksize_mask; - if (cumsize == 0) { - ret = true; - goto RETURN; - } - - /* - * Allocate a chunk-aligned region of anonymous memory, which will - * be the final location for the memory-mapped files. - */ - vaddr = chunk_alloc_mmap_noreserve(cumsize); - if (vaddr == NULL) { - ret = true; - goto RETURN; - } - - /* Overlay the files onto the anonymous mapping. 
*/ - for (i = 0, voff = 0; i < nfds; i++) { - void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); - if (addr == MAP_FAILED) { - char buf[BUFERROR_BUF]; - - - buferror(errno, buf, sizeof(buf)); - malloc_write( - ": Error in mmap(..., MAP_FIXED, ...): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - if (munmap(vaddr, voff) == -1) { - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in munmap(): "); - malloc_write(buf); - malloc_write("\n"); - } - ret = true; - goto RETURN; - } - assert(addr == (void *)((uintptr_t)vaddr + voff)); - - /* - * Tell the kernel that the mapping will be accessed randomly, - * and that it should not gratuitously sync pages to the - * filesystem. - */ -#ifdef MADV_RANDOM - madvise(addr, sizes[i], MADV_RANDOM); -#endif -#ifdef MADV_NOSYNC - madvise(addr, sizes[i], MADV_NOSYNC); -#endif - - voff += sizes[i]; - } - - swap_prezeroed = prezeroed; - swap_base = vaddr; - swap_end = swap_base; - swap_max = (void *)((uintptr_t)vaddr + cumsize); - - /* Copy the fds array for mallctl purposes. */ - swap_fds = (int *)base_alloc(nfds * sizeof(int)); - if (swap_fds == NULL) { - ret = true; - goto RETURN; - } - memcpy(swap_fds, fds, nfds * sizeof(int)); - swap_nfds = nfds; - -#ifdef JEMALLOC_STATS - swap_avail = cumsize; -#endif - - swap_enabled = true; - - ret = false; -RETURN: - malloc_mutex_unlock(&swap_mtx); - return (ret); -} - -bool -chunk_swap_boot(void) -{ - - if (malloc_mutex_init(&swap_mtx)) - return (true); - - swap_enabled = false; - swap_prezeroed = false; /* swap.* mallctl's depend on this. 
*/ - swap_nfds = 0; - swap_fds = NULL; -#ifdef JEMALLOC_STATS - swap_avail = 0; -#endif - swap_base = NULL; - swap_end = NULL; - swap_max = NULL; - - extent_tree_szad_new(&swap_chunks_szad); - extent_tree_ad_new(&swap_chunks_ad); - - return (false); -} - -/******************************************************************************/ -#endif /* JEMALLOC_SWAP */ diff --git a/jemalloc/src/ckh.c b/jemalloc/src/ckh.c deleted file mode 100644 index 143b5b5..0000000 --- a/jemalloc/src/ckh.c +++ /dev/null @@ -1,619 +0,0 @@ -/* - ******************************************************************************* - * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each - * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash - * functions are employed. The original cuckoo hashing algorithm was described - * in: - * - * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms - * 51(2):122-144. - * - * Generalization of cuckoo hashing was discussed in: - * - * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical - * alternative to traditional hash tables. In Proceedings of the 7th - * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA, - * January 2006. - * - * This implementation uses precisely two hash functions because that is the - * fewest that can work, and supporting multiple hashes is an implementation - * burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006) - * that shows approximate expected maximum load factors for various - * configurations: - * - * | #cells/bucket | - * #hashes | 1 | 2 | 4 | 8 | - * --------+-------+-------+-------+-------+ - * 1 | 0.006 | 0.006 | 0.03 | 0.12 | - * 2 | 0.49 | 0.86 |>0.93< |>0.96< | - * 3 | 0.91 | 0.97 | 0.98 | 0.999 | - * 4 | 0.97 | 0.99 | 0.999 | | - * - * The number of cells per bucket is chosen such that a bucket fits in one cache - * line. 
So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing, - * respectively. - * - ******************************************************************************/ -#define JEMALLOC_CKH_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static bool ckh_grow(ckh_t *ckh); -static void ckh_shrink(ckh_t *ckh); - -/******************************************************************************/ - -/* - * Search bucket for key and return the cell number if found; SIZE_T_MAX - * otherwise. - */ -JEMALLOC_INLINE size_t -ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) -{ - ckhc_t *cell; - unsigned i; - - for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; - if (cell->key != NULL && ckh->keycomp(key, cell->key)) - return ((bucket << LG_CKH_BUCKET_CELLS) + i); - } - - return (SIZE_T_MAX); -} - -/* - * Search table for key and return cell number if found; SIZE_T_MAX otherwise. - */ -JEMALLOC_INLINE size_t -ckh_isearch(ckh_t *ckh, const void *key) -{ - size_t hash1, hash2, bucket, cell; - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); - - /* Search primary bucket. */ - bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); - cell = ckh_bucket_search(ckh, bucket, key); - if (cell != SIZE_T_MAX) - return (cell); - - /* Search secondary bucket. */ - bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); - cell = ckh_bucket_search(ckh, bucket, key); - return (cell); -} - -JEMALLOC_INLINE bool -ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, - const void *data) -{ - ckhc_t *cell; - unsigned offset, i; - - /* - * Cycle through the cells in the bucket, starting at a random position. - * The randomness avoids worst-case search overhead as buckets fill up. 
- */ - prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); - for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + - ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; - if (cell->key == NULL) { - cell->key = key; - cell->data = data; - ckh->count++; - return (false); - } - } - - return (true); -} - -/* - * No space is available in bucket. Randomly evict an item, then try to find an - * alternate location for that item. Iteratively repeat this - * eviction/relocation procedure until either success or detection of an - * eviction/relocation bucket cycle. - */ -JEMALLOC_INLINE bool -ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, - void const **argdata) -{ - const void *key, *data, *tkey, *tdata; - ckhc_t *cell; - size_t hash1, hash2, bucket, tbucket; - unsigned i; - - bucket = argbucket; - key = *argkey; - data = *argdata; - while (true) { - /* - * Choose a random item within the bucket to evict. This is - * critical to correct function, because without (eventually) - * evicting all items within a bucket during iteration, it - * would be possible to get stuck in an infinite loop if there - * were an item for which both hashes indicated the same - * bucket. - */ - prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; - assert(cell->key != NULL); - - /* Swap cell->{key,data} and {key,data} (evict). */ - tkey = cell->key; tdata = cell->data; - cell->key = key; cell->data = data; - key = tkey; data = tdata; - -#ifdef CKH_COUNT - ckh->nrelocs++; -#endif - - /* Find the alternate bucket for the evicted item. */ - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); - tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (tbucket == bucket) { - tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); - /* - * It may be that (tbucket == bucket) still, if the - * item's hashes both indicate this bucket. 
However, - * we are guaranteed to eventually escape this bucket - * during iteration, assuming pseudo-random item - * selection (true randomness would make infinite - * looping a remote possibility). The reason we can - * never get trapped forever is that there are two - * cases: - * - * 1) This bucket == argbucket, so we will quickly - * detect an eviction cycle and terminate. - * 2) An item was evicted to this bucket from another, - * which means that at least one item in this bucket - * has hashes that indicate distinct buckets. - */ - } - /* Check for a cycle. */ - if (tbucket == argbucket) { - *argkey = key; - *argdata = data; - return (true); - } - - bucket = tbucket; - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - } -} - -JEMALLOC_INLINE bool -ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) -{ - size_t hash1, hash2, bucket; - const void *key = *argkey; - const void *data = *argdata; - - ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); - - /* Try to insert in primary bucket. */ - bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - - /* Try to insert in secondary bucket. */ - bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) - return (false); - - /* - * Try to find a place for this item via iterative eviction/relocation. - */ - return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); -} - -/* - * Try to rebuild the hash table from scratch by inserting all items from the - * old table into the new. 
- */ -JEMALLOC_INLINE bool -ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) -{ - size_t count, i, nins; - const void *key, *data; - - count = ckh->count; - ckh->count = 0; - for (i = nins = 0; nins < count; i++) { - if (aTab[i].key != NULL) { - key = aTab[i].key; - data = aTab[i].data; - if (ckh_try_insert(ckh, &key, &data)) { - ckh->count = count; - return (true); - } - nins++; - } - } - - return (false); -} - -static bool -ckh_grow(ckh_t *ckh) -{ - bool ret; - ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; - -#ifdef CKH_COUNT - ckh->ngrows++; -#endif - - /* - * It is possible (though unlikely, given well behaved hashes) that the - * table will have to be doubled more than once in order to create a - * usable table. - */ - lg_prevbuckets = ckh->lg_curbuckets; - lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; - while (true) { - size_t usize; - - lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (tab == NULL) { - ret = true; - goto RETURN; - } - /* Swap in new table. */ - ttab = ckh->tab; - ckh->tab = tab; - tab = ttab; - ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); - break; - } - - /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); - ckh->tab = tab; - ckh->lg_curbuckets = lg_prevbuckets; - } - - ret = false; -RETURN: - return (ret); -} - -static void -ckh_shrink(ckh_t *ckh) -{ - ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; - - /* - * It is possible (though unlikely, given well behaved hashes) that the - * table rebuild will fail. 
- */ - lg_prevbuckets = ckh->lg_curbuckets; - lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); - if (usize == 0) - return; - tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (tab == NULL) { - /* - * An OOM error isn't worth propagating, since it doesn't - * prevent this or future operations from proceeding. - */ - return; - } - /* Swap in new table. */ - ttab = ckh->tab; - ckh->tab = tab; - tab = ttab; - ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; - - if (ckh_rebuild(ckh, tab) == false) { - idalloc(tab); -#ifdef CKH_COUNT - ckh->nshrinks++; -#endif - return; - } - - /* Rebuilding failed, so back out partially rebuilt table. */ - idalloc(ckh->tab); - ckh->tab = tab; - ckh->lg_curbuckets = lg_prevbuckets; -#ifdef CKH_COUNT - ckh->nshrinkfails++; -#endif -} - -bool -ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) -{ - bool ret; - size_t mincells, usize; - unsigned lg_mincells; - - assert(minitems > 0); - assert(hash != NULL); - assert(keycomp != NULL); - -#ifdef CKH_COUNT - ckh->ngrows = 0; - ckh->nshrinks = 0; - ckh->nshrinkfails = 0; - ckh->ninserts = 0; - ckh->nrelocs = 0; -#endif - ckh->prn_state = 42; /* Value doesn't really matter. */ - ckh->count = 0; - - /* - * Find the minimum power of 2 that is large enough to fit aBaseCount - * entries. We are using (2+,2) cuckoo hashing, which has an expected - * maximum load factor of at least ~0.86, so 0.75 is a conservative load - * factor that will typically allow 2^aLgMinItems to fit without ever - * growing the table. - */ - assert(LG_CKH_BUCKET_CELLS > 0); - mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; - for (lg_mincells = LG_CKH_BUCKET_CELLS; - (ZU(1) << lg_mincells) < mincells; - lg_mincells++) - ; /* Do nothing. 
*/ - ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->hash = hash; - ckh->keycomp = keycomp; - - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); - if (usize == 0) { - ret = true; - goto RETURN; - } - ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); - if (ckh->tab == NULL) { - ret = true; - goto RETURN; - } - -#ifdef JEMALLOC_DEBUG - ckh->magic = CKH_MAGIC; -#endif - - ret = false; -RETURN: - return (ret); -} - -void -ckh_delete(ckh_t *ckh) -{ - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - -#ifdef CKH_VERBOSE - malloc_printf( - "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," - " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," - " nrelocs: %"PRIu64"\n", __func__, ckh, - (unsigned long long)ckh->ngrows, - (unsigned long long)ckh->nshrinks, - (unsigned long long)ckh->nshrinkfails, - (unsigned long long)ckh->ninserts, - (unsigned long long)ckh->nrelocs); -#endif - - idalloc(ckh->tab); -#ifdef JEMALLOC_DEBUG - memset(ckh, 0x5a, sizeof(ckh_t)); -#endif -} - -size_t -ckh_count(ckh_t *ckh) -{ - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - - return (ckh->count); -} - -bool -ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) -{ - size_t i, ncells; - - for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + - LG_CKH_BUCKET_CELLS)); i < ncells; i++) { - if (ckh->tab[i].key != NULL) { - if (key != NULL) - *key = (void *)ckh->tab[i].key; - if (data != NULL) - *data = (void *)ckh->tab[i].data; - *tabind = i + 1; - return (false); - } - } - - return (true); -} - -bool -ckh_insert(ckh_t *ckh, const void *key, const void *data) -{ - bool ret; - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - assert(ckh_search(ckh, key, NULL, NULL)); - -#ifdef CKH_COUNT - ckh->ninserts++; -#endif - - while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(ckh)) { - ret = true; - goto RETURN; - } - } - - ret = false; -RETURN: - return (ret); -} - -bool 
-ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) -{ - size_t cell; - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - - cell = ckh_isearch(ckh, searchkey); - if (cell != SIZE_T_MAX) { - if (key != NULL) - *key = (void *)ckh->tab[cell].key; - if (data != NULL) - *data = (void *)ckh->tab[cell].data; - ckh->tab[cell].key = NULL; - ckh->tab[cell].data = NULL; /* Not necessary. */ - - ckh->count--; - /* Try to halve the table if it is less than 1/4 full. */ - if (ckh->count < (ZU(1) << (ckh->lg_curbuckets - + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets - > ckh->lg_minbuckets) { - /* Ignore error due to OOM. */ - ckh_shrink(ckh); - } - - return (false); - } - - return (true); -} - -bool -ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) -{ - size_t cell; - - assert(ckh != NULL); - dassert(ckh->magic == CKH_MAGIC); - - cell = ckh_isearch(ckh, searchkey); - if (cell != SIZE_T_MAX) { - if (key != NULL) - *key = (void *)ckh->tab[cell].key; - if (data != NULL) - *data = (void *)ckh->tab[cell].data; - return (false); - } - - return (true); -} - -void -ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) -{ - size_t ret1, ret2; - uint64_t h; - - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - - h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. - */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - ret1 = h; - ret2 = hash(key, strlen((const char *)key), - 0x8432a476666bbc13U); - } - - *hash1 = ret1; - *hash2 = ret2; -} - -bool -ckh_string_keycomp(const void *k1, const void *k2) -{ - - assert(k1 != NULL); - assert(k2 != NULL); - - return (strcmp((char *)k1, (char *)k2) ? 
false : true); -} - -void -ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2) -{ - size_t ret1, ret2; - uint64_t h; - union { - const void *v; - uint64_t i; - } u; - - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - - assert(sizeof(u.v) == sizeof(u.i)); -#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) - u.i = 0; -#endif - u.v = key; - h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. - */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - assert(SIZEOF_PTR == 8); - ret1 = h; - ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); - } - - *hash1 = ret1; - *hash2 = ret2; -} - -bool -ckh_pointer_keycomp(const void *k1, const void *k2) -{ - - return ((k1 == k2) ? true : false); -} diff --git a/jemalloc/src/ctl.c b/jemalloc/src/ctl.c deleted file mode 100644 index e5336d3..0000000 --- a/jemalloc/src/ctl.c +++ /dev/null @@ -1,1670 +0,0 @@ -#define JEMALLOC_CTL_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -/* - * ctl_mtx protects the following: - * - ctl_stats.* - * - opt_prof_active - * - swap_enabled - * - swap_prezeroed - */ -static malloc_mutex_t ctl_mtx; -static bool ctl_initialized; -static uint64_t ctl_epoch; -static ctl_stats_t ctl_stats; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -#define CTL_PROTO(n) \ -static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen); - -#define INDEX_PROTO(n) \ -const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ - size_t i); - -#ifdef JEMALLOC_STATS -static bool ctl_arena_init(ctl_arena_stats_t *astats); -#endif -static void ctl_arena_clear(ctl_arena_stats_t *astats); -#ifdef JEMALLOC_STATS -static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, - arena_t *arena); -static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, - ctl_arena_stats_t *astats); -#endif -static void ctl_arena_refresh(arena_t *arena, unsigned i); -static void ctl_refresh(void); -static bool ctl_init(void); -static int ctl_lookup(const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp); - -CTL_PROTO(version) -CTL_PROTO(epoch) -#ifdef JEMALLOC_TCACHE -CTL_PROTO(tcache_flush) -#endif -CTL_PROTO(thread_arena) -#ifdef JEMALLOC_STATS -CTL_PROTO(thread_allocated) -CTL_PROTO(thread_allocatedp) -CTL_PROTO(thread_deallocated) -CTL_PROTO(thread_deallocatedp) -#endif -CTL_PROTO(config_debug) -CTL_PROTO(config_dss) -CTL_PROTO(config_dynamic_page_shift) -CTL_PROTO(config_fill) -CTL_PROTO(config_lazy_lock) -CTL_PROTO(config_prof) -CTL_PROTO(config_prof_libgcc) -CTL_PROTO(config_prof_libunwind) -CTL_PROTO(config_stats) -CTL_PROTO(config_swap) -CTL_PROTO(config_sysv) -CTL_PROTO(config_tcache) -CTL_PROTO(config_tiny) -CTL_PROTO(config_tls) -CTL_PROTO(config_xmalloc) -CTL_PROTO(opt_abort) -CTL_PROTO(opt_lg_qspace_max) -CTL_PROTO(opt_lg_cspace_max) -CTL_PROTO(opt_lg_chunk) -CTL_PROTO(opt_narenas) -CTL_PROTO(opt_lg_dirty_mult) -CTL_PROTO(opt_stats_print) -#ifdef JEMALLOC_FILL -CTL_PROTO(opt_junk) -CTL_PROTO(opt_zero) -#endif -#ifdef JEMALLOC_SYSV -CTL_PROTO(opt_sysv) -#endif -#ifdef JEMALLOC_XMALLOC -CTL_PROTO(opt_xmalloc) -#endif -#ifdef JEMALLOC_TCACHE -CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_gc_sweep) -#endif -#ifdef JEMALLOC_PROF 
-CTL_PROTO(opt_prof) -CTL_PROTO(opt_prof_prefix) -CTL_PROTO(opt_prof_active) -CTL_PROTO(opt_lg_prof_bt_max) -CTL_PROTO(opt_lg_prof_sample) -CTL_PROTO(opt_lg_prof_interval) -CTL_PROTO(opt_prof_gdump) -CTL_PROTO(opt_prof_leak) -CTL_PROTO(opt_prof_accum) -CTL_PROTO(opt_lg_prof_tcmax) -#endif -#ifdef JEMALLOC_SWAP -CTL_PROTO(opt_overcommit) -#endif -CTL_PROTO(arenas_bin_i_size) -CTL_PROTO(arenas_bin_i_nregs) -CTL_PROTO(arenas_bin_i_run_size) -INDEX_PROTO(arenas_bin_i) -CTL_PROTO(arenas_lrun_i_size) -INDEX_PROTO(arenas_lrun_i) -CTL_PROTO(arenas_narenas) -CTL_PROTO(arenas_initialized) -CTL_PROTO(arenas_quantum) -CTL_PROTO(arenas_cacheline) -CTL_PROTO(arenas_subpage) -CTL_PROTO(arenas_pagesize) -CTL_PROTO(arenas_chunksize) -#ifdef JEMALLOC_TINY -CTL_PROTO(arenas_tspace_min) -CTL_PROTO(arenas_tspace_max) -#endif -CTL_PROTO(arenas_qspace_min) -CTL_PROTO(arenas_qspace_max) -CTL_PROTO(arenas_cspace_min) -CTL_PROTO(arenas_cspace_max) -CTL_PROTO(arenas_sspace_min) -CTL_PROTO(arenas_sspace_max) -#ifdef JEMALLOC_TCACHE -CTL_PROTO(arenas_tcache_max) -#endif -CTL_PROTO(arenas_ntbins) -CTL_PROTO(arenas_nqbins) -CTL_PROTO(arenas_ncbins) -CTL_PROTO(arenas_nsbins) -CTL_PROTO(arenas_nbins) -#ifdef JEMALLOC_TCACHE -CTL_PROTO(arenas_nhbins) -#endif -CTL_PROTO(arenas_nlruns) -CTL_PROTO(arenas_purge) -#ifdef JEMALLOC_PROF -CTL_PROTO(prof_active) -CTL_PROTO(prof_dump) -CTL_PROTO(prof_interval) -#endif -#ifdef JEMALLOC_STATS -CTL_PROTO(stats_chunks_current) -CTL_PROTO(stats_chunks_total) -CTL_PROTO(stats_chunks_high) -CTL_PROTO(stats_huge_allocated) -CTL_PROTO(stats_huge_nmalloc) -CTL_PROTO(stats_huge_ndalloc) -CTL_PROTO(stats_arenas_i_small_allocated) -CTL_PROTO(stats_arenas_i_small_nmalloc) -CTL_PROTO(stats_arenas_i_small_ndalloc) -CTL_PROTO(stats_arenas_i_small_nrequests) -CTL_PROTO(stats_arenas_i_large_allocated) -CTL_PROTO(stats_arenas_i_large_nmalloc) -CTL_PROTO(stats_arenas_i_large_ndalloc) -CTL_PROTO(stats_arenas_i_large_nrequests) -CTL_PROTO(stats_arenas_i_bins_j_allocated) 
-CTL_PROTO(stats_arenas_i_bins_j_nmalloc) -CTL_PROTO(stats_arenas_i_bins_j_ndalloc) -CTL_PROTO(stats_arenas_i_bins_j_nrequests) -#ifdef JEMALLOC_TCACHE -CTL_PROTO(stats_arenas_i_bins_j_nfills) -CTL_PROTO(stats_arenas_i_bins_j_nflushes) -#endif -CTL_PROTO(stats_arenas_i_bins_j_nruns) -CTL_PROTO(stats_arenas_i_bins_j_nreruns) -CTL_PROTO(stats_arenas_i_bins_j_highruns) -CTL_PROTO(stats_arenas_i_bins_j_curruns) -INDEX_PROTO(stats_arenas_i_bins_j) -CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) -CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) -CTL_PROTO(stats_arenas_i_lruns_j_nrequests) -CTL_PROTO(stats_arenas_i_lruns_j_highruns) -CTL_PROTO(stats_arenas_i_lruns_j_curruns) -INDEX_PROTO(stats_arenas_i_lruns_j) -#endif -CTL_PROTO(stats_arenas_i_nthreads) -CTL_PROTO(stats_arenas_i_pactive) -CTL_PROTO(stats_arenas_i_pdirty) -#ifdef JEMALLOC_STATS -CTL_PROTO(stats_arenas_i_mapped) -CTL_PROTO(stats_arenas_i_npurge) -CTL_PROTO(stats_arenas_i_nmadvise) -CTL_PROTO(stats_arenas_i_purged) -#endif -INDEX_PROTO(stats_arenas_i) -#ifdef JEMALLOC_STATS -CTL_PROTO(stats_cactive) -CTL_PROTO(stats_allocated) -CTL_PROTO(stats_active) -CTL_PROTO(stats_mapped) -#endif -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS -CTL_PROTO(swap_avail) -# endif -CTL_PROTO(swap_prezeroed) -CTL_PROTO(swap_nfds) -CTL_PROTO(swap_fds) -#endif - -/******************************************************************************/ -/* mallctl tree. */ - -/* Maximum tree depth. */ -#define CTL_MAX_DEPTH 6 - -#define NAME(n) true, {.named = {n -#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL -#define CTL(c) 0, NULL}}, c##_ctl - -/* - * Only handles internal indexed nodes, since there are currently no external - * ones. 
- */ -#define INDEX(i) false, {.indexed = {i##_index}}, NULL - -#ifdef JEMALLOC_TCACHE -static const ctl_node_t tcache_node[] = { - {NAME("flush"), CTL(tcache_flush)} -}; -#endif - -static const ctl_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)} -#ifdef JEMALLOC_STATS - , - {NAME("allocated"), CTL(thread_allocated)}, - {NAME("allocatedp"), CTL(thread_allocatedp)}, - {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)} -#endif -}; - -static const ctl_node_t config_node[] = { - {NAME("debug"), CTL(config_debug)}, - {NAME("dss"), CTL(config_dss)}, - {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("swap"), CTL(config_swap)}, - {NAME("sysv"), CTL(config_sysv)}, - {NAME("tcache"), CTL(config_tcache)}, - {NAME("tiny"), CTL(config_tiny)}, - {NAME("tls"), CTL(config_tls)}, - {NAME("xmalloc"), CTL(config_xmalloc)} -}; - -static const ctl_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, - {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, - {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, - {NAME("stats_print"), CTL(opt_stats_print)} -#ifdef JEMALLOC_FILL - , - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)} -#endif -#ifdef JEMALLOC_SYSV - , - {NAME("sysv"), CTL(opt_sysv)} -#endif -#ifdef JEMALLOC_XMALLOC - , - {NAME("xmalloc"), CTL(opt_xmalloc)} -#endif -#ifdef JEMALLOC_TCACHE - , - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} -#endif -#ifdef JEMALLOC_PROF - , - {NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, 
- {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} -#endif -#ifdef JEMALLOC_SWAP - , - {NAME("overcommit"), CTL(opt_overcommit)} -#endif -}; - -static const ctl_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("run_size"), CTL(arenas_bin_i_run_size)} -}; -static const ctl_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(arenas_bin_i)} -}; - -static const ctl_node_t arenas_bin_node[] = { - {INDEX(arenas_bin_i)} -}; - -static const ctl_node_t arenas_lrun_i_node[] = { - {NAME("size"), CTL(arenas_lrun_i_size)} -}; -static const ctl_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(arenas_lrun_i)} -}; - -static const ctl_node_t arenas_lrun_node[] = { - {INDEX(arenas_lrun_i)} -}; - -static const ctl_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("initialized"), CTL(arenas_initialized)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("cacheline"), CTL(arenas_cacheline)}, - {NAME("subpage"), CTL(arenas_subpage)}, - {NAME("pagesize"), CTL(arenas_pagesize)}, - {NAME("chunksize"), CTL(arenas_chunksize)}, -#ifdef JEMALLOC_TINY - {NAME("tspace_min"), CTL(arenas_tspace_min)}, - {NAME("tspace_max"), CTL(arenas_tspace_max)}, -#endif - {NAME("qspace_min"), CTL(arenas_qspace_min)}, - {NAME("qspace_max"), CTL(arenas_qspace_max)}, - {NAME("cspace_min"), CTL(arenas_cspace_min)}, - {NAME("cspace_max"), CTL(arenas_cspace_max)}, - {NAME("sspace_min"), CTL(arenas_sspace_min)}, - {NAME("sspace_max"), CTL(arenas_sspace_max)}, -#ifdef JEMALLOC_TCACHE - {NAME("tcache_max"), CTL(arenas_tcache_max)}, -#endif - {NAME("ntbins"), 
CTL(arenas_ntbins)}, - {NAME("nqbins"), CTL(arenas_nqbins)}, - {NAME("ncbins"), CTL(arenas_ncbins)}, - {NAME("nsbins"), CTL(arenas_nsbins)}, - {NAME("nbins"), CTL(arenas_nbins)}, -#ifdef JEMALLOC_TCACHE - {NAME("nhbins"), CTL(arenas_nhbins)}, -#endif - {NAME("bin"), CHILD(arenas_bin)}, - {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(arenas_lrun)}, - {NAME("purge"), CTL(arenas_purge)} -}; - -#ifdef JEMALLOC_PROF -static const ctl_node_t prof_node[] = { - {NAME("active"), CTL(prof_active)}, - {NAME("dump"), CTL(prof_dump)}, - {NAME("interval"), CTL(prof_interval)} -}; -#endif - -#ifdef JEMALLOC_STATS -static const ctl_node_t stats_chunks_node[] = { - {NAME("current"), CTL(stats_chunks_current)}, - {NAME("total"), CTL(stats_chunks_total)}, - {NAME("high"), CTL(stats_chunks_high)} -}; - -static const ctl_node_t stats_huge_node[] = { - {NAME("allocated"), CTL(stats_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_huge_ndalloc)} -}; - -static const ctl_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} -}; - -static const ctl_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} -}; - -static const ctl_node_t stats_arenas_i_bins_j_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, -#ifdef JEMALLOC_TCACHE - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), 
CTL(stats_arenas_i_bins_j_nflushes)}, -#endif - {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, - {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, - {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} -}; -static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_bins_j)} -}; - -static const ctl_node_t stats_arenas_i_bins_node[] = { - {INDEX(stats_arenas_i_bins_j)} -}; - -static const ctl_node_t stats_arenas_i_lruns_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)}, - {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} -}; -static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(stats_arenas_i_lruns_j)} -}; - -static const ctl_node_t stats_arenas_i_lruns_node[] = { - {INDEX(stats_arenas_i_lruns_j)} -}; -#endif - -static const ctl_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} -#ifdef JEMALLOC_STATS - , - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("npurge"), CTL(stats_arenas_i_npurge)}, - {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, - {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("small"), CHILD(stats_arenas_i_small)}, - {NAME("large"), CHILD(stats_arenas_i_large)}, - {NAME("bins"), CHILD(stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(stats_arenas_i_lruns)} -#endif -}; -static const ctl_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(stats_arenas_i)} -}; - -static const ctl_node_t stats_arenas_node[] = { - {INDEX(stats_arenas_i)} -}; - -static const ctl_node_t stats_node[] = { -#ifdef JEMALLOC_STATS - {NAME("cactive"), CTL(stats_cactive)}, - {NAME("allocated"), 
CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("chunks"), CHILD(stats_chunks)}, - {NAME("huge"), CHILD(stats_huge)}, -#endif - {NAME("arenas"), CHILD(stats_arenas)} -}; - -#ifdef JEMALLOC_SWAP -static const ctl_node_t swap_node[] = { -# ifdef JEMALLOC_STATS - {NAME("avail"), CTL(swap_avail)}, -# endif - {NAME("prezeroed"), CTL(swap_prezeroed)}, - {NAME("nfds"), CTL(swap_nfds)}, - {NAME("fds"), CTL(swap_fds)} -}; -#endif - -static const ctl_node_t root_node[] = { - {NAME("version"), CTL(version)}, - {NAME("epoch"), CTL(epoch)}, -#ifdef JEMALLOC_TCACHE - {NAME("tcache"), CHILD(tcache)}, -#endif - {NAME("thread"), CHILD(thread)}, - {NAME("config"), CHILD(config)}, - {NAME("opt"), CHILD(opt)}, - {NAME("arenas"), CHILD(arenas)}, -#ifdef JEMALLOC_PROF - {NAME("prof"), CHILD(prof)}, -#endif - {NAME("stats"), CHILD(stats)} -#ifdef JEMALLOC_SWAP - , - {NAME("swap"), CHILD(swap)} -#endif -}; -static const ctl_node_t super_root_node[] = { - {NAME(""), CHILD(root)} -}; - -#undef NAME -#undef CHILD -#undef CTL -#undef INDEX - -/******************************************************************************/ - -#ifdef JEMALLOC_STATS -static bool -ctl_arena_init(ctl_arena_stats_t *astats) -{ - - if (astats->bstats == NULL) { - astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins * - sizeof(malloc_bin_stats_t)); - if (astats->bstats == NULL) - return (true); - } - if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (astats->lstats == NULL) - return (true); - } - - return (false); -} -#endif - -static void -ctl_arena_clear(ctl_arena_stats_t *astats) -{ - - astats->pactive = 0; - astats->pdirty = 0; -#ifdef JEMALLOC_STATS - memset(&astats->astats, 0, sizeof(arena_stats_t)); - astats->allocated_small = 0; - astats->nmalloc_small = 0; - astats->ndalloc_small = 0; - astats->nrequests_small = 0; - memset(astats->bstats, 0, nbins * 
sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); -#endif -} - -#ifdef JEMALLOC_STATS -static void -ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) -{ - unsigned i; - - arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats); - - for (i = 0; i < nbins; i++) { - cstats->allocated_small += cstats->bstats[i].allocated; - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; - } -} - -static void -ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) -{ - unsigned i; - - sstats->pactive += astats->pactive; - sstats->pdirty += astats->pdirty; - - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; - - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; - - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; - - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].highruns += astats->lstats[i].highruns; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } - - for (i = 0; i < nbins; i++) { - sstats->bstats[i].allocated += astats->bstats[i].allocated; - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - 
sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; -#ifdef JEMALLOC_TCACHE - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += astats->bstats[i].nflushes; -#endif - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].highruns += astats->bstats[i].highruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } -} -#endif - -static void -ctl_arena_refresh(arena_t *arena, unsigned i) -{ - ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; - - ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; -#ifdef JEMALLOC_STATS - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); -#else - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; -#endif -} - -static void -ctl_refresh(void) -{ - unsigned i; - arena_t *tarenas[narenas]; - -#ifdef JEMALLOC_STATS - malloc_mutex_lock(&chunks_mtx); - ctl_stats.chunks.current = stats_chunks.curchunks; - ctl_stats.chunks.total = stats_chunks.nchunks; - ctl_stats.chunks.high = stats_chunks.highchunks; - malloc_mutex_unlock(&chunks_mtx); - - malloc_mutex_lock(&huge_mtx); - ctl_stats.huge.allocated = huge_allocated; - ctl_stats.huge.nmalloc = huge_nmalloc; - ctl_stats.huge.ndalloc = huge_ndalloc; - malloc_mutex_unlock(&huge_mtx); -#endif - - /* - * Clear sum stats, since they will be merged into by - * ctl_arena_refresh(). 
- */ - ctl_stats.arenas[narenas].nthreads = 0; - ctl_arena_clear(&ctl_stats.arenas[narenas]); - - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; - else - ctl_stats.arenas[i].nthreads = 0; - } - malloc_mutex_unlock(&arenas_lock); - for (i = 0; i < narenas; i++) { - bool initialized = (tarenas[i] != NULL); - - ctl_stats.arenas[i].initialized = initialized; - if (initialized) - ctl_arena_refresh(tarenas[i], i); - } - -#ifdef JEMALLOC_STATS - ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small - + ctl_stats.arenas[narenas].astats.allocated_large - + ctl_stats.huge.allocated; - ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) - + ctl_stats.huge.allocated; - ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); - -# ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); - ctl_stats.swap_avail = swap_avail; - malloc_mutex_unlock(&swap_mtx); -# endif -#endif - - ctl_epoch++; -} - -static bool -ctl_init(void) -{ - bool ret; - - malloc_mutex_lock(&ctl_mtx); - if (ctl_initialized == false) { -#ifdef JEMALLOC_STATS - unsigned i; -#endif - - /* - * Allocate space for one extra arena stats element, which - * contains summed stats across all arenas. - */ - ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( - (narenas + 1) * sizeof(ctl_arena_stats_t)); - if (ctl_stats.arenas == NULL) { - ret = true; - goto RETURN; - } - memset(ctl_stats.arenas, 0, (narenas + 1) * - sizeof(ctl_arena_stats_t)); - - /* - * Initialize all stats structures, regardless of whether they - * ever get used. Lazy initialization would allow errors to - * cause inconsistent state to be viewable by the application. 
- */ -#ifdef JEMALLOC_STATS - for (i = 0; i <= narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) { - ret = true; - goto RETURN; - } - } -#endif - ctl_stats.arenas[narenas].initialized = true; - - ctl_epoch = 0; - ctl_refresh(); - ctl_initialized = true; - } - - ret = false; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, - size_t *depthp) -{ - int ret; - const char *elm, *tdot, *dot; - size_t elen, i, j; - const ctl_node_t *node; - - elm = name; - /* Equivalent to strchrnul(). */ - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0'); - elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); - if (elen == 0) { - ret = ENOENT; - goto RETURN; - } - node = super_root_node; - for (i = 0; i < *depthp; i++) { - assert(node->named); - assert(node->u.named.nchildren > 0); - if (node->u.named.children[0].named) { - const ctl_node_t *pnode = node; - - /* Children are named. */ - for (j = 0; j < node->u.named.nchildren; j++) { - const ctl_node_t *child = - &node->u.named.children[j]; - if (strlen(child->u.named.name) == elen - && strncmp(elm, child->u.named.name, - elen) == 0) { - node = child; - if (nodesp != NULL) - nodesp[i] = node; - mibp[i] = j; - break; - } - } - if (node == pnode) { - ret = ENOENT; - goto RETURN; - } - } else { - unsigned long index; - const ctl_node_t *inode; - - /* Children are indexed. */ - index = strtoul(elm, NULL, 10); - if (index == ULONG_MAX) { - ret = ENOENT; - goto RETURN; - } - - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mibp, *depthp, - index); - if (node == NULL) { - ret = ENOENT; - goto RETURN; - } - - if (nodesp != NULL) - nodesp[i] = node; - mibp[i] = (size_t)index; - } - - if (node->ctl != NULL) { - /* Terminal node. */ - if (*dot != '\0') { - /* - * The name contains more elements than are - * in this path through the tree. 
- */ - ret = ENOENT; - goto RETURN; - } - /* Complete lookup successful. */ - *depthp = i + 1; - break; - } - - /* Update elm. */ - if (*dot == '\0') { - /* No more elements. */ - ret = ENOENT; - goto RETURN; - } - elm = &dot[1]; - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : - strchr(elm, '\0'); - elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); - } - - ret = 0; -RETURN: - return (ret); -} - -int -ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - int ret; - size_t depth; - ctl_node_t const *nodes[CTL_MAX_DEPTH]; - size_t mib[CTL_MAX_DEPTH]; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto RETURN; - } - - depth = CTL_MAX_DEPTH; - ret = ctl_lookup(name, nodes, mib, &depth); - if (ret != 0) - goto RETURN; - - if (nodes[depth-1]->ctl == NULL) { - /* The name refers to a partial path through the ctl tree. */ - ret = ENOENT; - goto RETURN; - } - - ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); -RETURN: - return(ret); -} - -int -ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) -{ - int ret; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto RETURN; - } - - ret = ctl_lookup(name, NULL, mibp, miblenp); -RETURN: - return(ret); -} - -int -ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - const ctl_node_t *node; - size_t i; - - if (ctl_initialized == false && ctl_init()) { - ret = EAGAIN; - goto RETURN; - } - - /* Iterate down the tree. */ - node = super_root_node; - for (i = 0; i < miblen; i++) { - if (node->u.named.children[0].named) { - /* Children are named. */ - if (node->u.named.nchildren <= mib[i]) { - ret = ENOENT; - goto RETURN; - } - node = &node->u.named.children[mib[i]]; - } else { - const ctl_node_t *inode; - - /* Indexed element. 
*/ - inode = &node->u.named.children[0]; - node = inode->u.indexed.index(mib, miblen, mib[i]); - if (node == NULL) { - ret = ENOENT; - goto RETURN; - } - } - } - - /* Call the ctl function. */ - if (node->ctl == NULL) { - /* Partial MIB. */ - ret = ENOENT; - goto RETURN; - } - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); - -RETURN: - return(ret); -} - -bool -ctl_boot(void) -{ - - if (malloc_mutex_init(&ctl_mtx)) - return (true); - - ctl_initialized = false; - - return (false); -} - -/******************************************************************************/ -/* *_ctl() functions. */ - -#define READONLY() do { \ - if (newp != NULL || newlen != 0) { \ - ret = EPERM; \ - goto RETURN; \ - } \ -} while (0) - -#define WRITEONLY() do { \ - if (oldp != NULL || oldlenp != NULL) { \ - ret = EPERM; \ - goto RETURN; \ - } \ -} while (0) - -#define VOID() do { \ - READONLY(); \ - WRITEONLY(); \ -} while (0) - -#define READ(v, t) do { \ - if (oldp != NULL && oldlenp != NULL) { \ - if (*oldlenp != sizeof(t)) { \ - size_t copylen = (sizeof(t) <= *oldlenp) \ - ? sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&v, copylen); \ - ret = EINVAL; \ - goto RETURN; \ - } else \ - *(t *)oldp = v; \ - } \ -} while (0) - -#define WRITE(v, t) do { \ - if (newp != NULL) { \ - if (newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto RETURN; \ - } \ - v = *(t *)newp; \ - } \ -} while (0) - -#define CTL_RO_GEN(n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - malloc_mutex_lock(&ctl_mtx); \ - READONLY(); \ - oldval = v; \ - READ(oldval, t); \ - \ - ret = 0; \ -RETURN: \ - malloc_mutex_unlock(&ctl_mtx); \ - return (ret); \ -} - -/* - * ctl_mtx is not acquired, under the assumption that no pertinent data will - * mutate during the call. 
- */ -#define CTL_RO_NL_GEN(n, v, t) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = v; \ - READ(oldval, t); \ - \ - ret = 0; \ -RETURN: \ - return (ret); \ -} - -#define CTL_RO_TRUE_GEN(n) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - bool oldval; \ - \ - READONLY(); \ - oldval = true; \ - READ(oldval, bool); \ - \ - ret = 0; \ -RETURN: \ - return (ret); \ -} - -#define CTL_RO_FALSE_GEN(n) \ -static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ -{ \ - int ret; \ - bool oldval; \ - \ - READONLY(); \ - oldval = false; \ - READ(oldval, bool); \ - \ - ret = 0; \ -RETURN: \ - return (ret); \ -} - -CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) - -static int -epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - uint64_t newval; - - malloc_mutex_lock(&ctl_mtx); - newval = 0; - WRITE(newval, uint64_t); - if (newval != 0) - ctl_refresh(); - READ(ctl_epoch, uint64_t); - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -#ifdef JEMALLOC_TCACHE -static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - tcache_t *tcache; - - VOID(); - - tcache = TCACHE_GET(); - if (tcache == NULL) { - ret = 0; - goto RETURN; - } - tcache_destroy(tcache); - TCACHE_SET(NULL); - - ret = 0; -RETURN: - return (ret); -} -#endif - -static int -thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned newind, oldind; - - newind = oldind = choose_arena()->ind; - WRITE(newind, unsigned); - READ(oldind, unsigned); - if (newind != oldind) { - arena_t *arena; - - if 
(newind >= narenas) { - /* New arena index is out of range. */ - ret = EFAULT; - goto RETURN; - } - - /* Initialize arena if necessary. */ - malloc_mutex_lock(&arenas_lock); - if ((arena = arenas[newind]) == NULL) - arena = arenas_extend(newind); - arenas[oldind]->nthreads--; - arenas[newind]->nthreads++; - malloc_mutex_unlock(&arenas_lock); - if (arena == NULL) { - ret = EAGAIN; - goto RETURN; - } - - /* Set new arena association. */ - ARENA_SET(arena); -#ifdef JEMALLOC_TCACHE - { - tcache_t *tcache = TCACHE_GET(); - if (tcache != NULL) - tcache->arena = arena; - } -#endif - } - - ret = 0; -RETURN: - return (ret); -} - -#ifdef JEMALLOC_STATS -CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *); -CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t); -CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *); -#endif - -/******************************************************************************/ - -#ifdef JEMALLOC_DEBUG -CTL_RO_TRUE_GEN(config_debug) -#else -CTL_RO_FALSE_GEN(config_debug) -#endif - -#ifdef JEMALLOC_DSS -CTL_RO_TRUE_GEN(config_dss) -#else -CTL_RO_FALSE_GEN(config_dss) -#endif - -#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT -CTL_RO_TRUE_GEN(config_dynamic_page_shift) -#else -CTL_RO_FALSE_GEN(config_dynamic_page_shift) -#endif - -#ifdef JEMALLOC_FILL -CTL_RO_TRUE_GEN(config_fill) -#else -CTL_RO_FALSE_GEN(config_fill) -#endif - -#ifdef JEMALLOC_LAZY_LOCK -CTL_RO_TRUE_GEN(config_lazy_lock) -#else -CTL_RO_FALSE_GEN(config_lazy_lock) -#endif - -#ifdef JEMALLOC_PROF -CTL_RO_TRUE_GEN(config_prof) -#else -CTL_RO_FALSE_GEN(config_prof) -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -CTL_RO_TRUE_GEN(config_prof_libgcc) -#else -CTL_RO_FALSE_GEN(config_prof_libgcc) -#endif - -#ifdef JEMALLOC_PROF_LIBUNWIND -CTL_RO_TRUE_GEN(config_prof_libunwind) -#else -CTL_RO_FALSE_GEN(config_prof_libunwind) -#endif - -#ifdef JEMALLOC_STATS -CTL_RO_TRUE_GEN(config_stats) -#else 
-CTL_RO_FALSE_GEN(config_stats) -#endif - -#ifdef JEMALLOC_SWAP -CTL_RO_TRUE_GEN(config_swap) -#else -CTL_RO_FALSE_GEN(config_swap) -#endif - -#ifdef JEMALLOC_SYSV -CTL_RO_TRUE_GEN(config_sysv) -#else -CTL_RO_FALSE_GEN(config_sysv) -#endif - -#ifdef JEMALLOC_TCACHE -CTL_RO_TRUE_GEN(config_tcache) -#else -CTL_RO_FALSE_GEN(config_tcache) -#endif - -#ifdef JEMALLOC_TINY -CTL_RO_TRUE_GEN(config_tiny) -#else -CTL_RO_FALSE_GEN(config_tiny) -#endif - -#ifdef JEMALLOC_TLS -CTL_RO_TRUE_GEN(config_tls) -#else -CTL_RO_FALSE_GEN(config_tls) -#endif - -#ifdef JEMALLOC_XMALLOC -CTL_RO_TRUE_GEN(config_xmalloc) -#else -CTL_RO_FALSE_GEN(config_xmalloc) -#endif - -/******************************************************************************/ - -CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t) -CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t) -CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) -CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) -CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) -#ifdef JEMALLOC_FILL -CTL_RO_NL_GEN(opt_junk, opt_junk, bool) -CTL_RO_NL_GEN(opt_zero, opt_zero, bool) -#endif -#ifdef JEMALLOC_SYSV -CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool) -#endif -#ifdef JEMALLOC_XMALLOC -CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool) -#endif -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t) -#endif -#ifdef JEMALLOC_PROF -CTL_RO_NL_GEN(opt_prof, opt_prof, bool) -CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *) -CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable. 
*/ -CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t) -CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) -CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool) -CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool) -CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t) -#endif -#ifdef JEMALLOC_SWAP -CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool) -#endif - -/******************************************************************************/ - -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) -const ctl_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nbins) - return (NULL); - return (super_arenas_bin_i_node); -} - -CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t) -const ctl_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nlclasses) - return (NULL); - return (super_arenas_lrun_i_node); -} - -CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned) - -static int -arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned nread, i; - - malloc_mutex_lock(&ctl_mtx); - READONLY(); - if (*oldlenp != narenas * sizeof(bool)) { - ret = EINVAL; - nread = (*oldlenp < narenas * sizeof(bool)) - ? 
(*oldlenp / sizeof(bool)) : narenas; - } else { - ret = 0; - nread = narenas; - } - - for (i = 0; i < nread; i++) - ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; - -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) -CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t) -CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t) -CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t) -CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t) -#ifdef JEMALLOC_TINY -CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t) -CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t) -#endif -CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t) -CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t) -CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t) -CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t) -CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t) -CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) -#endif -CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned) -CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned) -CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned) -CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned) -CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned) -#ifdef JEMALLOC_TCACHE -CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) -#endif -CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t) - -static int -arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - unsigned arena; - - WRITEONLY(); - arena = UINT_MAX; - WRITE(arena, unsigned); - if (newp != NULL && arena >= narenas) { - ret = EFAULT; - goto RETURN; - } else { - arena_t *tarenas[narenas]; - - malloc_mutex_lock(&arenas_lock); - memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); - malloc_mutex_unlock(&arenas_lock); - - if (arena == UINT_MAX) { - unsigned i; - for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) - 
arena_purge_all(tarenas[i]); - } - } else { - assert(arena < narenas); - if (tarenas[arena] != NULL) - arena_purge_all(tarenas[arena]); - } - } - - ret = 0; -RETURN: - return (ret); -} - -/******************************************************************************/ - -#ifdef JEMALLOC_PROF -static int -prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - bool oldval; - - malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */ - oldval = opt_prof_active; - if (newp != NULL) { - /* - * The memory barriers will tend to make opt_prof_active - * propagate faster on systems with weak memory ordering. - */ - mb_write(); - WRITE(opt_prof_active, bool); - mb_write(); - } - READ(oldval, bool); - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -static int -prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - const char *filename = NULL; - - WRITEONLY(); - WRITE(filename, const char *); - - if (prof_mdump(filename)) { - ret = EFAULT; - goto RETURN; - } - - ret = 0; -RETURN: - return (ret); -} - -CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t) -#endif - -/******************************************************************************/ - -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t) -CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t) -CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t) -CTL_RO_GEN(stats_huge_allocated, huge_allocated, size_t) -CTL_RO_GEN(stats_huge_nmalloc, huge_nmalloc, uint64_t) -CTL_RO_GEN(stats_huge_ndalloc, huge_ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_allocated, - ctl_stats.arenas[mib[2]].allocated_small, size_t) -CTL_RO_GEN(stats_arenas_i_small_nmalloc, - ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_small_ndalloc, - ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) 
-CTL_RO_GEN(stats_arenas_i_small_nrequests, - ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_allocated, - ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) -CTL_RO_GEN(stats_arenas_i_large_nmalloc, - ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_ndalloc, - ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) -CTL_RO_GEN(stats_arenas_i_large_nrequests, - ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) - -CTL_RO_GEN(stats_arenas_i_bins_j_allocated, - ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nrequests, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) -#ifdef JEMALLOC_TCACHE -CTL_RO_GEN(stats_arenas_i_bins_j_nfills, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nflushes, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) -#endif -CTL_RO_GEN(stats_arenas_i_bins_j_nruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_nreruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_GEN(stats_arenas_i_bins_j_highruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t) -CTL_RO_GEN(stats_arenas_i_bins_j_curruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) - -const ctl_node_t * -stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) -{ - - if (j > nbins) - return (NULL); - return (super_stats_arenas_i_bins_j_node); -} - -CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) 
-CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_curruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) -CTL_RO_GEN(stats_arenas_i_lruns_j_highruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) - -const ctl_node_t * -stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) -{ - - if (j > nlclasses) - return (NULL); - return (super_stats_arenas_i_lruns_j_node); -} - -#endif -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) -CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, - size_t) -CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, - uint64_t) -CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, - uint64_t) -CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, - uint64_t) -#endif - -const ctl_node_t * -stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) -{ - const ctl_node_t * ret; - - malloc_mutex_lock(&ctl_mtx); - if (ctl_stats.arenas[i].initialized == false) { - ret = NULL; - goto RETURN; - } - - ret = super_stats_arenas_i_node; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -#ifdef JEMALLOC_STATS -CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) -CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_GEN(stats_active, ctl_stats.active, size_t) -CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) -#endif - -/******************************************************************************/ - -#ifdef JEMALLOC_SWAP -# ifdef JEMALLOC_STATS -CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t) -# endif - -static int -swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t 
*oldlenp, void *newp, size_t newlen) -{ - int ret; - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else { - /* - * swap_prezeroed isn't actually used by the swap code until it - * is set during a successful chunk_swap_enabled() call. We - * use it here to store the value that we'll pass to - * chunk_swap_enable() in a swap.fds mallctl(). This is not - * very clean, but the obvious alternatives are even worse. - */ - WRITE(swap_prezeroed, bool); - } - - READ(swap_prezeroed, bool); - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} - -CTL_RO_GEN(swap_nfds, swap_nfds, size_t) - -static int -swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - - malloc_mutex_lock(&ctl_mtx); - if (swap_enabled) { - READONLY(); - } else if (newp != NULL) { - size_t nfds = newlen / sizeof(int); - - { - int fds[nfds]; - - memcpy(fds, newp, nfds * sizeof(int)); - if (chunk_swap_enable(fds, nfds, swap_prezeroed)) { - ret = EFAULT; - goto RETURN; - } - } - } - - if (oldp != NULL && oldlenp != NULL) { - if (*oldlenp != swap_nfds * sizeof(int)) { - size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp) - ? 
swap_nfds * sizeof(int) : *oldlenp; - - memcpy(oldp, swap_fds, copylen); - ret = EINVAL; - goto RETURN; - } else - memcpy(oldp, swap_fds, *oldlenp); - } - - ret = 0; -RETURN: - malloc_mutex_unlock(&ctl_mtx); - return (ret); -} -#endif diff --git a/jemalloc/src/extent.c b/jemalloc/src/extent.c deleted file mode 100644 index 3c04d3a..0000000 --- a/jemalloc/src/extent.c +++ /dev/null @@ -1,41 +0,0 @@ -#define JEMALLOC_EXTENT_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ - -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) -static inline int -extent_szad_comp(extent_node_t *a, extent_node_t *b) -{ - int ret; - size_t a_size = a->size; - size_t b_size = b->size; - - ret = (a_size > b_size) - (a_size < b_size); - if (ret == 0) { - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; - - ret = (a_addr > b_addr) - (a_addr < b_addr); - } - - return (ret); -} - -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, - extent_szad_comp) -#endif - -static inline int -extent_ad_comp(extent_node_t *a, extent_node_t *b) -{ - uintptr_t a_addr = (uintptr_t)a->addr; - uintptr_t b_addr = (uintptr_t)b->addr; - - return ((a_addr > b_addr) - (a_addr < b_addr)); -} - -/* Generate red-black tree functions. 
*/ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, - extent_ad_comp) diff --git a/jemalloc/src/hash.c b/jemalloc/src/hash.c deleted file mode 100644 index cfa4da0..0000000 --- a/jemalloc/src/hash.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_HASH_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/jemalloc/src/huge.c b/jemalloc/src/huge.c deleted file mode 100644 index ac3f3a0..0000000 --- a/jemalloc/src/huge.c +++ /dev/null @@ -1,379 +0,0 @@ -#define JEMALLOC_HUGE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -#ifdef JEMALLOC_STATS -uint64_t huge_nmalloc; -uint64_t huge_ndalloc; -size_t huge_allocated; -#endif - -malloc_mutex_t huge_mtx; - -/******************************************************************************/ - -/* Tree of chunks that are stand-alone huge allocations. */ -static extent_tree_t huge; - -void * -huge_malloc(size_t size, bool zero) -{ - void *ret; - size_t csize; - extent_node_t *node; - - /* Allocate one or more contiguous chunks for this request. */ - - csize = CHUNK_CEILING(size); - if (csize == 0) { - /* size is large enough to cause size_t wrap-around. */ - return (NULL); - } - - /* Allocate an extent node with which to track the chunk. */ - node = base_node_alloc(); - if (node == NULL) - return (NULL); - - ret = chunk_alloc(csize, false, &zero); - if (ret == NULL) { - base_node_dealloc(node); - return (NULL); - } - - /* Insert node into huge. 
*/ - node->addr = ret; - node->size = csize; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(csize); - huge_nmalloc++; - huge_allocated += csize; -#endif - malloc_mutex_unlock(&huge_mtx); - -#ifdef JEMALLOC_FILL - if (zero == false) { - if (opt_junk) - memset(ret, 0xa5, csize); - else if (opt_zero) - memset(ret, 0, csize); - } -#endif - - return (ret); -} - -/* Only handles large allocations that require more than chunk alignment. */ -void * -huge_palloc(size_t size, size_t alignment, bool zero) -{ - void *ret; - size_t alloc_size, chunk_size, offset; - extent_node_t *node; - - /* - * This allocation requires alignment that is even larger than chunk - * alignment. This means that huge_malloc() isn't good enough. - * - * Allocate almost twice as many chunks as are demanded by the size or - * alignment, in order to assure the alignment can be achieved, then - * unmap leading and trailing chunks. - */ - assert(alignment > chunksize); - - chunk_size = CHUNK_CEILING(size); - - if (size >= alignment) - alloc_size = chunk_size + alignment - chunksize; - else - alloc_size = (alignment << 1) - chunksize; - - /* Allocate an extent node with which to track the chunk. */ - node = base_node_alloc(); - if (node == NULL) - return (NULL); - - ret = chunk_alloc(alloc_size, false, &zero); - if (ret == NULL) { - base_node_dealloc(node); - return (NULL); - } - - offset = (uintptr_t)ret & (alignment - 1); - assert((offset & chunksize_mask) == 0); - assert(offset < alloc_size); - if (offset == 0) { - /* Trim trailing space. */ - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size - - chunk_size); - } else { - size_t trailsize; - - /* Trim leading space. */ - chunk_dealloc(ret, alignment - offset); - - ret = (void *)((uintptr_t)ret + (alignment - offset)); - - trailsize = alloc_size - (alignment - offset) - chunk_size; - if (trailsize != 0) { - /* Trim trailing space. 
*/ - assert(trailsize < alloc_size); - chunk_dealloc((void *)((uintptr_t)ret + chunk_size), - trailsize); - } - } - - /* Insert node into huge. */ - node->addr = ret; - node->size = chunk_size; - - malloc_mutex_lock(&huge_mtx); - extent_tree_ad_insert(&huge, node); -#ifdef JEMALLOC_STATS - stats_cactive_add(chunk_size); - huge_nmalloc++; - huge_allocated += chunk_size; -#endif - malloc_mutex_unlock(&huge_mtx); - -#ifdef JEMALLOC_FILL - if (zero == false) { - if (opt_junk) - memset(ret, 0xa5, chunk_size); - else if (opt_zero) - memset(ret, 0, chunk_size); - } -#endif - - return (ret); -} - -void * -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) -{ - - /* - * Avoid moving the allocation if the size class can be left the same. - */ - if (oldsize > arena_maxclass - && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { - assert(CHUNK_CEILING(oldsize) == oldsize); -#ifdef JEMALLOC_FILL - if (opt_junk && size < oldsize) { - memset((void *)((uintptr_t)ptr + size), 0x5a, - oldsize - size); - } -#endif - return (ptr); - } - - /* Reallocation would require a move. */ - return (NULL); -} - -void * -huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) -{ - void *ret; - size_t copysize; - - /* Try to avoid moving the allocation. */ - ret = huge_ralloc_no_move(ptr, oldsize, size, extra); - if (ret != NULL) - return (ret); - - /* - * size and oldsize are different enough that we need to use a - * different size class. In that case, fall back to allocating new - * space and copying. - */ - if (alignment > chunksize) - ret = huge_palloc(size + extra, alignment, zero); - else - ret = huge_malloc(size + extra, zero); - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. 
*/ - if (alignment > chunksize) - ret = huge_palloc(size, alignment, zero); - else - ret = huge_malloc(size, zero); - - if (ret == NULL) - return (NULL); - } - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; - - /* - * Use mremap(2) if this is a huge-->huge reallocation, and neither the - * source nor the destination are in swap or dss. - */ -#ifdef JEMALLOC_MREMAP_FIXED - if (oldsize >= chunksize -# ifdef JEMALLOC_SWAP - && (swap_enabled == false || (chunk_in_swap(ptr) == false && - chunk_in_swap(ret) == false)) -# endif -# ifdef JEMALLOC_DSS - && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false -# endif - ) { - size_t newsize = huge_salloc(ret); - - if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, - ret) == MAP_FAILED) { - /* - * Assuming no chunk management bugs in the allocator, - * the only documented way an error can occur here is - * if the application changed the map type for a - * portion of the old allocation. This is firmly in - * undefined behavior territory, so write a diagnostic - * message, and optionally abort. - */ - char buf[BUFERROR_BUF]; - - buferror(errno, buf, sizeof(buf)); - malloc_write(": Error in mremap(): "); - malloc_write(buf); - malloc_write("\n"); - if (opt_abort) - abort(); - memcpy(ret, ptr, copysize); - idalloc(ptr); - } else - huge_dalloc(ptr, false); - } else -#endif - { - memcpy(ret, ptr, copysize); - idalloc(ptr); - } - return (ret); -} - -void -huge_dalloc(void *ptr, bool unmap) -{ - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. 
*/ - key.addr = ptr; - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - assert(node->addr == ptr); - extent_tree_ad_remove(&huge, node); - -#ifdef JEMALLOC_STATS - stats_cactive_sub(node->size); - huge_ndalloc++; - huge_allocated -= node->size; -#endif - - malloc_mutex_unlock(&huge_mtx); - - if (unmap) { - /* Unmap chunk. */ -#ifdef JEMALLOC_FILL -#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) - if (opt_junk) - memset(node->addr, 0x5a, node->size); -#endif -#endif - chunk_dealloc(node->addr, node->size); - } - - base_node_dealloc(node); -} - -size_t -huge_salloc(const void *ptr) -{ - size_t ret; - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->size; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); -} - -#ifdef JEMALLOC_PROF -prof_ctx_t * -huge_prof_ctx_get(const void *ptr) -{ - prof_ctx_t *ret; - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - ret = node->prof_ctx; - - malloc_mutex_unlock(&huge_mtx); - - return (ret); -} - -void -huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) -{ - extent_node_t *node, key; - - malloc_mutex_lock(&huge_mtx); - - /* Extract from tree of huge allocations. */ - key.addr = __DECONST(void *, ptr); - node = extent_tree_ad_search(&huge, &key); - assert(node != NULL); - - node->prof_ctx = ctx; - - malloc_mutex_unlock(&huge_mtx); -} -#endif - -bool -huge_boot(void) -{ - - /* Initialize chunks data. 
*/ - if (malloc_mutex_init(&huge_mtx)) - return (true); - extent_tree_ad_new(&huge); - -#ifdef JEMALLOC_STATS - huge_nmalloc = 0; - huge_ndalloc = 0; - huge_allocated = 0; -#endif - - return (false); -} diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c deleted file mode 100644 index e287516..0000000 --- a/jemalloc/src/jemalloc.c +++ /dev/null @@ -1,1847 +0,0 @@ -#define JEMALLOC_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -malloc_mutex_t arenas_lock; -arena_t **arenas; -unsigned narenas; - -pthread_key_t arenas_tsd; -#ifndef NO_TLS -__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -#ifdef JEMALLOC_STATS -# ifndef NO_TLS -__thread thread_allocated_t thread_allocated_tls; -# else -pthread_key_t thread_allocated_tsd; -# endif -#endif - -/* Set to true once the allocator has been initialized. */ -static bool malloc_initialized = false; - -/* Used to let the initializing thread recursively allocate. */ -static pthread_t malloc_initializer = (unsigned long)0; - -/* Used to avoid initialization races. */ -static malloc_mutex_t init_lock = -#ifdef JEMALLOC_OSSPIN - 0 -#else - MALLOC_MUTEX_INITIALIZER -#endif - ; - -#ifdef DYNAMIC_PAGE_SHIFT -size_t pagesize; -size_t pagesize_mask; -size_t lg_pagesize; -#endif - -unsigned ncpus; - -/* Runtime configuration options. 
*/ -const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); -#ifdef JEMALLOC_DEBUG -bool opt_abort = true; -# ifdef JEMALLOC_FILL -bool opt_junk = true; -# endif -#else -bool opt_abort = false; -# ifdef JEMALLOC_FILL -bool opt_junk = false; -# endif -#endif -#ifdef JEMALLOC_SYSV -bool opt_sysv = false; -#endif -#ifdef JEMALLOC_XMALLOC -bool opt_xmalloc = false; -#endif -#ifdef JEMALLOC_FILL -bool opt_zero = false; -#endif -size_t opt_narenas = 0; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void wrtmessage(void *cbopaque, const char *s); -static void stats_print_atexit(void); -static unsigned malloc_ncpus(void); -static void arenas_cleanup(void *arg); -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -static void thread_allocated_cleanup(void *arg); -#endif -static bool malloc_conf_next(char const **opts_p, char const **k_p, - size_t *klen_p, char const **v_p, size_t *vlen_p); -static void malloc_conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); -static void malloc_conf_init(void); -static bool malloc_init_hard(void); - -/******************************************************************************/ -/* malloc_message() setup. */ - -#ifdef JEMALLOC_HAVE_ATTR -JEMALLOC_ATTR(visibility("hidden")) -#else -static -#endif -void -wrtmessage(void *cbopaque, const char *s) -{ -#ifdef JEMALLOC_CC_SILENCE - int result = -#endif - write(STDERR_FILENO, s, strlen(s)); -#ifdef JEMALLOC_CC_SILENCE - if (result < 0) - result = errno; -#endif -} - -void (*JEMALLOC_P(malloc_message))(void *, const char *s) - JEMALLOC_ATTR(visibility("default")) = wrtmessage; - -/******************************************************************************/ -/* - * Begin miscellaneous support functions. - */ - -/* Create a new arena and insert it into the arenas array at index ind. 
*/ -arena_t * -arenas_extend(unsigned ind) -{ - arena_t *ret; - - /* Allocate enough space for trailing bins. */ - ret = (arena_t *)base_alloc(offsetof(arena_t, bins) - + (sizeof(arena_bin_t) * nbins)); - if (ret != NULL && arena_new(ret, ind) == false) { - arenas[ind] = ret; - return (ret); - } - /* Only reached if there is an OOM error. */ - - /* - * OOM here is quite inconvenient to propagate, since dealing with it - * would require a check for failure in the fast path. Instead, punt - * by using arenas[0]. In practice, this is an extremely unlikely - * failure. - */ - malloc_write(": Error initializing arena\n"); - if (opt_abort) - abort(); - - return (arenas[0]); -} - -/* - * Choose an arena based on a per-thread value (slow-path code only, called - * only by choose_arena()). - */ -arena_t * -choose_arena_hard(void) -{ - arena_t *ret; - - if (narenas > 1) { - unsigned i, choose, first_null; - - choose = 0; - first_null = narenas; - malloc_mutex_lock(&arenas_lock); - assert(arenas[0] != NULL); - for (i = 1; i < narenas; i++) { - if (arenas[i] != NULL) { - /* - * Choose the first arena that has the lowest - * number of threads assigned to it. - */ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) - choose = i; - } else if (first_null == narenas) { - /* - * Record the index of the first uninitialized - * arena, in case all extant arenas are in use. - * - * NB: It is possible for there to be - * discontinuities in terms of initialized - * versus uninitialized arenas, due to the - * "thread.arena" mallctl. - */ - first_null = i; - } - } - - if (arenas[choose] == 0 || first_null == narenas) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arenas[choose]; - } else { - /* Initialize a new arena. 
*/ - ret = arenas_extend(first_null); - } - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } else { - ret = arenas[0]; - malloc_mutex_lock(&arenas_lock); - ret->nthreads++; - malloc_mutex_unlock(&arenas_lock); - } - - ARENA_SET(ret); - - return (ret); -} - -/* - * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so - * provide a wrapper. - */ -int -buferror(int errnum, char *buf, size_t buflen) -{ -#ifdef _GNU_SOURCE - char *b = strerror_r(errno, buf, buflen); - if (b != buf) { - strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; - } - return (0); -#else - return (strerror_r(errno, buf, buflen)); -#endif -} - -static void -stats_print_atexit(void) -{ - -#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) - unsigned i; - - /* - * Merge stats from extant threads. This is racy, since individual - * threads do not lock when recording tcache stats events. As a - * consequence, the final stats may be slightly out of date by the time - * they are reported, if other threads continue to allocate. - */ - for (i = 0; i < narenas; i++) { - arena_t *arena = arenas[i]; - if (arena != NULL) { - tcache_t *tcache; - - /* - * tcache_stats_merge() locks bins, so if any code is - * introduced that acquires both arena and bin locks in - * the opposite order, deadlocks may result. 
- */ - malloc_mutex_lock(&arena->lock); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); - } - malloc_mutex_unlock(&arena->lock); - } - } -#endif - JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); -} - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -thread_allocated_t * -thread_allocated_get_hard(void) -{ - thread_allocated_t *thread_allocated = (thread_allocated_t *) - imalloc(sizeof(thread_allocated_t)); - if (thread_allocated == NULL) { - static thread_allocated_t static_thread_allocated = {0, 0}; - malloc_write(": Error allocating TSD;" - " mallctl(\"thread.{de,}allocated[p]\", ...)" - " will be inaccurate\n"); - if (opt_abort) - abort(); - return (&static_thread_allocated); - } - pthread_setspecific(thread_allocated_tsd, thread_allocated); - thread_allocated->allocated = 0; - thread_allocated->deallocated = 0; - return (thread_allocated); -} -#endif - -/* - * End miscellaneous support functions. - */ -/******************************************************************************/ -/* - * Begin initialization functions. - */ - -static unsigned -malloc_ncpus(void) -{ - unsigned ret; - long result; - - result = sysconf(_SC_NPROCESSORS_ONLN); - if (result == -1) { - /* Error. */ - ret = 1; - } - ret = (unsigned)result; - - return (ret); -} - -static void -arenas_cleanup(void *arg) -{ - arena_t *arena = (arena_t *)arg; - - malloc_mutex_lock(&arenas_lock); - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); -} - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) -static void -thread_allocated_cleanup(void *arg) -{ - uint64_t *allocated = (uint64_t *)arg; - - if (allocated != NULL) - idalloc(allocated); -} -#endif - -/* - * FreeBSD's pthreads implementation calls malloc(3), so the malloc - * implementation has to take pains to avoid infinite recursion during - * initialization. 
- */ -static inline bool -malloc_init(void) -{ - - if (malloc_initialized == false) - return (malloc_init_hard()); - - return (false); -} - -static bool -malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, - char const **v_p, size_t *vlen_p) -{ - bool accept; - const char *opts = *opts_p; - - *k_p = opts; - - for (accept = false; accept == false;) { - switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_write(": Conf string " - "ends with key\n"); - } - return (true); - default: - malloc_write(": Malformed conf " - "string\n"); - return (true); - } - } - - for (accept = false; accept == false;) { - switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the - * next time this function is called, it will - * assume that end of input has been cleanly - * reached if no input remains, but we have - * optimistically already consumed the comma if - * one exists. 
- */ - if (*opts == '\0') { - malloc_write(": Conf string " - "ends with comma\n"); - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; - } - } - - *opts_p = opts; - return (false); -} - -static void -malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, - size_t vlen) -{ - char buf[PATH_MAX + 1]; - - malloc_write(": "); - malloc_write(msg); - malloc_write(": "); - memcpy(buf, k, klen); - memcpy(&buf[klen], ":", 1); - memcpy(&buf[klen+1], v, vlen); - buf[klen+1+vlen] = '\0'; - malloc_write(buf); - malloc_write("\n"); -} - -static void -malloc_conf_init(void) -{ - unsigned i; - char buf[PATH_MAX + 1]; - const char *opts, *k, *v; - size_t klen, vlen; - - for (i = 0; i < 3; i++) { - /* Get runtime configuration. */ - switch (i) { - case 0: - if (JEMALLOC_P(malloc_conf) != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = JEMALLOC_P(malloc_conf); - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 1: { - int linklen; - const char *linkname = -#ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" -#else - "/etc/malloc.conf" -#endif - ; - - if ((linklen = readlink(linkname, buf, - sizeof(buf) - 1)) != -1) { - /* - * Use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ - buf[linklen] = '\0'; - opts = buf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - } - case 2: { - const char *envname = -#ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -#else - "MALLOC_CONF" -#endif - ; - - if ((opts = getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. - */ - } else { - /* No configuration specified. 
*/ - buf[0] = '\0'; - opts = buf; - } - break; - } - default: - /* NOTREACHED */ - assert(false); - buf[0] = '\0'; - opts = buf; - } - - while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, - &vlen) == false) { -#define CONF_HANDLE_BOOL(n) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ - klen) == 0) { \ - if (strncmp("true", v, vlen) == 0 && \ - vlen == sizeof("true")-1) \ - opt_##n = true; \ - else if (strncmp("false", v, vlen) == \ - 0 && vlen == sizeof("false")-1) \ - opt_##n = false; \ - else { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } \ - continue; \ - } -#define CONF_HANDLE_SIZE_T(n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ - klen) == 0) { \ - unsigned long ul; \ - char *end; \ - \ - errno = 0; \ - ul = strtoul(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ - (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } else if (ul < min || ul > max) { \ - malloc_conf_error( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else \ - opt_##n = ul; \ - continue; \ - } -#define CONF_HANDLE_SSIZE_T(n, min, max) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ - klen) == 0) { \ - long l; \ - char *end; \ - \ - errno = 0; \ - l = strtol(v, &end, 0); \ - if (errno != 0 || (uintptr_t)end - \ - (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ - k, klen, v, vlen); \ - } else if (l < (ssize_t)min || l > \ - (ssize_t)max) { \ - malloc_conf_error( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else \ - opt_##n = l; \ - continue; \ - } -#define CONF_HANDLE_CHAR_P(n, d) \ - if (sizeof(#n)-1 == klen && strncmp(#n, k, \ - klen) == 0) { \ - size_t cpylen = (vlen <= \ - sizeof(opt_##n)-1) ? 
vlen : \ - sizeof(opt_##n)-1; \ - strncpy(opt_##n, v, cpylen); \ - opt_##n[cpylen] = '\0'; \ - continue; \ - } - - CONF_HANDLE_BOOL(abort) - CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, - PAGE_SHIFT-1) - CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, - PAGE_SHIFT-1) - /* - * Chunks always require at least one * header page, - * plus one data page. - */ - CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) - CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_BOOL(stats_print) -#ifdef JEMALLOC_FILL - CONF_HANDLE_BOOL(junk) - CONF_HANDLE_BOOL(zero) -#endif -#ifdef JEMALLOC_SYSV - CONF_HANDLE_BOOL(sysv) -#endif -#ifdef JEMALLOC_XMALLOC - CONF_HANDLE_BOOL(xmalloc) -#endif -#ifdef JEMALLOC_TCACHE - CONF_HANDLE_BOOL(tcache) - CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, - (sizeof(size_t) << 3) - 1) -#endif -#ifdef JEMALLOC_PROF - CONF_HANDLE_BOOL(prof) - CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") - CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) - CONF_HANDLE_BOOL(prof_active) - CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_accum) - CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, - (sizeof(size_t) << 3) - 1) - CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(prof_gdump) - CONF_HANDLE_BOOL(prof_leak) -#endif -#ifdef JEMALLOC_SWAP - CONF_HANDLE_BOOL(overcommit) -#endif - malloc_conf_error("Invalid conf pair", k, klen, v, - vlen); -#undef CONF_HANDLE_BOOL -#undef CONF_HANDLE_SIZE_T -#undef CONF_HANDLE_SSIZE_T -#undef CONF_HANDLE_CHAR_P - } - - /* Validate configuration of options that are inter-related. 
*/ - if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { - malloc_write(": Invalid lg_[qc]space_max " - "relationship; restoring defaults\n"); - opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; - opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; - } - } -} - -static bool -malloc_init_hard(void) -{ - arena_t *init_arenas[1]; - - malloc_mutex_lock(&init_lock); - if (malloc_initialized || malloc_initializer == pthread_self()) { - /* - * Another thread initialized the allocator before this one - * acquired init_lock, or this thread is the initializing - * thread, and it is recursively allocating. - */ - malloc_mutex_unlock(&init_lock); - return (false); - } - if (malloc_initializer != (unsigned long)0) { - /* Busy-wait until the initializing thread completes. */ - do { - malloc_mutex_unlock(&init_lock); - CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); - } while (malloc_initialized == false); - malloc_mutex_unlock(&init_lock); - return (false); - } - -#ifdef DYNAMIC_PAGE_SHIFT - /* Get page size. */ - { - long result; - - result = sysconf(_SC_PAGESIZE); - assert(result != -1); - pagesize = (unsigned)result; - - /* - * We assume that pagesize is a power of 2 when calculating - * pagesize_mask and lg_pagesize. - */ - assert(((result - 1) & result) == 0); - pagesize_mask = result - 1; - lg_pagesize = ffs((int)result) - 1; - } -#endif - -#ifdef JEMALLOC_PROF - prof_boot0(); -#endif - - malloc_conf_init(); - - /* Register fork handlers. */ - if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, - jemalloc_postfork) != 0) { - malloc_write(": Error in pthread_atfork()\n"); - if (opt_abort) - abort(); - } - - if (ctl_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (opt_stats_print) { - /* Print statistics at exit. 
*/ - if (atexit(stats_print_atexit) != 0) { - malloc_write(": Error in atexit()\n"); - if (opt_abort) - abort(); - } - } - - if (chunk_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - if (base_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifdef JEMALLOC_PROF - prof_boot1(); -#endif - - if (arena_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifdef JEMALLOC_TCACHE - if (tcache_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - - if (huge_boot()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) - /* Initialize allocation counters before any allocations can occur. */ - if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) - != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - - /* - * Create enough scaffolding to allow recursive allocation in - * malloc_ncpus(). - */ - narenas = 1; - arenas = init_arenas; - memset(arenas, 0, sizeof(arena_t *) * narenas); - - /* - * Initialize one arena here. The rest are lazily created in - * choose_arena_hard(). - */ - arenas_extend(0); - if (arenas[0] == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - - /* - * Assign the initial arena to the initial thread, in order to avoid - * spurious creation of an extra arena if the application switches to - * threaded mode. - */ - ARENA_SET(arenas[0]); - arenas[0]->nthreads++; - - if (malloc_mutex_init(&arenas_lock)) - return (true); - - if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { - malloc_mutex_unlock(&init_lock); - return (true); - } - -#ifdef JEMALLOC_PROF - if (prof_boot2()) { - malloc_mutex_unlock(&init_lock); - return (true); - } -#endif - - /* Get number of CPUs. 
*/ - malloc_initializer = pthread_self(); - malloc_mutex_unlock(&init_lock); - ncpus = malloc_ncpus(); - malloc_mutex_lock(&init_lock); - - if (opt_narenas == 0) { - /* - * For SMP systems, create more than one arena per CPU by - * default. - */ - if (ncpus > 1) - opt_narenas = ncpus << 2; - else - opt_narenas = 1; - } - narenas = opt_narenas; - /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. - */ - if (narenas > chunksize / sizeof(arena_t *)) { - char buf[UMAX2S_BUFSIZE]; - - narenas = chunksize / sizeof(arena_t *); - malloc_write(": Reducing narenas to limit ("); - malloc_write(u2s(narenas, 10, buf)); - malloc_write(")\n"); - } - - /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); - if (arenas == NULL) { - malloc_mutex_unlock(&init_lock); - return (true); - } - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas); - /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = init_arenas[0]; - -#ifdef JEMALLOC_ZONE - /* Register the custom zone. */ - malloc_zone_register(create_zone()); - - /* - * Convert the default szone to an "overlay zone" that is capable of - * deallocating szone-allocated objects, but allocating new objects - * from jemalloc. - */ - szone2ozone(malloc_default_zone()); -#endif - - malloc_initialized = true; - malloc_mutex_unlock(&init_lock); - return (false); -} - -#ifdef JEMALLOC_ZONE -JEMALLOC_ATTR(constructor) -void -jemalloc_darwin_init(void) -{ - - if (malloc_init_hard()) - abort(); -} -#endif - -/* - * End initialization functions. - */ -/******************************************************************************/ -/* - * Begin malloc(3)-compatible functions. 
- */ - -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(malloc)(size_t size) -{ - void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif - - if (malloc_init()) { - ret = NULL; - goto OOM; - } - - if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in malloc(): " - "invalid size 0\n"); - abort(); - } -# endif - ret = NULL; - goto RETURN; - } -#endif - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { - ret = NULL; - goto OOM; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = imalloc(size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif - ret = imalloc(size); - } - -OOM: - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in malloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - -#ifdef JEMALLOC_SYSV -RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { - assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); - } -#endif - return (ret); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) -{ - int ret; - size_t usize -#ifdef JEMALLOC_CC_SILENCE - = 0 -#endif - ; - void *result; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif - - if (malloc_init()) - result = NULL; - else { - if 
(size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { -# ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in " - "posix_memalign(): invalid size " - "0\n"); - abort(); - } -# endif - result = NULL; - *memptr = NULL; - ret = 0; - goto RETURN; - } -#endif - } - - /* Make sure that alignment is a large enough power of 2. */ - if (((alignment - 1) & alignment) != 0 - || alignment < sizeof(void *)) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in " - "posix_memalign(): invalid alignment\n"); - abort(); - } -#endif - result = NULL; - ret = EINVAL; - goto RETURN; - } - - usize = sa2u(size, alignment, NULL); - if (usize == 0) { - result = NULL; - ret = ENOMEM; - goto RETURN; - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) { - result = NULL; - ret = EINVAL; - } else { - if (prof_promote && (uintptr_t)cnt != - (uintptr_t)1U && usize <= small_maxclass) { - assert(sa2u(small_maxclass+1, - alignment, NULL) != 0); - result = ipalloc(sa2u(small_maxclass+1, - alignment, NULL), alignment, false); - if (result != NULL) { - arena_prof_promoted(result, - usize); - } - } else { - result = ipalloc(usize, alignment, - false); - } - } - } else -#endif - result = ipalloc(usize, alignment, false); - } - - if (result == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in posix_memalign(): " - "out of memory\n"); - abort(); - } -#endif - ret = ENOMEM; - goto RETURN; - } - - *memptr = result; - ret = 0; - -RETURN: -#ifdef JEMALLOC_STATS - if (result != NULL) { - assert(usize == isalloc(result)); - ALLOCATED_ADD(usize, 0); - } -#endif -#ifdef JEMALLOC_PROF - if (opt_prof && result != NULL) - prof_malloc(result, usize, cnt); -#endif - return (ret); -} - -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(calloc)(size_t num, size_t size) -{ - void *ret; - size_t num_size; -#if 
(defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif - - if (malloc_init()) { - num_size = 0; - ret = NULL; - goto RETURN; - } - - num_size = num * size; - if (num_size == 0) { -#ifdef JEMALLOC_SYSV - if ((opt_sysv == false) && ((num == 0) || (size == 0))) -#endif - num_size = 1; -#ifdef JEMALLOC_SYSV - else { - ret = NULL; - goto RETURN; - } -#endif - /* - * Try to avoid division here. We know that it isn't possible to - * overflow during multiplication if neither operand uses any of the - * most significant half of the bits in a size_t. - */ - } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) - && (num_size / size != num)) { - /* size_t overflow. */ - ret = NULL; - goto RETURN; - } - -#ifdef JEMALLOC_PROF - if (opt_prof) { - usize = s2u(num_size); - if ((cnt = prof_alloc_prep(usize)) == NULL) { - ret = NULL; - goto RETURN; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize - <= small_maxclass) { - ret = icalloc(small_maxclass+1); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = icalloc(num_size); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(num_size); -#endif - ret = icalloc(num_size); - } - -RETURN: - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - -#ifdef JEMALLOC_PROF - if (opt_prof && ret != NULL) - prof_malloc(ret, usize, cnt); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { - assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, 0); - } -#endif - return (ret); -} - -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(realloc)(void *ptr, size_t size) -{ - void *ret; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize -# ifdef JEMALLOC_CC_SILENCE - = 0 -# endif - ; - 
size_t old_size = 0; -#endif -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; - prof_ctx_t *old_ctx -# ifdef JEMALLOC_CC_SILENCE - = NULL -# endif - ; -#endif - - if (size == 0) { -#ifdef JEMALLOC_SYSV - if (opt_sysv == false) -#endif - size = 1; -#ifdef JEMALLOC_SYSV - else { - if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { - old_ctx = prof_ctx_get(ptr); - cnt = NULL; - } -#endif - idalloc(ptr); - } -#ifdef JEMALLOC_PROF - else if (opt_prof) { - old_ctx = NULL; - cnt = NULL; - } -#endif - ret = NULL; - goto RETURN; - } -#endif - } - - if (ptr != NULL) { - assert(malloc_initialized || malloc_initializer == - pthread_self()); - -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - old_size = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { - usize = s2u(size); - old_ctx = prof_ctx_get(ptr); - if ((cnt = prof_alloc_prep(usize)) == NULL) { - ret = NULL; - goto OOM; - } - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && - usize <= small_maxclass) { - ret = iralloc(ptr, small_maxclass+1, 0, 0, - false, false); - if (ret != NULL) - arena_prof_promoted(ret, usize); - } else - ret = iralloc(ptr, size, 0, 0, false, false); - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif - ret = iralloc(ptr, size, 0, 0, false, false); - } - -#ifdef JEMALLOC_PROF -OOM: -#endif - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - } else { -#ifdef JEMALLOC_PROF - if (opt_prof) - old_ctx = NULL; -#endif - if (malloc_init()) { -#ifdef JEMALLOC_PROF - if (opt_prof) - cnt = NULL; -#endif - ret = NULL; - } else { -#ifdef JEMALLOC_PROF - if (opt_prof) { - usize = s2u(size); - if ((cnt = prof_alloc_prep(usize)) == NULL) - ret = NULL; - else { - if (prof_promote && (uintptr_t)cnt != - 
(uintptr_t)1U && usize <= - small_maxclass) { - ret = imalloc(small_maxclass+1); - if (ret != NULL) { - arena_prof_promoted(ret, - usize); - } - } else - ret = imalloc(size); - } - } else -#endif - { -#ifdef JEMALLOC_STATS - usize = s2u(size); -#endif - ret = imalloc(size); - } - } - - if (ret == NULL) { -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } -#endif - errno = ENOMEM; - } - } - -#ifdef JEMALLOC_SYSV -RETURN: -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) - prof_realloc(ret, usize, cnt, old_size, old_ctx); -#endif -#ifdef JEMALLOC_STATS - if (ret != NULL) { - assert(usize == isalloc(ret)); - ALLOCATED_ADD(usize, old_size); - } -#endif - return (ret); -} - -JEMALLOC_ATTR(visibility("default")) -void -JEMALLOC_P(free)(void *ptr) -{ - - if (ptr != NULL) { -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize; -#endif - - assert(malloc_initialized || malloc_initializer == - pthread_self()); - -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS - usize = isalloc(ptr); -# endif - prof_free(ptr, usize); - } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif - idalloc(ptr); - } -} - -/* - * End malloc(3)-compatible functions. - */ -/******************************************************************************/ -/* - * Begin non-standard override functions. - * - * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the - * entire point is to avoid accidental mixed allocator usage. 
- */ -#ifndef JEMALLOC_PREFIX - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(memalign)(size_t alignment, size_t size) -{ - void *ret; -#ifdef JEMALLOC_CC_SILENCE - int result = -#endif - JEMALLOC_P(posix_memalign)(&ret, alignment, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); -#endif - return (ret); -} -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_ATTR(malloc) -JEMALLOC_ATTR(visibility("default")) -void * -JEMALLOC_P(valloc)(size_t size) -{ - void *ret; -#ifdef JEMALLOC_CC_SILENCE - int result = -#endif - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); -#ifdef JEMALLOC_CC_SILENCE - if (result != 0) - return (NULL); -#endif - return (ret); -} -#endif - -#endif /* JEMALLOC_PREFIX */ -/* - * End non-standard override functions. - */ -/******************************************************************************/ -/* - * Begin non-standard functions. - */ - -JEMALLOC_ATTR(visibility("default")) -size_t -JEMALLOC_P(malloc_usable_size)(const void *ptr) -{ - size_t ret; - - assert(malloc_initialized || malloc_initializer == pthread_self()); - -#ifdef JEMALLOC_IVSALLOC - ret = ivsalloc(ptr); -#else - assert(ptr != NULL); - ret = isalloc(ptr); -#endif - - return (ret); -} - -JEMALLOC_ATTR(visibility("default")) -void -JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *), - void *cbopaque, const char *opts) -{ - - stats_print(write_cb, cbopaque, opts); -} - -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); -} - -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_nametomib(name, mibp, miblenp)); -} - 
-JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - - if (malloc_init()) - return (EAGAIN); - - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); -} - -JEMALLOC_INLINE void * -iallocm(size_t usize, size_t alignment, bool zero) -{ - - assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, - NULL))); - - if (alignment != 0) - return (ipalloc(usize, alignment, zero)); - else if (zero) - return (icalloc(usize)); - else - return (imalloc(usize)); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) -{ - void *p; - size_t usize; - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; -#endif - - assert(ptr != NULL); - assert(size != 0); - - if (malloc_init()) - goto OOM; - - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, - NULL); - if (usize == 0) - goto OOM; - -#ifdef JEMALLOC_PROF - if (opt_prof) { - if ((cnt = prof_alloc_prep(usize)) == NULL) - goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= - small_maxclass) { - size_t usize_promoted = (alignment == 0) ? 
- s2u(small_maxclass+1) : sa2u(small_maxclass+1, - alignment, NULL); - assert(usize_promoted != 0); - p = iallocm(usize_promoted, alignment, zero); - if (p == NULL) - goto OOM; - arena_prof_promoted(p, usize); - } else { - p = iallocm(usize, alignment, zero); - if (p == NULL) - goto OOM; - } - - if (rsize != NULL) - *rsize = usize; - } else -#endif - { - p = iallocm(usize, alignment, zero); - if (p == NULL) - goto OOM; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; - } - } - - *ptr = p; -#ifdef JEMALLOC_STATS - assert(usize == isalloc(p)); - ALLOCATED_ADD(usize, 0); -#endif - return (ALLOCM_SUCCESS); -OOM: -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in allocm(): " - "out of memory\n"); - abort(); - } -#endif - *ptr = NULL; - return (ALLOCM_ERR_OOM); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, - int flags) -{ - void *p, *q; - size_t usize; -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t old_size; -#endif - size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) - & (SIZE_T_MAX-1)); - bool zero = flags & ALLOCM_ZERO; - bool no_move = flags & ALLOCM_NO_MOVE; -#ifdef JEMALLOC_PROF - prof_thr_cnt_t *cnt; - prof_ctx_t *old_ctx; -#endif - - assert(ptr != NULL); - assert(*ptr != NULL); - assert(size != 0); - assert(SIZE_T_MAX - size >= extra); - assert(malloc_initialized || malloc_initializer == pthread_self()); - - p = *ptr; -#ifdef JEMALLOC_PROF - if (opt_prof) { - /* - * usize isn't knowable before iralloc() returns when extra is - * non-zero. Therefore, compute its maximum possible value and - * use that in prof_alloc_prep() to decide whether to capture a - * backtrace. prof_realloc() will use the actual usize to - * decide whether to sample. - */ - size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : - sa2u(size+extra, alignment, NULL); - old_size = isalloc(p); - old_ctx = prof_ctx_get(p); - if ((cnt = prof_alloc_prep(max_usize)) == NULL) - goto OOM; - if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize - <= small_maxclass) { - q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= - size+extra) ? 0 : size+extra - (small_maxclass+1), - alignment, zero, no_move); - if (q == NULL) - goto ERR; - usize = isalloc(q); - arena_prof_promoted(q, usize); - } else { - q = iralloc(p, size, extra, alignment, zero, no_move); - if (q == NULL) - goto ERR; - usize = isalloc(q); - } - prof_realloc(q, usize, cnt, old_size, old_ctx); - if (rsize != NULL) - *rsize = usize; - } else -#endif - { -#ifdef JEMALLOC_STATS - old_size = isalloc(p); -#endif - q = iralloc(p, size, extra, alignment, zero, no_move); - if (q == NULL) - goto ERR; -#ifndef JEMALLOC_STATS - if (rsize != NULL) -#endif - { - usize = isalloc(q); -#ifdef JEMALLOC_STATS - if (rsize != NULL) -#endif - *rsize = usize; - } - } - - *ptr = q; -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(usize, old_size); -#endif - return (ALLOCM_SUCCESS); -ERR: - if (no_move) - return (ALLOCM_ERR_NOT_MOVED); -#ifdef JEMALLOC_PROF -OOM: -#endif -#ifdef JEMALLOC_XMALLOC - if (opt_xmalloc) { - malloc_write(": Error in rallocm(): " - "out of memory\n"); - abort(); - } -#endif - return (ALLOCM_ERR_OOM); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) -{ - size_t sz; - - assert(malloc_initialized || malloc_initializer == pthread_self()); - -#ifdef JEMALLOC_IVSALLOC - sz = ivsalloc(ptr); -#else - assert(ptr != NULL); - sz = isalloc(ptr); -#endif - assert(rsize != NULL); - *rsize = sz; - - return (ALLOCM_SUCCESS); -} - -JEMALLOC_ATTR(nonnull(1)) -JEMALLOC_ATTR(visibility("default")) -int -JEMALLOC_P(dallocm)(void *ptr, int flags) -{ -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - size_t usize; -#endif - - assert(ptr != 
NULL); - assert(malloc_initialized || malloc_initializer == pthread_self()); - -#ifdef JEMALLOC_STATS - usize = isalloc(ptr); -#endif -#ifdef JEMALLOC_PROF - if (opt_prof) { -# ifndef JEMALLOC_STATS - usize = isalloc(ptr); -# endif - prof_free(ptr, usize); - } -#endif -#ifdef JEMALLOC_STATS - ALLOCATED_ADD(0, usize); -#endif - idalloc(ptr); - - return (ALLOCM_SUCCESS); -} - -/* - * End non-standard functions. - */ -/******************************************************************************/ - -/* - * The following functions are used by threading libraries for protection of - * malloc during fork(). - */ - -void -jemalloc_prefork(void) -{ - unsigned i; - - /* Acquire all mutexes in a safe order. */ - - malloc_mutex_lock(&arenas_lock); - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - malloc_mutex_lock(&arenas[i]->lock); - } - - malloc_mutex_lock(&base_mtx); - - malloc_mutex_lock(&huge_mtx); - -#ifdef JEMALLOC_DSS - malloc_mutex_lock(&dss_mtx); -#endif - -#ifdef JEMALLOC_SWAP - malloc_mutex_lock(&swap_mtx); -#endif -} - -void -jemalloc_postfork(void) -{ - unsigned i; - - /* Release all mutexes, now that fork() has completed. 
*/ - -#ifdef JEMALLOC_SWAP - malloc_mutex_unlock(&swap_mtx); -#endif - -#ifdef JEMALLOC_DSS - malloc_mutex_unlock(&dss_mtx); -#endif - - malloc_mutex_unlock(&huge_mtx); - - malloc_mutex_unlock(&base_mtx); - - for (i = 0; i < narenas; i++) { - if (arenas[i] != NULL) - malloc_mutex_unlock(&arenas[i]->lock); - } - malloc_mutex_unlock(&arenas_lock); -} - -/******************************************************************************/ diff --git a/jemalloc/src/mb.c b/jemalloc/src/mb.c deleted file mode 100644 index dc2c0a2..0000000 --- a/jemalloc/src/mb.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_MB_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/jemalloc/src/mutex.c b/jemalloc/src/mutex.c deleted file mode 100644 index ca89ef1..0000000 --- a/jemalloc/src/mutex.c +++ /dev/null @@ -1,90 +0,0 @@ -#define JEMALLOC_MUTEX_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -#ifdef JEMALLOC_LAZY_LOCK -bool isthreaded = false; -#endif - -#ifdef JEMALLOC_LAZY_LOCK -static void pthread_create_once(void); -#endif - -/******************************************************************************/ -/* - * We intercept pthread_create() calls in order to toggle isthreaded if the - * process goes multi-threaded. 
- */ - -#ifdef JEMALLOC_LAZY_LOCK -static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); - -static void -pthread_create_once(void) -{ - - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - - isthreaded = true; -} - -JEMALLOC_ATTR(visibility("default")) -int -pthread_create(pthread_t *__restrict thread, - const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), - void *__restrict arg) -{ - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - - pthread_once(&once_control, pthread_create_once); - - return (pthread_create_fptr(thread, attr, start_routine, arg)); -} -#endif - -/******************************************************************************/ - -bool -malloc_mutex_init(malloc_mutex_t *mutex) -{ -#ifdef JEMALLOC_OSSPIN - *mutex = 0; -#else - pthread_mutexattr_t attr; - - if (pthread_mutexattr_init(&attr) != 0) - return (true); -#ifdef PTHREAD_MUTEX_ADAPTIVE_NP - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); -#else - pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); -#endif - if (pthread_mutex_init(mutex, &attr) != 0) { - pthread_mutexattr_destroy(&attr); - return (true); - } - pthread_mutexattr_destroy(&attr); - -#endif - return (false); -} - -void -malloc_mutex_destroy(malloc_mutex_t *mutex) -{ - -#ifndef JEMALLOC_OSSPIN - if (pthread_mutex_destroy(mutex) != 0) { - malloc_write(": Error in pthread_mutex_destroy()\n"); - abort(); - } -#endif -} diff --git a/jemalloc/src/prof.c b/jemalloc/src/prof.c deleted file mode 100644 index 8370042..0000000 --- a/jemalloc/src/prof.c +++ /dev/null @@ -1,1243 +0,0 @@ -#define JEMALLOC_PROF_C_ -#include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_PROF -/******************************************************************************/ - -#ifdef JEMALLOC_PROF_LIBUNWIND 
-#define UNW_LOCAL_ONLY -#include -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -#include -#endif - -/******************************************************************************/ -/* Data. */ - -bool opt_prof = false; -bool opt_prof_active = true; -size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_leak = false; -bool opt_prof_accum = true; -ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; -char opt_prof_prefix[PATH_MAX + 1]; - -uint64_t prof_interval; -bool prof_promote; - -unsigned prof_bt_max; - -#ifndef NO_TLS -__thread prof_tdata_t *prof_tdata_tls - JEMALLOC_ATTR(tls_model("initial-exec")); -#endif -pthread_key_t prof_tdata_tsd; - -/* - * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2ctx; -static malloc_mutex_t bt2ctx_mtx; - -static malloc_mutex_t prof_dump_seq_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; - -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since - * it must be locked anyway during dumping. - */ -static char prof_dump_buf[PROF_DUMP_BUF_SIZE]; -static unsigned prof_dump_buf_end; -static int prof_dump_fd; - -/* Do not dump any profiles until bootstrapping is complete. */ -static bool prof_booted = false; - -static malloc_mutex_t enq_mtx; -static bool enq; -static bool enq_idump; -static bool enq_gdump; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static prof_bt_t *bt_dup(prof_bt_t *bt); -static void bt_destroy(prof_bt_t *bt); -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code prof_unwind_init_callback( - struct _Unwind_Context *context, void *arg); -static _Unwind_Reason_Code prof_unwind_callback( - struct _Unwind_Context *context, void *arg); -#endif -static bool prof_flush(bool propagate_err); -static bool prof_write(const char *s, bool propagate_err); -static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, - size_t *leak_nctx); -static void prof_ctx_destroy(prof_ctx_t *ctx); -static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); -static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, - bool propagate_err); -static bool prof_dump_maps(bool propagate_err); -static bool prof_dump(const char *filename, bool leakcheck, - bool propagate_err); -static void prof_dump_filename(char *filename, char v, int64_t vseq); -static void prof_fdump(void); -static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, - size_t *hash2); -static bool prof_bt_keycomp(const void *k1, const void *k2); -static void prof_tdata_cleanup(void *arg); - -/******************************************************************************/ - -void -bt_init(prof_bt_t *bt, void **vec) -{ - - bt->vec = vec; - bt->len = 0; -} - -static void -bt_destroy(prof_bt_t *bt) -{ - - idalloc(bt); -} - -static prof_bt_t * -bt_dup(prof_bt_t *bt) -{ - prof_bt_t *ret; - - /* - * Create a single allocation that has space for vec immediately - * following the prof_bt_t structure. The backtraces that get - * stored in the backtrace caches are copied from stack-allocated - * temporary variables, so size is known at creation time. Making this - * a contiguous object improves cache locality. 
- */ - ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + - (bt->len * sizeof(void *))); - if (ret == NULL) - return (NULL); - ret->vec = (void **)((uintptr_t)ret + - QUANTUM_CEILING(sizeof(prof_bt_t))); - memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); - ret->len = bt->len; - - return (ret); -} - -static inline void -prof_enter(void) -{ - - malloc_mutex_lock(&enq_mtx); - enq = true; - malloc_mutex_unlock(&enq_mtx); - - malloc_mutex_lock(&bt2ctx_mtx); -} - -static inline void -prof_leave(void) -{ - bool idump, gdump; - - malloc_mutex_unlock(&bt2ctx_mtx); - - malloc_mutex_lock(&enq_mtx); - enq = false; - idump = enq_idump; - enq_idump = false; - gdump = enq_gdump; - enq_gdump = false; - malloc_mutex_unlock(&enq_mtx); - - if (idump) - prof_idump(); - if (gdump) - prof_gdump(); -} - -#ifdef JEMALLOC_PROF_LIBUNWIND -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - unw_context_t uc; - unw_cursor_t cursor; - unsigned i; - int err; - - assert(bt->len == 0); - assert(bt->vec != NULL); - assert(max <= (1U << opt_lg_prof_bt_max)); - - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); - - /* Throw away (nignore+1) stack frames, if that many exist. */ - for (i = 0; i < nignore + 1; i++) { - err = unw_step(&cursor); - if (err <= 0) - return; - } - - /* - * Iterate over stack frames until there are no more, or until no space - * remains in bt. 
- */ - for (i = 0; i < max; i++) { - unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); - bt->len++; - err = unw_step(&cursor); - if (err <= 0) - break; - } -} -#endif -#ifdef JEMALLOC_PROF_LIBGCC -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ - - return (_URC_NO_REASON); -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - - if (data->nignore > 0) - data->nignore--; - else { - data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); - data->bt->len++; - if (data->bt->len == data->max) - return (_URC_END_OF_STACK); - } - - return (_URC_NO_REASON); -} - -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ - prof_unwind_data_t data = {bt, nignore, max}; - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#endif -#ifdef JEMALLOC_PROF_GCC -void -prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) -{ -#define BT_FRAME(i) \ - if ((i) < nignore + max) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) \ - return; \ - p = __builtin_return_address(i); \ - if (p == NULL) \ - return; \ - if (i >= nignore) { \ - bt->vec[(i) - nignore] = p; \ - bt->len = (i) - nignore + 1; \ - } \ - } else \ - return; - - assert(nignore <= 3); - assert(max <= (1U << opt_lg_prof_bt_max)); - - BT_FRAME(0) - BT_FRAME(1) - BT_FRAME(2) - BT_FRAME(3) - BT_FRAME(4) - BT_FRAME(5) - BT_FRAME(6) - BT_FRAME(7) - BT_FRAME(8) - BT_FRAME(9) - - BT_FRAME(10) - BT_FRAME(11) - BT_FRAME(12) - BT_FRAME(13) - BT_FRAME(14) - BT_FRAME(15) - BT_FRAME(16) - BT_FRAME(17) - BT_FRAME(18) - BT_FRAME(19) - - BT_FRAME(20) - BT_FRAME(21) - BT_FRAME(22) - BT_FRAME(23) - BT_FRAME(24) - BT_FRAME(25) - BT_FRAME(26) - BT_FRAME(27) - BT_FRAME(28) - BT_FRAME(29) - - BT_FRAME(30) - BT_FRAME(31) - BT_FRAME(32) - BT_FRAME(33) - BT_FRAME(34) - BT_FRAME(35) - BT_FRAME(36) - BT_FRAME(37) - BT_FRAME(38) - BT_FRAME(39) - - 
BT_FRAME(40) - BT_FRAME(41) - BT_FRAME(42) - BT_FRAME(43) - BT_FRAME(44) - BT_FRAME(45) - BT_FRAME(46) - BT_FRAME(47) - BT_FRAME(48) - BT_FRAME(49) - - BT_FRAME(50) - BT_FRAME(51) - BT_FRAME(52) - BT_FRAME(53) - BT_FRAME(54) - BT_FRAME(55) - BT_FRAME(56) - BT_FRAME(57) - BT_FRAME(58) - BT_FRAME(59) - - BT_FRAME(60) - BT_FRAME(61) - BT_FRAME(62) - BT_FRAME(63) - BT_FRAME(64) - BT_FRAME(65) - BT_FRAME(66) - BT_FRAME(67) - BT_FRAME(68) - BT_FRAME(69) - - BT_FRAME(70) - BT_FRAME(71) - BT_FRAME(72) - BT_FRAME(73) - BT_FRAME(74) - BT_FRAME(75) - BT_FRAME(76) - BT_FRAME(77) - BT_FRAME(78) - BT_FRAME(79) - - BT_FRAME(80) - BT_FRAME(81) - BT_FRAME(82) - BT_FRAME(83) - BT_FRAME(84) - BT_FRAME(85) - BT_FRAME(86) - BT_FRAME(87) - BT_FRAME(88) - BT_FRAME(89) - - BT_FRAME(90) - BT_FRAME(91) - BT_FRAME(92) - BT_FRAME(93) - BT_FRAME(94) - BT_FRAME(95) - BT_FRAME(96) - BT_FRAME(97) - BT_FRAME(98) - BT_FRAME(99) - - BT_FRAME(100) - BT_FRAME(101) - BT_FRAME(102) - BT_FRAME(103) - BT_FRAME(104) - BT_FRAME(105) - BT_FRAME(106) - BT_FRAME(107) - BT_FRAME(108) - BT_FRAME(109) - - BT_FRAME(110) - BT_FRAME(111) - BT_FRAME(112) - BT_FRAME(113) - BT_FRAME(114) - BT_FRAME(115) - BT_FRAME(116) - BT_FRAME(117) - BT_FRAME(118) - BT_FRAME(119) - - BT_FRAME(120) - BT_FRAME(121) - BT_FRAME(122) - BT_FRAME(123) - BT_FRAME(124) - BT_FRAME(125) - BT_FRAME(126) - BT_FRAME(127) - - /* Extras to compensate for nignore. */ - BT_FRAME(128) - BT_FRAME(129) - BT_FRAME(130) -#undef BT_FRAME -} -#endif - -prof_thr_cnt_t * -prof_lookup(prof_bt_t *bt) -{ - union { - prof_thr_cnt_t *p; - void *v; - } ret; - prof_tdata_t *prof_tdata; - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata == NULL) { - prof_tdata = prof_tdata_init(); - if (prof_tdata == NULL) - return (NULL); - } - - if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { - union { - prof_bt_t *p; - void *v; - } btkey; - union { - prof_ctx_t *p; - void *v; - } ctx; - bool new_ctx; - - /* - * This thread's cache lacks bt. 
Look for it in the global - * cache. - */ - prof_enter(); - if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { - /* bt has never been seen before. Insert it. */ - ctx.v = imalloc(sizeof(prof_ctx_t)); - if (ctx.v == NULL) { - prof_leave(); - return (NULL); - } - btkey.p = bt_dup(bt); - if (btkey.v == NULL) { - prof_leave(); - idalloc(ctx.v); - return (NULL); - } - ctx.p->bt = btkey.p; - if (malloc_mutex_init(&ctx.p->lock)) { - prof_leave(); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } - memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); - ql_new(&ctx.p->cnts_ql); - if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { - /* OOM. */ - prof_leave(); - malloc_mutex_destroy(&ctx.p->lock); - idalloc(btkey.v); - idalloc(ctx.v); - return (NULL); - } - /* - * Artificially raise curobjs, in order to avoid a race - * condition with prof_ctx_merge()/prof_ctx_destroy(). - */ - ctx.p->cnt_merged.curobjs++; - new_ctx = true; - } else - new_ctx = false; - prof_leave(); - - /* Link a prof_thd_cnt_t into ctx for this thread. */ - if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) - == (ZU(1) << opt_lg_prof_tcmax)) { - assert(ckh_count(&prof_tdata->bt2cnt) > 0); - /* - * Flush the least recently used cnt in order to keep - * bt2cnt from becoming too large. - */ - ret.p = ql_last(&prof_tdata->lru_ql, lru_link); - assert(ret.v != NULL); - ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, - NULL); - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - prof_ctx_merge(ret.p->ctx, ret.p); - /* ret can now be re-used. */ - } else { - assert(opt_lg_prof_tcmax < 0 || - ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << - opt_lg_prof_tcmax)); - /* Allocate and partially initialize a new cnt. 
*/ - ret.v = imalloc(sizeof(prof_thr_cnt_t)); - if (ret.p == NULL) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } - return (NULL); - } - ql_elm_new(ret.p, cnts_link); - ql_elm_new(ret.p, lru_link); - } - /* Finish initializing ret. */ - ret.p->ctx = ctx.p; - ret.p->epoch = 0; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { - if (new_ctx) { - malloc_mutex_lock(&ctx.p->lock); - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } - idalloc(ret.v); - return (NULL); - } - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - malloc_mutex_lock(&ctx.p->lock); - ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); - if (new_ctx) - ctx.p->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx.p->lock); - } else { - /* Move ret to the front of the LRU. */ - ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); - ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); - } - - return (ret.p); -} - -static bool -prof_flush(bool propagate_err) -{ - bool ret = false; - ssize_t err; - - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (propagate_err == false) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) - abort(); - } - ret = true; - } - prof_dump_buf_end = 0; - - return (ret); -} - -static bool -prof_write(const char *s, bool propagate_err) -{ - unsigned i, slen, n; - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) - if (prof_flush(propagate_err) && propagate_err) - return (true); - - if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. 
*/ - n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - - return (false); -} - -static void -prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) -{ - prof_thr_cnt_t *thr_cnt; - prof_cnt_t tcnt; - - malloc_mutex_lock(&ctx->lock); - - memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); - ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { - volatile unsigned *epoch = &thr_cnt->epoch; - - while (true) { - unsigned epoch0 = *epoch; - - /* Make sure epoch is even. */ - if (epoch0 & 1U) - continue; - - memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); - - /* Terminate if epoch didn't change while reading. */ - if (*epoch == epoch0) - break; - } - - ctx->cnt_summed.curobjs += tcnt.curobjs; - ctx->cnt_summed.curbytes += tcnt.curbytes; - if (opt_prof_accum) { - ctx->cnt_summed.accumobjs += tcnt.accumobjs; - ctx->cnt_summed.accumbytes += tcnt.accumbytes; - } - } - - if (ctx->cnt_summed.curobjs != 0) - (*leak_nctx)++; - - /* Add to cnt_all. */ - cnt_all->curobjs += ctx->cnt_summed.curobjs; - cnt_all->curbytes += ctx->cnt_summed.curbytes; - if (opt_prof_accum) { - cnt_all->accumobjs += ctx->cnt_summed.accumobjs; - cnt_all->accumbytes += ctx->cnt_summed.accumbytes; - } - - malloc_mutex_unlock(&ctx->lock); -} - -static void -prof_ctx_destroy(prof_ctx_t *ctx) -{ - - /* - * Check that ctx is still unused by any thread cache before destroying - * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to - * avoid a race condition with this function, and prof_ctx_merge() - * artificially raises ctx->cnt_merged.curobjs in order to avoid a race - * between the main body of prof_ctx_merge() and entry into this - * function. 
- */ - prof_enter(); - malloc_mutex_lock(&ctx->lock); - if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { - assert(ctx->cnt_merged.curbytes == 0); - assert(ctx->cnt_merged.accumobjs == 0); - assert(ctx->cnt_merged.accumbytes == 0); - /* Remove ctx from bt2ctx. */ - ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); - prof_leave(); - /* Destroy ctx. */ - malloc_mutex_unlock(&ctx->lock); - bt_destroy(ctx->bt); - malloc_mutex_destroy(&ctx->lock); - idalloc(ctx); - } else { - /* Compensate for increment in prof_ctx_merge(). */ - ctx->cnt_merged.curobjs--; - malloc_mutex_unlock(&ctx->lock); - prof_leave(); - } -} - -static void -prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) -{ - bool destroy; - - /* Merge cnt stats and detach from ctx. */ - malloc_mutex_lock(&ctx->lock); - ctx->cnt_merged.curobjs += cnt->cnts.curobjs; - ctx->cnt_merged.curbytes += cnt->cnts.curbytes; - ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; - ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; - ql_remove(&ctx->cnts_ql, cnt, cnts_link); - if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && - ctx->cnt_merged.curobjs == 0) { - /* - * Artificially raise ctx->cnt_merged.curobjs in order to keep - * another thread from winning the race to destroy ctx while - * this one has ctx->lock dropped. Without this, it would be - * possible for another thread to: - * - * 1) Sample an allocation associated with ctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_ctx_destroy(ctx). - * - * The result would be that ctx no longer exists by the time - * this thread accesses it in prof_ctx_destroy(). 
- */ - ctx->cnt_merged.curobjs++; - destroy = true; - } else - destroy = false; - malloc_mutex_unlock(&ctx->lock); - if (destroy) - prof_ctx_destroy(ctx); -} - -static bool -prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) -{ - char buf[UMAX2S_BUFSIZE]; - unsigned i; - - if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { - assert(ctx->cnt_summed.curbytes == 0); - assert(ctx->cnt_summed.accumobjs == 0); - assert(ctx->cnt_summed.accumbytes == 0); - return (false); - } - - if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), - propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), - propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), - propagate_err) - || prof_write("] @", propagate_err)) - return (true); - - for (i = 0; i < bt->len; i++) { - if (prof_write(" 0x", propagate_err) - || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), - propagate_err)) - return (true); - } - - if (prof_write("\n", propagate_err)) - return (true); - - return (false); -} - -static bool -prof_dump_maps(bool propagate_err) -{ - int mfd; - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; - /* /proc//maps\0 */ - char mpath[6 + UMAX2S_BUFSIZE - + 5 + 1]; - - i = 0; - - s = "/proc/"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - s = "/maps"; - slen = strlen(s); - memcpy(&mpath[i], s, slen); - i += slen; - - mpath[i] = '\0'; - - mfd = open(mpath, O_RDONLY); - if (mfd != -1) { - ssize_t nread; - - if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) && - propagate_err) - return (true); - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) { - /* Make space in prof_dump_buf before read(). 
*/ - if (prof_flush(propagate_err) && propagate_err) - return (true); - } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUF_SIZE - prof_dump_buf_end); - } while (nread > 0); - close(mfd); - } else - return (true); - - return (false); -} - -static bool -prof_dump(const char *filename, bool leakcheck, bool propagate_err) -{ - prof_cnt_t cnt_all; - size_t tabind; - union { - prof_bt_t *p; - void *v; - } bt; - union { - prof_ctx_t *p; - void *v; - } ctx; - char buf[UMAX2S_BUFSIZE]; - size_t leak_nctx; - - prof_enter(); - prof_dump_fd = creat(filename, 0644); - if (prof_dump_fd == -1) { - if (propagate_err == false) { - malloc_write(": creat(\""); - malloc_write(filename); - malloc_write("\", 0644) failed\n"); - if (opt_abort) - abort(); - } - goto ERROR; - } - - /* Merge per thread profile stats, and sum them in cnt_all. */ - memset(&cnt_all, 0, sizeof(prof_cnt_t)); - leak_nctx = 0; - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) - prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); - - /* Dump profile header. */ - if (prof_write("heap profile: ", propagate_err) - || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) - || prof_write(" [", propagate_err) - || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) - || prof_write(": ", propagate_err) - || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) - goto ERROR; - - if (opt_lg_prof_sample == 0) { - if (prof_write("] @ heapprofile\n", propagate_err)) - goto ERROR; - } else { - if (prof_write("] @ heap_v2/", propagate_err) - || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, - buf), propagate_err) - || prof_write("\n", propagate_err)) - goto ERROR; - } - - /* Dump per ctx profile stats. 
*/ - for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) - == false;) { - if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) - goto ERROR; - } - - /* Dump /proc//maps if possible. */ - if (prof_dump_maps(propagate_err)) - goto ERROR; - - if (prof_flush(propagate_err)) - goto ERROR; - close(prof_dump_fd); - prof_leave(); - - if (leakcheck && cnt_all.curbytes != 0) { - malloc_write(": Leak summary: "); - malloc_write(u2s(cnt_all.curbytes, 10, buf)); - malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); - malloc_write(u2s(cnt_all.curobjs, 10, buf)); - malloc_write((cnt_all.curobjs != 1) ? " objects, " : - " object, "); - malloc_write(u2s(leak_nctx, 10, buf)); - malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); - malloc_write(": Run pprof on \""); - malloc_write(filename); - malloc_write("\" for leak detail\n"); - } - - return (false); -ERROR: - prof_leave(); - return (true); -} - -#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \ - + 1 \ - + UMAX2S_BUFSIZE \ - + 2 \ - + UMAX2S_BUFSIZE \ - + 5 + 1) -static void -prof_dump_filename(char *filename, char v, int64_t vseq) -{ - char buf[UMAX2S_BUFSIZE]; - char *s; - unsigned i, slen; - - /* - * Construct a filename of the form: - * - * ...v.heap\0 - */ - - i = 0; - - s = opt_prof_prefix; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(getpid(), 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = u2s(prof_dump_seq, 10, buf); - prof_dump_seq++; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - s = "."; - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = v; - i++; - - if (vseq != 0xffffffffffffffffLLU) { - s = u2s(vseq, 10, buf); - slen = strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - } - - s = ".heap"; - slen = 
strlen(s); - memcpy(&filename[i], s, slen); - i += slen; - - filename[i] = '\0'; -} - -static void -prof_fdump(void) -{ - char filename[DUMP_FILENAME_BUFSIZE]; - - if (prof_booted == false) - return; - - if (opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, opt_prof_leak, false); - } -} - -void -prof_idump(void) -{ - char filename[DUMP_FILENAME_BUFSIZE]; - - if (prof_booted == false) - return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_idump = true; - malloc_mutex_unlock(&enq_mtx); - return; - } - malloc_mutex_unlock(&enq_mtx); - - if (opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); - } -} - -bool -prof_mdump(const char *filename) -{ - char filename_buf[DUMP_FILENAME_BUFSIZE]; - - if (opt_prof == false || prof_booted == false) - return (true); - - if (filename == NULL) { - /* No filename specified, so automatically generate one. 
*/ - if (opt_prof_prefix[0] == '\0') - return (true); - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename_buf, 'm', prof_dump_mseq); - prof_dump_mseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - filename = filename_buf; - } - return (prof_dump(filename, false, true)); -} - -void -prof_gdump(void) -{ - char filename[DUMP_FILENAME_BUFSIZE]; - - if (prof_booted == false) - return; - malloc_mutex_lock(&enq_mtx); - if (enq) { - enq_gdump = true; - malloc_mutex_unlock(&enq_mtx); - return; - } - malloc_mutex_unlock(&enq_mtx); - - if (opt_prof_prefix[0] != '\0') { - malloc_mutex_lock(&prof_dump_seq_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); - prof_dump(filename, false, false); - } -} - -static void -prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) -{ - size_t ret1, ret2; - uint64_t h; - prof_bt_t *bt = (prof_bt_t *)key; - - assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); - assert(hash1 != NULL); - assert(hash2 != NULL); - - h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU); - if (minbits <= 32) { - /* - * Avoid doing multiple hashes, since a single hash provides - * enough bits. - */ - ret1 = h & ZU(0xffffffffU); - ret2 = h >> 32; - } else { - ret1 = h; - ret2 = hash(bt->vec, bt->len * sizeof(void *), - 0x8432a476666bbc13U); - } - - *hash1 = ret1; - *hash2 = ret2; -} - -static bool -prof_bt_keycomp(const void *k1, const void *k2) -{ - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - if (bt1->len != bt2->len) - return (false); - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - -prof_tdata_t * -prof_tdata_init(void) -{ - prof_tdata_t *prof_tdata; - - /* Initialize an empty cache for this thread. 
*/ - prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); - if (prof_tdata == NULL) - return (NULL); - - if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloc(prof_tdata); - return (NULL); - } - ql_new(&prof_tdata->lru_ql); - - prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); - if (prof_tdata->vec == NULL) { - - ckh_delete(&prof_tdata->bt2cnt); - idalloc(prof_tdata); - return (NULL); - } - - prof_tdata->prn_state = 0; - prof_tdata->threshold = 0; - prof_tdata->accum = 0; - - PROF_TCACHE_SET(prof_tdata); - - return (prof_tdata); -} - -static void -prof_tdata_cleanup(void *arg) -{ - prof_tdata_t *prof_tdata; - - prof_tdata = PROF_TCACHE_GET(); - if (prof_tdata != NULL) { - prof_thr_cnt_t *cnt; - - /* - * Delete the hash table. All of its contents can still be - * iterated over via the LRU. - */ - ckh_delete(&prof_tdata->bt2cnt); - - /* - * Iteratively merge cnt's into the global stats and delete - * them. - */ - while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { - prof_ctx_merge(cnt->ctx, cnt); - ql_remove(&prof_tdata->lru_ql, cnt, lru_link); - idalloc(cnt); - } - - idalloc(prof_tdata->vec); - - idalloc(prof_tdata); - PROF_TCACHE_SET(NULL); - } -} - -void -prof_boot0(void) -{ - - memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, - sizeof(PROF_PREFIX_DEFAULT)); -} - -void -prof_boot1(void) -{ - - /* - * opt_prof and prof_promote must be in their final state before any - * arenas are initialized, so this function must be executed early. - */ - - if (opt_prof_leak && opt_prof == false) { - /* - * Enable opt_prof, but in such a way that profiles are never - * automatically dumped. 
- */ - opt_prof = true; - opt_prof_gdump = false; - prof_interval = 0; - } else if (opt_prof) { - if (opt_lg_prof_interval >= 0) { - prof_interval = (((uint64_t)1U) << - opt_lg_prof_interval); - } else - prof_interval = 0; - } - - prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT); -} - -bool -prof_boot2(void) -{ - - if (opt_prof) { - if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) - return (true); - if (malloc_mutex_init(&bt2ctx_mtx)) - return (true); - if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) - != 0) { - malloc_write( - ": Error in pthread_key_create()\n"); - abort(); - } - - prof_bt_max = (1U << opt_lg_prof_bt_max); - if (malloc_mutex_init(&prof_dump_seq_mtx)) - return (true); - - if (malloc_mutex_init(&enq_mtx)) - return (true); - enq = false; - enq_idump = false; - enq_gdump = false; - - if (atexit(prof_fdump) != 0) { - malloc_write(": Error in atexit()\n"); - if (opt_abort) - abort(); - } - } - -#ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal state - * before enabling profiling. 
- */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); -#endif - - prof_booted = true; - - return (false); -} - -/******************************************************************************/ -#endif /* JEMALLOC_PROF */ diff --git a/jemalloc/src/rtree.c b/jemalloc/src/rtree.c deleted file mode 100644 index eb0ff1e..0000000 --- a/jemalloc/src/rtree.c +++ /dev/null @@ -1,46 +0,0 @@ -#define JEMALLOC_RTREE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -rtree_t * -rtree_new(unsigned bits) -{ - rtree_t *ret; - unsigned bits_per_level, height, i; - - bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; - height = bits / bits_per_level; - if (height * bits_per_level != bits) - height++; - assert(height * bits_per_level >= bits); - - ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + - (sizeof(unsigned) * height)); - if (ret == NULL) - return (NULL); - memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * - height)); - - if (malloc_mutex_init(&ret->mutex)) { - /* Leak the rtree. */ - return (NULL); - } - ret->height = height; - if (bits_per_level * height > bits) - ret->level2bits[0] = bits % bits_per_level; - else - ret->level2bits[0] = bits_per_level; - for (i = 1; i < height; i++) - ret->level2bits[i] = bits_per_level; - - ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); - if (ret->root == NULL) { - /* - * We leak the rtree here, since there's no generic base - * deallocation. 
- */ - return (NULL); - } - memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); - - return (ret); -} diff --git a/jemalloc/src/stats.c b/jemalloc/src/stats.c deleted file mode 100644 index cbbbb5b..0000000 --- a/jemalloc/src/stats.c +++ /dev/null @@ -1,790 +0,0 @@ -#define JEMALLOC_STATS_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -#define CTL_GET(n, v, t) do { \ - size_t sz = sizeof(t); \ - xmallctl(n, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_I_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_J_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = j; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -#define CTL_IJ_GET(n, v, t) do { \ - size_t mib[6]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = i; \ - mib[4] = j; \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ -} while (0) - -/******************************************************************************/ -/* Data. */ - -bool opt_stats_print = false; - -#ifdef JEMALLOC_STATS -size_t stats_cactive = 0; -#endif - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -#ifdef JEMALLOC_STATS -static void malloc_vcprintf(void (*write_cb)(void *, const char *), - void *cbopaque, const char *format, va_list ap); -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -#endif - -/******************************************************************************/ - -/* - * We don't want to depend on vsnprintf() for production builds, since that can - * cause unnecessary bloat for static binaries. u2s() provides minimal integer - * printing functionality, so that malloc_printf() use can be limited to - * JEMALLOC_STATS code. - */ -char * -u2s(uint64_t x, unsigned base, char *s) -{ - unsigned i; - - i = UMAX2S_BUFSIZE - 1; - s[i] = '\0'; - switch (base) { - case 10: - do { - i--; - s[i] = "0123456789"[x % (uint64_t)10]; - x /= (uint64_t)10; - } while (x > 0); - break; - case 16: - do { - i--; - s[i] = "0123456789abcdef"[x & 0xf]; - x >>= 4; - } while (x > 0); - break; - default: - do { - i--; - s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % - (uint64_t)base]; - x /= (uint64_t)base; - } while (x > 0); - } - - return (&s[i]); -} - -#ifdef JEMALLOC_STATS -static void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) -{ - char buf[4096]; - - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. - */ - write_cb = JEMALLOC_P(malloc_message); - cbopaque = NULL; - } - - vsnprintf(buf, sizeof(buf), format, ap); - write_cb(cbopaque, buf); -} - -/* - * Print to a callback function in such a way as to (hopefully) avoid memory - * allocation. 
- */ -JEMALLOC_ATTR(format(printf, 3, 4)) -void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(write_cb, cbopaque, format, ap); - va_end(ap); -} - -/* - * Print to stderr in such a way as to (hopefully) avoid memory allocation. - */ -JEMALLOC_ATTR(format(printf, 1, 2)) -void -malloc_printf(const char *format, ...) -{ - va_list ap; - - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); -} -#endif - -#ifdef JEMALLOC_STATS -static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - size_t pagesize; - bool config_tcache; - unsigned nbins, j, gap_start; - - CTL_GET("arenas.pagesize", &pagesize, size_t); - - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc nrequests nfills nflushes" - " newruns reruns maxruns curruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: bin size regs pgs allocated nmalloc" - " ndalloc newruns reruns maxruns" - " curruns\n"); - } - CTL_GET("arenas.nbins", &nbins, unsigned); - for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { - uint64_t nruns; - - CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); - if (nruns == 0) { - if (gap_start == UINT_MAX) - gap_start = j; - } else { - unsigned ntbins_, nqbins, ncbins, nsbins; - size_t reg_size, run_size, allocated; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - size_t highruns, curruns; - - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, - "[%u..%u]\n", gap_start, - j - 1); - } else { - /* Gap of one size class. 
*/ - malloc_cprintf(write_cb, cbopaque, - "[%u]\n", gap_start); - } - gap_start = UINT_MAX; - } - CTL_GET("arenas.ntbins", &ntbins_, unsigned); - CTL_GET("arenas.nqbins", &nqbins, unsigned); - CTL_GET("arenas.ncbins", &ncbins, unsigned); - CTL_GET("arenas.nsbins", &nsbins, unsigned); - CTL_J_GET("arenas.bin.0.size", &reg_size, size_t); - CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); - CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.allocated", - &allocated, size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", - &nmalloc, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", - &ndalloc, uint64_t); - if (config_tcache) { - CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", - &nrequests, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nfills", - &nfills, uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", - &nflushes, uint64_t); - } - CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, - uint64_t); - CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns, - size_t); - CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, - size_t); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? "C" : - "S", - reg_size, nregs, run_size / pagesize, - allocated, nmalloc, ndalloc, nrequests, - nfills, nflushes, nruns, reruns, highruns, - curruns); - } else { - malloc_cprintf(write_cb, cbopaque, - "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 - " %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", - j, - j < ntbins_ ? "T" : j < ntbins_ + nqbins ? - "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : - "S", - reg_size, nregs, run_size / pagesize, - allocated, nmalloc, ndalloc, nruns, reruns, - highruns, curruns); - } - } - } - if (gap_start != UINT_MAX) { - if (j > gap_start + 1) { - /* Gap of more than one size class. */ - malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", - gap_start, j - 1); - } else { - /* Gap of one size class. */ - malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); - } - } -} - -static void -stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - size_t pagesize, nlruns, j; - ssize_t gap_start; - - CTL_GET("arenas.pagesize", &pagesize, size_t); - - malloc_cprintf(write_cb, cbopaque, - "large: size pages nmalloc ndalloc nrequests" - " maxruns curruns\n"); - CTL_GET("arenas.nlruns", &nlruns, size_t); - for (j = 0, gap_start = -1; j < nlruns; j++) { - uint64_t nmalloc, ndalloc, nrequests; - size_t run_size, highruns, curruns; - - CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, - uint64_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, - uint64_t); - if (nrequests == 0) { - if (gap_start == -1) - gap_start = j; - } else { - CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns, - size_t); - CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, - size_t); - if (gap_start != -1) { - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", - j - gap_start); - gap_start = -1; - } - malloc_cprintf(write_cb, cbopaque, - "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 - " %12zu %12zu\n", - run_size, run_size / pagesize, nmalloc, ndalloc, - nrequests, highruns, curruns); - } - } - if (gap_start != -1) - malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); -} - -static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - unsigned nthreads; - size_t pagesize, pactive, pdirty, mapped; - uint64_t npurge, nmadvise, 
purged; - size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; - size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; - - CTL_GET("arenas.pagesize", &pagesize, size_t); - - CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); - CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); - CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); - CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); - CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," - " %"PRIu64" madvise%s, %"PRIu64" purged\n", - pactive, pdirty, npurge, npurge == 1 ? "" : "s", - nmadvise, nmadvise == 1 ? "" : "s", purged); - - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc nrequests\n"); - CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); - CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); - CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); - CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", - small_allocated + 
large_allocated, - small_nmalloc + large_nmalloc, - small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", - pactive * pagesize ); - CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); - - stats_arena_bins_print(write_cb, cbopaque, i); - stats_arena_lruns_print(write_cb, cbopaque, i); -} -#endif - -void -stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) -{ - int err; - uint64_t epoch; - size_t u64sz; - char s[UMAX2S_BUFSIZE]; - bool general = true; - bool merged = true; - bool unmerged = true; - bool bins = true; - bool large = true; - - /* - * Refresh stats, in case mallctl() was called by the application. - * - * Check for OOM here, since refreshing the ctl cache can trigger - * allocation. In practice, none of the subsequent mallctl()-related - * calls in this function will cause OOM if this one succeeds. - * */ - epoch = 1; - u64sz = sizeof(uint64_t); - err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, - sizeof(uint64_t)); - if (err != 0) { - if (err == EAGAIN) { - malloc_write("<jemalloc>: Memory allocation failure in " - "mallctl(\"epoch\", ...)\n"); - return; - } - malloc_write("<jemalloc>: Failure in mallctl(\"epoch\", " - "...)\n"); - abort(); - } - - if (write_cb == NULL) { - /* - * The caller did not provide an alternate write_cb callback - * function, so use the default one. malloc_write() is an - * inline function, so use malloc_message() directly here. 
- */ - write_cb = JEMALLOC_P(malloc_message); - cbopaque = NULL; - } - - if (opts != NULL) { - unsigned i; - - for (i = 0; opts[i] != '\0'; i++) { - switch (opts[i]) { - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - default:; - } - } - } - - write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); - if (general) { - int err; - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - write_cb(cbopaque, "Version: "); - write_cb(cbopaque, cpv); - write_cb(cbopaque, "\n"); - CTL_GET("config.debug", &bv, bool); - write_cb(cbopaque, "Assertions "); - write_cb(cbopaque, bv ? "enabled" : "disabled"); - write_cb(cbopaque, "\n"); - -#define OPT_WRITE_BOOL(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ - NULL, 0)) == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, bv ? 
"true" : "false"); \ - write_cb(cbopaque, "\n"); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ - NULL, 0)) == 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(sv, 10, s)); \ - write_cb(cbopaque, "\n"); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ - NULL, 0)) == 0) { \ - if (ssv >= 0) { \ - write_cb(cbopaque, " opt."#n": "); \ - write_cb(cbopaque, u2s(ssv, 10, s)); \ - } else { \ - write_cb(cbopaque, " opt."#n": -"); \ - write_cb(cbopaque, u2s(-ssv, 10, s)); \ - } \ - write_cb(cbopaque, "\n"); \ - } -#define OPT_WRITE_CHAR_P(n) \ - if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ - NULL, 0)) == 0) { \ - write_cb(cbopaque, " opt."#n": \""); \ - write_cb(cbopaque, cpv); \ - write_cb(cbopaque, "\"\n"); \ - } - - write_cb(cbopaque, "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_qspace_max) - OPT_WRITE_SIZE_T(lg_cspace_max) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T(lg_dirty_mult) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_BOOL(junk) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(sysv) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_SIZE_T(lg_prof_bt_max) - OPT_WRITE_BOOL(prof_active) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_tcmax) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_leak) - OPT_WRITE_BOOL(overcommit) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - write_cb(cbopaque, "CPUs: "); - write_cb(cbopaque, u2s(ncpus, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.narenas", &uv, unsigned); - write_cb(cbopaque, "Max arenas: "); - write_cb(cbopaque, u2s(uv, 10, s)); - write_cb(cbopaque, 
"\n"); - - write_cb(cbopaque, "Pointer size: "); - write_cb(cbopaque, u2s(sizeof(void *), 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.quantum", &sv, size_t); - write_cb(cbopaque, "Quantum size: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.cacheline", &sv, size_t); - write_cb(cbopaque, "Cacheline size (assumed): "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("arenas.subpage", &sv, size_t); - write_cb(cbopaque, "Subpage spacing: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - - if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, - NULL, 0)) == 0) { - write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - - CTL_GET("arenas.tspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - } - - CTL_GET("arenas.qspace_min", &sv, size_t); - write_cb(cbopaque, "Quantum-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.qspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.cspace_min", &sv, size_t); - write_cb(cbopaque, "Cacheline-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.cspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("arenas.sspace_min", &sv, size_t); - write_cb(cbopaque, "Subpage-spaced sizes: ["); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ".."); - CTL_GET("arenas.sspace_max", &sv, size_t); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "]\n"); - - CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { - write_cb(cbopaque, - "Min active:dirty page ratio per arena: "); - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, ":1\n"); - } else { - write_cb(cbopaque, - "Min 
active:dirty page ratio per arena: N/A\n"); - } - if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv, - &ssz, NULL, 0)) == 0) { - write_cb(cbopaque, - "Maximum thread-cached size class: "); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, "\n"); - } - if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, - &ssz, NULL, 0)) == 0) { - size_t tcache_gc_sweep = (1U << ssv); - bool tcache_enabled; - CTL_GET("opt.tcache", &tcache_enabled, bool); - write_cb(cbopaque, "Thread cache GC sweep interval: "); - write_cb(cbopaque, tcache_enabled && ssv >= 0 ? - u2s(tcache_gc_sweep, 10, s) : "N/A"); - write_cb(cbopaque, "\n"); - } - if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) - == 0 && bv) { - CTL_GET("opt.lg_prof_bt_max", &sv, size_t); - write_cb(cbopaque, "Maximum profile backtrace depth: "); - write_cb(cbopaque, u2s((1U << sv), 10, s)); - write_cb(cbopaque, "\n"); - - CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); - write_cb(cbopaque, - "Maximum per thread backtrace cache: "); - if (ssv >= 0) { - write_cb(cbopaque, u2s((1U << ssv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); - - CTL_GET("opt.lg_prof_sample", &sv, size_t); - write_cb(cbopaque, "Average profile sample interval: "); - write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - write_cb(cbopaque, "Average profile dump interval: "); - if (ssv >= 0) { - write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), - 10, s)); - write_cb(cbopaque, " (2^"); - write_cb(cbopaque, u2s(ssv, 10, s)); - write_cb(cbopaque, ")\n"); - } else - write_cb(cbopaque, "N/A\n"); - } - CTL_GET("arenas.chunksize", &sv, size_t); - write_cb(cbopaque, "Chunk size: "); - write_cb(cbopaque, u2s(sv, 10, s)); - CTL_GET("opt.lg_chunk", &sv, size_t); - write_cb(cbopaque, " 
(2^"); - write_cb(cbopaque, u2s(sv, 10, s)); - write_cb(cbopaque, ")\n"); - } - -#ifdef JEMALLOC_STATS - { - int err; - size_t sszp, ssz; - size_t *cactive; - size_t allocated, active, mapped; - size_t chunks_current, chunks_high, swap_avail; - uint64_t chunks_total; - size_t huge_allocated; - uint64_t huge_nmalloc, huge_ndalloc; - - sszp = sizeof(size_t *); - ssz = sizeof(size_t); - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, mapped: %zu\n", - allocated, active, mapped); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", atomic_read_z(cactive)); - - /* Print chunk stats. */ - CTL_GET("stats.chunks.total", &chunks_total, uint64_t); - CTL_GET("stats.chunks.high", &chunks_high, size_t); - CTL_GET("stats.chunks.current", &chunks_current, size_t); - if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz, - NULL, 0)) == 0) { - size_t lg_chunk; - - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks swap_avail\n"); - CTL_GET("opt.lg_chunk", &lg_chunk, size_t); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu%13zu\n", - chunks_total, chunks_high, chunks_current, - swap_avail << lg_chunk); - } else { - malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " - "highchunks curchunks\n"); - malloc_cprintf(write_cb, cbopaque, - " %13"PRIu64"%13zu%13zu\n", - chunks_total, chunks_high, chunks_current); - } - - /* Print huge stats. 
*/ - CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); - CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); - CTL_GET("stats.huge.allocated", &huge_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "huge: nmalloc ndalloc allocated\n"); - malloc_cprintf(write_cb, cbopaque, - " %12"PRIu64" %12"PRIu64" %12zu\n", - huge_nmalloc, huge_ndalloc, huge_allocated); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - bool initialized[narenas]; - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(initialized); - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. */ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - bool initialized[narenas]; - size_t isz; - unsigned i; - - isz = sizeof(initialized); - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i); - } - } - } - } - } -#endif /* #ifdef JEMALLOC_STATS */ - write_cb(cbopaque, "--- End jemalloc statistics ---\n"); -} diff --git a/jemalloc/src/tcache.c b/jemalloc/src/tcache.c deleted file mode 100644 index 31c329e..0000000 --- a/jemalloc/src/tcache.c +++ /dev/null @@ -1,480 +0,0 @@ -#define JEMALLOC_TCACHE_C_ -#include "jemalloc/internal/jemalloc_internal.h" -#ifdef JEMALLOC_TCACHE -/******************************************************************************/ -/* Data. 
*/ - -bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; - -tcache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ - -/* Map of thread-specific caches. */ -#ifndef NO_TLS -__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); -#endif - -/* - * Same contents as tcache, but initialized such that the TSD destructor is - * called when a thread exits, so that the cache can be cleaned up. - */ -pthread_key_t tcache_tsd; - -size_t nhbins; -size_t tcache_maxclass; -unsigned tcache_gc_incr; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void tcache_thread_cleanup(void *arg); - -/******************************************************************************/ - -void * -tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) -{ - void *ret; - - arena_tcache_fill_small(tcache->arena, tbin, binind -#ifdef JEMALLOC_PROF - , tcache->prof_accumbytes -#endif - ); -#ifdef JEMALLOC_PROF - tcache->prof_accumbytes = 0; -#endif - ret = tcache_alloc_easy(tbin); - - return (ret); -} - -void -tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) -{ - void *ptr; - unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif - - assert(binind < nbins); - assert(rem <= tbin->ncached); - - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { - /* Lock the arena bin associated with the first object. 
*/ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); - arena_t *arena = chunk->arena; - arena_bin_t *bin = &arena->bins[binind]; - -#ifdef JEMALLOC_PROF - if (arena == tcache->arena) { - malloc_mutex_lock(&arena->lock); - arena_prof_accum(arena, tcache->prof_accumbytes); - malloc_mutex_unlock(&arena->lock); - tcache->prof_accumbytes = 0; - } -#endif - - malloc_mutex_lock(&bin->lock); -#ifdef JEMALLOC_STATS - if (arena == tcache->arena) { - assert(merged_stats == false); - merged_stats = true; - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } -#endif - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) { - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> PAGE_SHIFT; - arena_chunk_map_t *mapelm = - &chunk->map[pageind-map_bias]; - arena_dalloc_bin(arena, chunk, ptr, mapelm); - } else { - /* - * This object was allocated via a different - * arena bin than the one that is currently - * locked. Stash the object, so that it can be - * handled in a future pass. - */ - tbin->avail[ndeferred] = ptr; - ndeferred++; - } - } - malloc_mutex_unlock(&bin->lock); - } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. 
- */ - arena_bin_t *bin = &tcache->arena->bins[binind]; - malloc_mutex_lock(&bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); - } -#endif - - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); - tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; -} - -void -tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache_t *tcache -#endif - ) -{ - void *ptr; - unsigned i, nflush, ndeferred; -#ifdef JEMALLOC_STATS - bool merged_stats = false; -#endif - - assert(binind < nhbins); - assert(rem <= tbin->ncached); - - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { - /* Lock the arena associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); - arena_t *arena = chunk->arena; - - malloc_mutex_lock(&arena->lock); -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - if (arena == tcache->arena) { -#endif -#ifdef JEMALLOC_PROF - arena_prof_accum(arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; -#endif -#ifdef JEMALLOC_STATS - merged_stats = true; - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; -#endif -#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) - } -#endif - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; - assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (chunk->arena == arena) - arena_dalloc_large(arena, chunk, ptr); - else { - /* - * This object was allocated via a different - * arena than the one that is currently locked. - * Stash the object, so that it can be handled - * in a future pass. 
- */ - tbin->avail[ndeferred] = ptr; - ndeferred++; - } - } - malloc_mutex_unlock(&arena->lock); - } -#ifdef JEMALLOC_STATS - if (merged_stats == false) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - nbins].nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); - } -#endif - - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); - tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) - tbin->low_water = tbin->ncached; -} - -tcache_t * -tcache_create(arena_t *arena) -{ - tcache_t *tcache; - size_t size, stack_offset; - unsigned i; - - size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. */ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); - /* - * Round up to the nearest multiple of the cacheline size, in order to - * avoid the possibility of false cacheline sharing. - * - * That this works relies on the same logic as in ipalloc(), but we - * cannot directly call ipalloc() here due to tcache bootstrapping - * issues. - */ - size = (size + CACHELINE_MASK) & (-CACHELINE); - - if (size <= small_maxclass) - tcache = (tcache_t *)arena_malloc_small(arena, size, true); - else if (size <= tcache_maxclass) - tcache = (tcache_t *)arena_malloc_large(arena, size, true); - else - tcache = (tcache_t *)icalloc(size); - - if (tcache == NULL) - return (NULL); - -#ifdef JEMALLOC_STATS - /* Link into list of extant tcaches. 
*/ - malloc_mutex_lock(&arena->lock); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); -#endif - - tcache->arena = arena; - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + - (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - } - - TCACHE_SET(tcache); - - return (tcache); -} - -void -tcache_destroy(tcache_t *tcache) -{ - unsigned i; - size_t tcache_size; - -#ifdef JEMALLOC_STATS - /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(&tcache->arena->lock); - ql_remove(&tcache->arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&tcache->arena->lock); - tcache_stats_merge(tcache, tcache->arena); -#endif - - for (i = 0; i < nbins; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_small(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; - arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); - } -#endif - } - - for (; i < nhbins; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tbin, i, 0 -#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) - , tcache -#endif - ); - -#ifdef JEMALLOC_STATS - if (tbin->tstats.nrequests != 0) { - arena_t *arena = tcache->arena; - malloc_mutex_lock(&arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - nbins].nrequests += - tbin->tstats.nrequests; - malloc_mutex_unlock(&arena->lock); - } -#endif - } - -#ifdef JEMALLOC_PROF - if (tcache->prof_accumbytes > 0) { - malloc_mutex_lock(&tcache->arena->lock); - arena_prof_accum(tcache->arena, tcache->prof_accumbytes); - 
malloc_mutex_unlock(&tcache->arena->lock); - } -#endif - - tcache_size = arena_salloc(tcache); - if (tcache_size <= small_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> - PAGE_SHIFT; - arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; - arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + - (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << - PAGE_SHIFT)); - arena_bin_t *bin = run->bin; - - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin(arena, chunk, tcache, mapelm); - malloc_mutex_unlock(&bin->lock); - } else if (tcache_size <= tcache_maxclass) { - arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); - arena_t *arena = chunk->arena; - - malloc_mutex_lock(&arena->lock); - arena_dalloc_large(arena, chunk, tcache); - malloc_mutex_unlock(&arena->lock); - } else - idalloc(tcache); -} - -static void -tcache_thread_cleanup(void *arg) -{ - tcache_t *tcache = (tcache_t *)arg; - - if (tcache == (void *)(uintptr_t)1) { - /* - * The previous time this destructor was called, we set the key - * to 1 so that other destructors wouldn't cause re-creation of - * the tcache. This time, do nothing, so that the destructor - * will not be called again. - */ - } else if (tcache == (void *)(uintptr_t)2) { - /* - * Another destructor called an allocator function after this - * destructor was called. Reset tcache to 1 in order to - * receive another callback. - */ - TCACHE_SET((uintptr_t)1); - } else if (tcache != NULL) { - assert(tcache != (void *)(uintptr_t)1); - tcache_destroy(tcache); - TCACHE_SET((uintptr_t)1); - } -} - -#ifdef JEMALLOC_STATS -void -tcache_stats_merge(tcache_t *tcache, arena_t *arena) -{ - unsigned i; - - /* Merge and reset tcache stats. 
*/ - for (i = 0; i < nbins; i++) { - arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(&bin->lock); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); - tbin->tstats.nrequests = 0; - } - - for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins]; - tcache_bin_t *tbin = &tcache->tbins[i]; - arena->stats.nrequests_large += tbin->tstats.nrequests; - lstats->nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - } -} -#endif - -bool -tcache_boot(void) -{ - - if (opt_tcache) { - unsigned i; - - /* - * If necessary, clamp opt_lg_tcache_max, now that - * small_maxclass and arena_maxclass are known. - */ - if (opt_lg_tcache_max < 0 || (1U << - opt_lg_tcache_max) < small_maxclass) - tcache_maxclass = small_maxclass; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; - else - tcache_maxclass = (1U << opt_lg_tcache_max); - - nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); - - /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * - sizeof(tcache_bin_info_t)); - if (tcache_bin_info == NULL) - return (true); - stack_nelms = 0; - for (i = 0; i < nbins; i++) { - if ((arena_bin_info[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); - } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; - } - stack_nelms += tcache_bin_info[i].ncached_max; - } - for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; - } - - /* Compute incremental GC event threshold. */ - if (opt_lg_tcache_gc_sweep >= 0) { - tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / - nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == - 0) ? 
0 : 1); - } else - tcache_gc_incr = 0; - - if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != - 0) { - malloc_write( - "<jemalloc>: Error in pthread_key_create()\n"); - abort(); - } - } - - return (false); -} -/******************************************************************************/ -#endif /* JEMALLOC_TCACHE */ diff --git a/jemalloc/src/zone.c b/jemalloc/src/zone.c deleted file mode 100644 index 2c1b231..0000000 --- a/jemalloc/src/zone.c +++ /dev/null @@ -1,354 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" -#ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." -#endif - -/******************************************************************************/ -/* Data. */ - -static malloc_zone_t zone, szone; -static struct malloc_introspection_t zone_introspect, ozone_introspect; - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static size_t zone_size(malloc_zone_t *zone, void *ptr); -static void *zone_malloc(malloc_zone_t *zone, size_t size); -static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); -static void *zone_valloc(malloc_zone_t *zone, size_t size); -static void zone_free(malloc_zone_t *zone, void *ptr); -static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 6) -static void *zone_memalign(malloc_zone_t *zone, size_t alignment, - size_t size); -static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -#endif -static void *zone_destroy(malloc_zone_t *zone); -static size_t zone_good_size(malloc_zone_t *zone, size_t size); -static void zone_force_lock(malloc_zone_t *zone); -static void zone_force_unlock(malloc_zone_t *zone); -static size_t ozone_size(malloc_zone_t *zone, void *ptr); -static void ozone_free(malloc_zone_t *zone, void *ptr); -static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -static unsigned 
ozone_batch_malloc(malloc_zone_t *zone, size_t size, - void **results, unsigned num_requested); -static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, - unsigned num); -#if (JEMALLOC_ZONE_VERSION >= 6) -static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -#endif -static void ozone_force_lock(malloc_zone_t *zone); -static void ozone_force_unlock(malloc_zone_t *zone); - -/******************************************************************************/ -/* - * Functions. - */ - -static size_t -zone_size(malloc_zone_t *zone, void *ptr) -{ - - /* - * There appear to be places within Darwin (such as setenv(3)) that - * cause calls to this function with pointers that *no* zone owns. If - * we knew that all pointers were owned by *some* zone, we could split - * our zone into two parts, and use one as the default allocator and - * the other as the default deallocator/reallocator. Since that will - * not work in practice, we must check all pointers to assure that they - * reside within a mapped chunk before determining size. - */ - return (ivsalloc(ptr)); -} - -static void * -zone_malloc(malloc_zone_t *zone, size_t size) -{ - - return (JEMALLOC_P(malloc)(size)); -} - -static void * -zone_calloc(malloc_zone_t *zone, size_t num, size_t size) -{ - - return (JEMALLOC_P(calloc)(num, size)); -} - -static void * -zone_valloc(malloc_zone_t *zone, size_t size) -{ - void *ret = NULL; /* Assignment avoids useless compiler warning. */ - - JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size); - - return (ret); -} - -static void -zone_free(malloc_zone_t *zone, void *ptr) -{ - - JEMALLOC_P(free)(ptr); -} - -static void * -zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - - return (JEMALLOC_P(realloc)(ptr, size)); -} - -#if (JEMALLOC_ZONE_VERSION >= 6) -static void * -zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) -{ - void *ret = NULL; /* Assignment avoids useless compiler warning. 
*/ - - JEMALLOC_P(posix_memalign)(&ret, alignment, size); - - return (ret); -} - -static void -zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ - - assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); -} -#endif - -static void * -zone_destroy(malloc_zone_t *zone) -{ - - /* This function should never be called. */ - assert(false); - return (NULL); -} - -static size_t -zone_good_size(malloc_zone_t *zone, size_t size) -{ - size_t ret; - void *p; - - /* - * Actually create an object of the appropriate size, then find out - * how large it could have been without moving up to the next size - * class. - */ - p = JEMALLOC_P(malloc)(size); - if (p != NULL) { - ret = isalloc(p); - JEMALLOC_P(free)(p); - } else - ret = size; - - return (ret); -} - -static void -zone_force_lock(malloc_zone_t *zone) -{ - - if (isthreaded) - jemalloc_prefork(); -} - -static void -zone_force_unlock(malloc_zone_t *zone) -{ - - if (isthreaded) - jemalloc_postfork(); -} - -malloc_zone_t * -create_zone(void) -{ - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone.memalign = zone_memalign; - zone.free_definite_size = zone_free_definite_size; -#endif - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; -#endif - - 
return (&zone); -} - -static size_t -ozone_size(malloc_zone_t *zone, void *ptr) -{ - size_t ret; - - ret = ivsalloc(ptr); - if (ret == 0) - ret = szone.size(zone, ptr); - - return (ret); -} - -static void -ozone_free(malloc_zone_t *zone, void *ptr) -{ - - if (ivsalloc(ptr) != 0) - JEMALLOC_P(free)(ptr); - else { - size_t size = szone.size(zone, ptr); - if (size != 0) - (szone.free)(zone, ptr); - } -} - -static void * -ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - size_t oldsize; - - if (ptr == NULL) - return (JEMALLOC_P(malloc)(size)); - - oldsize = ivsalloc(ptr); - if (oldsize != 0) - return (JEMALLOC_P(realloc)(ptr, size)); - else { - oldsize = szone.size(zone, ptr); - if (oldsize == 0) - return (JEMALLOC_P(malloc)(size)); - else { - void *ret = JEMALLOC_P(malloc)(size); - if (ret != NULL) { - memcpy(ret, ptr, (oldsize < size) ? oldsize : - size); - (szone.free)(zone, ptr); - } - return (ret); - } - } -} - -static unsigned -ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results, - unsigned num_requested) -{ - - /* Don't bother implementing this interface, since it isn't required. */ - return (0); -} - -static void -ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num) -{ - unsigned i; - - for (i = 0; i < num; i++) - ozone_free(zone, to_be_freed[i]); -} - -#if (JEMALLOC_ZONE_VERSION >= 6) -static void -ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ - - if (ivsalloc(ptr) != 0) { - assert(ivsalloc(ptr) == size); - JEMALLOC_P(free)(ptr); - } else { - assert(size == szone.size(zone, ptr)); - szone.free_definite_size(zone, ptr, size); - } -} -#endif - -static void -ozone_force_lock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. */ - szone.introspect->force_lock(zone); -} - -static void -ozone_force_unlock(malloc_zone_t *zone) -{ - - /* jemalloc locking is taken care of by the normal jemalloc zone. 
*/ - szone.introspect->force_unlock(zone); -} - -/* - * Overlay the default scalable zone (szone) such that existing allocations are - * drained, and further allocations come from jemalloc. This is necessary - * because Core Foundation directly accesses and uses the szone before the - * jemalloc library is even loaded. - */ -void -szone2ozone(malloc_zone_t *zone) -{ - - /* - * Stash a copy of the original szone so that we can call its - * functions as needed. Note that the internally, the szone stores its - * bookkeeping data structures immediately following the malloc_zone_t - * header, so when calling szone functions, we need to pass a pointer - * to the original zone structure. - */ - memcpy(&szone, zone, sizeof(malloc_zone_t)); - - zone->size = (void *)ozone_size; - zone->malloc = (void *)zone_malloc; - zone->calloc = (void *)zone_calloc; - zone->valloc = (void *)zone_valloc; - zone->free = (void *)ozone_free; - zone->realloc = (void *)ozone_realloc; - zone->destroy = (void *)zone_destroy; - zone->zone_name = "jemalloc_ozone"; - zone->batch_malloc = ozone_batch_malloc; - zone->batch_free = ozone_batch_free; - zone->introspect = &ozone_introspect; - zone->version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone->memalign = zone_memalign; - zone->free_definite_size = ozone_free_definite_size; -#endif - - ozone_introspect.enumerator = NULL; - ozone_introspect.good_size = (void *)zone_good_size; - ozone_introspect.check = NULL; - ozone_introspect.print = NULL; - ozone_introspect.log = NULL; - ozone_introspect.force_lock = (void *)ozone_force_lock; - ozone_introspect.force_unlock = (void *)ozone_force_unlock; - ozone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - ozone_introspect.zone_locked = NULL; -#endif -} diff --git a/jemalloc/test/allocated.c b/jemalloc/test/allocated.c deleted file mode 100644 index b1e40e4..0000000 --- a/jemalloc/test/allocated.c +++ /dev/null @@ -1,142 +0,0 @@ -#include -#include -#include -#include 
-#include -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -void * -thread_start(void *arg) -{ - int err; - void *p; - uint64_t a0, a1, d0, d1; - uint64_t *ap0, *ap1, *dp0, *dp1; - size_t sz, usize; - - sz = sizeof(a0); - if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL, - 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto RETURN; - } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - sz = sizeof(ap0); - if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL, - 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto RETURN; - } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - assert(*ap0 == a0); - - sz = sizeof(d0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL, - 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto RETURN; - } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - sz = sizeof(dp0); - if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL, - 0))) { - if (err == ENOENT) { -#ifdef JEMALLOC_STATS - assert(false); -#endif - goto RETURN; - } - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - exit(1); - } - assert(*dp0 == d0); - - p = JEMALLOC_P(malloc)(1); - if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); - exit(1); - } - - sz = sizeof(a1); - JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0); - sz = sizeof(ap1); - JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0); - assert(*ap1 == a1); - assert(ap0 == ap1); - - usize = JEMALLOC_P(malloc_usable_size)(p); - assert(a0 + usize <= a1); - - JEMALLOC_P(free)(p); - - sz = sizeof(d1); - JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0); - sz = sizeof(dp1); - 
JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0); - assert(*dp1 == d1); - assert(dp0 == dp1); - - assert(d0 + usize <= d1); - -RETURN: - return (NULL); -} - -int -main(void) -{ - int ret = 0; - pthread_t thread; - - fprintf(stderr, "Test begin\n"); - - thread_start(NULL); - - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto RETURN; - } - pthread_join(thread, (void *)&ret); - - thread_start(NULL); - - if (pthread_create(&thread, NULL, thread_start, NULL) - != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); - ret = 1; - goto RETURN; - } - pthread_join(thread, (void *)&ret); - - thread_start(NULL); - -RETURN: - fprintf(stderr, "Test end\n"); - return (ret); -} diff --git a/jemalloc/test/allocated.exp b/jemalloc/test/allocated.exp deleted file mode 100644 index 369a88d..0000000 --- a/jemalloc/test/allocated.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/jemalloc/test/allocm.c b/jemalloc/test/allocm.c deleted file mode 100644 index 59d0002..0000000 --- a/jemalloc/test/allocm.c +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) -#define NITER 4 - -int -main(void) -{ - int r; - void *p; - size_t sz, alignment, total, tsz; - unsigned i; - void *ps[NITER]; - - fprintf(stderr, "Test begin\n"); - - sz = 0; - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); - if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); - abort(); - } - if (sz < 42) - fprintf(stderr, "Real size smaller than expected\n"); - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); - - r = JEMALLOC_P(allocm)(&p, NULL, 42, 0); - if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); - 
abort(); - } - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); - - r = JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); - abort(); - } - if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected dallocm() error\n"); - -#if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - sz = 0x8000000000000000LLU; -#else - alignment = 0x80000000LU; - sz = 0x80000000LU; -#endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - -#if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - sz = 0x8400000000000001LLU; -#else - alignment = 0x40000000LU; - sz = 0x84000001LU; -#endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - - alignment = 0x10LLU; -#if LG_SIZEOF_PTR == 3 - sz = 0xfffffffffffffff0LLU; -#else - sz = 0xfffffff0LU; -#endif - r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); - if (r == ALLOCM_SUCCESS) { - fprintf(stderr, - "Expected error for allocm(&p, %zu, 0x%x)\n", - sz, ALLOCM_ALIGN(alignment)); - } - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 0; i < NITER; i++) { - r = JEMALLOC_P(allocm)(&ps[i], NULL, sz, - ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); - if (r != ALLOCM_SUCCESS) { - fprintf(stderr, - "Error for size %zu (0x%zx): %d\n", - sz, sz, r); - exit(1); - } - if ((uintptr_t)p & (alignment-1)) { - fprintf(stderr, - "%p inadequately aligned for" - " 
alignment: %zu\n", p, alignment); - } - JEMALLOC_P(sallocm)(ps[i], &tsz, 0); - total += tsz; - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < NITER; i++) { - if (ps[i] != NULL) { - JEMALLOC_P(dallocm)(ps[i], 0); - ps[i] = NULL; - } - } - } - } - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/jemalloc/test/allocm.exp b/jemalloc/test/allocm.exp deleted file mode 100644 index b5061c7..0000000 --- a/jemalloc/test/allocm.exp +++ /dev/null @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff --git a/jemalloc/test/bitmap.c b/jemalloc/test/bitmap.c deleted file mode 100644 index adfaacf..0000000 --- a/jemalloc/test/bitmap.c +++ /dev/null @@ -1,157 +0,0 @@ -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -/* - * Avoid using the assert() from jemalloc_internal.h, since it requires - * internal libjemalloc functionality. - * */ -#include - -/* - * Directly include the bitmap code, since it isn't exposed outside - * libjemalloc. 
- */ -#include "../src/bitmap.c" - -#if (LG_BITMAP_MAXBITS > 12) -# define MAXBITS 4500 -#else -# define MAXBITS (1U << LG_BITMAP_MAXBITS) -#endif - -static void -test_bitmap_size(void) -{ - size_t i, prev_size; - - prev_size = 0; - for (i = 1; i <= MAXBITS; i++) { - size_t size = bitmap_size(i); - assert(size >= prev_size); - prev_size = size; - } -} - -static void -test_bitmap_init(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - assert(bitmap_get(bitmap, &binfo, j) == false); - - } - } -} - -static void -test_bitmap_set(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - } - } -} - -static void -test_bitmap_unset(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - for (j = 0; j < i; j++) - bitmap_unset(bitmap, &binfo, j); - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert(bitmap_full(bitmap, &binfo)); - } - } -} - -static void -test_bitmap_sfu(void) -{ - size_t i; - - for (i = 1; i <= MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - ssize_t j; - bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; - bitmap_init(bitmap, &binfo); - - /* Iteratively set bits starting at the beginning. 
*/ - for (j = 0; j < i; j++) - assert(bitmap_sfu(bitmap, &binfo) == j); - assert(bitmap_full(bitmap, &binfo)); - - /* - * Iteratively unset bits starting at the end, and - * verify that bitmap_sfu() reaches the unset bits. - */ - for (j = i - 1; j >= 0; j--) { - bitmap_unset(bitmap, &binfo, j); - assert(bitmap_sfu(bitmap, &binfo) == j); - bitmap_unset(bitmap, &binfo, j); - } - assert(bitmap_get(bitmap, &binfo, 0) == false); - - /* - * Iteratively set bits starting at the beginning, and - * verify that bitmap_sfu() looks past them. - */ - for (j = 1; j < i; j++) { - bitmap_set(bitmap, &binfo, j - 1); - assert(bitmap_sfu(bitmap, &binfo) == j); - bitmap_unset(bitmap, &binfo, j); - } - assert(bitmap_sfu(bitmap, &binfo) == i - 1); - assert(bitmap_full(bitmap, &binfo)); - } - } -} - -int -main(void) -{ - fprintf(stderr, "Test begin\n"); - - test_bitmap_size(); - test_bitmap_init(); - test_bitmap_set(); - test_bitmap_unset(); - test_bitmap_sfu(); - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/jemalloc/test/bitmap.exp b/jemalloc/test/bitmap.exp deleted file mode 100644 index 369a88d..0000000 --- a/jemalloc/test/bitmap.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/jemalloc/test/jemalloc_test.h.in b/jemalloc/test/jemalloc_test.h.in deleted file mode 100644 index 0c48895..0000000 --- a/jemalloc/test/jemalloc_test.h.in +++ /dev/null @@ -1,6 +0,0 @@ -/* - * This header should be included by tests, rather than directly including - * jemalloc/jemalloc.h, because --with-install-suffix may cause the header to - * have a different name. 
- */ -#include "jemalloc/jemalloc@install_suffix@.h" diff --git a/jemalloc/test/mremap.c b/jemalloc/test/mremap.c deleted file mode 100644 index 146c66f..0000000 --- a/jemalloc/test/mremap.c +++ /dev/null @@ -1,67 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - int ret, err; - size_t sz, lg_chunk, chunksize, i; - char *p, *q; - - fprintf(stderr, "Test begin\n"); - - sz = sizeof(lg_chunk); - if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL, - 0))) { - assert(err != ENOENT); - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto RETURN; - } - chunksize = ((size_t)1U) << lg_chunk; - - p = (char *)malloc(chunksize); - if (p == NULL) { - fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); - ret = 1; - goto RETURN; - } - memset(p, 'a', chunksize); - - q = (char *)realloc(p, chunksize * 2); - if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, - q); - ret = 1; - goto RETURN; - } - for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); - } - - p = q; - - q = (char *)realloc(p, chunksize); - if (q == NULL) { - fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); - ret = 1; - goto RETURN; - } - for (i = 0; i < chunksize; i++) { - assert(q[i] == 'a'); - } - - free(q); - - ret = 0; -RETURN: - fprintf(stderr, "Test end\n"); - return (ret); -} diff --git a/jemalloc/test/mremap.exp b/jemalloc/test/mremap.exp deleted file mode 100644 index 369a88d..0000000 --- a/jemalloc/test/mremap.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/jemalloc/test/posix_memalign.c b/jemalloc/test/posix_memalign.c deleted file mode 100644 index 3e306c0..0000000 --- a/jemalloc/test/posix_memalign.c +++ /dev/null @@ -1,121 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define CHUNK 0x400000 -/* #define MAXALIGN 
((size_t)0x80000000000LLU) */ -#define MAXALIGN ((size_t)0x2000000LLU) -#define NITER 4 - -int -main(void) -{ - size_t alignment, size, total; - unsigned i; - int err; - void *p, *ps[NITER]; - - fprintf(stderr, "Test begin\n"); - - /* Test error conditions. */ - for (alignment = 0; alignment < sizeof(void *); alignment++) { - err = JEMALLOC_P(posix_memalign)(&p, alignment, 1); - if (err != EINVAL) { - fprintf(stderr, - "Expected error for invalid alignment %zu\n", - alignment); - } - } - - for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { - err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1); - if (err == 0) { - fprintf(stderr, - "Expected error for invalid alignment %zu\n", - alignment + 1); - } - } - -#if LG_SIZEOF_PTR == 3 - alignment = 0x8000000000000000LLU; - size = 0x8000000000000000LLU; -#else - alignment = 0x80000000LU; - size = 0x80000000LU; -#endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); - if (err == 0) { - fprintf(stderr, - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - -#if LG_SIZEOF_PTR == 3 - alignment = 0x4000000000000000LLU; - size = 0x8400000000000001LLU; -#else - alignment = 0x40000000LU; - size = 0x84000001LU; -#endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); - if (err == 0) { - fprintf(stderr, - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - - alignment = 0x10LLU; -#if LG_SIZEOF_PTR == 3 - size = 0xfffffffffffffff0LLU; -#else - size = 0xfffffff0LU; -#endif - err = JEMALLOC_P(posix_memalign)(&p, alignment, size); - if (err == 0) { - fprintf(stderr, - "Expected error for posix_memalign(&p, %zu, %zu)\n", - alignment, size); - } - - for (i = 0; i < NITER; i++) - ps[i] = NULL; - - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { - total = 0; - fprintf(stderr, "Alignment: %zu\n", alignment); - for (size = 1; - size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { - for (i = 
0; i < NITER; i++) { - err = JEMALLOC_P(posix_memalign)(&ps[i], - alignment, size); - if (err) { - fprintf(stderr, - "Error for size %zu (0x%zx): %s\n", - size, size, strerror(err)); - exit(1); - } - total += JEMALLOC_P(malloc_usable_size)(ps[i]); - if (total >= (MAXALIGN << 1)) - break; - } - for (i = 0; i < NITER; i++) { - if (ps[i] != NULL) { - JEMALLOC_P(free)(ps[i]); - ps[i] = NULL; - } - } - } - } - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/jemalloc/test/posix_memalign.exp b/jemalloc/test/posix_memalign.exp deleted file mode 100644 index b5061c7..0000000 --- a/jemalloc/test/posix_memalign.exp +++ /dev/null @@ -1,25 +0,0 @@ -Test begin -Alignment: 8 -Alignment: 16 -Alignment: 32 -Alignment: 64 -Alignment: 128 -Alignment: 256 -Alignment: 512 -Alignment: 1024 -Alignment: 2048 -Alignment: 4096 -Alignment: 8192 -Alignment: 16384 -Alignment: 32768 -Alignment: 65536 -Alignment: 131072 -Alignment: 262144 -Alignment: 524288 -Alignment: 1048576 -Alignment: 2097152 -Alignment: 4194304 -Alignment: 8388608 -Alignment: 16777216 -Alignment: 33554432 -Test end diff --git a/jemalloc/test/rallocm.c b/jemalloc/test/rallocm.c deleted file mode 100644 index a8cadeb..0000000 --- a/jemalloc/test/rallocm.c +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -int -main(void) -{ - void *p, *q; - size_t sz, tsz; - int r; - - fprintf(stderr, "Test begin\n"); - - r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); - if (r != ALLOCM_SUCCESS) { - fprintf(stderr, "Unexpected allocm() error\n"); - abort(); - } - - q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (q != p) - fprintf(stderr, "Unexpected object move\n"); - if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - 
fprintf(stderr, "Unexpected rallocm() error\n"); - if (q != p) - fprintf(stderr, "Unexpected object move\n"); - if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_ERR_NOT_MOVED) - fprintf(stderr, "Unexpected rallocm() result\n"); - if (q != p) - fprintf(stderr, "Unexpected object move\n"); - if (tsz != sz) { - fprintf(stderr, "Unexpected size change: %zu --> %zu\n", - sz, tsz); - } - - q = p; - r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (q == p) - fprintf(stderr, "Expected object move\n"); - if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (q == p) - fprintf(stderr, "Expected object move\n"); - if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", - sz, tsz); - } - p = q; - sz = tsz; - - r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (q != p) - fprintf(stderr, "Unexpected object move\n"); - if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); - if (r != ALLOCM_SUCCESS) - fprintf(stderr, "Unexpected rallocm() error\n"); - if (q != p) - fprintf(stderr, "Unexpected object move\n"); - if (tsz == sz) { - fprintf(stderr, "Expected size change: %zu --> %zu\n", - sz, tsz); - } - sz = tsz; - - 
JEMALLOC_P(dallocm)(p, 0); - - fprintf(stderr, "Test end\n"); - return (0); -} diff --git a/jemalloc/test/rallocm.exp b/jemalloc/test/rallocm.exp deleted file mode 100644 index 369a88d..0000000 --- a/jemalloc/test/rallocm.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/jemalloc/test/thread_arena.c b/jemalloc/test/thread_arena.c deleted file mode 100644 index ef8d681..0000000 --- a/jemalloc/test/thread_arena.c +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_MANGLE -#include "jemalloc_test.h" - -#define NTHREADS 10 - -void * -thread_start(void *arg) -{ - unsigned main_arena_ind = *(unsigned *)arg; - void *p; - unsigned arena_ind; - size_t size; - int err; - - p = JEMALLOC_P(malloc)(1); - if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); - return (void *)1; - } - - size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, - &main_arena_ind, sizeof(main_arena_ind)))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; - } - - size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, - 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - return (void *)1; - } - assert(arena_ind == main_arena_ind); - - return (NULL); -} - -int -main(void) -{ - int ret = 0; - void *p; - unsigned arena_ind; - size_t size; - int err; - pthread_t threads[NTHREADS]; - unsigned i; - - fprintf(stderr, "Test begin\n"); - - p = JEMALLOC_P(malloc)(1); - if (p == NULL) { - fprintf(stderr, "%s(): Error in malloc()\n", __func__); - ret = 1; - goto RETURN; - } - - size = sizeof(arena_ind); - if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, - 0))) { - fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, - strerror(err)); - ret = 1; - goto RETURN; - } - - for (i = 0; i < NTHREADS; i++) { - if 
(pthread_create(&threads[i], NULL, thread_start, - (void *)&arena_ind) != 0) { - fprintf(stderr, "%s(): Error in pthread_create()\n", - __func__); - ret = 1; - goto RETURN; - } - } - - for (i = 0; i < NTHREADS; i++) - pthread_join(threads[i], (void *)&ret); - -RETURN: - fprintf(stderr, "Test end\n"); - return (ret); -} diff --git a/jemalloc/test/thread_arena.exp b/jemalloc/test/thread_arena.exp deleted file mode 100644 index 369a88d..0000000 --- a/jemalloc/test/thread_arena.exp +++ /dev/null @@ -1,2 +0,0 @@ -Test begin -Test end diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 0000000..9aaf47f --- /dev/null +++ b/src/arena.c @@ -0,0 +1,2703 @@ +#define JEMALLOC_ARENA_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; +size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; +ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; +uint8_t const *small_size2bin; +arena_bin_info_t *arena_bin_info; + +/* Various bin-related settings. */ +unsigned nqbins; +unsigned ncbins; +unsigned nsbins; +unsigned nbins; +size_t qspace_max; +size_t cspace_min; +size_t cspace_max; +size_t sspace_min; +size_t sspace_max; + +size_t lg_mspace; +size_t mspace_mask; + +/* + * const_small_size2bin is a static constant lookup table that in the common + * case can be used as-is for small_size2bin. + */ +#if (LG_TINY_MIN == 2) +#define S2B_4(i) i, +#define S2B_8(i) S2B_4(i) S2B_4(i) +#elif (LG_TINY_MIN == 3) +#define S2B_8(i) i, +#else +# error "Unsupported LG_TINY_MIN" +#endif +#define S2B_16(i) S2B_8(i) S2B_8(i) +#define S2B_32(i) S2B_16(i) S2B_16(i) +#define S2B_64(i) S2B_32(i) S2B_32(i) +#define S2B_128(i) S2B_64(i) S2B_64(i) +#define S2B_256(i) S2B_128(i) S2B_128(i) +/* + * The number of elements in const_small_size2bin is dependent on the + * definition for SUBPAGE. 
+ */ +static JEMALLOC_ATTR(aligned(CACHELINE)) + const uint8_t const_small_size2bin[] = { +#if (LG_QUANTUM == 4) +/* 16-byte quantum **********************/ +# ifdef JEMALLOC_TINY +# if (LG_TINY_MIN == 2) + S2B_4(0) /* 4 */ + S2B_4(1) /* 8 */ + S2B_8(2) /* 16 */ +# define S2B_QMIN 2 +# elif (LG_TINY_MIN == 3) + S2B_8(0) /* 8 */ + S2B_8(1) /* 16 */ +# define S2B_QMIN 1 +# else +# error "Unsupported LG_TINY_MIN" +# endif +# else + S2B_16(0) /* 16 */ +# define S2B_QMIN 0 +# endif + S2B_16(S2B_QMIN + 1) /* 32 */ + S2B_16(S2B_QMIN + 2) /* 48 */ + S2B_16(S2B_QMIN + 3) /* 64 */ + S2B_16(S2B_QMIN + 4) /* 80 */ + S2B_16(S2B_QMIN + 5) /* 96 */ + S2B_16(S2B_QMIN + 6) /* 112 */ + S2B_16(S2B_QMIN + 7) /* 128 */ +# define S2B_CMIN (S2B_QMIN + 8) +#else +/* 8-byte quantum ***********************/ +# ifdef JEMALLOC_TINY +# if (LG_TINY_MIN == 2) + S2B_4(0) /* 4 */ + S2B_4(1) /* 8 */ +# define S2B_QMIN 1 +# else +# error "Unsupported LG_TINY_MIN" +# endif +# else + S2B_8(0) /* 8 */ +# define S2B_QMIN 0 +# endif + S2B_8(S2B_QMIN + 1) /* 16 */ + S2B_8(S2B_QMIN + 2) /* 24 */ + S2B_8(S2B_QMIN + 3) /* 32 */ + S2B_8(S2B_QMIN + 4) /* 40 */ + S2B_8(S2B_QMIN + 5) /* 48 */ + S2B_8(S2B_QMIN + 6) /* 56 */ + S2B_8(S2B_QMIN + 7) /* 64 */ + S2B_8(S2B_QMIN + 8) /* 72 */ + S2B_8(S2B_QMIN + 9) /* 80 */ + S2B_8(S2B_QMIN + 10) /* 88 */ + S2B_8(S2B_QMIN + 11) /* 96 */ + S2B_8(S2B_QMIN + 12) /* 104 */ + S2B_8(S2B_QMIN + 13) /* 112 */ + S2B_8(S2B_QMIN + 14) /* 120 */ + S2B_8(S2B_QMIN + 15) /* 128 */ +# define S2B_CMIN (S2B_QMIN + 16) +#endif +/****************************************/ + S2B_64(S2B_CMIN + 0) /* 192 */ + S2B_64(S2B_CMIN + 1) /* 256 */ + S2B_64(S2B_CMIN + 2) /* 320 */ + S2B_64(S2B_CMIN + 3) /* 384 */ + S2B_64(S2B_CMIN + 4) /* 448 */ + S2B_64(S2B_CMIN + 5) /* 512 */ +# define S2B_SMIN (S2B_CMIN + 6) + S2B_256(S2B_SMIN + 0) /* 768 */ + S2B_256(S2B_SMIN + 1) /* 1024 */ + S2B_256(S2B_SMIN + 2) /* 1280 */ + S2B_256(S2B_SMIN + 3) /* 1536 */ + S2B_256(S2B_SMIN + 4) /* 1792 */ + S2B_256(S2B_SMIN + 
5) /* 2048 */ + S2B_256(S2B_SMIN + 6) /* 2304 */ + S2B_256(S2B_SMIN + 7) /* 2560 */ + S2B_256(S2B_SMIN + 8) /* 2816 */ + S2B_256(S2B_SMIN + 9) /* 3072 */ + S2B_256(S2B_SMIN + 10) /* 3328 */ + S2B_256(S2B_SMIN + 11) /* 3584 */ + S2B_256(S2B_SMIN + 12) /* 3840 */ +#if (STATIC_PAGE_SHIFT == 13) + S2B_256(S2B_SMIN + 13) /* 4096 */ + S2B_256(S2B_SMIN + 14) /* 4352 */ + S2B_256(S2B_SMIN + 15) /* 4608 */ + S2B_256(S2B_SMIN + 16) /* 4864 */ + S2B_256(S2B_SMIN + 17) /* 5120 */ + S2B_256(S2B_SMIN + 18) /* 5376 */ + S2B_256(S2B_SMIN + 19) /* 5632 */ + S2B_256(S2B_SMIN + 20) /* 5888 */ + S2B_256(S2B_SMIN + 21) /* 6144 */ + S2B_256(S2B_SMIN + 22) /* 6400 */ + S2B_256(S2B_SMIN + 23) /* 6656 */ + S2B_256(S2B_SMIN + 24) /* 6912 */ + S2B_256(S2B_SMIN + 25) /* 7168 */ + S2B_256(S2B_SMIN + 26) /* 7424 */ + S2B_256(S2B_SMIN + 27) /* 7680 */ + S2B_256(S2B_SMIN + 28) /* 7936 */ +#endif +}; +#undef S2B_1 +#undef S2B_2 +#undef S2B_4 +#undef S2B_8 +#undef S2B_16 +#undef S2B_32 +#undef S2B_64 +#undef S2B_128 +#undef S2B_256 +#undef S2B_QMIN +#undef S2B_CMIN +#undef S2B_SMIN + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/
+
+static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
+    bool large, bool zero);
+static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
+static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
+static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
+    bool zero);
+static void	arena_purge(arena_t *arena, bool all);
+static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
+static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize);
+static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
+static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
+static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
+static void	arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run,
+    arena_bin_t *bin);
+static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin);
+static void	arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk,
+    arena_run_t *run, arena_bin_t *bin);
+static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size);
+static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
+    void *ptr, size_t oldsize, size_t size, size_t extra, bool zero);
+static bool	arena_ralloc_large(void *ptr, size_t oldsize, size_t size,
+    size_t extra, bool zero);
+static bool	small_size2bin_init(void);
+#ifdef JEMALLOC_DEBUG
+static void	small_size2bin_validate(void);
+#endif
+static bool	small_size2bin_init_hard(void);
+static size_t	bin_info_run_size_calc(arena_bin_info_t *bin_info,
+    size_t min_run_size);
+static bool	bin_info_init(void);
+
+/******************************************************************************/
+
+/*
+ * Order two chunk map elements by their own addresses.  Returns -1, 0 or 1
+ * when a is located below, at, or above b, respectively.
+ */
+static inline int
+arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+	uintptr_t lhs, rhs;
+
+	assert(a != NULL);
+	assert(b != NULL);
+
+	lhs = (uintptr_t)a;
+	rhs = (uintptr_t)b;
+	if (lhs < rhs)
+		return (-1);
+	return (lhs > rhs);
+}
+
+/* Generate red-black tree functions. */
+rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
+    arena_chunk_map_t, u.rb_link, arena_run_comp)
+
+/*
+ * Order two chunk map elements for the runs_avail_* trees.  The primary key
+ * is the size stored in the bits field (everything above PAGE_MASK); ties are
+ * broken by element address.  An element flagged CHUNK_MAP_KEY is a search
+ * key rather than a real map element, so its address is taken to be 0, which
+ * places it below every real element of the same size.
+ */
+static inline int
+arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
+{
+	bool a_is_key = ((a->bits & CHUNK_MAP_KEY) == CHUNK_MAP_KEY);
+	size_t a_size = a->bits & ~PAGE_MASK;
+	size_t b_size = b->bits & ~PAGE_MASK;
+	uintptr_t a_addr, b_addr;
+
+	/* Unless a is a search key, both elements must agree on dirtiness. */
+	assert(a_is_key || (a->bits & CHUNK_MAP_DIRTY) == (b->bits &
+	    CHUNK_MAP_DIRTY));
+
+	/* Primary ordering: size. */
+	if (a_size < b_size)
+		return (-1);
+	if (a_size > b_size)
+		return (1);
+
+	/* Secondary ordering: address, with keys sorting lowest. */
+	a_addr = a_is_key ? 0 : (uintptr_t)a;
+	b_addr = (uintptr_t)b;
+	if (a_addr < b_addr)
+		return (-1);
+	return (a_addr > b_addr);
+}
+
+/* Generate red-black tree functions.
*/ +rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t, + arena_chunk_map_t, u.rb_link, arena_avail_comp) + +static inline void * +arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +{ + void *ret; + unsigned regind; + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + dassert(run->magic == ARENA_RUN_MAGIC); + assert(run->nfree > 0); + assert(bitmap_full(bitmap, &bin_info->bitmap_info) == false); + + regind = bitmap_sfu(bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)run + (uintptr_t)bin_info->reg0_offset + + (uintptr_t)(bin_info->reg_size * regind)); + run->nfree--; + if (regind == run->nextind) + run->nextind++; + assert(regind < run->nextind); + return (ret); +} + +static inline void +arena_run_reg_dalloc(arena_run_t *run, void *ptr) +{ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + unsigned regind = arena_run_regind(run, bin_info, ptr); + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + assert(run->nfree < bin_info->nregs); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % (uintptr_t)bin_info->reg_size + == 0); + assert((uintptr_t)ptr >= (uintptr_t)run + + (uintptr_t)bin_info->reg0_offset); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(bitmap, &bin_info->bitmap_info, regind); + run->nfree++; +} + +#ifdef JEMALLOC_DEBUG +static inline void +arena_chunk_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) +{ + size_t i; + size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << PAGE_SHIFT)); + + for (i = 0; i < PAGE_SIZE / sizeof(size_t); i++) + assert(p[i] == 0); +} +#endif + +static void +arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, + bool zero) +{ + arena_chunk_t *chunk; + size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i; + size_t flag_dirty; + arena_avail_tree_t *runs_avail; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + old_ndirty = chunk->ndirty; + run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk) + >> PAGE_SHIFT); + flag_dirty = chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY; + runs_avail = (flag_dirty != 0) ? &arena->runs_avail_dirty : + &arena->runs_avail_clean; + total_pages = (chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) >> + PAGE_SHIFT; + assert((chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty); + need_pages = (size >> PAGE_SHIFT); + assert(need_pages > 0); + assert(need_pages <= total_pages); + rem_pages = total_pages - need_pages; + + arena_avail_tree_remove(runs_avail, &chunk->map[run_ind-map_bias]); +#ifdef JEMALLOC_STATS + /* Update stats_cactive if nactive is crossing a chunk multiple. */ + cactive_diff = CHUNK_CEILING((arena->nactive + need_pages) << + PAGE_SHIFT) - CHUNK_CEILING(arena->nactive << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); +#endif + arena->nactive += need_pages; + + /* Keep track of trailing unused pages for later use. 
*/ + if (rem_pages > 0) { + if (flag_dirty != 0) { + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | CHUNK_MAP_DIRTY; + } else { + chunk->map[run_ind+need_pages-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-map_bias].bits & + CHUNK_MAP_UNZEROED); + chunk->map[run_ind+total_pages-1-map_bias].bits = + (rem_pages << PAGE_SHIFT) | + (chunk->map[run_ind+total_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); + } + arena_avail_tree_insert(runs_avail, + &chunk->map[run_ind+need_pages-map_bias]); + } + + /* Update dirty page accounting. */ + if (flag_dirty != 0) { + chunk->ndirty -= need_pages; + arena->ndirty -= need_pages; + } + + /* + * Update the page map separately for large vs. small runs, since it is + * possible to avoid iteration for large mallocs. + */ + if (large) { + if (zero) { + if (flag_dirty == 0) { + /* + * The run is clean, so some pages may be + * zeroed (i.e. never before touched). + */ + for (i = 0; i < need_pages; i++) { + if ((chunk->map[run_ind+i-map_bias].bits + & CHUNK_MAP_UNZEROED) != 0) { + memset((void *)((uintptr_t) + chunk + ((run_ind+i) << + PAGE_SHIFT)), 0, + PAGE_SIZE); + } +#ifdef JEMALLOC_DEBUG + else { + arena_chunk_validate_zeroed( + chunk, run_ind+i); + } +#endif + } + } else { + /* + * The run is dirty, so all pages must be + * zeroed. + */ + memset((void *)((uintptr_t)chunk + (run_ind << + PAGE_SHIFT)), 0, (need_pages << + PAGE_SHIFT)); + } + } + + /* + * Set the last element first, in case the run only contains one + * page (i.e. both statements set the same element). 
+ */ + chunk->map[run_ind+need_pages-1-map_bias].bits = + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED | flag_dirty; + chunk->map[run_ind-map_bias].bits = size | flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + } else { + assert(zero == false); + /* + * Propagate the dirty and unzeroed flags to the allocated + * small run, so that arena_dalloc_bin_run() has the ability to + * conditionally trim clean pages. + */ + chunk->map[run_ind-map_bias].bits = + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + /* + * The first page will always be dirtied during small run + * initialization, so a validation failure here would not + * actually cause an observable failure. + */ + if (flag_dirty == 0 && + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED) + == 0) + arena_chunk_validate_zeroed(chunk, run_ind); +#endif + for (i = 1; i < need_pages - 1; i++) { + chunk->map[run_ind+i-map_bias].bits = (i << PAGE_SHIFT) + | (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+i-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) + arena_chunk_validate_zeroed(chunk, run_ind+i); +#endif + } + chunk->map[run_ind+need_pages-1-map_bias].bits = ((need_pages + - 1) << PAGE_SHIFT) | + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_ALLOCATED | flag_dirty; +#ifdef JEMALLOC_DEBUG + if (flag_dirty == 0 && + (chunk->map[run_ind+need_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) == 0) { + arena_chunk_validate_zeroed(chunk, + run_ind+need_pages-1); + } +#endif + } +} + +static arena_chunk_t * +arena_chunk_alloc(arena_t *arena) +{ + arena_chunk_t *chunk; + size_t i; + + if (arena->spare != NULL) { + arena_avail_tree_t *runs_avail; + + chunk = arena->spare; + arena->spare = NULL; + + /* Insert the run into the appropriate runs_avail_* tree. 
*/ + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; + assert((chunk->map[0].bits & ~PAGE_MASK) == arena_maxclass); + assert((chunk->map[chunk_npages-1-map_bias].bits & ~PAGE_MASK) + == arena_maxclass); + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) == + (chunk->map[chunk_npages-1-map_bias].bits & + CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[0]); + } else { + bool zero; + size_t unzeroed; + + zero = false; + malloc_mutex_unlock(&arena->lock); + chunk = (arena_chunk_t *)chunk_alloc(chunksize, false, &zero); + malloc_mutex_lock(&arena->lock); + if (chunk == NULL) + return (NULL); +#ifdef JEMALLOC_STATS + arena->stats.mapped += chunksize; +#endif + + chunk->arena = arena; + ql_elm_new(chunk, link_dirty); + chunk->dirtied = false; + + /* + * Claim that no pages are in use, since the header is merely + * overhead. + */ + chunk->ndirty = 0; + + /* + * Initialize the map to contain one maximal free untouched run. + * Mark the pages as zeroed iff chunk_alloc() returned a zeroed + * chunk. + */ + unzeroed = zero ? 0 : CHUNK_MAP_UNZEROED; + chunk->map[0].bits = arena_maxclass | unzeroed; + /* + * There is no need to initialize the internal page map entries + * unless the chunk is not zeroed. + */ + if (zero == false) { + for (i = map_bias+1; i < chunk_npages-1; i++) + chunk->map[i-map_bias].bits = unzeroed; + } +#ifdef JEMALLOC_DEBUG + else { + for (i = map_bias+1; i < chunk_npages-1; i++) + assert(chunk->map[i-map_bias].bits == unzeroed); + } +#endif + chunk->map[chunk_npages-1-map_bias].bits = arena_maxclass | + unzeroed; + + /* Insert the run into the runs_avail_clean tree. 
*/ + arena_avail_tree_insert(&arena->runs_avail_clean, + &chunk->map[0]); + } + + return (chunk); +} + +static void +arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk) +{ + arena_avail_tree_t *runs_avail; + + /* + * Remove run from the appropriate runs_avail_* tree, so that the arena + * does not use it. + */ + if ((chunk->map[0].bits & CHUNK_MAP_DIRTY) == 0) + runs_avail = &arena->runs_avail_clean; + else + runs_avail = &arena->runs_avail_dirty; + arena_avail_tree_remove(runs_avail, &chunk->map[0]); + + if (arena->spare != NULL) { + arena_chunk_t *spare = arena->spare; + + arena->spare = chunk; + if (spare->dirtied) { + ql_remove(&chunk->arena->chunks_dirty, spare, + link_dirty); + arena->ndirty -= spare->ndirty; + } + malloc_mutex_unlock(&arena->lock); + chunk_dealloc((void *)spare, chunksize); + malloc_mutex_lock(&arena->lock); +#ifdef JEMALLOC_STATS + arena->stats.mapped -= chunksize; +#endif + } else + arena->spare = chunk; +} + +static arena_run_t * +arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero) +{ + arena_chunk_t *chunk; + arena_run_t *run; + arena_chunk_map_t *mapelm, key; + + assert(size <= arena_maxclass); + assert((size & PAGE_MASK) == 0); + + /* Search the arena's chunks for the lowest best fit. 
*/ + key.bits = size | CHUNK_MAP_KEY; + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + PAGE_SHIFT)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + PAGE_SHIFT)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + /* + * No usable runs. Create a new chunk from which to allocate the run. + */ + chunk = arena_chunk_alloc(arena); + if (chunk != NULL) { + run = (arena_run_t *)((uintptr_t)chunk + (map_bias << + PAGE_SHIFT)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + /* + * arena_chunk_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped arena->lock in + * arena_chunk_alloc(), so search one more time. 
+ */ + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_dirty, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + PAGE_SHIFT)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + mapelm = arena_avail_tree_nsearch(&arena->runs_avail_clean, &key); + if (mapelm != NULL) { + arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm); + size_t pageind = (((uintptr_t)mapelm - + (uintptr_t)run_chunk->map) / sizeof(arena_chunk_map_t)) + + map_bias; + + run = (arena_run_t *)((uintptr_t)run_chunk + (pageind << + PAGE_SHIFT)); + arena_run_split(arena, run, size, large, zero); + return (run); + } + + return (NULL); +} + +static inline void +arena_maybe_purge(arena_t *arena) +{ + + /* Enforce opt_lg_dirty_mult. */ + if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory && + (arena->ndirty - arena->npurgatory) > chunk_npages && + (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty - + arena->npurgatory)) + arena_purge(arena, false); +} + +static inline void +arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk) +{ + ql_head(arena_chunk_map_t) mapelms; + arena_chunk_map_t *mapelm; + size_t pageind, flag_unzeroed; +#ifdef JEMALLOC_DEBUG + size_t ndirty; +#endif +#ifdef JEMALLOC_STATS + size_t nmadvise; +#endif + + ql_new(&mapelms); + + flag_unzeroed = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + /* + * madvise(..., MADV_DONTNEED) results in zero-filled pages for anonymous + * mappings, but not for file-backed mappings. + */ +# ifdef JEMALLOC_SWAP + swap_enabled ? 
CHUNK_MAP_UNZEROED : +# endif + 0; +#else + CHUNK_MAP_UNZEROED; +#endif + + /* + * If chunk is the spare, temporarily re-allocate it, 1) so that its + * run is reinserted into runs_avail_dirty, and 2) so that it cannot be + * completely discarded by another thread while arena->lock is dropped + * by this thread. Note that the arena_run_dalloc() call will + * implicitly deallocate the chunk, so no explicit action is required + * in this function to deallocate the chunk. + * + * Note that once a chunk contains dirty pages, it cannot again contain + * a single run unless 1) it is a dirty run, or 2) this function purges + * dirty pages and causes the transition to a single clean run. Thus + * (chunk == arena->spare) is possible, but it is not possible for + * this function to be called on the spare unless it contains a dirty + * run. + */ + if (chunk == arena->spare) { + assert((chunk->map[0].bits & CHUNK_MAP_DIRTY) != 0); + arena_chunk_alloc(arena); + } + + /* Temporarily allocate all free dirty runs within chunk. */ + for (pageind = map_bias; pageind < chunk_npages;) { + mapelm = &chunk->map[pageind-map_bias]; + if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) { + size_t npages; + + npages = mapelm->bits >> PAGE_SHIFT; + assert(pageind + npages <= chunk_npages); + if (mapelm->bits & CHUNK_MAP_DIRTY) { + size_t i; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif + + arena_avail_tree_remove( + &arena->runs_avail_dirty, mapelm); + + mapelm->bits = (npages << PAGE_SHIFT) | + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; + /* + * Update internal elements in the page map, so + * that CHUNK_MAP_UNZEROED is properly set. + */ + for (i = 1; i < npages - 1; i++) { + chunk->map[pageind+i-map_bias].bits = + flag_unzeroed; + } + if (npages > 1) { + chunk->map[ + pageind+npages-1-map_bias].bits = + flag_unzeroed | CHUNK_MAP_LARGE | + CHUNK_MAP_ALLOCATED; + } + +#ifdef JEMALLOC_STATS + /* + * Update stats_cactive if nactive is crossing a + * chunk multiple. 
+ */ + cactive_diff = CHUNK_CEILING((arena->nactive + + npages) << PAGE_SHIFT) - + CHUNK_CEILING(arena->nactive << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_add(cactive_diff); +#endif + arena->nactive += npages; + /* Append to list for later processing. */ + ql_elm_new(mapelm, u.ql_link); + ql_tail_insert(&mapelms, mapelm, u.ql_link); + } + + pageind += npages; + } else { + /* Skip allocated run. */ + if (mapelm->bits & CHUNK_MAP_LARGE) + pageind += mapelm->bits >> PAGE_SHIFT; + else { + arena_run_t *run = (arena_run_t *)((uintptr_t) + chunk + (uintptr_t)(pageind << PAGE_SHIFT)); + + assert((mapelm->bits >> PAGE_SHIFT) == 0); + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(arena, + run->bin); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + pageind += bin_info->run_size >> PAGE_SHIFT; + } + } + } + assert(pageind == chunk_npages); + +#ifdef JEMALLOC_DEBUG + ndirty = chunk->ndirty; +#endif +#ifdef JEMALLOC_STATS + arena->stats.purged += chunk->ndirty; +#endif + arena->ndirty -= chunk->ndirty; + chunk->ndirty = 0; + ql_remove(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = false; + + malloc_mutex_unlock(&arena->lock); +#ifdef JEMALLOC_STATS + nmadvise = 0; +#endif + ql_foreach(mapelm, &mapelms, u.ql_link) { + size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t)) + map_bias; + size_t npages = mapelm->bits >> PAGE_SHIFT; + + assert(pageind + npages <= chunk_npages); +#ifdef JEMALLOC_DEBUG + assert(ndirty >= npages); + ndirty -= npages; +#endif + +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_DONTNEED); +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + madvise((void *)((uintptr_t)chunk + (pageind << PAGE_SHIFT)), + (npages << PAGE_SHIFT), MADV_FREE); +#else +# error "No method defined for purging unused dirty pages." 
+#endif
+
+#ifdef JEMALLOC_STATS
+		nmadvise++;
+#endif
+	}
+#ifdef JEMALLOC_DEBUG
+	assert(ndirty == 0);
+#endif
+	malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+	arena->stats.nmadvise += nmadvise;
+#endif
+
+	/* Deallocate runs. */
+	for (mapelm = ql_first(&mapelms); mapelm != NULL;
+	    mapelm = ql_first(&mapelms)) {
+		size_t pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+		    sizeof(arena_chunk_map_t)) + map_bias;
+		arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+		    (uintptr_t)(pageind << PAGE_SHIFT));
+
+		ql_remove(&mapelms, mapelm, u.ql_link);
+		arena_run_dalloc(arena, run, false);
+	}
+}
+
+/*
+ * Purge dirty pages from arena by repeatedly handing the first chunk on
+ * arena->chunks_dirty to arena_chunk_purge().
+ *
+ * If all is true, the target is every dirty page not already accounted for
+ * in arena->npurgatory.  Otherwise the target is reduced by
+ * (arena->nactive >> opt_lg_dirty_mult), i.e. only the excess over the
+ * opt_lg_dirty_mult threshold is purged.
+ *
+ * arena_purge_all() calls this with arena->lock held.
+ */
+static void
+arena_purge(arena_t *arena, bool all)
+{
+	arena_chunk_t *chunk;
+	size_t npurgatory;
+#ifdef JEMALLOC_DEBUG
+	size_t ndirty = 0;
+
+	/* The per-chunk dirty counts must add up to the arena-wide count. */
+	ql_foreach(chunk, &arena->chunks_dirty, link_dirty) {
+		assert(chunk->dirtied);
+		ndirty += chunk->ndirty;
+	}
+	assert(ndirty == arena->ndirty);
+#endif
+	/*
+	 * Sanity-check the purging preconditions.  These checks must use
+	 * arena->npurgatory; the local npurgatory is not assigned until
+	 * further down, so reading it here would use an indeterminate value.
+	 * all is tested first so that the shift by opt_lg_dirty_mult (which
+	 * may be negative) is never evaluated for a purge-all request.
+	 */
+	assert(all || arena->ndirty > arena->npurgatory);
+	assert(all || arena->ndirty > chunk_npages);
+	assert(all || (arena->nactive >> opt_lg_dirty_mult) <
+	    (arena->ndirty - arena->npurgatory));
+
+#ifdef JEMALLOC_STATS
+	arena->stats.npurge++;
+#endif
+
+	/*
+	 * Compute the minimum number of pages that this thread should try to
+	 * purge, and add the result to arena->npurgatory.  This will keep
+	 * multiple threads from racing to reduce ndirty below the threshold.
+	 */
+	npurgatory = arena->ndirty - arena->npurgatory;
+	if (all == false) {
+		assert(npurgatory >= arena->nactive >> opt_lg_dirty_mult);
+		npurgatory -= arena->nactive >> opt_lg_dirty_mult;
+	}
+	arena->npurgatory += npurgatory;
+
+	while (npurgatory > 0) {
+		/* Get next chunk with dirty pages. */
+		chunk = ql_first(&arena->chunks_dirty);
+		if (chunk == NULL) {
+			/*
+			 * This thread was unable to purge as many pages as
+			 * originally intended, due to races with other threads
+			 * that either did some of the purging work, or re-used
+			 * dirty pages.
+ */ + arena->npurgatory -= npurgatory; + return; + } + while (chunk->ndirty == 0) { + ql_remove(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = false; + chunk = ql_first(&arena->chunks_dirty); + if (chunk == NULL) { + /* Same logic as for above. */ + arena->npurgatory -= npurgatory; + return; + } + } + + if (chunk->ndirty > npurgatory) { + /* + * This thread will, at a minimum, purge all the dirty + * pages in chunk, so set npurgatory to reflect this + * thread's commitment to purge the pages. This tends + * to reduce the chances of the following scenario: + * + * 1) This thread sets arena->npurgatory such that + * (arena->ndirty - arena->npurgatory) is at the + * threshold. + * 2) This thread drops arena->lock. + * 3) Another thread causes one or more pages to be + * dirtied, and immediately determines that it must + * purge dirty pages. + * + * If this scenario *does* play out, that's okay, + * because all of the purging work being done really + * needs to happen. + */ + arena->npurgatory += chunk->ndirty - npurgatory; + npurgatory = chunk->ndirty; + } + + arena->npurgatory -= chunk->ndirty; + npurgatory -= chunk->ndirty; + arena_chunk_purge(arena, chunk); + } +} + +void +arena_purge_all(arena_t *arena) +{ + + malloc_mutex_lock(&arena->lock); + arena_purge(arena, true); + malloc_mutex_unlock(&arena->lock); +} + +static void +arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty) +{ + arena_chunk_t *chunk; + size_t size, run_ind, run_pages, flag_dirty; + arena_avail_tree_t *runs_avail; +#ifdef JEMALLOC_STATS + size_t cactive_diff; +#endif + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) + >> PAGE_SHIFT); + assert(run_ind >= map_bias); + assert(run_ind < chunk_npages); + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_LARGE) != 0) { + size = chunk->map[run_ind-map_bias].bits & ~PAGE_MASK; + assert(size == PAGE_SIZE || + (chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + 
~PAGE_MASK) == 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[run_ind+(size>>PAGE_SHIFT)-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + } else { + size_t binind = arena_bin_index(arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + size = bin_info->run_size; + } + run_pages = (size >> PAGE_SHIFT); +#ifdef JEMALLOC_STATS + /* Update stats_cactive if nactive is crossing a chunk multiple. */ + cactive_diff = CHUNK_CEILING(arena->nactive << PAGE_SHIFT) - + CHUNK_CEILING((arena->nactive - run_pages) << PAGE_SHIFT); + if (cactive_diff != 0) + stats_cactive_sub(cactive_diff); +#endif + arena->nactive -= run_pages; + + /* + * The run is dirty if the caller claims to have dirtied it, as well as + * if it was already dirty before being allocated. + */ + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) != 0) + dirty = true; + flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; + runs_avail = dirty ? &arena->runs_avail_dirty : + &arena->runs_avail_clean; + + /* Mark pages as unallocated in the chunk map. */ + if (dirty) { + chunk->map[run_ind-map_bias].bits = size | CHUNK_MAP_DIRTY; + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + CHUNK_MAP_DIRTY; + + chunk->ndirty += run_pages; + arena->ndirty += run_pages; + } else { + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_UNZEROED); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_UNZEROED); + } + + /* Try to coalesce forward. 
*/ + if (run_ind + run_pages < chunk_npages && + (chunk->map[run_ind+run_pages-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0 && (chunk->map[run_ind+run_pages-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t nrun_size = chunk->map[run_ind+run_pages-map_bias].bits & + ~PAGE_MASK; + size_t nrun_pages = nrun_size >> PAGE_SHIFT; + + /* + * Remove successor from runs_avail; the coalesced run is + * inserted later. + */ + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & ~PAGE_MASK) == nrun_size); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0); + assert((chunk->map[run_ind+run_pages+nrun_pages-1-map_bias].bits + & CHUNK_MAP_DIRTY) == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind+run_pages-map_bias]); + + size += nrun_size; + run_pages += nrun_pages; + + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + } + + /* Try to coalesce backward. */ + if (run_ind > map_bias && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[run_ind-1-map_bias].bits & + CHUNK_MAP_DIRTY) == flag_dirty) { + size_t prun_size = chunk->map[run_ind-1-map_bias].bits & + ~PAGE_MASK; + size_t prun_pages = prun_size >> PAGE_SHIFT; + + run_ind -= prun_pages; + + /* + * Remove predecessor from runs_avail; the coalesced run is + * inserted later. 
+ */ + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) + == prun_size); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_ALLOCATED) + == 0); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) + == flag_dirty); + arena_avail_tree_remove(runs_avail, + &chunk->map[run_ind-map_bias]); + + size += prun_size; + run_pages += prun_pages; + + chunk->map[run_ind-map_bias].bits = size | + (chunk->map[run_ind-map_bias].bits & CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind+run_pages-1-map_bias].bits = size | + (chunk->map[run_ind+run_pages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + } + + /* Insert into runs_avail, now that coalescing is complete. */ + assert((chunk->map[run_ind-map_bias].bits & ~PAGE_MASK) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & ~PAGE_MASK)); + assert((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == + (chunk->map[run_ind+run_pages-1-map_bias].bits & CHUNK_MAP_DIRTY)); + arena_avail_tree_insert(runs_avail, &chunk->map[run_ind-map_bias]); + + if (dirty) { + /* + * Insert into chunks_dirty before potentially calling + * arena_chunk_dealloc(), so that chunks_dirty and + * arena->ndirty are consistent. + */ + if (chunk->dirtied == false) { + ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty); + chunk->dirtied = true; + } + } + + /* + * Deallocate chunk if it is now completely unused. The bit + * manipulation checks whether the first run is unallocated and extends + * to the end of the chunk. + */ + if ((chunk->map[0].bits & (~PAGE_MASK | CHUNK_MAP_ALLOCATED)) == + arena_maxclass) + arena_chunk_dealloc(arena, chunk); + + /* + * It is okay to do dirty page processing here even if the chunk was + * deallocated above, since in that case it is the spare. Waiting + * until after possible chunk deallocation to do dirty processing + * allows for an old spare to be fully deallocated, thus decreasing the + * chances of spuriously crossing the dirty page purging threshold. 
+ */ + if (dirty) + arena_maybe_purge(arena); +} + +static void +arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize) +{ + size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & CHUNK_MAP_DIRTY; + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * leading run as separately allocated. Set the last element of each + * run first, in case of single-page runs. + */ + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = (oldsize - newsize) + | flag_dirty | (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef JEMALLOC_DEBUG + { + size_t tail_npages = newsize >> PAGE_SHIFT; + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_DIRTY) == flag_dirty); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias] + .bits & CHUNK_MAP_ALLOCATED) != 0); + } +#endif + chunk->map[pageind+head_npages-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_FLAGS_MASK) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + arena_run_dalloc(arena, run, false); +} + +static void +arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + size_t oldsize, size_t newsize, bool dirty) +{ + size_t pageind = 
((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t head_npages = newsize >> PAGE_SHIFT; + size_t tail_npages = (oldsize - newsize) >> PAGE_SHIFT; + size_t flag_dirty = chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY; + + assert(oldsize > newsize); + + /* + * Update the chunk map so that arena_run_dalloc() can treat the + * trailing run as separately allocated. Set the last element of each + * run first, in case of single-page runs. + */ + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind-map_bias].bits & CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages-1-map_bias].bits = flag_dirty | + (chunk->map[pageind+head_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind-map_bias].bits = newsize | flag_dirty | + (chunk->map[pageind-map_bias].bits & CHUNK_MAP_UNZEROED) | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + ~PAGE_MASK) == 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_LARGE) != 0); + assert((chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_ALLOCATED) != 0); + chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits = + flag_dirty | + (chunk->map[pageind+head_npages+tail_npages-1-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+head_npages-map_bias].bits = (oldsize - newsize) | + flag_dirty | (chunk->map[pageind+head_npages-map_bias].bits & + CHUNK_MAP_UNZEROED) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + + arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize), + dirty); +} + +static arena_run_t * +arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +{ + arena_chunk_map_t *mapelm; + arena_run_t *run; + size_t binind; + arena_bin_info_t *bin_info; + + /* Look for a usable run. 
*/ + mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + + /* run is guaranteed to have available space. */ + arena_run_tree_remove(&bin->runs, mapelm); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> PAGE_SHIFT)) + << PAGE_SHIFT)); +#ifdef JEMALLOC_STATS + bin->stats.reruns++; +#endif + return (run); + } + /* No existing runs have any space available. */ + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + + /* Allocate a new run. */ + malloc_mutex_unlock(&bin->lock); + /******************************/ + malloc_mutex_lock(&arena->lock); + run = arena_run_alloc(arena, bin_info->run_size, false, false); + if (run != NULL) { + bitmap_t *bitmap = (bitmap_t *)((uintptr_t)run + + (uintptr_t)bin_info->bitmap_offset); + + /* Initialize run internals. */ + run->bin = bin; + run->nextind = 0; + run->nfree = bin_info->nregs; + bitmap_init(bitmap, &bin_info->bitmap_info); +#ifdef JEMALLOC_DEBUG + run->magic = ARENA_RUN_MAGIC; +#endif + } + malloc_mutex_unlock(&arena->lock); + /********************************/ + malloc_mutex_lock(&bin->lock); + if (run != NULL) { +#ifdef JEMALLOC_STATS + bin->stats.nruns++; + bin->stats.curruns++; + if (bin->stats.curruns > bin->stats.highruns) + bin->stats.highruns = bin->stats.curruns; +#endif + return (run); + } + + /* + * arena_run_alloc() failed, but another thread may have made + * sufficient memory available while this one dropped bin->lock above, + * so search one more time. + */ + mapelm = arena_run_tree_first(&bin->runs); + if (mapelm != NULL) { + arena_chunk_t *chunk; + size_t pageind; + + /* run is guaranteed to have available space. 
*/ + arena_run_tree_remove(&bin->runs, mapelm); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm); + pageind = ((((uintptr_t)mapelm - (uintptr_t)chunk->map) / + sizeof(arena_chunk_map_t))) + map_bias; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> PAGE_SHIFT)) + << PAGE_SHIFT)); +#ifdef JEMALLOC_STATS + bin->stats.reruns++; +#endif + return (run); + } + + return (NULL); +} + +/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ +static void * +arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +{ + void *ret; + size_t binind; + arena_bin_info_t *bin_info; + arena_run_t *run; + + binind = arena_bin_index(arena, bin); + bin_info = &arena_bin_info[binind]; + bin->runcur = NULL; + run = arena_bin_nonfull_run_get(arena, bin); + if (bin->runcur != NULL && bin->runcur->nfree > 0) { + /* + * Another thread updated runcur while this one ran without the + * bin lock in arena_bin_nonfull_run_get(). + */ + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->nfree > 0); + ret = arena_run_reg_alloc(bin->runcur, bin_info); + if (run != NULL) { + arena_chunk_t *chunk; + + /* + * arena_run_alloc() may have allocated run, or it may + * have pulled run from the bin's run tree. Therefore + * it is unsafe to make any assumptions about how run + * has previously been used, and arena_bin_lower_run() + * must be called, as if a region were just deallocated + * from the run. 
+ */ + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + if (run->nfree == bin_info->nregs) + arena_dalloc_bin_run(arena, chunk, run, bin); + else + arena_bin_lower_run(arena, chunk, run, bin); + } + return (ret); + } + + if (run == NULL) + return (NULL); + + bin->runcur = run; + + dassert(bin->runcur->magic == ARENA_RUN_MAGIC); + assert(bin->runcur->nfree > 0); + + return (arena_run_reg_alloc(bin->runcur, bin_info)); +} + +#ifdef JEMALLOC_PROF +void +arena_prof_accum(arena_t *arena, uint64_t accumbytes) +{ + + if (prof_interval != 0) { + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + prof_idump(); + arena->prof_accumbytes -= prof_interval; + } + } +} +#endif + +#ifdef JEMALLOC_TCACHE +void +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, size_t binind +# ifdef JEMALLOC_PROF + , uint64_t prof_accumbytes +# endif + ) +{ + unsigned i, nfill; + arena_bin_t *bin; + arena_run_t *run; + void *ptr; + + assert(tbin->ncached == 0); + +#ifdef JEMALLOC_PROF + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, prof_accumbytes); + malloc_mutex_unlock(&arena->lock); +#endif + bin = &arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + tbin->lg_fill_div); i < nfill; i++) { + if ((run = bin->runcur) != NULL && run->nfree > 0) + ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ptr = arena_bin_malloc_hard(arena, bin); + if (ptr == NULL) + break; + /* Insert such that low regions get used first. 
*/ + tbin->avail[nfill - 1 - i] = ptr; + } +#ifdef JEMALLOC_STATS + bin->stats.allocated += i * arena_bin_info[binind].reg_size; + bin->stats.nmalloc += i; + bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nfills++; + tbin->tstats.nrequests = 0; +#endif + malloc_mutex_unlock(&bin->lock); + tbin->ncached = i; +} +#endif + +void * +arena_malloc_small(arena_t *arena, size_t size, bool zero) +{ + void *ret; + arena_bin_t *bin; + arena_run_t *run; + size_t binind; + + binind = SMALL_SIZE2BIN(size); + assert(binind < nbins); + bin = &arena->bins[binind]; + size = arena_bin_info[binind].reg_size; + + malloc_mutex_lock(&bin->lock); + if ((run = bin->runcur) != NULL && run->nfree > 0) + ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + else + ret = arena_bin_malloc_hard(arena, bin); + + if (ret == NULL) { + malloc_mutex_unlock(&bin->lock); + return (NULL); + } + +#ifdef JEMALLOC_STATS + bin->stats.allocated += size; + bin->stats.nmalloc++; + bin->stats.nrequests++; +#endif + malloc_mutex_unlock(&bin->lock); +#ifdef JEMALLOC_PROF + if (isthreaded == false) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, size); + malloc_mutex_unlock(&arena->lock); + } +#endif + + if (zero == false) { +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); +#endif + } else + memset(ret, 0, size); + + return (ret); +} + +void * +arena_malloc_large(arena_t *arena, size_t size, bool zero) +{ + void *ret; + + /* Large allocation. 
*/ + size = PAGE_CEILING(size); + malloc_mutex_lock(&arena->lock); + ret = (void *)arena_run_alloc(arena, size, true, zero); + if (ret == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } +#ifdef JEMALLOC_STATS + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + } +#endif +#ifdef JEMALLOC_PROF + arena_prof_accum(arena, size); +#endif + malloc_mutex_unlock(&arena->lock); + + if (zero == false) { +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); +#endif + } + + return (ret); +} + +void * +arena_malloc(size_t size, bool zero) +{ + + assert(size != 0); + assert(QUANTUM_CEILING(size) <= arena_maxclass); + + if (size <= small_maxclass) { +#ifdef JEMALLOC_TCACHE + tcache_t *tcache; + + if ((tcache = tcache_get()) != NULL) + return (tcache_alloc_small(tcache, size, zero)); + else + +#endif + return (arena_malloc_small(choose_arena(), size, zero)); + } else { +#ifdef JEMALLOC_TCACHE + if (size <= tcache_maxclass) { + tcache_t *tcache; + + if ((tcache = tcache_get()) != NULL) + return (tcache_alloc_large(tcache, size, zero)); + else { + return (arena_malloc_large(choose_arena(), + size, zero)); + } + } else +#endif + return (arena_malloc_large(choose_arena(), size, zero)); + } +} + +/* Only handles large allocations that require more than page alignment. 
*/ +void * +arena_palloc(arena_t *arena, size_t size, size_t alloc_size, size_t alignment, + bool zero) +{ + void *ret; + size_t offset; + arena_chunk_t *chunk; + + assert((size & PAGE_MASK) == 0); + + alignment = PAGE_CEILING(alignment); + + malloc_mutex_lock(&arena->lock); + ret = (void *)arena_run_alloc(arena, alloc_size, true, zero); + if (ret == NULL) { + malloc_mutex_unlock(&arena->lock); + return (NULL); + } + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + + offset = (uintptr_t)ret & (alignment - 1); + assert((offset & PAGE_MASK) == 0); + assert(offset < alloc_size); + if (offset == 0) + arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false); + else { + size_t leadsize, trailsize; + + leadsize = alignment - offset; + if (leadsize > 0) { + arena_run_trim_head(arena, chunk, ret, alloc_size, + alloc_size - leadsize); + ret = (void *)((uintptr_t)ret + leadsize); + } + + trailsize = alloc_size - leadsize - size; + if (trailsize != 0) { + /* Trim trailing space. */ + assert(trailsize < alloc_size); + arena_run_trim_tail(arena, chunk, ret, size + trailsize, + size, false); + } + } + +#ifdef JEMALLOC_STATS + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + } +#endif + malloc_mutex_unlock(&arena->lock); + +#ifdef JEMALLOC_FILL + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, size); + else if (opt_zero) + memset(ret, 0, size); + } +#endif + return (ret); +} + +/* Return the size of the allocation pointed to by ptr. 
*/ +size_t +arena_salloc(const void *ptr) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + 0); + ret = bin_info->reg_size; + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = mapbits & ~PAGE_MASK; + assert(ret != 0); + } + + return (ret); +} + +#ifdef JEMALLOC_PROF +void +arena_prof_promoted(const void *ptr, size_t size) +{ + arena_chunk_t *chunk; + size_t pageind, binind; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + assert(isalloc(ptr) == PAGE_SIZE); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + binind = SMALL_SIZE2BIN(size); + assert(binind < nbins); + chunk->map[pageind-map_bias].bits = (chunk->map[pageind-map_bias].bits & + ~CHUNK_MAP_CLASS_MASK) | ((binind+1) << CHUNK_MAP_CLASS_SHIFT); +} + +size_t +arena_salloc_demote(const void *ptr) +{ + size_t ret; + arena_chunk_t *chunk; + size_t pageind, mapbits; + + assert(ptr != NULL); + assert(CHUNK_ADDR2BASE(ptr) != ptr); + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + mapbits = chunk->map[pageind-map_bias].bits; + assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); + if ((mapbits & CHUNK_MAP_LARGE) == 0) { + arena_run_t *run = (arena_run_t 
*)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapbits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + dassert(run->magic == ARENA_RUN_MAGIC); + size_t binind = arena_bin_index(chunk->arena, run->bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + assert(((uintptr_t)ptr - ((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_size == + 0); + ret = bin_info->reg_size; + } else { + assert(((uintptr_t)ptr & PAGE_MASK) == 0); + ret = mapbits & ~PAGE_MASK; + if (prof_promote && ret == PAGE_SIZE && (mapbits & + CHUNK_MAP_CLASS_MASK) != 0) { + size_t binind = ((mapbits & CHUNK_MAP_CLASS_MASK) >> + CHUNK_MAP_CLASS_SHIFT) - 1; + assert(binind < nbins); + ret = arena_bin_info[binind].reg_size; + } + assert(ret != 0); + } + + return (ret); +} +#endif + +static void +arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* Dissociate run from bin. */ + if (run == bin->runcur) + bin->runcur = NULL; + else { + size_t binind = arena_bin_index(chunk->arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; + + if (bin_info->nregs != 1) { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + /* + * This block's conditional is necessary because if the + * run only contains one region, then it never gets + * inserted into the non-full runs tree. 
+ */ + arena_run_tree_remove(&bin->runs, run_mapelm); + } + } +} + +static void +arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + size_t binind; + arena_bin_info_t *bin_info; + size_t npages, run_ind, past; + + assert(run != bin->runcur); + assert(arena_run_tree_search(&bin->runs, &chunk->map[ + (((uintptr_t)run-(uintptr_t)chunk)>>PAGE_SHIFT)-map_bias]) == NULL); + + binind = arena_bin_index(chunk->arena, run->bin); + bin_info = &arena_bin_info[binind]; + + malloc_mutex_unlock(&bin->lock); + /******************************/ + npages = bin_info->run_size >> PAGE_SHIFT; + run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT); + past = (size_t)(PAGE_CEILING((uintptr_t)run + + (uintptr_t)bin_info->reg0_offset + (uintptr_t)(run->nextind * + bin_info->reg_size) - (uintptr_t)chunk) >> PAGE_SHIFT); + malloc_mutex_lock(&arena->lock); + + /* + * If the run was originally clean, and some pages were never touched, + * trim the clean pages before deallocating the dirty portion of the + * run. + */ + if ((chunk->map[run_ind-map_bias].bits & CHUNK_MAP_DIRTY) == 0 && past + - run_ind < npages) { + /* + * Trim clean pages. Convert to large run beforehand. Set the + * last map element first, in case this is a one-page run. 
+ */ + chunk->map[run_ind+npages-1-map_bias].bits = CHUNK_MAP_LARGE | + (chunk->map[run_ind+npages-1-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + chunk->map[run_ind-map_bias].bits = bin_info->run_size | + CHUNK_MAP_LARGE | (chunk->map[run_ind-map_bias].bits & + CHUNK_MAP_FLAGS_MASK); + arena_run_trim_tail(arena, chunk, run, (npages << PAGE_SHIFT), + ((past - run_ind) << PAGE_SHIFT), false); + /* npages = past - run_ind; */ + } +#ifdef JEMALLOC_DEBUG + run->magic = 0; +#endif + arena_run_dalloc(arena, run, true); + malloc_mutex_unlock(&arena->lock); + /****************************/ + malloc_mutex_lock(&bin->lock); +#ifdef JEMALLOC_STATS + bin->stats.curruns--; +#endif +} + +static void +arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, + arena_bin_t *bin) +{ + + /* + * Make sure that bin->runcur always refers to the lowest non-full run, + * if one exists. + */ + if (bin->runcur == NULL) + bin->runcur = run; + else if ((uintptr_t)run < (uintptr_t)bin->runcur) { + /* Switch runcur. */ + if (bin->runcur->nfree > 0) { + arena_chunk_t *runcur_chunk = + CHUNK_ADDR2BASE(bin->runcur); + size_t runcur_pageind = (((uintptr_t)bin->runcur - + (uintptr_t)runcur_chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *runcur_mapelm = + &runcur_chunk->map[runcur_pageind-map_bias]; + + /* Insert runcur. 
*/ + arena_run_tree_insert(&bin->runs, runcur_mapelm); + } + bin->runcur = run; + } else { + size_t run_pageind = (((uintptr_t)run - + (uintptr_t)chunk)) >> PAGE_SHIFT; + arena_chunk_map_t *run_mapelm = + &chunk->map[run_pageind-map_bias]; + + assert(arena_run_tree_search(&bin->runs, run_mapelm) == NULL); + arena_run_tree_insert(&bin->runs, run_mapelm); + } +} + +void +arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, + arena_chunk_map_t *mapelm) +{ + size_t pageind; + arena_run_t *run; + arena_bin_t *bin; +#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) + size_t size; +#endif + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind - + (mapelm->bits >> PAGE_SHIFT)) << PAGE_SHIFT)); + dassert(run->magic == ARENA_RUN_MAGIC); + bin = run->bin; + size_t binind = arena_bin_index(arena, bin); + arena_bin_info_t *bin_info = &arena_bin_info[binind]; +#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) + size = bin_info->reg_size; +#endif + +#ifdef JEMALLOC_FILL + if (opt_junk) + memset(ptr, 0x5a, size); +#endif + + arena_run_reg_dalloc(run, ptr); + if (run->nfree == bin_info->nregs) { + arena_dissociate_bin_run(chunk, run, bin); + arena_dalloc_bin_run(arena, chunk, run, bin); + } else if (run->nfree == 1 && run != bin->runcur) + arena_bin_lower_run(arena, chunk, run, bin); + +#ifdef JEMALLOC_STATS + bin->stats.allocated -= size; + bin->stats.ndalloc++; +#endif +} + +#ifdef JEMALLOC_STATS +void +arena_stats_merge(arena_t *arena, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) +{ + unsigned i; + + malloc_mutex_lock(&arena->lock); + *nactive += arena->nactive; + *ndirty += arena->ndirty; + + astats->mapped += arena->stats.mapped; + astats->npurge += arena->stats.npurge; + astats->nmadvise += arena->stats.nmadvise; + astats->purged += arena->stats.purged; + astats->allocated_large += 
arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].highruns += arena->stats.lstats[i].highruns; + lstats[i].curruns += arena->stats.lstats[i].curruns; + } + malloc_mutex_unlock(&arena->lock); + + for (i = 0; i < nbins; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(&bin->lock); + bstats[i].allocated += bin->stats.allocated; + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; +#ifdef JEMALLOC_TCACHE + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; +#endif + bstats[i].nruns += bin->stats.nruns; + bstats[i].reruns += bin->stats.reruns; + bstats[i].highruns += bin->stats.highruns; + bstats[i].curruns += bin->stats.curruns; + malloc_mutex_unlock(&bin->lock); + } +} +#endif + +void +arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +{ + + /* Large allocation. 
*/ +#ifdef JEMALLOC_FILL +# ifndef JEMALLOC_STATS + if (opt_junk) +# endif +#endif + { +#if (defined(JEMALLOC_FILL) || defined(JEMALLOC_STATS)) + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + PAGE_SHIFT; + size_t size = chunk->map[pageind-map_bias].bits & ~PAGE_MASK; +#endif + +#ifdef JEMALLOC_FILL +# ifdef JEMALLOC_STATS + if (opt_junk) +# endif + memset(ptr, 0x5a, size); +#endif +#ifdef JEMALLOC_STATS + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--; +#endif + } + + arena_run_dalloc(arena, (arena_run_t *)ptr, true); +} + +static void +arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size) +{ + + assert(size < oldsize); + + /* + * Shrink the run, and make trailing pages available for other + * allocations. + */ + malloc_mutex_lock(&arena->lock); + arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size, + true); +#ifdef JEMALLOC_STATS + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns; + } +#endif + malloc_mutex_unlock(&arena->lock); +} + +static bool +arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero) +{ + size_t 
pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT; + size_t npages = oldsize >> PAGE_SHIFT; + size_t followsize; + + assert(oldsize == (chunk->map[pageind-map_bias].bits & ~PAGE_MASK)); + + /* Try to extend the run. */ + assert(size + extra > oldsize); + malloc_mutex_lock(&arena->lock); + if (pageind + npages < chunk_npages && + (chunk->map[pageind+npages-map_bias].bits + & CHUNK_MAP_ALLOCATED) == 0 && (followsize = + chunk->map[pageind+npages-map_bias].bits & ~PAGE_MASK) >= size - + oldsize) { + /* + * The next run is available and sufficiently large. Split the + * following run, then merge the first part with the existing + * allocation. + */ + size_t flag_dirty; + size_t splitsize = (oldsize + followsize <= size + extra) + ? followsize : size + extra - oldsize; + arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk + + ((pageind+npages) << PAGE_SHIFT)), splitsize, true, zero); + + size = oldsize + splitsize; + npages = size >> PAGE_SHIFT; + + /* + * Mark the extended run as dirty if either portion of the run + * was dirty before allocation. This is rather pedantic, + * because there's not actually any sequence of events that + * could cause the resulting run to be passed to + * arena_run_dalloc() with the dirty argument set to false + * (which is when dirty flag consistency would really matter). 
+ */ + flag_dirty = (chunk->map[pageind-map_bias].bits & + CHUNK_MAP_DIRTY) | + (chunk->map[pageind+npages-1-map_bias].bits & + CHUNK_MAP_DIRTY); + chunk->map[pageind-map_bias].bits = size | flag_dirty + | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + chunk->map[pageind+npages-1-map_bias].bits = flag_dirty | + CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED; + +#ifdef JEMALLOC_STATS + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= oldsize; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].ndalloc++; + arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--; + + arena->stats.nmalloc_large++; + arena->stats.nrequests_large++; + arena->stats.allocated_large += size; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nmalloc++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++; + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++; + if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns > + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) { + arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns = + arena->stats.lstats[(size >> PAGE_SHIFT) - + 1].curruns; + } +#endif + malloc_mutex_unlock(&arena->lock); + return (false); + } + malloc_mutex_unlock(&arena->lock); + + return (true); +} + +/* + * Try to resize a large allocation, in order to avoid copying. This will + * always fail if growing an object, and the following run is already in use. + */ +static bool +arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + size_t psize; + + psize = PAGE_CEILING(size + extra); + if (psize == oldsize) { + /* Same size class. 
*/ +#ifdef JEMALLOC_FILL + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - + size); + } +#endif + return (false); + } else { + arena_chunk_t *chunk; + arena_t *arena; + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = chunk->arena; + dassert(arena->magic == ARENA_MAGIC); + + if (psize < oldsize) { +#ifdef JEMALLOC_FILL + /* Fill before shrinking in order avoid a race. */ + if (opt_junk) { + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); + } +#endif + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, + psize); + return (false); + } else { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, + oldsize, PAGE_CEILING(size), + psize - PAGE_CEILING(size), zero); +#ifdef JEMALLOC_FILL + if (ret == false && zero == false && opt_zero) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + size - oldsize); + } +#endif + return (ret); + } + } +} + +void * +arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, + bool zero) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. + */ + if (oldsize <= arena_maxclass) { + if (oldsize <= small_maxclass) { + assert(arena_bin_info[SMALL_SIZE2BIN(oldsize)].reg_size + == oldsize); + if ((size + extra <= small_maxclass && + SMALL_SIZE2BIN(size + extra) == + SMALL_SIZE2BIN(oldsize)) || (size <= oldsize && + size + extra >= oldsize)) { +#ifdef JEMALLOC_FILL + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), + 0x5a, oldsize - size); + } +#endif + return (ptr); + } + } else { + assert(size <= arena_maxclass); + if (size + extra > small_maxclass) { + if (arena_ralloc_large(ptr, oldsize, size, + extra, zero) == false) + return (ptr); + } + } + } + + /* Reallocation would require a move. */ + return (NULL); +} + +void * +arena_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. 
*/ + ret = arena_ralloc_no_move(ptr, oldsize, size, extra, zero); + if (ret != NULL) + return (ret); + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and + * copying. + */ + if (alignment != 0) { + size_t usize = sa2u(size + extra, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else + ret = arena_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment != 0) { + size_t usize = sa2u(size, alignment, NULL); + if (usize == 0) + return (NULL); + ret = ipalloc(usize, alignment, zero); + } else + ret = arena_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } + + /* Junk/zero-filling were already done by ipalloc()/arena_malloc(). */ + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(ret, ptr, copysize); + idalloc(ptr); + return (ret); +} + +bool +arena_new(arena_t *arena, unsigned ind) +{ + unsigned i; + arena_bin_t *bin; + + arena->ind = ind; + arena->nthreads = 0; + + if (malloc_mutex_init(&arena->lock)) + return (true); + +#ifdef JEMALLOC_STATS + memset(&arena->stats, 0, sizeof(arena_stats_t)); + arena->stats.lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (arena->stats.lstats == NULL) + return (true); + memset(arena->stats.lstats, 0, nlclasses * + sizeof(malloc_large_stats_t)); +# ifdef JEMALLOC_TCACHE + ql_new(&arena->tcache_ql); +# endif +#endif + +#ifdef JEMALLOC_PROF + arena->prof_accumbytes = 0; +#endif + + /* Initialize chunks. 
*/ + ql_new(&arena->chunks_dirty); + arena->spare = NULL; + + arena->nactive = 0; + arena->ndirty = 0; + arena->npurgatory = 0; + + arena_avail_tree_new(&arena->runs_avail_clean); + arena_avail_tree_new(&arena->runs_avail_dirty); + + /* Initialize bins. */ + i = 0; +#ifdef JEMALLOC_TINY + /* (2^n)-spaced tiny bins. */ + for (; i < ntbins; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); +#ifdef JEMALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } +#endif + + /* Quantum-spaced bins. */ + for (; i < ntbins + nqbins; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); +#ifdef JEMALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } + + /* Cacheline-spaced bins. */ + for (; i < ntbins + nqbins + ncbins; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); +#ifdef JEMALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } + + /* Subpage-spaced bins. */ + for (; i < nbins; i++) { + bin = &arena->bins[i]; + if (malloc_mutex_init(&bin->lock)) + return (true); + bin->runcur = NULL; + arena_run_tree_new(&bin->runs); +#ifdef JEMALLOC_STATS + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); +#endif + } + +#ifdef JEMALLOC_DEBUG + arena->magic = ARENA_MAGIC; +#endif + + return (false); +} + +#ifdef JEMALLOC_DEBUG +static void +small_size2bin_validate(void) +{ + size_t i, size, binind; + + i = 1; +# ifdef JEMALLOC_TINY + /* Tiny. 
*/ + for (; i < (1U << LG_TINY_MIN); i++) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } + for (; i < qspace_min; i++) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + assert(SMALL_SIZE2BIN(i) == binind); + } +# endif + /* Quantum-spaced. */ + for (; i <= qspace_max; i++) { + size = QUANTUM_CEILING(i); + binind = ntbins + (size >> LG_QUANTUM) - 1; + assert(SMALL_SIZE2BIN(i) == binind); + } + /* Cacheline-spaced. */ + for (; i <= cspace_max; i++) { + size = CACHELINE_CEILING(i); + binind = ntbins + nqbins + ((size - cspace_min) >> + LG_CACHELINE); + assert(SMALL_SIZE2BIN(i) == binind); + } + /* Sub-page. */ + for (; i <= sspace_max; i++) { + size = SUBPAGE_CEILING(i); + binind = ntbins + nqbins + ncbins + ((size - sspace_min) + >> LG_SUBPAGE); + assert(SMALL_SIZE2BIN(i) == binind); + } +} +#endif + +static bool +small_size2bin_init(void) +{ + + if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT + || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT + || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> + LG_TINY_MIN) + 1)) + return (small_size2bin_init_hard()); + + small_size2bin = const_small_size2bin; +#ifdef JEMALLOC_DEBUG + small_size2bin_validate(); +#endif + return (false); +} + +static bool +small_size2bin_init_hard(void) +{ + size_t i, size, binind; + uint8_t *custom_small_size2bin; +#define CUSTOM_SMALL_SIZE2BIN(s) \ + custom_small_size2bin[(s-1) >> LG_TINY_MIN] + + assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT + || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT + || (sizeof(const_small_size2bin) != ((small_maxclass-1) >> + LG_TINY_MIN) + 1)); + + custom_small_size2bin = (uint8_t *) + base_alloc(small_maxclass >> LG_TINY_MIN); + if (custom_small_size2bin == NULL) + return (true); + + i = 1; +#ifdef JEMALLOC_TINY + /* Tiny. 
*/ + for (; i < (1U << LG_TINY_MIN); i += TINY_MIN) { + size = pow2_ceil(1U << LG_TINY_MIN); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + for (; i < qspace_min; i += TINY_MIN) { + size = pow2_ceil(i); + binind = ffs((int)(size >> (LG_TINY_MIN + 1))); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } +#endif + /* Quantum-spaced. */ + for (; i <= qspace_max; i += TINY_MIN) { + size = QUANTUM_CEILING(i); + binind = ntbins + (size >> LG_QUANTUM) - 1; + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + /* Cacheline-spaced. */ + for (; i <= cspace_max; i += TINY_MIN) { + size = CACHELINE_CEILING(i); + binind = ntbins + nqbins + ((size - cspace_min) >> + LG_CACHELINE); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + /* Sub-page. */ + for (; i <= sspace_max; i += TINY_MIN) { + size = SUBPAGE_CEILING(i); + binind = ntbins + nqbins + ncbins + ((size - sspace_min) >> + LG_SUBPAGE); + CUSTOM_SMALL_SIZE2BIN(i) = binind; + } + + small_size2bin = custom_small_size2bin; +#ifdef JEMALLOC_DEBUG + small_size2bin_validate(); +#endif + return (false); +#undef CUSTOM_SMALL_SIZE2BIN +} + +/* + * Calculate bin_info->run_size such that it meets the following constraints: + * + * *) bin_info->run_size >= min_run_size + * *) bin_info->run_size <= arena_maxclass + * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed). + * *) bin_info->nregs <= RUN_MAXREGS + * + * bin_info->nregs, bin_info->bitmap_offset, and bin_info->reg0_offset are also + * calculated here, since these settings are all interdependent. 
+ */ +static size_t +bin_info_run_size_calc(arena_bin_info_t *bin_info, size_t min_run_size) +{ + size_t try_run_size, good_run_size; + uint32_t try_nregs, good_nregs; + uint32_t try_hdr_size, good_hdr_size; + uint32_t try_bitmap_offset, good_bitmap_offset; +#ifdef JEMALLOC_PROF + uint32_t try_ctx0_offset, good_ctx0_offset; +#endif + uint32_t try_reg0_offset, good_reg0_offset; + /* try_* describe the candidate layout being evaluated; good_* hold the last candidate copied at the top of the expansion loop and are what is finally stored in bin_info. */ + assert(min_run_size >= PAGE_SIZE); + assert(min_run_size <= arena_maxclass); + + /* + * Calculate known-valid settings before entering the run_size + * expansion loop, so that the first part of the loop always copies + * valid settings. + * + * The do..while loop iteratively reduces the number of regions until + * the run header and the regions no longer overlap. A closed formula + * would be quite messy, since there is an interdependency between the + * header's mask length and the number of regions. + */ + try_run_size = min_run_size; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* Add space for one (prof_ctx_t *) per region. */ + try_hdr_size += try_nregs * sizeof(prof_ctx_t *); + } else + try_ctx0_offset = 0; +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); /* try_nregs regions of reg_size bytes end exactly at try_run_size. */ + } while (try_hdr_size > try_reg0_offset); + + /* run_size expansion loop. */ + do { + /* + * Copy valid settings before trying more aggressive settings. 
+ */ + good_run_size = try_run_size; + good_nregs = try_nregs; + good_hdr_size = try_hdr_size; + good_bitmap_offset = try_bitmap_offset; +#ifdef JEMALLOC_PROF + good_ctx0_offset = try_ctx0_offset; +#endif + good_reg0_offset = try_reg0_offset; + + /* Try more aggressive settings. */ + try_run_size += PAGE_SIZE; + try_nregs = ((try_run_size - sizeof(arena_run_t)) / + bin_info->reg_size) + + 1; /* Counter-act try_nregs-- in loop. */ + if (try_nregs > RUN_MAXREGS) { + try_nregs = RUN_MAXREGS + + 1; /* Counter-act try_nregs-- in loop. */ + } + do { + try_nregs--; + try_hdr_size = sizeof(arena_run_t); + /* Pad to a long boundary. */ + try_hdr_size = LONG_CEILING(try_hdr_size); + try_bitmap_offset = try_hdr_size; + /* Add space for bitmap. */ + try_hdr_size += bitmap_size(try_nregs); +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote == false) { + /* Pad to a quantum boundary. */ + try_hdr_size = QUANTUM_CEILING(try_hdr_size); + try_ctx0_offset = try_hdr_size; + /* + * Add space for one (prof_ctx_t *) per region. + */ + try_hdr_size += try_nregs * + sizeof(prof_ctx_t *); + } /* No else branch here, unlike the initial loop: try_ctx0_offset keeps its previous value. */ +#endif + try_reg0_offset = try_run_size - (try_nregs * + bin_info->reg_size); + } while (try_hdr_size > try_reg0_offset); + } while (try_run_size <= arena_maxclass + && try_run_size <= arena_maxclass /* NOTE(review): repeats the preceding test; redundant. */ + && RUN_MAX_OVRHD * (bin_info->reg_size << 3) > RUN_MAX_OVRHD_RELAX + && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size + && try_nregs < RUN_MAXREGS); + + assert(good_hdr_size <= good_reg0_offset); + + /* Copy final settings. 
*/ + bin_info->run_size = good_run_size; + bin_info->nregs = good_nregs; + bin_info->bitmap_offset = good_bitmap_offset; +#ifdef JEMALLOC_PROF + bin_info->ctx0_offset = good_ctx0_offset; +#endif + bin_info->reg0_offset = good_reg0_offset; + + return (good_run_size); +} +/* Allocate arena_bin_info with base_alloc() and, for each of the nbins bins, set reg_size, compute the run layout with bin_info_run_size_calc(), and initialize bitmap_info for nregs bits. Returns true if base_alloc() fails, false otherwise. */ +static bool +bin_info_init(void) +{ + arena_bin_info_t *bin_info; + unsigned i; + size_t prev_run_size; + + arena_bin_info = base_alloc(sizeof(arena_bin_info_t) * nbins); + if (arena_bin_info == NULL) + return (true); + + prev_run_size = PAGE_SIZE; /* Each bin's resulting run size is passed as min_run_size for the next bin. */ + i = 0; +#ifdef JEMALLOC_TINY + /* (2^n)-spaced tiny bins. */ + for (; i < ntbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (1U << (LG_TINY_MIN + i)); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } +#endif + + /* Quantum-spaced bins. */ + for (; i < ntbins + nqbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = (i - ntbins + 1) << LG_QUANTUM; + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + /* Cacheline-spaced bins. */ + for (; i < ntbins + nqbins + ncbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = cspace_min + ((i - (ntbins + nqbins)) << + LG_CACHELINE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + /* Subpage-spaced bins. 
*/ + for (; i < nbins; i++) { + bin_info = &arena_bin_info[i]; + bin_info->reg_size = sspace_min + ((i - (ntbins + nqbins + + ncbins)) << LG_SUBPAGE); + prev_run_size = bin_info_run_size_calc(bin_info, prev_run_size); + bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); + } + + return (false); +} +/* Derive the size class boundaries and bin counts from opt_lg_[qc]space_max, abort() if nbins exceeds what the lookup table can encode, compute map_bias and arena_maxclass, then run small_size2bin_init() and bin_info_init(). Returns true if either of those fails, false otherwise. */ +bool +arena_boot(void) +{ + size_t header_size; + unsigned i; + + /* Set variables according to the value of opt_lg_[qc]space_max. 
*/ + qspace_max = (1U << opt_lg_qspace_max); + cspace_min = CACHELINE_CEILING(qspace_max); + if (cspace_min == qspace_max) + cspace_min += CACHELINE; /* Keep cspace_min strictly above qspace_max. */ + cspace_max = (1U << opt_lg_cspace_max); + sspace_min = SUBPAGE_CEILING(cspace_max); + if (sspace_min == cspace_max) + sspace_min += SUBPAGE; /* Likewise keep sspace_min strictly above cspace_max. */ + assert(sspace_min < PAGE_SIZE); + sspace_max = PAGE_SIZE - SUBPAGE; + +#ifdef JEMALLOC_TINY + assert(LG_QUANTUM >= LG_TINY_MIN); +#endif + assert(ntbins <= LG_QUANTUM); + nqbins = qspace_max >> LG_QUANTUM; + ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1; + nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1; + nbins = ntbins + nqbins + ncbins + nsbins; + + /* + * The small_size2bin lookup table uses uint8_t to encode each bin + * index, so we cannot support more than 256 small size classes. This + * limit is difficult to exceed (not even possible with 16B quantum and + * 4KiB pages), and such configurations are impractical, but + * nonetheless we need to protect against this case in order to avoid + * undefined behavior. + * + * Further constrain nbins to 255 if prof_promote is true, since all + * small size classes, plus a "not small" size class must be stored in + * 8 bits of arena_chunk_map_t's bits field. + */ +#ifdef JEMALLOC_PROF + if (opt_prof && prof_promote) { + if (nbins > 255) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); /* NOTE(review): the message starts with a bare ": "; a leading prefix may be missing here -- confirm against the upstream source. */ + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 255)\n"); + abort(); + } + } else +#endif + if (nbins > 256) { + char line_buf[UMAX2S_BUFSIZE]; + malloc_write(": Too many small size classes ("); + malloc_write(u2s(nbins, 10, line_buf)); + malloc_write(" > max 256)\n"); + abort(); + } + + /* + * Compute the header size such that it is large enough to contain the + * page map. The page map is biased to omit entries for the header + * itself, so some iteration is necessary to compute the map bias. 
+ * + * 1) Compute safe header_size and map_bias values that include enough + * space for an unbiased page map. + * 2) Refine map_bias based on (1) to omit the header pages in the page + * map. The resulting map_bias may be one too small. + * 3) Refine map_bias based on (2). The result will be >= the result + * from (2), and will always be correct. + */ + map_bias = 0; + for (i = 0; i < 3; i++) { + header_size = offsetof(arena_chunk_t, map) + + (sizeof(arena_chunk_map_t) * (chunk_npages-map_bias)); + map_bias = (header_size >> PAGE_SHIFT) + ((header_size & + PAGE_MASK) != 0); /* header_size in pages, rounded up. */ + } + assert(map_bias > 0); + + arena_maxclass = chunksize - (map_bias << PAGE_SHIFT); /* Chunk size minus the map_bias header pages. */ + + if (small_size2bin_init()) + return (true); + + if (bin_info_init()) + return (true); + + return (false); +} diff --git a/src/atomic.c b/src/atomic.c new file mode 100644 index 0000000..77ee313 --- /dev/null +++ b/src/atomic.c @@ -0,0 +1,2 @@ +#define JEMALLOC_ATOMIC_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/base.c b/src/base.c new file mode 100644 index 0000000..cc85e84 --- /dev/null +++ b/src/base.c @@ -0,0 +1,106 @@ +#define JEMALLOC_BASE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t base_mtx; + +/* + * Current pages that are being used for internal memory allocations. These + * pages are carved up in cacheline-size quanta, so that there is no chance of + * false cache line sharing. + */ +static void *base_pages; +static void *base_next_addr; /* Next address base_alloc() will hand out. */ +static void *base_past_addr; /* Addr immediately past base_pages. */ +static extent_node_t *base_nodes; /* Free list used by base_node_alloc() and base_node_dealloc(). */ + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static bool base_pages_alloc(size_t minsize); + +/******************************************************************************/ +/* Point the base allocator at a fresh region of CHUNK_CEILING(minsize) bytes obtained from chunk_alloc() with base == true. The previous region's bookkeeping is overwritten, so any unused remainder of it is no longer handed out. Called from base_alloc() with base_mtx held. Returns true on allocation failure. */ +static bool +base_pages_alloc(size_t minsize) +{ + size_t csize; + bool zero; + + assert(minsize != 0); + csize = CHUNK_CEILING(minsize); + zero = false; + base_pages = chunk_alloc(csize, true, &zero); + if (base_pages == NULL) + return (true); + base_next_addr = base_pages; + base_past_addr = (void *)((uintptr_t)base_pages + csize); + + return (false); +} +/* Hand out size bytes, rounded up to a multiple of the cacheline size, by advancing base_next_addr; a new region is allocated first if the request does not fit in the current one. Serialized by base_mtx. Returns NULL if a new region cannot be allocated. */ +void * +base_alloc(size_t size) +{ + void *ret; + size_t csize; + + /* Round size up to nearest multiple of the cacheline size. */ + csize = CACHELINE_CEILING(size); + + malloc_mutex_lock(&base_mtx); + /* Make sure there's enough space for the allocation. */ + if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) { + if (base_pages_alloc(csize)) { + malloc_mutex_unlock(&base_mtx); + return (NULL); + } + } + /* Allocate. */ + ret = base_next_addr; + base_next_addr = (void *)((uintptr_t)base_next_addr + csize); + malloc_mutex_unlock(&base_mtx); + + return (ret); +} +/* Return an extent_node_t: pop one from the base_nodes free list if it is non-empty, otherwise allocate one with base_alloc(). Free nodes are linked through their first pointer-sized word. May return NULL if base_alloc() fails. */ +extent_node_t * +base_node_alloc(void) +{ + extent_node_t *ret; + + malloc_mutex_lock(&base_mtx); + if (base_nodes != NULL) { + ret = base_nodes; + base_nodes = *(extent_node_t **)ret; + malloc_mutex_unlock(&base_mtx); + } else { /* Drop base_mtx before calling base_alloc(), which acquires it itself. */ + malloc_mutex_unlock(&base_mtx); + ret = (extent_node_t *)base_alloc(sizeof(extent_node_t)); + } + + return (ret); +} +/* Push node onto the base_nodes free list so that base_node_alloc() can reuse it. */ +void +base_node_dealloc(extent_node_t *node) +{ + + malloc_mutex_lock(&base_mtx); + *(extent_node_t **)node = base_nodes; + base_nodes = node; + malloc_mutex_unlock(&base_mtx); +} +/* Empty the node free list and initialize base_mtx. Returns true if the mutex cannot be initialized. */ +bool +base_boot(void) +{ + + base_nodes = NULL; + if (malloc_mutex_init(&base_mtx)) + return (true); + + return (false); +} diff --git a/src/bitmap.c b/src/bitmap.c new file mode 100644 index 0000000..b47e262 --- /dev/null +++ b/src/bitmap.c @@ -0,0 +1,90 @@ +#define JEMALLOC_BITMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + 
+/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t bits2groups(size_t nbits); + +/******************************************************************************/ +/* Number of groups needed for nbits bits: nbits >> LG_BITMAP_GROUP_NBITS, plus one if any of the bits selected by BITMAP_GROUP_NBITS_MASK are set (presumably a round-up division by the group width -- confirm against bitmap.h). */ +static size_t +bits2groups(size_t nbits) +{ + + return ((nbits >> LG_BITMAP_GROUP_NBITS) + + !!(nbits & BITMAP_GROUP_NBITS_MASK)); +} +/* Fill in binfo for a bitmap of nbits bits. levels[i].group_offset is the number of groups in all levels below i, so levels[nlevels].group_offset is the total group count. */ +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. + */ + binfo->levels[0].group_offset = 0; + group_count = bits2groups(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = bits2groups(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + binfo->nlevels = i; + binfo->nbits = nbits; +} +/* NOTE(review): despite the name, this returns the total group count shifted left by LG_SIZEOF_BITMAP, the same expression bitmap_init() passes to memset() as a length, so the result appears to be a size in bytes. */ +size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); +} +/* Value of bitmap_info_ngroups() for a bitmap of nbits bits, computed from a temporary bitmap_info_t. nbits must satisfy the assertions in bitmap_info_init(). */ +size_t +bitmap_size(size_t nbits) +{ + bitmap_info_t binfo; + + bitmap_info_init(&binfo, nbits); + return (bitmap_info_ngroups(&binfo)); +} +/* Initialize the storage described by binfo: set every group to all ones, then shift the unused high bits out of the last group of each level. */ +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface, so the bitmap starts out with all 1 bits, except for + * trailing unused bits (if any). Note that each group uses bit 0 to + * correspond to the first logical bit in the group, so extra bits + * are the most significant bits of the last group. 
+ */ + memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << + LG_SIZEOF_BITMAP); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[1].group_offset - 1] >>= extra; /* Last group of level 0. */ + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; /* Groups in level i-1, used here as the bit count of level i. */ + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; /* Last group of level i. */ + } +} diff --git a/src/chunk.c b/src/chunk.c new file mode 100644 index 0000000..301519e --- /dev/null +++ b/src/chunk.c @@ -0,0 +1,171 @@ +#define JEMALLOC_CHUNK_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +size_t opt_lg_chunk = LG_CHUNK_DEFAULT; +#ifdef JEMALLOC_SWAP +bool opt_overcommit = true; +#endif + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) +malloc_mutex_t chunks_mtx; +chunk_stats_t stats_chunks; +#endif + +#ifdef JEMALLOC_IVSALLOC +rtree_t *chunks_rtree; +#endif + +/* Various chunk-related settings. */ +size_t chunksize; +size_t chunksize_mask; /* (chunksize - 1). */ +size_t chunk_npages; /* (chunksize >> PAGE_SHIFT). */ +size_t map_bias; /* Chunk header size in pages; set by arena_boot(). */ +size_t arena_maxclass; /* Max size class for arenas. */ + +/******************************************************************************/ + +/* + * If the caller specifies (*zero == false), it is still possible to receive + * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc() + * takes advantage of this to avoid demanding zeroed chunks, but taking + * advantage of them if they are returned. 
+ */ +void * +chunk_alloc(size_t size, bool base, bool *zero) +{ + void *ret; + + assert(size != 0); + assert((size & chunksize_mask) == 0); + /* Backends are tried in order: swap (when enabled), then DSS (when compiled in), then mmap; the latter two are skipped if swap is enabled without opt_overcommit. */ +#ifdef JEMALLOC_SWAP + if (swap_enabled) { + ret = chunk_alloc_swap(size, zero); + if (ret != NULL) + goto RETURN; + } + + if (swap_enabled == false || opt_overcommit) { +#endif +#ifdef JEMALLOC_DSS + ret = chunk_alloc_dss(size, zero); + if (ret != NULL) + goto RETURN; +#endif + ret = chunk_alloc_mmap(size); + if (ret != NULL) { + *zero = true; /* Chunks that come from chunk_alloc_mmap() are always reported as zeroed. */ + goto RETURN; + } +#ifdef JEMALLOC_SWAP + } +#endif + + /* All strategies for allocation failed. */ + ret = NULL; +RETURN: +#ifdef JEMALLOC_IVSALLOC + if (base == false && ret != NULL) { + if (rtree_set(chunks_rtree, (uintptr_t)ret, ret)) { + chunk_dealloc(ret, size); /* NOTE(review): chunk_dealloc() decrements stats_chunks.curchunks, which has not been incremented for this chunk yet. */ + return (NULL); + } + } +#endif +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + if (ret != NULL) { +# ifdef JEMALLOC_PROF + bool gdump; +# endif + malloc_mutex_lock(&chunks_mtx); +# ifdef JEMALLOC_STATS + stats_chunks.nchunks += (size / chunksize); +# endif + stats_chunks.curchunks += (size / chunksize); + if (stats_chunks.curchunks > stats_chunks.highchunks) { + stats_chunks.highchunks = stats_chunks.curchunks; +# ifdef JEMALLOC_PROF + gdump = true; +# endif + } +# ifdef JEMALLOC_PROF + else + gdump = false; +# endif + malloc_mutex_unlock(&chunks_mtx); /* prof_gdump() below runs only after chunks_mtx has been released. */ +# ifdef JEMALLOC_PROF + if (opt_prof && opt_prof_gdump && gdump) + prof_gdump(); +# endif + } +#endif + + assert(CHUNK_ADDR2BASE(ret) == ret); + return (ret); +} +/* Release a chunk: set its chunks_rtree entry to NULL (IVSALLOC), subtract it from stats_chunks.curchunks, then offer it to the swap and DSS backends in turn; each returns false when it has taken the chunk, otherwise the chunk goes to chunk_dealloc_mmap(). */ +void +chunk_dealloc(void *chunk, size_t size) +{ + + assert(chunk != NULL); + assert(CHUNK_ADDR2BASE(chunk) == chunk); + assert(size != 0); + assert((size & chunksize_mask) == 0); + +#ifdef JEMALLOC_IVSALLOC + rtree_set(chunks_rtree, (uintptr_t)chunk, NULL); +#endif +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + malloc_mutex_lock(&chunks_mtx); + stats_chunks.curchunks -= (size / chunksize); + malloc_mutex_unlock(&chunks_mtx); +#endif + +#ifdef JEMALLOC_SWAP + if (swap_enabled && 
chunk_dealloc_swap(chunk, size) == false) + return; +#endif +#ifdef JEMALLOC_DSS + if (chunk_dealloc_dss(chunk, size) == false) + return; +#endif + chunk_dealloc_mmap(chunk, size); +} +/* Derive chunksize, chunksize_mask and chunk_npages from opt_lg_chunk, then initialize the stats mutex, the configured backends and (IVSALLOC) chunks_rtree. Returns true on the first failure, false otherwise. */ +bool +chunk_boot(void) +{ + + /* Set variables according to the value of opt_lg_chunk. */ + chunksize = (ZU(1) << opt_lg_chunk); + assert(chunksize >= PAGE_SIZE); + chunksize_mask = chunksize - 1; + chunk_npages = (chunksize >> PAGE_SHIFT); + +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + if (malloc_mutex_init(&chunks_mtx)) + return (true); + memset(&stats_chunks, 0, sizeof(chunk_stats_t)); +#endif +#ifdef JEMALLOC_SWAP + if (chunk_swap_boot()) + return (true); +#endif + if (chunk_mmap_boot()) + return (true); +#ifdef JEMALLOC_DSS + if (chunk_dss_boot()) + return (true); +#endif +#ifdef JEMALLOC_IVSALLOC + chunks_rtree = rtree_new((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk); /* Presumably the pointer width in bits (if LG_SIZEOF_PTR is lg of the pointer size in bytes) minus opt_lg_chunk -- confirm. */ + if (chunks_rtree == NULL) + return (true); +#endif + + return (false); +} diff --git a/src/chunk_dss.c b/src/chunk_dss.c new file mode 100644 index 0000000..5c0e290 --- /dev/null +++ b/src/chunk_dss.c @@ -0,0 +1,284 @@ +#define JEMALLOC_CHUNK_DSS_C_ +#include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_DSS +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t dss_mtx; + +/* Base address of the DSS. */ +static void *dss_base; +/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ +static void *dss_prev; +/* Current upper limit on DSS addresses. */ +static void *dss_max; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. 
+ */ +static extent_tree_t dss_chunks_szad; +static extent_tree_t dss_chunks_ad; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle_dss(size_t size, bool *zero); +static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size); + +/******************************************************************************/ +/* Try to reuse previously deallocated DSS space. Looks up an extent for the key (addr NULL, size) with extent_tree_szad_nsearch() -- presumably the smallest recorded extent of at least size bytes -- and carves size bytes off its front. Zeroes the result if *zero is true. Returns NULL if the search finds nothing. */ +static void * +chunk_recycle_dss(size_t size, bool *zero) +{ + extent_node_t *node, key; + + key.addr = NULL; + key.size = size; + malloc_mutex_lock(&dss_mtx); + node = extent_tree_szad_nsearch(&dss_chunks_szad, &key); + if (node != NULL) { + void *ret = node->addr; + + /* Remove node from the tree. */ + extent_tree_szad_remove(&dss_chunks_szad, node); + if (node->size == size) { + extent_tree_ad_remove(&dss_chunks_ad, node); + base_node_dealloc(node); + } else { + /* + * Insert the remainder of node's address range as a + * smaller chunk. Its position within dss_chunks_ad + * does not change. + */ + assert(node->size > size); + node->addr = (void *)((uintptr_t)node->addr + size); + node->size -= size; + extent_tree_szad_insert(&dss_chunks_szad, node); + } + malloc_mutex_unlock(&dss_mtx); + + if (*zero) + memset(ret, 0, size); + return (ret); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} +/* Allocate size bytes from the DSS: recycle a recorded extent if possible, otherwise grow the break with sbrk(), padding so that the returned address is chunk-aligned. Sets *zero to true for freshly sbrk()ed memory. Returns NULL if size does not fit a signed increment, the DSS is exhausted, or sbrk() fails. */ +void * +chunk_alloc_dss(size_t size, bool *zero) +{ + void *ret; + + ret = chunk_recycle_dss(size, zero); + if (ret != NULL) + return (ret); + + /* + * sbrk() uses a signed increment argument, so take care not to + * interpret a huge allocation request as a negative increment. + */ + if ((intptr_t)size < 0) + return (NULL); + + malloc_mutex_lock(&dss_mtx); + if (dss_prev != (void *)-1) { + intptr_t incr; + + /* + * The loop is necessary to recover from races with other + * threads that are using the DSS for something other than + * malloc. + */ + do { + /* Get the current end of the DSS. 
*/ + dss_max = sbrk(0); + + /* + * Calculate how much padding is necessary to + * chunk-align the end of the DSS. + */ + incr = (intptr_t)size + - (intptr_t)CHUNK_ADDR2OFFSET(dss_max); + if (incr == (intptr_t)size) + ret = dss_max; + else { + ret = (void *)((intptr_t)dss_max + incr); + incr += size; /* Alignment padding plus the chunk itself. */ + } + + dss_prev = sbrk(incr); + if (dss_prev == dss_max) { + /* Success. */ + dss_max = (void *)((intptr_t)dss_prev + incr); + malloc_mutex_unlock(&dss_mtx); + *zero = true; + return (ret); + } + } while (dss_prev != (void *)-1); + } + malloc_mutex_unlock(&dss_mtx); + + return (NULL); +} +/* Record [chunk, chunk+size) in both extent trees, coalescing with the recorded ranges immediately after and before it. Expects dss_mtx to be held by the caller; the mutex is released and reacquired around base_node_alloc(). Returns the node covering the resulting range, or NULL if a node could not be allocated. */ +static extent_node_t * +chunk_dealloc_dss_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + xnode = NULL; + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&dss_chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within + * dss_chunks_ad, so only remove/insert from/into + * dss_chunks_szad. + */ + extent_tree_szad_remove(&dss_chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&dss_chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on dss_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. + */ + malloc_mutex_unlock(&dss_mtx); + xnode = base_node_alloc(); + malloc_mutex_lock(&dss_mtx); + if (xnode == NULL) + return (NULL); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&dss_chunks_ad, node); + extent_tree_szad_insert(&dss_chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused due to a race. 
*/ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&dss_chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within dss_chunks_ad, so only + * remove/insert node from/into dss_chunks_szad. + */ + extent_tree_szad_remove(&dss_chunks_szad, prev); + extent_tree_ad_remove(&dss_chunks_ad, prev); + + extent_tree_szad_remove(&dss_chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&dss_chunks_szad, node); + + base_node_dealloc(prev); + } + + return (node); +} +/* Return true if chunk lies in [dss_base, dss_max), checked under dss_mtx. */ +bool +chunk_in_dss(void *chunk) +{ + bool ret; + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&dss_mtx); + + return (ret); +} +/* If chunk lies in [dss_base, dss_max), record it for reuse and either shrink the break (when the possibly coalesced range ends at the current break) or madvise() it, then return false. Return true without touching the chunk if it is outside that range. */ +bool +chunk_dealloc_dss(void *chunk, size_t size) +{ + bool ret; + + malloc_mutex_lock(&dss_mtx); + if ((uintptr_t)chunk >= (uintptr_t)dss_base + && (uintptr_t)chunk < (uintptr_t)dss_max) { + extent_node_t *node; + + /* Try to coalesce with other unused chunks. */ + node = chunk_dealloc_dss_record(chunk, size); + if (node != NULL) { + chunk = node->addr; + size = node->size; + } + + /* Get the current end of the DSS. */ + dss_max = sbrk(0); + + /* + * Try to shrink the DSS if this chunk is at the end of the + * DSS. The sbrk() call here is subject to a race condition + * with threads that use brk(2) or sbrk(2) directly, but the + * alternative would be to leak memory for the sake of poorly + * designed multi-threaded programs. + */ + if ((void *)((uintptr_t)chunk + size) == dss_max + && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) { + /* Success. 
*/ + dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size); + + if (node != NULL) { + extent_tree_szad_remove(&dss_chunks_szad, node); + extent_tree_ad_remove(&dss_chunks_ad, node); + base_node_dealloc(node); + } + } else + madvise(chunk, size, MADV_DONTNEED); /* Could not shrink the break: keep the address range and only advise the kernel that its contents are not needed. */ + + ret = false; + goto RETURN; + } + + ret = true; +RETURN: + malloc_mutex_unlock(&dss_mtx); + return (ret); +} +/* Initialize dss_mtx, take the current break as dss_base, dss_prev and dss_max, and create both extent trees. Returns true if the mutex cannot be initialized. */ +bool +chunk_dss_boot(void) +{ + + if (malloc_mutex_init(&dss_mtx)) + return (true); + dss_base = sbrk(0); + dss_prev = dss_base; + dss_max = dss_base; + extent_tree_szad_new(&dss_chunks_szad); + extent_tree_ad_new(&dss_chunks_ad); + + return (false); +} + +/******************************************************************************/ +#endif /* JEMALLOC_DSS */ diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c new file mode 100644 index 0000000..164e86e --- /dev/null +++ b/src/chunk_mmap.c @@ -0,0 +1,239 @@ +#define JEMALLOC_CHUNK_MMAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and + * potentially avoid some system calls. + */ +#ifndef NO_TLS +static __thread bool mmap_unaligned_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#define MMAP_UNALIGNED_GET() mmap_unaligned_tls +#define MMAP_UNALIGNED_SET(v) do { \ + mmap_unaligned_tls = (v); \ +} while (0) +#else +static pthread_key_t mmap_unaligned_tsd; +#define MMAP_UNALIGNED_GET() ((bool)pthread_getspecific(mmap_unaligned_tsd)) +#define MMAP_UNALIGNED_SET(v) do { \ + pthread_setspecific(mmap_unaligned_tsd, (void *)(v)); \ +} while (0) +#endif + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static void *pages_map(void *addr, size_t size, bool noreserve); +static void pages_unmap(void *addr, size_t size); +static void *chunk_alloc_mmap_slow(size_t size, bool unaligned, + bool noreserve); +static void *chunk_alloc_mmap_internal(size_t size, bool noreserve); + +/******************************************************************************/ +/* mmap() size bytes of private anonymous read/write memory, passing addr as the requested address. Returns NULL if mmap() fails, or if a non-NULL addr was requested and the mapping landed elsewhere (that mapping is unmapped again). */ +static void * +pages_map(void *addr, size_t size, bool noreserve) +{ + void *ret; + + /* + * We don't use MAP_FIXED here, because it can cause the *replacement* + * of existing mappings, and we only want to create new mappings. + */ + int flags = MAP_PRIVATE | MAP_ANON; +#ifdef MAP_NORESERVE + if (noreserve) + flags |= MAP_NORESERVE; +#endif + ret = mmap(addr, size, PROT_READ | PROT_WRITE, flags, -1, 0); + assert(ret != NULL); + + if (ret == MAP_FAILED) + ret = NULL; + else if (addr != NULL && ret != addr) { + /* + * We succeeded in mapping memory, but not in the right place. + */ + if (munmap(ret, size) == -1) { + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_write(": Error in munmap(): "); + malloc_write(buf); + malloc_write("\n"); + if (opt_abort) + abort(); + } + ret = NULL; + } + + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); + return (ret); +} +/* munmap() wrapper: on failure, write the error text via malloc_write() and abort() if opt_abort is set. */ +static void +pages_unmap(void *addr, size_t size) +{ + + if (munmap(addr, size) == -1) { + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_write(": Error in munmap(): "); + malloc_write(buf); + malloc_write("\n"); + if (opt_abort) + abort(); + } +} +/* Reliable path: map size plus chunksize bytes, then unmap the leading and trailing excess so that the size bytes kept start on a chunk boundary. unaligned tells whether the caller already saw an unaligned mapping; if neither the caller nor this call did, the per-thread flag is reset so that the fast path is tried next time. Returns NULL on overflow or mapping failure. */ +static void * +chunk_alloc_mmap_slow(size_t size, bool unaligned, bool noreserve) +{ + void *ret; + size_t offset; + + /* Beware size_t wrap-around. */ + if (size + chunksize <= size) + return (NULL); + + ret = pages_map(NULL, size + chunksize, noreserve); + if (ret == NULL) + return (NULL); + + /* Clean up unneeded leading/trailing space. 
*/ + offset = CHUNK_ADDR2OFFSET(ret); + if (offset != 0) { + /* Note that mmap() returned an unaligned mapping. */ + unaligned = true; + + /* Leading space. */ + pages_unmap(ret, chunksize - offset); + + ret = (void *)((uintptr_t)ret + + (chunksize - offset)); + + /* Trailing space. */ + pages_unmap((void *)((uintptr_t)ret + size), + offset); + } else { + /* Trailing space only. */ + pages_unmap((void *)((uintptr_t)ret + size), + chunksize); + } + + /* + * If mmap() returned an aligned mapping, reset mmap_unaligned so that + * the next chunk_alloc_mmap() execution tries the fast allocation + * method. + */ + if (unaligned == false) + MMAP_UNALIGNED_SET(false); + + return (ret); +} +/* Common implementation for chunk_alloc_mmap() and chunk_alloc_mmap_noreserve(); noreserve is forwarded to pages_map(). Returns NULL on failure. */ +static void * +chunk_alloc_mmap_internal(size_t size, bool noreserve) +{ + void *ret; + + /* + * Ideally, there would be a way to specify alignment to mmap() (like + * NetBSD has), but in the absence of such a feature, we have to work + * hard to efficiently create aligned mappings. The reliable, but + * slow method is to create a mapping that is over-sized, then trim the + * excess. However, that always results in at least one call to + * pages_unmap(). + * + * A more optimistic approach is to try mapping precisely the right + * amount, then try to append another mapping if alignment is off. In + * practice, this works out well as long as the application is not + * interleaving mappings via direct mmap() calls. If we do run into a + * situation where there is an interleaved mapping and we are unable to + * extend an unaligned mapping, our best option is to switch to the + * slow method until mmap() returns another aligned mapping. This will + * tend to leave a gap in the memory map that is too small to cause + * later problems for the optimistic method. + * + * Another possible confounding factor is address space layout + * randomization (ASLR), which causes mmap(2) to disregard the + * requested address. 
mmap_unaligned tracks whether the previous + * chunk_alloc_mmap() execution received any unaligned or relocated + * mappings, and if so, the current execution will immediately fall + * back to the slow method. However, we keep track of whether the fast + * method would have succeeded, and if so, we make a note to try the + * fast method next time. + */ + + if (MMAP_UNALIGNED_GET() == false) { + size_t offset; + + ret = pages_map(NULL, size, noreserve); + if (ret == NULL) + return (NULL); + + offset = CHUNK_ADDR2OFFSET(ret); + if (offset != 0) { + MMAP_UNALIGNED_SET(true); + /* Try to extend chunk boundary. */ + if (pages_map((void *)((uintptr_t)ret + size), + chunksize - offset, noreserve) == NULL) { + /* + * Extension failed. Clean up, then revert to + * the reliable-but-expensive method. + */ + pages_unmap(ret, size); + ret = chunk_alloc_mmap_slow(size, true, + noreserve); + } else { + /* Clean up unneeded leading space. */ + pages_unmap(ret, chunksize - offset); + ret = (void *)((uintptr_t)ret + (chunksize - + offset)); + } + } + } else + ret = chunk_alloc_mmap_slow(size, false, noreserve); + + return (ret); +} +/* mmap-backed chunk allocation with noreserve == false. */ +void * +chunk_alloc_mmap(size_t size) +{ + + return (chunk_alloc_mmap_internal(size, false)); +} +/* As chunk_alloc_mmap(), but with noreserve == true, which makes pages_map() add MAP_NORESERVE where that flag is defined. */ +void * +chunk_alloc_mmap_noreserve(size_t size) +{ + + return (chunk_alloc_mmap_internal(size, true)); +} +/* Unmap size bytes starting at chunk via pages_unmap(). */ +void +chunk_dealloc_mmap(void *chunk, size_t size) +{ + + pages_unmap(chunk, size); +} +/* Create the TSD key behind MMAP_UNALIGNED_GET/SET when NO_TLS is defined; nothing to do otherwise. Returns true if key creation fails. */ +bool +chunk_mmap_boot(void) +{ + +#ifdef NO_TLS + if (pthread_key_create(&mmap_unaligned_tsd, NULL) != 0) { + malloc_write(": Error in pthread_key_create()\n"); + return (true); + } +#endif + + return (false); +} diff --git a/src/chunk_swap.c b/src/chunk_swap.c new file mode 100644 index 0000000..cb25ae0 --- /dev/null +++ b/src/chunk_swap.c @@ -0,0 +1,402 @@ +#define JEMALLOC_CHUNK_SWAP_C_ +#include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_SWAP +/******************************************************************************/ +/* Data. 
*/ + +malloc_mutex_t swap_mtx; +bool swap_enabled; +bool swap_prezeroed; +size_t swap_nfds; +int *swap_fds; +#ifdef JEMALLOC_STATS +size_t swap_avail; +#endif + +/* Base address of the mmap()ed file(s). */ +static void *swap_base; +/* Current end of the space in use (<= swap_max). */ +static void *swap_end; +/* Absolute upper limit on file-backed addresses. */ +static void *swap_max; + +/* + * Trees of chunks that were previously allocated (trees differ only in node + * ordering). These are used when allocating chunks, in an attempt to re-use + * address space. Depending on function, different tree orderings are needed, + * which is why there are two trees with the same contents. + */ +static extent_tree_t swap_chunks_szad; +static extent_tree_t swap_chunks_ad; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void *chunk_recycle_swap(size_t size, bool *zero); +static extent_node_t *chunk_dealloc_swap_record(void *chunk, size_t size); + +/******************************************************************************/ + +static void * +chunk_recycle_swap(size_t size, bool *zero) +{ + extent_node_t *node, key; + + key.addr = NULL; + key.size = size; + malloc_mutex_lock(&swap_mtx); + node = extent_tree_szad_nsearch(&swap_chunks_szad, &key); + if (node != NULL) { + void *ret = node->addr; + + /* Remove node from the tree. */ + extent_tree_szad_remove(&swap_chunks_szad, node); + if (node->size == size) { + extent_tree_ad_remove(&swap_chunks_ad, node); + base_node_dealloc(node); + } else { + /* + * Insert the remainder of node's address range as a + * smaller chunk. Its position within swap_chunks_ad + * does not change. 
+ */ + assert(node->size > size); + node->addr = (void *)((uintptr_t)node->addr + size); + node->size -= size; + extent_tree_szad_insert(&swap_chunks_szad, node); + } +#ifdef JEMALLOC_STATS + swap_avail -= size; +#endif + malloc_mutex_unlock(&swap_mtx); + + if (*zero) + memset(ret, 0, size); + return (ret); + } + malloc_mutex_unlock(&swap_mtx); + + return (NULL); +} + +void * +chunk_alloc_swap(size_t size, bool *zero) +{ + void *ret; + + assert(swap_enabled); + + ret = chunk_recycle_swap(size, zero); + if (ret != NULL) + return (ret); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)swap_end + size <= (uintptr_t)swap_max) { + ret = swap_end; + swap_end = (void *)((uintptr_t)swap_end + size); +#ifdef JEMALLOC_STATS + swap_avail -= size; +#endif + malloc_mutex_unlock(&swap_mtx); + + if (swap_prezeroed) + *zero = true; + else if (*zero) + memset(ret, 0, size); + } else { + malloc_mutex_unlock(&swap_mtx); + return (NULL); + } + + return (ret); +} + +static extent_node_t * +chunk_dealloc_swap_record(void *chunk, size_t size) +{ + extent_node_t *xnode, *node, *prev, key; + + xnode = NULL; + while (true) { + key.addr = (void *)((uintptr_t)chunk + size); + node = extent_tree_ad_nsearch(&swap_chunks_ad, &key); + /* Try to coalesce forward. */ + if (node != NULL && node->addr == key.addr) { + /* + * Coalesce chunk with the following address range. + * This does not change the position within + * swap_chunks_ad, so only remove/insert from/into + * swap_chunks_szad. + */ + extent_tree_szad_remove(&swap_chunks_szad, node); + node->addr = chunk; + node->size += size; + extent_tree_szad_insert(&swap_chunks_szad, node); + break; + } else if (xnode == NULL) { + /* + * It is possible that base_node_alloc() will cause a + * new base chunk to be allocated, so take care not to + * deadlock on swap_mtx, and recover if another thread + * deallocates an adjacent chunk while this one is busy + * allocating xnode. 
+ */ + malloc_mutex_unlock(&swap_mtx); + xnode = base_node_alloc(); + malloc_mutex_lock(&swap_mtx); + if (xnode == NULL) + return (NULL); + } else { + /* Coalescing forward failed, so insert a new node. */ + node = xnode; + xnode = NULL; + node->addr = chunk; + node->size = size; + extent_tree_ad_insert(&swap_chunks_ad, node); + extent_tree_szad_insert(&swap_chunks_szad, node); + break; + } + } + /* Discard xnode if it ended up unused due to a race. */ + if (xnode != NULL) + base_node_dealloc(xnode); + + /* Try to coalesce backward. */ + prev = extent_tree_ad_prev(&swap_chunks_ad, node); + if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) == + chunk) { + /* + * Coalesce chunk with the previous address range. This does + * not change the position within swap_chunks_ad, so only + * remove/insert node from/into swap_chunks_szad. + */ + extent_tree_szad_remove(&swap_chunks_szad, prev); + extent_tree_ad_remove(&swap_chunks_ad, prev); + + extent_tree_szad_remove(&swap_chunks_szad, node); + node->addr = prev->addr; + node->size += prev->size; + extent_tree_szad_insert(&swap_chunks_szad, node); + + base_node_dealloc(prev); + } + + return (node); +} + +bool +chunk_in_swap(void *chunk) +{ + bool ret; + + assert(swap_enabled); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)chunk >= (uintptr_t)swap_base + && (uintptr_t)chunk < (uintptr_t)swap_max) + ret = true; + else + ret = false; + malloc_mutex_unlock(&swap_mtx); + + return (ret); +} + +bool +chunk_dealloc_swap(void *chunk, size_t size) +{ + bool ret; + + assert(swap_enabled); + + malloc_mutex_lock(&swap_mtx); + if ((uintptr_t)chunk >= (uintptr_t)swap_base + && (uintptr_t)chunk < (uintptr_t)swap_max) { + extent_node_t *node; + + /* Try to coalesce with other unused chunks. */ + node = chunk_dealloc_swap_record(chunk, size); + if (node != NULL) { + chunk = node->addr; + size = node->size; + } + + /* + * Try to shrink the in-use memory if this chunk is at the end + * of the in-use memory. 
+ */ + if ((void *)((uintptr_t)chunk + size) == swap_end) { + swap_end = (void *)((uintptr_t)swap_end - size); + + if (node != NULL) { + extent_tree_szad_remove(&swap_chunks_szad, + node); + extent_tree_ad_remove(&swap_chunks_ad, node); + base_node_dealloc(node); + } + } else + madvise(chunk, size, MADV_DONTNEED); + +#ifdef JEMALLOC_STATS + swap_avail += size; +#endif + ret = false; + goto RETURN; + } + + ret = true; +RETURN: + malloc_mutex_unlock(&swap_mtx); + return (ret); +} + +bool +chunk_swap_enable(const int *fds, unsigned nfds, bool prezeroed) +{ + bool ret; + unsigned i; + off_t off; + void *vaddr; + size_t cumsize, voff; + size_t sizes[nfds]; + + malloc_mutex_lock(&swap_mtx); + + /* Get file sizes. */ + for (i = 0, cumsize = 0; i < nfds; i++) { + off = lseek(fds[i], 0, SEEK_END); + if (off == ((off_t)-1)) { + ret = true; + goto RETURN; + } + if (PAGE_CEILING(off) != off) { + /* Truncate to a multiple of the page size. */ + off &= ~PAGE_MASK; + if (ftruncate(fds[i], off) != 0) { + ret = true; + goto RETURN; + } + } + sizes[i] = off; + if (cumsize + off < cumsize) { + /* + * Cumulative file size is greater than the total + * address space. Bail out while it's still obvious + * what the problem is. + */ + ret = true; + goto RETURN; + } + cumsize += off; + } + + /* Round down to a multiple of the chunk size. */ + cumsize &= ~chunksize_mask; + if (cumsize == 0) { + ret = true; + goto RETURN; + } + + /* + * Allocate a chunk-aligned region of anonymous memory, which will + * be the final location for the memory-mapped files. + */ + vaddr = chunk_alloc_mmap_noreserve(cumsize); + if (vaddr == NULL) { + ret = true; + goto RETURN; + } + + /* Overlay the files onto the anonymous mapping. 
*/ + for (i = 0, voff = 0; i < nfds; i++) { + void *addr = mmap((void *)((uintptr_t)vaddr + voff), sizes[i], + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fds[i], 0); + if (addr == MAP_FAILED) { + char buf[BUFERROR_BUF]; + + + buferror(errno, buf, sizeof(buf)); + malloc_write( + ": Error in mmap(..., MAP_FIXED, ...): "); + malloc_write(buf); + malloc_write("\n"); + if (opt_abort) + abort(); + if (munmap(vaddr, voff) == -1) { + buferror(errno, buf, sizeof(buf)); + malloc_write(": Error in munmap(): "); + malloc_write(buf); + malloc_write("\n"); + } + ret = true; + goto RETURN; + } + assert(addr == (void *)((uintptr_t)vaddr + voff)); + + /* + * Tell the kernel that the mapping will be accessed randomly, + * and that it should not gratuitously sync pages to the + * filesystem. + */ +#ifdef MADV_RANDOM + madvise(addr, sizes[i], MADV_RANDOM); +#endif +#ifdef MADV_NOSYNC + madvise(addr, sizes[i], MADV_NOSYNC); +#endif + + voff += sizes[i]; + } + + swap_prezeroed = prezeroed; + swap_base = vaddr; + swap_end = swap_base; + swap_max = (void *)((uintptr_t)vaddr + cumsize); + + /* Copy the fds array for mallctl purposes. */ + swap_fds = (int *)base_alloc(nfds * sizeof(int)); + if (swap_fds == NULL) { + ret = true; + goto RETURN; + } + memcpy(swap_fds, fds, nfds * sizeof(int)); + swap_nfds = nfds; + +#ifdef JEMALLOC_STATS + swap_avail = cumsize; +#endif + + swap_enabled = true; + + ret = false; +RETURN: + malloc_mutex_unlock(&swap_mtx); + return (ret); +} + +bool +chunk_swap_boot(void) +{ + + if (malloc_mutex_init(&swap_mtx)) + return (true); + + swap_enabled = false; + swap_prezeroed = false; /* swap.* mallctl's depend on this. 
*/ + swap_nfds = 0; + swap_fds = NULL; +#ifdef JEMALLOC_STATS + swap_avail = 0; +#endif + swap_base = NULL; + swap_end = NULL; + swap_max = NULL; + + extent_tree_szad_new(&swap_chunks_szad); + extent_tree_ad_new(&swap_chunks_ad); + + return (false); +} + +/******************************************************************************/ +#endif /* JEMALLOC_SWAP */ diff --git a/src/ckh.c b/src/ckh.c new file mode 100644 index 0000000..143b5b5 --- /dev/null +++ b/src/ckh.c @@ -0,0 +1,619 @@ +/* + ******************************************************************************* + * Implementation of (2^1+,2) cuckoo hashing, where 2^1+ indicates that each + * hash bucket contains 2^n cells, for n >= 1, and 2 indicates that two hash + * functions are employed. The original cuckoo hashing algorithm was described + * in: + * + * Pagh, R., F.F. Rodler (2004) Cuckoo Hashing. Journal of Algorithms + * 51(2):122-144. + * + * Generalization of cuckoo hashing was discussed in: + * + * Erlingsson, U., M. Manasse, F. McSherry (2006) A cool and practical + * alternative to traditional hash tables. In Proceedings of the 7th + * Workshop on Distributed Data and Structures (WDAS'06), Santa Clara, CA, + * January 2006. + * + * This implementation uses precisely two hash functions because that is the + * fewest that can work, and supporting multiple hashes is an implementation + * burden. Here is a reproduction of Figure 1 from Erlingsson et al. (2006) + * that shows approximate expected maximum load factors for various + * configurations: + * + * | #cells/bucket | + * #hashes | 1 | 2 | 4 | 8 | + * --------+-------+-------+-------+-------+ + * 1 | 0.006 | 0.006 | 0.03 | 0.12 | + * 2 | 0.49 | 0.86 |>0.93< |>0.96< | + * 3 | 0.91 | 0.97 | 0.98 | 0.999 | + * 4 | 0.97 | 0.99 | 0.999 | | + * + * The number of cells per bucket is chosen such that a bucket fits in one cache + * line. So, on 32- and 64-bit systems, we use (8,2) and (4,2) cuckoo hashing, + * respectively. 
+ * + ******************************************************************************/ +#define JEMALLOC_CKH_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static bool ckh_grow(ckh_t *ckh); +static void ckh_shrink(ckh_t *ckh); + +/******************************************************************************/ + +/* + * Search bucket for key and return the cell number if found; SIZE_T_MAX + * otherwise. + */ +JEMALLOC_INLINE size_t +ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) +{ + ckhc_t *cell; + unsigned i; + + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + if (cell->key != NULL && ckh->keycomp(key, cell->key)) + return ((bucket << LG_CKH_BUCKET_CELLS) + i); + } + + return (SIZE_T_MAX); +} + +/* + * Search table for key and return cell number if found; SIZE_T_MAX otherwise. + */ +JEMALLOC_INLINE size_t +ckh_isearch(ckh_t *ckh, const void *key) +{ + size_t hash1, hash2, bucket, cell; + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + + /* Search primary bucket. */ + bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + if (cell != SIZE_T_MAX) + return (cell); + + /* Search secondary bucket. */ + bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + cell = ckh_bucket_search(ckh, bucket, key); + return (cell); +} + +JEMALLOC_INLINE bool +ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, + const void *data) +{ + ckhc_t *cell; + unsigned offset, i; + + /* + * Cycle through the cells in the bucket, starting at a random position. + * The randomness avoids worst-case search overhead as buckets fill up. 
+ */ + prn32(offset, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; + if (cell->key == NULL) { + cell->key = key; + cell->data = data; + ckh->count++; + return (false); + } + } + + return (true); +} + +/* + * No space is available in bucket. Randomly evict an item, then try to find an + * alternate location for that item. Iteratively repeat this + * eviction/relocation procedure until either success or detection of an + * eviction/relocation bucket cycle. + */ +JEMALLOC_INLINE bool +ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, + void const **argdata) +{ + const void *key, *data, *tkey, *tdata; + ckhc_t *cell; + size_t hash1, hash2, bucket, tbucket; + unsigned i; + + bucket = argbucket; + key = *argkey; + data = *argdata; + while (true) { + /* + * Choose a random item within the bucket to evict. This is + * critical to correct function, because without (eventually) + * evicting all items within a bucket during iteration, it + * would be possible to get stuck in an infinite loop if there + * were an item for which both hashes indicated the same + * bucket. + */ + prn32(i, LG_CKH_BUCKET_CELLS, ckh->prn_state, CKH_A, CKH_C); + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; + assert(cell->key != NULL); + + /* Swap cell->{key,data} and {key,data} (evict). */ + tkey = cell->key; tdata = cell->data; + cell->key = key; cell->data = data; + key = tkey; data = tdata; + +#ifdef CKH_COUNT + ckh->nrelocs++; +#endif + + /* Find the alternate bucket for the evicted item. */ + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + tbucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (tbucket == bucket) { + tbucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + /* + * It may be that (tbucket == bucket) still, if the + * item's hashes both indicate this bucket. 
However, + * we are guaranteed to eventually escape this bucket + * during iteration, assuming pseudo-random item + * selection (true randomness would make infinite + * looping a remote possibility). The reason we can + * never get trapped forever is that there are two + * cases: + * + * 1) This bucket == argbucket, so we will quickly + * detect an eviction cycle and terminate. + * 2) An item was evicted to this bucket from another, + * which means that at least one item in this bucket + * has hashes that indicate distinct buckets. + */ + } + /* Check for a cycle. */ + if (tbucket == argbucket) { + *argkey = key; + *argdata = data; + return (true); + } + + bucket = tbucket; + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + } +} + +JEMALLOC_INLINE bool +ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) +{ + size_t hash1, hash2, bucket; + const void *key = *argkey; + const void *data = *argdata; + + ckh->hash(key, ckh->lg_curbuckets, &hash1, &hash2); + + /* Try to insert in primary bucket. */ + bucket = hash1 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* Try to insert in secondary bucket. */ + bucket = hash2 & ((ZU(1) << ckh->lg_curbuckets) - 1); + if (ckh_try_bucket_insert(ckh, bucket, key, data) == false) + return (false); + + /* + * Try to find a place for this item via iterative eviction/relocation. + */ + return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); +} + +/* + * Try to rebuild the hash table from scratch by inserting all items from the + * old table into the new. 
+ */ +JEMALLOC_INLINE bool +ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) +{ + size_t count, i, nins; + const void *key, *data; + + count = ckh->count; + ckh->count = 0; + for (i = nins = 0; nins < count; i++) { + if (aTab[i].key != NULL) { + key = aTab[i].key; + data = aTab[i].data; + if (ckh_try_insert(ckh, &key, &data)) { + ckh->count = count; + return (true); + } + nins++; + } + } + + return (false); +} + +static bool +ckh_grow(ckh_t *ckh) +{ + bool ret; + ckhc_t *tab, *ttab; + size_t lg_curcells; + unsigned lg_prevbuckets; + +#ifdef CKH_COUNT + ckh->ngrows++; +#endif + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table will have to be doubled more than once in order to create a + * usable table. + */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS; + while (true) { + size_t usize; + + lg_curcells++; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + if (usize == 0) { + ret = true; + goto RETURN; + } + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + ret = true; + goto RETURN; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); + break; + } + + /* Rebuilding failed, so back out partially rebuilt table. */ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; + } + + ret = false; +RETURN: + return (ret); +} + +static void +ckh_shrink(ckh_t *ckh) +{ + ckhc_t *tab, *ttab; + size_t lg_curcells, usize; + unsigned lg_prevbuckets; + + /* + * It is possible (though unlikely, given well behaved hashes) that the + * table rebuild will fail. 
+ */ + lg_prevbuckets = ckh->lg_curbuckets; + lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; + usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE, NULL); + if (usize == 0) + return; + tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (tab == NULL) { + /* + * An OOM error isn't worth propagating, since it doesn't + * prevent this or future operations from proceeding. + */ + return; + } + /* Swap in new table. */ + ttab = ckh->tab; + ckh->tab = tab; + tab = ttab; + ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; + + if (ckh_rebuild(ckh, tab) == false) { + idalloc(tab); +#ifdef CKH_COUNT + ckh->nshrinks++; +#endif + return; + } + + /* Rebuilding failed, so back out partially rebuilt table. */ + idalloc(ckh->tab); + ckh->tab = tab; + ckh->lg_curbuckets = lg_prevbuckets; +#ifdef CKH_COUNT + ckh->nshrinkfails++; +#endif +} + +bool +ckh_new(ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) +{ + bool ret; + size_t mincells, usize; + unsigned lg_mincells; + + assert(minitems > 0); + assert(hash != NULL); + assert(keycomp != NULL); + +#ifdef CKH_COUNT + ckh->ngrows = 0; + ckh->nshrinks = 0; + ckh->nshrinkfails = 0; + ckh->ninserts = 0; + ckh->nrelocs = 0; +#endif + ckh->prn_state = 42; /* Value doesn't really matter. */ + ckh->count = 0; + + /* + * Find the minimum power of 2 that is large enough to fit minitems + * entries. We are using (2+,2) cuckoo hashing, which has an expected + * maximum load factor of at least ~0.86, so 0.75 is a conservative load + * factor that will typically allow minitems entries to fit without ever + * growing the table. + */ + assert(LG_CKH_BUCKET_CELLS > 0); + mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; + for (lg_mincells = LG_CKH_BUCKET_CELLS; + (ZU(1) << lg_mincells) < mincells; + lg_mincells++) + ; /* Do nothing. 
*/ + ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; + ckh->hash = hash; + ckh->keycomp = keycomp; + + usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE, NULL); + if (usize == 0) { + ret = true; + goto RETURN; + } + ckh->tab = (ckhc_t *)ipalloc(usize, CACHELINE, true); + if (ckh->tab == NULL) { + ret = true; + goto RETURN; + } + +#ifdef JEMALLOC_DEBUG + ckh->magic = CKH_MAGIC; +#endif + + ret = false; +RETURN: + return (ret); +} + +void +ckh_delete(ckh_t *ckh) +{ + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + +#ifdef CKH_VERBOSE + malloc_printf( + "%s(%p): ngrows: %"PRIu64", nshrinks: %"PRIu64"," + " nshrinkfails: %"PRIu64", ninserts: %"PRIu64"," + " nrelocs: %"PRIu64"\n", __func__, ckh, + (unsigned long long)ckh->ngrows, + (unsigned long long)ckh->nshrinks, + (unsigned long long)ckh->nshrinkfails, + (unsigned long long)ckh->ninserts, + (unsigned long long)ckh->nrelocs); +#endif + + idalloc(ckh->tab); +#ifdef JEMALLOC_DEBUG + memset(ckh, 0x5a, sizeof(ckh_t)); +#endif +} + +size_t +ckh_count(ckh_t *ckh) +{ + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + + return (ckh->count); +} + +bool +ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) +{ + size_t i, ncells; + + for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS)); i < ncells; i++) { + if (ckh->tab[i].key != NULL) { + if (key != NULL) + *key = (void *)ckh->tab[i].key; + if (data != NULL) + *data = (void *)ckh->tab[i].data; + *tabind = i + 1; + return (false); + } + } + + return (true); +} + +bool +ckh_insert(ckh_t *ckh, const void *key, const void *data) +{ + bool ret; + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + assert(ckh_search(ckh, key, NULL, NULL)); + +#ifdef CKH_COUNT + ckh->ninserts++; +#endif + + while (ckh_try_insert(ckh, &key, &data)) { + if (ckh_grow(ckh)) { + ret = true; + goto RETURN; + } + } + + ret = false; +RETURN: + return (ret); +} + +bool 
+ckh_remove(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + ckh->tab[cell].key = NULL; + ckh->tab[cell].data = NULL; /* Not necessary. */ + + ckh->count--; + /* Try to halve the table if it is less than 1/4 full. */ + if (ckh->count < (ZU(1) << (ckh->lg_curbuckets + + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets + > ckh->lg_minbuckets) { + /* Ignore error due to OOM. */ + ckh_shrink(ckh); + } + + return (false); + } + + return (true); +} + +bool +ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) +{ + size_t cell; + + assert(ckh != NULL); + dassert(ckh->magic == CKH_MAGIC); + + cell = ckh_isearch(ckh, searchkey); + if (cell != SIZE_T_MAX) { + if (key != NULL) + *key = (void *)ckh->tab[cell].key; + if (data != NULL) + *data = (void *)ckh->tab[cell].data; + return (false); + } + + return (true); +} + +void +ckh_string_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + h = hash(key, strlen((const char *)key), 0x94122f335b332aeaLLU); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + ret1 = h; + ret2 = hash(key, strlen((const char *)key), + 0x8432a476666bbc13U); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +bool +ckh_string_keycomp(const void *k1, const void *k2) +{ + + assert(k1 != NULL); + assert(k2 != NULL); + + return (strcmp((char *)k1, (char *)k2) ? 
false : true); +} + +void +ckh_pointer_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + union { + const void *v; + uint64_t i; + } u; + + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + assert(sizeof(u.v) == sizeof(u.i)); +#if (LG_SIZEOF_PTR != LG_SIZEOF_INT) + u.i = 0; +#endif + u.v = key; + h = hash(&u.i, sizeof(u.i), 0xd983396e68886082LLU); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + assert(SIZEOF_PTR == 8); + ret1 = h; + ret2 = hash(&u.i, sizeof(u.i), 0x5e2be9aff8709a5dLLU); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +bool +ckh_pointer_keycomp(const void *k1, const void *k2) +{ + + return ((k1 == k2) ? true : false); +} diff --git a/src/ctl.c b/src/ctl.c new file mode 100644 index 0000000..e5336d3 --- /dev/null +++ b/src/ctl.c @@ -0,0 +1,1670 @@ +#define JEMALLOC_CTL_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +/* + * ctl_mtx protects the following: + * - ctl_stats.* + * - opt_prof_active + * - swap_enabled + * - swap_prezeroed + */ +static malloc_mutex_t ctl_mtx; +static bool ctl_initialized; +static uint64_t ctl_epoch; +static ctl_stats_t ctl_stats; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +#define CTL_PROTO(n) \ +static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen); + +#define INDEX_PROTO(n) \ +const ctl_node_t *n##_index(const size_t *mib, size_t miblen, \ + size_t i); + +#ifdef JEMALLOC_STATS +static bool ctl_arena_init(ctl_arena_stats_t *astats); +#endif +static void ctl_arena_clear(ctl_arena_stats_t *astats); +#ifdef JEMALLOC_STATS +static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, + arena_t *arena); +static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, + ctl_arena_stats_t *astats); +#endif +static void ctl_arena_refresh(arena_t *arena, unsigned i); +static void ctl_refresh(void); +static bool ctl_init(void); +static int ctl_lookup(const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp); + +CTL_PROTO(version) +CTL_PROTO(epoch) +#ifdef JEMALLOC_TCACHE +CTL_PROTO(tcache_flush) +#endif +CTL_PROTO(thread_arena) +#ifdef JEMALLOC_STATS +CTL_PROTO(thread_allocated) +CTL_PROTO(thread_allocatedp) +CTL_PROTO(thread_deallocated) +CTL_PROTO(thread_deallocatedp) +#endif +CTL_PROTO(config_debug) +CTL_PROTO(config_dss) +CTL_PROTO(config_dynamic_page_shift) +CTL_PROTO(config_fill) +CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_prof) +CTL_PROTO(config_prof_libgcc) +CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_stats) +CTL_PROTO(config_swap) +CTL_PROTO(config_sysv) +CTL_PROTO(config_tcache) +CTL_PROTO(config_tiny) +CTL_PROTO(config_tls) +CTL_PROTO(config_xmalloc) +CTL_PROTO(opt_abort) +CTL_PROTO(opt_lg_qspace_max) +CTL_PROTO(opt_lg_cspace_max) +CTL_PROTO(opt_lg_chunk) +CTL_PROTO(opt_narenas) +CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_stats_print) +#ifdef JEMALLOC_FILL +CTL_PROTO(opt_junk) +CTL_PROTO(opt_zero) +#endif +#ifdef JEMALLOC_SYSV +CTL_PROTO(opt_sysv) +#endif +#ifdef JEMALLOC_XMALLOC +CTL_PROTO(opt_xmalloc) +#endif +#ifdef JEMALLOC_TCACHE +CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_gc_sweep) +#endif +#ifdef JEMALLOC_PROF 
+CTL_PROTO(opt_prof) +CTL_PROTO(opt_prof_prefix) +CTL_PROTO(opt_prof_active) +CTL_PROTO(opt_lg_prof_bt_max) +CTL_PROTO(opt_lg_prof_sample) +CTL_PROTO(opt_lg_prof_interval) +CTL_PROTO(opt_prof_gdump) +CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_lg_prof_tcmax) +#endif +#ifdef JEMALLOC_SWAP +CTL_PROTO(opt_overcommit) +#endif +CTL_PROTO(arenas_bin_i_size) +CTL_PROTO(arenas_bin_i_nregs) +CTL_PROTO(arenas_bin_i_run_size) +INDEX_PROTO(arenas_bin_i) +CTL_PROTO(arenas_lrun_i_size) +INDEX_PROTO(arenas_lrun_i) +CTL_PROTO(arenas_narenas) +CTL_PROTO(arenas_initialized) +CTL_PROTO(arenas_quantum) +CTL_PROTO(arenas_cacheline) +CTL_PROTO(arenas_subpage) +CTL_PROTO(arenas_pagesize) +CTL_PROTO(arenas_chunksize) +#ifdef JEMALLOC_TINY +CTL_PROTO(arenas_tspace_min) +CTL_PROTO(arenas_tspace_max) +#endif +CTL_PROTO(arenas_qspace_min) +CTL_PROTO(arenas_qspace_max) +CTL_PROTO(arenas_cspace_min) +CTL_PROTO(arenas_cspace_max) +CTL_PROTO(arenas_sspace_min) +CTL_PROTO(arenas_sspace_max) +#ifdef JEMALLOC_TCACHE +CTL_PROTO(arenas_tcache_max) +#endif +CTL_PROTO(arenas_ntbins) +CTL_PROTO(arenas_nqbins) +CTL_PROTO(arenas_ncbins) +CTL_PROTO(arenas_nsbins) +CTL_PROTO(arenas_nbins) +#ifdef JEMALLOC_TCACHE +CTL_PROTO(arenas_nhbins) +#endif +CTL_PROTO(arenas_nlruns) +CTL_PROTO(arenas_purge) +#ifdef JEMALLOC_PROF +CTL_PROTO(prof_active) +CTL_PROTO(prof_dump) +CTL_PROTO(prof_interval) +#endif +#ifdef JEMALLOC_STATS +CTL_PROTO(stats_chunks_current) +CTL_PROTO(stats_chunks_total) +CTL_PROTO(stats_chunks_high) +CTL_PROTO(stats_huge_allocated) +CTL_PROTO(stats_huge_nmalloc) +CTL_PROTO(stats_huge_ndalloc) +CTL_PROTO(stats_arenas_i_small_allocated) +CTL_PROTO(stats_arenas_i_small_nmalloc) +CTL_PROTO(stats_arenas_i_small_ndalloc) +CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_large_allocated) +CTL_PROTO(stats_arenas_i_large_nmalloc) +CTL_PROTO(stats_arenas_i_large_ndalloc) +CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_bins_j_allocated) 
+CTL_PROTO(stats_arenas_i_bins_j_nmalloc) +CTL_PROTO(stats_arenas_i_bins_j_ndalloc) +CTL_PROTO(stats_arenas_i_bins_j_nrequests) +#ifdef JEMALLOC_TCACHE +CTL_PROTO(stats_arenas_i_bins_j_nfills) +CTL_PROTO(stats_arenas_i_bins_j_nflushes) +#endif +CTL_PROTO(stats_arenas_i_bins_j_nruns) +CTL_PROTO(stats_arenas_i_bins_j_nreruns) +CTL_PROTO(stats_arenas_i_bins_j_highruns) +CTL_PROTO(stats_arenas_i_bins_j_curruns) +INDEX_PROTO(stats_arenas_i_bins_j) +CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) +CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) +CTL_PROTO(stats_arenas_i_lruns_j_nrequests) +CTL_PROTO(stats_arenas_i_lruns_j_highruns) +CTL_PROTO(stats_arenas_i_lruns_j_curruns) +INDEX_PROTO(stats_arenas_i_lruns_j) +#endif +CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_pactive) +CTL_PROTO(stats_arenas_i_pdirty) +#ifdef JEMALLOC_STATS +CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_npurge) +CTL_PROTO(stats_arenas_i_nmadvise) +CTL_PROTO(stats_arenas_i_purged) +#endif +INDEX_PROTO(stats_arenas_i) +#ifdef JEMALLOC_STATS +CTL_PROTO(stats_cactive) +CTL_PROTO(stats_allocated) +CTL_PROTO(stats_active) +CTL_PROTO(stats_mapped) +#endif +#ifdef JEMALLOC_SWAP +# ifdef JEMALLOC_STATS +CTL_PROTO(swap_avail) +# endif +CTL_PROTO(swap_prezeroed) +CTL_PROTO(swap_nfds) +CTL_PROTO(swap_fds) +#endif + +/******************************************************************************/ +/* mallctl tree. */ + +/* Maximum tree depth. */ +#define CTL_MAX_DEPTH 6 + +#define NAME(n) true, {.named = {n +#define CHILD(c) sizeof(c##_node) / sizeof(ctl_node_t), c##_node}}, NULL +#define CTL(c) 0, NULL}}, c##_ctl + +/* + * Only handles internal indexed nodes, since there are currently no external + * ones. 
+ */ +#define INDEX(i) false, {.indexed = {i##_index}}, NULL + +#ifdef JEMALLOC_TCACHE +static const ctl_node_t tcache_node[] = { + {NAME("flush"), CTL(tcache_flush)} +}; +#endif + +static const ctl_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)} +#ifdef JEMALLOC_STATS + , + {NAME("allocated"), CTL(thread_allocated)}, + {NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)} +#endif +}; + +static const ctl_node_t config_node[] = { + {NAME("debug"), CTL(config_debug)}, + {NAME("dss"), CTL(config_dss)}, + {NAME("dynamic_page_shift"), CTL(config_dynamic_page_shift)}, + {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("stats"), CTL(config_stats)}, + {NAME("swap"), CTL(config_swap)}, + {NAME("sysv"), CTL(config_sysv)}, + {NAME("tcache"), CTL(config_tcache)}, + {NAME("tiny"), CTL(config_tiny)}, + {NAME("tls"), CTL(config_tls)}, + {NAME("xmalloc"), CTL(config_xmalloc)} +}; + +static const ctl_node_t opt_node[] = { + {NAME("abort"), CTL(opt_abort)}, + {NAME("lg_qspace_max"), CTL(opt_lg_qspace_max)}, + {NAME("lg_cspace_max"), CTL(opt_lg_cspace_max)}, + {NAME("lg_chunk"), CTL(opt_lg_chunk)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("stats_print"), CTL(opt_stats_print)} +#ifdef JEMALLOC_FILL + , + {NAME("junk"), CTL(opt_junk)}, + {NAME("zero"), CTL(opt_zero)} +#endif +#ifdef JEMALLOC_SYSV + , + {NAME("sysv"), CTL(opt_sysv)} +#endif +#ifdef JEMALLOC_XMALLOC + , + {NAME("xmalloc"), CTL(opt_xmalloc)} +#endif +#ifdef JEMALLOC_TCACHE + , + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_gc_sweep"), CTL(opt_lg_tcache_gc_sweep)} +#endif +#ifdef JEMALLOC_PROF + , + {NAME("prof"), CTL(opt_prof)}, + {NAME("prof_prefix"), CTL(opt_prof_prefix)}, 
+ {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("lg_prof_bt_max"), CTL(opt_lg_prof_bt_max)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("lg_prof_tcmax"), CTL(opt_lg_prof_tcmax)} +#endif +#ifdef JEMALLOC_SWAP + , + {NAME("overcommit"), CTL(opt_overcommit)} +#endif +}; + +static const ctl_node_t arenas_bin_i_node[] = { + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("run_size"), CTL(arenas_bin_i_run_size)} +}; +static const ctl_node_t super_arenas_bin_i_node[] = { + {NAME(""), CHILD(arenas_bin_i)} +}; + +static const ctl_node_t arenas_bin_node[] = { + {INDEX(arenas_bin_i)} +}; + +static const ctl_node_t arenas_lrun_i_node[] = { + {NAME("size"), CTL(arenas_lrun_i_size)} +}; +static const ctl_node_t super_arenas_lrun_i_node[] = { + {NAME(""), CHILD(arenas_lrun_i)} +}; + +static const ctl_node_t arenas_lrun_node[] = { + {INDEX(arenas_lrun_i)} +}; + +static const ctl_node_t arenas_node[] = { + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("initialized"), CTL(arenas_initialized)}, + {NAME("quantum"), CTL(arenas_quantum)}, + {NAME("cacheline"), CTL(arenas_cacheline)}, + {NAME("subpage"), CTL(arenas_subpage)}, + {NAME("pagesize"), CTL(arenas_pagesize)}, + {NAME("chunksize"), CTL(arenas_chunksize)}, +#ifdef JEMALLOC_TINY + {NAME("tspace_min"), CTL(arenas_tspace_min)}, + {NAME("tspace_max"), CTL(arenas_tspace_max)}, +#endif + {NAME("qspace_min"), CTL(arenas_qspace_min)}, + {NAME("qspace_max"), CTL(arenas_qspace_max)}, + {NAME("cspace_min"), CTL(arenas_cspace_min)}, + {NAME("cspace_max"), CTL(arenas_cspace_max)}, + {NAME("sspace_min"), CTL(arenas_sspace_min)}, + {NAME("sspace_max"), CTL(arenas_sspace_max)}, +#ifdef JEMALLOC_TCACHE + {NAME("tcache_max"), CTL(arenas_tcache_max)}, +#endif + {NAME("ntbins"), 
CTL(arenas_ntbins)}, + {NAME("nqbins"), CTL(arenas_nqbins)}, + {NAME("ncbins"), CTL(arenas_ncbins)}, + {NAME("nsbins"), CTL(arenas_nsbins)}, + {NAME("nbins"), CTL(arenas_nbins)}, +#ifdef JEMALLOC_TCACHE + {NAME("nhbins"), CTL(arenas_nhbins)}, +#endif + {NAME("bin"), CHILD(arenas_bin)}, + {NAME("nlruns"), CTL(arenas_nlruns)}, + {NAME("lrun"), CHILD(arenas_lrun)}, + {NAME("purge"), CTL(arenas_purge)} +}; + +#ifdef JEMALLOC_PROF +static const ctl_node_t prof_node[] = { + {NAME("active"), CTL(prof_active)}, + {NAME("dump"), CTL(prof_dump)}, + {NAME("interval"), CTL(prof_interval)} +}; +#endif + +#ifdef JEMALLOC_STATS +static const ctl_node_t stats_chunks_node[] = { + {NAME("current"), CTL(stats_chunks_current)}, + {NAME("total"), CTL(stats_chunks_total)}, + {NAME("high"), CTL(stats_chunks_high)} +}; + +static const ctl_node_t stats_huge_node[] = { + {NAME("allocated"), CTL(stats_huge_allocated)}, + {NAME("nmalloc"), CTL(stats_huge_nmalloc)}, + {NAME("ndalloc"), CTL(stats_huge_ndalloc)} +}; + +static const ctl_node_t stats_arenas_i_small_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} +}; + +static const ctl_node_t stats_arenas_i_large_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} +}; + +static const ctl_node_t stats_arenas_i_bins_j_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_bins_j_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, +#ifdef JEMALLOC_TCACHE + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), 
CTL(stats_arenas_i_bins_j_nflushes)}, +#endif + {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, + {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, + {NAME("highruns"), CTL(stats_arenas_i_bins_j_highruns)}, + {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} +}; +static const ctl_node_t super_stats_arenas_i_bins_j_node[] = { + {NAME(""), CHILD(stats_arenas_i_bins_j)} +}; + +static const ctl_node_t stats_arenas_i_bins_node[] = { + {INDEX(stats_arenas_i_bins_j)} +}; + +static const ctl_node_t stats_arenas_i_lruns_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, + {NAME("highruns"), CTL(stats_arenas_i_lruns_j_highruns)}, + {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} +}; +static const ctl_node_t super_stats_arenas_i_lruns_j_node[] = { + {NAME(""), CHILD(stats_arenas_i_lruns_j)} +}; + +static const ctl_node_t stats_arenas_i_lruns_node[] = { + {INDEX(stats_arenas_i_lruns_j)} +}; +#endif + +static const ctl_node_t stats_arenas_i_node[] = { + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)} +#ifdef JEMALLOC_STATS + , + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("npurge"), CTL(stats_arenas_i_npurge)}, + {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, + {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("small"), CHILD(stats_arenas_i_small)}, + {NAME("large"), CHILD(stats_arenas_i_large)}, + {NAME("bins"), CHILD(stats_arenas_i_bins)}, + {NAME("lruns"), CHILD(stats_arenas_i_lruns)} +#endif +}; +static const ctl_node_t super_stats_arenas_i_node[] = { + {NAME(""), CHILD(stats_arenas_i)} +}; + +static const ctl_node_t stats_arenas_node[] = { + {INDEX(stats_arenas_i)} +}; + +static const ctl_node_t stats_node[] = { +#ifdef JEMALLOC_STATS + {NAME("cactive"), CTL(stats_cactive)}, + {NAME("allocated"), 
CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("chunks"), CHILD(stats_chunks)}, + {NAME("huge"), CHILD(stats_huge)}, +#endif + {NAME("arenas"), CHILD(stats_arenas)} +}; + +#ifdef JEMALLOC_SWAP +static const ctl_node_t swap_node[] = { +# ifdef JEMALLOC_STATS + {NAME("avail"), CTL(swap_avail)}, +# endif + {NAME("prezeroed"), CTL(swap_prezeroed)}, + {NAME("nfds"), CTL(swap_nfds)}, + {NAME("fds"), CTL(swap_fds)} +}; +#endif + +static const ctl_node_t root_node[] = { + {NAME("version"), CTL(version)}, + {NAME("epoch"), CTL(epoch)}, +#ifdef JEMALLOC_TCACHE + {NAME("tcache"), CHILD(tcache)}, +#endif + {NAME("thread"), CHILD(thread)}, + {NAME("config"), CHILD(config)}, + {NAME("opt"), CHILD(opt)}, + {NAME("arenas"), CHILD(arenas)}, +#ifdef JEMALLOC_PROF + {NAME("prof"), CHILD(prof)}, +#endif + {NAME("stats"), CHILD(stats)} +#ifdef JEMALLOC_SWAP + , + {NAME("swap"), CHILD(swap)} +#endif +}; +static const ctl_node_t super_root_node[] = { + {NAME(""), CHILD(root)} +}; + +#undef NAME +#undef CHILD +#undef CTL +#undef INDEX + +/******************************************************************************/ + +#ifdef JEMALLOC_STATS +static bool +ctl_arena_init(ctl_arena_stats_t *astats) +{ + + if (astats->bstats == NULL) { + astats->bstats = (malloc_bin_stats_t *)base_alloc(nbins * + sizeof(malloc_bin_stats_t)); + if (astats->bstats == NULL) + return (true); + } + if (astats->lstats == NULL) { + astats->lstats = (malloc_large_stats_t *)base_alloc(nlclasses * + sizeof(malloc_large_stats_t)); + if (astats->lstats == NULL) + return (true); + } + + return (false); +} +#endif + +static void +ctl_arena_clear(ctl_arena_stats_t *astats) +{ + + astats->pactive = 0; + astats->pdirty = 0; +#ifdef JEMALLOC_STATS + memset(&astats->astats, 0, sizeof(arena_stats_t)); + astats->allocated_small = 0; + astats->nmalloc_small = 0; + astats->ndalloc_small = 0; + astats->nrequests_small = 0; + memset(astats->bstats, 0, nbins * 
sizeof(malloc_bin_stats_t)); + memset(astats->lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); +#endif +} + +#ifdef JEMALLOC_STATS +static void +ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +{ + unsigned i; + + arena_stats_merge(arena, &cstats->pactive, &cstats->pdirty, + &cstats->astats, cstats->bstats, cstats->lstats); + + for (i = 0; i < nbins; i++) { + cstats->allocated_small += cstats->bstats[i].allocated; + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } +} + +static void +ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) +{ + unsigned i; + + sstats->pactive += astats->pactive; + sstats->pdirty += astats->pdirty; + + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; + + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; + + sstats->astats.allocated_large += astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += astats->astats.nrequests_large; + + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += astats->lstats[i].nrequests; + sstats->lstats[i].highruns += astats->lstats[i].highruns; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } + + for (i = 0; i < nbins; i++) { + sstats->bstats[i].allocated += astats->bstats[i].allocated; + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + 
sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += astats->bstats[i].nrequests; +#ifdef JEMALLOC_TCACHE + sstats->bstats[i].nfills += astats->bstats[i].nfills; + sstats->bstats[i].nflushes += astats->bstats[i].nflushes; +#endif + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].highruns += astats->bstats[i].highruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; + } +} +#endif + +static void +ctl_arena_refresh(arena_t *arena, unsigned i) +{ + ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; + ctl_arena_stats_t *sstats = &ctl_stats.arenas[narenas]; + + ctl_arena_clear(astats); + + sstats->nthreads += astats->nthreads; +#ifdef JEMALLOC_STATS + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); +#else + astats->pactive += arena->nactive; + astats->pdirty += arena->ndirty; + /* Merge into sum stats as well. */ + sstats->pactive += arena->nactive; + sstats->pdirty += arena->ndirty; +#endif +} + +static void +ctl_refresh(void) +{ + unsigned i; + arena_t *tarenas[narenas]; + +#ifdef JEMALLOC_STATS + malloc_mutex_lock(&chunks_mtx); + ctl_stats.chunks.current = stats_chunks.curchunks; + ctl_stats.chunks.total = stats_chunks.nchunks; + ctl_stats.chunks.high = stats_chunks.highchunks; + malloc_mutex_unlock(&chunks_mtx); + + malloc_mutex_lock(&huge_mtx); + ctl_stats.huge.allocated = huge_allocated; + ctl_stats.huge.nmalloc = huge_nmalloc; + ctl_stats.huge.ndalloc = huge_ndalloc; + malloc_mutex_unlock(&huge_mtx); +#endif + + /* + * Clear sum stats, since they will be merged into by + * ctl_arena_refresh(). 
+ */ + ctl_stats.arenas[narenas].nthreads = 0; + ctl_arena_clear(&ctl_stats.arenas[narenas]); + + malloc_mutex_lock(&arenas_lock); + memcpy(tarenas, arenas, sizeof(arena_t *) * narenas); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + ctl_stats.arenas[i].nthreads = arenas[i]->nthreads; + else + ctl_stats.arenas[i].nthreads = 0; + } + malloc_mutex_unlock(&arenas_lock); + for (i = 0; i < narenas; i++) { + bool initialized = (tarenas[i] != NULL); + + ctl_stats.arenas[i].initialized = initialized; + if (initialized) + ctl_arena_refresh(tarenas[i], i); + } + +#ifdef JEMALLOC_STATS + ctl_stats.allocated = ctl_stats.arenas[narenas].allocated_small + + ctl_stats.arenas[narenas].astats.allocated_large + + ctl_stats.huge.allocated; + ctl_stats.active = (ctl_stats.arenas[narenas].pactive << PAGE_SHIFT) + + ctl_stats.huge.allocated; + ctl_stats.mapped = (ctl_stats.chunks.current << opt_lg_chunk); + +# ifdef JEMALLOC_SWAP + malloc_mutex_lock(&swap_mtx); + ctl_stats.swap_avail = swap_avail; + malloc_mutex_unlock(&swap_mtx); +# endif +#endif + + ctl_epoch++; +} + +static bool +ctl_init(void) +{ + bool ret; + + malloc_mutex_lock(&ctl_mtx); + if (ctl_initialized == false) { +#ifdef JEMALLOC_STATS + unsigned i; +#endif + + /* + * Allocate space for one extra arena stats element, which + * contains summed stats across all arenas. + */ + ctl_stats.arenas = (ctl_arena_stats_t *)base_alloc( + (narenas + 1) * sizeof(ctl_arena_stats_t)); + if (ctl_stats.arenas == NULL) { + ret = true; + goto RETURN; + } + memset(ctl_stats.arenas, 0, (narenas + 1) * + sizeof(ctl_arena_stats_t)); + + /* + * Initialize all stats structures, regardless of whether they + * ever get used. Lazy initialization would allow errors to + * cause inconsistent state to be viewable by the application. 
+ */ +#ifdef JEMALLOC_STATS + for (i = 0; i <= narenas; i++) { + if (ctl_arena_init(&ctl_stats.arenas[i])) { + ret = true; + goto RETURN; + } + } +#endif + ctl_stats.arenas[narenas].initialized = true; + + ctl_epoch = 0; + ctl_refresh(); + ctl_initialized = true; + } + + ret = false; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +static int +ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, + size_t *depthp) +{ + int ret; + const char *elm, *tdot, *dot; + size_t elen, i, j; + const ctl_node_t *node; + + elm = name; + /* Equivalent to strchrnul(). */ + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + if (elen == 0) { + ret = ENOENT; + goto RETURN; + } + node = super_root_node; + for (i = 0; i < *depthp; i++) { + assert(node->named); + assert(node->u.named.nchildren > 0); + if (node->u.named.children[0].named) { + const ctl_node_t *pnode = node; + + /* Children are named. */ + for (j = 0; j < node->u.named.nchildren; j++) { + const ctl_node_t *child = + &node->u.named.children[j]; + if (strlen(child->u.named.name) == elen + && strncmp(elm, child->u.named.name, + elen) == 0) { + node = child; + if (nodesp != NULL) + nodesp[i] = node; + mibp[i] = j; + break; + } + } + if (node == pnode) { + ret = ENOENT; + goto RETURN; + } + } else { + unsigned long index; + const ctl_node_t *inode; + + /* Children are indexed. */ + index = strtoul(elm, NULL, 10); + if (index == ULONG_MAX) { + ret = ENOENT; + goto RETURN; + } + + inode = &node->u.named.children[0]; + node = inode->u.indexed.index(mibp, *depthp, + index); + if (node == NULL) { + ret = ENOENT; + goto RETURN; + } + + if (nodesp != NULL) + nodesp[i] = node; + mibp[i] = (size_t)index; + } + + if (node->ctl != NULL) { + /* Terminal node. */ + if (*dot != '\0') { + /* + * The name contains more elements than are + * in this path through the tree. 
+ */ + ret = ENOENT; + goto RETURN; + } + /* Complete lookup successful. */ + *depthp = i + 1; + break; + } + + /* Update elm. */ + if (*dot == '\0') { + /* No more elements. */ + ret = ENOENT; + goto RETURN; + } + elm = &dot[1]; + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : + strchr(elm, '\0'); + elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); + } + + ret = 0; +RETURN: + return (ret); +} + +int +ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) +{ + int ret; + size_t depth; + ctl_node_t const *nodes[CTL_MAX_DEPTH]; + size_t mib[CTL_MAX_DEPTH]; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto RETURN; + } + + depth = CTL_MAX_DEPTH; + ret = ctl_lookup(name, nodes, mib, &depth); + if (ret != 0) + goto RETURN; + + if (nodes[depth-1]->ctl == NULL) { + /* The name refers to a partial path through the ctl tree. */ + ret = ENOENT; + goto RETURN; + } + + ret = nodes[depth-1]->ctl(mib, depth, oldp, oldlenp, newp, newlen); +RETURN: + return(ret); +} + +int +ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +{ + int ret; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto RETURN; + } + + ret = ctl_lookup(name, NULL, mibp, miblenp); +RETURN: + return(ret); +} + +int +ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + const ctl_node_t *node; + size_t i; + + if (ctl_initialized == false && ctl_init()) { + ret = EAGAIN; + goto RETURN; + } + + /* Iterate down the tree. */ + node = super_root_node; + for (i = 0; i < miblen; i++) { + if (node->u.named.children[0].named) { + /* Children are named. */ + if (node->u.named.nchildren <= mib[i]) { + ret = ENOENT; + goto RETURN; + } + node = &node->u.named.children[mib[i]]; + } else { + const ctl_node_t *inode; + + /* Indexed element. 
*/ + inode = &node->u.named.children[0]; + node = inode->u.indexed.index(mib, miblen, mib[i]); + if (node == NULL) { + ret = ENOENT; + goto RETURN; + } + } + } + + /* Call the ctl function. */ + if (node->ctl == NULL) { + /* Partial MIB. */ + ret = ENOENT; + goto RETURN; + } + ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + +RETURN: + return(ret); +} + +bool +ctl_boot(void) +{ + + if (malloc_mutex_init(&ctl_mtx)) + return (true); + + ctl_initialized = false; + + return (false); +} + +/******************************************************************************/ +/* *_ctl() functions. */ + +#define READONLY() do { \ + if (newp != NULL || newlen != 0) { \ + ret = EPERM; \ + goto RETURN; \ + } \ +} while (0) + +#define WRITEONLY() do { \ + if (oldp != NULL || oldlenp != NULL) { \ + ret = EPERM; \ + goto RETURN; \ + } \ +} while (0) + +#define VOID() do { \ + READONLY(); \ + WRITEONLY(); \ +} while (0) + +#define READ(v, t) do { \ + if (oldp != NULL && oldlenp != NULL) { \ + if (*oldlenp != sizeof(t)) { \ + size_t copylen = (sizeof(t) <= *oldlenp) \ + ? sizeof(t) : *oldlenp; \ + memcpy(oldp, (void *)&v, copylen); \ + ret = EINVAL; \ + goto RETURN; \ + } else \ + *(t *)oldp = v; \ + } \ +} while (0) + +#define WRITE(v, t) do { \ + if (newp != NULL) { \ + if (newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto RETURN; \ + } \ + v = *(t *)newp; \ + } \ +} while (0) + +#define CTL_RO_GEN(n, v, t) \ +static int \ +n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ + void *newp, size_t newlen) \ +{ \ + int ret; \ + t oldval; \ + \ + malloc_mutex_lock(&ctl_mtx); \ + READONLY(); \ + oldval = v; \ + READ(oldval, t); \ + \ + ret = 0; \ +RETURN: \ + malloc_mutex_unlock(&ctl_mtx); \ + return (ret); \ +} + +/* + * ctl_mtx is not acquired, under the assumption that no pertinent data will + * mutate during the call. 
+ */
+#define CTL_RO_NL_GEN(n, v, t) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+    void *newp, size_t newlen) \
+{ \
+	int ret; \
+	t oldval; \
+ \
+	READONLY(); \
+	oldval = v; \
+	READ(oldval, t); \
+ \
+	ret = 0; \
+RETURN: \
+	return (ret); \
+}
+
+/* Generate a lock-free, read-only bool ctl that always reports true. */
+#define CTL_RO_TRUE_GEN(n) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+    void *newp, size_t newlen) \
+{ \
+	int ret; \
+	bool oldval; \
+ \
+	READONLY(); \
+	oldval = true; \
+	READ(oldval, bool); \
+ \
+	ret = 0; \
+RETURN: \
+	return (ret); \
+}
+
+/* Generate a lock-free, read-only bool ctl that always reports false. */
+#define CTL_RO_FALSE_GEN(n) \
+static int \
+n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \
+    void *newp, size_t newlen) \
+{ \
+	int ret; \
+	bool oldval; \
+ \
+	READONLY(); \
+	oldval = false; \
+	READ(oldval, bool); \
+ \
+	ret = 0; \
+RETURN: \
+	return (ret); \
+}
+
+CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *)
+
+/*
+ * "epoch": writing a non-zero uint64_t triggers ctl_refresh(); the current
+ * ctl_epoch value is then read back.  Runs entirely under ctl_mtx.
+ */
+static int
+epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	uint64_t newval;
+
+	malloc_mutex_lock(&ctl_mtx);
+	newval = 0;
+	WRITE(newval, uint64_t);
+	if (newval != 0)
+		ctl_refresh();
+	READ(ctl_epoch, uint64_t);
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+#ifdef JEMALLOC_TCACHE
+/*
+ * "tcache.flush": takes neither input nor output (VOID()).  Destroys the
+ * calling thread's tcache, if it has one, and clears the thread's tcache
+ * pointer.
+ */
+static int
+tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	tcache_t *tcache;
+
+	VOID();
+
+	tcache = TCACHE_GET();
+	if (tcache == NULL) {
+		ret = 0;
+		goto RETURN;
+	}
+	tcache_destroy(tcache);
+	TCACHE_SET(NULL);
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+#endif
+
+/*
+ * "thread.arena": read and/or change the arena index associated with the
+ * calling thread.
+ *
+ * Returns 0 on success, EINVAL on a size mismatch (via READ()/WRITE()),
+ * EFAULT if the requested index is >= narenas, and EAGAIN if no arena could
+ * be obtained for the requested index.
+ */
+static int
+thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	unsigned newind, oldind;
+
+	newind = oldind = choose_arena()->ind;
+	WRITE(newind, unsigned);
+	READ(oldind, unsigned);
+	if (newind != oldind) {
+		arena_t *arena;
+
+		if (newind >= narenas) {
+			/* New arena index is out of range. */
+			ret = EFAULT;
+			goto RETURN;
+		}
+
+		/* Initialize arena if necessary. */
+		malloc_mutex_lock(&arenas_lock);
+		if ((arena = arenas[newind]) == NULL)
+			arena = arenas_extend(newind);
+		if (arena == NULL) {
+			/*
+			 * Bail out before touching any thread counts, so that
+			 * a failed switch leaves the accounting unchanged.
+			 */
+			malloc_mutex_unlock(&arenas_lock);
+			ret = EAGAIN;
+			goto RETURN;
+		}
+		/*
+		 * Account against the arena pointer that was actually
+		 * obtained rather than arenas[newind]: if arenas_extend()
+		 * failed without populating that slot, dereferencing it would
+		 * be a NULL dereference.
+		 * NOTE(review): arenas_extend() is not visible here; confirm
+		 * what it returns on failure.
+		 */
+		arenas[oldind]->nthreads--;
+		arena->nthreads++;
+		malloc_mutex_unlock(&arenas_lock);
+
+		/* Set new arena association. */
+		ARENA_SET(arena);
+#ifdef JEMALLOC_TCACHE
+		{
+			tcache_t *tcache = TCACHE_GET();
+			if (tcache != NULL)
+				tcache->arena = arena;
+		}
+#endif
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+#ifdef JEMALLOC_STATS
+CTL_RO_NL_GEN(thread_allocated, ALLOCATED_GET(), uint64_t);
+CTL_RO_NL_GEN(thread_allocatedp, ALLOCATEDP_GET(), uint64_t *);
+CTL_RO_NL_GEN(thread_deallocated, DEALLOCATED_GET(), uint64_t);
+CTL_RO_NL_GEN(thread_deallocatedp, DEALLOCATEDP_GET(), uint64_t *);
+#endif
+
+/******************************************************************************/
+
+/* config.*: each ctl reports whether the matching JEMALLOC_* macro is defined. */
+
+#ifdef JEMALLOC_DEBUG
+CTL_RO_TRUE_GEN(config_debug)
+#else
+CTL_RO_FALSE_GEN(config_debug)
+#endif
+
+#ifdef JEMALLOC_DSS
+CTL_RO_TRUE_GEN(config_dss)
+#else
+CTL_RO_FALSE_GEN(config_dss)
+#endif
+
+#ifdef JEMALLOC_DYNAMIC_PAGE_SHIFT
+CTL_RO_TRUE_GEN(config_dynamic_page_shift)
+#else
+CTL_RO_FALSE_GEN(config_dynamic_page_shift)
+#endif
+
+#ifdef JEMALLOC_FILL
+CTL_RO_TRUE_GEN(config_fill)
+#else
+CTL_RO_FALSE_GEN(config_fill)
+#endif
+
+#ifdef JEMALLOC_LAZY_LOCK
+CTL_RO_TRUE_GEN(config_lazy_lock)
+#else
+CTL_RO_FALSE_GEN(config_lazy_lock)
+#endif
+
+#ifdef JEMALLOC_PROF
+CTL_RO_TRUE_GEN(config_prof)
+#else
+CTL_RO_FALSE_GEN(config_prof)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBGCC
+CTL_RO_TRUE_GEN(config_prof_libgcc)
+#else
+CTL_RO_FALSE_GEN(config_prof_libgcc)
+#endif
+
+#ifdef JEMALLOC_PROF_LIBUNWIND
+CTL_RO_TRUE_GEN(config_prof_libunwind)
+#else
+CTL_RO_FALSE_GEN(config_prof_libunwind)
+#endif
+
+#ifdef JEMALLOC_STATS
+CTL_RO_TRUE_GEN(config_stats)
+#else
+CTL_RO_FALSE_GEN(config_stats)
+#endif
+
+#ifdef JEMALLOC_SWAP
+CTL_RO_TRUE_GEN(config_swap)
+#else
+CTL_RO_FALSE_GEN(config_swap)
+#endif
+
+#ifdef JEMALLOC_SYSV
+CTL_RO_TRUE_GEN(config_sysv)
+#else
+CTL_RO_FALSE_GEN(config_sysv)
+#endif
+
+#ifdef JEMALLOC_TCACHE
+CTL_RO_TRUE_GEN(config_tcache)
+#else
+CTL_RO_FALSE_GEN(config_tcache)
+#endif
+
+#ifdef JEMALLOC_TINY
+CTL_RO_TRUE_GEN(config_tiny)
+#else
+CTL_RO_FALSE_GEN(config_tiny)
+#endif
+
+#ifdef JEMALLOC_TLS
+CTL_RO_TRUE_GEN(config_tls)
+#else
+CTL_RO_FALSE_GEN(config_tls)
+#endif
+
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_TRUE_GEN(config_xmalloc)
+#else
+CTL_RO_FALSE_GEN(config_xmalloc)
+#endif
+
+/******************************************************************************/
+
+/* opt.*: read-only views of the opt_* variables. */
+
+CTL_RO_NL_GEN(opt_abort, opt_abort, bool)
+CTL_RO_NL_GEN(opt_lg_qspace_max, opt_lg_qspace_max, size_t)
+CTL_RO_NL_GEN(opt_lg_cspace_max, opt_lg_cspace_max, size_t)
+CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t)
+CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t)
+CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t)
+CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool)
+#ifdef JEMALLOC_FILL
+CTL_RO_NL_GEN(opt_junk, opt_junk, bool)
+CTL_RO_NL_GEN(opt_zero, opt_zero, bool)
+#endif
+#ifdef JEMALLOC_SYSV
+CTL_RO_NL_GEN(opt_sysv, opt_sysv, bool)
+#endif
+#ifdef JEMALLOC_XMALLOC
+CTL_RO_NL_GEN(opt_xmalloc, opt_xmalloc, bool)
+#endif
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool)
+CTL_RO_NL_GEN(opt_lg_tcache_gc_sweep, opt_lg_tcache_gc_sweep, ssize_t)
+#endif
+#ifdef JEMALLOC_PROF
+CTL_RO_NL_GEN(opt_prof, opt_prof, bool)
+CTL_RO_NL_GEN(opt_prof_prefix, opt_prof_prefix, const char *)
+CTL_RO_GEN(opt_prof_active, opt_prof_active, bool) /* Mutable.
*/
+CTL_RO_NL_GEN(opt_lg_prof_bt_max, opt_lg_prof_bt_max, size_t)
+CTL_RO_NL_GEN(opt_lg_prof_sample, opt_lg_prof_sample, size_t)
+CTL_RO_NL_GEN(opt_lg_prof_interval, opt_lg_prof_interval, ssize_t)
+CTL_RO_NL_GEN(opt_prof_gdump, opt_prof_gdump, bool)
+CTL_RO_NL_GEN(opt_prof_leak, opt_prof_leak, bool)
+CTL_RO_NL_GEN(opt_prof_accum, opt_prof_accum, bool)
+CTL_RO_NL_GEN(opt_lg_prof_tcmax, opt_lg_prof_tcmax, ssize_t)
+#endif
+#ifdef JEMALLOC_SWAP
+CTL_RO_NL_GEN(opt_overcommit, opt_overcommit, bool)
+#endif
+
+/******************************************************************************/
+
+CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t)
+CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t)
+CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t)
+/*
+ * Index validator for "arenas.bin.<i>": returns the child node table for a
+ * valid bin index, or NULL (reported to the caller as ENOENT) otherwise.
+ * Valid indices are 0 .. nbins-1; i == nbins is one past the last bin and
+ * must be rejected, because the arenas_bin_i_* ctls above use the index
+ * directly as a subscript into arena_bin_info[].
+ */
+const ctl_node_t *
+arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+
+	if (i >= nbins)
+		return (NULL);
+	return (super_arenas_bin_i_node);
+}
+
+CTL_RO_NL_GEN(arenas_lrun_i_size, ((mib[2]+1) << PAGE_SHIFT), size_t)
+/*
+ * Index validator for "arenas.lrun.<i>": returns the child node table for a
+ * valid large run class index (0 .. nlclasses-1), or NULL otherwise.
+ */
+const ctl_node_t *
+arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i)
+{
+
+	if (i >= nlclasses)
+		return (NULL);
+	return (super_arenas_lrun_i_node);
+}
+
+CTL_RO_NL_GEN(arenas_narenas, narenas, unsigned)
+
+/*
+ * "arenas.initialized": copy out one bool per arena indicating whether that
+ * arena is initialized (as recorded by the last ctl_refresh()).
+ *
+ * If *oldlenp does not equal narenas * sizeof(bool), as many whole elements
+ * as fit are still copied and EINVAL is returned.  As with READ(), nothing is
+ * copied when oldp or oldlenp is NULL.  Returns EPERM if new data is
+ * supplied.
+ */
+static int
+arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp,
+    size_t *oldlenp, void *newp, size_t newlen)
+{
+	int ret;
+	unsigned nread, i;
+
+	malloc_mutex_lock(&ctl_mtx);
+	READONLY();
+	ret = 0;
+	/* Same guard as READ(): never dereference a NULL output pointer. */
+	if (oldp != NULL && oldlenp != NULL) {
+		if (*oldlenp != narenas * sizeof(bool)) {
+			ret = EINVAL;
+			nread = (*oldlenp < narenas * sizeof(bool))
+			    ? (*oldlenp / sizeof(bool)) : narenas;
+		} else
+			nread = narenas;
+
+		for (i = 0; i < nread; i++)
+			((bool *)oldp)[i] = ctl_stats.arenas[i].initialized;
+	}
+
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t)
+CTL_RO_NL_GEN(arenas_cacheline, CACHELINE, size_t)
+CTL_RO_NL_GEN(arenas_subpage, SUBPAGE, size_t)
+CTL_RO_NL_GEN(arenas_pagesize, PAGE_SIZE, size_t)
+CTL_RO_NL_GEN(arenas_chunksize, chunksize, size_t)
+#ifdef JEMALLOC_TINY
+CTL_RO_NL_GEN(arenas_tspace_min, (1U << LG_TINY_MIN), size_t)
+CTL_RO_NL_GEN(arenas_tspace_max, (qspace_min >> 1), size_t)
+#endif
+CTL_RO_NL_GEN(arenas_qspace_min, qspace_min, size_t)
+CTL_RO_NL_GEN(arenas_qspace_max, qspace_max, size_t)
+CTL_RO_NL_GEN(arenas_cspace_min, cspace_min, size_t)
+CTL_RO_NL_GEN(arenas_cspace_max, cspace_max, size_t)
+CTL_RO_NL_GEN(arenas_sspace_min, sspace_min, size_t)
+CTL_RO_NL_GEN(arenas_sspace_max, sspace_max, size_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t)
+#endif
+CTL_RO_NL_GEN(arenas_ntbins, ntbins, unsigned)
+CTL_RO_NL_GEN(arenas_nqbins, nqbins, unsigned)
+CTL_RO_NL_GEN(arenas_ncbins, ncbins, unsigned)
+CTL_RO_NL_GEN(arenas_nsbins, nsbins, unsigned)
+CTL_RO_NL_GEN(arenas_nbins, nbins, unsigned)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned)
+#endif
+CTL_RO_NL_GEN(arenas_nlruns, nlclasses, size_t)
+
+/*
+ * "arenas.purge": write-only.  With no new value, purge every initialized
+ * arena; with an unsigned arena index, purge only that arena.  Returns EFAULT
+ * if a supplied index is >= narenas.
+ */
+static int
+arenas_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	unsigned arena;
+
+	WRITEONLY();
+	/* UINT_MAX stands for "all arenas" when no index is written. */
+	arena = UINT_MAX;
+	WRITE(arena, unsigned);
+	if (newp != NULL && arena >= narenas) {
+		ret = EFAULT;
+		goto RETURN;
+	} else {
+		arena_t *tarenas[narenas];
+
+		/* Snapshot the arena pointers so purging runs unlocked. */
+		malloc_mutex_lock(&arenas_lock);
+		memcpy(tarenas, arenas, sizeof(arena_t *) * narenas);
+		malloc_mutex_unlock(&arenas_lock);
+
+		if (arena == UINT_MAX) {
+			unsigned i;
+			for (i = 0; i < narenas; i++) {
+				if (tarenas[i] != NULL)
+					arena_purge_all(tarenas[i]);
+			}
+		} else {
+			assert(arena < narenas);
+			if (tarenas[arena] != NULL)
+				arena_purge_all(tarenas[arena]);
+		}
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_PROF
+/*
+ * "prof.active": read and/or write opt_prof_active under ctl_mtx.  The value
+ * read back is the one in effect before any write made by this call.
+ */
+static int
+prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	bool oldval;
+
+	malloc_mutex_lock(&ctl_mtx); /* Protect opt_prof_active. */
+	oldval = opt_prof_active;
+	if (newp != NULL) {
+		/*
+		 * The memory barriers will tend to make opt_prof_active
+		 * propagate faster on systems with weak memory ordering.
+		 */
+		mb_write();
+		WRITE(opt_prof_active, bool);
+		mb_write();
+	}
+	READ(oldval, bool);
+
+	ret = 0;
+RETURN:
+	malloc_mutex_unlock(&ctl_mtx);
+	return (ret);
+}
+
+/*
+ * "prof.dump": write-only.  Passes the optional filename (NULL when none is
+ * written) to prof_mdump() and returns EFAULT if that call reports failure.
+ */
+static int
+prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp,
+    void *newp, size_t newlen)
+{
+	int ret;
+	const char *filename = NULL;
+
+	WRITEONLY();
+	WRITE(filename, const char *);
+
+	if (prof_mdump(filename)) {
+		ret = EFAULT;
+		goto RETURN;
+	}
+
+	ret = 0;
+RETURN:
+	return (ret);
+}
+
+CTL_RO_NL_GEN(prof_interval, prof_interval, uint64_t)
+#endif
+
+/******************************************************************************/
+
+#ifdef JEMALLOC_STATS
+CTL_RO_GEN(stats_chunks_current, ctl_stats.chunks.current, size_t)
+CTL_RO_GEN(stats_chunks_total, ctl_stats.chunks.total, uint64_t)
+CTL_RO_GEN(stats_chunks_high, ctl_stats.chunks.high, size_t)
+/*
+ * Report the ctl_stats.huge.* snapshot that ctl_refresh() takes under
+ * huge_mtx.  Reading the live huge_* globals here would happen without
+ * huge_mtx held and would disagree with the other epoch-based stats.
+ */
+CTL_RO_GEN(stats_huge_allocated, ctl_stats.huge.allocated, size_t)
+CTL_RO_GEN(stats_huge_nmalloc, ctl_stats.huge.nmalloc, uint64_t)
+CTL_RO_GEN(stats_huge_ndalloc, ctl_stats.huge.ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_allocated,
+    ctl_stats.arenas[mib[2]].allocated_small, size_t)
+CTL_RO_GEN(stats_arenas_i_small_nmalloc,
+    ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_ndalloc,
+    ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_small_nrequests,
+    ctl_stats.arenas[mib[2]].nrequests_small, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_allocated,
+    ctl_stats.arenas[mib[2]].astats.allocated_large, size_t)
+CTL_RO_GEN(stats_arenas_i_large_nmalloc,
+    ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_ndalloc,
+    ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t)
+CTL_RO_GEN(stats_arenas_i_large_nrequests,
+    ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t)
+
+/* stats.arenas.<i>.bins.<j>.*: mib[2] is the arena index, mib[4] the bin. */
+CTL_RO_GEN(stats_arenas_i_bins_j_allocated,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].allocated, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nmalloc,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_ndalloc,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nrequests,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t)
+#ifdef JEMALLOC_TCACHE
+CTL_RO_GEN(stats_arenas_i_bins_j_nfills,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nflushes,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t)
+#endif
+CTL_RO_GEN(stats_arenas_i_bins_j_nruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_nreruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_highruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].highruns, size_t)
+CTL_RO_GEN(stats_arenas_i_bins_j_curruns,
+    ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t)
+
+/*
+ * Index validator for "stats.arenas.<i>.bins.<j>": returns the child node
+ * table for a valid bin index, or NULL otherwise.  Each bstats array is
+ * allocated with exactly nbins elements (see ctl_arena_init()), so
+ * j == nbins must be rejected to keep the ctls above from reading one
+ * element past the end.
+ */
+const ctl_node_t *
+stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j)
+{
+
+	if (j >= nbins)
+		return (NULL);
+	return (super_stats_arenas_i_bins_j_node);
+}
+
+/* stats.arenas.<i>.lruns.<j>.*: mib[4] is the large run class index. */
+CTL_RO_GEN(stats_arenas_i_lruns_j_nmalloc,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_ndalloc,
+    ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t)
+CTL_RO_GEN(stats_arenas_i_lruns_j_nrequests, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) +CTL_RO_GEN(stats_arenas_i_lruns_j_curruns, + ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) +CTL_RO_GEN(stats_arenas_i_lruns_j_highruns, + ctl_stats.arenas[mib[2]].lstats[mib[4]].highruns, size_t) + +/* + * Validate the index j that the stats_arenas_i_lruns_j_* readers use to + * subscript lstats[]. Returns the shared child node, or NULL when j is out + * of range. + */ +const ctl_node_t * +stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) +{ + + /* + * Indices run over [0, nlclasses), so reject j == nlclasses as well. + * NOTE(review): assumes lstats[] holds nlclasses entries -- confirm + * against its allocation. + */ + if (j >= nlclasses) + return (NULL); + return (super_stats_arenas_i_lruns_j_node); +} + +#endif +CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +#ifdef JEMALLOC_STATS +CTL_RO_GEN(stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, + size_t) +CTL_RO_GEN(stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, + uint64_t) +CTL_RO_GEN(stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, + uint64_t) +CTL_RO_GEN(stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, + uint64_t) +#endif + +/* + * Return the shared per-arena stats node if slot i of ctl_stats.arenas[] is + * marked initialized, NULL otherwise. The check is made under ctl_mtx. + * NOTE(review): i subscripts ctl_stats.arenas[] without a range check here -- + * confirm that the caller bounds it. + */ +const ctl_node_t * +stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +{ + const ctl_node_t * ret; + + malloc_mutex_lock(&ctl_mtx); + if (ctl_stats.arenas[i].initialized == false) { + ret = NULL; + goto RETURN; + } + + ret = super_stats_arenas_i_node; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +#ifdef JEMALLOC_STATS +CTL_RO_GEN(stats_cactive, &stats_cactive, size_t *) +CTL_RO_GEN(stats_allocated, ctl_stats.allocated, size_t) +CTL_RO_GEN(stats_active, ctl_stats.active, size_t) +CTL_RO_GEN(stats_mapped, ctl_stats.mapped, size_t) +#endif + +/******************************************************************************/ + +#ifdef JEMALLOC_SWAP +# ifdef JEMALLOC_STATS +CTL_RO_GEN(swap_avail, ctl_stats.swap_avail, size_t) +# endif + +/* + * Read swap_prezeroed and, while swap is not yet enabled, optionally store a + * new value for it. Once swap_enabled is set the value is treated as + * read-only. Runs under ctl_mtx. + */ +static int +swap_prezeroed_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t 
*oldlenp, void *newp, size_t newlen) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + if (swap_enabled) { + READONLY(); + } else { + /* + * swap_prezeroed isn't actually used by the swap code until it + * is set during a successful chunk_swap_enable() call. We + * use it here to store the value that we'll pass to + * chunk_swap_enable() in a swap.fds mallctl(). This is not + * very clean, but the obvious alternatives are even worse. + */ + WRITE(swap_prezeroed, bool); + } + + READ(swap_prezeroed, bool); + + ret = 0; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} + +CTL_RO_GEN(swap_nfds, swap_nfds, size_t) + +/* + * While swap is not yet enabled and newp is given, interpret newp as an array + * of newlen / sizeof(int) file descriptors and hand it to + * chunk_swap_enable(); a failure there yields EFAULT. If oldp and oldlenp + * are given, copy swap_fds out; when *oldlenp does not match the stored size, + * as much as fits is copied and EINVAL is returned. Runs under ctl_mtx. + */ +static int +swap_fds_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + malloc_mutex_lock(&ctl_mtx); + if (swap_enabled) { + READONLY(); + } else if (newp != NULL) { + size_t nfds = newlen / sizeof(int); + + /* + * NOTE(review): nfds is 0 when newlen < sizeof(int), which makes fds + * a zero-length VLA -- confirm that callers never pass that. + */ + { + int fds[nfds]; + + memcpy(fds, newp, nfds * sizeof(int)); + if (chunk_swap_enable(fds, nfds, swap_prezeroed)) { + ret = EFAULT; + goto RETURN; + } + } + } + + if (oldp != NULL && oldlenp != NULL) { + if (*oldlenp != swap_nfds * sizeof(int)) { + /* Size mismatch: copy what fits, then report EINVAL. */ + size_t copylen = (swap_nfds * sizeof(int) <= *oldlenp) + ? 
swap_nfds * sizeof(int) : *oldlenp; + + memcpy(oldp, swap_fds, copylen); + ret = EINVAL; + goto RETURN; + } else + memcpy(oldp, swap_fds, *oldlenp); + } + + ret = 0; +RETURN: + malloc_mutex_unlock(&ctl_mtx); + return (ret); +} +#endif diff --git a/src/extent.c b/src/extent.c new file mode 100644 index 0000000..3c04d3a --- /dev/null +++ b/src/extent.c @@ -0,0 +1,41 @@ +#define JEMALLOC_EXTENT_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ + +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) +static inline int +extent_szad_comp(extent_node_t *a, extent_node_t *b) +{ + int ret; + size_t a_size = a->size; + size_t b_size = b->size; + /* Order by size first; equal sizes are ordered by address. */ + ret = (a_size > b_size) - (a_size < b_size); + if (ret == 0) { + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + + ret = (a_addr > b_addr) - (a_addr < b_addr); + } + + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, link_szad, + extent_szad_comp) +#endif + +static inline int +extent_ad_comp(extent_node_t *a, extent_node_t *b) +{ + uintptr_t a_addr = (uintptr_t)a->addr; + uintptr_t b_addr = (uintptr_t)b->addr; + /* Three-way comparison by address: yields -1, 0, or 1. */ + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad, + extent_ad_comp) diff --git a/src/hash.c b/src/hash.c new file mode 100644 index 0000000..cfa4da0 --- /dev/null +++ b/src/hash.c @@ -0,0 +1,2 @@ +#define JEMALLOC_HASH_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/huge.c b/src/huge.c new file mode 100644 index 0000000..ac3f3a0 --- /dev/null +++ b/src/huge.c @@ -0,0 +1,379 @@ +#define JEMALLOC_HUGE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. 
*/ + +#ifdef JEMALLOC_STATS +uint64_t huge_nmalloc; +uint64_t huge_ndalloc; +size_t huge_allocated; +#endif +/* Held around every access to the huge tree and to the counters above in this file. */ +malloc_mutex_t huge_mtx; + +/******************************************************************************/ + +/* Tree of chunks that are stand-alone huge allocations. */ +static extent_tree_t huge; +/* + * Allocate CHUNK_CEILING(size) bytes via chunk_alloc() and record the result + * in the huge tree. Returns NULL if the rounded size wraps to 0 or if the + * tracking node or the chunks cannot be allocated. + */ +void * +huge_malloc(size_t size, bool zero) +{ + void *ret; + size_t csize; + extent_node_t *node; + + /* Allocate one or more contiguous chunks for this request. */ + + csize = CHUNK_CEILING(size); + if (csize == 0) { + /* size is large enough to cause size_t wrap-around. */ + return (NULL); + } + + /* Allocate an extent node with which to track the chunk. */ + node = base_node_alloc(); + if (node == NULL) + return (NULL); + + ret = chunk_alloc(csize, false, &zero); + if (ret == NULL) { + base_node_dealloc(node); + return (NULL); + } + + /* Insert node into huge. */ + node->addr = ret; + node->size = csize; + + malloc_mutex_lock(&huge_mtx); + extent_tree_ad_insert(&huge, node); +#ifdef JEMALLOC_STATS + stats_cactive_add(csize); + huge_nmalloc++; + huge_allocated += csize; +#endif + malloc_mutex_unlock(&huge_mtx); + +#ifdef JEMALLOC_FILL + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, csize); + else if (opt_zero) + memset(ret, 0, csize); + } +#endif + + return (ret); +} + +/* Only handles large allocations that require more than chunk alignment. */ +void * +huge_palloc(size_t size, size_t alignment, bool zero) +{ + void *ret; + size_t alloc_size, chunk_size, offset; + extent_node_t *node; + + /* + * This allocation requires alignment that is even larger than chunk + * alignment. This means that huge_malloc() isn't good enough. + * + * Allocate almost twice as many chunks as are demanded by the size or + * alignment, in order to assure the alignment can be achieved, then + * unmap leading and trailing chunks. 
+ */ + assert(alignment > chunksize); + + chunk_size = CHUNK_CEILING(size); + + if (size >= alignment) + alloc_size = chunk_size + alignment - chunksize; + else + alloc_size = (alignment << 1) - chunksize; + + /* Allocate an extent node with which to track the chunk. */ + node = base_node_alloc(); + if (node == NULL) + return (NULL); + + ret = chunk_alloc(alloc_size, false, &zero); + if (ret == NULL) { + base_node_dealloc(node); + return (NULL); + } + /* offset is ret's distance past the previous alignment boundary. */ + offset = (uintptr_t)ret & (alignment - 1); + assert((offset & chunksize_mask) == 0); + assert(offset < alloc_size); + if (offset == 0) { + /* Trim trailing space. */ + chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size + - chunk_size); + } else { + size_t trailsize; + + /* Trim leading space. */ + chunk_dealloc(ret, alignment - offset); + + ret = (void *)((uintptr_t)ret + (alignment - offset)); + + trailsize = alloc_size - (alignment - offset) - chunk_size; + if (trailsize != 0) { + /* Trim trailing space. */ + assert(trailsize < alloc_size); + chunk_dealloc((void *)((uintptr_t)ret + chunk_size), + trailsize); + } + } + + /* Insert node into huge. */ + node->addr = ret; + node->size = chunk_size; + + malloc_mutex_lock(&huge_mtx); + extent_tree_ad_insert(&huge, node); +#ifdef JEMALLOC_STATS + stats_cactive_add(chunk_size); + huge_nmalloc++; + huge_allocated += chunk_size; +#endif + malloc_mutex_unlock(&huge_mtx); + +#ifdef JEMALLOC_FILL + if (zero == false) { + if (opt_junk) + memset(ret, 0xa5, chunk_size); + else if (opt_zero) + memset(ret, 0, chunk_size); + } +#endif + + return (ret); +} +/* + * Return ptr if the existing allocation can serve any size in + * [size, size+extra] without changing its chunk-rounded size; otherwise + * return NULL to tell the caller that a move is required. + */ +void * +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra) +{ + + /* + * Avoid moving the allocation if the size class can be left the same. 
+ */ + if (oldsize > arena_maxclass + && CHUNK_CEILING(oldsize) >= CHUNK_CEILING(size) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(size+extra)) { + assert(CHUNK_CEILING(oldsize) == oldsize); +#ifdef JEMALLOC_FILL + if (opt_junk && size < oldsize) { + memset((void *)((uintptr_t)ptr + size), 0x5a, + oldsize - size); + } +#endif + return (ptr); + } + + /* Reallocation would require a move. */ + return (NULL); +} +/* + * Resize in place when huge_ralloc_no_move() allows it. Otherwise allocate + * new space (retrying without extra if the first attempt fails), move the + * contents over, and release the old allocation. Returns NULL, leaving ptr + * untouched, if no new space can be allocated. + */ +void * +huge_ralloc(void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) +{ + void *ret; + size_t copysize; + + /* Try to avoid moving the allocation. */ + ret = huge_ralloc_no_move(ptr, oldsize, size, extra); + if (ret != NULL) + return (ret); + + /* + * size and oldsize are different enough that we need to use a + * different size class. In that case, fall back to allocating new + * space and copying. + */ + if (alignment > chunksize) + ret = huge_palloc(size + extra, alignment, zero); + else + ret = huge_malloc(size + extra, zero); + + if (ret == NULL) { + if (extra == 0) + return (NULL); + /* Try again, this time without extra. */ + if (alignment > chunksize) + ret = huge_palloc(size, alignment, zero); + else + ret = huge_malloc(size, zero); + + if (ret == NULL) + return (NULL); + } + + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + + /* + * Use mremap(2) if this is a huge-->huge reallocation, and neither the + * source nor the destination are in swap or dss. 
+ */ +#ifdef JEMALLOC_MREMAP_FIXED + if (oldsize >= chunksize +# ifdef JEMALLOC_SWAP + && (swap_enabled == false || (chunk_in_swap(ptr) == false && + chunk_in_swap(ret) == false)) +# endif +# ifdef JEMALLOC_DSS + && chunk_in_dss(ptr) == false && chunk_in_dss(ret) == false +# endif + ) { + size_t newsize = huge_salloc(ret); + + if (mremap(ptr, oldsize, newsize, MREMAP_MAYMOVE|MREMAP_FIXED, + ret) == MAP_FAILED) { + /* + * Assuming no chunk management bugs in the allocator, + * the only documented way an error can occur here is + * if the application changed the map type for a + * portion of the old allocation. This is firmly in + * undefined behavior territory, so write a diagnostic + * message, and optionally abort. + */ + char buf[BUFERROR_BUF]; + + buferror(errno, buf, sizeof(buf)); + malloc_write(": Error in mremap(): "); + malloc_write(buf); + malloc_write("\n"); + if (opt_abort) + abort(); + memcpy(ret, ptr, copysize); + idalloc(ptr); + } else + huge_dalloc(ptr, false); + } else +#endif + { + memcpy(ret, ptr, copysize); + idalloc(ptr); + } + return (ret); +} +/* + * Remove ptr's node from the huge tree and free the node. The chunks + * themselves are handed to chunk_dealloc() only when unmap is true. + */ +void +huge_dalloc(void *ptr, bool unmap) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Extract from tree of huge allocations. */ + key.addr = ptr; + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + assert(node->addr == ptr); + extent_tree_ad_remove(&huge, node); + +#ifdef JEMALLOC_STATS + stats_cactive_sub(node->size); + huge_ndalloc++; + huge_allocated -= node->size; +#endif + + malloc_mutex_unlock(&huge_mtx); + + if (unmap) { + /* Unmap chunk. */ +#ifdef JEMALLOC_FILL +#if (defined(JEMALLOC_SWAP) || defined(JEMALLOC_DSS)) + if (opt_junk) + memset(node->addr, 0x5a, node->size); +#endif +#endif + chunk_dealloc(node->addr, node->size); + } + + base_node_dealloc(node); +} +/* Return the size recorded in the huge tree for the allocation at ptr. */ +size_t +huge_salloc(const void *ptr) +{ + size_t ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Look up ptr in the tree of huge allocations; the node stays in the tree. 
*/ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->size; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} + +#ifdef JEMALLOC_PROF +prof_ctx_t * +huge_prof_ctx_get(const void *ptr) +{ + prof_ctx_t *ret; + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Look up ptr in the tree of huge allocations; the node stays in the tree. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + ret = node->prof_ctx; + + malloc_mutex_unlock(&huge_mtx); + + return (ret); +} +/* Store ctx in the tree node that tracks the huge allocation at ptr. */ +void +huge_prof_ctx_set(const void *ptr, prof_ctx_t *ctx) +{ + extent_node_t *node, key; + + malloc_mutex_lock(&huge_mtx); + + /* Look up ptr in the tree of huge allocations; the node stays in the tree. */ + key.addr = __DECONST(void *, ptr); + node = extent_tree_ad_search(&huge, &key); + assert(node != NULL); + + node->prof_ctx = ctx; + + malloc_mutex_unlock(&huge_mtx); +} +#endif +/* Initialize huge_mtx, the huge tree, and the counters; returns true on error. */ +bool +huge_boot(void) +{ + + /* Initialize chunks data. */ + if (malloc_mutex_init(&huge_mtx)) + return (true); + extent_tree_ad_new(&huge); + +#ifdef JEMALLOC_STATS + huge_nmalloc = 0; + huge_ndalloc = 0; + huge_allocated = 0; +#endif + + return (false); +} diff --git a/src/jemalloc.c b/src/jemalloc.c new file mode 100644 index 0000000..e287516 --- /dev/null +++ b/src/jemalloc.c @@ -0,0 +1,1847 @@ +#define JEMALLOC_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +malloc_mutex_t arenas_lock; +arena_t **arenas; +unsigned narenas; + +pthread_key_t arenas_tsd; +#ifndef NO_TLS +__thread arena_t *arenas_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#endif + +#ifdef JEMALLOC_STATS +# ifndef NO_TLS +__thread thread_allocated_t thread_allocated_tls; +# else +pthread_key_t thread_allocated_tsd; +# endif +#endif + +/* Set to true once the allocator has been initialized. 
*/ +static bool malloc_initialized = false; + +/* Used to let the initializing thread recursively allocate. */ +static pthread_t malloc_initializer = (unsigned long)0; + +/* Used to avoid initialization races. */ +static malloc_mutex_t init_lock = +#ifdef JEMALLOC_OSSPIN + 0 +#else + MALLOC_MUTEX_INITIALIZER +#endif + ; +/* Page geometry, filled in from sysconf(_SC_PAGESIZE) by malloc_init_hard(). */ +#ifdef DYNAMIC_PAGE_SHIFT +size_t pagesize; +size_t pagesize_mask; +size_t lg_pagesize; +#endif +/* CPU count, set from malloc_ncpus() by malloc_init_hard(). */ +unsigned ncpus; + +/* Runtime configuration options. */ +const char *JEMALLOC_P(malloc_conf) JEMALLOC_ATTR(visibility("default")); +#ifdef JEMALLOC_DEBUG +bool opt_abort = true; +# ifdef JEMALLOC_FILL +bool opt_junk = true; +# endif +#else +bool opt_abort = false; +# ifdef JEMALLOC_FILL +bool opt_junk = false; +# endif +#endif +#ifdef JEMALLOC_SYSV +bool opt_sysv = false; +#endif +#ifdef JEMALLOC_XMALLOC +bool opt_xmalloc = false; +#endif +#ifdef JEMALLOC_FILL +bool opt_zero = false; +#endif +size_t opt_narenas = 0; /* 0: malloc_init_hard() derives the count from ncpus. */ + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void wrtmessage(void *cbopaque, const char *s); +static void stats_print_atexit(void); +static unsigned malloc_ncpus(void); +static void arenas_cleanup(void *arg); +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void thread_allocated_cleanup(void *arg); +#endif +static bool malloc_conf_next(char const **opts_p, char const **k_p, + size_t *klen_p, char const **v_p, size_t *vlen_p); +static void malloc_conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +static void malloc_conf_init(void); +static bool malloc_init_hard(void); + +/******************************************************************************/ +/* malloc_message() setup. 
*/ + +#ifdef JEMALLOC_HAVE_ATTR +JEMALLOC_ATTR(visibility("hidden")) +#else +static +#endif +void +wrtmessage(void *cbopaque, const char *s) +{ +#ifdef JEMALLOC_CC_SILENCE + int result = +#endif + write(STDERR_FILENO, s, strlen(s)); +#ifdef JEMALLOC_CC_SILENCE + if (result < 0) + result = errno; +#endif +} + +void (*JEMALLOC_P(malloc_message))(void *, const char *s) + JEMALLOC_ATTR(visibility("default")) = wrtmessage; + +/******************************************************************************/ +/* + * Begin miscellaneous support functions. + */ + +/* Create a new arena and insert it into the arenas array at index ind. */ +arena_t * +arenas_extend(unsigned ind) +{ + arena_t *ret; + + /* Allocate enough space for trailing bins. */ + ret = (arena_t *)base_alloc(offsetof(arena_t, bins) + + (sizeof(arena_bin_t) * nbins)); + if (ret != NULL && arena_new(ret, ind) == false) { + arenas[ind] = ret; + return (ret); + } + /* Only reached if there is an OOM error. */ + + /* + * OOM here is quite inconvenient to propagate, since dealing with it + * would require a check for failure in the fast path. Instead, punt + * by using arenas[0]. In practice, this is an extremely unlikely + * failure. + */ + malloc_write(": Error initializing arena\n"); + if (opt_abort) + abort(); + + return (arenas[0]); +} + +/* + * Choose an arena based on a per-thread value (slow-path code only, called + * only by choose_arena()). + */ +arena_t * +choose_arena_hard(void) +{ + arena_t *ret; + + if (narenas > 1) { + unsigned i, choose, first_null; + + choose = 0; + first_null = narenas; + malloc_mutex_lock(&arenas_lock); + assert(arenas[0] != NULL); + for (i = 1; i < narenas; i++) { + if (arenas[i] != NULL) { + /* + * Choose the first arena that has the lowest + * number of threads assigned to it. 
+ */ + if (arenas[i]->nthreads < + arenas[choose]->nthreads) + choose = i; + } else if (first_null == narenas) { + /* + * Record the index of the first uninitialized + * arena, in case all extant arenas are in use. + * + * NB: It is possible for there to be + * discontinuities in terms of initialized + * versus uninitialized arenas, due to the + * "thread.arena" mallctl. + */ + first_null = i; + } + } + + if (arenas[choose] == 0 || first_null == narenas) { + /* + * Use an unloaded arena, or the least loaded arena if + * all arenas are already initialized. + */ + ret = arenas[choose]; + } else { + /* Initialize a new arena. */ + ret = arenas_extend(first_null); + } + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } else { + ret = arenas[0]; + malloc_mutex_lock(&arenas_lock); + ret->nthreads++; + malloc_mutex_unlock(&arenas_lock); + } + + ARENA_SET(ret); + + return (ret); +} + +/* + * glibc provides a non-standard strerror_r() when _GNU_SOURCE is defined, so + * provide a wrapper. + */ +int +buferror(int errnum, char *buf, size_t buflen) +{ +#ifdef _GNU_SOURCE + char *b = strerror_r(errno, buf, buflen); + if (b != buf) { + strncpy(buf, b, buflen); + buf[buflen-1] = '\0'; + } + return (0); +#else + return (strerror_r(errno, buf, buflen)); +#endif +} + +static void +stats_print_atexit(void) +{ + +#if (defined(JEMALLOC_TCACHE) && defined(JEMALLOC_STATS)) + unsigned i; + + /* + * Merge stats from extant threads. This is racy, since individual + * threads do not lock when recording tcache stats events. As a + * consequence, the final stats may be slightly out of date by the time + * they are reported, if other threads continue to allocate. + */ + for (i = 0; i < narenas; i++) { + arena_t *arena = arenas[i]; + if (arena != NULL) { + tcache_t *tcache; + + /* + * tcache_stats_merge() locks bins, so if any code is + * introduced that acquires both arena and bin locks in + * the opposite order, deadlocks may result. 
+ */ + malloc_mutex_lock(&arena->lock); + ql_foreach(tcache, &arena->tcache_ql, link) { + tcache_stats_merge(tcache, arena); + } + malloc_mutex_unlock(&arena->lock); + } + } +#endif + JEMALLOC_P(malloc_stats_print)(NULL, NULL, NULL); +} + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +thread_allocated_t * +thread_allocated_get_hard(void) +{ + thread_allocated_t *thread_allocated = (thread_allocated_t *) + imalloc(sizeof(thread_allocated_t)); + if (thread_allocated == NULL) { + static thread_allocated_t static_thread_allocated = {0, 0}; + malloc_write(": Error allocating TSD;" + " mallctl(\"thread.{de,}allocated[p]\", ...)" + " will be inaccurate\n"); + if (opt_abort) + abort(); + return (&static_thread_allocated); + } + pthread_setspecific(thread_allocated_tsd, thread_allocated); + thread_allocated->allocated = 0; + thread_allocated->deallocated = 0; + return (thread_allocated); +} +#endif + +/* + * End miscellaneous support functions. + */ +/******************************************************************************/ +/* + * Begin initialization functions. + */ + +static unsigned +malloc_ncpus(void) +{ + unsigned ret; + long result; + + result = sysconf(_SC_NPROCESSORS_ONLN); + if (result == -1) { + /* Error. */ + ret = 1; + } + ret = (unsigned)result; + + return (ret); +} + +static void +arenas_cleanup(void *arg) +{ + arena_t *arena = (arena_t *)arg; + + malloc_mutex_lock(&arenas_lock); + arena->nthreads--; + malloc_mutex_unlock(&arenas_lock); +} + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) +static void +thread_allocated_cleanup(void *arg) +{ + uint64_t *allocated = (uint64_t *)arg; + + if (allocated != NULL) + idalloc(allocated); +} +#endif + +/* + * FreeBSD's pthreads implementation calls malloc(3), so the malloc + * implementation has to take pains to avoid infinite recursion during + * initialization. 
+ */ +static inline bool +malloc_init(void) +{ + + if (malloc_initialized == false) + return (malloc_init_hard()); + + return (false); +} + +/* + * Scan one "key:value" pair from *opts_p. On success returns false, points + * *k_p / *v_p at the key and value inside the string (with lengths in *klen_p + * and *vlen_p; nothing is copied or NUL-terminated), and advances *opts_p past + * the pair. Returns true at end of input or on a malformed string. + */ +static bool +malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) +{ + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; accept == false;) { + switch (*opts) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_write(": Conf string " + "ends with key\n"); + } + return (true); + default: + malloc_write(": Malformed conf " + "string\n"); + return (true); + } + } + + for (accept = false; accept == false;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the + * next time this function is called, it will + * assume that end of input has been cleanly + * reached if no input remains, but we have + * optimistically already consumed the comma if + * one exists. 
+ */ + if (*opts == '\0') { + malloc_write(": Conf string " + "ends with comma\n"); + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return (false); +} + +/* Write "msg: key:value" followed by a newline through malloc_write(). */ +static void +malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, + size_t vlen) +{ + char buf[PATH_MAX + 1]; + + /* + * buf must hold klen + 1 (':') + vlen + 1 ('\0') bytes. Keys and + * values come straight from the conf string, so truncate an oversized + * pair instead of writing past the end of buf. + */ + if (klen > sizeof(buf) - 2) + klen = sizeof(buf) - 2; + if (vlen > sizeof(buf) - 2 - klen) + vlen = sizeof(buf) - 2 - klen; + + malloc_write(": "); + malloc_write(msg); + malloc_write(": "); + memcpy(buf, k, klen); + memcpy(&buf[klen], ":", 1); + memcpy(&buf[klen+1], v, vlen); + buf[klen+1+vlen] = '\0'; + malloc_write(buf); + malloc_write("\n"); +} + +/* + * Parse runtime options from three sources, in this order: the compiled-in + * malloc_conf string, the name that the /etc/malloc.conf symbolic link points + * to, and the MALLOC_CONF environment variable. Each recognized key:value + * pair sets the matching opt_* variable; a later source is applied on top of + * an earlier one. + */ +static void +malloc_conf_init(void) +{ + unsigned i; + char buf[PATH_MAX + 1]; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < 3; i++) { + /* Get runtime configuration. */ + switch (i) { + case 0: + if (JEMALLOC_P(malloc_conf) != NULL) { + /* + * Use options that were compiled into the + * program. + */ + opts = JEMALLOC_P(malloc_conf); + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + case 1: { + int linklen; + const char *linkname = +#ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +#else + "/etc/malloc.conf" +#endif + ; + + if ((linklen = readlink(linkname, buf, + sizeof(buf) - 1)) != -1) { + /* + * Use the contents of the "/etc/malloc.conf" + * symbolic link's name. + */ + buf[linklen] = '\0'; + opts = buf; + } else { + /* No configuration specified. */ + buf[0] = '\0'; + opts = buf; + } + break; + } + case 2: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((opts = getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to + * the value of the MALLOC_CONF environment + * variable. + */ + } else { + /* No configuration specified. 
*/ + buf[0] = '\0'; + opts = buf; + } + break; + } + default: + /* NOTREACHED */ + assert(false); + buf[0] = '\0'; + opts = buf; + } + + while (*opts != '\0' && malloc_conf_next(&opts, &k, &klen, &v, + &vlen) == false) { +#define CONF_HANDLE_BOOL(n) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + if (strncmp("true", v, vlen) == 0 && \ + vlen == sizeof("true")-1) \ + opt_##n = true; \ + else if (strncmp("false", v, vlen) == \ + 0 && vlen == sizeof("false")-1) \ + opt_##n = false; \ + else { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } \ + continue; \ + } +#define CONF_HANDLE_SIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + unsigned long ul; \ + char *end; \ + \ + errno = 0; \ + ul = strtoul(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (ul < min || ul > max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = ul; \ + continue; \ + } +#define CONF_HANDLE_SSIZE_T(n, min, max) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + long l; \ + char *end; \ + \ + errno = 0; \ + l = strtol(v, &end, 0); \ + if (errno != 0 || (uintptr_t)end - \ + (uintptr_t)v != vlen) { \ + malloc_conf_error( \ + "Invalid conf value", \ + k, klen, v, vlen); \ + } else if (l < (ssize_t)min || l > \ + (ssize_t)max) { \ + malloc_conf_error( \ + "Out-of-range conf value", \ + k, klen, v, vlen); \ + } else \ + opt_##n = l; \ + continue; \ + } +#define CONF_HANDLE_CHAR_P(n, d) \ + if (sizeof(#n)-1 == klen && strncmp(#n, k, \ + klen) == 0) { \ + size_t cpylen = (vlen <= \ + sizeof(opt_##n)-1) ? 
vlen : \ + sizeof(opt_##n)-1; \ + strncpy(opt_##n, v, cpylen); \ + opt_##n[cpylen] = '\0'; \ + continue; \ + } + + CONF_HANDLE_BOOL(abort) + CONF_HANDLE_SIZE_T(lg_qspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + CONF_HANDLE_SIZE_T(lg_cspace_max, LG_QUANTUM, + PAGE_SHIFT-1) + /* + * Chunks always require at least one header page, + * plus one data page. + */ + CONF_HANDLE_SIZE_T(lg_chunk, PAGE_SHIFT+1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(narenas, 1, SIZE_T_MAX) + CONF_HANDLE_SSIZE_T(lg_dirty_mult, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_BOOL(stats_print) +#ifdef JEMALLOC_FILL + CONF_HANDLE_BOOL(junk) + CONF_HANDLE_BOOL(zero) +#endif +#ifdef JEMALLOC_SYSV + CONF_HANDLE_BOOL(sysv) +#endif +#ifdef JEMALLOC_XMALLOC + CONF_HANDLE_BOOL(xmalloc) +#endif +#ifdef JEMALLOC_TCACHE + CONF_HANDLE_BOOL(tcache) + CONF_HANDLE_SSIZE_T(lg_tcache_gc_sweep, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_tcache_max, -1, + (sizeof(size_t) << 3) - 1) +#endif +#ifdef JEMALLOC_PROF + CONF_HANDLE_BOOL(prof) + CONF_HANDLE_CHAR_P(prof_prefix, "jeprof") + CONF_HANDLE_SIZE_T(lg_prof_bt_max, 0, LG_PROF_BT_MAX) + CONF_HANDLE_BOOL(prof_active) + CONF_HANDLE_SSIZE_T(lg_prof_sample, 0, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_accum) + CONF_HANDLE_SSIZE_T(lg_prof_tcmax, -1, + (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(lg_prof_interval, -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(prof_gdump) + CONF_HANDLE_BOOL(prof_leak) +#endif +#ifdef JEMALLOC_SWAP + CONF_HANDLE_BOOL(overcommit) +#endif + malloc_conf_error("Invalid conf pair", k, klen, v, + vlen); +#undef CONF_HANDLE_BOOL +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + } + + /* Validate configuration of options that are inter-related. 
*/ + if (opt_lg_qspace_max+1 >= opt_lg_cspace_max) { + malloc_write(": Invalid lg_[qc]space_max " + "relationship; restoring defaults\n"); + opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT; + opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT; + } + } +} + +/* + * One-time allocator bootstrap, serialized by init_lock. Returns false once + * the allocator is usable -- including when another thread finished first, or + * when the initializing thread re-enters while allocating -- and true if a + * boot step fails. init_lock is released on every return path. + */ +static bool +malloc_init_hard(void) +{ + arena_t *init_arenas[1]; + + malloc_mutex_lock(&init_lock); + if (malloc_initialized || malloc_initializer == pthread_self()) { + /* + * Another thread initialized the allocator before this one + * acquired init_lock, or this thread is the initializing + * thread, and it is recursively allocating. + */ + malloc_mutex_unlock(&init_lock); + return (false); + } + if (malloc_initializer != (unsigned long)0) { + /* Busy-wait until the initializing thread completes. */ + do { + malloc_mutex_unlock(&init_lock); + CPU_SPINWAIT; + malloc_mutex_lock(&init_lock); + } while (malloc_initialized == false); + malloc_mutex_unlock(&init_lock); + return (false); + } + +#ifdef DYNAMIC_PAGE_SHIFT + /* Get page size. */ + { + long result; + + result = sysconf(_SC_PAGESIZE); + assert(result != -1); + pagesize = (unsigned)result; + + /* + * We assume that pagesize is a power of 2 when calculating + * pagesize_mask and lg_pagesize. + */ + assert(((result - 1) & result) == 0); + pagesize_mask = result - 1; + lg_pagesize = ffs((int)result) - 1; + } +#endif + +#ifdef JEMALLOC_PROF + prof_boot0(); +#endif + + malloc_conf_init(); + + /* Register fork handlers. */ + if (pthread_atfork(jemalloc_prefork, jemalloc_postfork, + jemalloc_postfork) != 0) { + malloc_write(": Error in pthread_atfork()\n"); + if (opt_abort) + abort(); + } + + if (ctl_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (opt_stats_print) { + /* Print statistics at exit. 
*/ + if (atexit(stats_print_atexit) != 0) { + malloc_write(": Error in atexit()\n"); + if (opt_abort) + abort(); + } + } + + if (chunk_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (base_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#ifdef JEMALLOC_PROF + prof_boot1(); +#endif + + if (arena_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#ifdef JEMALLOC_TCACHE + if (tcache_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + if (huge_boot()) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#if (defined(JEMALLOC_STATS) && defined(NO_TLS)) + /* Initialize allocation counters before any allocations can occur. */ + if (pthread_key_create(&thread_allocated_tsd, thread_allocated_cleanup) + != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + /* + * Create enough scaffolding to allow recursive allocation in + * malloc_ncpus(). + */ + narenas = 1; + arenas = init_arenas; + memset(arenas, 0, sizeof(arena_t *) * narenas); + + /* + * Initialize one arena here. The rest are lazily created in + * choose_arena_hard(). + */ + arenas_extend(0); + if (arenas[0] == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + + /* + * Assign the initial arena to the initial thread, in order to avoid + * spurious creation of an extra arena if the application switches to + * threaded mode. + */ + ARENA_SET(arenas[0]); + arenas[0]->nthreads++; + + if (malloc_mutex_init(&arenas_lock)) { + /* Release init_lock here too, like every other failure path. */ + malloc_mutex_unlock(&init_lock); + return (true); + } + + if (pthread_key_create(&arenas_tsd, arenas_cleanup) != 0) { + malloc_mutex_unlock(&init_lock); + return (true); + } + +#ifdef JEMALLOC_PROF + if (prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } +#endif + + /* Get number of CPUs. 
*/ + malloc_initializer = pthread_self(); + malloc_mutex_unlock(&init_lock); + ncpus = malloc_ncpus(); + malloc_mutex_lock(&init_lock); + + if (opt_narenas == 0) { + /* + * For SMP systems, create more than one arena per CPU by + * default. + */ + if (ncpus > 1) + opt_narenas = ncpus << 2; + else + opt_narenas = 1; + } + narenas = opt_narenas; + /* + * Make sure that the arenas array can be allocated. In practice, this + * limit is enough to allow the allocator to function, but the ctl + * machinery will fail to allocate memory at far lower limits. + */ + if (narenas > chunksize / sizeof(arena_t *)) { + char buf[UMAX2S_BUFSIZE]; + + narenas = chunksize / sizeof(arena_t *); + malloc_write(": Reducing narenas to limit ("); + malloc_write(u2s(narenas, 10, buf)); + malloc_write(")\n"); + } + + /* Allocate and initialize arenas. */ + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas); + if (arenas == NULL) { + malloc_mutex_unlock(&init_lock); + return (true); + } + /* + * Zero the array. In practice, this should always be pre-zeroed, + * since it was just mmap()ed, but let's be sure. + */ + memset(arenas, 0, sizeof(arena_t *) * narenas); + /* Copy the pointer to the one arena that was already initialized. */ + arenas[0] = init_arenas[0]; + +#ifdef JEMALLOC_ZONE + /* Register the custom zone. */ + malloc_zone_register(create_zone()); + + /* + * Convert the default szone to an "overlay zone" that is capable of + * deallocating szone-allocated objects, but allocating new objects + * from jemalloc. + */ + szone2ozone(malloc_default_zone()); +#endif + + malloc_initialized = true; + malloc_mutex_unlock(&init_lock); + return (false); +} + +#ifdef JEMALLOC_ZONE +/* Constructor: initialize the allocator at load time and abort on failure. */ +JEMALLOC_ATTR(constructor) +void +jemalloc_darwin_init(void) +{ + + if (malloc_init_hard()) + abort(); +} +#endif + +/* + * End initialization functions. + */ +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. 
+ */ + +JEMALLOC_ATTR(malloc) +JEMALLOC_ATTR(visibility("default")) +void * +JEMALLOC_P(malloc)(size_t size) +{ + void *ret; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize +# ifdef JEMALLOC_CC_SILENCE + = 0 +# endif + ; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (malloc_init()) { + ret = NULL; + goto OOM; + } + + if (size == 0) { +#ifdef JEMALLOC_SYSV + if (opt_sysv == false) +#endif + size = 1; +#ifdef JEMALLOC_SYSV + else { +# ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in malloc(): " + "invalid size 0\n"); + abort(); + } +# endif + ret = NULL; + goto RETURN; + } +#endif + } + +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + ret = NULL; + goto OOM; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + small_maxclass) { + ret = imalloc(small_maxclass+1); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = imalloc(size); + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = imalloc(size); + } + +OOM: + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in malloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + +#ifdef JEMALLOC_SYSV +RETURN: +#endif +#ifdef JEMALLOC_PROF + if (opt_prof && ret != NULL) + prof_malloc(ret, usize, cnt); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, 0); + } +#endif + return (ret); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(posix_memalign)(void **memptr, size_t alignment, size_t size) +{ + int ret; + size_t usize +#ifdef JEMALLOC_CC_SILENCE + = 0 +#endif + ; + void *result; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (malloc_init()) + result = NULL; + else { + if 
(size == 0) {
+#ifdef JEMALLOC_SYSV
+			if (opt_sysv == false)
+#endif
+				size = 1;
+#ifdef JEMALLOC_SYSV
+			else {
+# ifdef JEMALLOC_XMALLOC
+				if (opt_xmalloc) {
+					malloc_write(": Error in "
+					    "posix_memalign(): invalid size "
+					    "0\n");
+					abort();
+				}
+# endif
+				result = NULL;
+				*memptr = NULL;
+				ret = 0;
+				goto RETURN;
+			}
+#endif
+		}
+
+		/* Make sure that alignment is a large enough power of 2. */
+		if (((alignment - 1) & alignment) != 0
+		    || alignment < sizeof(void *)) {
+#ifdef JEMALLOC_XMALLOC
+			if (opt_xmalloc) {
+				malloc_write(": Error in "
+				    "posix_memalign(): invalid alignment\n");
+				abort();
+			}
+#endif
+			result = NULL;
+			ret = EINVAL;
+			goto RETURN;
+		}
+
+		usize = sa2u(size, alignment, NULL);
+		if (usize == 0) {
+			result = NULL;
+			ret = ENOMEM;
+			goto RETURN;
+		}
+
+#ifdef JEMALLOC_PROF
+		if (opt_prof) {
+			if ((cnt = prof_alloc_prep(usize)) == NULL) {
+				result = NULL;
+				ret = EINVAL;
+			} else {
+				if (prof_promote && (uintptr_t)cnt !=
+				    (uintptr_t)1U && usize <= small_maxclass) {
+					assert(sa2u(small_maxclass+1,
+					    alignment, NULL) != 0);
+					result = ipalloc(sa2u(small_maxclass+1,
+					    alignment, NULL), alignment, false);
+					if (result != NULL) {
+						arena_prof_promoted(result,
+						    usize);
+					}
+				} else {
+					result = ipalloc(usize, alignment,
+					    false);
+				}
+			}
+		} else
+#endif
+			result = ipalloc(usize, alignment, false);
+	}
+
+	if (result == NULL) {
+#ifdef JEMALLOC_XMALLOC
+		if (opt_xmalloc) {
+			malloc_write(": Error in posix_memalign(): "
+			    "out of memory\n");
+			abort();
+		}
+#endif
+		ret = ENOMEM;
+		goto RETURN;
+	}
+
+	*memptr = result;
+	ret = 0;
+
+RETURN:
+#ifdef JEMALLOC_STATS
+	if (result != NULL) {
+		assert(usize == isalloc(result));
+		ALLOCATED_ADD(usize, 0);
+	}
+#endif
+#ifdef JEMALLOC_PROF
+	if (opt_prof && result != NULL)
+		prof_malloc(result, usize, cnt);
+#endif
+	return (ret);
+}
+
+/*
+ * calloc(3): allocate zeroed space for num objects of size bytes each.
+ *
+ * Returns NULL with errno set to ENOMEM when initialization fails, when
+ * num * size does not fit in a size_t, when the allocation itself fails, or
+ * (JEMALLOC_SYSV builds with opt_sysv set) when the request is for zero
+ * bytes. Otherwise a zero-byte request is served as a 1-byte allocation.
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(calloc)(size_t num, size_t size)
+{
+	void *ret;
+	size_t num_size;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+	    = 0
+# endif
+	    ;
+#endif
+#ifdef JEMALLOC_PROF
+	prof_thr_cnt_t *cnt
+# ifdef JEMALLOC_CC_SILENCE
+	    = NULL
+# endif
+	    ;
+#endif
+
+	if (malloc_init()) {
+		num_size = 0;
+		ret = NULL;
+		goto RETURN;
+	}
+
+	num_size = num * size;
+	if (num_size == 0) {
+		if (num != 0 && size != 0) {
+			/*
+			 * The product wrapped around to exactly zero, so this
+			 * is a size_t overflow, not a zero-byte request. It
+			 * must fail in every configuration rather than be
+			 * served as a 1-byte allocation.
+			 */
+			ret = NULL;
+			goto RETURN;
+		}
+#ifdef JEMALLOC_SYSV
+		if (opt_sysv) {
+			/* SysV semantics: a zero-byte request yields NULL. */
+			ret = NULL;
+			goto RETURN;
+		}
+#endif
+		num_size = 1;
+	/*
+	 * Try to avoid division here. We know that it isn't possible to
+	 * overflow during multiplication if neither operand uses any of the
+	 * most significant half of the bits in a size_t.
+	 */
+	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
+	    && (num_size / size != num)) {
+		/* size_t overflow. */
+		ret = NULL;
+		goto RETURN;
+	}
+
+#ifdef JEMALLOC_PROF
+	if (opt_prof) {
+		usize = s2u(num_size);
+		if ((cnt = prof_alloc_prep(usize)) == NULL) {
+			ret = NULL;
+			goto RETURN;
+		}
+		if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize
+		    <= small_maxclass) {
+			ret = icalloc(small_maxclass+1);
+			if (ret != NULL)
+				arena_prof_promoted(ret, usize);
+		} else
+			ret = icalloc(num_size);
+	} else
+#endif
+	{
+#ifdef JEMALLOC_STATS
+		usize = s2u(num_size);
+#endif
+		ret = icalloc(num_size);
+	}
+
+RETURN:
+	if (ret == NULL) {
+#ifdef JEMALLOC_XMALLOC
+		if (opt_xmalloc) {
+			malloc_write(": Error in calloc(): out of "
+			    "memory\n");
+			abort();
+		}
+#endif
+		errno = ENOMEM;
+	}
+
+#ifdef JEMALLOC_PROF
+	if (opt_prof && ret != NULL)
+		prof_malloc(ret, usize, cnt);
+#endif
+#ifdef JEMALLOC_STATS
+	if (ret != NULL) {
+		assert(usize == isalloc(ret));
+		ALLOCATED_ADD(usize, 0);
+	}
+#endif
+	return (ret);
+}
+
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(realloc)(void *ptr, size_t size)
+{
+	void *ret;
+#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS))
+	size_t usize
+# ifdef JEMALLOC_CC_SILENCE
+	    = 0
+# endif
+	    ;
+
size_t old_size = 0; +#endif +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; + prof_ctx_t *old_ctx +# ifdef JEMALLOC_CC_SILENCE + = NULL +# endif + ; +#endif + + if (size == 0) { +#ifdef JEMALLOC_SYSV + if (opt_sysv == false) +#endif + size = 1; +#ifdef JEMALLOC_SYSV + else { + if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { + old_ctx = prof_ctx_get(ptr); + cnt = NULL; + } +#endif + idalloc(ptr); + } +#ifdef JEMALLOC_PROF + else if (opt_prof) { + old_ctx = NULL; + cnt = NULL; + } +#endif + ret = NULL; + goto RETURN; + } +#endif + } + + if (ptr != NULL) { + assert(malloc_initialized || malloc_initializer == + pthread_self()); + +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + old_size = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + old_ctx = prof_ctx_get(ptr); + if ((cnt = prof_alloc_prep(usize)) == NULL) { + ret = NULL; + goto OOM; + } + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && + usize <= small_maxclass) { + ret = iralloc(ptr, small_maxclass+1, 0, 0, + false, false); + if (ret != NULL) + arena_prof_promoted(ret, usize); + } else + ret = iralloc(ptr, size, 0, 0, false, false); + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = iralloc(ptr, size, 0, 0, false, false); + } + +#ifdef JEMALLOC_PROF +OOM: +#endif + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + } else { +#ifdef JEMALLOC_PROF + if (opt_prof) + old_ctx = NULL; +#endif + if (malloc_init()) { +#ifdef JEMALLOC_PROF + if (opt_prof) + cnt = NULL; +#endif + ret = NULL; + } else { +#ifdef JEMALLOC_PROF + if (opt_prof) { + usize = s2u(size); + if ((cnt = prof_alloc_prep(usize)) == NULL) + ret = NULL; + else { + if (prof_promote && (uintptr_t)cnt != + 
(uintptr_t)1U && usize <= + small_maxclass) { + ret = imalloc(small_maxclass+1); + if (ret != NULL) { + arena_prof_promoted(ret, + usize); + } + } else + ret = imalloc(size); + } + } else +#endif + { +#ifdef JEMALLOC_STATS + usize = s2u(size); +#endif + ret = imalloc(size); + } + } + + if (ret == NULL) { +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in realloc(): " + "out of memory\n"); + abort(); + } +#endif + errno = ENOMEM; + } + } + +#ifdef JEMALLOC_SYSV +RETURN: +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) + prof_realloc(ret, usize, cnt, old_size, old_ctx); +#endif +#ifdef JEMALLOC_STATS + if (ret != NULL) { + assert(usize == isalloc(ret)); + ALLOCATED_ADD(usize, old_size); + } +#endif + return (ret); +} + +JEMALLOC_ATTR(visibility("default")) +void +JEMALLOC_P(free)(void *ptr) +{ + + if (ptr != NULL) { +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + + assert(malloc_initialized || malloc_initializer == + pthread_self()); + +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); +#endif + idalloc(ptr); + } +} + +/* + * End malloc(3)-compatible functions. + */ +/******************************************************************************/ +/* + * Begin non-standard override functions. + * + * These overrides are omitted if the JEMALLOC_PREFIX is defined, since the + * entire point is to avoid accidental mixed allocator usage. 
+ */
+#ifndef JEMALLOC_PREFIX
+
+#ifdef JEMALLOC_OVERRIDE_MEMALIGN
+/*
+ * memalign() override built on posix_memalign(). Returns the aligned
+ * allocation, or NULL when posix_memalign() reports an error.
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(memalign)(size_t alignment, size_t size)
+{
+	/*
+	 * posix_memalign() does not store to *memptr on its error paths, so
+	 * start from NULL and check the result in every build (not only under
+	 * JEMALLOC_CC_SILENCE); otherwise a failed request would hand back an
+	 * indeterminate pointer.
+	 */
+	void *ret = NULL;
+
+	if (JEMALLOC_P(posix_memalign)(&ret, alignment, size) != 0)
+		return (NULL);
+	return (ret);
+}
+#endif
+
+#ifdef JEMALLOC_OVERRIDE_VALLOC
+/*
+ * valloc() override: a PAGE_SIZE-aligned posix_memalign() request. Returns
+ * the allocation, or NULL when posix_memalign() reports an error.
+ */
+JEMALLOC_ATTR(malloc)
+JEMALLOC_ATTR(visibility("default"))
+void *
+JEMALLOC_P(valloc)(size_t size)
+{
+	/* See memalign(): never return an uninitialized pointer on failure. */
+	void *ret = NULL;
+
+	if (JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size) != 0)
+		return (NULL);
+	return (ret);
+}
+#endif
+
+#endif /* JEMALLOC_PREFIX */
+/*
+ * End non-standard override functions.
+ */
+/******************************************************************************/
+/*
+ * Begin non-standard functions.
+ */
+
+/*
+ * Return the size reported for ptr by ivsalloc() (JEMALLOC_IVSALLOC builds)
+ * or by isalloc() (all other builds, where ptr must be non-NULL).
+ */
+JEMALLOC_ATTR(visibility("default"))
+size_t
+JEMALLOC_P(malloc_usable_size)(const void *ptr)
+{
+	size_t ret;
+
+	assert(malloc_initialized || malloc_initializer == pthread_self());
+
+#ifdef JEMALLOC_IVSALLOC
+	ret = ivsalloc(ptr);
+#else
+	assert(ptr != NULL);
+	ret = isalloc(ptr);
+#endif
+
+	return (ret);
+}
+
+/* Public entry point that forwards its arguments unchanged to stats_print(). */
+JEMALLOC_ATTR(visibility("default"))
+void
+JEMALLOC_P(malloc_stats_print)(void (*write_cb)(void *, const char *),
+    void *cbopaque, const char *opts)
+{
+
+	stats_print(write_cb, cbopaque, opts);
+}
+
+/*
+ * Ensure the allocator is initialized, then forward to ctl_byname().
+ * Returns EAGAIN if initialization fails, otherwise ctl_byname()'s result.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctl)(const char *name, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+
+	if (malloc_init())
+		return (EAGAIN);
+
+	return (ctl_byname(name, oldp, oldlenp, newp, newlen));
+}
+
+/*
+ * Ensure the allocator is initialized, then forward to ctl_nametomib().
+ * Returns EAGAIN if initialization fails, otherwise ctl_nametomib()'s result.
+ */
+JEMALLOC_ATTR(visibility("default"))
+int
+JEMALLOC_P(mallctlnametomib)(const char *name, size_t *mibp, size_t *miblenp)
+{
+
+	if (malloc_init())
+		return (EAGAIN);
+
+	return (ctl_nametomib(name, mibp, miblenp));
+}
+
+JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(mallctlbymib)(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + + if (malloc_init()) + return (EAGAIN); + + return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); +} + +JEMALLOC_INLINE void * +iallocm(size_t usize, size_t alignment, bool zero) +{ + + assert(usize == ((alignment == 0) ? s2u(usize) : sa2u(usize, alignment, + NULL))); + + if (alignment != 0) + return (ipalloc(usize, alignment, zero)); + else if (zero) + return (icalloc(usize)); + else + return (imalloc(usize)); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(allocm)(void **ptr, size_t *rsize, size_t size, int flags) +{ + void *p; + size_t usize; + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; +#endif + + assert(ptr != NULL); + assert(size != 0); + + if (malloc_init()) + goto OOM; + + usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment, + NULL); + if (usize == 0) + goto OOM; + +#ifdef JEMALLOC_PROF + if (opt_prof) { + if ((cnt = prof_alloc_prep(usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && usize <= + small_maxclass) { + size_t usize_promoted = (alignment == 0) ? 
+ s2u(small_maxclass+1) : sa2u(small_maxclass+1, + alignment, NULL); + assert(usize_promoted != 0); + p = iallocm(usize_promoted, alignment, zero); + if (p == NULL) + goto OOM; + arena_prof_promoted(p, usize); + } else { + p = iallocm(usize, alignment, zero); + if (p == NULL) + goto OOM; + } + + if (rsize != NULL) + *rsize = usize; + } else +#endif + { + p = iallocm(usize, alignment, zero); + if (p == NULL) + goto OOM; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = p; +#ifdef JEMALLOC_STATS + assert(usize == isalloc(p)); + ALLOCATED_ADD(usize, 0); +#endif + return (ALLOCM_SUCCESS); +OOM: +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in allocm(): " + "out of memory\n"); + abort(); + } +#endif + *ptr = NULL; + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(rallocm)(void **ptr, size_t *rsize, size_t size, size_t extra, + int flags) +{ + void *p, *q; + size_t usize; +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t old_size; +#endif + size_t alignment = (ZU(1) << (flags & ALLOCM_LG_ALIGN_MASK) + & (SIZE_T_MAX-1)); + bool zero = flags & ALLOCM_ZERO; + bool no_move = flags & ALLOCM_NO_MOVE; +#ifdef JEMALLOC_PROF + prof_thr_cnt_t *cnt; + prof_ctx_t *old_ctx; +#endif + + assert(ptr != NULL); + assert(*ptr != NULL); + assert(size != 0); + assert(SIZE_T_MAX - size >= extra); + assert(malloc_initialized || malloc_initializer == pthread_self()); + + p = *ptr; +#ifdef JEMALLOC_PROF + if (opt_prof) { + /* + * usize isn't knowable before iralloc() returns when extra is + * non-zero. Therefore, compute its maximum possible value and + * use that in prof_alloc_prep() to decide whether to capture a + * backtrace. prof_realloc() will use the actual usize to + * decide whether to sample. + */ + size_t max_usize = (alignment == 0) ? 
s2u(size+extra) : + sa2u(size+extra, alignment, NULL); + old_size = isalloc(p); + old_ctx = prof_ctx_get(p); + if ((cnt = prof_alloc_prep(max_usize)) == NULL) + goto OOM; + if (prof_promote && (uintptr_t)cnt != (uintptr_t)1U && max_usize + <= small_maxclass) { + q = iralloc(p, small_maxclass+1, (small_maxclass+1 >= + size+extra) ? 0 : size+extra - (small_maxclass+1), + alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + arena_prof_promoted(q, usize); + } else { + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; + usize = isalloc(q); + } + prof_realloc(q, usize, cnt, old_size, old_ctx); + if (rsize != NULL) + *rsize = usize; + } else +#endif + { +#ifdef JEMALLOC_STATS + old_size = isalloc(p); +#endif + q = iralloc(p, size, extra, alignment, zero, no_move); + if (q == NULL) + goto ERR; +#ifndef JEMALLOC_STATS + if (rsize != NULL) +#endif + { + usize = isalloc(q); +#ifdef JEMALLOC_STATS + if (rsize != NULL) +#endif + *rsize = usize; + } + } + + *ptr = q; +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(usize, old_size); +#endif + return (ALLOCM_SUCCESS); +ERR: + if (no_move) + return (ALLOCM_ERR_NOT_MOVED); +#ifdef JEMALLOC_PROF +OOM: +#endif +#ifdef JEMALLOC_XMALLOC + if (opt_xmalloc) { + malloc_write(": Error in rallocm(): " + "out of memory\n"); + abort(); + } +#endif + return (ALLOCM_ERR_OOM); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(sallocm)(const void *ptr, size_t *rsize, int flags) +{ + size_t sz; + + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_IVSALLOC + sz = ivsalloc(ptr); +#else + assert(ptr != NULL); + sz = isalloc(ptr); +#endif + assert(rsize != NULL); + *rsize = sz; + + return (ALLOCM_SUCCESS); +} + +JEMALLOC_ATTR(nonnull(1)) +JEMALLOC_ATTR(visibility("default")) +int +JEMALLOC_P(dallocm)(void *ptr, int flags) +{ +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + size_t usize; +#endif + + assert(ptr != 
NULL); + assert(malloc_initialized || malloc_initializer == pthread_self()); + +#ifdef JEMALLOC_STATS + usize = isalloc(ptr); +#endif +#ifdef JEMALLOC_PROF + if (opt_prof) { +# ifndef JEMALLOC_STATS + usize = isalloc(ptr); +# endif + prof_free(ptr, usize); + } +#endif +#ifdef JEMALLOC_STATS + ALLOCATED_ADD(0, usize); +#endif + idalloc(ptr); + + return (ALLOCM_SUCCESS); +} + +/* + * End non-standard functions. + */ +/******************************************************************************/ + +/* + * The following functions are used by threading libraries for protection of + * malloc during fork(). + */ + +void +jemalloc_prefork(void) +{ + unsigned i; + + /* Acquire all mutexes in a safe order. */ + + malloc_mutex_lock(&arenas_lock); + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_lock(&arenas[i]->lock); + } + + malloc_mutex_lock(&base_mtx); + + malloc_mutex_lock(&huge_mtx); + +#ifdef JEMALLOC_DSS + malloc_mutex_lock(&dss_mtx); +#endif + +#ifdef JEMALLOC_SWAP + malloc_mutex_lock(&swap_mtx); +#endif +} + +void +jemalloc_postfork(void) +{ + unsigned i; + + /* Release all mutexes, now that fork() has completed. 
*/ + +#ifdef JEMALLOC_SWAP + malloc_mutex_unlock(&swap_mtx); +#endif + +#ifdef JEMALLOC_DSS + malloc_mutex_unlock(&dss_mtx); +#endif + + malloc_mutex_unlock(&huge_mtx); + + malloc_mutex_unlock(&base_mtx); + + for (i = 0; i < narenas; i++) { + if (arenas[i] != NULL) + malloc_mutex_unlock(&arenas[i]->lock); + } + malloc_mutex_unlock(&arenas_lock); +} + +/******************************************************************************/ diff --git a/src/mb.c b/src/mb.c new file mode 100644 index 0000000..dc2c0a2 --- /dev/null +++ b/src/mb.c @@ -0,0 +1,2 @@ +#define JEMALLOC_MB_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/mutex.c b/src/mutex.c new file mode 100644 index 0000000..ca89ef1 --- /dev/null +++ b/src/mutex.c @@ -0,0 +1,90 @@ +#define JEMALLOC_MUTEX_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +/******************************************************************************/ +/* Data. */ + +#ifdef JEMALLOC_LAZY_LOCK +bool isthreaded = false; +#endif + +#ifdef JEMALLOC_LAZY_LOCK +static void pthread_create_once(void); +#endif + +/******************************************************************************/ +/* + * We intercept pthread_create() calls in order to toggle isthreaded if the + * process goes multi-threaded. 
+ */ + +#ifdef JEMALLOC_LAZY_LOCK +static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); + +static void +pthread_create_once(void) +{ + + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + + isthreaded = true; +} + +JEMALLOC_ATTR(visibility("default")) +int +pthread_create(pthread_t *__restrict thread, + const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), + void *__restrict arg) +{ + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + + pthread_once(&once_control, pthread_create_once); + + return (pthread_create_fptr(thread, attr, start_routine, arg)); +} +#endif + +/******************************************************************************/ + +bool +malloc_mutex_init(malloc_mutex_t *mutex) +{ +#ifdef JEMALLOC_OSSPIN + *mutex = 0; +#else + pthread_mutexattr_t attr; + + if (pthread_mutexattr_init(&attr) != 0) + return (true); +#ifdef PTHREAD_MUTEX_ADAPTIVE_NP + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP); +#else + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); +#endif + if (pthread_mutex_init(mutex, &attr) != 0) { + pthread_mutexattr_destroy(&attr); + return (true); + } + pthread_mutexattr_destroy(&attr); + +#endif + return (false); +} + +void +malloc_mutex_destroy(malloc_mutex_t *mutex) +{ + +#ifndef JEMALLOC_OSSPIN + if (pthread_mutex_destroy(mutex) != 0) { + malloc_write(": Error in pthread_mutex_destroy()\n"); + abort(); + } +#endif +} diff --git a/src/prof.c b/src/prof.c new file mode 100644 index 0000000..8370042 --- /dev/null +++ b/src/prof.c @@ -0,0 +1,1243 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_PROF +/******************************************************************************/ + +#ifdef JEMALLOC_PROF_LIBUNWIND +#define UNW_LOCAL_ONLY +#include 
+#endif + +#ifdef JEMALLOC_PROF_LIBGCC +#include +#endif + +/******************************************************************************/ +/* Data. */ + +bool opt_prof = false; +bool opt_prof_active = true; +size_t opt_lg_prof_bt_max = LG_PROF_BT_MAX_DEFAULT; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_leak = false; +bool opt_prof_accum = true; +ssize_t opt_lg_prof_tcmax = LG_PROF_TCMAX_DEFAULT; +char opt_prof_prefix[PATH_MAX + 1]; + +uint64_t prof_interval; +bool prof_promote; + +unsigned prof_bt_max; + +#ifndef NO_TLS +__thread prof_tdata_t *prof_tdata_tls + JEMALLOC_ATTR(tls_model("initial-exec")); +#endif +pthread_key_t prof_tdata_tsd; + +/* + * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2ctx; +static malloc_mutex_t bt2ctx_mtx; + +static malloc_mutex_t prof_dump_seq_mtx; +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. The buffer is implicitly protected by bt2ctx_mtx, since + * it must be locked anyway during dumping. + */ +static char prof_dump_buf[PROF_DUMP_BUF_SIZE]; +static unsigned prof_dump_buf_end; +static int prof_dump_fd; + +/* Do not dump any profiles until bootstrapping is complete. */ +static bool prof_booted = false; + +static malloc_mutex_t enq_mtx; +static bool enq; +static bool enq_idump; +static bool enq_gdump; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +static prof_bt_t *bt_dup(prof_bt_t *bt); +static void bt_destroy(prof_bt_t *bt); +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code prof_unwind_init_callback( + struct _Unwind_Context *context, void *arg); +static _Unwind_Reason_Code prof_unwind_callback( + struct _Unwind_Context *context, void *arg); +#endif +static bool prof_flush(bool propagate_err); +static bool prof_write(const char *s, bool propagate_err); +static void prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, + size_t *leak_nctx); +static void prof_ctx_destroy(prof_ctx_t *ctx); +static void prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt); +static bool prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, + bool propagate_err); +static bool prof_dump_maps(bool propagate_err); +static bool prof_dump(const char *filename, bool leakcheck, + bool propagate_err); +static void prof_dump_filename(char *filename, char v, int64_t vseq); +static void prof_fdump(void); +static void prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, + size_t *hash2); +static bool prof_bt_keycomp(const void *k1, const void *k2); +static void prof_tdata_cleanup(void *arg); + +/******************************************************************************/ + +void +bt_init(prof_bt_t *bt, void **vec) +{ + + bt->vec = vec; + bt->len = 0; +} + +static void +bt_destroy(prof_bt_t *bt) +{ + + idalloc(bt); +} + +static prof_bt_t * +bt_dup(prof_bt_t *bt) +{ + prof_bt_t *ret; + + /* + * Create a single allocation that has space for vec immediately + * following the prof_bt_t structure. The backtraces that get + * stored in the backtrace caches are copied from stack-allocated + * temporary variables, so size is known at creation time. Making this + * a contiguous object improves cache locality. 
+ */ + ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) + + (bt->len * sizeof(void *))); + if (ret == NULL) + return (NULL); + ret->vec = (void **)((uintptr_t)ret + + QUANTUM_CEILING(sizeof(prof_bt_t))); + memcpy(ret->vec, bt->vec, bt->len * sizeof(void *)); + ret->len = bt->len; + + return (ret); +} + +static inline void +prof_enter(void) +{ + + malloc_mutex_lock(&enq_mtx); + enq = true; + malloc_mutex_unlock(&enq_mtx); + + malloc_mutex_lock(&bt2ctx_mtx); +} + +static inline void +prof_leave(void) +{ + bool idump, gdump; + + malloc_mutex_unlock(&bt2ctx_mtx); + + malloc_mutex_lock(&enq_mtx); + enq = false; + idump = enq_idump; + enq_idump = false; + gdump = enq_gdump; + enq_gdump = false; + malloc_mutex_unlock(&enq_mtx); + + if (idump) + prof_idump(); + if (gdump) + prof_gdump(); +} + +#ifdef JEMALLOC_PROF_LIBUNWIND +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + unw_context_t uc; + unw_cursor_t cursor; + unsigned i; + int err; + + assert(bt->len == 0); + assert(bt->vec != NULL); + assert(max <= (1U << opt_lg_prof_bt_max)); + + unw_getcontext(&uc); + unw_init_local(&cursor, &uc); + + /* Throw away (nignore+1) stack frames, if that many exist. */ + for (i = 0; i < nignore + 1; i++) { + err = unw_step(&cursor); + if (err <= 0) + return; + } + + /* + * Iterate over stack frames until there are no more, or until no space + * remains in bt. 
+ */ + for (i = 0; i < max; i++) { + unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]); + bt->len++; + err = unw_step(&cursor); + if (err <= 0) + break; + } +} +#endif +#ifdef JEMALLOC_PROF_LIBGCC +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) +{ + + return (_URC_NO_REASON); +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) +{ + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + + if (data->nignore > 0) + data->nignore--; + else { + data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context); + data->bt->len++; + if (data->bt->len == data->max) + return (_URC_END_OF_STACK); + } + + return (_URC_NO_REASON); +} + +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ + prof_unwind_data_t data = {bt, nignore, max}; + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#endif +#ifdef JEMALLOC_PROF_GCC +void +prof_backtrace(prof_bt_t *bt, unsigned nignore, unsigned max) +{ +#define BT_FRAME(i) \ + if ((i) < nignore + max) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) \ + return; \ + p = __builtin_return_address(i); \ + if (p == NULL) \ + return; \ + if (i >= nignore) { \ + bt->vec[(i) - nignore] = p; \ + bt->len = (i) - nignore + 1; \ + } \ + } else \ + return; + + assert(nignore <= 3); + assert(max <= (1U << opt_lg_prof_bt_max)); + + BT_FRAME(0) + BT_FRAME(1) + BT_FRAME(2) + BT_FRAME(3) + BT_FRAME(4) + BT_FRAME(5) + BT_FRAME(6) + BT_FRAME(7) + BT_FRAME(8) + BT_FRAME(9) + + BT_FRAME(10) + BT_FRAME(11) + BT_FRAME(12) + BT_FRAME(13) + BT_FRAME(14) + BT_FRAME(15) + BT_FRAME(16) + BT_FRAME(17) + BT_FRAME(18) + BT_FRAME(19) + + BT_FRAME(20) + BT_FRAME(21) + BT_FRAME(22) + BT_FRAME(23) + BT_FRAME(24) + BT_FRAME(25) + BT_FRAME(26) + BT_FRAME(27) + BT_FRAME(28) + BT_FRAME(29) + + BT_FRAME(30) + BT_FRAME(31) + BT_FRAME(32) + BT_FRAME(33) + BT_FRAME(34) + BT_FRAME(35) + BT_FRAME(36) + BT_FRAME(37) + BT_FRAME(38) + BT_FRAME(39) + + 
BT_FRAME(40) + BT_FRAME(41) + BT_FRAME(42) + BT_FRAME(43) + BT_FRAME(44) + BT_FRAME(45) + BT_FRAME(46) + BT_FRAME(47) + BT_FRAME(48) + BT_FRAME(49) + + BT_FRAME(50) + BT_FRAME(51) + BT_FRAME(52) + BT_FRAME(53) + BT_FRAME(54) + BT_FRAME(55) + BT_FRAME(56) + BT_FRAME(57) + BT_FRAME(58) + BT_FRAME(59) + + BT_FRAME(60) + BT_FRAME(61) + BT_FRAME(62) + BT_FRAME(63) + BT_FRAME(64) + BT_FRAME(65) + BT_FRAME(66) + BT_FRAME(67) + BT_FRAME(68) + BT_FRAME(69) + + BT_FRAME(70) + BT_FRAME(71) + BT_FRAME(72) + BT_FRAME(73) + BT_FRAME(74) + BT_FRAME(75) + BT_FRAME(76) + BT_FRAME(77) + BT_FRAME(78) + BT_FRAME(79) + + BT_FRAME(80) + BT_FRAME(81) + BT_FRAME(82) + BT_FRAME(83) + BT_FRAME(84) + BT_FRAME(85) + BT_FRAME(86) + BT_FRAME(87) + BT_FRAME(88) + BT_FRAME(89) + + BT_FRAME(90) + BT_FRAME(91) + BT_FRAME(92) + BT_FRAME(93) + BT_FRAME(94) + BT_FRAME(95) + BT_FRAME(96) + BT_FRAME(97) + BT_FRAME(98) + BT_FRAME(99) + + BT_FRAME(100) + BT_FRAME(101) + BT_FRAME(102) + BT_FRAME(103) + BT_FRAME(104) + BT_FRAME(105) + BT_FRAME(106) + BT_FRAME(107) + BT_FRAME(108) + BT_FRAME(109) + + BT_FRAME(110) + BT_FRAME(111) + BT_FRAME(112) + BT_FRAME(113) + BT_FRAME(114) + BT_FRAME(115) + BT_FRAME(116) + BT_FRAME(117) + BT_FRAME(118) + BT_FRAME(119) + + BT_FRAME(120) + BT_FRAME(121) + BT_FRAME(122) + BT_FRAME(123) + BT_FRAME(124) + BT_FRAME(125) + BT_FRAME(126) + BT_FRAME(127) + + /* Extras to compensate for nignore. */ + BT_FRAME(128) + BT_FRAME(129) + BT_FRAME(130) +#undef BT_FRAME +} +#endif + +prof_thr_cnt_t * +prof_lookup(prof_bt_t *bt) +{ + union { + prof_thr_cnt_t *p; + void *v; + } ret; + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata == NULL) { + prof_tdata = prof_tdata_init(); + if (prof_tdata == NULL) + return (NULL); + } + + if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) { + union { + prof_bt_t *p; + void *v; + } btkey; + union { + prof_ctx_t *p; + void *v; + } ctx; + bool new_ctx; + + /* + * This thread's cache lacks bt. 
Look for it in the global + * cache. + */ + prof_enter(); + if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) { + /* bt has never been seen before. Insert it. */ + ctx.v = imalloc(sizeof(prof_ctx_t)); + if (ctx.v == NULL) { + prof_leave(); + return (NULL); + } + btkey.p = bt_dup(bt); + if (btkey.v == NULL) { + prof_leave(); + idalloc(ctx.v); + return (NULL); + } + ctx.p->bt = btkey.p; + if (malloc_mutex_init(&ctx.p->lock)) { + prof_leave(); + idalloc(btkey.v); + idalloc(ctx.v); + return (NULL); + } + memset(&ctx.p->cnt_merged, 0, sizeof(prof_cnt_t)); + ql_new(&ctx.p->cnts_ql); + if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) { + /* OOM. */ + prof_leave(); + malloc_mutex_destroy(&ctx.p->lock); + idalloc(btkey.v); + idalloc(ctx.v); + return (NULL); + } + /* + * Artificially raise curobjs, in order to avoid a race + * condition with prof_ctx_merge()/prof_ctx_destroy(). + */ + ctx.p->cnt_merged.curobjs++; + new_ctx = true; + } else + new_ctx = false; + prof_leave(); + + /* Link a prof_thd_cnt_t into ctx for this thread. */ + if (opt_lg_prof_tcmax >= 0 && ckh_count(&prof_tdata->bt2cnt) + == (ZU(1) << opt_lg_prof_tcmax)) { + assert(ckh_count(&prof_tdata->bt2cnt) > 0); + /* + * Flush the least recently used cnt in order to keep + * bt2cnt from becoming too large. + */ + ret.p = ql_last(&prof_tdata->lru_ql, lru_link); + assert(ret.v != NULL); + ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt, NULL, + NULL); + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + prof_ctx_merge(ret.p->ctx, ret.p); + /* ret can now be re-used. */ + } else { + assert(opt_lg_prof_tcmax < 0 || + ckh_count(&prof_tdata->bt2cnt) < (ZU(1) << + opt_lg_prof_tcmax)); + /* Allocate and partially initialize a new cnt. 
*/ + ret.v = imalloc(sizeof(prof_thr_cnt_t)); + if (ret.p == NULL) { + if (new_ctx) { + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx.p->lock); + } + return (NULL); + } + ql_elm_new(ret.p, cnts_link); + ql_elm_new(ret.p, lru_link); + } + /* Finish initializing ret. */ + ret.p->ctx = ctx.p; + ret.p->epoch = 0; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + if (ckh_insert(&prof_tdata->bt2cnt, btkey.v, ret.v)) { + if (new_ctx) { + malloc_mutex_lock(&ctx.p->lock); + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx.p->lock); + } + idalloc(ret.v); + return (NULL); + } + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + malloc_mutex_lock(&ctx.p->lock); + ql_tail_insert(&ctx.p->cnts_ql, ret.p, cnts_link); + if (new_ctx) + ctx.p->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx.p->lock); + } else { + /* Move ret to the front of the LRU. */ + ql_remove(&prof_tdata->lru_ql, ret.p, lru_link); + ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link); + } + + return (ret.p); +} + +static bool +prof_flush(bool propagate_err) +{ + bool ret = false; + ssize_t err; + + err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (propagate_err == false) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) + abort(); + } + ret = true; + } + prof_dump_buf_end = 0; + + return (ret); +} + +static bool +prof_write(const char *s, bool propagate_err) +{ + unsigned i, slen, n; + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) + if (prof_flush(propagate_err) && propagate_err) + return (true); + + if (prof_dump_buf_end + slen <= PROF_DUMP_BUF_SIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. 
*/ + n = PROF_DUMP_BUF_SIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return (false); +} + +static void +prof_ctx_sum(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx) +{ + prof_thr_cnt_t *thr_cnt; + prof_cnt_t tcnt; + + malloc_mutex_lock(&ctx->lock); + + memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t)); + ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) { + volatile unsigned *epoch = &thr_cnt->epoch; + + while (true) { + unsigned epoch0 = *epoch; + + /* Make sure epoch is even. */ + if (epoch0 & 1U) + continue; + + memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t)); + + /* Terminate if epoch didn't change while reading. */ + if (*epoch == epoch0) + break; + } + + ctx->cnt_summed.curobjs += tcnt.curobjs; + ctx->cnt_summed.curbytes += tcnt.curbytes; + if (opt_prof_accum) { + ctx->cnt_summed.accumobjs += tcnt.accumobjs; + ctx->cnt_summed.accumbytes += tcnt.accumbytes; + } + } + + if (ctx->cnt_summed.curobjs != 0) + (*leak_nctx)++; + + /* Add to cnt_all. */ + cnt_all->curobjs += ctx->cnt_summed.curobjs; + cnt_all->curbytes += ctx->cnt_summed.curbytes; + if (opt_prof_accum) { + cnt_all->accumobjs += ctx->cnt_summed.accumobjs; + cnt_all->accumbytes += ctx->cnt_summed.accumbytes; + } + + malloc_mutex_unlock(&ctx->lock); +} + +static void +prof_ctx_destroy(prof_ctx_t *ctx) +{ + + /* + * Check that ctx is still unused by any thread cache before destroying + * it. prof_lookup() interlocks bt2ctx_mtx and ctx->lock in order to + * avoid a race condition with this function, and prof_ctx_merge() + * artificially raises ctx->cnt_merged.curobjs in order to avoid a race + * between the main body of prof_ctx_merge() and entry into this + * function. 
+ */ + prof_enter(); + malloc_mutex_lock(&ctx->lock); + if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 1) { + assert(ctx->cnt_merged.curbytes == 0); + assert(ctx->cnt_merged.accumobjs == 0); + assert(ctx->cnt_merged.accumbytes == 0); + /* Remove ctx from bt2ctx. */ + ckh_remove(&bt2ctx, ctx->bt, NULL, NULL); + prof_leave(); + /* Destroy ctx. */ + malloc_mutex_unlock(&ctx->lock); + bt_destroy(ctx->bt); + malloc_mutex_destroy(&ctx->lock); + idalloc(ctx); + } else { + /* Compensate for increment in prof_ctx_merge(). */ + ctx->cnt_merged.curobjs--; + malloc_mutex_unlock(&ctx->lock); + prof_leave(); + } +} + +static void +prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt) +{ + bool destroy; + + /* Merge cnt stats and detach from ctx. */ + malloc_mutex_lock(&ctx->lock); + ctx->cnt_merged.curobjs += cnt->cnts.curobjs; + ctx->cnt_merged.curbytes += cnt->cnts.curbytes; + ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs; + ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes; + ql_remove(&ctx->cnts_ql, cnt, cnts_link); + if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL && + ctx->cnt_merged.curobjs == 0) { + /* + * Artificially raise ctx->cnt_merged.curobjs in order to keep + * another thread from winning the race to destroy ctx while + * this one has ctx->lock dropped. Without this, it would be + * possible for another thread to: + * + * 1) Sample an allocation associated with ctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_ctx_destroy(ctx). + * + * The result would be that ctx no longer exists by the time + * this thread accesses it in prof_ctx_destroy(). 
+ */ + ctx->cnt_merged.curobjs++; + destroy = true; + } else + destroy = false; + malloc_mutex_unlock(&ctx->lock); + if (destroy) + prof_ctx_destroy(ctx); +} + +static bool +prof_dump_ctx(prof_ctx_t *ctx, prof_bt_t *bt, bool propagate_err) +{ + char buf[UMAX2S_BUFSIZE]; + unsigned i; + + if (opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) { + assert(ctx->cnt_summed.curbytes == 0); + assert(ctx->cnt_summed.accumobjs == 0); + assert(ctx->cnt_summed.accumbytes == 0); + return (false); + } + + if (prof_write(u2s(ctx->cnt_summed.curobjs, 10, buf), propagate_err) + || prof_write(": ", propagate_err) + || prof_write(u2s(ctx->cnt_summed.curbytes, 10, buf), + propagate_err) + || prof_write(" [", propagate_err) + || prof_write(u2s(ctx->cnt_summed.accumobjs, 10, buf), + propagate_err) + || prof_write(": ", propagate_err) + || prof_write(u2s(ctx->cnt_summed.accumbytes, 10, buf), + propagate_err) + || prof_write("] @", propagate_err)) + return (true); + + for (i = 0; i < bt->len; i++) { + if (prof_write(" 0x", propagate_err) + || prof_write(u2s((uintptr_t)bt->vec[i], 16, buf), + propagate_err)) + return (true); + } + + if (prof_write("\n", propagate_err)) + return (true); + + return (false); +} + +static bool +prof_dump_maps(bool propagate_err) +{ + int mfd; + char buf[UMAX2S_BUFSIZE]; + char *s; + unsigned i, slen; + /* /proc//maps\0 */ + char mpath[6 + UMAX2S_BUFSIZE + + 5 + 1]; + + i = 0; + + s = "/proc/"; + slen = strlen(s); + memcpy(&mpath[i], s, slen); + i += slen; + + s = u2s(getpid(), 10, buf); + slen = strlen(s); + memcpy(&mpath[i], s, slen); + i += slen; + + s = "/maps"; + slen = strlen(s); + memcpy(&mpath[i], s, slen); + i += slen; + + mpath[i] = '\0'; + + mfd = open(mpath, O_RDONLY); + if (mfd != -1) { + ssize_t nread; + + if (prof_write("\nMAPPED_LIBRARIES:\n", propagate_err) && + propagate_err) + return (true); + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUF_SIZE) { + /* Make space in prof_dump_buf before read(). 
*/ + if (prof_flush(propagate_err) && propagate_err) + return (true); + } + nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUF_SIZE - prof_dump_buf_end); + } while (nread > 0); + close(mfd); + } else + return (true); + + return (false); +} + +static bool +prof_dump(const char *filename, bool leakcheck, bool propagate_err) +{ + prof_cnt_t cnt_all; + size_t tabind; + union { + prof_bt_t *p; + void *v; + } bt; + union { + prof_ctx_t *p; + void *v; + } ctx; + char buf[UMAX2S_BUFSIZE]; + size_t leak_nctx; + + prof_enter(); + prof_dump_fd = creat(filename, 0644); + if (prof_dump_fd == -1) { + if (propagate_err == false) { + malloc_write(": creat(\""); + malloc_write(filename); + malloc_write("\", 0644) failed\n"); + if (opt_abort) + abort(); + } + goto ERROR; + } + + /* Merge per thread profile stats, and sum them in cnt_all. */ + memset(&cnt_all, 0, sizeof(prof_cnt_t)); + leak_nctx = 0; + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;) + prof_ctx_sum(ctx.p, &cnt_all, &leak_nctx); + + /* Dump profile header. */ + if (prof_write("heap profile: ", propagate_err) + || prof_write(u2s(cnt_all.curobjs, 10, buf), propagate_err) + || prof_write(": ", propagate_err) + || prof_write(u2s(cnt_all.curbytes, 10, buf), propagate_err) + || prof_write(" [", propagate_err) + || prof_write(u2s(cnt_all.accumobjs, 10, buf), propagate_err) + || prof_write(": ", propagate_err) + || prof_write(u2s(cnt_all.accumbytes, 10, buf), propagate_err)) + goto ERROR; + + if (opt_lg_prof_sample == 0) { + if (prof_write("] @ heapprofile\n", propagate_err)) + goto ERROR; + } else { + if (prof_write("] @ heap_v2/", propagate_err) + || prof_write(u2s((uint64_t)1U << opt_lg_prof_sample, 10, + buf), propagate_err) + || prof_write("\n", propagate_err)) + goto ERROR; + } + + /* Dump per ctx profile stats. 
*/ + for (tabind = 0; ckh_iter(&bt2ctx, &tabind, &bt.v, &ctx.v) + == false;) { + if (prof_dump_ctx(ctx.p, bt.p, propagate_err)) + goto ERROR; + } + + /* Dump /proc//maps if possible. */ + if (prof_dump_maps(propagate_err)) + goto ERROR; + + if (prof_flush(propagate_err)) + goto ERROR; + close(prof_dump_fd); + prof_leave(); + + if (leakcheck && cnt_all.curbytes != 0) { + malloc_write(": Leak summary: "); + malloc_write(u2s(cnt_all.curbytes, 10, buf)); + malloc_write((cnt_all.curbytes != 1) ? " bytes, " : " byte, "); + malloc_write(u2s(cnt_all.curobjs, 10, buf)); + malloc_write((cnt_all.curobjs != 1) ? " objects, " : + " object, "); + malloc_write(u2s(leak_nctx, 10, buf)); + malloc_write((leak_nctx != 1) ? " contexts\n" : " context\n"); + malloc_write(": Run pprof on \""); + malloc_write(filename); + malloc_write("\" for leak detail\n"); + } + + return (false); +ERROR: + prof_leave(); + return (true); +} + +#define DUMP_FILENAME_BUFSIZE (PATH_MAX+ UMAX2S_BUFSIZE \ + + 1 \ + + UMAX2S_BUFSIZE \ + + 2 \ + + UMAX2S_BUFSIZE \ + + 5 + 1) +static void +prof_dump_filename(char *filename, char v, int64_t vseq) +{ + char buf[UMAX2S_BUFSIZE]; + char *s; + unsigned i, slen; + + /* + * Construct a filename of the form: + * + * ...v.heap\0 + */ + + i = 0; + + s = opt_prof_prefix; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = "."; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = u2s(getpid(), 10, buf); + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = "."; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = u2s(prof_dump_seq, 10, buf); + prof_dump_seq++; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + s = "."; + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + filename[i] = v; + i++; + + if (vseq != 0xffffffffffffffffLLU) { + s = u2s(vseq, 10, buf); + slen = strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + } + + s = ".heap"; + slen = 
strlen(s); + memcpy(&filename[i], s, slen); + i += slen; + + filename[i] = '\0'; +} + +static void +prof_fdump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + if (prof_booted == false) + return; + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'f', 0xffffffffffffffffLLU); + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, opt_prof_leak, false); + } +} + +void +prof_idump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + if (prof_booted == false) + return; + malloc_mutex_lock(&enq_mtx); + if (enq) { + enq_idump = true; + malloc_mutex_unlock(&enq_mtx); + return; + } + malloc_mutex_unlock(&enq_mtx); + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } +} + +bool +prof_mdump(const char *filename) +{ + char filename_buf[DUMP_FILENAME_BUFSIZE]; + + if (opt_prof == false || prof_booted == false) + return (true); + + if (filename == NULL) { + /* No filename specified, so automatically generate one. 
*/ + if (opt_prof_prefix[0] == '\0') + return (true); + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename_buf, 'm', prof_dump_mseq); + prof_dump_mseq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + filename = filename_buf; + } + return (prof_dump(filename, false, true)); +} + +void +prof_gdump(void) +{ + char filename[DUMP_FILENAME_BUFSIZE]; + + if (prof_booted == false) + return; + malloc_mutex_lock(&enq_mtx); + if (enq) { + enq_gdump = true; + malloc_mutex_unlock(&enq_mtx); + return; + } + malloc_mutex_unlock(&enq_mtx); + + if (opt_prof_prefix[0] != '\0') { + malloc_mutex_lock(&prof_dump_seq_mtx); + prof_dump_filename(filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(&prof_dump_seq_mtx); + prof_dump(filename, false, false); + } +} + +static void +prof_bt_hash(const void *key, unsigned minbits, size_t *hash1, size_t *hash2) +{ + size_t ret1, ret2; + uint64_t h; + prof_bt_t *bt = (prof_bt_t *)key; + + assert(minbits <= 32 || (SIZEOF_PTR == 8 && minbits <= 64)); + assert(hash1 != NULL); + assert(hash2 != NULL); + + h = hash(bt->vec, bt->len * sizeof(void *), 0x94122f335b332aeaLLU); + if (minbits <= 32) { + /* + * Avoid doing multiple hashes, since a single hash provides + * enough bits. + */ + ret1 = h & ZU(0xffffffffU); + ret2 = h >> 32; + } else { + ret1 = h; + ret2 = hash(bt->vec, bt->len * sizeof(void *), + 0x8432a476666bbc13U); + } + + *hash1 = ret1; + *hash2 = ret2; +} + +static bool +prof_bt_keycomp(const void *k1, const void *k2) +{ + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + if (bt1->len != bt2->len) + return (false); + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +prof_tdata_t * +prof_tdata_init(void) +{ + prof_tdata_t *prof_tdata; + + /* Initialize an empty cache for this thread. 
*/ + prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t)); + if (prof_tdata == NULL) + return (NULL); + + if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp)) { + idalloc(prof_tdata); + return (NULL); + } + ql_new(&prof_tdata->lru_ql); + + prof_tdata->vec = imalloc(sizeof(void *) * prof_bt_max); + if (prof_tdata->vec == NULL) { + + ckh_delete(&prof_tdata->bt2cnt); + idalloc(prof_tdata); + return (NULL); + } + + prof_tdata->prn_state = 0; + prof_tdata->threshold = 0; + prof_tdata->accum = 0; + + PROF_TCACHE_SET(prof_tdata); + + return (prof_tdata); +} + +static void +prof_tdata_cleanup(void *arg) +{ + prof_tdata_t *prof_tdata; + + prof_tdata = PROF_TCACHE_GET(); + if (prof_tdata != NULL) { + prof_thr_cnt_t *cnt; + + /* + * Delete the hash table. All of its contents can still be + * iterated over via the LRU. + */ + ckh_delete(&prof_tdata->bt2cnt); + + /* + * Iteratively merge cnt's into the global stats and delete + * them. + */ + while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) { + prof_ctx_merge(cnt->ctx, cnt); + ql_remove(&prof_tdata->lru_ql, cnt, lru_link); + idalloc(cnt); + } + + idalloc(prof_tdata->vec); + + idalloc(prof_tdata); + PROF_TCACHE_SET(NULL); + } +} + +void +prof_boot0(void) +{ + + memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, + sizeof(PROF_PREFIX_DEFAULT)); +} + +void +prof_boot1(void) +{ + + /* + * opt_prof and prof_promote must be in their final state before any + * arenas are initialized, so this function must be executed early. + */ + + if (opt_prof_leak && opt_prof == false) { + /* + * Enable opt_prof, but in such a way that profiles are never + * automatically dumped. 
+ */ + opt_prof = true; + opt_prof_gdump = false; + prof_interval = 0; + } else if (opt_prof) { + if (opt_lg_prof_interval >= 0) { + prof_interval = (((uint64_t)1U) << + opt_lg_prof_interval); + } else + prof_interval = 0; + } + + prof_promote = (opt_prof && opt_lg_prof_sample > PAGE_SHIFT); +} + +bool +prof_boot2(void) +{ + + if (opt_prof) { + if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) + return (true); + if (malloc_mutex_init(&bt2ctx_mtx)) + return (true); + if (pthread_key_create(&prof_tdata_tsd, prof_tdata_cleanup) + != 0) { + malloc_write( + ": Error in pthread_key_create()\n"); + abort(); + } + + prof_bt_max = (1U << opt_lg_prof_bt_max); + if (malloc_mutex_init(&prof_dump_seq_mtx)) + return (true); + + if (malloc_mutex_init(&enq_mtx)) + return (true); + enq = false; + enq_idump = false; + enq_gdump = false; + + if (atexit(prof_fdump) != 0) { + malloc_write(": Error in atexit()\n"); + if (opt_abort) + abort(); + } + } + +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal state + * before enabling profiling. 
+ */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif + + prof_booted = true; + + return (false); +} + +/******************************************************************************/ +#endif /* JEMALLOC_PROF */ diff --git a/src/rtree.c b/src/rtree.c new file mode 100644 index 0000000..eb0ff1e --- /dev/null +++ b/src/rtree.c @@ -0,0 +1,46 @@ +#define JEMALLOC_RTREE_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +rtree_t * +rtree_new(unsigned bits) +{ + rtree_t *ret; + unsigned bits_per_level, height, i; + + bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1; + height = bits / bits_per_level; + if (height * bits_per_level != bits) + height++; + assert(height * bits_per_level >= bits); + + ret = (rtree_t*)base_alloc(offsetof(rtree_t, level2bits) + + (sizeof(unsigned) * height)); + if (ret == NULL) + return (NULL); + memset(ret, 0, offsetof(rtree_t, level2bits) + (sizeof(unsigned) * + height)); + + if (malloc_mutex_init(&ret->mutex)) { + /* Leak the rtree. */ + return (NULL); + } + ret->height = height; + if (bits_per_level * height > bits) + ret->level2bits[0] = bits % bits_per_level; + else + ret->level2bits[0] = bits_per_level; + for (i = 1; i < height; i++) + ret->level2bits[i] = bits_per_level; + + ret->root = (void**)base_alloc(sizeof(void *) << ret->level2bits[0]); + if (ret->root == NULL) { + /* + * We leak the rtree here, since there's no generic base + * deallocation. 
+ */ + return (NULL); + } + memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]); + + return (ret); +} diff --git a/src/stats.c b/src/stats.c new file mode 100644 index 0000000..cbbbb5b --- /dev/null +++ b/src/stats.c @@ -0,0 +1,790 @@ +#define JEMALLOC_STATS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#define CTL_GET(n, v, t) do { \ + size_t sz = sizeof(t); \ + xmallctl(n, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_I_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_J_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +#define CTL_IJ_GET(n, v, t) do { \ + size_t mib[6]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = i; \ + mib[4] = j; \ + xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ +} while (0) + +/******************************************************************************/ +/* Data. */ + +bool opt_stats_print = false; + +#ifdef JEMALLOC_STATS +size_t stats_cactive = 0; +#endif + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. 
*/ + +#ifdef JEMALLOC_STATS +static void malloc_vcprintf(void (*write_cb)(void *, const char *), + void *cbopaque, const char *format, va_list ap); +static void stats_arena_bins_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +static void stats_arena_print(void (*write_cb)(void *, const char *), + void *cbopaque, unsigned i); +#endif + +/******************************************************************************/ + +/* + * We don't want to depend on vsnprintf() for production builds, since that can + * cause unnecessary bloat for static binaries. u2s() provides minimal integer + * printing functionality, so that malloc_printf() use can be limited to + * JEMALLOC_STATS code. + */ +char * +u2s(uint64_t x, unsigned base, char *s) +{ + unsigned i; + + i = UMAX2S_BUFSIZE - 1; + s[i] = '\0'; + switch (base) { + case 10: + do { + i--; + s[i] = "0123456789"[x % (uint64_t)10]; + x /= (uint64_t)10; + } while (x > 0); + break; + case 16: + do { + i--; + s[i] = "0123456789abcdef"[x & 0xf]; + x >>= 4; + } while (x > 0); + break; + default: + do { + i--; + s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % + (uint64_t)base]; + x /= (uint64_t)base; + } while (x > 0); + } + + return (&s[i]); +} + +#ifdef JEMALLOC_STATS +static void +malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap) +{ + char buf[4096]; + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. + */ + write_cb = JEMALLOC_P(malloc_message); + cbopaque = NULL; + } + + vsnprintf(buf, sizeof(buf), format, ap); + write_cb(cbopaque, buf); +} + +/* + * Print to a callback function in such a way as to (hopefully) avoid memory + * allocation. 
+ */ +JEMALLOC_ATTR(format(printf, 3, 4)) +void +malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(write_cb, cbopaque, format, ap); + va_end(ap); +} + +/* + * Print to stderr in such a way as to (hopefully) avoid memory allocation. + */ +JEMALLOC_ATTR(format(printf, 1, 2)) +void +malloc_printf(const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); +} +#endif + +#ifdef JEMALLOC_STATS +static void +stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t pagesize; + bool config_tcache; + unsigned nbins, j, gap_start; + + CTL_GET("arenas.pagesize", &pagesize, size_t); + + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc nrequests nfills nflushes" + " newruns reruns maxruns curruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: bin size regs pgs allocated nmalloc" + " ndalloc newruns reruns maxruns" + " curruns\n"); + } + CTL_GET("arenas.nbins", &nbins, unsigned); + for (j = 0, gap_start = UINT_MAX; j < nbins; j++) { + uint64_t nruns; + + CTL_IJ_GET("stats.arenas.0.bins.0.nruns", &nruns, uint64_t); + if (nruns == 0) { + if (gap_start == UINT_MAX) + gap_start = j; + } else { + unsigned ntbins_, nqbins, ncbins, nsbins; + size_t reg_size, run_size, allocated; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t reruns; + size_t highruns, curruns; + + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, + "[%u..%u]\n", gap_start, + j - 1); + } else { + /* Gap of one size class. 
*/ + malloc_cprintf(write_cb, cbopaque, + "[%u]\n", gap_start); + } + gap_start = UINT_MAX; + } + CTL_GET("arenas.ntbins", &ntbins_, unsigned); + CTL_GET("arenas.nqbins", &nqbins, unsigned); + CTL_GET("arenas.ncbins", &ncbins, unsigned); + CTL_GET("arenas.nsbins", &nsbins, unsigned); + CTL_J_GET("arenas.bin.0.size", ®_size, size_t); + CTL_J_GET("arenas.bin.0.nregs", &nregs, uint32_t); + CTL_J_GET("arenas.bin.0.run_size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.allocated", + &allocated, size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nmalloc", + &nmalloc, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.ndalloc", + &ndalloc, uint64_t); + if (config_tcache) { + CTL_IJ_GET("stats.arenas.0.bins.0.nrequests", + &nrequests, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nfills", + &nfills, uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.nflushes", + &nflushes, uint64_t); + } + CTL_IJ_GET("stats.arenas.0.bins.0.nreruns", &reruns, + uint64_t); + CTL_IJ_GET("stats.arenas.0.bins.0.highruns", &highruns, + size_t); + CTL_IJ_GET("stats.arenas.0.bins.0.curruns", &curruns, + size_t); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu %12zu\n", + j, + j < ntbins_ ? "T" : j < ntbins_ + nqbins ? + "Q" : j < ntbins_ + nqbins + ncbins ? "C" : + "S", + reg_size, nregs, run_size / pagesize, + allocated, nmalloc, ndalloc, nrequests, + nfills, nflushes, nruns, reruns, highruns, + curruns); + } else { + malloc_cprintf(write_cb, cbopaque, + "%13u %1s %5zu %4u %3zu %12zu %12"PRIu64 + " %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu %12zu\n", + j, + j < ntbins_ ? "T" : j < ntbins_ + nqbins ? + "Q" : j < ntbins_ + nqbins + ncbins ? 
"C" : + "S", + reg_size, nregs, run_size / pagesize, + allocated, nmalloc, ndalloc, nruns, reruns, + highruns, curruns); + } + } + } + if (gap_start != UINT_MAX) { + if (j > gap_start + 1) { + /* Gap of more than one size class. */ + malloc_cprintf(write_cb, cbopaque, "[%u..%u]\n", + gap_start, j - 1); + } else { + /* Gap of one size class. */ + malloc_cprintf(write_cb, cbopaque, "[%u]\n", gap_start); + } + } +} + +static void +stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + size_t pagesize, nlruns, j; + ssize_t gap_start; + + CTL_GET("arenas.pagesize", &pagesize, size_t); + + malloc_cprintf(write_cb, cbopaque, + "large: size pages nmalloc ndalloc nrequests" + " maxruns curruns\n"); + CTL_GET("arenas.nlruns", &nlruns, size_t); + for (j = 0, gap_start = -1; j < nlruns; j++) { + uint64_t nmalloc, ndalloc, nrequests; + size_t run_size, highruns, curruns; + + CTL_IJ_GET("stats.arenas.0.lruns.0.nmalloc", &nmalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.ndalloc", &ndalloc, + uint64_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.nrequests", &nrequests, + uint64_t); + if (nrequests == 0) { + if (gap_start == -1) + gap_start = j; + } else { + CTL_J_GET("arenas.lrun.0.size", &run_size, size_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.highruns", &highruns, + size_t); + CTL_IJ_GET("stats.arenas.0.lruns.0.curruns", &curruns, + size_t); + if (gap_start != -1) { + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", + j - gap_start); + gap_start = -1; + } + malloc_cprintf(write_cb, cbopaque, + "%13zu %5zu %12"PRIu64" %12"PRIu64" %12"PRIu64 + " %12zu %12zu\n", + run_size, run_size / pagesize, nmalloc, ndalloc, + nrequests, highruns, curruns); + } + } + if (gap_start != -1) + malloc_cprintf(write_cb, cbopaque, "[%zu]\n", j - gap_start); +} + +static void +stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, + unsigned i) +{ + unsigned nthreads; + size_t pagesize, pactive, pdirty, mapped; + uint64_t npurge, nmadvise, 
purged; + size_t small_allocated; + uint64_t small_nmalloc, small_ndalloc, small_nrequests; + size_t large_allocated; + uint64_t large_nmalloc, large_ndalloc, large_nrequests; + + CTL_GET("arenas.pagesize", &pagesize, size_t); + + CTL_I_GET("stats.arenas.0.nthreads", &nthreads, unsigned); + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + CTL_I_GET("stats.arenas.0.pactive", &pactive, size_t); + CTL_I_GET("stats.arenas.0.pdirty", &pdirty, size_t); + CTL_I_GET("stats.arenas.0.npurge", &npurge, uint64_t); + CTL_I_GET("stats.arenas.0.nmadvise", &nmadvise, uint64_t); + CTL_I_GET("stats.arenas.0.purged", &purged, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s," + " %"PRIu64" madvise%s, %"PRIu64" purged\n", + pactive, pdirty, npurge, npurge == 1 ? "" : "s", + nmadvise, nmadvise == 1 ? "" : "s", purged); + + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc ndalloc nrequests\n"); + CTL_I_GET("stats.arenas.0.small.allocated", &small_allocated, size_t); + CTL_I_GET("stats.arenas.0.small.nmalloc", &small_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.ndalloc", &small_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.small.nrequests", &small_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + CTL_I_GET("stats.arenas.0.large.allocated", &large_allocated, size_t); + CTL_I_GET("stats.arenas.0.large.nmalloc", &large_nmalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.ndalloc", &large_ndalloc, uint64_t); + CTL_I_GET("stats.arenas.0.large.nrequests", &large_nrequests, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"PRIu64" %12"PRIu64" %12"PRIu64"\n", + small_allocated + 
large_allocated, + small_nmalloc + large_nmalloc, + small_ndalloc + large_ndalloc, + small_nrequests + large_nrequests); + malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", + pactive * pagesize ); + CTL_I_GET("stats.arenas.0.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + + stats_arena_bins_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, i); +} +#endif + +void +stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + int err; + uint64_t epoch; + size_t u64sz; + char s[UMAX2S_BUFSIZE]; + bool general = true; + bool merged = true; + bool unmerged = true; + bool bins = true; + bool large = true; + + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ + epoch = 1; + u64sz = sizeof(uint64_t); + err = JEMALLOC_P(mallctl)("epoch", &epoch, &u64sz, &epoch, + sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write(": Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write(": Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } + + if (write_cb == NULL) { + /* + * The caller did not provide an alternate write_cb callback + * function, so use the default one. malloc_write() is an + * inline function, so use malloc_message() directly here. 
+ */ + write_cb = JEMALLOC_P(malloc_message); + cbopaque = NULL; + } + + if (opts != NULL) { + unsigned i; + + for (i = 0; opts[i] != '\0'; i++) { + switch (opts[i]) { + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + default:; + } + } + } + + write_cb(cbopaque, "___ Begin jemalloc statistics ___\n"); + if (general) { + int err; + const char *cpv; + bool bv; + unsigned uv; + ssize_t ssv; + size_t sv, bsz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + write_cb(cbopaque, "Version: "); + write_cb(cbopaque, cpv); + write_cb(cbopaque, "\n"); + CTL_GET("config.debug", &bv, bool); + write_cb(cbopaque, "Assertions "); + write_cb(cbopaque, bv ? "enabled" : "disabled"); + write_cb(cbopaque, "\n"); + +#define OPT_WRITE_BOOL(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &bv, &bsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, bv ? 
"true" : "false"); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &sv, &ssz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(sv, 10, s)); \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_SSIZE_T(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &ssv, &sssz, \ + NULL, 0)) == 0) { \ + if (ssv >= 0) { \ + write_cb(cbopaque, " opt."#n": "); \ + write_cb(cbopaque, u2s(ssv, 10, s)); \ + } else { \ + write_cb(cbopaque, " opt."#n": -"); \ + write_cb(cbopaque, u2s(-ssv, 10, s)); \ + } \ + write_cb(cbopaque, "\n"); \ + } +#define OPT_WRITE_CHAR_P(n) \ + if ((err = JEMALLOC_P(mallctl)("opt."#n, &cpv, &cpsz, \ + NULL, 0)) == 0) { \ + write_cb(cbopaque, " opt."#n": \""); \ + write_cb(cbopaque, cpv); \ + write_cb(cbopaque, "\"\n"); \ + } + + write_cb(cbopaque, "Run-time option settings:\n"); + OPT_WRITE_BOOL(abort) + OPT_WRITE_SIZE_T(lg_qspace_max) + OPT_WRITE_SIZE_T(lg_cspace_max) + OPT_WRITE_SIZE_T(lg_chunk) + OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_SSIZE_T(lg_dirty_mult) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_BOOL(junk) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(sysv) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_gc_sweep) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_SIZE_T(lg_prof_bt_max) + OPT_WRITE_BOOL(prof_active) + OPT_WRITE_SSIZE_T(lg_prof_sample) + OPT_WRITE_BOOL(prof_accum) + OPT_WRITE_SSIZE_T(lg_prof_tcmax) + OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_leak) + OPT_WRITE_BOOL(overcommit) + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + write_cb(cbopaque, "CPUs: "); + write_cb(cbopaque, u2s(ncpus, 10, s)); + write_cb(cbopaque, "\n"); + + CTL_GET("arenas.narenas", &uv, unsigned); + write_cb(cbopaque, "Max arenas: "); + write_cb(cbopaque, u2s(uv, 10, s)); + write_cb(cbopaque, 
"\n"); + + write_cb(cbopaque, "Pointer size: "); + write_cb(cbopaque, u2s(sizeof(void *), 10, s)); + write_cb(cbopaque, "\n"); + + CTL_GET("arenas.quantum", &sv, size_t); + write_cb(cbopaque, "Quantum size: "); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "\n"); + + CTL_GET("arenas.cacheline", &sv, size_t); + write_cb(cbopaque, "Cacheline size (assumed): "); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "\n"); + + CTL_GET("arenas.subpage", &sv, size_t); + write_cb(cbopaque, "Subpage spacing: "); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "\n"); + + if ((err = JEMALLOC_P(mallctl)("arenas.tspace_min", &sv, &ssz, + NULL, 0)) == 0) { + write_cb(cbopaque, "Tiny 2^n-spaced sizes: ["); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ".."); + + CTL_GET("arenas.tspace_max", &sv, size_t); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "]\n"); + } + + CTL_GET("arenas.qspace_min", &sv, size_t); + write_cb(cbopaque, "Quantum-spaced sizes: ["); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ".."); + CTL_GET("arenas.qspace_max", &sv, size_t); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "]\n"); + + CTL_GET("arenas.cspace_min", &sv, size_t); + write_cb(cbopaque, "Cacheline-spaced sizes: ["); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ".."); + CTL_GET("arenas.cspace_max", &sv, size_t); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "]\n"); + + CTL_GET("arenas.sspace_min", &sv, size_t); + write_cb(cbopaque, "Subpage-spaced sizes: ["); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ".."); + CTL_GET("arenas.sspace_max", &sv, size_t); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "]\n"); + + CTL_GET("opt.lg_dirty_mult", &ssv, ssize_t); + if (ssv >= 0) { + write_cb(cbopaque, + "Min active:dirty page ratio per arena: "); + write_cb(cbopaque, u2s((1U << ssv), 10, s)); + write_cb(cbopaque, ":1\n"); + } else { + write_cb(cbopaque, + "Min 
active:dirty page ratio per arena: N/A\n"); + } + if ((err = JEMALLOC_P(mallctl)("arenas.tcache_max", &sv, + &ssz, NULL, 0)) == 0) { + write_cb(cbopaque, + "Maximum thread-cached size class: "); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, "\n"); + } + if ((err = JEMALLOC_P(mallctl)("opt.lg_tcache_gc_sweep", &ssv, + &ssz, NULL, 0)) == 0) { + size_t tcache_gc_sweep = (1U << ssv); + bool tcache_enabled; + CTL_GET("opt.tcache", &tcache_enabled, bool); + write_cb(cbopaque, "Thread cache GC sweep interval: "); + write_cb(cbopaque, tcache_enabled && ssv >= 0 ? + u2s(tcache_gc_sweep, 10, s) : "N/A"); + write_cb(cbopaque, "\n"); + } + if ((err = JEMALLOC_P(mallctl)("opt.prof", &bv, &bsz, NULL, 0)) + == 0 && bv) { + CTL_GET("opt.lg_prof_bt_max", &sv, size_t); + write_cb(cbopaque, "Maximum profile backtrace depth: "); + write_cb(cbopaque, u2s((1U << sv), 10, s)); + write_cb(cbopaque, "\n"); + + CTL_GET("opt.lg_prof_tcmax", &ssv, ssize_t); + write_cb(cbopaque, + "Maximum per thread backtrace cache: "); + if (ssv >= 0) { + write_cb(cbopaque, u2s((1U << ssv), 10, s)); + write_cb(cbopaque, " (2^"); + write_cb(cbopaque, u2s(ssv, 10, s)); + write_cb(cbopaque, ")\n"); + } else + write_cb(cbopaque, "N/A\n"); + + CTL_GET("opt.lg_prof_sample", &sv, size_t); + write_cb(cbopaque, "Average profile sample interval: "); + write_cb(cbopaque, u2s((((uint64_t)1U) << sv), 10, s)); + write_cb(cbopaque, " (2^"); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ")\n"); + + CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); + write_cb(cbopaque, "Average profile dump interval: "); + if (ssv >= 0) { + write_cb(cbopaque, u2s((((uint64_t)1U) << ssv), + 10, s)); + write_cb(cbopaque, " (2^"); + write_cb(cbopaque, u2s(ssv, 10, s)); + write_cb(cbopaque, ")\n"); + } else + write_cb(cbopaque, "N/A\n"); + } + CTL_GET("arenas.chunksize", &sv, size_t); + write_cb(cbopaque, "Chunk size: "); + write_cb(cbopaque, u2s(sv, 10, s)); + CTL_GET("opt.lg_chunk", &sv, size_t); + write_cb(cbopaque, " 
(2^"); + write_cb(cbopaque, u2s(sv, 10, s)); + write_cb(cbopaque, ")\n"); + } + +#ifdef JEMALLOC_STATS + { + int err; + size_t sszp, ssz; + size_t *cactive; + size_t allocated, active, mapped; + size_t chunks_current, chunks_high, swap_avail; + uint64_t chunks_total; + size_t huge_allocated; + uint64_t huge_nmalloc, huge_ndalloc; + + sszp = sizeof(size_t *); + ssz = sizeof(size_t); + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, mapped: %zu\n", + allocated, active, mapped); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", atomic_read_z(cactive)); + + /* Print chunk stats. */ + CTL_GET("stats.chunks.total", &chunks_total, uint64_t); + CTL_GET("stats.chunks.high", &chunks_high, size_t); + CTL_GET("stats.chunks.current", &chunks_current, size_t); + if ((err = JEMALLOC_P(mallctl)("swap.avail", &swap_avail, &ssz, + NULL, 0)) == 0) { + size_t lg_chunk; + + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks swap_avail\n"); + CTL_GET("opt.lg_chunk", &lg_chunk, size_t); + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64"%13zu%13zu%13zu\n", + chunks_total, chunks_high, chunks_current, + swap_avail << lg_chunk); + } else { + malloc_cprintf(write_cb, cbopaque, "chunks: nchunks " + "highchunks curchunks\n"); + malloc_cprintf(write_cb, cbopaque, + " %13"PRIu64"%13zu%13zu\n", + chunks_total, chunks_high, chunks_current); + } + + /* Print huge stats. 
*/ + CTL_GET("stats.huge.nmalloc", &huge_nmalloc, uint64_t); + CTL_GET("stats.huge.ndalloc", &huge_ndalloc, uint64_t); + CTL_GET("stats.huge.allocated", &huge_allocated, size_t); + malloc_cprintf(write_cb, cbopaque, + "huge: nmalloc ndalloc allocated\n"); + malloc_cprintf(write_cb, cbopaque, + " %12"PRIu64" %12"PRIu64" %12zu\n", + huge_nmalloc, huge_ndalloc, huge_allocated); + + if (merged) { + unsigned narenas; + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + bool initialized[narenas]; + size_t isz; + unsigned i, ninitialized; + + isz = sizeof(initialized); + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + if (ninitialized > 1) { + /* Print merged arena stats. */ + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + stats_arena_print(write_cb, cbopaque, + narenas); + } + } + } + + if (unmerged) { + unsigned narenas; + + /* Print stats for each arena. */ + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + bool initialized[narenas]; + size_t isz; + unsigned i; + + isz = sizeof(initialized); + xmallctl("arenas.initialized", initialized, + &isz, NULL, 0); + + for (i = 0; i < narenas; i++) { + if (initialized[i]) { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", i); + stats_arena_print(write_cb, + cbopaque, i); + } + } + } + } + } +#endif /* #ifdef JEMALLOC_STATS */ + write_cb(cbopaque, "--- End jemalloc statistics ---\n"); +} diff --git a/src/tcache.c b/src/tcache.c new file mode 100644 index 0000000..31c329e --- /dev/null +++ b/src/tcache.c @@ -0,0 +1,480 @@ +#define JEMALLOC_TCACHE_C_ +#include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_TCACHE +/******************************************************************************/ +/* Data. 
*/ + +bool opt_tcache = true; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; +ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT; + +tcache_bin_info_t *tcache_bin_info; +static unsigned stack_nelms; /* Total stack elms per tcache. */ + +/* Map of thread-specific caches. */ +#ifndef NO_TLS +__thread tcache_t *tcache_tls JEMALLOC_ATTR(tls_model("initial-exec")); +#endif + +/* + * Same contents as tcache, but initialized such that the TSD destructor is + * called when a thread exits, so that the cache can be cleaned up. + */ +pthread_key_t tcache_tsd; + +size_t nhbins; +size_t tcache_maxclass; +unsigned tcache_gc_incr; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static void tcache_thread_cleanup(void *arg); + +/******************************************************************************/ + +void * +tcache_alloc_small_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind) +{ + void *ret; + + arena_tcache_fill_small(tcache->arena, tbin, binind +#ifdef JEMALLOC_PROF + , tcache->prof_accumbytes +#endif + ); +#ifdef JEMALLOC_PROF + tcache->prof_accumbytes = 0; +#endif + ret = tcache_alloc_easy(tbin); + + return (ret); +} + +void +tcache_bin_flush_small(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ) +{ + void *ptr; + unsigned i, nflush, ndeferred; +#ifdef JEMALLOC_STATS + bool merged_stats = false; +#endif + + assert(binind < nbins); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena bin associated with the first object. 
*/ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + arena_bin_t *bin = &arena->bins[binind]; + +#ifdef JEMALLOC_PROF + if (arena == tcache->arena) { + malloc_mutex_lock(&arena->lock); + arena_prof_accum(arena, tcache->prof_accumbytes); + malloc_mutex_unlock(&arena->lock); + tcache->prof_accumbytes = 0; + } +#endif + + malloc_mutex_lock(&bin->lock); +#ifdef JEMALLOC_STATS + if (arena == tcache->arena) { + assert(merged_stats == false); + merged_stats = true; + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } +#endif + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) { + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> PAGE_SHIFT; + arena_chunk_map_t *mapelm = + &chunk->map[pageind-map_bias]; + arena_dalloc_bin(arena, chunk, ptr, mapelm); + } else { + /* + * This object was allocated via a different + * arena bin than the one that is currently + * locked. Stash the object, so that it can be + * handled in a future pass. + */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&bin->lock); + } +#ifdef JEMALLOC_STATS + if (merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. 
+ */ + arena_bin_t *bin = &tcache->arena->bins[binind]; + malloc_mutex_lock(&bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&bin->lock); + } +#endif + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +void +tcache_bin_flush_large(tcache_bin_t *tbin, size_t binind, unsigned rem +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache_t *tcache +#endif + ) +{ + void *ptr; + unsigned i, nflush, ndeferred; +#ifdef JEMALLOC_STATS + bool merged_stats = false; +#endif + + assert(binind < nhbins); + assert(rem <= tbin->ncached); + + for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + /* Lock the arena associated with the first object. */ + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( + tbin->avail[0]); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + if (arena == tcache->arena) { +#endif +#ifdef JEMALLOC_PROF + arena_prof_accum(arena, tcache->prof_accumbytes); + tcache->prof_accumbytes = 0; +#endif +#ifdef JEMALLOC_STATS + merged_stats = true; + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; +#endif +#if (defined(JEMALLOC_PROF) || defined(JEMALLOC_STATS)) + } +#endif + ndeferred = 0; + for (i = 0; i < nflush; i++) { + ptr = tbin->avail[i]; + assert(ptr != NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (chunk->arena == arena) + arena_dalloc_large(arena, chunk, ptr); + else { + /* + * This object was allocated via a different + * arena than the one that is currently locked. + * Stash the object, so that it can be handled + * in a future pass. 
+ */ + tbin->avail[ndeferred] = ptr; + ndeferred++; + } + } + malloc_mutex_unlock(&arena->lock); + } +#ifdef JEMALLOC_STATS + if (merged_stats == false) { + /* + * The flush loop didn't happen to flush to this thread's + * arena, so the stats didn't get merged. Manually do so now. + */ + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - nbins].nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(&arena->lock); + } +#endif + + memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], + rem * sizeof(void *)); + tbin->ncached = rem; + if ((int)tbin->ncached < tbin->low_water) + tbin->low_water = tbin->ncached; +} + +tcache_t * +tcache_create(arena_t *arena) +{ + tcache_t *tcache; + size_t size, stack_offset; + unsigned i; + + size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); + /* Naturally align the pointer stacks. */ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); + /* + * Round up to the nearest multiple of the cacheline size, in order to + * avoid the possibility of false cacheline sharing. + * + * That this works relies on the same logic as in ipalloc(), but we + * cannot directly call ipalloc() here due to tcache bootstrapping + * issues. + */ + size = (size + CACHELINE_MASK) & (-CACHELINE); + + if (size <= small_maxclass) + tcache = (tcache_t *)arena_malloc_small(arena, size, true); + else if (size <= tcache_maxclass) + tcache = (tcache_t *)arena_malloc_large(arena, size, true); + else + tcache = (tcache_t *)icalloc(size); + + if (tcache == NULL) + return (NULL); + +#ifdef JEMALLOC_STATS + /* Link into list of extant tcaches. 
*/ + malloc_mutex_lock(&arena->lock); + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&arena->lock); +#endif + + tcache->arena = arena; + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); + for (i = 0; i < nhbins; i++) { + tcache->tbins[i].lg_fill_div = 1; + tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + (uintptr_t)stack_offset); + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + } + + TCACHE_SET(tcache); + + return (tcache); +} + +void +tcache_destroy(tcache_t *tcache) +{ + unsigned i; + size_t tcache_size; + +#ifdef JEMALLOC_STATS + /* Unlink from list of extant tcaches. */ + malloc_mutex_lock(&tcache->arena->lock); + ql_remove(&tcache->arena->tcache_ql, tcache, link); + malloc_mutex_unlock(&tcache->arena->lock); + tcache_stats_merge(tcache, tcache->arena); +#endif + + for (i = 0; i < nbins; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_small(tbin, i, 0 +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + +#ifdef JEMALLOC_STATS + if (tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + } +#endif + } + + for (; i < nhbins; i++) { + tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_flush_large(tbin, i, 0 +#if (defined(JEMALLOC_STATS) || defined(JEMALLOC_PROF)) + , tcache +#endif + ); + +#ifdef JEMALLOC_STATS + if (tbin->tstats.nrequests != 0) { + arena_t *arena = tcache->arena; + malloc_mutex_lock(&arena->lock); + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[i - nbins].nrequests += + tbin->tstats.nrequests; + malloc_mutex_unlock(&arena->lock); + } +#endif + } + +#ifdef JEMALLOC_PROF + if (tcache->prof_accumbytes > 0) { + malloc_mutex_lock(&tcache->arena->lock); + arena_prof_accum(tcache->arena, tcache->prof_accumbytes); + 
malloc_mutex_unlock(&tcache->arena->lock); + } +#endif + + tcache_size = arena_salloc(tcache); + if (tcache_size <= small_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + size_t pageind = ((uintptr_t)tcache - (uintptr_t)chunk) >> + PAGE_SHIFT; + arena_chunk_map_t *mapelm = &chunk->map[pageind-map_bias]; + arena_run_t *run = (arena_run_t *)((uintptr_t)chunk + + (uintptr_t)((pageind - (mapelm->bits >> PAGE_SHIFT)) << + PAGE_SHIFT)); + arena_bin_t *bin = run->bin; + + malloc_mutex_lock(&bin->lock); + arena_dalloc_bin(arena, chunk, tcache, mapelm); + malloc_mutex_unlock(&bin->lock); + } else if (tcache_size <= tcache_maxclass) { + arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache); + arena_t *arena = chunk->arena; + + malloc_mutex_lock(&arena->lock); + arena_dalloc_large(arena, chunk, tcache); + malloc_mutex_unlock(&arena->lock); + } else + idalloc(tcache); +} + +static void +tcache_thread_cleanup(void *arg) +{ + tcache_t *tcache = (tcache_t *)arg; + + if (tcache == (void *)(uintptr_t)1) { + /* + * The previous time this destructor was called, we set the key + * to 1 so that other destructors wouldn't cause re-creation of + * the tcache. This time, do nothing, so that the destructor + * will not be called again. + */ + } else if (tcache == (void *)(uintptr_t)2) { + /* + * Another destructor called an allocator function after this + * destructor was called. Reset tcache to 1 in order to + * receive another callback. + */ + TCACHE_SET((uintptr_t)1); + } else if (tcache != NULL) { + assert(tcache != (void *)(uintptr_t)1); + tcache_destroy(tcache); + TCACHE_SET((uintptr_t)1); + } +} + +#ifdef JEMALLOC_STATS +void +tcache_stats_merge(tcache_t *tcache, arena_t *arena) +{ + unsigned i; + + /* Merge and reset tcache stats. 
*/ + for (i = 0; i < nbins; i++) { + arena_bin_t *bin = &arena->bins[i]; + tcache_bin_t *tbin = &tcache->tbins[i]; + malloc_mutex_lock(&bin->lock); + bin->stats.nrequests += tbin->tstats.nrequests; + malloc_mutex_unlock(&bin->lock); + tbin->tstats.nrequests = 0; + } + + for (; i < nhbins; i++) { + malloc_large_stats_t *lstats = &arena->stats.lstats[i - nbins]; + tcache_bin_t *tbin = &tcache->tbins[i]; + arena->stats.nrequests_large += tbin->tstats.nrequests; + lstats->nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } +} +#endif + +bool +tcache_boot(void) +{ + + if (opt_tcache) { + unsigned i; + + /* + * If necessary, clamp opt_lg_tcache_max, now that + * small_maxclass and arena_maxclass are known. + */ + if (opt_lg_tcache_max < 0 || (1U << + opt_lg_tcache_max) < small_maxclass) + tcache_maxclass = small_maxclass; + else if ((1U << opt_lg_tcache_max) > arena_maxclass) + tcache_maxclass = arena_maxclass; + else + tcache_maxclass = (1U << opt_lg_tcache_max); + + nhbins = nbins + (tcache_maxclass >> PAGE_SHIFT); + + /* Initialize tcache_bin_info. */ + tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + sizeof(tcache_bin_info_t)); + if (tcache_bin_info == NULL) + return (true); + stack_nelms = 0; + for (i = 0; i < nbins; i++) { + if ((arena_bin_info[i].nregs << 1) <= + TCACHE_NSLOTS_SMALL_MAX) { + tcache_bin_info[i].ncached_max = + (arena_bin_info[i].nregs << 1); + } else { + tcache_bin_info[i].ncached_max = + TCACHE_NSLOTS_SMALL_MAX; + } + stack_nelms += tcache_bin_info[i].ncached_max; + } + for (; i < nhbins; i++) { + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + stack_nelms += tcache_bin_info[i].ncached_max; + } + + /* Compute incremental GC event threshold. */ + if (opt_lg_tcache_gc_sweep >= 0) { + tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) / + nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins == + 0) ? 
0 : 1); + } else + tcache_gc_incr = 0; + + if (pthread_key_create(&tcache_tsd, tcache_thread_cleanup) != + 0) { + malloc_write( + ": Error in pthread_key_create()\n"); + abort(); + } + } + + return (false); +} +/******************************************************************************/ +#endif /* JEMALLOC_TCACHE */ diff --git a/src/zone.c b/src/zone.c new file mode 100644 index 0000000..2c1b231 --- /dev/null +++ b/src/zone.c @@ -0,0 +1,354 @@ +#include "jemalloc/internal/jemalloc_internal.h" +#ifndef JEMALLOC_ZONE +# error "This source file is for zones on Darwin (OS X)." +#endif + +/******************************************************************************/ +/* Data. */ + +static malloc_zone_t zone, szone; +static struct malloc_introspection_t zone_introspect, ozone_introspect; + +/******************************************************************************/ +/* Function prototypes for non-inline static functions. */ + +static size_t zone_size(malloc_zone_t *zone, void *ptr); +static void *zone_malloc(malloc_zone_t *zone, size_t size); +static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); +static void *zone_valloc(malloc_zone_t *zone, size_t size); +static void zone_free(malloc_zone_t *zone, void *ptr); +static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +#if (JEMALLOC_ZONE_VERSION >= 6) +static void *zone_memalign(malloc_zone_t *zone, size_t alignment, + size_t size); +static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, + size_t size); +#endif +static void *zone_destroy(malloc_zone_t *zone); +static size_t zone_good_size(malloc_zone_t *zone, size_t size); +static void zone_force_lock(malloc_zone_t *zone); +static void zone_force_unlock(malloc_zone_t *zone); +static size_t ozone_size(malloc_zone_t *zone, void *ptr); +static void ozone_free(malloc_zone_t *zone, void *ptr); +static void *ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +static unsigned ozone_batch_malloc(malloc_zone_t 
*zone, size_t size,
+    void **results, unsigned num_requested);
+static void ozone_batch_free(malloc_zone_t *zone, void **to_be_freed,
+    unsigned num);
+#if (JEMALLOC_ZONE_VERSION >= 6)
+static void ozone_free_definite_size(malloc_zone_t *zone, void *ptr,
+    size_t size);
+#endif
+static void ozone_force_lock(malloc_zone_t *zone);
+static void ozone_force_unlock(malloc_zone_t *zone);
+
+/******************************************************************************/
+/*
+ * Functions.
+ */
+
+/* Zone size callback: report the size of ptr as computed by ivsalloc(). */
+static size_t
+zone_size(malloc_zone_t *zone, void *ptr)
+{
+
+	/*
+	 * There appear to be places within Darwin (such as setenv(3)) that
+	 * cause calls to this function with pointers that *no* zone owns. If
+	 * we knew that all pointers were owned by *some* zone, we could split
+	 * our zone into two parts, and use one as the default allocator and
+	 * the other as the default deallocator/reallocator. Since that will
+	 * not work in practice, we must check all pointers to assure that they
+	 * reside within a mapped chunk before determining size.
+	 */
+	return (ivsalloc(ptr));
+}
+
+/* Zone malloc callback: forwards to jemalloc's malloc(); zone is unused. */
+static void *
+zone_malloc(malloc_zone_t *zone, size_t size)
+{
+
+	return (JEMALLOC_P(malloc)(size));
+}
+
+/* Zone calloc callback: forwards to jemalloc's calloc(); zone is unused. */
+static void *
+zone_calloc(malloc_zone_t *zone, size_t num, size_t size)
+{
+
+	return (JEMALLOC_P(calloc)(num, size));
+}
+
+/*
+ * Zone valloc callback: request PAGE_SIZE-aligned memory through
+ * posix_memalign().  The status code is ignored; ret is returned as
+ * posix_memalign() left it.
+ */
+static void *
+zone_valloc(malloc_zone_t *zone, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, PAGE_SIZE, size);
+
+	return (ret);
+}
+
+/* Zone free callback: forwards to jemalloc's free(); zone is unused. */
+static void
+zone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	JEMALLOC_P(free)(ptr);
+}
+
+/* Zone realloc callback: forwards to jemalloc's realloc(); zone is unused. */
+static void *
+zone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	return (JEMALLOC_P(realloc)(ptr, size));
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+/*
+ * Zone memalign callback: like zone_valloc(), but with a caller-supplied
+ * alignment.  The posix_memalign() status code is ignored.
+ */
+static void *
+zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size)
+{
+	void *ret = NULL; /* Assignment avoids useless compiler warning. */
+
+	JEMALLOC_P(posix_memalign)(&ret, alignment, size);
+
+	return (ret);
+}
+
+/*
+ * Zone sized-free callback: frees ptr with jemalloc's free().  The size
+ * argument is only checked against ivsalloc() by an assertion.
+ */
+static void
+zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	assert(ivsalloc(ptr) == size);
+	JEMALLOC_P(free)(ptr);
+}
+#endif
+
+/* Zone destroy callback: asserts if ever reached, then returns NULL. */
+static void *
+zone_destroy(malloc_zone_t *zone)
+{
+
+	/* This function should never be called. */
+	assert(false);
+	return (NULL);
+}
+
+/*
+ * Introspection good_size callback: report the usable size of an allocation
+ * of the requested size, or the requested size itself if the probe
+ * allocation fails.
+ */
+static size_t
+zone_good_size(malloc_zone_t *zone, size_t size)
+{
+	size_t ret;
+	void *p;
+
+	/*
+	 * Actually create an object of the appropriate size, then find out
+	 * how large it could have been without moving up to the next size
+	 * class.
+	 */
+	p = JEMALLOC_P(malloc)(size);
+	if (p != NULL) {
+		ret = isalloc(p);
+		JEMALLOC_P(free)(p);
+	} else
+		ret = size;
+
+	return (ret);
+}
+
+/* Introspection force_lock callback: runs jemalloc_prefork() if threaded. */
+static void
+zone_force_lock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_prefork();
+}
+
+/* Introspection force_unlock callback: runs jemalloc_postfork() if threaded. */
+static void
+zone_force_unlock(malloc_zone_t *zone)
+{
+
+	if (isthreaded)
+		jemalloc_postfork();
+}
+
+/*
+ * Fill in the file-static jemalloc zone and its introspection table with the
+ * zone_*() callbacks above and return a pointer to the zone.  Callbacks that
+ * are not implemented are set to NULL.
+ */
+malloc_zone_t *
+create_zone(void)
+{
+
+	zone.size = (void *)zone_size;
+	zone.malloc = (void *)zone_malloc;
+	zone.calloc = (void *)zone_calloc;
+	zone.valloc = (void *)zone_valloc;
+	zone.free = (void *)zone_free;
+	zone.realloc = (void *)zone_realloc;
+	zone.destroy = (void *)zone_destroy;
+	zone.zone_name = "jemalloc_zone";
+	zone.batch_malloc = NULL;
+	zone.batch_free = NULL;
+	zone.introspect = &zone_introspect;
+	zone.version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone.memalign = zone_memalign;
+	zone.free_definite_size = zone_free_definite_size;
+#endif
+
+	zone_introspect.enumerator = NULL;
+	zone_introspect.good_size = (void *)zone_good_size;
+	zone_introspect.check = NULL;
+	zone_introspect.print = NULL;
+	zone_introspect.log = NULL;
+	zone_introspect.force_lock = (void *)zone_force_lock;
+	zone_introspect.force_unlock = (void *)zone_force_unlock;
+	zone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone_introspect.zone_locked = NULL;
+#endif
+
+	return (&zone);
+}
+
+/*
+ * Overlay size callback: use ivsalloc() first; if it reports 0, ask the
+ * saved original szone for the size instead.
+ */
+static size_t
+ozone_size(malloc_zone_t *zone, void *ptr)
+{
+	size_t ret;
+
+	ret = ivsalloc(ptr);
+	if (ret == 0)
+		ret = szone.size(zone, ptr);
+
+	return (ret);
+}
+
+/*
+ * Overlay free callback: pointers with a nonzero ivsalloc() size go to
+ * jemalloc's free(); otherwise the pointer is freed through the saved szone,
+ * but only if the szone reports a nonzero size for it.
+ */
+static void
+ozone_free(malloc_zone_t *zone, void *ptr)
+{
+
+	if (ivsalloc(ptr) != 0)
+		JEMALLOC_P(free)(ptr);
+	else {
+		size_t size = szone.size(zone, ptr);
+		if (size != 0)
+			(szone.free)(zone, ptr);
+	}
+}
+
+/*
+ * Overlay realloc callback.  New memory always comes from jemalloc.  A
+ * pointer the saved szone reports a size for is copied into a fresh jemalloc
+ * allocation and then freed through the szone; a pointer with no reported
+ * size in either place is treated like a plain malloc(size).
+ */
+static void *
+ozone_realloc(malloc_zone_t *zone, void *ptr, size_t size)
+{
+	size_t oldsize;
+
+	if (ptr == NULL)
+		return (JEMALLOC_P(malloc)(size));
+
+	oldsize = ivsalloc(ptr);
+	if (oldsize != 0)
+		return (JEMALLOC_P(realloc)(ptr, size));
+	else {
+		oldsize = szone.size(zone, ptr);
+		if (oldsize == 0)
+			return (JEMALLOC_P(malloc)(size));
+		else {
+			void *ret = JEMALLOC_P(malloc)(size);
+			if (ret != NULL) {
+				/* Copy no more than the smaller of the two sizes. */
+				memcpy(ret, ptr, (oldsize < size) ? oldsize :
+				    size);
+				(szone.free)(zone, ptr);
+			}
+			return (ret);
+		}
+	}
+}
+
+/* Overlay batch_malloc callback: always reports zero objects allocated. */
+static unsigned
+ozone_batch_malloc(malloc_zone_t *zone, size_t size, void **results,
+    unsigned num_requested)
+{
+
+	/* Don't bother implementing this interface, since it isn't required. */
+	return (0);
+}
+
+/* Overlay batch_free callback: frees each pointer with ozone_free(). */
+static void
+ozone_batch_free(malloc_zone_t *zone, void **to_be_freed, unsigned num)
+{
+	unsigned i;
+
+	for (i = 0; i < num; i++)
+		ozone_free(zone, to_be_freed[i]);
+}
+
+#if (JEMALLOC_ZONE_VERSION >= 6)
+/*
+ * Overlay sized-free callback: same routing as ozone_free(), with the size
+ * argument checked by assertions on both paths.
+ */
+static void
+ozone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size)
+{
+
+	if (ivsalloc(ptr) != 0) {
+		assert(ivsalloc(ptr) == size);
+		JEMALLOC_P(free)(ptr);
+	} else {
+		assert(size == szone.size(zone, ptr));
+		szone.free_definite_size(zone, ptr, size);
+	}
+}
+#endif
+
+/* Overlay force_lock callback: delegates to the saved szone's force_lock. */
+static void
+ozone_force_lock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_lock(zone);
+}
+
+/* Overlay force_unlock callback: delegates to the saved szone's force_unlock. */
+static void
+ozone_force_unlock(malloc_zone_t *zone)
+{
+
+	/* jemalloc locking is taken care of by the normal jemalloc zone. */
+	szone.introspect->force_unlock(zone);
+}
+
+/*
+ * Overlay the default scalable zone (szone) such that existing allocations are
+ * drained, and further allocations come from jemalloc. This is necessary
+ * because Core Foundation directly accesses and uses the szone before the
+ * jemalloc library is even loaded.
+ */
+void
+szone2ozone(malloc_zone_t *zone)
+{
+
+	/*
+	 * Stash a copy of the original szone so that we can call its
+	 * functions as needed. Note that internally, the szone stores its
+	 * bookkeeping data structures immediately following the malloc_zone_t
+	 * header, so when calling szone functions, we need to pass a pointer
+	 * to the original zone structure.
+	 */
+	memcpy(&szone, zone, sizeof(malloc_zone_t));
+
+	/* Allocation entry points are jemalloc's; the rest are overlays. */
+	zone->size = (void *)ozone_size;
+	zone->malloc = (void *)zone_malloc;
+	zone->calloc = (void *)zone_calloc;
+	zone->valloc = (void *)zone_valloc;
+	zone->free = (void *)ozone_free;
+	zone->realloc = (void *)ozone_realloc;
+	zone->destroy = (void *)zone_destroy;
+	zone->zone_name = "jemalloc_ozone";
+	zone->batch_malloc = ozone_batch_malloc;
+	zone->batch_free = ozone_batch_free;
+	zone->introspect = &ozone_introspect;
+	zone->version = JEMALLOC_ZONE_VERSION;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	zone->memalign = zone_memalign;
+	zone->free_definite_size = ozone_free_definite_size;
+#endif
+
+	ozone_introspect.enumerator = NULL;
+	ozone_introspect.good_size = (void *)zone_good_size;
+	ozone_introspect.check = NULL;
+	ozone_introspect.print = NULL;
+	ozone_introspect.log = NULL;
+	ozone_introspect.force_lock = (void *)ozone_force_lock;
+	ozone_introspect.force_unlock = (void *)ozone_force_unlock;
+	ozone_introspect.statistics = NULL;
+#if (JEMALLOC_ZONE_VERSION >= 6)
+	ozone_introspect.zone_locked = NULL;
+#endif
+}
diff --git a/test/allocated.c b/test/allocated.c
new file mode 100644
index 0000000..b1e40e4
--- /dev/null
+++ b/test/allocated.c
@@ -0,0 +1,142 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +void * +thread_start(void *arg) +{ + int err; + void *p; + uint64_t a0, a1, d0, d1; + uint64_t *ap0, *ap1, *dp0, *dp1; + size_t sz, usize; + + sz = sizeof(a0); + if ((err = JEMALLOC_P(mallctl)("thread.allocated", &a0, &sz, NULL, + 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_STATS + assert(false); +#endif + goto RETURN; + } + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + exit(1); + } + sz = sizeof(ap0); + if ((err = JEMALLOC_P(mallctl)("thread.allocatedp", &ap0, &sz, NULL, + 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_STATS + assert(false); +#endif + goto RETURN; + } + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + exit(1); + } + assert(*ap0 == a0); + + sz = sizeof(d0); + if ((err = JEMALLOC_P(mallctl)("thread.deallocated", &d0, &sz, NULL, + 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_STATS + assert(false); +#endif + goto RETURN; + } + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + exit(1); + } + sz = sizeof(dp0); + if ((err = JEMALLOC_P(mallctl)("thread.deallocatedp", &dp0, &sz, NULL, + 0))) { + if (err == ENOENT) { +#ifdef JEMALLOC_STATS + assert(false); +#endif + goto RETURN; + } + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + exit(1); + } + assert(*dp0 == d0); + + p = JEMALLOC_P(malloc)(1); + if (p == NULL) { + fprintf(stderr, "%s(): Error in malloc()\n", __func__); + exit(1); + } + + sz = sizeof(a1); + JEMALLOC_P(mallctl)("thread.allocated", &a1, &sz, NULL, 0); + sz = sizeof(ap1); + JEMALLOC_P(mallctl)("thread.allocatedp", &ap1, &sz, NULL, 0); + assert(*ap1 == a1); + assert(ap0 == ap1); + + usize = JEMALLOC_P(malloc_usable_size)(p); + assert(a0 + usize <= a1); + + JEMALLOC_P(free)(p); + + sz = sizeof(d1); + JEMALLOC_P(mallctl)("thread.deallocated", &d1, &sz, NULL, 0); + sz = sizeof(dp1); + JEMALLOC_P(mallctl)("thread.deallocatedp", &dp1, &sz, NULL, 0); + 
assert(*dp1 == d1); + assert(dp0 == dp1); + + assert(d0 + usize <= d1); + +RETURN: + return (NULL); +} + +int +main(void) +{ + int ret = 0; + pthread_t thread; + + fprintf(stderr, "Test begin\n"); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + + if (pthread_create(&thread, NULL, thread_start, NULL) + != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", __func__); + ret = 1; + goto RETURN; + } + pthread_join(thread, (void *)&ret); + + thread_start(NULL); + +RETURN: + fprintf(stderr, "Test end\n"); + return (ret); +} diff --git a/test/allocated.exp b/test/allocated.exp new file mode 100644 index 0000000..369a88d --- /dev/null +++ b/test/allocated.exp @@ -0,0 +1,2 @@ +Test begin +Test end diff --git a/test/allocm.c b/test/allocm.c new file mode 100644 index 0000000..59d0002 --- /dev/null +++ b/test/allocm.c @@ -0,0 +1,133 @@ +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define CHUNK 0x400000 +/* #define MAXALIGN ((size_t)0x80000000000LLU) */ +#define MAXALIGN ((size_t)0x2000000LLU) +#define NITER 4 + +int +main(void) +{ + int r; + void *p; + size_t sz, alignment, total, tsz; + unsigned i; + void *ps[NITER]; + + fprintf(stderr, "Test begin\n"); + + sz = 0; + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected allocm() error\n"); + abort(); + } + if (sz < 42) + fprintf(stderr, "Real size smaller than expected\n"); + if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected dallocm() error\n"); + + r = JEMALLOC_P(allocm)(&p, NULL, 42, 0); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected allocm() error\n"); + abort(); + } + if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected dallocm() error\n"); + + r = 
JEMALLOC_P(allocm)(&p, NULL, 42, ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected allocm() error\n"); + abort(); + } + if (JEMALLOC_P(dallocm)(p, 0) != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected dallocm() error\n"); + +#if LG_SIZEOF_PTR == 3 + alignment = 0x8000000000000000LLU; + sz = 0x8000000000000000LLU; +#else + alignment = 0x80000000LU; + sz = 0x80000000LU; +#endif + r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for allocm(&p, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + +#if LG_SIZEOF_PTR == 3 + alignment = 0x4000000000000000LLU; + sz = 0x8400000000000001LLU; +#else + alignment = 0x40000000LU; + sz = 0x84000001LU; +#endif + r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for allocm(&p, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + + alignment = 0x10LLU; +#if LG_SIZEOF_PTR == 3 + sz = 0xfffffffffffffff0LLU; +#else + sz = 0xfffffff0LU; +#endif + r = JEMALLOC_P(allocm)(&p, NULL, sz, ALLOCM_ALIGN(alignment)); + if (r == ALLOCM_SUCCESS) { + fprintf(stderr, + "Expected error for allocm(&p, %zu, 0x%x)\n", + sz, ALLOCM_ALIGN(alignment)); + } + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + fprintf(stderr, "Alignment: %zu\n", alignment); + for (sz = 1; + sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + r = JEMALLOC_P(allocm)(&ps[i], NULL, sz, + ALLOCM_ALIGN(alignment) | ALLOCM_ZERO); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, + "Error for size %zu (0x%zx): %d\n", + sz, sz, r); + exit(1); + } + if ((uintptr_t)p & (alignment-1)) { + fprintf(stderr, + "%p inadequately aligned for" + " alignment: %zu\n", p, alignment); + } + JEMALLOC_P(sallocm)(ps[i], &tsz, 0); + total += tsz; + if (total >= (MAXALIGN << 1)) + 
break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + JEMALLOC_P(dallocm)(ps[i], 0); + ps[i] = NULL; + } + } + } + } + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/allocm.exp b/test/allocm.exp new file mode 100644 index 0000000..b5061c7 --- /dev/null +++ b/test/allocm.exp @@ -0,0 +1,25 @@ +Test begin +Alignment: 8 +Alignment: 16 +Alignment: 32 +Alignment: 64 +Alignment: 128 +Alignment: 256 +Alignment: 512 +Alignment: 1024 +Alignment: 2048 +Alignment: 4096 +Alignment: 8192 +Alignment: 16384 +Alignment: 32768 +Alignment: 65536 +Alignment: 131072 +Alignment: 262144 +Alignment: 524288 +Alignment: 1048576 +Alignment: 2097152 +Alignment: 4194304 +Alignment: 8388608 +Alignment: 16777216 +Alignment: 33554432 +Test end diff --git a/test/bitmap.c b/test/bitmap.c new file mode 100644 index 0000000..adfaacf --- /dev/null +++ b/test/bitmap.c @@ -0,0 +1,157 @@ +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +/* + * Avoid using the assert() from jemalloc_internal.h, since it requires + * internal libjemalloc functionality. + * */ +#include + +/* + * Directly include the bitmap code, since it isn't exposed outside + * libjemalloc. 
+ */ +#include "../src/bitmap.c" + +#if (LG_BITMAP_MAXBITS > 12) +# define MAXBITS 4500 +#else +# define MAXBITS (1U << LG_BITMAP_MAXBITS) +#endif + +static void +test_bitmap_size(void) +{ + size_t i, prev_size; + + prev_size = 0; + for (i = 1; i <= MAXBITS; i++) { + size_t size = bitmap_size(i); + assert(size >= prev_size); + prev_size = size; + } +} + +static void +test_bitmap_init(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + assert(bitmap_get(bitmap, &binfo, j) == false); + + } + } +} + +static void +test_bitmap_set(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +static void +test_bitmap_unset(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + size_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + for (j = 0; j < i; j++) + bitmap_unset(bitmap, &binfo, j); + for (j = 0; j < i; j++) + bitmap_set(bitmap, &binfo, j); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +static void +test_bitmap_sfu(void) +{ + size_t i; + + for (i = 1; i <= MAXBITS; i++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, i); + { + ssize_t j; + bitmap_t bitmap[bitmap_info_ngroups(&binfo)]; + bitmap_init(bitmap, &binfo); + + /* Iteratively set bits starting at the beginning. 
*/ + for (j = 0; j < i; j++) + assert(bitmap_sfu(bitmap, &binfo) == j); + assert(bitmap_full(bitmap, &binfo)); + + /* + * Iteratively unset bits starting at the end, and + * verify that bitmap_sfu() reaches the unset bits. + */ + for (j = i - 1; j >= 0; j--) { + bitmap_unset(bitmap, &binfo, j); + assert(bitmap_sfu(bitmap, &binfo) == j); + bitmap_unset(bitmap, &binfo, j); + } + assert(bitmap_get(bitmap, &binfo, 0) == false); + + /* + * Iteratively set bits starting at the beginning, and + * verify that bitmap_sfu() looks past them. + */ + for (j = 1; j < i; j++) { + bitmap_set(bitmap, &binfo, j - 1); + assert(bitmap_sfu(bitmap, &binfo) == j); + bitmap_unset(bitmap, &binfo, j); + } + assert(bitmap_sfu(bitmap, &binfo) == i - 1); + assert(bitmap_full(bitmap, &binfo)); + } + } +} + +int +main(void) +{ + fprintf(stderr, "Test begin\n"); + + test_bitmap_size(); + test_bitmap_init(); + test_bitmap_set(); + test_bitmap_unset(); + test_bitmap_sfu(); + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/bitmap.exp b/test/bitmap.exp new file mode 100644 index 0000000..369a88d --- /dev/null +++ b/test/bitmap.exp @@ -0,0 +1,2 @@ +Test begin +Test end diff --git a/test/jemalloc_test.h.in b/test/jemalloc_test.h.in new file mode 100644 index 0000000..0c48895 --- /dev/null +++ b/test/jemalloc_test.h.in @@ -0,0 +1,6 @@ +/* + * This header should be included by tests, rather than directly including + * jemalloc/jemalloc.h, because --with-install-suffix may cause the header to + * have a different name. 
+ */ +#include "jemalloc/jemalloc@install_suffix@.h" diff --git a/test/mremap.c b/test/mremap.c new file mode 100644 index 0000000..146c66f --- /dev/null +++ b/test/mremap.c @@ -0,0 +1,67 @@ +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +int +main(void) +{ + int ret, err; + size_t sz, lg_chunk, chunksize, i; + char *p, *q; + + fprintf(stderr, "Test begin\n"); + + sz = sizeof(lg_chunk); + if ((err = JEMALLOC_P(mallctl)("opt.lg_chunk", &lg_chunk, &sz, NULL, + 0))) { + assert(err != ENOENT); + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + ret = 1; + goto RETURN; + } + chunksize = ((size_t)1U) << lg_chunk; + + p = (char *)malloc(chunksize); + if (p == NULL) { + fprintf(stderr, "malloc(%zu) --> %p\n", chunksize, p); + ret = 1; + goto RETURN; + } + memset(p, 'a', chunksize); + + q = (char *)realloc(p, chunksize * 2); + if (q == NULL) { + fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize * 2, + q); + ret = 1; + goto RETURN; + } + for (i = 0; i < chunksize; i++) { + assert(q[i] == 'a'); + } + + p = q; + + q = (char *)realloc(p, chunksize); + if (q == NULL) { + fprintf(stderr, "realloc(%p, %zu) --> %p\n", p, chunksize, q); + ret = 1; + goto RETURN; + } + for (i = 0; i < chunksize; i++) { + assert(q[i] == 'a'); + } + + free(q); + + ret = 0; +RETURN: + fprintf(stderr, "Test end\n"); + return (ret); +} diff --git a/test/mremap.exp b/test/mremap.exp new file mode 100644 index 0000000..369a88d --- /dev/null +++ b/test/mremap.exp @@ -0,0 +1,2 @@ +Test begin +Test end diff --git a/test/posix_memalign.c b/test/posix_memalign.c new file mode 100644 index 0000000..3e306c0 --- /dev/null +++ b/test/posix_memalign.c @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define CHUNK 0x400000 +/* #define MAXALIGN ((size_t)0x80000000000LLU) */ +#define MAXALIGN ((size_t)0x2000000LLU) +#define NITER 4 + +int 
+main(void) +{ + size_t alignment, size, total; + unsigned i; + int err; + void *p, *ps[NITER]; + + fprintf(stderr, "Test begin\n"); + + /* Test error conditions. */ + for (alignment = 0; alignment < sizeof(void *); alignment++) { + err = JEMALLOC_P(posix_memalign)(&p, alignment, 1); + if (err != EINVAL) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", + alignment); + } + } + + for (alignment = sizeof(size_t); alignment < MAXALIGN; + alignment <<= 1) { + err = JEMALLOC_P(posix_memalign)(&p, alignment + 1, 1); + if (err == 0) { + fprintf(stderr, + "Expected error for invalid alignment %zu\n", + alignment + 1); + } + } + +#if LG_SIZEOF_PTR == 3 + alignment = 0x8000000000000000LLU; + size = 0x8000000000000000LLU; +#else + alignment = 0x80000000LU; + size = 0x80000000LU; +#endif + err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + if (err == 0) { + fprintf(stderr, + "Expected error for posix_memalign(&p, %zu, %zu)\n", + alignment, size); + } + +#if LG_SIZEOF_PTR == 3 + alignment = 0x4000000000000000LLU; + size = 0x8400000000000001LLU; +#else + alignment = 0x40000000LU; + size = 0x84000001LU; +#endif + err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + if (err == 0) { + fprintf(stderr, + "Expected error for posix_memalign(&p, %zu, %zu)\n", + alignment, size); + } + + alignment = 0x10LLU; +#if LG_SIZEOF_PTR == 3 + size = 0xfffffffffffffff0LLU; +#else + size = 0xfffffff0LU; +#endif + err = JEMALLOC_P(posix_memalign)(&p, alignment, size); + if (err == 0) { + fprintf(stderr, + "Expected error for posix_memalign(&p, %zu, %zu)\n", + alignment, size); + } + + for (i = 0; i < NITER; i++) + ps[i] = NULL; + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + fprintf(stderr, "Alignment: %zu\n", alignment); + for (size = 1; + size < 3 * alignment && size < (1U << 31); + size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + err = JEMALLOC_P(posix_memalign)(&ps[i], + alignment, size); + if (err) 
{ + fprintf(stderr, + "Error for size %zu (0x%zx): %s\n", + size, size, strerror(err)); + exit(1); + } + total += JEMALLOC_P(malloc_usable_size)(ps[i]); + if (total >= (MAXALIGN << 1)) + break; + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + JEMALLOC_P(free)(ps[i]); + ps[i] = NULL; + } + } + } + } + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/posix_memalign.exp b/test/posix_memalign.exp new file mode 100644 index 0000000..b5061c7 --- /dev/null +++ b/test/posix_memalign.exp @@ -0,0 +1,25 @@ +Test begin +Alignment: 8 +Alignment: 16 +Alignment: 32 +Alignment: 64 +Alignment: 128 +Alignment: 256 +Alignment: 512 +Alignment: 1024 +Alignment: 2048 +Alignment: 4096 +Alignment: 8192 +Alignment: 16384 +Alignment: 32768 +Alignment: 65536 +Alignment: 131072 +Alignment: 262144 +Alignment: 524288 +Alignment: 1048576 +Alignment: 2097152 +Alignment: 4194304 +Alignment: 8388608 +Alignment: 16777216 +Alignment: 33554432 +Test end diff --git a/test/rallocm.c b/test/rallocm.c new file mode 100644 index 0000000..a8cadeb --- /dev/null +++ b/test/rallocm.c @@ -0,0 +1,117 @@ +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +int +main(void) +{ + void *p, *q; + size_t sz, tsz; + int r; + + fprintf(stderr, "Test begin\n"); + + r = JEMALLOC_P(allocm)(&p, &sz, 42, 0); + if (r != ALLOCM_SUCCESS) { + fprintf(stderr, "Unexpected allocm() error\n"); + abort(); + } + + q = p; + r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 0, ALLOCM_NO_MOVE); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q != p) + fprintf(stderr, "Unexpected object move\n"); + if (tsz != sz) { + fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + sz, tsz); + } + + q = p; + r = JEMALLOC_P(rallocm)(&q, &tsz, sz, 5, ALLOCM_NO_MOVE); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q != p) + fprintf(stderr, "Unexpected object move\n"); + if (tsz != sz) { + fprintf(stderr, "Unexpected size 
change: %zu --> %zu\n", + sz, tsz); + } + + q = p; + r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, ALLOCM_NO_MOVE); + if (r != ALLOCM_ERR_NOT_MOVED) + fprintf(stderr, "Unexpected rallocm() result\n"); + if (q != p) + fprintf(stderr, "Unexpected object move\n"); + if (tsz != sz) { + fprintf(stderr, "Unexpected size change: %zu --> %zu\n", + sz, tsz); + } + + q = p; + r = JEMALLOC_P(rallocm)(&q, &tsz, sz + 5, 0, 0); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q == p) + fprintf(stderr, "Expected object move\n"); + if (tsz == sz) { + fprintf(stderr, "Expected size change: %zu --> %zu\n", + sz, tsz); + } + p = q; + sz = tsz; + + r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, 0); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q == p) + fprintf(stderr, "Expected object move\n"); + if (tsz == sz) { + fprintf(stderr, "Expected size change: %zu --> %zu\n", + sz, tsz); + } + p = q; + sz = tsz; + + r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, 0); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (tsz == sz) { + fprintf(stderr, "Expected size change: %zu --> %zu\n", + sz, tsz); + } + p = q; + sz = tsz; + + r = JEMALLOC_P(rallocm)(&q, &tsz, 8192, 0, ALLOCM_NO_MOVE); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q != p) + fprintf(stderr, "Unexpected object move\n"); + if (tsz == sz) { + fprintf(stderr, "Expected size change: %zu --> %zu\n", + sz, tsz); + } + sz = tsz; + + r = JEMALLOC_P(rallocm)(&q, &tsz, 16384, 0, ALLOCM_NO_MOVE); + if (r != ALLOCM_SUCCESS) + fprintf(stderr, "Unexpected rallocm() error\n"); + if (q != p) + fprintf(stderr, "Unexpected object move\n"); + if (tsz == sz) { + fprintf(stderr, "Expected size change: %zu --> %zu\n", + sz, tsz); + } + sz = tsz; + + JEMALLOC_P(dallocm)(p, 0); + + fprintf(stderr, "Test end\n"); + return (0); +} diff --git a/test/rallocm.exp b/test/rallocm.exp new file mode 100644 index 
0000000..369a88d --- /dev/null +++ b/test/rallocm.exp @@ -0,0 +1,2 @@ +Test begin +Test end diff --git a/test/thread_arena.c b/test/thread_arena.c new file mode 100644 index 0000000..ef8d681 --- /dev/null +++ b/test/thread_arena.c @@ -0,0 +1,92 @@ +#include +#include +#include +#include +#include + +#define JEMALLOC_MANGLE +#include "jemalloc_test.h" + +#define NTHREADS 10 + +void * +thread_start(void *arg) +{ + unsigned main_arena_ind = *(unsigned *)arg; + void *p; + unsigned arena_ind; + size_t size; + int err; + + p = JEMALLOC_P(malloc)(1); + if (p == NULL) { + fprintf(stderr, "%s(): Error in malloc()\n", __func__); + return (void *)1; + } + + size = sizeof(arena_ind); + if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, + &main_arena_ind, sizeof(main_arena_ind)))) { + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + return (void *)1; + } + + size = sizeof(arena_ind); + if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, + 0))) { + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + return (void *)1; + } + assert(arena_ind == main_arena_ind); + + return (NULL); +} + +int +main(void) +{ + int ret = 0; + void *p; + unsigned arena_ind; + size_t size; + int err; + pthread_t threads[NTHREADS]; + unsigned i; + + fprintf(stderr, "Test begin\n"); + + p = JEMALLOC_P(malloc)(1); + if (p == NULL) { + fprintf(stderr, "%s(): Error in malloc()\n", __func__); + ret = 1; + goto RETURN; + } + + size = sizeof(arena_ind); + if ((err = JEMALLOC_P(mallctl)("thread.arena", &arena_ind, &size, NULL, + 0))) { + fprintf(stderr, "%s(): Error in mallctl(): %s\n", __func__, + strerror(err)); + ret = 1; + goto RETURN; + } + + for (i = 0; i < NTHREADS; i++) { + if (pthread_create(&threads[i], NULL, thread_start, + (void *)&arena_ind) != 0) { + fprintf(stderr, "%s(): Error in pthread_create()\n", + __func__); + ret = 1; + goto RETURN; + } + } + + for (i = 0; i < NTHREADS; i++) + 
pthread_join(threads[i], (void *)&ret); + +RETURN: + fprintf(stderr, "Test end\n"); + return (ret); +} diff --git a/test/thread_arena.exp b/test/thread_arena.exp new file mode 100644 index 0000000..369a88d --- /dev/null +++ b/test/thread_arena.exp @@ -0,0 +1,2 @@ +Test begin +Test end -- cgit v0.12