From cc00a157703d5da61b9ffc29a80077c4aafa0277 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Jun 2009 18:06:48 -0700 Subject: Clean up the manpage and conditionalize various portions according to how jemalloc is configured. Modify arena_malloc() API to avoid unnecessary choose_arena() calls. Remove unnecessary code from choose_arena(). Enable lazy-lock by default, now that choose_arena() is both faster and out of the critical path. Implement objdir support in the build system. --- jemalloc/INSTALL | 172 ++++++++++++ jemalloc/Makefile.in | 27 +- jemalloc/README | 4 + jemalloc/configure.ac | 65 ++++- jemalloc/doc/jemalloc.3 | 589 ---------------------------------------- jemalloc/doc/jemalloc.3.in | 577 +++++++++++++++++++++++++++++++++++++++ jemalloc/src/jemalloc.c | 52 ++-- jemalloc/src/jemalloc.h | 20 +- jemalloc/src/jemalloc_defs.h.in | 10 + 9 files changed, 878 insertions(+), 638 deletions(-) create mode 100644 jemalloc/INSTALL create mode 100644 jemalloc/README delete mode 100644 jemalloc/doc/jemalloc.3 create mode 100644 jemalloc/doc/jemalloc.3.in diff --git a/jemalloc/INSTALL b/jemalloc/INSTALL new file mode 100644 index 0000000..1320ba9 --- /dev/null +++ b/jemalloc/INSTALL @@ -0,0 +1,172 @@ +Building and installing jemalloc can be as simple as typing the following while +in the root directory of the source tree: + + ./configure + make + make install + +=== Advanced configuration ===================================================== + +The 'configure' script supports numerous options that allow control of which +functionality is enabled, where jemalloc is installed, etc. Optionally, pass +any of the following arguments (not a definitive list) to 'configure': + +--help + Print a definitive list of options. + +--prefix= + Set the base directory in which to install. For example: + + ./configure --prefix=/usr/local + + will cause files to be installed into /usr/local/include, /usr/local/lib, + and /usr/local/man. 
+ +--with-rpath= + Embed one or more library paths, so that the jemalloc shared library can + find the libraries it is linked to. This works only on ELF-based systems. + +--enable-debug + Enable assertions and validation code. This incurs a substantial + performance hit, but is very useful during application development. + +--enable-stats + Enable statistics gathering functionality. Use the 'P' option to print + detailed allocation statistics at exit, and/or the 'U' option to print a + detailed allocation trace log. + +--disable-tiny + Disable tiny (sub-quantum-sized) object support. Technically it is not + legal for a malloc implementation to allocate objects with less than + quantum alignment (8 or 16 bytes, depending on architecture), but in + practice it never causes any problems if, for example, 4-byte allocations + are 4-byte-aligned. + +--disable-mag + Disable thread-specific caches for sub-page-sized objects. Objects are + cached and released in bulk using "magazines" -- a term coined by the + developers of Solaris's umem allocator. + +--disable-balance + Disable dynamic rebalancing of thread-->arena assignments. + +--enable-dss + Enable support for page allocation/deallocation via sbrk(2), in addition to + mmap(2). + +--enable-fill + Enable support for junk/zero filling of memory. Use the 'J' option to + control junk filling, or the 'Z' option to control zero filling. + +--enable-xmalloc + Enable support for optional immediate termination due to out-of-memory + errors, as is commonly implemented by "xmalloc" wrapper functions for malloc. + Use the 'X' option to control termination behavior. + +--enable-sysv + Enable support for System V semantics, wherein malloc(0) returns NULL + rather than a minimal allocation. Use the 'V' option to control System V + compatibility. 
+ +--enable-dynamic-page-shift + Under most conditions, the system page size never changes (usually 4KiB or + 8KiB, depending on architecture and configuration), and unless this option + is enabled, jemalloc assumes that page size can safely be determined during + configuration and hard-coded. Enabling dynamic page size determination has + a measurable impact on performance, since the compiler is forced to load + the page size from memory rather than embedding immediate values. + +--disable-lazy-lock + Disable code that wraps pthread_create() to detect when an application + switches from single-threaded to multi-threaded mode, so that it can avoid + mutex locking/unlocking operations while in single-threaded mode. In + practice, this feature usually has little impact on performance unless + magazines are disabled. + +The following environment variables (not a definitive list) impact configure's +behavior: + +CFLAGS="?" + Pass these flags to the compiler. You probably shouldn't define this unless + you know what you are doing. (Use EXTRA_CFLAGS instead.) + +EXTRA_CFLAGS="?" + Append these flags to CFLAGS. This makes it possible to add flags such as + -Werror, while allowing the configure script to determine what other flags + are appropriate for the specified configuration. + + The configure script specifically checks whether an optimization flag (-O*) + is specified in EXTRA_CFLAGS, and refrains from specifying an optimization + level if it finds that one has already been specified. + +CPPFLAGS="?" + Pass these flags to the C preprocessor. Note that CFLAGS is not passed to + 'cpp' when 'configure' is looking for include files, so you must use + CPPFLAGS instead if you need to help 'configure' find header files. + +LD_LIBRARY_PATH="?" + 'ld' uses this colon-separated list to find libraries. + +LDFLAGS="?" + Pass these flags when linking. + +PATH="?" + 'configure' uses this to find programs. 
+ +=== Advanced compilation ======================================================= + +To run integrated regression tests, type: + + make check + +To clean up build results to varying degrees, use the following make targets: + + clean + distclean + relclean + +=== Advanced installation ====================================================== + +Optionally, define make variables when invoking make, including (not +exclusively): + +INCLUDEDIR="?" + Use this as the installation prefix for header files. + +LIBDIR="?" + Use this as the installation prefix for libraries. + +MANDIR="?" + Use this as the installation prefix for man pages. + +CC="?" + Use this to invoke the C compiler. + +CFLAGS="?" + Pass these flags to the compiler. + +CPPFLAGS="?" + Pass these flags to the C preprocessor. + +LDFLAGS="?" + Pass these flags when linking. + +PATH="?" + Use this to search for programs used during configuration and building. + +=== Development ================================================================ + +If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' +script rather than 'configure'. This re-generates 'configure', enables +configuration dependency rules, and enables re-generation of automatically +generated source files. + +The build system supports using an object directory separate from the source +tree. For example, you can create an 'obj' directory, and from within that +directory, issue configuration and build commands: + + autoconf + mkdir obj + cd obj + ../configure --enable-autogen + make diff --git a/jemalloc/Makefile.in b/jemalloc/Makefile.in index 1652ec9..e8d212e 100644 --- a/jemalloc/Makefile.in +++ b/jemalloc/Makefile.in @@ -11,10 +11,8 @@ SHELL := /bin/sh CC := @CC@ # Configuration parameters. -BINDIR := @BINDIR@ INCLUDEDIR := @INCLUDEDIR@ LIBDIR := @LIBDIR@ -DATADIR := @DATADIR@ MANDIR := @MANDIR@ # Build parameters. @@ -34,20 +32,20 @@ endif REV := 0 # File lists. 
-CHDRS := src/jemalloc.h -CSRCS := src/jemalloc.c -DSO := lib/libjemalloc.so.$(REV) -MAN3 := doc/jemalloc.3 +CHDRS := @srcroot@src/jemalloc.h @objroot@src/jemalloc_defs.h +CSRCS := @srcroot@src/jemalloc.c +DSOS := @objroot@lib/libjemalloc.so.$(REV) @objroot@lib/libjemalloc.so +MAN3 := @objroot@doc/jemalloc.3 .PHONY: all dist install check clean distclean relclean # Default target. -all: $(DSO) +all: $(DSOS) -src/%.o: src/%.c +@objroot@src/%.o: @srcroot@src/%.c $(CC) $(CFLAGS) -c $(CPPFLAGS) -o $@ $+ -$(DSO): $(CSRCS:%.c=%.o) +$(DSOS): $(CSRCS:@srcroot@%.c=@objroot@%.o) @mkdir -p $(@D) gcc -shared -o $@ $+ $(LDFLAGS) $(LIBS) ln -sf libjemalloc.so.$(REV) lib/libjemalloc.so @@ -59,7 +57,10 @@ install: install -m 644 $$h $(INCLUDEDIR); \ done install -d $(LIBDIR) - install -m 755 $(DSO) $(LIBDIR) + @for s in $(DSOS); do \ + echo "install -m 755 $$s $(LIBDIR)"; \ + install -m 755 $$s $(LIBDIR); \ +done install -d $(MANDIR) @for m in $(MAN3); do \ echo "install -m 644 $$m $(MANDIR)/man3"; \ @@ -69,9 +70,9 @@ done check: clean: - rm -f src/*.o - rm -f lib/libjemalloc.so - rm -f lib/libjemalloc.so.$(REV) + rm -f @objroot@src/*.o + rm -f @objroot@lib/libjemalloc.so + rm -f @objroot@lib/libjemalloc.so.$(REV) distclean: clean rm -f @objroot@config.log diff --git a/jemalloc/README b/jemalloc/README new file mode 100644 index 0000000..de3a0a8 --- /dev/null +++ b/jemalloc/README @@ -0,0 +1,4 @@ +jemalloc is a general-purpose scalable concurrent malloc(3) implementation. + +The INSTALL file contains information on how to configure, build, and install +jemalloc. diff --git a/jemalloc/configure.ac b/jemalloc/configure.ac index fa0c1bc..e0bf8f5 100644 --- a/jemalloc/configure.ac +++ b/jemalloc/configure.ac @@ -41,7 +41,7 @@ MANDIR=`eval echo $mandir` MANDIR=`eval echo $MANDIR` AC_SUBST([MANDIR]) -cfgoutputs="Makefile" +cfgoutputs="Makefile doc/jemalloc.3" cfghdrs="src/jemalloc_defs.h" dnl If CFLAGS isn't defined and using gcc, set CFLAGS to something reasonable. 
@@ -219,6 +219,12 @@ if test "x$enable_stats" = "x1" ; then AC_DEFINE([JEMALLOC_STATS], [ ]) fi AC_SUBST([enable_stats]) +if test "x$enable_stats" = "x0" ; then + roff_stats=".\\\" " +else + roff_stats="" +fi +AC_SUBST([roff_stats]) dnl Enable tiny allocations by default. AC_ARG_ENABLE([tiny], @@ -235,6 +241,15 @@ if test "x$enable_tiny" = "x1" ; then AC_DEFINE([JEMALLOC_TINY], [ ]) fi AC_SUBST([enable_tiny]) +if test "x$enable_tiny" = "x0" ; then + roff_tiny=".\\\" " + roff_no_tiny="" +else + roff_tiny="" + roff_no_tiny=".\\\" " +fi +AC_SUBST([roff_tiny]) +AC_SUBST([roff_no_tiny]) dnl Enable magazines by default. AC_ARG_ENABLE([mag], @@ -251,6 +266,12 @@ if test "x$enable_mag" = "x1" ; then AC_DEFINE([JEMALLOC_MAG], [ ]) fi AC_SUBST([enable_mag]) +if test "x$enable_mag" = "x0" ; then + roff_mag=".\\\" " +else + roff_mag="" +fi +AC_SUBST([roff_mag]) dnl Enable dynamic arena load balancing by default. AC_ARG_ENABLE([balance], @@ -267,6 +288,12 @@ if test "x$enable_balance" = "x1" ; then AC_DEFINE([JEMALLOC_BALANCE], [ ]) fi AC_SUBST([enable_balance]) +if test "x$enable_balance" = "x0" ; then + roff_balance=".\\\" " +else + roff_balance="" +fi +AC_SUBST([roff_balance]) dnl Do not enable allocation from DSS by default. AC_ARG_ENABLE([dss], @@ -283,6 +310,12 @@ if test "x$enable_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ]) fi AC_SUBST([enable_dss]) +if test "x$enable_dss" = "x0" ; then + roff_dss=".\\\" " +else + roff_dss="" +fi +AC_SUBST([roff_dss]) dnl Do not support the junk/zero filling option by default. AC_ARG_ENABLE([fill], @@ -299,6 +332,12 @@ if test "x$enable_fill" = "x1" ; then AC_DEFINE([JEMALLOC_FILL], [ ]) fi AC_SUBST([enable_fill]) +if test "x$enable_fill" = "x0" ; then + roff_fill=".\\\" " +else + roff_fill="" +fi +AC_SUBST([roff_fill]) dnl Do not support the xmalloc option by default. 
AC_ARG_ENABLE([xmalloc], @@ -315,6 +354,12 @@ if test "x$enable_xmalloc" = "x1" ; then AC_DEFINE([JEMALLOC_XMALLOC], [ ]) fi AC_SUBST([enable_xmalloc]) +if test "x$enable_xmalloc" = "x0" ; then + roff_xmalloc=".\\\" " +else + roff_xmalloc="" +fi +AC_SUBST([roff_xmalloc]) dnl Do not support the SYSV option by default. AC_ARG_ENABLE([sysv], @@ -331,6 +376,12 @@ if test "x$enable_sysv" = "x1" ; then AC_DEFINE([JEMALLOC_SYSV], [ ]) fi AC_SUBST([enable_sysv]) +if test "x$enable_sysv" = "x0" ; then + roff_sysv=".\\\" " +else + roff_sysv="" +fi +AC_SUBST([roff_sysv]) dnl Do not determine page shift at run time by default. AC_ARG_ENABLE([dynamic_page_shift], @@ -380,6 +431,7 @@ dnl ============================================================================ dnl jemalloc configuration. dnl jemalloc_version=`cat ${srcroot}VERSION` +AC_DEFINE_UNQUOTED([JEMALLOC_VERSION], ["$jemalloc_version"]) AC_SUBST([jemalloc_version]) dnl ============================================================================ @@ -400,21 +452,24 @@ AC_RUN_IFELSE([AC_LANG_PROGRAM( return 0; ]])], - AC_MSG_RESULT([yes]), + AC_MSG_RESULT([yes]) + roff_tls="", AC_MSG_RESULT([no]) + roff_tls=".\\\" " AC_DEFINE_UNQUOTED([NO_TLS], [ ])) +AC_SUBST([roff_tls]) -dnl Do not enable lazy locking by default. +dnl Enable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], - [Enable lazy locking (avoid locking unless multiple threads)])], + [Disable lazy locking (always lock, even when single-threaded)])], [if test "x$enable_lazy_lock" = "xno" ; then enable_lazy_lock="0" else enable_lazy_lock="1" fi ], -[enable_lazy_lock="0"] +[enable_lazy_lock="1"] ) if test "x$enable_lazy_lock" = "x1" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) diff --git a/jemalloc/doc/jemalloc.3 b/jemalloc/doc/jemalloc.3 deleted file mode 100644 index b26ec39..0000000 --- a/jemalloc/doc/jemalloc.3 +++ /dev/null @@ -1,589 +0,0 @@ -.\" Copyright (c) 2006-2008 Jason Evans . 
-.\" Copyright (c) 2009 Facebook, Inc. All rights reserved. -.\" All rights reserved. -.\" Copyright (c) 1980, 1991, 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" This code is derived from software contributed to Berkeley by -.\" the American National Standards Committee X3, on Information -.\" Processing Systems. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. 
-.\" -.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 -.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $ -.\" -.Dd June 22, 2009 -.Dt JEMALLOC 3 -.Os -.Sh NAME -.Nm malloc , calloc , posix_memalign , realloc , free , malloc_usable_size -.Nd general purpose memory allocation functions -.Sh LIBRARY -.Lb libc -.Sh SYNOPSIS -.In stdlib.h -.Ft void * -.Fn malloc "size_t size" -.Ft void * -.Fn calloc "size_t number" "size_t size" -.Ft int -.Fn posix_memalign "void **ptr" "size_t alignment" "size_t size" -.Ft void * -.Fn realloc "void *ptr" "size_t size" -.Ft void -.Fn free "void *ptr" -.Ft const char * -.Va jemalloc_options ; -.Ft void -.Fo \*(lp*jemalloc_message\*(rp -.Fa "const char *p1" "const char *p2" "const char *p3" "const char *p4" -.Fc -.In malloc_np.h -.Ft size_t -.Fn malloc_usable_size "const void *ptr" -.Sh DESCRIPTION -The -.Fn malloc -function allocates -.Fa size -bytes of uninitialized memory. -The allocated space is suitably aligned (after possible pointer coercion) -for storage of any type of object. -.Pp -The -.Fn calloc -function allocates space for -.Fa number -objects, -each -.Fa size -bytes in length. -The result is identical to calling -.Fn malloc -with an argument of -.Dq "number * size" , -with the exception that the allocated memory is explicitly initialized -to zero bytes. -.Pp -The -.Fn posix_memalign -function allocates -.Fa size -bytes of memory such that the allocation's base address is an even multiple of -.Fa alignment , -and returns the allocation in the value pointed to by -.Fa ptr . -The requested -.Fa alignment -must be a power of 2 at least as large as -.Fn sizeof "void *" . -.Pp -The -.Fn realloc -function changes the size of the previously allocated memory referenced by -.Fa ptr -to -.Fa size -bytes. -The contents of the memory are unchanged up to the lesser of the new and -old sizes. -If the new size is larger, -the contents of the newly allocated portion of the memory are undefined. 
-Upon success, the memory referenced by -.Fa ptr -is freed and a pointer to the newly allocated memory is returned. -Note that -.Fn realloc -may move the memory allocation, resulting in a different return value than -.Fa ptr . -If -.Fa ptr -is -.Dv NULL , -the -.Fn realloc -function behaves identically to -.Fn malloc -for the specified size. -.Pp -The -.Fn free -function causes the allocated memory referenced by -.Fa ptr -to be made available for future allocations. -If -.Fa ptr -is -.Dv NULL , -no action occurs. -.Pp -The -.Fn malloc_usable_size -function returns the usable size of the allocation pointed to by -.Fa ptr . -The return value may be larger than the size that was requested during -allocation. -The -.Fn malloc_usable_size -function is not a mechanism for in-place -.Fn realloc ; -rather it is provided solely as a tool for introspection purposes. -Any discrepancy between the requested allocation size and the size reported by -.Fn malloc_usable_size -should not be depended on, since such behavior is entirely -implementation-dependent. -.Sh TUNING -Once, when the first call is made to one of these memory allocation -routines, various flags will be set or reset, which affects the -workings of this allocator implementation. -.Pp -The -.Dq name -of the file referenced by the symbolic link named -.Pa /etc/jemalloc.conf , -the value of the environment variable -.Ev JEMALLOC_OPTIONS , -and the string pointed to by the global variable -.Va jemalloc_options -will be interpreted, in that order, from left to right as flags. -.Pp -Each flag is a single letter, optionally prefixed by a non-negative base 10 -integer repetition count. -For example, -.Dq 3N -is equivalent to -.Dq NNN . -Some flags control parameter magnitudes, where uppercase increases the -magnitude, and lowercase decreases the magnitude. -Other flags control boolean parameters, where uppercase indicates that a -behavior is set, or on, and lowercase means that a behavior is not set, or off. 
-.Bl -tag -width indent -.It A -All warnings (except for the warning about unknown -flags being set) become fatal. -The process will call -.Xr abort 3 -in these cases. -.It B -Double/halve the per-arena lock contention threshold at which a thread is -randomly re-assigned to an arena. -This dynamic load balancing tends to push threads away from highly contended -arenas, which avoids worst case contention scenarios in which threads -disproportionately utilize arenas. -However, due to the highly dynamic load that applications may place on the -allocator, it is impossible for the allocator to know in advance how sensitive -it should be to contention over arenas. -Therefore, some applications may benefit from increasing or decreasing this -threshold parameter. -This option is not available for some configurations (non-PIC). -.It C -Double/halve the size of the maximum size class that is a multiple of the -cacheline size (64). -Above this size, subpage spacing (256 bytes) is used for size classes. -The default value is 512 bytes. -.It D -Use -.Xr sbrk 2 -to acquire memory in the data storage segment (DSS). -This option is enabled by default. -See the -.Dq M -option for related information and interactions. -.It F -Double/halve the per-arena maximum number of dirty unused pages that are -allowed to accumulate before informing the kernel about at least half of those -pages via -.Xr madvise 2 . -This provides the kernel with sufficient information to recycle dirty pages if -physical memory becomes scarce and the pages remain unused. -The default is 512 pages per arena; -.Ev JEMALLOC_OPTIONS=10f -will prevent any dirty unused pages from accumulating. -.It G -When there are multiple threads, use thread-specific caching for objects that -are smaller than one page. -This option is enabled by default. -Thread-specific caching allows many allocations to be satisfied without -performing any thread synchronization, at the cost of increased memory use. 
-See the -.Dq R -option for related tuning information. -This option is not available for some configurations (non-PIC). -.It J -Each byte of new memory allocated by -.Fn malloc -or -.Fn realloc -will be initialized to 0xa5. -All memory returned by -.Fn free -or -.Fn realloc -will be initialized to 0x5a. -This is intended for debugging and will impact performance negatively. -.It K -Double/halve the virtual memory chunk size. -The default chunk size is 1 MB. -.It M -Use -.Xr mmap 2 -to acquire anonymously mapped memory. -This option is enabled by default. -If both the -.Dq D -and -.Dq M -options are enabled, the allocator prefers the DSS over anonymous mappings, -but allocation only fails if memory cannot be acquired via either method. -If neither option is enabled, then the -.Dq M -option is implicitly enabled in order to assure that there is a method for -acquiring memory. -.It N -Double/halve the number of arenas. -The default number of arenas is two times the number of CPUs, or one if there -is a single CPU. -.It P -Various statistics are printed at program exit via an -.Xr atexit 3 -function. -This has the potential to cause deadlock for a multi-threaded process that exits -while one or more threads are executing in the memory allocation functions. -Therefore, this option should only be used with care; it is primarily intended -as a performance tuning aid during application development. -.It Q -Double/halve the size of the maximum size class that is a multiple of the -quantum (8 or 16 bytes, depending on architecture). -Above this size, cacheline spacing is used for size classes. -The default value is 128 bytes. -.It R -Double/halve magazine size, which approximately doubles/halves the number of -rounds in each magazine. -Magazines are used by the thread-specific caching machinery to acquire and -release objects in bulk. -Increasing the magazine size decreases locking overhead, at the expense of -increased memory usage. 
-This option is not available for some configurations (non-PIC). -.It U -Generate -.Dq utrace -entries for -.Xr ktrace 1 , -for all operations. -Consult the source for details on this option. -.It V -Attempting to allocate zero bytes will return a -.Dv NULL -pointer instead of -a valid pointer. -(The default behavior is to make a minimal allocation and return a -pointer to it.) -This option is provided for System V compatibility. -This option is incompatible with the -.Dq X -option. -.It X -Rather than return failure for any allocation function, -display a diagnostic message on -.Dv stderr -and cause the program to drop -core (using -.Xr abort 3 ) . -This option should be set at compile time by including the following in -the source code: -.Bd -literal -offset indent -jemalloc_options = "X"; -.Ed -.It Z -Each byte of new memory allocated by -.Fn malloc -or -.Fn realloc -will be initialized to 0. -Note that this initialization only happens once for each byte, so -.Fn realloc -calls do not zero memory that was previously allocated. -This is intended for debugging and will impact performance negatively. -.El -.Pp -The -.Dq J -and -.Dq Z -options are intended for testing and debugging. -An application which changes its behavior when these options are used -is flawed. -.Sh IMPLEMENTATION NOTES -Traditionally, allocators have used -.Xr sbrk 2 -to obtain memory, which is suboptimal for several reasons, including race -conditions, increased fragmentation, and artificial limitations on maximum -usable memory. -This allocator uses both -.Xr sbrk 2 -and -.Xr mmap 2 -by default, but it can be configured at run time to use only one or the other. -If resource limits are not a primary concern, the preferred configuration is -.Ev JEMALLOC_OPTIONS=dM -or -.Ev JEMALLOC_OPTIONS=DM . -When so configured, the -.Ar datasize -resource limit has little practical effect for typical applications; use -.Ev JEMALLOC_OPTIONS=Dm -if that is a concern. 
-Regardless of allocator configuration, the -.Ar vmemoryuse -resource limit can be used to bound the total virtual memory used by a -process, as described in -.Xr limits 1 . -.Pp -This allocator uses multiple arenas in order to reduce lock contention for -threaded programs on multi-processor systems. -This works well with regard to threading scalability, but incurs some costs. -There is a small fixed per-arena overhead, and additionally, arenas manage -memory completely independently of each other, which means a small fixed -increase in overall memory fragmentation. -These overheads are not generally an issue, given the number of arenas normally -used. -Note that using substantially more arenas than the default is not likely to -improve performance, mainly due to reduced cache performance. -However, it may make sense to reduce the number of arenas if an application -does not make much use of the allocation functions. -.Pp -In addition to multiple arenas, this allocator supports thread-specific -caching for small objects (smaller than one page), in order to make it -possible to completely avoid synchronization for most small allocation requests. -Such caching allows very fast allocation in the common case, but it increases -memory usage and fragmentation, since a bounded number of objects can remain -allocated in each thread cache. -.Pp -Memory is conceptually broken into equal-sized chunks, where the chunk size is -a power of two that is greater than the page size. -Chunks are always aligned to multiples of the chunk size. -This alignment makes it possible to find metadata for user objects very -quickly. -.Pp -User objects are broken into three categories according to size: small, large, -and huge. -Small objects are smaller than one page. -Large objects are smaller than the chunk size. -Huge objects are a multiple of the chunk size. 
-Small and large objects are managed by arenas; huge objects are managed -separately in a single data structure that is shared by all threads. -Huge objects are used by applications infrequently enough that this single -data structure is not a scalability issue. -.Pp -Each chunk that is managed by an arena tracks its contents as runs of -contiguous pages (unused, backing a set of small objects, or backing one large -object). -The combination of chunk alignment and chunk page maps makes it possible to -determine all metadata regarding small and large allocations in constant time. -.Pp -Small objects are managed in groups by page runs. -Each run maintains a bitmap that tracks which regions are in use. -Allocation requests that are no more than half the quantum (8 or 16, depending -on architecture) are rounded up to the nearest power of two. -Allocation requests that are more than half the quantum, but no more than the -minimum cacheline-multiple size class (see the -.Dq Q -option) are rounded up to the nearest multiple of the quantum. -Allocation requests that are more than the minumum cacheline-multiple size -class, but no more than the minimum subpage-multiple size class (see the -.Dq C -option) are rounded up to the nearest multiple of the cacheline size (64). -Allocation requests that are more than the minimum subpage-multiple size class -are rounded up to the nearest multiple of the subpage size (256). -Allocation requests that are more than one page, but small enough to fit in -an arena-managed chunk (see the -.Dq K -option), are rounded up to the nearest run size. -Allocation requests that are too large to fit in an arena-managed chunk are -rounded up to the nearest multiple of the chunk size. -.Pp -Allocations are packed tightly together, which can be an issue for -multi-threaded applications. -If you need to assure that allocations do not suffer from cacheline sharing, -round your allocation requests up to the nearest multiple of the cacheline -size. 
-.Sh DEBUGGING MALLOC PROBLEMS -The first thing to do is to set the -.Dq A -option. -This option forces a coredump (if possible) at the first sign of trouble, -rather than the normal policy of trying to continue if at all possible. -.Pp -It is probably also a good idea to recompile the program with suitable -options and symbols for debugger support. -.Pp -If the program starts to give unusual results, coredump or generally behave -differently without emitting any of the messages mentioned in the next -section, it is likely because it depends on the storage being filled with -zero bytes. -Try running it with the -.Dq Z -option set; -if that improves the situation, this diagnosis has been confirmed. -If the program still misbehaves, -the likely problem is accessing memory outside the allocated area. -.Pp -Alternatively, if the symptoms are not easy to reproduce, setting the -.Dq J -option may help provoke the problem. -.Pp -In truly difficult cases, the -.Dq U -option, if supported by the kernel, can provide a detailed trace of -all calls made to these functions. -.Pp -Unfortunately this implementation does not provide much detail about -the problems it detects; the performance impact for storing such information -would be prohibitive. -There are a number of allocator implementations available on the Internet -which focus on detecting and pinpointing problems by trading performance for -extra sanity checks and detailed diagnostics. -.Sh DIAGNOSTIC MESSAGES -If any of the memory allocation/deallocation functions detect an error or -warning condition, a message will be printed to file descriptor -.Dv STDERR_FILENO . -Errors will result in the process dumping core. -If the -.Dq A -option is set, all warnings are treated as errors. -.Pp -The -.Va _malloc_message -variable allows the programmer to override the function which emits -the text strings forming the errors and warnings if for some reason -the -.Dv stderr -file descriptor is not suitable for this. 
-Please note that doing anything which tries to allocate memory in -this function is likely to result in a crash or deadlock. -.Pp -All messages are prefixed by -.Dq Ao Ar progname Ac Ns Li : (malloc) . -.Sh RETURN VALUES -The -.Fn malloc -and -.Fn calloc -functions return a pointer to the allocated memory if successful; otherwise -a -.Dv NULL -pointer is returned and -.Va errno -is set to -.Er ENOMEM . -.Pp -The -.Fn posix_memalign -function returns the value 0 if successful; otherwise it returns an error value. -The -.Fn posix_memalign -function will fail if: -.Bl -tag -width Er -.It Bq Er EINVAL -The -.Fa alignment -parameter is not a power of 2 at least as large as -.Fn sizeof "void *" . -.It Bq Er ENOMEM -Memory allocation error. -.El -.Pp -The -.Fn realloc -function returns a pointer, possibly identical to -.Fa ptr , -to the allocated memory -if successful; otherwise a -.Dv NULL -pointer is returned, and -.Va errno -is set to -.Er ENOMEM -if the error was the result of an allocation failure. -The -.Fn realloc -function always leaves the original buffer intact -when an error occurs. -.Pp -The -.Fn free -function returns no value. -.Pp -The -.Fn malloc_usable_size -function returns the usable size of the allocation pointed to by -.Fa ptr . -.Sh ENVIRONMENT -The following environment variables affect the execution of the allocation -functions: -.Bl -tag -width ".Ev JEMALLOC_OPTIONS" -.It Ev JEMALLOC_OPTIONS -If the environment variable -.Ev JEMALLOC_OPTIONS -is set, the characters it contains will be interpreted as flags to the -allocation functions. 
-.El -.Sh EXAMPLES -To dump core whenever a problem occurs: -.Pp -.Bd -literal -offset indent -ln -s 'A' /etc/jemalloc.conf -.Ed -.Pp -To specify in the source that a program does no return value checking -on calls to these functions: -.Bd -literal -offset indent -jemalloc_options = "X"; -.Ed -.Sh SEE ALSO -.Xr limits 1 , -.Xr madvise 2 , -.Xr mmap 2 , -.Xr sbrk 2 , -.Xr alloca 3 , -.Xr atexit 3 , -.Xr getpagesize 3 , -.Xr memory 3 , -.Xr posix_memalign 3 -.Sh STANDARDS -The -.Fn malloc , -.Fn calloc , -.Fn realloc -and -.Fn free -functions conform to -.St -isoC . -.Pp -The -.Fn posix_memalign -function conforms to -.St -p1003.1-2001 . diff --git a/jemalloc/doc/jemalloc.3.in b/jemalloc/doc/jemalloc.3.in new file mode 100644 index 0000000..8d10b39 --- /dev/null +++ b/jemalloc/doc/jemalloc.3.in @@ -0,0 +1,577 @@ +.\" Copyright (c) 2009 Facebook, Inc. All rights reserved. +.\" Copyright (c) 2006-2008 Jason Evans . +.\" All rights reserved. +.\" Copyright (c) 1980, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the American National Standards Committee X3, on Information +.\" Processing Systems. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)malloc.3 8.1 (Berkeley) 6/4/93 +.\" $FreeBSD: head/lib/libc/stdlib/malloc.3 182225 2008-08-27 02:00:53Z jasone $ +.\" +.Dd June 22, 2009 +.Dt JEMALLOC 3 +.Os +.Sh NAME +.Nm malloc , calloc , posix_memalign , realloc , free , malloc_usable_size +.Nd general purpose memory allocation functions +.Sh LIBRARY +.Lb libjemalloc +.Sh SYNOPSIS +.In stdlib.h +.Ft void * +.Fn malloc "size_t size" +.Ft void * +.Fn calloc "size_t number" "size_t size" +.Ft int +.Fn posix_memalign "void **ptr" "size_t alignment" "size_t size" +.Ft void * +.Fn realloc "void *ptr" "size_t size" +.Ft void +.Fn free "void *ptr" +.In jemalloc.h +.Ft size_t +.Fn malloc_usable_size "const void *ptr" +.Ft const char * +.Va jemalloc_options ; +.Ft void +.Fo \*(lp*jemalloc_message\*(rp +.Fa "const char *p1" "const char *p2" "const char *p3" "const char *p4" +.Fc +.Sh DESCRIPTION +The +.Fn malloc +function allocates +.Fa size +bytes of uninitialized memory. +The allocated space is suitably aligned +@roff_tiny@(after possible pointer coercion) +for storage of any type of object. +.Pp +The +.Fn calloc +function allocates space for +.Fa number +objects, +each +.Fa size +bytes in length. 
+The result is identical to calling +.Fn malloc +with an argument of +.Dq "number * size" , +with the exception that the allocated memory is explicitly initialized +to zero bytes. +.Pp +The +.Fn posix_memalign +function allocates +.Fa size +bytes of memory such that the allocation's base address is an even multiple of +.Fa alignment , +and returns the allocation in the value pointed to by +.Fa ptr . +The requested +.Fa alignment +must be a power of 2 at least as large as +.Fn sizeof "void *" . +.Pp +The +.Fn realloc +function changes the size of the previously allocated memory referenced by +.Fa ptr +to +.Fa size +bytes. +The contents of the memory are unchanged up to the lesser of the new and +old sizes. +If the new size is larger, +the contents of the newly allocated portion of the memory are undefined. +Upon success, the memory referenced by +.Fa ptr +is freed and a pointer to the newly allocated memory is returned. +Note that +.Fn realloc +may move the memory allocation, resulting in a different return value than +.Fa ptr . +If +.Fa ptr +is +.Dv NULL , +the +.Fn realloc +function behaves identically to +.Fn malloc +for the specified size. +.Pp +The +.Fn free +function causes the allocated memory referenced by +.Fa ptr +to be made available for future allocations. +If +.Fa ptr +is +.Dv NULL , +no action occurs. +.Pp +The +.Fn malloc_usable_size +function returns the usable size of the allocation pointed to by +.Fa ptr . +The return value may be larger than the size that was requested during +allocation. +The +.Fn malloc_usable_size +function is not a mechanism for in-place +.Fn realloc ; +rather it is provided solely as a tool for introspection purposes. +Any discrepancy between the requested allocation size and the size reported by +.Fn malloc_usable_size +should not be depended on, since such behavior is entirely +implementation-dependent. 
+.Sh TUNING +Once, when the first call is made to one of these memory allocation +routines, various flags will be set or reset, which affects the +workings of this allocator implementation. +.Pp +The +.Dq name +of the file referenced by the symbolic link named +.Pa /etc/jemalloc.conf , +the value of the environment variable +.Ev JEMALLOC_OPTIONS , +and the string pointed to by the global variable +.Va jemalloc_options +will be interpreted, in that order, from left to right as flags. +.Pp +Each flag is a single letter, optionally prefixed by a non-negative base 10 +integer repetition count. +For example, +.Dq 3N +is equivalent to +.Dq NNN . +Some flags control parameter magnitudes, where uppercase increases the +magnitude, and lowercase decreases the magnitude. +Other flags control boolean parameters, where uppercase indicates that a +behavior is set, or on, and lowercase means that a behavior is not set, or off. +.Bl -tag -width indent +.It A +All warnings (except for the warning about unknown +flags being set) become fatal. +The process will call +.Xr abort 3 +in these cases. +@roff_balance@@roff_tls@.It B +@roff_balance@@roff_tls@Double/halve the per-arena lock contention threshold at +@roff_balance@@roff_tls@which a thread is randomly re-assigned to an arena. +@roff_balance@@roff_tls@This dynamic load balancing tends to push threads away +@roff_balance@@roff_tls@from highly contended arenas, which avoids worst case +@roff_balance@@roff_tls@contention scenarios in which threads disproportionately +@roff_balance@@roff_tls@utilize arenas. +@roff_balance@@roff_tls@However, due to the highly dynamic load that +@roff_balance@@roff_tls@applications may place on the allocator, it is +@roff_balance@@roff_tls@impossible for the allocator to know in advance how +@roff_balance@@roff_tls@sensitive it should be to contention over arenas. 
+@roff_balance@@roff_tls@Therefore, some applications may benefit from increasing +@roff_balance@@roff_tls@or decreasing this threshold parameter. +.It C +Double/halve the size of the maximum size class that is a multiple of the +cacheline size (64). +Above this size, subpage spacing (256 bytes) is used for size classes. +The default value is 512 bytes. +@roff_dss@.It D +@roff_dss@Use +@roff_dss@.Xr sbrk 2 +@roff_dss@to acquire memory in the data storage segment (DSS). +@roff_dss@This option is enabled by default. +@roff_dss@See the +@roff_dss@.Dq M +@roff_dss@option for related information and interactions. +.It F +Double/halve the per-arena maximum number of dirty unused pages that are +allowed to accumulate before informing the kernel about at least half of those +pages via +.Xr madvise 2 . +This provides the kernel with sufficient information to recycle dirty pages if +physical memory becomes scarce and the pages remain unused. +The default is 512 pages per arena; +.Ev JEMALLOC_OPTIONS=10f +will prevent any dirty unused pages from accumulating. +@roff_mag@@roff_tls@.It G +@roff_mag@@roff_tls@When there are multiple threads, use thread-specific caching +@roff_mag@@roff_tls@for objects that are smaller than one page. +@roff_mag@@roff_tls@This option is enabled by default. +@roff_mag@@roff_tls@Thread-specific caching allows many allocations to be +@roff_mag@@roff_tls@satisfied without performing any thread synchronization, at +@roff_mag@@roff_tls@the cost of increased memory use. +@roff_mag@@roff_tls@See the +@roff_mag@@roff_tls@.Dq R +@roff_mag@@roff_tls@option for related tuning information. +@roff_fill@.It J +@roff_fill@Each byte of new memory allocated by +@roff_fill@.Fn malloc +@roff_fill@or +@roff_fill@.Fn realloc +@roff_fill@will be initialized to 0xa5. +@roff_fill@All memory returned by +@roff_fill@.Fn free +@roff_fill@or +@roff_fill@.Fn realloc +@roff_fill@will be initialized to 0x5a. 
+@roff_fill@This is intended for debugging and will impact performance +@roff_fill@negatively. +.It K +Double/halve the virtual memory chunk size. +The default chunk size is 1 MB. +@roff_dss@.It M +@roff_dss@Use +@roff_dss@.Xr mmap 2 +@roff_dss@to acquire anonymously mapped memory. +@roff_dss@This option is enabled by default. +@roff_dss@If both the +@roff_dss@.Dq D +@roff_dss@and +@roff_dss@.Dq M +@roff_dss@options are enabled, the allocator prefers the DSS over anonymous +@roff_dss@mappings, but allocation only fails if memory cannot be acquired via +@roff_dss@either method. +@roff_dss@If neither option is enabled, then the +@roff_dss@.Dq M +@roff_dss@option is implicitly enabled in order to assure that there is a method +@roff_dss@for acquiring memory. +.It N +Double/halve the number of arenas. +The default number of arenas is two times the number of CPUs, or one if there +is a single CPU. +.It P +Various statistics are printed at program exit via an +.Xr atexit 3 +function. +This has the potential to cause deadlock for a multi-threaded process that exits +while one or more threads are executing in the memory allocation functions. +Therefore, this option should only be used with care; it is primarily intended +as a performance tuning aid during application development. +.It Q +Double/halve the size of the maximum size class that is a multiple of the +quantum (8 or 16 bytes, depending on architecture). +Above this size, cacheline spacing is used for size classes. +The default value is 128 bytes. +@roff_mag@@roff_tls@.It R +@roff_mag@@roff_tls@Double/halve magazine size, which approximately +@roff_mag@@roff_tls@doubles/halves the number of rounds in each magazine. +@roff_mag@@roff_tls@Magazines are used by the thread-specific caching machinery +@roff_mag@@roff_tls@to acquire and release objects in bulk. +@roff_mag@@roff_tls@Increasing the magazine size decreases locking overhead, at +@roff_mag@@roff_tls@the expense of increased memory usage. 
+@roff_stats@.It U +@roff_stats@Generate a verbose trace log via +@roff_stats@.Fn jemalloc_message +@roff_stats@for all allocation operations. +@roff_sysv@.It V +@roff_sysv@Attempting to allocate zero bytes will return a +@roff_sysv@.Dv NULL +@roff_sysv@pointer instead of a valid pointer. +@roff_sysv@(The default behavior is to make a minimal allocation and return a +@roff_sysv@pointer to it.) +@roff_sysv@This option is provided for System V compatibility. +@roff_sysv@@roff_xmalloc@This option is incompatible with the +@roff_sysv@@roff_xmalloc@.Dq X +@roff_sysv@@roff_xmalloc@option. +@roff_xmalloc@.It X +@roff_xmalloc@Rather than return failure for any allocation function, display a +@roff_xmalloc@diagnostic message on +@roff_xmalloc@.Dv stderr +@roff_xmalloc@and cause the program to drop core (using +@roff_xmalloc@.Xr abort 3 ) . +@roff_xmalloc@This option should be set at compile time by including the +@roff_xmalloc@following in the source code: +@roff_xmalloc@.Bd -literal -offset indent +@roff_xmalloc@jemalloc_options = "X"; +@roff_xmalloc@.Ed +@roff_fill@.It Z +@roff_fill@Each byte of new memory allocated by +@roff_fill@.Fn malloc +@roff_fill@or +@roff_fill@.Fn realloc +@roff_fill@will be initialized to 0. +@roff_fill@Note that this initialization only happens once for each byte, so +@roff_fill@.Fn realloc +@roff_fill@calls do not zero memory that was previously allocated. +@roff_fill@This is intended for debugging and will impact performance +@roff_fill@negatively. +.El +.Pp +@roff_fill@The +@roff_fill@.Dq J +@roff_fill@and +@roff_fill@.Dq Z +@roff_fill@options are intended for testing and debugging. +@roff_fill@An application which changes its behavior when these options are used +@roff_fill@is flawed. 
+.Sh IMPLEMENTATION NOTES +@roff_dss@Traditionally, allocators have used +@roff_dss@.Xr sbrk 2 +@roff_dss@to obtain memory, which is suboptimal for several reasons, including +@roff_dss@race conditions, increased fragmentation, and artificial limitations +@roff_dss@on maximum usable memory. +@roff_dss@This allocator uses both +@roff_dss@.Xr sbrk 2 +@roff_dss@and +@roff_dss@.Xr mmap 2 +@roff_dss@by default, but it can be configured at run time to use only one or +@roff_dss@the other. +.Pp +This allocator uses multiple arenas in order to reduce lock contention for +threaded programs on multi-processor systems. +This works well with regard to threading scalability, but incurs some costs. +There is a small fixed per-arena overhead, and additionally, arenas manage +memory completely independently of each other, which means a small fixed +increase in overall memory fragmentation. +These overheads are not generally an issue, given the number of arenas normally +used. +Note that using substantially more arenas than the default is not likely to +improve performance, mainly due to reduced cache performance. +However, it may make sense to reduce the number of arenas if an application +does not make much use of the allocation functions. +.Pp +@roff_mag@In addition to multiple arenas, this allocator supports +@roff_mag@thread-specific caching for small objects (smaller than one page), in +@roff_mag@order to make it possible to completely avoid synchronization for most +@roff_mag@small allocation requests. +@roff_mag@Such caching allows very fast allocation in the common case, but it +@roff_mag@increases memory usage and fragmentation, since a bounded number of +@roff_mag@objects can remain allocated in each thread cache. +@roff_mag@.Pp +Memory is conceptually broken into equal-sized chunks, where the chunk size is +a power of two that is greater than the page size. +Chunks are always aligned to multiples of the chunk size. 
+This alignment makes it possible to find metadata for user objects very +quickly. +.Pp +User objects are broken into three categories according to size: small, large, +and huge. +Small objects are smaller than one page. +Large objects are smaller than the chunk size. +Huge objects are a multiple of the chunk size. +Small and large objects are managed by arenas; huge objects are managed +separately in a single data structure that is shared by all threads. +Huge objects are used by applications infrequently enough that this single +data structure is not a scalability issue. +.Pp +Each chunk that is managed by an arena tracks its contents as runs of +contiguous pages (unused, backing a set of small objects, or backing one large +object). +The combination of chunk alignment and chunk page maps makes it possible to +determine all metadata regarding small and large allocations in constant time. +.Pp +Small objects are managed in groups by page runs. +Each run maintains a bitmap that tracks which regions are in use. +@roff_tiny@Allocation requests that are no more than half the quantum (8 or 16, +@roff_tiny@depending on architecture) are rounded up to the nearest power of +@roff_tiny@two. +Allocation requests that are +@roff_tiny@more than half the quantum, but +no more than the minimum cacheline-multiple size class (see the +.Dq Q +option) are rounded up to the nearest multiple of the +@roff_tiny@quantum. +@roff_no_tiny@quantum (8 or 16, depending on architecture). +Allocation requests that are more than the minimum cacheline-multiple size +class, but no more than the minimum subpage-multiple size class (see the +.Dq C +option) are rounded up to the nearest multiple of the cacheline size (64). +Allocation requests that are more than the minimum subpage-multiple size class +are rounded up to the nearest multiple of the subpage size (256).
+Allocation requests that are more than one page, but small enough to fit in +an arena-managed chunk (see the +.Dq K +option), are rounded up to the nearest run size. +Allocation requests that are too large to fit in an arena-managed chunk are +rounded up to the nearest multiple of the chunk size. +.Pp +Allocations are packed tightly together, which can be an issue for +multi-threaded applications. +If you need to assure that allocations do not suffer from cacheline sharing, +round your allocation requests up to the nearest multiple of the cacheline +size. +.Sh DEBUGGING MALLOC PROBLEMS +The first thing to do is to set the +.Dq A +option. +This option forces a coredump (if possible) at the first sign of trouble, +rather than the normal policy of trying to continue if at all possible. +.Pp +It is probably also a good idea to recompile the program with suitable +options and symbols for debugger support. +.Pp +@roff_fill@If the program starts to give unusual results, coredump or generally +@roff_fill@behave differently without emitting any of the messages mentioned in +@roff_fill@the next section, it is likely because it depends on the storage +@roff_fill@being filled with zero bytes. +@roff_fill@Try running it with the +@roff_fill@.Dq Z +@roff_fill@option set; +@roff_fill@if that improves the situation, this diagnosis has been confirmed. +@roff_fill@If the program still misbehaves, +@roff_fill@the likely problem is accessing memory outside the allocated area. +@roff_fill@.Pp +@roff_fill@Alternatively, if the symptoms are not easy to reproduce, setting the +@roff_fill@.Dq J +@roff_fill@option may help provoke the problem. +@roff_fill@.Pp +@roff_stats@In truly difficult cases, the +@roff_stats@.Dq U +@roff_stats@option can provide a detailed trace of all calls made to these +@roff_stats@functions. 
+@roff_stats@.Pp +Unfortunately this implementation does not provide much detail about +the problems it detects; the performance impact for storing such information +would be prohibitive. +There are a number of allocator implementations available on the Internet +which focus on detecting and pinpointing problems by trading performance for +extra sanity checks and detailed diagnostics. +.Sh DIAGNOSTIC MESSAGES +If any of the memory allocation/deallocation functions detect an error or +warning condition, a message will be printed to file descriptor +.Dv STDERR_FILENO . +Errors will result in the process dumping core. +If the +.Dq A +option is set, all warnings are treated as errors. +.Pp +The +.Va jemalloc_message +variable allows the programmer to override the function which emits +the text strings forming the errors and warnings if for some reason +the +.Dv stderr +file descriptor is not suitable for this. +Please note that doing anything which tries to allocate memory in +this function is likely to result in a crash or deadlock. +.Pp +All messages are prefixed by +.Dq <jemalloc>: . +.Sh RETURN VALUES +The +.Fn malloc +and +.Fn calloc +functions return a pointer to the allocated memory if successful; otherwise +a +.Dv NULL +pointer is returned and +.Va errno +is set to +.Er ENOMEM . +.Pp +The +.Fn posix_memalign +function returns the value 0 if successful; otherwise it returns an error value. +The +.Fn posix_memalign +function will fail if: +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa alignment +parameter is not a power of 2 at least as large as +.Fn sizeof "void *" . +.It Bq Er ENOMEM +Memory allocation error. +.El +.Pp +The +.Fn realloc +function returns a pointer, possibly identical to +.Fa ptr , +to the allocated memory +if successful; otherwise a +.Dv NULL +pointer is returned, and +.Va errno +is set to +.Er ENOMEM +if the error was the result of an allocation failure. +The +.Fn realloc +function always leaves the original buffer intact +when an error occurs.
+.Pp +The +.Fn free +function returns no value. +.Pp +The +.Fn malloc_usable_size +function returns the usable size of the allocation pointed to by +.Fa ptr . +.Sh ENVIRONMENT +The following environment variables affect the execution of the allocation +functions: +.Bl -tag -width ".Ev JEMALLOC_OPTIONS" +.It Ev JEMALLOC_OPTIONS +If the environment variable +.Ev JEMALLOC_OPTIONS +is set, the characters it contains will be interpreted as flags to the +allocation functions. +.El +.Sh EXAMPLES +To dump core whenever a problem occurs: +.Pp +.Bd -literal -offset indent +ln -s 'A' /etc/jemalloc.conf +.Ed +.Pp +To specify in the source that a program does no return value checking +on calls to these functions: +.Bd -literal -offset indent +jemalloc_options = "X"; +.Ed +.Sh SEE ALSO +.Xr madvise 2 , +.Xr mmap 2 , +.Xr sbrk 2 , +.Xr alloca 3 , +.Xr atexit 3 , +.Xr getpagesize 3 +.Sh STANDARDS +The +.Fn malloc , +.Fn calloc , +.Fn realloc +and +.Fn free +functions conform to +.St -isoC . +.Pp +The +.Fn posix_memalign +function conforms to +.St -p1003.1-2001 . diff --git a/jemalloc/src/jemalloc.c b/jemalloc/src/jemalloc.c index 71b09c4..65ce18e 100644 --- a/jemalloc/src/jemalloc.c +++ b/jemalloc/src/jemalloc.c @@ -1178,8 +1178,8 @@ static bool size2bin_init_hard(void); static unsigned malloc_ncpus(void); static bool malloc_init_hard(void); static void thread_cleanup(void *arg); -void jemalloc_prefork(void); -void jemalloc_postfork(void); +static void jemalloc_prefork(void); +static void jemalloc_postfork(void); /* * End function prototypes. 
@@ -1231,9 +1231,10 @@ umax2s(uintmax_t x, char *s) # define assert(e) do { \ if (!(e)) { \ char line_buf[UMAX2S_BUFSIZE]; \ - jemalloc_message(__FILE__, ":", umax2s(__LINE__, \ - line_buf), ": Failed assertion: "); \ - jemalloc_message("\"", #e, "\"\n", ""); \ + jemalloc_message(": ", __FILE__, ":", \ + umax2s(__LINE__, line_buf)); \ + jemalloc_message(": Failed assertion: ", "\"", #e, \ + "\"\n"); \ abort(); \ } \ } while (0) @@ -1250,15 +1251,17 @@ utrace(const void *addr, size_t len) assert(len == sizeof(malloc_utrace_t)); if (ut->p == NULL && ut->s == 0 && ut->r == NULL) - malloc_printf("%d x USER malloc_init()\n", getpid()); + malloc_printf(":utrace: %d malloc_init()\n", + getpid()); else if (ut->p == NULL && ut->r != NULL) { - malloc_printf("%d x USER %p = malloc(%zu)\n", getpid(), ut->r, - ut->s); + malloc_printf(":utrace: %d %p = malloc(%zu)\n", + getpid(), ut->r, ut->s); } else if (ut->p != NULL && ut->r != NULL) { - malloc_printf("%d x USER %p = realloc(%p, %zu)\n", getpid(), - ut->r, ut->p, ut->s); + malloc_printf(":utrace: %d %p = realloc(%p, %zu)\n", + getpid(), ut->r, ut->p, ut->s); } else - malloc_printf("%d x USER free(%p)\n", getpid(), ut->p); + malloc_printf(":utrace: %d free(%p)\n", getpid(), + ut->p); return (0); } @@ -2247,11 +2250,6 @@ choose_arena(void) * introduces a bootstrapping issue. */ #ifndef NO_TLS - if (isthreaded == false) { - /* Avoid the overhead of TLS for single-threaded operation. 
*/ - return (arenas[0]); - } - ret = arenas_map; if (ret == NULL) { ret = choose_arena_hard(); @@ -3405,11 +3403,9 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) } static inline void * -arena_malloc(arena_t *arena, size_t size, bool zero) +arena_malloc(size_t size, bool zero) { - assert(arena != NULL); - assert(arena->magic == ARENA_MAGIC); assert(size != 0); assert(QUANTUM_CEILING(size) <= arena_maxclass); @@ -3418,7 +3414,7 @@ arena_malloc(arena_t *arena, size_t size, bool zero) if (opt_mag) { mag_rack_t *rack = mag_rack; if (rack == NULL) { - rack = mag_rack_create(arena); + rack = mag_rack_create(choose_arena()); if (rack == NULL) return (NULL); mag_rack = rack; @@ -3427,9 +3423,9 @@ arena_malloc(arena_t *arena, size_t size, bool zero) return (mag_rack_alloc(rack, size, zero)); } else #endif - return (arena_malloc_small(arena, size, zero)); + return (arena_malloc_small(choose_arena(), size, zero)); } else - return (arena_malloc_large(arena, size, zero)); + return (arena_malloc_large(choose_arena(), size, zero)); } static inline void * @@ -3439,7 +3435,7 @@ imalloc(size_t size) assert(size != 0); if (size <= arena_maxclass) - return (arena_malloc(choose_arena(), size, false)); + return (arena_malloc(size, false)); else return (huge_malloc(size, false)); } @@ -3449,7 +3445,7 @@ icalloc(size_t size) { if (size <= arena_maxclass) - return (arena_malloc(choose_arena(), size, true)); + return (arena_malloc(size, true)); else return (huge_malloc(size, true)); } @@ -3553,7 +3549,7 @@ ipalloc(size_t alignment, size_t size) if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE && ceil_size <= arena_maxclass)) - ret = arena_malloc(choose_arena(), ceil_size, false); + ret = arena_malloc(ceil_size, false); else { size_t run_size; @@ -4113,7 +4109,7 @@ arena_ralloc(void *ptr, size_t size, size_t oldsize) * need to move the object. In that case, fall back to allocating new * space and copying. 
*/ - ret = arena_malloc(choose_arena(), size, false); + ret = arena_malloc(size, false); if (ret == NULL) return (NULL); @@ -5725,7 +5721,7 @@ thread_cleanup(void *arg) * is threaded here. */ -void +static void jemalloc_prefork(void) { bool again; @@ -5773,7 +5769,7 @@ jemalloc_prefork(void) #endif } -void +static void jemalloc_postfork(void) { unsigned i; diff --git a/jemalloc/src/jemalloc.h b/jemalloc/src/jemalloc.h index dbff468..21b8de5 100644 --- a/jemalloc/src/jemalloc.h +++ b/jemalloc/src/jemalloc.h @@ -28,10 +28,24 @@ ******************************************************************************* */ +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef JEMALLOC_H_ +#define JEMALLOC_H_ + +#include "jemalloc_defs.h" + +size_t malloc_usable_size(const void *ptr); + extern const char *jemalloc_options; extern void (*jemalloc_message)(const char *p1, const char *p2, const char *p3, const char *p4); -void jemalloc_thread_cleanup(void); -void jemalloc_prefork(void); -void jemalloc_postfork(void); +#endif /* JEMALLOC_H_ */ + +#ifdef __cplusplus +}; +#endif + diff --git a/jemalloc/src/jemalloc_defs.h.in b/jemalloc/src/jemalloc_defs.h.in index 6ca6018..eae3d0a 100644 --- a/jemalloc/src/jemalloc_defs.h.in +++ b/jemalloc/src/jemalloc_defs.h.in @@ -28,6 +28,14 @@ ******************************************************************************* */ +#ifndef JEMALLOC_DEFS_H_ +#define JEMALLOC_DEFS_H_ + +/* + * jemalloc version string. + */ +#undef JEMALLOC_VERSION + /* * Hyper-threaded CPUs may need a special instruction inside spin loops in * order to yield to another virtual CPU. @@ -92,3 +100,5 @@ /* sizeof(void *) == 2^SIZEOF_PTR_2POW. */ #undef SIZEOF_PTR_2POW + +#endif /* JEMALLOC_DEFS_H_ */ -- cgit v0.12