From 9ebbfca93fd98e31d2dbc2bddffcc4624df516dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 16:44:33 -0700 Subject: Change html manual encoding to UTF-8. This works around GitHub's broken automatic reformatting from ISO-8859-1 to UTF-8 when serving static html. Remove from e.g. malloc, add a custom template that does not append parentheses, and manually specify them, e.g. malloc(). This works around apparently broken XSL formatting that causes to be emitted in html (rather than , or better yet, nothing). --- doc/html.xsl.in | 1 + doc/jemalloc.xml.in | 174 ++++++++++++++++++++++++++-------------------------- doc/stylesheet.xsl | 5 +- 3 files changed, 92 insertions(+), 88 deletions(-) diff --git a/doc/html.xsl.in b/doc/html.xsl.in index a91d974..ec4fa65 100644 --- a/doc/html.xsl.in +++ b/doc/html.xsl.in @@ -1,4 +1,5 @@ + diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c4a44e3..8817229 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -180,20 +180,20 @@ Standard API - The malloc function allocates + The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc function allocates + The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign function + The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc function + The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc function changes the + The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. Note that - realloc may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. + realloc() function behaves identically to + malloc() for the specified size. - The free function causes the + The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx, - rallocx, - xallocx, - sallocx, - dallocx, - sdallocx, and - nallocx functions all have a + The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. 
Use bitwise or (|) operations to @@ -307,19 +307,19 @@ - The mallocx function allocates at + The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if size is 0. - The rallocx function resizes the + The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if size is 0. - The xallocx function resizes the + The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -332,32 +332,32 @@ language="C">(size + extra > SIZE_T_MAX). - The sallocx function returns the + The sallocx() function returns the real size of the allocation at ptr. - The dallocx function causes the + The dallocx() function causes the memory referenced by ptr to be made available for future allocations. - The sdallocx function is an - extension of dallocx with a + The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx or - sallocx. + nallocx() or + sallocx(). - The nallocx function allocates no + The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx function call, or + mallocx() function call, or 0 if the inputs exceed the maximum supported size class and/or alignment. Behavior is undefined if size is 0. - The mallctl function provides a + The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -372,12 +372,12 @@ newlen; otherwise pass NULL and 0. - The mallctlnametomib function + The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -410,18 +410,18 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print function + The malloc_stats_print() function writes human-readable summary statistics via the write_cb callback function pointer and cbopaque data passed to write_cb, or - malloc_message if + malloc_message() if write_cb is NULL. This function can be called repeatedly. General information that never changes during execution can be omitted by specifying "g" as a character within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so + malloc_message() uses the + mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. 
If is specified during configuration, “m” and “a” can @@ -434,15 +434,15 @@ for (i = 0; i < nbins; i++) { thread cache operations. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. @@ -460,7 +460,7 @@ for (i = 0; i < nbins; i++) { environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains the final value to be read by jemalloc. and malloc_conf are compile-time mechanisms, whereas @@ -549,14 +549,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc, - rallocx, and - xallocx functions may resize allocations + The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -702,7 +702,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl* functions. Value types are + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -733,7 +733,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl* functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -1013,19 +1013,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print + enabled, the malloc_stats_print() function is called at program exit via an atexit 3 function. If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit may + functions. 
Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit, so this option is not + atexit(), so this option is not universally usable (though the application can register its own - atexit function with equivalent + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1101,8 +1101,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc and - rallocx calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -1325,11 +1325,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit may allocate + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit, so + when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can - register its own atexit function with + register its own atexit() function with equivalent functionality). This option is disabled by default. @@ -1388,7 +1388,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -1415,7 +1415,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -2763,10 +2763,10 @@ MAPPED_LIBRARIES: to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. @@ -2777,15 +2777,15 @@ MAPPED_LIBRARIES: RETURN VALUES Standard API - The malloc and - calloc functions return a pointer to the + The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign function + The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail + The posix_memalign() function will fail if: @@ -2804,11 +2804,11 @@ MAPPED_LIBRARIES: - The aligned_alloc function returns + The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. 
The - aligned_alloc function will fail if: + aligned_alloc() function will fail if: EINVAL @@ -2825,44 +2825,44 @@ MAPPED_LIBRARIES: - The realloc function returns a + The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. - The free function returns no + The free() function returns no value. Non-standard API - The mallocx and - rallocx functions return a pointer to + The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx function returns the + The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx function returns the + The sallocx() function returns the real size of the allocation pointed to by ptr. - The nallocx returns the real size + The nallocx() returns the real size that would result from a successful equivalent - mallocx function call, or zero if + mallocx() function call, or zero if insufficient memory is available to perform the size computation. - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on + The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2898,13 +2898,13 @@ MAPPED_LIBRARIES: EFAULT An interface with side effects failed in some way - not directly related to mallctl* + not directly related to mallctl*() read/write processing. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. @@ -2952,13 +2952,13 @@ malloc_conf = "lg_chunk:24";]]> STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC + The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC 9899:1990 (“ISO C90”). - The posix_memalign function conforms + The posix_memalign() function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index 4e334a8..bc8bc2a 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -1,6 +1,9 @@ ansi - + + + + "" -- cgit v0.12 From 57ed894f8ac154d30faf9449a76d2792cdad3850 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Sep 2016 09:13:45 -0700 Subject: Fix arena_bind(). When tsd is not in nominal state (e.g. during thread termination), we should not increment nthreads. 
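For readability, this is how arena_bind() reads once the hunk below is applied — a reconstruction from the diff, not a standalone program (it depends on jemalloc-internal types), and the static void return type is assumed since it lies outside the hunk context; src/jemalloc.c remains the authoritative version:

```c
static void
arena_bind(tsd_t *tsd, unsigned ind, bool internal)
{
	arena_t *arena;

	/*
	 * Bail out early for a non-nominal tsd (e.g. during thread
	 * termination) instead of incrementing the arena's nthreads.
	 */
	if (!tsd_nominal(tsd))
		return;

	arena = arena_get(tsd_tsdn(tsd), ind, false);
	arena_nthreads_inc(arena, internal);

	if (internal)
		tsd_iarena_set(tsd, arena);
	else
		tsd_arena_set(tsd, arena);
}
```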
--- src/jemalloc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d1f493..c1ecc73 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -455,15 +455,16 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; + if (!tsd_nominal(tsd)) + return; + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) { - if (internal) - tsd_iarena_set(tsd, arena); - else - tsd_arena_set(tsd, arena); - } + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); } void -- cgit v0.12 From a6a8e40f7d24ee9f4f6abf1c03e1e0d7e3763084 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 10 Jun 2016 16:28:35 -0700 Subject: Fix a valgrind regression in chunk_recycle() Fix a latent valgrind bug exposed by d412624b25eed2b5c52b7d94a71070d3aab03cb4 (Move retaining out of default chunk hooks). --- src/chunk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index f292c98..dff537f 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -316,10 +316,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } -- cgit v0.12 From d1207f0d371eade218f7572743b5eddedc7fff94 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 17 Jun 2016 13:28:39 -0700 Subject: Check for __builtin_unreachable at configure time Add a configure check for __builtin_unreachable instead of basing its availability on the __GNUC__ version. On OS X using gcc (a real gcc, not the bundled version that's just a gcc front-end) leads to a linker assertion: https://github.com/jemalloc/jemalloc/issues/266 It turns out that this is caused by a gcc bug resulting from the use of __builtin_unreachable(): https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438 To work around this bug, check that __builtin_unreachable() actually works at configure time, and if it doesn't use abort() instead. The check is based on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438#c21. With this `make check` passes with a homebrew installed gcc-5 and gcc-6. --- configure.ac | 17 +++++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 6 ++++++ include/jemalloc/internal/util.h | 22 ++++++---------------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/configure.ac b/configure.ac index 7f19715..6fe366c 100644 --- a/configure.ac +++ b/configure.ac @@ -1050,6 +1050,23 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) + + +JE_COMPILABLE([a program using __builtin_unreachable], [ +void foo (void) { + __builtin_unreachable(); +} +], [ + { + foo(); + } +], [je_cv_gcc_builtin_unreachable]) +if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) +else + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) +fi + dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. 
dnl One of those two functions should (theoretically) exist on all platforms diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7de0cf7..22396b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -189,6 +189,12 @@ #undef JEMALLOC_TLS /* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + +/* * ffs*() functions to use for bitmapping. Don't use these directly; instead, * use ffs_*() from util.h. */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a0c2203..aee00d6 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -61,30 +61,20 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() abort() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() abort() #endif +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ -- cgit v0.12 From 38a96f07ac3b351aeb9f3d685eaf0d4f9f01195a Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 12 Jul 2016 15:52:18 -0700 Subject: Fix a bug in __builtin_unreachable configure check In 1167e9e, I accidentally tested je_cv_gcc_builtin_ffsl instead of je_cv_gcc_builtin_unreachable (copy-paste error), which meant that JEMALLOC_INTERNAL_UNREACHABLE was always getting defined as abort even if __builtin_unreachable support was detected. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 6fe366c..a58eb5e 100644 --- a/configure.ac +++ b/configure.ac @@ -1061,7 +1061,7 @@ void foo (void) { foo(); } ], [je_cv_gcc_builtin_unreachable]) -if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) else AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) -- cgit v0.12 From 11b5da7533f703f6274eae98d767dd1381fced15 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 8 Jul 2016 13:35:35 +0900 Subject: Change how the default zone is found On OSX 10.12, malloc_default_zone returns a special zone that is not present in the list of registered zones. That zone uses a "lite zone" if one is present (apparently enabled when malloc stack logging is enabled), or the first registered zone otherwise. In practice this means unless malloc stack logging is enabled, the first registered zone is the default. 
So get the list of zones to get the first one, instead of relying on malloc_default_zone. --- src/zone.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index 2c17123..bf5c930 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,6 +168,33 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } +static malloc_zone_t *get_default_zone() +{ + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; + + /* + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. + * So get the list of zones to get the first one, instead of relying on + * malloc_default_zone. + */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, + &num_zones)) { + /* Reset the value in case the failure happened after it was set. */ + num_zones = 0; + } + + if (num_zones) + return zones[0]; + + return malloc_default_zone(); +} + JEMALLOC_ATTR(constructor) void register_zone(void) @@ -177,7 +204,7 @@ register_zone(void) * If something else replaced the system default zone allocator, don't * register jemalloc's. */ - malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *default_zone = get_default_zone(); malloc_zone_t *purgeable_zone = NULL; if (!default_zone->zone_name || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { @@ -272,5 +299,7 @@ register_zone(void) malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - } while (malloc_default_zone() != &zone); + + default_zone = get_default_zone(); + } while (default_zone != &zone); } -- cgit v0.12 From 57cddffca6a9481973fad4e09577f8c64b7950d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 11:00:32 -0700 Subject: Formatting fixes. --- src/zone.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/zone.c b/src/zone.c index bf5c930..89a3062 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,7 +168,8 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } -static malloc_zone_t *get_default_zone() +static malloc_zone_t * +get_default_zone(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -179,20 +180,22 @@ static malloc_zone_t *get_default_zone() * if one is present (apparently enabled when malloc stack logging is * enabled), or the first registered zone otherwise. In practice this * means unless malloc stack logging is enabled, the first registered - * zone is the default. - * So get the list of zones to get the first one, instead of relying on - * malloc_default_zone. + * zone is the default. So get the list of zones to get the first one, + * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, - &num_zones)) { - /* Reset the value in case the failure happened after it was set. */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + (vm_address_t**)&zones, &num_zones)) { + /* + * Reset the value in case the failure happened after it was + * set. 
+ */ num_zones = 0; } if (num_zones) - return zones[0]; + return (zones[0]); - return malloc_default_zone(); + return (malloc_default_zone()); } JEMALLOC_ATTR(constructor) -- cgit v0.12 From 50a865e15adbc2c28dc92ac4e9c63b63835ac77b Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:20:32 -0700 Subject: Work around a weird pgi bug in test/unit/math.c pgi fails to compile math.c, reporting that `-INFINITY` in `pt_norm_expected[]` is a "Non-constant" expression. A simplified version of this failure is: ```c #include static double inf1, inf2 = INFINITY; // no complaints static double inf3 = INFINITY; // suddenly INFINITY is "Non-constant" int main() { } ``` ```sh PGC-S-0074-Non-constant expression in initializer (t.c: 4) ``` pgi errors on the declaration of inf3, and will compile fine if that line is removed. I've reported this bug to pgi, but in the meantime I just switched to using (DBL_MAX + DBL_MAX) to work around this bug. --- test/unit/math.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/math.c b/test/unit/math.c index ebec77a..adb72be 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -5,6 +5,10 @@ #include +#ifdef __PGI +#undef INFINITY +#endif + #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif -- cgit v0.12 From 5acef864f22384064b5bfb4ad3b50114b9ef214b Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:48:55 -0700 Subject: Don't use compact red-black trees with the pgi compiler Some bug (either in the red-black tree code, or in the pgi compiler) seems to cause red-black trees to become unbalanced. This issue seems to go away if we don't use compact red-black trees. Since red-black trees don't seem to be used much anymore, I opted for what seems to be an easy fix here instead of digging in and trying to find the root cause of the bug. Some context in case it's helpful: I experienced a ton of segfaults while using pgi as Chapel's target compiler with jemalloc 4.0.4. The little bit of debugging I did pointed me somewhere deep in red-black tree manipulation, but I didn't get a chance to investigate further. It looks like 4.2.0 replaced most uses of red-black trees with pairing-heaps, which seems to avoid whatever bug I was hitting. However, `make check_unit` was still failing on the rb test, so I figured the core issue was just being masked. Here's the `make check_unit` failure: ```sh === test/unit/rb === test_rb_empty: pass tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced node_remove:test/unit/rb.c:190: Failed assertion: (imbalances) == (0) --> 2 != 0: Tree is unbalanced : test/unit/rb.c:43: Failed assertion: "pathp[-1].cmp < 0" test/test.sh: line 22: 12926 Aborted Test harness error ``` While starting to debug I saw the RB_COMPACT option and decided to check if turning that off resolved the bug. It seems to have fixed it (`make check_unit` passes and the segfaults under Chapel are gone) so it seems like on okay work-around. 
I'd imagine this has performance implications for red-black trees under pgi, but if they're not going to be used much anymore it's probably not a big deal. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8f82edd..1e45bc9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -162,7 +162,9 @@ static const bool config_cache_oblivious = #endif #include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" -- cgit v0.12 From 3573fb93ceb309c51484753d5f1a95df09f97185 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 13:18:08 -0700 Subject: Add -dynamic for integration and stress tests with Cray compiler wrappers Cray systems come with compiler wrappers to simplify building parallel applications. CC is the C++ wrapper, and cc is the C wrapper. The wrappers call the base {Cray, Intel, PGI, or GNU} compiler with vendor specific flags. The "Programming Environment" (prgenv) that's currently loaded determines the base compiler. e.g. compiling with gnu looks something like: module load PrgEnv-gnu cc hello.c On most systems the wrappers defaults to `-static` mode, which causes them to only look for static libraries, and not for any dynamic ones (even if the dynamic version was explicitly listed.) The integration and stress tests expect to be using the .so, so we have to run the with -dynamic so that wrapper will find/use the .so. --- Makefile.in | 5 +++-- configure.ac | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 652f01f..54b56c2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -57,6 +57,7 @@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ @@ -299,11 +300,11 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index a58eb5e..44af36a 100644 --- a/configure.ac +++ b/configure.ac @@ -131,6 +131,18 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi +dnl check if a cray 
prgenv wrapper compiler is being used +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -269,11 +281,16 @@ SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -432,6 +449,7 @@ AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) +AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) -- cgit v0.12 From b770d2da1d94962860ea396b6e6dc83153ebdb55 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 14:20:28 -0700 Subject: Fix librt detection when using a Cray compiler wrapper The Cray compiler wrappers will often add `-lrt` to the base compiler with `-static` linking (the default at most sites.) However, `-lrt` isn't automatically added with `-dynamic`. This means that if jemalloc was built with `-static`, but then used in a program with `-dynamic` jemalloc won't have detected that librt is a dependency. The integration and stress tests use -dynamic, which is causing undefined references to clock_gettime(). This just adds an extra check for librt (ignoring the autoconf cache) with `-dynamic` thrown. It also stops filtering librt from the integration tests. With this `make check` passes for: - PrgEnv-gnu - PrgEnv-intel - PrgEnv-pgi PrgEnv-cray still needs more work (will be in a separate patch.) --- Makefile.in | 2 +- configure.ac | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 54b56c2..964ad87 100644 --- a/Makefile.in +++ b/Makefile.in @@ -300,7 +300,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) diff --git a/configure.ac b/configure.ac index 44af36a..2c6009a 100644 --- a/configure.ac +++ b/configure.ac @@ -1279,6 +1279,20 @@ CPPFLAGS="$CPPFLAGS -D_REENTRANT" dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) +dnl Cray wrapper compiler often adds `-lrt` when using `-static`. 
Check with +dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + JE_CFLAGS_APPEND([-dynamic]) + AC_SEARCH_LIBS([clock_gettime], [rt]) + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], -- cgit v0.12 From 8701bc70791e3717d155fb39da9868a0dbf09be6 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:07 -0700 Subject: Add initial support for building with the cray compiler Get jemalloc building and passing `make check_unit` with cray 8.4. An inlining bug in 8.4 results in internal errors while trying to build jemalloc. This has already been reported and fixed for the 8.5 release. In order to work around the inlining bug, disable gnu compatibility and limit ipa optimizations. I copied the msvc compiler check for cray, but note that we perform the test even if we think we're using gcc because cray pretends to be gcc if `-hgnu` (which is enabled by default) is used. I couldn't come up with a principled way to check for the inlining bug, so instead I just checked compiler versions. The build had lots of warnings I need to address and cray doesn't support -MM or -MT for dependency tracking, so I had to do `make CC_MM=`. --- configure.ac | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/configure.ac b/configure.ac index 2c6009a..77c1b72 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. CFLAGS=$CFLAGS AC_PROG_CC + if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -143,6 +144,30 @@ if test "x${PE_ENV}" != "x" ; then esac fi +AC_CACHE_CHECK([whether compiler is cray], + [je_cv_cray], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _CRAYC + int fail[-1]; +#endif +])], + [je_cv_cray=yes], + [je_cv_cray=no])]) + +if test "x${je_cv_cray}" = "xyes" ; then + AC_CACHE_CHECK([whether cray compiler version is 8.4], + [je_cv_cray_84], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail[-1]; +#endif +])], + [je_cv_cray_84=yes], + [je_cv_cray_84=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -164,6 +189,13 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi + if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_APPEND([-hipa2]) + JE_CFLAGS_APPEND([-hnognu]) + fi + fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. if test "x$EXTRA_CFLAGS" != "x" ; then -- cgit v0.12 From 1d42a99027991b6abc2fb8b3e99f136b7ad36751 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:29 -0700 Subject: Disable automatic dependency generation for the Cray compiler Cray only supports `-M` for generating dependency files. It does not support `-MM` or `-MT`, so don't try to use them. 
I just reused the existing mechanism for turning auto-dependency generation off (`CC_MM=`), but it might be more principled to add a configure test to check if the compiler supports `-MM` and `-MT`, instead of manually tracking which compilers don't support those flags. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 77c1b72..a864ce6 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,10 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then TEST_LD_MODE='-dynamic' fi +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) -- cgit v0.12 From 4b525183988bc88f2c49b59ed50d1610656e0fd6 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:53 -0700 Subject: Add Cray compiler's equivalent of -Werror before __attribute__ checks Cray uses -herror_on_warning instead of -Werror. Use it everywhere -Werror is currently used for __attribute__ checks so configure actually detects they're not supported. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index a864ce6..f1a34c3 100644 --- a/configure.ac +++ b/configure.ac @@ -504,6 +504,7 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -519,6 +520,7 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -529,6 +531,7 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -539,6 +542,7 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) -- cgit v0.12 From c128167bca9b652dd7cd90fd724ff0d02e66289f Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Thu, 7 Jul 2016 15:06:01 -0700 Subject: Disable irrelevant Cray compiler warnings if cc-silence is enabled Cray is pretty warning-happy, so disable ones that aren't helpful. Each warning has a numeric value instead of having named flags to disable specific warnings. Disable warnings 128 and 1357. 128: Ignore unreachable code warning. Cray warns about `not_reached()` not being reachable in a couple of places because it detects that some loops will never terminate. 
1357: Ignore warning about redefinition of malloc and friends With this patch, Cray 8.4.0 and 8.5.1 build cleanly and pass `make check` --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index f1a34c3..f1fc493 100644 --- a/configure.ac +++ b/configure.ac @@ -195,6 +195,12 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-hipa2]) JE_CFLAGS_APPEND([-hnognu]) fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_APPEND([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_APPEND([-hnomessage=1357]) + fi fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -- cgit v0.12 From b54c0c2925fd5acd63fd3957aa9e177c3fd8d27f Mon Sep 17 00:00:00 2001 From: Eric Le Bihan Date: Thu, 14 Jul 2016 22:44:01 +0200 Subject: Fix LG_QUANTUM definition for sparc64 GCC 4.9.3 cross-compiled for sparc64 defines __sparc_v9__, not __sparc64__ nor __sparcv9. This prevents LG_QUANTUM from being defined properly. Adding this new value to the check solves the issue. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1e45bc9..6a0aa00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -236,7 +236,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -- cgit v0.12 From 15da5f5d9d0e44cceb66bb13fad969d51145a616 Mon Sep 17 00:00:00 2001 From: Bai Date: Sun, 28 Aug 2016 13:51:57 +0800 Subject: Readme.txt error for building in the Windows The command can't work using sh -C sh -c "./autogen.sh CC=cl --enable-lazy-lock=no". Change the position of the colon, the command of autogen work. --- msvc/ReadMe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 02b97f7..b1c2fc5 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no 6. 
Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln -- cgit v0.12 From 92009b19d623748fdf8b65aea732c27ec4dd257b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6ckerbauer?= Date: Wed, 7 Sep 2016 08:34:17 +0200 Subject: use install command determined by configure --- Makefile.in | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/Makefile.in b/Makefile.in index 964ad87..8789c45 100644 --- a/Makefile.in +++ b/Makefile.in @@ -62,6 +62,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -311,54 +312,54 @@ build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - install -d $(LIBDIR)/pkgconfig + $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ - install -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d $(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man -- cgit v0.12 From 43d4d7c37376e2ac5faf9c0adf30647d7c288106 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 10 Jun 2016 00:17:19 +0900 Subject: Add Travis-CI configuration --- .travis.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..1fed4f8 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +language: c 
+ +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: gcc + env: + - EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: osx + compiler: clang + - os: osx + compiler: clang + env: + - EXTRA_FLAGS=-m32 + +before_script: + - autoconf + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - make -j3 + - make -j3 tests + +script: + - make check -- cgit v0.12 From 3bb044c80766e1cbe14e246ec2232650859a0dc9 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 17:10:16 +0900 Subject: Add an AppVeyor config This builds jemalloc and runs all checks with: - MSVC 2015 64-bits - MSVC 2015 32-bits - MINGW64 (from msys2) - MINGW32 (from msys2) Normally, AppVeyor configs are named appveyor.yml, but it is possible to configure the .yml file name in the AppVeyor project settings such that the file stays "hidden", like typical travis configs. --- .appveyor.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 0000000..ddd5c57 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,28 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check -- cgit v0.12 From 79647fe4650ddd5325ce2cedf0fced772eed3f71 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 15:55:40 -0700 Subject: Close file descriptor after reading "/proc/sys/vm/overcommit_memory". This bug was introduced by c2f970c32b527660a33fa513a76d913c812dcf7c (Modify pages_map() to support mapping uncommitted virtual memory.). This resolves #399. --- src/pages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pages.c b/src/pages.c index 2a9b7e3..05b0d69 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,6 +219,7 @@ os_overcommits_proc(void) return (false); /* Error. */ nread = read(fd, &buf, sizeof(buf)); + close(fd); if (nread < 1) return (false); /* Error. */ /* -- cgit v0.12 From af33e9a59735a2ee72132d3dd6e23fae6d296e34 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 23:17:39 +0900 Subject: Define 64-bits atomics unconditionally They are used on all platforms in prng.h. --- include/jemalloc/internal/atomic.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea1..3936f68 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,8 +66,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. 
*/ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) +#if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -125,7 +124,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. */ ); } -# elif (defined(JEMALLOC_C11ATOMICS)) +#elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -153,7 +152,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -# elif (defined(JEMALLOC_ATOMIC9)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -193,7 +192,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -# elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -225,7 +224,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -# elif (defined(_MSC_VER)) +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -255,7 +254,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -284,9 +283,8 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +#else +# error "Missing implementation for 64-bit atomic operations" #endif /******************************************************************************/ -- cgit v0.12 From c19b48fe73b18eb39182e15cb31f4695a7e10b7f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 15:06:50 -0700 Subject: Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ed62e0e..532255d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -19,7 +19,7 @@ brevity. Much more detail can be found in the git revision history: New features: - Add the arena..reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. (@jasone@) + an arena's allocations in a single operation. (@jasone) - Add the stats.retained and stats.arenas..retained statistics. (@jasone) - Add the --with-version configure option. (@jasone) - Support --with-lg-page values larger than actual page size. (@jasone) -- cgit v0.12 From bcd5424b1c5361534b3c535326d0b87f223a42e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 21:07:08 -0700 Subject: Use TSDN_NULL rather than NULL as appropriate. 
--- include/jemalloc/internal/mb.h | 4 ++-- src/jemalloc.c | 4 ++-- src/tsd.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 437c86f..5384728 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -105,8 +105,8 @@ mb_write(void) malloc_mutex_t mtx; malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(NULL, &mtx); - malloc_mutex_unlock(NULL, &mtx); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/src/jemalloc.c b/src/jemalloc.c index c1ecc73..0d776b6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1252,9 +1252,9 @@ malloc_init_hard_needed(void) if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { - malloc_mutex_unlock(NULL, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); CPU_SPINWAIT; - malloc_mutex_lock(NULL, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } diff --git a/src/tsd.c b/src/tsd.c index aeaa5e1..ec69a51 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. */ - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -190,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif -- cgit v0.12 From 1abb49f09d98e265ad92a831a056ccdfb4cf6041 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 16:16:11 -0700 Subject: Implement pz2ind(), pind2sz(), and psz2u(). These compute size classes and indices similarly to size2index(), index2size() and s2u(), respectively, but using the subset of size classes that are multiples of the page size. Note that pszind_t and szind_t are not interchangeable. --- include/jemalloc/internal/jemalloc_internal.h.in | 80 ++++++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/size_classes.sh | 46 ++++++++++-- src/arena.c | 4 +- src/jemalloc.c | 4 +- test/unit/size_classes.c | 94 +++++++++++++++++++++--- 6 files changed, 203 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6a0aa00..76dff3f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -187,6 +187,9 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. 
*/ typedef unsigned szind_t; @@ -545,6 +548,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -565,10 +571,74 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -577,9 +647,7 @@ size2index_compute(size_t size) } #endif { - szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + szind_t x = lg_floor((size<<1)-1); szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); szind_t grp = shift << LG_SIZE_CLASS_GROUP; @@ -665,6 +733,8 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -674,9 +744,7 @@ s2u_compute(size_t size) } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f2b6a55..a9d6973 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -401,6 +401,7 @@ pages_map pages_purge pages_trim pages_unmap +pind2sz pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -454,6 +455,8 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +psz2ind +psz2u purge_mode_names quarantine quarantine_alloc_hook diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 2b0ca29..f6fbce4 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -48,6 +48,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -74,14 +89,15 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") + # - psz ("yes" or "no") # - bin ("yes" or "no") + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -95,12 +111,13 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 # Tiny size classes. ndelta=0 @@ -112,6 +129,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -133,11 +153,17 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi done # All remaining groups. 
@@ -157,6 +183,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -183,6 +212,7 @@ size_classes() { # - nlbins # - nbins # - nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -200,13 +230,13 @@ cat <bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<run_size >> LG_PAGE] = true; \ } #define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ TAB_INIT_bin_##bin(index, (ZU(1)<" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), + "pind2sz() does not reverse psz2ind(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + + assert_u_eq(pind+1, psz2ind(size_class+1), + "Next size_class does not round up properly"); + + assert_zu_eq(size_class, (pind > 0) ? + psz2u(pind2sz(pind-1)+1) : psz2u(1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); + assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), + "psz2u() does not round up to next size class"); + } + + assert_u_eq(pind, psz2ind(pind2sz(pind)), + "psz2ind() does not reverse pind2sz()"); + assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), + "pind2sz() does not reverse psz2ind()"); + + assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); +} +TEST_END + TEST_BEGIN(test_overflow) { size_t max_size_class; max_size_class = get_max_size_class(); - assert_u_ge(size2index(max_size_class+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return >= NSIZES on overflow"); - - assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_u_eq(size2index(max_size_class+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return NSIZES on overflow"); + + assert_zu_eq(s2u(max_size_class+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, + "s2u() should return 0 for unsupported size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 
on overflow"); + + assert_u_eq(psz2ind(max_size_class+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + + assert_zu_eq(psz2u(max_size_class+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(SIZE_T_MAX), 0, + "psz2u() should return 0 on overflow"); } TEST_END @@ -108,5 +179,6 @@ main(void) return (test( test_size_classes, + test_psize_classes, test_overflow)); } -- cgit v0.12 From f193fd80cf1f99bce2bc9f5f4a8b149219965da2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:17:57 -0700 Subject: Refactor runs_avail. Use pszind_t size classes rather than szind_t size classes, and always reserve space for NPSIZES elements. This removes unused heaps that are not multiples of the page size, and adds (currently) unused heaps for all huge size classes, with the immediate benefit that the size of arena_t allocations is constant (no longer dependent on chunk size). --- include/jemalloc/internal/arena.h | 9 ++-- include/jemalloc/internal/jemalloc_internal.h.in | 27 ++++++++++- include/jemalloc/internal/private_symbols.txt | 3 +- src/arena.c | 61 +++++++++--------------- src/jemalloc.c | 17 +++++-- test/unit/run_quantize.c | 2 +- 6 files changed, 70 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b1de2b6..0600770 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -470,10 +470,12 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered heaps of this arena's available runs. The - * heaps are used for first-best-fit run allocation. + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. */ - arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -505,7 +507,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 76dff3f..9708df9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -461,10 +461,15 @@ extern unsigned narenas_auto; extern arena_t **arenas; /* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; +/* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). 
*/ -extern size_t const index2size_tab[NSIZES+1]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, @@ -549,6 +554,8 @@ void jemalloc_postfork_child(void); #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); size_t pind2sz(pszind_t pind); size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); @@ -596,7 +603,7 @@ psz2ind(size_t psz) } JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) +pind2sz_compute(pszind_t pind) { { @@ -617,6 +624,22 @@ pind2sz(pszind_t pind) } JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + +JEMALLOC_INLINE size_t psz2u(size_t psz) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a9d6973..c59f82b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -402,6 +402,8 @@ pages_purge pages_trim pages_unmap pind2sz +pind2sz_compute +pind2sz_lookup pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -480,7 +482,6 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor -run_quantize_max s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index a28c077..990d9fa 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,15 +21,12 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t run_quantize_max; /* Max run_quantize_*() input. */ static size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ -static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ -static szind_t runs_avail_nclasses; /* Number of runs_avail trees. 
*/ /******************************************************************************/ /* @@ -164,7 +161,7 @@ run_quantize_floor(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; @@ -187,7 +184,7 @@ run_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; @@ -200,25 +197,15 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif -static arena_run_heap_t * -arena_runs_avail_get(arena_t *arena, szind_t ind) -{ - - assert(ind >= runs_avail_bias); - assert(ind - runs_avail_bias < runs_avail_nclasses); - - return (&arena->runs_avail[ind - runs_avail_bias]); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -226,11 +213,11 @@ static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -1109,12 +1096,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - szind_t ind, i; + pszind_t pind, i; - ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= large_maxclass; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - arena_runs_avail_get(arena, i)); + &arena->runs_avail[i]); if (miscelm != NULL) return (&miscelm->run); } @@ -1967,7 +1955,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -3496,23 +3485,19 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - size_t arena_size; unsigned i; - /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * - runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * - sizeof(malloc_large_stats_t) + nhclasses) * - sizeof(malloc_huge_stats_t)); + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + + (nhclasses * sizeof(malloc_huge_stats_t)))); } else - arena = (arena_t *)base_alloc(tsdn, arena_size); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3524,11 +3509,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size)); + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size) + + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3562,8 +3547,10 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3748,6 +3735,7 @@ small_run_size_init(void) static bool run_quantize_init(void) { + size_t run_quantize_max; unsigned i; run_quantize_max = chunksize + large_pad; @@ -3827,9 +3815,6 @@ arena_boot(void) if (run_quantize_init()) return (true); - runs_avail_bias = size2index(PAGE); - runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; - return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 4645783..b9ff65f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -85,14 +85,25 @@ enum { }; static uint8_t malloc_slow_flags; -/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES+1] = { +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<> LG_PAGE; i++) { + for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; -- cgit v0.12 From 5d8db15db91c85d47b343cfc07fc6ea736f0de48 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:16:19 -0700 Subject: Simplify run quantization. 
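Run quantization can now be expressed directly in terms of the page-spaced size classes added by the psz2ind()/pind2sz()/psz2u() patch, rather than via memoized lookup tables.  The underlying floor/ceil relationship is roughly the following sketch (illustrative only; it ignores the large_pad adjustment and the index-underflow case that the diff below handles):

    /* Sketch: map a page-multiple run size onto the page-spaced size
     * classes; builds on psz2ind()/pind2sz() from the earlier patch. */
    static size_t
    run_quantize_ceil_sketch(size_t size)
    {

        /* Smallest page-spaced size class >= size. */
        return (pind2sz(psz2ind(size)));
    }

    static size_t
    run_quantize_floor_sketch(size_t size)
    {

        /* Largest page-spaced size class <= size. */
        return (pind2sz(psz2ind(size + 1) - 1));
    }
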
--- include/jemalloc/internal/arena.h | 2 +- src/arena.c | 180 ++++++-------------------------------- src/jemalloc.c | 3 +- 3 files changed, 31 insertions(+), 154 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0600770..fe20ab6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -602,7 +602,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -bool arena_boot(void); +void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 990d9fa..522483b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,10 +21,6 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -static size_t small_maxrun; /* Max run size for small size classes. */ -static bool *small_run_tab; /* Valid small run page multiples. */ -static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ -static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ @@ -74,83 +70,6 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a, ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, ph_link, arena_run_addr_comp) -static size_t -run_quantize_floor_compute(size_t size) -{ - size_t qsize; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* Don't change sizes that are valid small run sizes. */ - if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) - return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor_compute(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_ceil_compute_hard(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. - * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. - */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= small_maxrun); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } - } -} - -static size_t -run_quantize_ceil_compute(size_t size) -{ - size_t qsize = run_quantize_floor_compute(size); - - if (qsize < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. 
for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - qsize = run_quantize_ceil_compute_hard(qsize); - } - return (qsize); -} - #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) @@ -159,13 +78,27 @@ static size_t run_quantize_floor(size_t size) { size_t ret; + pszind_t pind; assert(size > 0); assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_floor_compute(size)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ + return (size); + } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } #ifdef JEMALLOC_JET @@ -187,8 +120,18 @@ run_quantize_ceil(size_t size) assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_ceil_compute(size)); + ret = run_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. + */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } return (ret); } #ifdef JEMALLOC_JET @@ -3680,9 +3623,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * bin_info->reg_interval) - pad_size + bin_info->redzone_size); - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); } @@ -3706,63 +3646,7 @@ bin_info_init(void) #undef SC } -static bool -small_run_size_init(void) -{ - - assert(small_maxrun != 0); - - small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >> - LG_PAGE)); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<> LG_PAGE)); - if (run_quantize_floor_tab == NULL) - return (true); - - run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * - (run_quantize_max >> LG_PAGE)); - if (run_quantize_ceil_tab == NULL) - return (true); - - for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { - size_t run_size = i << LG_PAGE; - - run_quantize_floor_tab[i-1] = - run_quantize_floor_compute(run_size); - run_quantize_ceil_tab[i-1] = - run_quantize_ceil_compute(run_size); - } - - return (false); -} - -bool +void arena_boot(void) { unsigned i; @@ -3810,12 +3694,6 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - if (small_run_size_init()) - return (true); - if (run_quantize_init()) - return 
(true); - - return (false); } void diff --git a/src/jemalloc.c b/src/jemalloc.c index b9ff65f..d3bb596 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1299,8 +1299,7 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - if (arena_boot()) - return (true); + arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) -- cgit v0.12 From b732c395b7755622dd469fb27a463adcfd3b3152 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 7 Oct 2016 08:47:16 -0700 Subject: Refine nstime_update(). Add missing #include . The critical time facilities appear to have been transitively included via unistd.h and sys/time.h, but in principle this omission was capable of having caused clock_gettime(CLOCK_MONOTONIC, ...) to have been overlooked in favor of gettimeofday(), which in turn could cause spurious non-monotonic time updates. Refactor nstime_get() out of nstime_update() and add configure tests for all variants. Add CLOCK_MONOTONIC_RAW support (Linux-specific) and mach_absolute_time() support (OS X-specific). Do not fall back to clock_gettime(CLOCK_REALTIME, ...). This was a fragile Linux-specific workaround, which we're unlikely to use at all now that clock_gettime(CLOCK_MONOTONIC_RAW, ...) is supported, and if we have no choice besides non-monotonic clocks, gettimeofday() is only incrementally worse. --- configure.ac | 49 +++++++++++--- .../jemalloc/internal/jemalloc_internal_decls.h | 4 ++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 15 +++++ include/jemalloc/internal/nstime.h | 3 - src/nstime.c | 76 ++++++++++++++-------- 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/configure.ac b/configure.ac index f1fc493..a7da9ff 100644 --- a/configure.ac +++ b/configure.ac @@ -345,11 +345,11 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. +CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" @@ -362,30 +362,26 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - CFLAGS="$CFLAGS" + dnl secure_getenv() is exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -404,13 +400,12 @@ case "${host}" in #error aout #endif ]])], - [CFLAGS="$CFLAGS"; abi="elf"], + [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' @@ -1339,6 +1334,44 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi +dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). 
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +], [je_cv_clock_monotonic_raw]) +if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) +fi + +dnl check for CLOCK_MONOTONIC. +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ +#include +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif +], [je_cv_clock_monotonic]) +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) +fi + +dnl Check for mach_absolute_time(). +JE_COMPILABLE([mach_absolute_time()], [ +#include +], [ + mach_absolute_time(); +], [je_cv_mach_absolute_time]) +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 2b8ca5d..910b2fc 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -19,6 +19,10 @@ # include # include # include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 22396b7..d10c8a4 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -77,6 +77,21 @@ #undef JEMALLOC_HAVE_ISSETUGID /* + * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + +/* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc * bootstrapping will cause recursion into the pthreads library. Therefore, if diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dc293b7..c892bac 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,9 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). 
*/ diff --git a/src/nstime.c b/src/nstime.c index aad2c26..cfb1c8e 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -97,6 +97,54 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) return (time->ns / divisor->ns); } +#ifdef _WIN32 +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) @@ -107,33 +155,7 @@ nstime_update(nstime_t *time) nstime_t old_time; nstime_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->ns = ticks * 100; - } -#elif JEMALLOC_CLOCK_GETTIME - { - struct timespec ts; - - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, &ts); - else - clock_gettime(CLOCK_REALTIME, &ts); - time->ns = ts.tv_sec * BILLION + ts.tv_nsec; - } -#else - { - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; - } -#endif + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { -- cgit v0.12 From 94e7ffa9794792d2ec70269a0ab9c282a32aa2ec Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 20:32:19 -0700 Subject: Refactor arena->decay_* into arena->decay.* (arena_decay_t). --- include/jemalloc/internal/arena.h | 99 +++++++++++++++++++++------------------ src/arena.c | 76 +++++++++++++++--------------- 2 files changed, 91 insertions(+), 84 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index fe20ab6..048e203 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -42,6 +42,7 @@ typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; @@ -257,6 +258,56 @@ struct arena_bin_info_s { uint32_t reg0_offset; }; +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. 
We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* decay_deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -394,52 +445,8 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t decay_time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ - nstime_t decay_interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t decay_epoch; - /* decay_deadline randomness generator. */ - uint64_t decay_jitter_state; - /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. - */ - nstime_t decay_deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. - */ - size_t decay_ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t decay_backlog_npages_limit; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. 
- */ - size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Decay-based purging state. */ + arena_decay_t decay; /* Extant huge allocations. */ ql_head(extent_node_t) huge; diff --git a/src/arena.c b/src/arena.c index 522483b..8f2e5d8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1187,14 +1187,14 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. */ - nstime_copy(&arena->decay_deadline, &arena->decay_epoch); - nstime_add(&arena->decay_deadline, &arena->decay_interval); - if (arena->decay_time > 0) { + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval))); - nstime_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); + nstime_add(&arena->decay.deadline, &jitter); } } @@ -1204,7 +1204,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t @@ -1229,7 +1229,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay_backlog[i] * h_steps[i]; + sum += arena->decay.backlog[i] * h_steps[i]; npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); @@ -1246,39 +1246,39 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) assert(arena_decay_deadline_reached(arena, time)); nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay_epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay_interval); + nstime_copy(&delta, &arena->decay.interval); nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay_epoch, &delta); + nstime_add(&arena->decay.epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); /* Update the backlog. */ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - - arena->decay_ndirty : 0; - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay_backlog_npages_limit = + ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
arena->ndirty - + arena->decay.ndirty : 0; + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay.backlog_npages_limit = arena_decay_backlog_npages_limit(arena); } @@ -1289,11 +1289,11 @@ arena_decay_npages_limit(arena_t *arena) assert(opt_purge == purge_mode_decay); - npages_limit = arena->decay_backlog_npages_limit; + npages_limit = arena->decay.backlog_npages_limit; /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay_ndirty) - npages_limit += arena->ndirty - arena->decay_ndirty; + if (arena->ndirty > arena->decay.ndirty) + npages_limit += arena->ndirty - arena->decay.ndirty; return (npages_limit); } @@ -1302,19 +1302,19 @@ static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay_time = decay_time; + arena->decay.time = decay_time; if (decay_time > 0) { - nstime_init2(&arena->decay_interval, decay_time, 0); - nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay_epoch, 0); - nstime_update(&arena->decay_epoch); - arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog_npages_limit = 0; - memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog_npages_limit = 0; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool @@ -1334,7 +1334,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay_time; + decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); @@ -1400,16 +1400,16 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) assert(opt_purge == purge_mode_decay); /* Purge all or nothing if the option is disabled. */ - if (arena->decay_time <= 0) { - if (arena->decay_time == 0) + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) arena_purge_to_limit(tsdn, arena, 0); return; } - nstime_copy(&time, &arena->decay_epoch); + nstime_copy(&time, &arena->decay.epoch); if (unlikely(nstime_update(&time))) { /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay_deadline); + nstime_copy(&time, &arena->decay.deadline); } if (arena_decay_deadline_reached(arena, &time)) @@ -3323,7 +3323,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay_time; + *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; } -- cgit v0.12 From 45a5bf677299eb152c3c47836bd5d946234ce40e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 22:15:10 -0700 Subject: Do not advance decay epoch when time goes backwards. Instead, move the epoch backward in time. Additionally, add nstime_monotonic() and use it in debug builds to assert that time only goes backward if nstime_update() is using a non-monotonic time source. 
--- include/jemalloc/internal/nstime.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 21 +++++++++++++++++---- src/nstime.c | 22 ++++++++++++++++++++++ test/unit/decay.c | 13 ++++++++++++- test/unit/nstime.c | 9 ++++++++- 6 files changed, 63 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index c892bac..93b27dc 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -31,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); #ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *nstime_update; #else +bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c59f82b..cd6681c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -360,6 +360,7 @@ nstime_idivide nstime_imultiply nstime_init nstime_init2 +nstime_monotonic nstime_ns nstime_nsec nstime_sec diff --git a/src/arena.c b/src/arena.c index 8f2e5d8..dc04acc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1406,10 +1406,23 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) return; } - nstime_copy(&time, &arena->decay.epoch); - if (unlikely(nstime_update(&time))) { - /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay.deadline); + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. Move the epoch back in time, with the + * expectation that time typically flows forward for long enough + * periods of time that epochs complete. Unfortunately, + * this strategy is susceptible to clock jitter triggering + * premature epoch advances, but clock jitter estimation and + * compensation isn't feasible here because calls into this code + * are event-driven. + */ + nstime_copy(&arena->decay.epoch, &time); + } else { + /* Verify that time does not go backwards. 
*/ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } if (arena_decay_deadline_reached(arena, &time)) diff --git a/src/nstime.c b/src/nstime.c index cfb1c8e..c420c88 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -98,6 +98,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) } #ifdef _WIN32 +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -110,6 +111,7 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -119,6 +121,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -128,6 +131,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -135,6 +139,7 @@ nstime_get(nstime_t *time) nstime_init(time, mach_absolute_time()); } #else +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { @@ -146,6 +151,23 @@ nstime_get(nstime_t *time) #endif #ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + +#ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) #endif diff --git a/test/unit/decay.c b/test/unit/decay.c index 70a2e67..b7e4e25 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,6 +2,7 @@ const char *malloc_conf = "purge:decay,decay_time:1"; +static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; @@ -9,6 +10,13 @@ static nstime_t time_mock; static bool nonmonotonic_mock; static bool +nstime_monotonic_mock(void) +{ + + return (false); +} + +static bool nstime_update_mock(nstime_t *time) { @@ -318,7 +326,9 @@ TEST_BEGIN(test_decay_nonmonotonic) nstime_update(&time_mock); nonmonotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -342,8 +352,9 @@ TEST_BEGIN(test_decay_nonmonotonic) config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); if (config_stats) - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; #undef NPS } diff --git a/test/unit/nstime.c b/test/unit/nstime.c index cd7d9a6..0368bc2 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -176,6 +176,13 @@ TEST_BEGIN(test_nstime_divide) } TEST_END +TEST_BEGIN(test_nstime_monotonic) +{ + + nstime_monotonic(); +} +TEST_END + TEST_BEGIN(test_nstime_update) { nstime_t nst; @@ -198,7 +205,6 @@ TEST_BEGIN(test_nstime_update) assert_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } - } TEST_END @@ -216,5 +222,6 @@ main(void) test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_monotonic, test_nstime_update)); } -- cgit v0.12 From a14712b4b87df5aa95446f91832ea4846a8f1475 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:28:43 -0700 Subject: Fix decay tests to all adapt to nstime_monotonic(). --- test/unit/decay.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index b7e4e25..e169ae2 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -7,13 +7,13 @@ static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool nonmonotonic_mock; +static bool monotonic_mock; static bool nstime_monotonic_mock(void) { - return (false); + return (monotonic_mock); } static bool @@ -21,9 +21,9 @@ nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (!nonmonotonic_mock) + if (monotonic_mock) nstime_copy(time, &time_mock); - return (nonmonotonic_mock); + return (!monotonic_mock); } TEST_BEGIN(test_decay_ticks) @@ -253,9 +253,11 @@ TEST_BEGIN(test_decay_ticker) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = false; + monotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -267,6 +269,7 @@ TEST_BEGIN(test_decay_ticker) "Expected nstime_update() to be called"); } + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; nstime_init(&time, 0); @@ -324,7 +327,7 @@ TEST_BEGIN(test_decay_nonmonotonic) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = true; + monotonic_mock = false; nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; -- cgit v0.12 From d419bb09ef6700dde95c74e1f1752f81e5d15d92 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:30:01 -0700 Subject: Fix and simplify decay-based purging. Simplify decay-based purging attempts to only be triggered when the epoch is advanced, rather than every time purgeable memory increases. In a correctly functioning system (not previously the case; see below), this only causes a behavior difference if during subsequent purge attempts the least recently used (LRU) purgeable memory extent is initially too large to be purged, but that memory is reused between attempts and one or more of the next LRU purgeable memory extents are small enough to be purged. In practice this is an arbitrary behavior change that is within the set of acceptable behaviors. 
As for the purging fix, assure that arena->decay.ndirty is recorded *after* the epoch advance and associated purging occurs. Prior to this fix, it was possible for purging during epoch advance to cause a substantially underrepresentative (arena->ndirty - arena->decay.ndirty), i.e. the number of dirty pages attributed to the current epoch was too low, and a series of unintended purges could result. This fix is also relevant in the context of the simplification described above, but the bug's impact would be limited to over-purging at epoch advances. --- include/jemalloc/internal/arena.h | 29 ++++------ src/arena.c | 109 ++++++++++++++++++++------------------ 2 files changed, 69 insertions(+), 69 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 048e203..1277d08 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -265,7 +265,7 @@ struct arena_decay_s { * and/or reused. */ ssize_t time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ + /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* * Time at which the current decay interval logically started. We do @@ -275,37 +275,30 @@ struct arena_decay_s { * merge all relevant activity into the most recently recorded epoch. */ nstime_t epoch; - /* decay_deadline randomness generator. */ + /* Deadline randomness generator. */ uint64_t jitter_state; /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. */ nstime_t deadline; /* * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. */ size_t ndirty; /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t backlog_npages_limit; - /* * Trailing log of how many unused dirty pages were generated during * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. + * relative to epoch. */ size_t backlog[SMOOTHSTEP_NSTEPS]; - }; struct arena_bin_s { diff --git a/src/arena.c b/src/arena.c index dc04acc..90b9d82 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1236,11 +1236,41 @@ arena_decay_backlog_npages_limit(const arena_t *arena) } static void -arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +arena_decay_backlog_update_last(arena_t *arena) +{ + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
+ arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} + +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ + + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + arena_decay_backlog_update_last(arena); +} + +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { uint64_t nadvance_u64; nstime_t delta; - size_t ndirty_delta; assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); @@ -1259,43 +1289,25 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) arena_decay_deadline_init(arena); /* Update the backlog. */ - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; + arena_decay_backlog_update(arena, nadvance_u64); +} - assert((uint64_t)nadvance_z == nadvance_u64); +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) +{ + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? arena->ndirty - - arena->decay.ndirty : 0; + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); arena->decay.ndirty = arena->ndirty; - arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay.backlog_npages_limit = - arena_decay_backlog_npages_limit(arena); } -static size_t -arena_decay_npages_limit(arena_t *arena) +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { - size_t npages_limit; - - assert(opt_purge == purge_mode_decay); - - npages_limit = arena->decay.backlog_npages_limit; - /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay.ndirty) - npages_limit += arena->ndirty - arena->decay.ndirty; - - return (npages_limit); + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); } static void @@ -1313,7 +1325,6 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); arena->decay.ndirty = arena->ndirty; - arena->decay.backlog_npages_limit = 0; memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } @@ -1395,7 +1406,6 @@ static void arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - size_t ndirty_limit; assert(opt_purge == purge_mode_decay); @@ -1411,32 +1421,29 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, &time) > 0)) { /* - * Time went backwards. Move the epoch back in time, with the - * expectation that time typically flows forward for long enough - * periods of time that epochs complete. 
Unfortunately, - * this strategy is susceptible to clock jitter triggering - * premature epoch advances, but clock jitter estimation and - * compensation isn't feasible here because calls into this code - * are event-driven. + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. */ nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); } else { /* Verify that time does not go backwards. */ assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(arena, &time); - - ndirty_limit = arena_decay_npages_limit(arena); - /* - * Don't try to purge unless the number of purgeable pages exceeds the - * current limit. + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. */ - if (arena->ndirty <= ndirty_limit) - return; - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); } void -- cgit v0.12 From a2539fab95008bee7fc1e4651d24b6a0427b88ce Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Oct 2016 22:58:40 -0700 Subject: Disallow 0x5a junk filling when running in Valgrind. Explicitly disallow junk:true and junk:free runtime settings when running in Valgrind, since deallocation-time junk filling and redzone validation cause false positive Valgrind reports. This resolves #470. --- src/jemalloc.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d3bb596..8731934 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1161,9 +1161,20 @@ malloc_conf_init(void) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "true"; + opt_junk_alloc = true; + opt_junk_free = true; + } } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1173,9 +1184,20 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } } else { malloc_conf_error( "Invalid conf value", k, -- cgit v0.12 From 9737685943fedf5796ff157306ca70aaa25750c7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 14:47:50 -0700 Subject: Add/use adaptive spinning. 
Add spin_t and spin_{init,adaptive}(), which provide a simple abstraction for adaptive spinning. Adaptively spin during busy waits in bootstrapping and rtree node initialization. --- Makefile.in | 1 + include/jemalloc/internal/jemalloc_internal.h.in | 4 ++ include/jemalloc/internal/spin.h | 51 ++++++++++++++++++++++++ src/jemalloc.c | 5 ++- src/rtree.c | 5 ++- src/spin.c | 2 + 6 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/spin.h create mode 100644 src/spin.c diff --git a/Makefile.in b/Makefile.in index 8789c45..a2d5594 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,6 +101,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9708df9..d644cea 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -369,6 +369,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -401,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -502,6 +504,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -534,6 +537,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h new file mode 100644 index 0000000..9ef5ceb --- /dev/null +++ b/include/jemalloc/internal/spin.h @@ -0,0 +1,51 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + 
CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/src/jemalloc.c b/src/jemalloc.c index 8731934..aec2a5e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1283,10 +1283,13 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. */ + spin_init(&spinner); do { malloc_mutex_unlock(TSDN_NULL, &init_lock); - CPU_SPINWAIT; + spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); diff --git a/src/rtree.c b/src/rtree.c index 3166b45..5590034 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -96,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { diff --git a/src/spin.c b/src/spin.c new file mode 100644 index 0000000..5242d95 --- /dev/null +++ b/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" -- cgit v0.12 From e2bcf037d445a84a71c7997670819ebd0a893b4a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 12:18:38 -0700 Subject: Make dss operations lockless. Rather than protecting dss operations with a mutex, use atomic operations. This has negligible impact on synchronization overhead during typical dss allocation, but is a substantial improvement for chunk_in_dss() and the newly added chunk_dss_mergeable(), which can be called multiple times during chunk deallocations. This change also has the advantage of avoiding tsd in deallocation paths associated with purging, which resolves potential deadlocks during thread exit due to attempted tsd resurrection. This resolves #425. 
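
The synchronization pattern this commit describes reduces to an optimistic compare-and-swap around sbrk(). The sketch below is illustrative only: the names dss_max_atomic and try_extend_dss are hypothetical, C11 atomics stand in for jemalloc's atomic_* wrappers, and error handling is simplified; the real logic is in the src/chunk_dss.c hunk that follows.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>

static _Atomic(void *) dss_max_atomic;	/* Hypothetical stand-in for dss_max. */

static bool
try_extend_dss(size_t size)
{
	void *max_cur = atomic_load(&dss_max_atomic);
	void *dss_next = (void *)((uintptr_t)max_cur + size);

	/*
	 * Optimistically publish the new upper limit before calling sbrk();
	 * a failed CAS means another thread is extending concurrently.
	 */
	if (!atomic_compare_exchange_strong(&dss_max_atomic, &max_cur, dss_next))
		return (false);

	if (sbrk((intptr_t)size) == max_cur)
		return (true);		/* The published limit is now real. */

	/*
	 * sbrk() lost a race with a raw caller outside the allocator (or hit
	 * OOM): roll back the optimistic update.  If the rollback CAS fails,
	 * a later successful extension already superseded it, so no action is
	 * needed.
	 */
	atomic_compare_exchange_strong(&dss_max_atomic, &dss_next, max_cur);
	return (false);
}
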
--- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/chunk_dss.h | 12 +- include/jemalloc/internal/huge.h | 2 +- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 2 +- src/chunk.c | 46 ++----- src/chunk_dss.c | 179 +++++++++++++++----------- src/ctl.c | 4 +- src/huge.c | 8 +- src/jemalloc.c | 6 +- test/unit/junk.c | 4 +- 11 files changed, 127 insertions(+), 146 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c9fd4ec..e199a03 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 724fa57..da8511b 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,13 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); -void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); +void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index b5fa9e6..22184d9 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsdn_t *tsdn, void *ptr); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cd6681c..642c3de 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -167,20 +167,15 @@ chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent +chunk_dss_mergeable chunk_dss_prec_get chunk_dss_prec_set -chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 90b9d82..7651495 100644 --- a/src/arena.c +++ b/src/arena.c 
@@ -3499,7 +3499,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = chunk_dss_prec_get(); ql_new(&arena->achunks); diff --git a/src/chunk.c b/src/chunk.c index dff537f..302b98c 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -611,10 +611,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static bool -chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +chunk_dalloc_default_impl(void *chunk, size_t size) { - if (!have_dss || !chunk_in_dss(tsdn, chunk)) + if (!have_dss || !chunk_in_dss(chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } @@ -623,11 +623,8 @@ static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - return (chunk_dalloc_default_impl(tsdn, chunk, size)); + return (chunk_dalloc_default_impl(chunk, size)); } void @@ -645,7 +642,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to deallocate. */ if (chunk_hooks->dalloc == chunk_dalloc_default) { /* Call directly to propagate tsdn. */ - err = chunk_dalloc_default_impl(tsdn, chunk, size); + err = chunk_dalloc_default_impl(chunk, size); } else err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); @@ -718,13 +715,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, - chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); @@ -734,11 +730,8 @@ static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); + return (chunk_merge_default_impl(chunk_a, chunk_b)); } static rtree_node_elm_t * @@ -782,32 +775,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); + if (have_dss) + chunk_dss_boot(); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0b1f82b..85a1354 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. 
*/ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. */ static void *dss_max; /******************************************************************************/ @@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +chunk_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +chunk_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. */ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. 
*/ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(tsdn, &dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } -bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +static bool +chunk_in_dss_helper(void *chunk, void *max) { - bool ret; - - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - return (ret); + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); } bool -chunk_dss_boot(void) +chunk_in_dss(void *chunk) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); - dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; - dss_max = dss_base; - - return (false); + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); } -void -chunk_dss_prefork(tsdn_t *tsdn) +bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) { + void *max; - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_parent(tsdn_t *tsdn) -{ + cassert(have_dss); - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); } void -chunk_dss_postfork_child(tsdn_t *tsdn) +chunk_dss_boot(void) { - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); + cassert(have_dss); + + dss_base = chunk_dss_sbrk(0); + dss_exhausted = (unsigned)(dss_base == (void *)-1); + dss_max = dss_base; } /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index dad8008..5d2c8db 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + chunk_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = chunk_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; diff --git a/src/huge.c b/src/huge.c index 3a2877c..19ca3f0 100644 --- a/src/huge.c +++ b/src/huge.c @@ -114,7 +114,7 @@ huge_palloc(tsdn_t 
*tsdn, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -122,7 +122,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -221,7 +221,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { @@ -402,7 +402,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), + huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); diff --git a/src/jemalloc.c b/src/jemalloc.c index aec2a5e..b0ebf81 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1115,8 +1115,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, - i)) { + if (chunk_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -2783,7 +2782,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2812,7 +2810,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2837,7 +2834,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/test/unit/junk.c b/test/unit/junk.c index acddc60..460bd52 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsdn, ptr, usize); + huge_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in -- cgit v0.12 From 962a2979e353f876f3725417179f201e671d9dbb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 20 Oct 2016 23:59:12 -0700 Subject: Do not (recursively) allocate within tsd_fetch(). Refactor tsd so that tsdn_fetch() does not trigger allocation, since allocation could cause infinite recursion. This resolves #458. 
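
The essence of the refactoring, fetching thread-specific data without ever allocating it, can be sketched as follows. This is a simplified illustration rather than the jemalloc tsd machinery: my_tsd_get and my_tsd_wrapper_t are hypothetical names, and the macro-generated variants in include/jemalloc/internal/tsd.h below carry the real logic.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

typedef struct {
	int val;
} my_tsd_wrapper_t;	/* Hypothetical; stands in for the tsd wrapper. */

static pthread_key_t my_tsd_key;

static my_tsd_wrapper_t *
my_tsd_get(bool init)
{
	my_tsd_wrapper_t *wrapper = pthread_getspecific(my_tsd_key);

	if (wrapper == NULL) {
		if (!init) {
			/* Caller cannot tolerate allocation; report absence. */
			return (NULL);
		}
		/* Only allocate when the caller explicitly permits it. */
		wrapper = calloc(1, sizeof(*wrapper));
		if (wrapper != NULL)
			pthread_setspecific(my_tsd_key, wrapper);
	}
	return (wrapper);
}

A caller that must not allocate passes init=false and treats a NULL result as "no thread state yet", which mirrors how tsdn_fetch() breaks the recursion in the diff below.
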
--- include/jemalloc/internal/ckh.h | 8 +-- include/jemalloc/internal/jemalloc_internal.h.in | 11 ++- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/prof.h | 8 +-- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/tsd.h | 74 ++++++++++++++++----- src/ckh.c | 42 ++++++------ src/ctl.c | 4 +- src/huge.c | 4 +- src/jemalloc.c | 9 ++- src/prof.c | 85 ++++++++++++------------ src/tcache.c | 8 +-- test/unit/ckh.c | 42 ++++++------ test/unit/tsd.c | 6 +- 14 files changed, 176 insertions(+), 130 deletions(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 46e151c..f75ad90 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d644cea..fdc8fef 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -479,6 +479,7 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); @@ -574,7 +575,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); @@ -912,14 +913,10 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsdn_t *tsdn, arena_t *arena) +arena_ichoose(tsd_t *tsd, arena_t *arena) { - assert(!tsdn_null(tsdn) || arena != NULL); - - if (!tsdn_null(tsdn)) - return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); - return (arena); + return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 642c3de..6221179 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,4 +1,5 @@ a0dalloc +a0get a0malloc arena_aalloc arena_alloc_junk_small @@ -545,7 +546,9 @@ tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch +tsd_fetch_impl tsd_get +tsd_get_allocates tsd_iarena_get tsd_iarena_set tsd_iarenap_get diff --git 
a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 21dff5f..8293b71 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header; void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsdn_t *tsdn, size_t lg_sample); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -315,7 +315,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsdn_t *tsdn); +bool prof_boot2(tsd_t *tsd); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd_tsdn(tsd)); + tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 70883b1..01ba062 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -145,7 +145,7 @@ tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index bf11341..9055aca 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * * bool example_tsd_boot(void) {...} * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get() {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -105,7 +105,7 @@ a_name##tsd_boot(void); \ a_attr bool \ a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -264,9 +270,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -325,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -392,14 +404,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -408,7 +428,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -452,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -520,14 +540,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -536,7 +564,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -639,6 +667,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); @@ -658,9 +687,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -677,6 +710,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { @@ -723,7 +763,7 @@ tsdn_fetch(void) if (!tsd_booted_get()) return (NULL); - return (tsd_tsdn(tsd_fetch())); + return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/ckh.c b/src/ckh.c index 747c1c8..3be671c 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); -static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -244,7 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsdn_t *tsdn, ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +270,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +299,7 @@ label_return: } static void -ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +314,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +347,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } bool -ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +391,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +404,7 @@ label_return: } void -ckh_delete(tsdn_t *tsdn, ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +421,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +468,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsdn, ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -480,7 +480,7 @@ label_return: } bool -ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +502,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ - ckh_shrink(tsdn, ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index 5d2c8db..bc78b20 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1478,7 +1478,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { + if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd_tsdn(tsd), lg_sample); + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/src/huge.c b/src/huge.c index 19ca3f0..62e6932 100644 --- a/src/huge.c +++ b/src/huge.c @@ -54,6 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, { void *ret; size_t ausize; + arena_t *iarena; extent_node_t *node; bool is_zeroed; @@ -67,8 +68,9 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get(); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); + CACHELINE, false, NULL, true, iarena); if (node == NULL) return (NULL); diff --git a/src/jemalloc.c b/src/jemalloc.c index b0ebf81..53fcae3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -340,6 +340,13 @@ a0idalloc(void *ptr, bool is_metadata) idalloctm(TSDN_NULL, ptr, false, is_metadata, true); } +arena_t * +a0get(void) +{ + + return (a0); +} + void * a0malloc(size_t size) { @@ -1454,7 +1461,7 @@ malloc_init_hard(void) return (true); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } diff --git a/src/prof.c b/src/prof.c index c1f58d4..140d5b2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); @@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ @@ -651,7 +651,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); @@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); + prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); @@ -733,7 +733,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. */ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd_tsdn(tsd), NULL), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,8 +810,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, - ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) @@ -1791,7 +1790,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) } static prof_tdata_t * -prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1799,7 +1798,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) @@ -1813,9 +1812,9 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return (NULL); } @@ -1829,19 +1828,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsdn_t *tsdn) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, - prof_thread_active_init_get(tsdn))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool @@ -1866,31 +1865,29 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, } static void -prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - - assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsdn, tdata->thread_name, NULL, true, true); - ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void -prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsdn, &tdatas_mtx); - prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1913,7 +1910,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) destroy_tdata = false; malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1926,8 +1923,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, - thread_name, active)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1956,30 +1953,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } void -prof_reset(tsdn_t *tsdn, size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t 
*next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsdn, &prof_dump_mtx); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsdn); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsdn, to_destroy, false); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - malloc_mutex_unlock(tsdn, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -2189,7 +2186,7 @@ prof_boot1(void) } bool -prof_boot2(tsdn_t *tsdn) +prof_boot2(tsd_t *tsd) { cassert(config_prof); @@ -2215,7 +2212,7 @@ prof_boot2(tsdn_t *tsdn) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2246,8 +2243,8 @@ prof_boot2(tsdn_t *tsdn) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS - * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2256,7 +2253,7 @@ prof_boot2(tsdn_t *tsdn) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); diff --git a/src/tcache.c b/src/tcache.c index 175759c..f97aa42 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -445,14 +445,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsdn_t *tsdn, unsigned *r_ind) +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsdn, sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -460,10 +460,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsdn, NULL); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsdn, arena); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 961e2ac..2cbc226 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsdn_t *tsdn; + 
tsd_t *tsd; ckh_t ckh; const char *strs[] = { "a string", @@ -30,9 +30,9 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), @@ -40,7 +40,7 @@ TEST_BEGIN(test_count_insert_search_remove) /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsdn, &ckh, strs[i], strs[i]); + ckh_insert(tsd, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/tsd.c 
b/test/unit/tsd.c index 7dde4b7..4e2622a 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(), DATA_INIT, + assert_x_eq(*data_tsd_get(true), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(), d, + assert_x_eq(*data_tsd_get(true), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p); -- cgit v0.12 From 5569b4a42c85f951f783b315bd49668a6fa764e3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Oct 2016 21:52:36 -0700 Subject: Use --whole-archive when linking integration tests on MinGW. Prior to this change, the malloc_conf weak symbol provided by the jemalloc dynamic library is always used, even if the application provides a malloc_conf symbol. Use the --whole-archive linker option to allow the weak symbol to be overridden. --- Makefile.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index a2d5594..de28218 100644 --- a/Makefile.in +++ b/Makefile.in @@ -125,6 +125,11 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (pecoff, $(ABI)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -136,7 +141,11 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (pecoff, $(ABI)) +C_UTIL_INTEGRATION_SRCS := +else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ @@ -302,7 +311,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) -- cgit v0.12 From dc553d52d82380ab0c99bb0fa97ae5c1f11ab2be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 00:41:15 -0700 Subject: Fix over-sized allocation of rtree leaf nodes. Use the correct level metadata when allocating child nodes so that leaf nodes don't end up over-sized (2^16 elements vs 2^4 elements). 
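The arithmetic behind the over-sizing, as a minimal standalone sketch in C (the per-level subkey widths below are illustrative, chosen only to reproduce the 2^16-vs-2^4 mismatch described above; they are not necessarily the configured rtree geometry):

#include <stdio.h>
#include <stddef.h>

/*
 * Hypothetical per-level subkey widths for a radix tree whose leaf level
 * consumes only a few key bits.  Sizing a child node with its parent's
 * metadata (level) instead of its own (level+1) inflates every leaf node.
 */
static const unsigned level_bits[] = {16, 16, 4};

static size_t
node_nelms(unsigned level)
{

	return ((size_t)1 << level_bits[level]);
}

int
main(void)
{
	unsigned parent_level = 1;	/* Node whose children are leaves. */

	/* Buggy: leaf sized from the parent's level metadata (2^16). */
	printf("over-sized leaf: %zu elements\n", node_nelms(parent_level));
	/* Fixed: leaf sized from its own level+1 metadata (2^4). */
	printf("correct leaf:    %zu elements\n", node_nelms(parent_level + 1));
	return (0);
}

With these illustrative widths each leaf node ends up 4096 times larger than necessary, which is the kind of waste the one-character level+1 change below removes.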
--- src/rtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rtree.c b/src/rtree.c index 5590034..f2e2997 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -128,5 +128,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(rtree, level+1, &elm->child)); } -- cgit v0.12 From 1eb801bcad74f4b7eb4d5ab3ce2d67935c10ac58 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 15:41:43 -0700 Subject: Do not force lazy lock on Windows. This reverts 13473c7c66a81a4dc1cf11a97e9c8b1dbb785b64, which was intended to work around bootstrapping issues when linking statically. However, this actually causes problems in various other configurations, so this reversion may force a future fix for the underlying problem, if it still exists. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index a7da9ff..d5ffd4c 100644 --- a/configure.ac +++ b/configure.ac @@ -426,7 +426,6 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" -- cgit v0.12 From 875ff15e6a99f4ff4d8aaaaf76d9dc5f3d8f1e39 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 17:10:56 -0700 Subject: Only use --whole-archive with gcc. Conditionalize use of --whole-archive on the platform plus compiler, rather than on the ABI. This fixes a regression caused by 7b24c6e5570062495243f1e55131b395adb31e33 (Use --whole-archive when linking integration tests on MinGW.). --- Makefile.in | 5 +++-- configure.ac | 3 +++ msvc/ReadMe.txt | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index de28218..11e94b7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -52,6 +52,7 @@ enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -125,7 +126,7 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive else LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @@ -141,7 +142,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c diff --git a/configure.ac b/configure.ac index d5ffd4c..c45d821 100644 --- a/configure.ac +++ b/configure.ac @@ -313,6 +313,7 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" @@ -442,6 +443,7 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -479,6 +481,7 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) +AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 
b1c2fc5..77d567d 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no + sh -c "CC=cl ./autogen.sh" 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln -- cgit v0.12 From e7d6779918d3e18178418e69e8ca496f2f6446f7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:23:48 -0700 Subject: Only link with libm (-lm) if necessary. This fixes warnings when building with MSVC. --- Makefile.in | 7 ++++--- configure.ac | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index 11e94b7..9d6b2db 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,6 +63,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ INSTALL = @INSTALL@ ifeq (macho, $(ABI)) @@ -308,15 +309,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index c45d821..d369d6c 100644 --- a/configure.ac +++ b/configure.ac @@ -494,6 +494,15 @@ AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) +dnl Determine whether libm must be linked to use e.g. log(3). +AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi +AC_SUBST(LM) + JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -940,9 +949,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" + dnl Heap profiling uses the log(3) function. 
+ if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi AC_DEFINE([JEMALLOC_PROF], [ ]) -- cgit v0.12 From 2c53faf352ca7722f1a776c8c381b01da5b4fa96 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 10:44:39 -0700 Subject: Periodically purge in memory-intensive integration tests. This resolves #393. --- test/integration/aligned_alloc.c | 13 ++++++++++--- test/integration/mallocx.c | 7 +++++++ test/integration/posix_memalign.c | 13 ++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 6090014..80bb38f 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,9 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -74,6 +72,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -110,7 +109,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 55e1a09..69ce781 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -196,6 +196,13 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 19741c6..171bcea 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,9 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -66,6 +64,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -104,7 +103,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END -- cgit v0.12 From eaecaad8ea9fd9cd8b57e49834b5e3332f911c40 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:00:36 -0700 Subject: Periodically purge in memory-intensive integration tests. This resolves #393. 
--- test/integration/mallocx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 69ce781..79ab494 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -139,6 +139,13 @@ TEST_BEGIN(test_basic) rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXSZ } -- cgit v0.12 From b99c72f3d29e3590ae81959922d0032a29dbace9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:23:24 -0700 Subject: Reduce memory requirements for regression tests. This is intended to drop memory usage to a level that AppVeyor test instances can handle. This resolves #393. --- test/integration/aligned_alloc.c | 23 +++++++++++++------- test/integration/mallocx.c | 44 ++++++++++++++++++++++----------------- test/integration/posix_memalign.c | 23 +++++++++++++------- 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 80bb38f..5843842 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,7 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -109,13 +122,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 79ab494..43b76eb 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -50,6 +50,19 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. 
+ */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + TEST_BEGIN(test_overflow) { size_t hugemax; @@ -96,6 +109,7 @@ TEST_BEGIN(test_oom) if (ptrs[i] != NULL) dallocx(ptrs[i], 0); } + purge(); #if LG_SIZEOF_PTR == 3 assert_ptr_null(mallocx(0x8000000000000000ULL, @@ -113,7 +127,7 @@ TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 26) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -122,30 +136,28 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", + nsz); rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXSZ } @@ -153,7 +165,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -203,13 +215,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 171bcea..e22e102 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,7 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -103,13 +116,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. 
- */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } -- cgit v0.12 From ed84764a2a6d766a74fa1df3223d69977d732510 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 28 Oct 2016 13:51:52 -0700 Subject: Support static linking of jemalloc with glibc glibc defines its malloc implementation with several weak and strong symbols: strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc) strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree) strong_alias (__libc_free, __free) strong_alias (__libc_free, free) strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc) The issue is not with the weak symbols, but that other parts of glibc depend on __libc_malloc explicitly. Defining them in terms of jemalloc API's allows the linker to drop glibc's malloc.o completely from the link, and static linking no longer results in symbol collisions. Another wrinkle: jemalloc during initialization calls sysconf to get the number of CPU's. GLIBC allocates for the first time before setting up isspace (and other related) tables, which are used by sysconf. Instead, use the pthread API to get the number of CPUs with GLIBC, which seems to work. This resolves #442. --- .../jemalloc/internal/jemalloc_internal_decls.h | 3 +++ src/jemalloc.c | 31 ++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 910b2fc..1d7f207 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include # include # include diff --git a/src/jemalloc.c b/src/jemalloc.c index 53fcae3..b370f9c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -808,6 +808,18 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) + /* + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. + */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -2036,6 +2048,25 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +/* + * To enable static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. 
*/ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS #endif /* -- cgit v0.12 From 35799a50308b5c88ba8ed41f4e48d3b619482c7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:03:25 -0700 Subject: Do not mark malloc_conf as weak for unit tests. This is generally correct (no need for weak symbols since no jemalloc library is involved in the link phase), and avoids linking problems (apparently unininitialized non-NULL malloc_conf) when using cygwin with gcc. --- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b370f9c..2435763 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,7 +5,11 @@ /* Data. */ /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef JEMALLOC_JET + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true -- cgit v0.12 From e46f8f97bc4dc3298e1d3b452ee69616fdbcd43e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:59:42 -0700 Subject: Do not mark malloc_conf as weak on Windows. This works around malloc_conf not being properly initialized by at least the cygwin toolchain. Prior build system changes to use -Wl,--[no-]whole-archive may be necessary for malloc_conf resolution to work properly as a non-weak symbol (not tested). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2435763..8210086 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,7 +6,7 @@ /* Runtime configuration options. */ const char *je_malloc_conf -#ifndef JEMALLOC_JET +#ifndef _WIN32 JEMALLOC_ATTR(weak) #endif ; -- cgit v0.12 From 35a108c809038179b7d9932447c75e02171dc3e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:14:55 -0700 Subject: Fix EXTRA_CFLAGS to not affect configuration. --- Makefile.in | 3 ++- configure.ac | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile.in b/Makefile.in index 9d6b2db..d13c7f1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,8 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ diff --git a/configure.ac b/configure.ac index d369d6c..1a89ef1 100644 --- a/configure.ac +++ b/configure.ac @@ -203,10 +203,7 @@ if test "x$CFLAGS" = "x" ; then fi fi fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. 
-if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi +AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -1883,6 +1880,7 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) -- cgit v0.12 From c443b67561891ae68d688daf5f8ce37820cdba2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:41:04 -0700 Subject: Use syscall(2) rather than {open,read,close}(2) during boot. Some applications wrap various system calls, and if they call the allocator in their wrappers, unexpected reentry can result. This is not a general solution (many other syscalls are spread throughout the code), but this resolves a bootstrapping issue that is apparently common. This resolves #443. --- src/pages.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/pages.c b/src/pages.c index 05b0d69..84e2216 100644 --- a/src/pages.c +++ b/src/pages.c @@ -207,6 +207,11 @@ os_overcommits_sysctl(void) #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. + */ static bool os_overcommits_proc(void) { @@ -214,12 +219,26 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; +#ifdef SYS_open + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif if (fd == -1) return (false); /* Error. */ +#ifdef SYS_read + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else nread = read(fd, &buf, sizeof(buf)); +#endif + +#ifdef SYS_close + syscall(SYS_close, fd); +#else close(fd); +#endif + if (nread < 1) return (false); /* Error. */ /* -- cgit v0.12 From 1d57c03e331ec9763ba55476a53aa1716f1bc8e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:55:08 -0700 Subject: Use CLOCK_MONOTONIC_COARSE rather than COARSE_MONOTONIC_RAW. The raw clock variant is slow (even relative to plain CLOCK_MONOTONIC), whereas the coarse clock variant is faster than CLOCK_MONOTONIC, but still has resolution (~1ms) that is adequate for our purposes. This resolves #479. --- configure.ac | 12 ++++++------ include/jemalloc/internal/jemalloc_internal_defs.h.in | 4 ++-- src/nstime.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index 1a89ef1..40681d1 100644 --- a/configure.ac +++ b/configure.ac @@ -1342,16 +1342,16 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi -dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). -JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ #include ], [ struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); -], [je_cv_clock_monotonic_raw]) -if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +], [je_cv_clock_monotonic_coarse]) +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) fi dnl check for CLOCK_MONOTONIC. 
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d10c8a4..6824ab7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -77,9 +77,9 @@ #undef JEMALLOC_HAVE_ISSETUGID /* - * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/src/nstime.c b/src/nstime.c index c420c88..0948e29 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -110,14 +110,14 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC -- cgit v0.12 From 4752a54eebe1945d1cf9eeecaccbca3ed743f240 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 11:45:41 -0700 Subject: Refactor witness_unlock() to fix undefined test behavior. This resolves #396. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/witness.h | 39 +++++++++++++++++++-------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 6221179..09ff832 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -607,6 +607,7 @@ witness_lock witness_lock_error witness_lockless_error witness_not_owner_error +witness_owner witness_owner_error witness_postfork_child witness_postfork_parent diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d78dca2..cdf15d7 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -108,6 +108,7 @@ void witness_postfork_child(tsd_t *tsd); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_lockless(tsdn_t *tsdn); @@ -116,12 +117,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + JEMALLOC_INLINE void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; if (!config_debug) return; @@ -132,11 +146,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } + if (witness_owner(tsd, witness)) + return; witness_owner_error(witness); } @@ -238,10 +249,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness->rank == 
WITNESS_RANK_OMIT) return; - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); } #endif -- cgit v0.12 From b599b32280e1142856b0b96293a71e1684b1ccfb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 15:28:54 -0700 Subject: Add "J" (JSON) support to malloc_stats_print(). This resolves #474. --- doc/jemalloc.xml.in | 43 +- src/stats.c | 1231 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 876 insertions(+), 398 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8817229..30b2bdf 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -410,29 +410,30 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print() function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message() if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character + The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless J is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying g as a character within the opts string. Note that malloc_message() uses the - mallctl*() functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. - + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If is specified during + configuration, m and a can be specified to + omit merged arena and per arena statistics, respectively; + b, l, and h can be specified + to omit per size class statistics for bins, large objects, and huge + objects, respectively. Unrecognized characters are silently ignored. + Note that thread caching may prevent some statistics from being completely + up to date, since extra locking would be required to merge counters that + track thread cache operations. 
The malloc_usable_size() function returns the usable size of the allocation pointed to by diff --git a/src/stats.c b/src/stats.c index 073be4f..bd8af39 100644 --- a/src/stats.c +++ b/src/stats.c @@ -33,85 +33,106 @@ bool opt_stats_print = false; size_t stats_cactive = 0; /******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); - -/******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { + CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); + "\t\t\t\t\"bins\": [\n"); } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util nfills" + " nflushes newruns reruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util newruns" + " reruns\n"); + } } - CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; + size_t reg_size, run_size, curregs; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - if (nruns == 0) - in_gap = true; - else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - char util[6]; /* "x.yyy". 
*/ + in_gap_prev = in_gap; + in_gap = (nruns == 0); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + nreruns, + curruns, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -138,7 +159,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, reruns); + nruns, nreruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -147,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + run_size / page, util, nruns, nreruns); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", (large || huge) ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool huge, unsigned i) { unsigned nbins, nlruns, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lruns\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curruns\n"); + } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -179,17 +210,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + curruns, + (j + 1 < nlruns) ? "," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -197,25 +236,35 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", huge ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"hchunks\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc" + " ndalloc nrequests curhchunks\n"); + } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -226,18 +275,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, + &curhchunks, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curhchunks\": %zu\n" + "\t\t\t\t\t}%s\n", + curhchunks, + (j + 1 < nhchunks) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -246,15 +302,20 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + bool json, unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; @@ -272,266 +333,435 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nthreads\": %u,\n", nthreads); + } else { + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + } else { + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } } } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + "decay time: %zd\n", decay_time); + } else { + malloc_cprintf(write_cb, cbopaque, + "decay time: N/A\n"); + } + } } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " - "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": 
%"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + } + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - 
malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"huge\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); - if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); + } + + if (bins) { + stats_arena_bins_print(write_cb, cbopaque, json, large, huge, + i); + } if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + stats_arena_hchunks_print(write_cb, cbopaque, json, i); } -void -stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) { - int err; - uint64_t epoch; - size_t u64sz; - bool general = true; - bool merged = true; - bool unmerged = true; - bool bins = true; - bool large = true; - bool huge = true; + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; - /* - * Refresh stats, in case mallctl() was called by the application. 
- * - * Check for OOM here, since refreshing the ctl cache can trigger - * allocation. In practice, none of the subsequent mallctl()-related - * calls in this function will cause OOM if this one succeeds. - * */ - epoch = 1; - u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); - if (err != 0) { - if (err == EAGAIN) { - malloc_write(": Memory allocation failure in " - "mallctl(\"epoch\", ...)\n"); - return; - } - malloc_write(": Failure in mallctl(\"epoch\", " - "...)\n"); - abort(); - } + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); - if (opts != NULL) { - unsigned i; + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - for (i = 0; opts[i] != '\0'; i++) { - switch (opts[i]) { - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - case 'h': - huge = false; - break; - default:; - } - } + /* config. */ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { malloc_cprintf(write_cb, cbopaque, "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(valgrind, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? 
"true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s ("#m": %s)\n", bv ? "true" \ : "false", bv2 ? "true" : "false"); \ } \ + } \ } -#define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %u\n", uv); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ + } \ + } +#define OPT_WRITE_SIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd ("#m": %zd)\n", \ ssv, ssv2); \ } \ + } \ } -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": \"%s\"\n", cpv); \ - } + } \ + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(purge) - if (opt_purge == purge_mode_ratio) { - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult) - } - if (opt_purge == purge_mode_decay) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - 
OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_SIZE_T(lg_chunk, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(purge, ",") + if (json || opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult, ",") + } + if (json || opt_purge == purge_mode_decay) { + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + } + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_SIZE_T(quarantine, ",") + OPT_WRITE_BOOL(redzone, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } #undef OPT_WRITE_BOOL #undef OPT_WRITE_BOOL_MUTABLE @@ -539,76 +769,215 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); + /* arenas. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } - CTL_GET("arenas.narenas", &uv, unsigned); + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_decay) { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? 
" (no decay)" : ""); + } - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - CTL_GET("arenas.page", &sv, size_t); + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); } - CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (opt_purge == purge_mode_decay) { + } + + if (json) { + unsigned nbins, nlruns, nhchunks, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? " (no decay)" : ""); + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"run_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); } - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlruns", &nlruns, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlruns\": %u,\n", nlruns); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lrun\": [\n"); + for (i = 0; i < nlruns; i++) { malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlruns) ? 
"," : ""); } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { - CTL_GET("prof.lg_sample", &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhchunks\": %u,\n", nhchunks); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"hchunk\": [\n"); + for (i = 0; i < nhchunks; i++) { malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); + "\t\t\t\t{\n"); - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } + CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); } - CTL_GET("opt.lg_chunk", &sv, size_t); malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); } - if (config_stats) { - size_t *cactive; - size_t allocated, active, metadata, resident, mapped, retained; + /* prof. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? 
"," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) +{ + size_t *cactive; + size_t allocated, active, metadata, resident, mapped, retained; - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? "," : ""); + } else { malloc_cprintf(write_cb, cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", @@ -616,61 +985,169 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); + } - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. */ + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. 
*/ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); + json, i, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? "," : ""); + } } } } - if (unmerged) { - unsigned narenas; + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } +} - /* Print stats for each arena. */ +void +stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts) +{ + int err; + uint64_t epoch; + size_t u64sz; + bool json = false; + bool general = true; + bool merged = true; + bool unmerged = true; + bool bins = true; + bool large = true; + bool huge = true; - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; + /* + * Refresh stats, in case mallctl() was called by the application. + * + * Check for OOM here, since refreshing the ctl cache can trigger + * allocation. In practice, none of the subsequent mallctl()-related + * calls in this function will cause OOM if this one succeeds. + * */ + epoch = 1; + u64sz = sizeof(uint64_t); + err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + if (err != 0) { + if (err == EAGAIN) { + malloc_write(": Memory allocation failure in " + "mallctl(\"epoch\", ...)\n"); + return; + } + malloc_write(": Failure in mallctl(\"epoch\", " + "...)\n"); + abort(); + } - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); + if (opts != NULL) { + unsigned i; - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); - } - } + for (i = 0; opts[i] != '\0'; i++) { + switch (opts[i]) { + case 'J': + json = true; + break; + case 'g': + general = false; + break; + case 'm': + merged = false; + break; + case 'a': + unmerged = false; + break; + case 'b': + bins = false; + break; + case 'l': + large = false; + break; + case 'h': + huge = false; + break; + default:; } } } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "{\n" + "\t\"jemalloc\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "___ Begin jemalloc statistics ___\n"); + } + + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); + if (config_stats) { + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large, huge); + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); + } } -- cgit v0.12 From f19bedb04cd3c2f603569ca9a40c8c66b05c3a90 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 13:25:42 -0700 Subject: Use ... rather than “...” or "..." in XML. 
--- doc/jemalloc.xml.in | 39 ++++++++++++++++++++------------------- doc/stylesheet.xsl | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 30b2bdf..6da59f4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -375,7 +375,7 @@ The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed + to a Management Information Base (MIB) that can be passed repeatedly to mallctlbymib(). Upon successful return from mallctlnametomib(), mibp contains an array of @@ -456,7 +456,7 @@ for (i = 0; i < nbins; i++) { The string specified via , the string pointed to by the global variable malloc_conf, the - “name” of the file referenced by the symbolic link named + name of the file referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that @@ -918,12 +918,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk 2 is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: disabled, primary, and + secondary; otherwise only disabled is + supported. The default is secondary if sbrk 2 is supported by the operating - system; “disabled” otherwise. + system; disabled otherwise. @@ -1039,15 +1039,16 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless is specified - during configuration, in which case it is "true" by default unless + Junk filling. If set to alloc, each byte + of uninitialized allocated memory will be initialized to + 0xa5. If set to free, all deallocated + memory will be initialized to 0x5a. If set to + true, both allocated and deallocated memory will be + initialized, and if set to false, junk filling be + disabled entirely. This is intended for debugging and will impact + performance negatively. This option is false by default + unless is specified during + configuration, in which case it is true by default unless running inside Valgrind. @@ -2735,7 +2736,7 @@ MAPPED_LIBRARIES: of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on “uninitialized” + Programs often accidentally depend on uninitialized memory actually being filled with zero bytes. Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2772,7 +2773,7 @@ MAPPED_LIBRARIES: this function is likely to result in a crash or deadlock. All messages are prefixed by - “<jemalloc>: ”. + <jemalloc>: . RETURN VALUES @@ -2957,9 +2958,9 @@ malloc_conf = "lg_chunk:24";]]> calloc(), realloc(), and free() functions conform to ISO/IEC - 9899:1990 (“ISO C90”). + 9899:1990 (ISO C90). The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). + to IEEE Std 1003.1-2001 (POSIX.1). 
diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index bc8bc2a..619365d 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -5,6 +5,6 @@ - "" + -- cgit v0.12 From 07ee4c5ff4c60e8e04eb452e2ef154c47fa118a4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 08:54:07 -0700 Subject: Force no lazy-lock on Windows. Monitoring thread creation is unimplemented for Windows, which means lazy-lock cannot function correctly. This resolves #310. --- configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 40681d1..4bdd66a 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,9 +1434,17 @@ fi ], [enable_lazy_lock=""] ) -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1447,8 +1455,6 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -else - enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) -- cgit v0.12 From 31db315f17a48380d11d5dd67dde154adf571573 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:05:19 -0700 Subject: Call _exit(2) rather than exit(3) in forked child. _exit(2) is async-signal-safe, whereas exit(3) is not. --- test/unit/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/fork.c b/test/unit/fork.c index 46c815e..c530797 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_fork) test_fail("Unexpected fork() failure"); } else if (pid == 0) { /* Child. */ - exit(0); + _exit(0); } else { int status; -- cgit v0.12 From a99e0fa2d21917cbcefd8b7a9a2128ae0399d88f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:06:40 -0700 Subject: Fix/refactor zone allocator integration code. Fix zone_force_unlock() to reinitialize, rather than unlocking mutexes, since OS X 10.12 cannot tolerate a child unlocking mutexes that were locked by its parent. Refactor; this was a side effect of experimenting with zone {de,re}registration during fork(2). --- include/jemalloc/internal/private_symbols.txt | 2 +- src/zone.c | 192 ++++++++++++++------------ 2 files changed, 108 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 09ff832..1749952 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -461,7 +461,6 @@ quarantine quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup -register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -614,3 +613,4 @@ witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup +zone_register diff --git a/src/zone.c b/src/zone.c index 89a3062..0571920 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone function is only available on >= 10.6. 
+ * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,8 +13,9 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; +static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -164,12 +165,68 @@ static void zone_force_unlock(malloc_zone_t *zone) { + /* + * Call jemalloc_postfork_child() rather than + * jemalloc_postfork_parent(), because this function is executed by both + * parent and child. The parent can tolerate having state + * reinitialized, but the child cannot unlock mutexes that were locked + * by the parent. + */ if (isthreaded) - jemalloc_postfork_parent(); + jemalloc_postfork_child(); +} + +static void +zone_init(void) +{ + + jemalloc_zone.size = (void *)zone_size; + jemalloc_zone.malloc = (void *)zone_malloc; + jemalloc_zone.calloc = (void *)zone_calloc; + jemalloc_zone.valloc = (void *)zone_valloc; + jemalloc_zone.free = (void *)zone_free; + jemalloc_zone.realloc = (void *)zone_realloc; + jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.zone_name = "jemalloc_zone"; + jemalloc_zone.batch_malloc = NULL; + jemalloc_zone.batch_free = NULL; + jemalloc_zone.introspect = &jemalloc_zone_introspect; + jemalloc_zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 5) + jemalloc_zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone.free_definite_size = zone_free_definite_size; +#endif +#if (JEMALLOC_ZONE_VERSION >= 8) + jemalloc_zone.pressure_relief = NULL; +#endif + + jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.check = NULL; + jemalloc_zone_introspect.print = NULL; + jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; + jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone_introspect.zone_locked = NULL; +#endif +#if (JEMALLOC_ZONE_VERSION >= 7) + jemalloc_zone_introspect.enable_discharge_checking = NULL; + jemalloc_zone_introspect.disable_discharge_checking = NULL; + jemalloc_zone_introspect.discharge = NULL; +# ifdef __BLOCKS__ + jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; +# else + jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; +# endif +#endif } static malloc_zone_t * -get_default_zone(void) +zone_default_get(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -183,7 +240,7 @@ get_default_zone(void) * zone is the default. So get the list of zones to get the first one, * instead of relying on malloc_default_zone. 
*/ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &num_zones)) { /* * Reset the value in case the failure happened after it was @@ -198,85 +255,13 @@ get_default_zone(void) return (malloc_default_zone()); } -JEMALLOC_ATTR(constructor) -void -register_zone(void) +/* As written, this function can only promote jemalloc_zone. */ +static void +zone_promote(void) { - - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. - */ - malloc_zone_t *default_zone = get_default_zone(); - malloc_zone_t *purgeable_zone = NULL; - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; -#endif - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) - zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif -#endif - - /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. - */ - if (malloc_default_purgeable_zone != NULL) - purgeable_zone = malloc_default_purgeable_zone(); - - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); + malloc_zone_t *zone; do { - default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -287,6 +272,7 @@ register_zone(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -298,11 +284,47 @@ register_zone(void) * above, i.e. the default zone. 
Registering it again then puts * it at the end, obviously after the default zone. */ - if (purgeable_zone) { + if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - default_zone = get_default_zone(); - } while (default_zone != &zone); + zone = zone_default_get(); + } while (zone != &jemalloc_zone); +} + +JEMALLOC_ATTR(constructor) +void +zone_register(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + default_zone = zone_default_get(); + if (!default_zone->zone_name || strcmp(default_zone->zone_name, + "DefaultMallocZone") != 0) + return; + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone() is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + zone_init(); + malloc_zone_register(&jemalloc_zone); + + /* Promote the custom zone to be default. */ + zone_promote(); } -- cgit v0.12 From 3f2b8d9cfaebdf0565da3f1ea6e8af11874eae8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:09:45 -0700 Subject: Add os_unfair_lock support. OS X 10.12 deprecated OSSpinLock; os_unfair_lock is the recommended replacement. --- configure.ac | 14 ++++++++++++++ include/jemalloc/internal/jemalloc_internal_decls.h | 3 +++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/mutex.h | 9 +++++++++ src/mutex.c | 2 ++ test/include/test/mtx.h | 2 ++ test/src/mtx.c | 7 +++++++ 7 files changed, 42 insertions(+) diff --git a/configure.ac b/configure.ac index 4bdd66a..db51556 100644 --- a/configure.ac +++ b/configure.ac @@ -1643,6 +1643,20 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then fi dnl ============================================================================ +dnl Check for os_unfair_lock operations as provided on Darwin. + +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +#include +], [ + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); +], [je_cv_os_unfair_lock]) +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) +fi + +dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. 
JE_COMPILABLE([Darwin OSSpin*()], [ diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 1d7f207..c907d91 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif # ifdef JEMALLOC_GLIBC_MALLOC_HOOK # include # endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6824ab7..385801b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -61,6 +61,11 @@ #undef JEMALLOC_HAVE_MADVISE /* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#undef JEMALLOC_OS_UNFAIR_LOCK + +/* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 5221799..b442d2d 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -35,6 +38,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -88,6 +93,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -109,6 +116,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/src/mutex.c b/src/mutex.c index a1fac34..6333e73 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -80,6 +80,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index bbe822f..58afbc3 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -8,6 +8,8 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/test/src/mtx.c b/test/src/mtx.c index 73bd02f..8a5dfdd 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -11,6 +11,8 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else 
@@ -33,6 +35,7 @@ mtx_fini(mtx_t *mtx) { #ifdef _WIN32 +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -45,6 +48,8 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -58,6 +63,8 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else -- cgit v0.12 From da206df10bc51f547d05563ebf17291f3f9471b0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:18:33 -0700 Subject: Do not use syscall(2) on OS X 10.12 (deprecated). --- configure.ac | 17 +++++++++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/pages.c | 6 +++--- src/util.c | 2 +- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index db51556..65b2f15 100644 --- a/configure.ac +++ b/configure.ac @@ -1380,6 +1380,23 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi +dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS X +dnl 10.12's deprecation warning prevents use. +SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([syscall(2)], [ +#define _GNU_SOURCE +#include +#include +], [ + syscall(SYS_write, 2, "hello", 5); +], + [je_cv_syscall]) +CFLAGS="${SAVED_CFLAGS}" +if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 385801b..9b3dca5 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -71,6 +71,9 @@ */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is available. */ +#undef JEMALLOC_HAVE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ diff --git a/src/pages.c b/src/pages.c index 84e2216..647952a 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,7 +219,7 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; -#ifdef SYS_open +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); @@ -227,13 +227,13 @@ os_overcommits_proc(void) if (fd == -1) return (false); /* Error. */ -#ifdef SYS_read +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); #else nread = read(fd, &buf, sizeof(buf)); #endif -#ifdef SYS_close +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else close(fd); diff --git a/src/util.c b/src/util.c index a1c4a2a..7905267 100644 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. 
This is necessary -- cgit v0.12 From eca3bc01316bbd06ebc5e42f3fe9146ef9552754 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:50:44 -0700 Subject: Fix syscall(2) configure test for Linux. --- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 65b2f15..3a4f150 100644 --- a/configure.ac +++ b/configure.ac @@ -379,7 +379,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - dnl secure_getenv() is exposed by _GNU_SOURCE. + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -1385,7 +1385,6 @@ dnl 10.12's deprecation warning prevents use. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) JE_COMPILABLE([syscall(2)], [ -#define _GNU_SOURCE #include #include ], [ -- cgit v0.12 From 6c56e194b02ec3888ed35c111d01ef2e62c6c808 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 2 Nov 2016 18:22:32 -0700 Subject: Check for existence of CPU_COUNT macro before using it. This resolves #485. --- src/jemalloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8210086..38650ff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -812,8 +812,10 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) /* + * glibc >= 2.6 has the CPU_COUNT macro. + * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. @@ -2053,6 +2055,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif +#ifdef CPU_COUNT /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the @@ -2071,6 +2074,9 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); #undef PREALIAS #undef ALIAS + +#endif + #endif /* -- cgit v0.12 From 62de7680cafbbc7abfbd0d5e9fd2ec5dafb54849 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 11:56:24 -0700 Subject: Update project URL. --- README | 2 +- doc/jemalloc.xml.in | 2 +- jemalloc.pc.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 9b268f4..5ff24a9 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6da59f4..3d2e721 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. SYNOPSIS diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 1a3ad9b..a318e8d 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
-URL: http://www.canonware.com/jemalloc +URL: http://jemalloc.net/ Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} -- cgit v0.12 From 1ceae2f8cbd803ba6904ac432dbe6fcef324ba21 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:45:01 -0700 Subject: Update ChangeLog for 4.3.0. --- ChangeLog | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/ChangeLog b/ChangeLog index 532255d..b43a467 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,43 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.0 (November 3, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + * 4.2.1 (June 8, 2016) Bug fixes: -- cgit v0.12 From dd3ed23aea5108d6b5d812c9bf6d347bb50dab56 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 14:55:58 -0700 Subject: Update symbol mangling. --- include/jemalloc/internal/private_symbols.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1749952..8972b37 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -401,6 +401,7 @@ pages_unmap pind2sz pind2sz_compute pind2sz_lookup +pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -487,6 +488,8 @@ size2index size2index_compute size2index_lookup size2index_tab +spin_adaptive +spin_init stats_cactive stats_cactive_add stats_cactive_get -- cgit v0.12 From e9012630acf897ce7016e427354bb46fbe893fe1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 17:11:01 -0700 Subject: Fix chunk_alloc_cache() to support decommitted allocation. Fix chunk_alloc_cache() to support decommitted allocation, and use this ability in arena_chunk_alloc_internal() and arena_stash_dirty(), so that chunks don't get permanently stuck in a hybrid state. 
This resolves #487. --- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 16 ++++++++++------ src/chunk.c | 8 +++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index e199a03..38c9a01 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -58,7 +58,7 @@ void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_node); + bool *zero, bool *commit, bool dalloc_node); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 7651495..a7fe34a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -30,6 +30,8 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. */ +static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk); static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, @@ -579,14 +581,13 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); + chunksize, zero, commit, true); if (chunk != NULL) { if (arena_chunk_register(tsdn, arena, chunk, *zero)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } - *commit = true; } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, @@ -883,6 +884,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -894,7 +896,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, true); + alignment, zero, &commit, true); malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1004,6 +1006,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -1015,7 +1018,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + chunksize, zero, &commit, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, @@ -1512,7 +1515,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - bool zero; + bool zero, commit; UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; @@ -1526,10 +1529,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * 
dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; + commit = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, - false); + &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, diff --git a/src/chunk.c b/src/chunk.c index 302b98c..07e26f7 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -385,23 +385,21 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; - bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, &commit, dalloc_node); + new_addr, size, alignment, zero, commit, dalloc_node); if (ret == NULL) return (NULL); - assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); -- cgit v0.12 From 32896a902bb962a06261d81c9be22e16210692db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 22:21:34 -0700 Subject: Fix large allocation to search optimal size class heap. Fix arena_run_alloc_large_helper() to not convert size to usize when searching for the first best fit via arena_run_first_best_fit(). This allows the search to consider the optimal quantized size class, so that e.g. allocating and deallocating 40 KiB in a tight loop can reuse the same memory. This regression was nominally caused by 5707d6f952c71baa2f19102479859012982ac821 (Quantize szad trees by size class.), but it did not commonly cause problems until 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.). These regressions were first released in 4.0.0. This resolves #487. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index a7fe34a..43c3ccf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1059,7 +1059,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; -- cgit v0.12 From 213667fe26ee30cb724d390c6047821960c57a34 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 00:02:43 -0700 Subject: Update ChangeLog for 4.3.0. --- ChangeLog | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index b43a467..ac2e4d3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.3.0 (November 3, 2016) +* 4.3.0 (November 4, 2016) This is the first release that passes the test suite for multiple Windows configurations, thanks in large part to @glandium setting up continuous @@ -19,6 +19,9 @@ brevity. Much more detail can be found in the git revision history: initialization. 
(@jasone) Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) - Fix stats.arenas..nthreads accounting. (@interwq) - Fix and simplify decay-based purging. (@jasone) - Make DSS (sbrk(2)-related) operations lockless, which resolves potential @@ -29,13 +32,16 @@ brevity. Much more detail can be found in the git revision history: - Fix a Valgrind integration bug. (@ronawho) - Disallow 0x5a junk filling when running in Valgrind. (@jasone) - Fix a file descriptor leak on Linux. This regression was first released in - 4.2.0. (@jasone) + 4.2.0. (@vsarunas, @jasone) - Fix static linking of jemalloc with glibc. (@djwatson) - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This works around other libraries' system call wrappers performing reentrant - allocation. (@jasone) + allocation. (@kspinka, @Whissi, @jasone) - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) - Fix TSD fetches to avoid (recursive) allocation. This is relevant to non-TLS and Windows configurations. (@jasone) - Fix malloc_conf overriding to work on Windows. (@jasone) -- cgit v0.12 From cb3ad659f0e5c909b7df5c9cbe84ff429754c669 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 21:57:17 -0700 Subject: Use -std=gnu11 if available. This supersedes -std=gnu99, and enables C11 atomics. --- configure.ac | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 3a4f150..104fd99 100644 --- a/configure.ac +++ b/configure.ac @@ -171,9 +171,15 @@ fi if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then +dnl JE_CFLAGS_APPEND([-std=gnu99]) + JE_CFLAGS_APPEND([-std=gnu11]) + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) -- cgit v0.12
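
As a rough illustration of what the -std=gnu11 change above enables, the following stand-alone sketch uses C11 atomics from stdatomic.h. It is not taken from the jemalloc sources or from any patch in this series; the counter name and output are purely illustrative.

#include <stdatomic.h>
#include <stdio.h>

int
main(void)
{
	/* Hypothetical event counter; relaxed ordering suffices here. */
	atomic_size_t nallocs = 0;

	atomic_fetch_add_explicit(&nallocs, 1, memory_order_relaxed);
	printf("nallocs: %zu\n",
	    atomic_load_explicit(&nallocs, memory_order_relaxed));
	return (0);
}

Compiling this with "cc -std=gnu11 demo.c" succeeds on compilers that accept the flag probed for in the configure change above, whereas -std=gnu99 builds would have to fall back to compiler-specific atomic builtins.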