summaryrefslogtreecommitdiffstats
path: root/Objects/obmalloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'Objects/obmalloc.c')
-rw-r--r--Objects/obmalloc.c151
1 files changed, 95 insertions, 56 deletions
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 3916262..6225ebb 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -2,6 +2,13 @@
#ifdef WITH_PYMALLOC
+#ifdef HAVE_MMAP
+ #include <sys/mman.h>
+ #ifdef MAP_ANONYMOUS
+ #define ARENAS_USE_MMAP
+ #endif
+#endif
+
#ifdef WITH_VALGRIND
#include <valgrind/valgrind.h>
@@ -75,7 +82,8 @@ static int running_on_valgrind = -1;
* Allocation strategy abstract:
*
* For small requests, the allocator sub-allocates <Big> blocks of memory.
- * Requests greater than 256 bytes are routed to the system's allocator.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
*
* Small requests are grouped in size classes spaced 8 bytes apart, due
* to the required valid alignment of the returned address. Requests of
@@ -107,10 +115,11 @@ static int running_on_valgrind = -1;
* 57-64 64 7
* 65-72 72 8
* ... ... ...
- * 241-248 248 30
- * 249-256 256 31
+ * 497-504 504 62
+ * 505-512 512 63
*
- * 0, 257 and up: routed to the underlying allocator.
+ * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ * allocator.
*/
/*==========================================================================*/
@@ -129,7 +138,6 @@ static int running_on_valgrind = -1;
*/
#define ALIGNMENT 8 /* must be 2^N */
#define ALIGNMENT_SHIFT 3
-#define ALIGNMENT_MASK (ALIGNMENT - 1)
/* Return the number of bytes in size class I, as a uint. */
#define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
@@ -139,14 +147,17 @@ static int running_on_valgrind = -1;
* small enough in order to use preallocated memory pools. You can tune
* this value according to your application behaviour and memory needs.
*
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
* The following invariants must hold:
- * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
+ * 1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
* 2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
*
* Although not required, for better performance and space efficiency,
* it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
*/
-#define SMALL_REQUEST_THRESHOLD 256
+#define SMALL_REQUEST_THRESHOLD 512
#define NB_SMALL_SIZE_CLASSES (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
/*
@@ -174,15 +185,15 @@ static int running_on_valgrind = -1;
/*
* The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
* on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc call). In no way this means
- * that the memory arenas will be used entirely. A malloc(<Big>) is usually
- * an address range reservation for <Big> bytes, unless all pages within this
- * space are referenced subsequently. So malloc'ing big blocks and not using
- * them does not mean "wasting memory". It's an addressable range wastage...
+ * current process (obtained through a malloc()/mmap() call). In no way this
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory". It's an addressable range
+ * wastage...
*
- * Therefore, allocating arenas with malloc is not optimal, because there is
- * some address space wastage, but this is the most portable way to request
- * memory from the system across various platforms.
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
*/
#define ARENA_SIZE (256 << 10) /* 256KB */
@@ -302,14 +313,12 @@ struct arena_object {
struct arena_object* prevarena;
};
-#undef ROUNDUP
-#define ROUNDUP(x) (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
-#define POOL_OVERHEAD ROUNDUP(sizeof(struct pool_header))
+#define POOL_OVERHEAD _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
#define DUMMY_SIZE_IDX 0xffff /* size class of newly cached pools */
/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
-#define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
/* Return total number of blocks in pool of size index I, as a uint. */
#define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
@@ -440,6 +449,9 @@ static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
, PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
#if NB_SMALL_SIZE_CLASSES > 56
, PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
+#if NB_SMALL_SIZE_CLASSES > 64
+#error "NB_SMALL_SIZE_CLASSES should be less than 64"
+#endif /* NB_SMALL_SIZE_CLASSES > 64 */
#endif /* NB_SMALL_SIZE_CLASSES > 56 */
#endif /* NB_SMALL_SIZE_CLASSES > 48 */
#endif /* NB_SMALL_SIZE_CLASSES > 40 */
@@ -508,12 +520,10 @@ static struct arena_object* usable_arenas = NULL;
/* Number of arenas allocated that haven't been free()'d. */
static size_t narenas_currently_allocated = 0;
-#ifdef PYMALLOC_DEBUG
/* Total number of times malloc() called to allocate an arena. */
static size_t ntimes_arena_allocated = 0;
/* High water mark (max value ever seen) for narenas_currently_allocated. */
static size_t narenas_highwater = 0;
-#endif
/* Allocate a new arena. If we run out of memory, return NULL. Else
* allocate a new arena, and return the address of an arena_object
@@ -525,10 +535,12 @@ new_arena(void)
{
struct arena_object* arenaobj;
uint excess; /* number of bytes above pool alignment */
+ void *address;
+ int err;
#ifdef PYMALLOC_DEBUG
if (Py_GETENV("PYTHONMALLOCSTATS"))
- _PyObject_DebugMallocStats();
+ _PyObject_DebugMallocStats(stderr);
#endif
if (unused_arena_objects == NULL) {
uint i;
@@ -577,8 +589,15 @@ new_arena(void)
arenaobj = unused_arena_objects;
unused_arena_objects = arenaobj->nextarena;
assert(arenaobj->address == 0);
- arenaobj->address = (uptr)malloc(ARENA_SIZE);
- if (arenaobj->address == 0) {
+#ifdef ARENAS_USE_MMAP
+ address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ err = (address == MAP_FAILED);
+#else
+ address = malloc(ARENA_SIZE);
+ err = (address == 0);
+#endif
+ if (err) {
/* The allocation failed: return NULL after putting the
* arenaobj back.
*/
@@ -586,13 +605,12 @@ new_arena(void)
unused_arena_objects = arenaobj;
return NULL;
}
+ arenaobj->address = (uptr)address;
++narenas_currently_allocated;
-#ifdef PYMALLOC_DEBUG
++ntimes_arena_allocated;
if (narenas_currently_allocated > narenas_highwater)
narenas_highwater = narenas_currently_allocated;
-#endif
arenaobj->freepools = NULL;
/* pool_address <- first pool-aligned address in the arena
nfreepools <- number of whole pools that fit after alignment */
@@ -1054,7 +1072,11 @@ PyObject_Free(void *p)
unused_arena_objects = ao;
/* Free the entire arena. */
+#ifdef ARENAS_USE_MMAP
+ munmap((void *)ao->address, ARENA_SIZE);
+#else
free((void *)ao->address);
+#endif
ao->address = 0; /* mark unassociated */
--narenas_currently_allocated;
@@ -1694,17 +1716,19 @@ _PyObject_DebugDumpAddress(const void *p)
}
}
+#endif /* PYMALLOC_DEBUG */
+
static size_t
-printone(const char* msg, size_t value)
+printone(FILE *out, const char* msg, size_t value)
{
int i, k;
char buf[100];
size_t origvalue = value;
- fputs(msg, stderr);
+ fputs(msg, out);
for (i = (int)strlen(msg); i < 35; ++i)
- fputc(' ', stderr);
- fputc('=', stderr);
+ fputc(' ', out);
+ fputc('=', out);
/* Write the value with commas. */
i = 22;
@@ -1725,17 +1749,33 @@ printone(const char* msg, size_t value)
while (i >= 0)
buf[i--] = ' ';
- fputs(buf, stderr);
+ fputs(buf, out);
return origvalue;
}
-/* Print summary info to stderr about the state of pymalloc's structures.
+void
+_PyDebugAllocatorStats(FILE *out,
+ const char *block_name, int num_blocks, size_t sizeof_block)
+{
+ char buf1[128];
+ char buf2[128];
+ PyOS_snprintf(buf1, sizeof(buf1),
+ "%d %ss * %zd bytes each",
+ num_blocks, block_name, sizeof_block);
+ PyOS_snprintf(buf2, sizeof(buf2),
+ "%48s ", buf1);
+ (void)printone(out, buf2, num_blocks * sizeof_block);
+}
+
+#ifdef WITH_PYMALLOC
+
+/* Print summary info to "out" about the state of pymalloc's structures.
* In Py_DEBUG mode, also perform some expensive internal consistency
* checks.
*/
void
-_PyObject_DebugMallocStats(void)
+_PyObject_DebugMallocStats(FILE *out)
{
uint i;
const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
@@ -1764,7 +1804,7 @@ _PyObject_DebugMallocStats(void)
size_t total;
char buf[128];
- fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
+ fprintf(out, "Small block threshold = %d, in %u size classes.\n",
SMALL_REQUEST_THRESHOLD, numclasses);
for (i = 0; i < numclasses; ++i)
@@ -1775,7 +1815,6 @@ _PyObject_DebugMallocStats(void)
* will be living in full pools -- would be a shame to miss them.
*/
for (i = 0; i < maxarenas; ++i) {
- uint poolsinarena;
uint j;
uptr base = arenas[i].address;
@@ -1784,7 +1823,6 @@ _PyObject_DebugMallocStats(void)
continue;
narenas += 1;
- poolsinarena = arenas[i].ntotalpools;
numfreepools += arenas[i].nfreepools;
/* round up to pool alignment */
@@ -1820,10 +1858,10 @@ _PyObject_DebugMallocStats(void)
}
assert(narenas == narenas_currently_allocated);
- fputc('\n', stderr);
+ fputc('\n', out);
fputs("class size num pools blocks in use avail blocks\n"
"----- ---- --------- ------------- ------------\n",
- stderr);
+ out);
for (i = 0; i < numclasses; ++i) {
size_t p = numpools[i];
@@ -1834,7 +1872,7 @@ _PyObject_DebugMallocStats(void)
assert(b == 0 && f == 0);
continue;
}
- fprintf(stderr, "%5u %6u "
+ fprintf(out, "%5u %6u "
"%11" PY_FORMAT_SIZE_T "u "
"%15" PY_FORMAT_SIZE_T "u "
"%13" PY_FORMAT_SIZE_T "u\n",
@@ -1844,35 +1882,36 @@ _PyObject_DebugMallocStats(void)
pool_header_bytes += p * POOL_OVERHEAD;
quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
}
- fputc('\n', stderr);
- (void)printone("# times object malloc called", serialno);
-
- (void)printone("# arenas allocated total", ntimes_arena_allocated);
- (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
- (void)printone("# arenas highwater mark", narenas_highwater);
- (void)printone("# arenas allocated current", narenas);
+ fputc('\n', out);
+#ifdef PYMALLOC_DEBUG
+ (void)printone(out, "# times object malloc called", serialno);
+#endif
+ (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
+ (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
+ (void)printone(out, "# arenas highwater mark", narenas_highwater);
+ (void)printone(out, "# arenas allocated current", narenas);
PyOS_snprintf(buf, sizeof(buf),
"%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
narenas, ARENA_SIZE);
- (void)printone(buf, narenas * ARENA_SIZE);
+ (void)printone(out, buf, narenas * ARENA_SIZE);
- fputc('\n', stderr);
+ fputc('\n', out);
- total = printone("# bytes in allocated blocks", allocated_bytes);
- total += printone("# bytes in available blocks", available_bytes);
+ total = printone(out, "# bytes in allocated blocks", allocated_bytes);
+ total += printone(out, "# bytes in available blocks", available_bytes);
PyOS_snprintf(buf, sizeof(buf),
"%u unused pools * %d bytes", numfreepools, POOL_SIZE);
- total += printone(buf, (size_t)numfreepools * POOL_SIZE);
+ total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
- total += printone("# bytes lost to pool headers", pool_header_bytes);
- total += printone("# bytes lost to quantization", quantization);
- total += printone("# bytes lost to arena alignment", arena_alignment);
- (void)printone("Total", total);
+ total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
+ total += printone(out, "# bytes lost to quantization", quantization);
+ total += printone(out, "# bytes lost to arena alignment", arena_alignment);
+ (void)printone(out, "Total", total);
}
-#endif /* PYMALLOC_DEBUG */
+#endif /* #ifdef WITH_PYMALLOC */
#ifdef Py_USING_MEMORY_DEBUGGER
/* Make this function last so gcc won't inline it since the definition is