Diffstat (limited to 'Objects/obmalloc.c')
-rw-r--r-- | Objects/obmalloc.c | 151
1 file changed, 95 insertions, 56 deletions
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 3916262..6225ebb 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -2,6 +2,13 @@
 
 #ifdef WITH_PYMALLOC
 
+#ifdef HAVE_MMAP
+ #include <sys/mman.h>
+ #ifdef MAP_ANONYMOUS
+  #define ARENAS_USE_MMAP
+ #endif
+#endif
+
 #ifdef WITH_VALGRIND
 #include <valgrind/valgrind.h>
 
@@ -75,7 +82,8 @@ static int running_on_valgrind = -1;
  * Allocation strategy abstract:
  *
  * For small requests, the allocator sub-allocates <Big> blocks of memory.
- * Requests greater than 256 bytes are routed to the system's allocator.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
  *
  * Small requests are grouped in size classes spaced 8 bytes apart, due
  * to the required valid alignment of the returned address. Requests of
@@ -107,10 +115,11 @@ static int running_on_valgrind = -1;
  *       57-64                   64                       7
  *       65-72                   72                       8
  *        ...                   ...                     ...
- *      241-248                 248                      30
- *      249-256                 256                      31
+ *      497-504                 504                      62
+ *      505-512                 512                      63
  *
- * 0, 257 and up: routed to the underlying allocator.
+ * 0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ * allocator.
  */
 
 /*==========================================================================*/
@@ -129,7 +138,6 @@ static int running_on_valgrind = -1;
  */
 #define ALIGNMENT               8               /* must be 2^N */
 #define ALIGNMENT_SHIFT         3
-#define ALIGNMENT_MASK          (ALIGNMENT - 1)
 
 /* Return the number of bytes in size class I, as a uint. */
 #define INDEX2SIZE(I) (((uint)(I) + 1) << ALIGNMENT_SHIFT)
@@ -139,14 +147,17 @@ static int running_on_valgrind = -1;
 * small enough in order to use preallocated memory pools. You can tune
 * this value according to your application behaviour and memory needs.
 *
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
 * The following invariants must hold:
- *     1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 256
+ *     1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
 *     2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
 *
 * Although not required, for better performance and space efficiency,
 * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
 */
-#define SMALL_REQUEST_THRESHOLD 256
+#define SMALL_REQUEST_THRESHOLD 512
 
 #define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
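The remapped size classes are easy to sanity-check outside the interpreter. The following standalone sketch is illustrative only, not code from the patch: it copies the macro values from the hunks above and assumes the allocator's usual (n - 1) >> ALIGNMENT_SHIFT size class computation.

    #include <stdio.h>
    #include <stddef.h>

    #define ALIGNMENT               8
    #define ALIGNMENT_SHIFT         3
    #define SMALL_REQUEST_THRESHOLD 512

    /* Same mapping as the INDEX2SIZE macro in the patch. */
    #define INDEX2SIZE(I) (((unsigned)(I) + 1) << ALIGNMENT_SHIFT)

    int main(void)
    {
        size_t requests[] = {1, 8, 9, 256, 257, 505, 512, 513};
        for (size_t i = 0; i < sizeof requests / sizeof requests[0]; i++) {
            size_t n = requests[i];
            if (n == 0 || n > SMALL_REQUEST_THRESHOLD) {
                printf("%4zu bytes -> system allocator\n", n);
                continue;
            }
            /* 1..8 -> class 0, 9..16 -> class 1, ..., 505..512 -> class 63 */
            unsigned idx = (unsigned)((n - 1) >> ALIGNMENT_SHIFT);
            printf("%4zu bytes -> class %2u (%3u-byte blocks)\n",
                   n, idx, INDEX2SIZE(idx));
        }
        return 0;
    }

Note that a 257-byte request, which the old 256-byte threshold sent straight to the system allocator, now lands in class 32 and is served from a pool.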
@@ -174,15 +185,15 @@ static int running_on_valgrind = -1;
 /*
  * The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
  * on a page boundary. This is a reserved virtual address space for the
- * current process (obtained through a malloc call). In no way this means
- * that the memory arenas will be used entirely. A malloc(<Big>) is usually
- * an address range reservation for <Big> bytes, unless all pages within this
- * space are referenced subsequently. So malloc'ing big blocks and not using
- * them does not mean "wasting memory". It's an addressable range wastage...
+ * current process (obtained through a malloc()/mmap() call). In no way this
+ * means that the memory arenas will be used entirely. A malloc(<Big>) is
+ * usually an address range reservation for <Big> bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory". It's an addressable range
+ * wastage...
  *
- * Therefore, allocating arenas with malloc is not optimal, because there is
- * some address space wastage, but this is the most portable way to request
- * memory from the system across various platforms.
+ * Arenas are allocated with mmap() on systems supporting anonymous memory
+ * mappings to reduce heap fragmentation.
  */
 
 #define ARENA_SIZE              (256 << 10)     /* 256KB */
@@ -302,14 +313,12 @@ struct arena_object {
     struct arena_object* prevarena;
 };
 
-#undef  ROUNDUP
-#define ROUNDUP(x)      (((x) + ALIGNMENT_MASK) & ~ALIGNMENT_MASK)
-#define POOL_OVERHEAD   ROUNDUP(sizeof(struct pool_header))
+#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
 
 #define DUMMY_SIZE_IDX  0xffff  /* size class of newly cached pools */
 
 /* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
-#define POOL_ADDR(P) ((poolp)((uptr)(P) & ~(uptr)POOL_SIZE_MASK))
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
 
 /* Return total number of blocks in pool of size index I, as a uint. */
 #define NUMBLOCKS(I) ((uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
@@ -440,6 +449,9 @@ static poolp usedpools[2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8] = {
     , PT(48), PT(49), PT(50), PT(51), PT(52), PT(53), PT(54), PT(55)
 #if NB_SMALL_SIZE_CLASSES > 56
     , PT(56), PT(57), PT(58), PT(59), PT(60), PT(61), PT(62), PT(63)
+#if NB_SMALL_SIZE_CLASSES > 64
+#error "NB_SMALL_SIZE_CLASSES should be less than 64"
+#endif /* NB_SMALL_SIZE_CLASSES > 64 */
 #endif /* NB_SMALL_SIZE_CLASSES > 56 */
 #endif /* NB_SMALL_SIZE_CLASSES > 48 */
 #endif /* NB_SMALL_SIZE_CLASSES > 40 */
@@ -508,12 +520,10 @@ static struct arena_object* usable_arenas = NULL;
 /* Number of arenas allocated that haven't been free()'d. */
 static size_t narenas_currently_allocated = 0;
 
-#ifdef PYMALLOC_DEBUG
 /* Total number of times malloc() called to allocate an arena. */
 static size_t ntimes_arena_allocated = 0;
 /* High water mark (max value ever seen) for narenas_currently_allocated. */
 static size_t narenas_highwater = 0;
-#endif
 
 /* Allocate a new arena.  If we run out of memory, return NULL.  Else
  * allocate a new arena, and return the address of an arena_object
@@ -525,10 +535,12 @@ new_arena(void)
 {
     struct arena_object* arenaobj;
     uint excess;        /* number of bytes above pool alignment */
+    void *address;
+    int err;
 
 #ifdef PYMALLOC_DEBUG
     if (Py_GETENV("PYTHONMALLOCSTATS"))
-        _PyObject_DebugMallocStats();
+        _PyObject_DebugMallocStats(stderr);
 #endif
     if (unused_arena_objects == NULL) {
         uint i;
@@ -577,8 +589,15 @@ new_arena(void)
     arenaobj = unused_arena_objects;
     unused_arena_objects = arenaobj->nextarena;
     assert(arenaobj->address == 0);
-    arenaobj->address = (uptr)malloc(ARENA_SIZE);
-    if (arenaobj->address == 0) {
+#ifdef ARENAS_USE_MMAP
+    address = mmap(NULL, ARENA_SIZE, PROT_READ|PROT_WRITE,
+                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    err = (address == MAP_FAILED);
+#else
+    address = malloc(ARENA_SIZE);
+    err = (address == 0);
+#endif
+    if (err) {
         /* The allocation failed: return NULL after putting the
          * arenaobj back.
@@ -586,13 +605,12 @@ new_arena(void)
         unused_arena_objects = arenaobj;
         return NULL;
     }
+    arenaobj->address = (uptr)address;
 
     ++narenas_currently_allocated;
-#ifdef PYMALLOC_DEBUG
     ++ntimes_arena_allocated;
     if (narenas_currently_allocated > narenas_highwater)
         narenas_highwater = narenas_currently_allocated;
-#endif
     arenaobj->freepools = NULL;
     /* pool_address <- first pool-aligned address in the arena
        nfreepools <- number of whole pools that fit after alignment */
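The hunks above carry the core of the change: new_arena() now obtains arena memory from an anonymous private mapping when ARENAS_USE_MMAP is defined, falling back to malloc() elsewhere. Stripped of pymalloc's arena_object bookkeeping, the pattern looks like the sketch below; the HAVE_MMAP probe and the simplified error handling are this sketch's assumptions, not the patch's exact code.

    #include <stdlib.h>

    #ifdef HAVE_MMAP
    #include <sys/mman.h>
    #ifdef MAP_ANONYMOUS
    #define ARENAS_USE_MMAP
    #endif
    #endif

    #define ARENA_SIZE (256 << 10)          /* 256KB, as in the patch */

    /* Reserve one arena-sized chunk of address space. */
    static void *
    arena_alloc(void)
    {
    #ifdef ARENAS_USE_MMAP
        void *address = mmap(NULL, ARENA_SIZE, PROT_READ | PROT_WRITE,
                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        return (address == MAP_FAILED) ? NULL : address;
    #else
        return malloc(ARENA_SIZE);
    #endif
    }

    /* Return an arena to the system. */
    static void
    arena_free(void *address)
    {
    #ifdef ARENAS_USE_MMAP
        munmap(address, ARENA_SIZE);        /* pages go back to the OS */
    #else
        free(address);
    #endif
    }

    int main(void)
    {
        void *arena = arena_alloc();
        if (arena != NULL)
            arena_free(arena);
        return 0;
    }

The point of the mmap() path shows up in the PyObject_Free() hunk below: a dedicated mapping can be unmapped wholesale when the arena empties, so the 256KB goes back to the OS instead of leaving a fragmentation-prone hole in the malloc heap.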
@@ -1054,7 +1072,11 @@ PyObject_Free(void *p)
             unused_arena_objects = ao;
 
             /* Free the entire arena. */
+#ifdef ARENAS_USE_MMAP
+            munmap((void *)ao->address, ARENA_SIZE);
+#else
             free((void *)ao->address);
+#endif
             ao->address = 0;                    /* mark unassociated */
             --narenas_currently_allocated;
@@ -1694,17 +1716,19 @@ _PyObject_DebugDumpAddress(const void *p)
     }
 }
 
+#endif  /* PYMALLOC_DEBUG */
+
 static size_t
-printone(const char* msg, size_t value)
+printone(FILE *out, const char* msg, size_t value)
 {
     int i, k;
     char buf[100];
     size_t origvalue = value;
 
-    fputs(msg, stderr);
+    fputs(msg, out);
     for (i = (int)strlen(msg); i < 35; ++i)
-        fputc(' ', stderr);
-    fputc('=', stderr);
+        fputc(' ', out);
+    fputc('=', out);
 
     /* Write the value with commas. */
     i = 22;
@@ -1725,17 +1749,33 @@ printone(const char* msg, size_t value)
     while (i >= 0)
         buf[i--] = ' ';
 
-    fputs(buf, stderr);
+    fputs(buf, out);
 
     return origvalue;
 }
 
-/* Print summary info to stderr about the state of pymalloc's structures.
+void
+_PyDebugAllocatorStats(FILE *out,
+                       const char *block_name, int num_blocks, size_t sizeof_block)
+{
+    char buf1[128];
+    char buf2[128];
+    PyOS_snprintf(buf1, sizeof(buf1),
+                  "%d %ss * %zd bytes each",
+                  num_blocks, block_name, sizeof_block);
+    PyOS_snprintf(buf2, sizeof(buf2),
+                  "%48s ", buf1);
+    (void)printone(out, buf2, num_blocks * sizeof_block);
+}
+
+#ifdef WITH_PYMALLOC
+
+/* Print summary info to "out" about the state of pymalloc's structures.
  * In Py_DEBUG mode, also perform some expensive internal consistency
  * checks.
  */
 void
-_PyObject_DebugMallocStats(void)
+_PyObject_DebugMallocStats(FILE *out)
 {
     uint i;
     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
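With printone() now taking a FILE * argument the report can be sent to any stream, and the new _PyDebugAllocatorStats() helper lets other allocators append one-line summaries to it (callers in the patch, such as new_arena(), pass stderr explicitly). The comma grouping printone() performs can be sketched independently as below; the right-to-left buffer fill mirrors the approach, but the buffer size and formatting details here are this sketch's assumptions, not the patch's exact code.

    #include <stdio.h>

    /* Write VALUE to OUT with a comma after every third digit,
       e.g. 123456789 -> "123,456,789". */
    static void
    print_with_commas(FILE *out, size_t value)
    {
        char buf[32];                   /* 20 digits + 6 commas + NUL fits */
        int i = (int)sizeof(buf) - 1;
        int k = 3;                      /* digits left before the next comma */

        buf[i] = '\0';
        do {
            buf[--i] = (char)('0' + value % 10);
            value /= 10;
            if (value && --k == 0) {    /* more digits coming: insert comma */
                buf[--i] = ',';
                k = 3;
            }
        } while (value);
        fputs(buf + i, out);
    }

    int main(void)
    {
        print_with_commas(stdout, (size_t)123456789);
        fputc('\n', stdout);
        return 0;
    }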
@@ -1764,7 +1804,7 @@ _PyObject_DebugMallocStats(void)
     size_t total;
     char buf[128];
 
-    fprintf(stderr, "Small block threshold = %d, in %u size classes.\n",
+    fprintf(out, "Small block threshold = %d, in %u size classes.\n",
             SMALL_REQUEST_THRESHOLD, numclasses);
 
     for (i = 0; i < numclasses; ++i)
@@ -1775,7 +1815,6 @@ _PyObject_DebugMallocStats(void)
      * will be living in full pools -- would be a shame to miss them.
      */
     for (i = 0; i < maxarenas; ++i) {
-        uint poolsinarena;
         uint j;
         uptr base = arenas[i].address;
 
@@ -1784,7 +1823,6 @@ _PyObject_DebugMallocStats(void)
         if (arenas[i].address == (uptr)NULL)
             continue;
         narenas += 1;
-        poolsinarena = arenas[i].ntotalpools;
         numfreepools += arenas[i].nfreepools;
 
         /* round up to pool alignment */
@@ -1820,10 +1858,10 @@ _PyObject_DebugMallocStats(void)
     }
     assert(narenas == narenas_currently_allocated);
 
-    fputc('\n', stderr);
+    fputc('\n', out);
     fputs("class   size   num pools   blocks in use  avail blocks\n"
           "-----   ----   ---------   -------------  ------------\n",
-          stderr);
+          out);
 
     for (i = 0; i < numclasses; ++i) {
         size_t p = numpools[i];
@@ -1834,7 +1872,7 @@ _PyObject_DebugMallocStats(void)
             assert(b == 0 && f == 0);
             continue;
         }
-        fprintf(stderr, "%5u %6u "
+        fprintf(out, "%5u %6u "
                         "%11" PY_FORMAT_SIZE_T "u "
                         "%15" PY_FORMAT_SIZE_T "u "
                         "%13" PY_FORMAT_SIZE_T "u\n",
@@ -1844,35 +1882,36 @@ _PyObject_DebugMallocStats(void)
         pool_header_bytes += p * POOL_OVERHEAD;
         quantization += p * ((POOL_SIZE - POOL_OVERHEAD) % size);
     }
-    fputc('\n', stderr);
-    (void)printone("# times object malloc called", serialno);
-
-    (void)printone("# arenas allocated total", ntimes_arena_allocated);
-    (void)printone("# arenas reclaimed", ntimes_arena_allocated - narenas);
-    (void)printone("# arenas highwater mark", narenas_highwater);
-    (void)printone("# arenas allocated current", narenas);
+    fputc('\n', out);
+#ifdef PYMALLOC_DEBUG
+    (void)printone(out, "# times object malloc called", serialno);
+#endif
+    (void)printone(out, "# arenas allocated total", ntimes_arena_allocated);
+    (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
+    (void)printone(out, "# arenas highwater mark", narenas_highwater);
+    (void)printone(out, "# arenas allocated current", narenas);
     PyOS_snprintf(buf, sizeof(buf),
         "%" PY_FORMAT_SIZE_T "u arenas * %d bytes/arena",
         narenas, ARENA_SIZE);
-    (void)printone(buf, narenas * ARENA_SIZE);
+    (void)printone(out, buf, narenas * ARENA_SIZE);
 
-    fputc('\n', stderr);
+    fputc('\n', out);
 
-    total = printone("# bytes in allocated blocks", allocated_bytes);
-    total += printone("# bytes in available blocks", available_bytes);
+    total = printone(out, "# bytes in allocated blocks", allocated_bytes);
+    total += printone(out, "# bytes in available blocks", available_bytes);
 
     PyOS_snprintf(buf, sizeof(buf),
         "%u unused pools * %d bytes", numfreepools, POOL_SIZE);
-    total += printone(buf, (size_t)numfreepools * POOL_SIZE);
+    total += printone(out, buf, (size_t)numfreepools * POOL_SIZE);
 
-    total += printone("# bytes lost to pool headers", pool_header_bytes);
-    total += printone("# bytes lost to quantization", quantization);
-    total += printone("# bytes lost to arena alignment", arena_alignment);
-    (void)printone("Total", total);
+    total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
+    total += printone(out, "# bytes lost to quantization", quantization);
+    total += printone(out, "# bytes lost to arena alignment", arena_alignment);
+    (void)printone(out, "Total", total);
 }
 
-#endif /* PYMALLOC_DEBUG */
+#endif /* #ifdef WITH_PYMALLOC */
 
 #ifdef Py_USING_MEMORY_DEBUGGER
 
 /* Make this function last so gcc won't inline it since the definition is
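To put a number on the "bytes lost to quantization" figure the function computes, consider the largest new size class. The pool and header sizes below are assumptions typical of a 64-bit build (a POOL_SIZE of 4096 and a pool_header that rounds up to 48 bytes); the patch itself derives both from the macros shown earlier.

    #include <stdio.h>

    #define POOL_SIZE     4096      /* assumed: one system page */
    #define POOL_OVERHEAD 48        /* assumed: rounded-up pool_header size */

    int main(void)
    {
        unsigned size = 512;                            /* class 63 blocks */
        unsigned usable = POOL_SIZE - POOL_OVERHEAD;    /* 4048 */
        unsigned nblocks = usable / size;               /* NUMBLOCKS: 7 */
        unsigned waste = usable % size;                 /* 464 */
        printf("%u blocks per pool, %u bytes lost to quantization\n",
               nblocks, waste);
        return 0;
    }

Seven 512-byte blocks fit into the 4048 usable bytes, so each pool of class 63 would contribute 464 bytes to the quantization line in the report.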