author     Inada Naoki <songofacandy@gmail.com>   2019-07-17 12:23:57 (GMT)
committer  GitHub <noreply@github.com>            2019-07-17 12:23:57 (GMT)
commit     fb26504d14a08fcd61bb92bb989b6d2b12188535 (patch)
tree       8ac6963cd967455bc323d5acf9c2b834b72f9b57
parent     7036e1de3a87d36c7ef41b8a2b44ed6fc4d34be2 (diff)
bpo-37543: optimize pymalloc (#14674)
PyObject_Malloc() and PyObject_Free() partially inline pymalloc_alloc and pymalloc_free. But when PGO is not used, the compiler does not know which parts of pymalloc_alloc and pymalloc_free are hot.
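The patch therefore tells the compiler explicitly which branches are hot: it defines LIKELY()/UNLIKELY() macros on top of __builtin_expect(), marks pymalloc_alloc()/pymalloc_free() as static inline, and moves the cold paths into separate helpers (pymalloc_pool_extend(), allocate_from_new_pool(), insert_to_usedpool(), insert_to_freepool()). The sketch below shows the general pattern in isolation; the macros mirror the ones added in the diff, while SMALL_THRESHOLD, small_alloc() and object_malloc() are hypothetical stand-ins (roughly SMALL_REQUEST_THRESHOLD, pymalloc_alloc() and _PyObject_Malloc()) and are not part of the patch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Branch-hint macros, mirroring the ones the patch adds to obmalloc.c. */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
#  define UNLIKELY(value) __builtin_expect((value), 0)
#  define LIKELY(value)   __builtin_expect((value), 1)
#else
#  define UNLIKELY(value) (value)
#  define LIKELY(value)   (value)
#endif

#define SMALL_THRESHOLD 512      /* hypothetical stand-in for SMALL_REQUEST_THRESHOLD */

/* Hot path: serve small requests; return 0 so the caller falls back. */
static inline int
small_alloc(void **ptr_p, size_t nbytes)
{
    if (UNLIKELY(nbytes == 0 || nbytes > SMALL_THRESHOLD)) {
        return 0;                /* cold: not a small request */
    }
    *ptr_p = malloc(nbytes);     /* real pymalloc would carve a block from a pool */
    return *ptr_p != NULL;
}

static void *
object_malloc(size_t nbytes)
{
    void *ptr;
    /* LIKELY() makes the compiler lay out this branch as the fall-through
     * path, information it would otherwise only get from PGO. */
    if (LIKELY(small_alloc(&ptr, nbytes))) {
        return ptr;
    }
    return malloc(nbytes);       /* cold fallback, kept out of the hot path */
}

int main(void)
{
    char *p = object_malloc(32);
    if (p != NULL) {
        strcpy(p, "pymalloc-style fast path");
        puts(p);
        free(p);
    }
    return 0;
}

Without PGO, the LIKELY() branch becomes the straight-line fall-through path and the UNLIKELY() checks are laid out out of line, which is the layout the restructuring in the diff below aims for in obmalloc.c.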
-rw-r--r--  Misc/NEWS.d/next/Core and Builtins/2019-07-10-20-33-53.bpo-37543.EvI19D.rst  1
-rw-r--r--  Objects/obmalloc.c  444
2 files changed, 227 insertions(+), 218 deletions(-)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-07-10-20-33-53.bpo-37543.EvI19D.rst b/Misc/NEWS.d/next/Core and Builtins/2019-07-10-20-33-53.bpo-37543.EvI19D.rst
new file mode 100644
index 0000000..3f74bbc
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-07-10-20-33-53.bpo-37543.EvI19D.rst
@@ -0,0 +1 @@
+Optimized pymalloc for non-PGO builds.
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 2c00efc..8d5c700 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -710,19 +710,21 @@ PyObject_Free(void *ptr)
}
-#ifdef WITH_PYMALLOC
-
-#ifdef WITH_VALGRIND
-#include <valgrind/valgrind.h>
-
/* If we're using GCC, use __builtin_expect() to reduce overhead of
the valgrind checks */
#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
# define UNLIKELY(value) __builtin_expect((value), 0)
+# define LIKELY(value) __builtin_expect((value), 1)
#else
# define UNLIKELY(value) (value)
+# define LIKELY(value) (value)
#endif
+#ifdef WITH_PYMALLOC
+
+#ifdef WITH_VALGRIND
+#include <valgrind/valgrind.h>
+
/* -1 indicates that we haven't checked that we're running on valgrind yet. */
static int running_on_valgrind = -1;
#endif
@@ -1424,96 +1426,48 @@ address_in_range(void *p, poolp pool)
/*==========================================================================*/
-/* pymalloc allocator
-
- The basic blocks are ordered by decreasing execution frequency,
- which minimizes the number of jumps in the most common cases,
- improves branching prediction and instruction scheduling (small
- block allocations typically result in a couple of instructions).
- Unless the optimizer reorders everything, being too smart...
-
- Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
-
- Return 0 if pymalloc failed to allocate the memory block: on bigger
- requests, on error in the code below (as a last chance to serve the request)
- or when the max memory limit has been reached. */
-static int
-pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
+// Called when the freelist is exhausted. Extend the freelist if there is
+// space for a block. Otherwise, remove this pool from usedpools.
+static void
+pymalloc_pool_extend(poolp pool, uint size)
{
- block *bp;
- poolp pool;
- poolp next;
- uint size;
-
-#ifdef WITH_VALGRIND
- if (UNLIKELY(running_on_valgrind == -1)) {
- running_on_valgrind = RUNNING_ON_VALGRIND;
- }
- if (UNLIKELY(running_on_valgrind)) {
- return 0;
- }
-#endif
-
- if (nbytes == 0) {
- return 0;
- }
- if (nbytes > SMALL_REQUEST_THRESHOLD) {
- return 0;
+ if (UNLIKELY(pool->nextoffset <= pool->maxnextoffset)) {
+ /* There is room for another block. */
+ pool->freeblock = (block*)pool + pool->nextoffset;
+ pool->nextoffset += INDEX2SIZE(size);
+ *(block **)(pool->freeblock) = NULL;
+ return;
}
- /*
- * Most frequent paths first
- */
- size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
- pool = usedpools[size + size];
- if (pool != pool->nextpool) {
- /*
- * There is a used pool for this size class.
- * Pick up the head block of its free list.
- */
- ++pool->ref.count;
- bp = pool->freeblock;
- assert(bp != NULL);
- if ((pool->freeblock = *(block **)bp) != NULL) {
- goto success;
- }
-
- /*
- * Reached the end of the free list, try to extend it.
- */
- if (pool->nextoffset <= pool->maxnextoffset) {
- /* There is room for another block. */
- pool->freeblock = (block*)pool +
- pool->nextoffset;
- pool->nextoffset += INDEX2SIZE(size);
- *(block **)(pool->freeblock) = NULL;
- goto success;
- }
-
- /* Pool is full, unlink from used pools. */
- next = pool->nextpool;
- pool = pool->prevpool;
- next->prevpool = pool;
- pool->nextpool = next;
- goto success;
- }
+ /* Pool is full, unlink from used pools. */
+ poolp next;
+ next = pool->nextpool;
+ pool = pool->prevpool;
+ next->prevpool = pool;
+ pool->nextpool = next;
+}
+/* Called when pymalloc_alloc cannot allocate a block from the usedpool.
+ * This function takes a new pool and allocates a block from it.
+ */
+static void*
+allocate_from_new_pool(uint size)
+{
/* There isn't a pool of the right size class immediately
* available: use a free pool.
*/
- if (usable_arenas == NULL) {
+ if (UNLIKELY(usable_arenas == NULL)) {
/* No arena has a free pool: allocate a new arena. */
#ifdef WITH_MEMORY_LIMITS
if (narenas_currently_allocated >= MAX_ARENAS) {
- goto failed;
+ return NULL;
}
#endif
usable_arenas = new_arena();
if (usable_arenas == NULL) {
- goto failed;
+ return NULL;
}
- usable_arenas->nextarena =
- usable_arenas->prevarena = NULL;
+ usable_arenas->nextarena = usable_arenas->prevarena = NULL;
assert(nfp2lasta[usable_arenas->nfreepools] == NULL);
nfp2lasta[usable_arenas->nfreepools] = usable_arenas;
}
@@ -1536,12 +1490,12 @@ pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
}
/* Try to get a cached free pool. */
- pool = usable_arenas->freepools;
- if (pool != NULL) {
+ poolp pool = usable_arenas->freepools;
+ if (LIKELY(pool != NULL)) {
/* Unlink from cached pools. */
usable_arenas->freepools = pool->nextpool;
- --usable_arenas->nfreepools;
- if (usable_arenas->nfreepools == 0) {
+ usable_arenas->nfreepools--;
+ if (UNLIKELY(usable_arenas->nfreepools == 0)) {
/* Wholly allocated: remove. */
assert(usable_arenas->freepools == NULL);
assert(usable_arenas->nextarena == NULL ||
@@ -1564,73 +1518,123 @@ pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
(block*)usable_arenas->address +
ARENA_SIZE - POOL_SIZE);
}
+ }
+ else {
+ /* Carve off a new pool. */
+ assert(usable_arenas->nfreepools > 0);
+ assert(usable_arenas->freepools == NULL);
+ pool = (poolp)usable_arenas->pool_address;
+ assert((block*)pool <= (block*)usable_arenas->address +
+ ARENA_SIZE - POOL_SIZE);
+ pool->arenaindex = (uint)(usable_arenas - arenas);
+ assert(&arenas[pool->arenaindex] == usable_arenas);
+ pool->szidx = DUMMY_SIZE_IDX;
+ usable_arenas->pool_address += POOL_SIZE;
+ --usable_arenas->nfreepools;
- init_pool:
- /* Frontlink to used pools. */
- next = usedpools[size + size]; /* == prev */
- pool->nextpool = next;
- pool->prevpool = next;
- next->nextpool = pool;
- next->prevpool = pool;
- pool->ref.count = 1;
- if (pool->szidx == size) {
- /* Luckily, this pool last contained blocks
- * of the same size class, so its header
- * and free list are already initialized.
- */
- bp = pool->freeblock;
- assert(bp != NULL);
- pool->freeblock = *(block **)bp;
- goto success;
+ if (usable_arenas->nfreepools == 0) {
+ assert(usable_arenas->nextarena == NULL ||
+ usable_arenas->nextarena->prevarena ==
+ usable_arenas);
+ /* Unlink the arena: it is completely allocated. */
+ usable_arenas = usable_arenas->nextarena;
+ if (usable_arenas != NULL) {
+ usable_arenas->prevarena = NULL;
+ assert(usable_arenas->address != 0);
+ }
}
- /*
- * Initialize the pool header, set up the free list to
- * contain just the second block, and return the first
- * block.
+ }
+
+ /* Frontlink to used pools. */
+ block *bp;
+ poolp next = usedpools[size + size]; /* == prev */
+ pool->nextpool = next;
+ pool->prevpool = next;
+ next->nextpool = pool;
+ next->prevpool = pool;
+ pool->ref.count = 1;
+ if (pool->szidx == size) {
+ /* Luckily, this pool last contained blocks
+ * of the same size class, so its header
+ * and free list are already initialized.
*/
- pool->szidx = size;
- size = INDEX2SIZE(size);
- bp = (block *)pool + POOL_OVERHEAD;
- pool->nextoffset = POOL_OVERHEAD + (size << 1);
- pool->maxnextoffset = POOL_SIZE - size;
- pool->freeblock = bp + size;
- *(block **)(pool->freeblock) = NULL;
- goto success;
+ bp = pool->freeblock;
+ assert(bp != NULL);
+ pool->freeblock = *(block **)bp;
+ return bp;
+ }
+ /*
+ * Initialize the pool header, set up the free list to
+ * contain just the second block, and return the first
+ * block.
+ */
+ pool->szidx = size;
+ size = INDEX2SIZE(size);
+ bp = (block *)pool + POOL_OVERHEAD;
+ pool->nextoffset = POOL_OVERHEAD + (size << 1);
+ pool->maxnextoffset = POOL_SIZE - size;
+ pool->freeblock = bp + size;
+ *(block **)(pool->freeblock) = NULL;
+ return bp;
+}
+
+/* pymalloc allocator
+
+ Return 1 if pymalloc allocated memory and wrote the pointer into *ptr_p.
+
+ Return 0 if pymalloc failed to allocate the memory block: on bigger
+ requests, on error in the code below (as a last chance to serve the request)
+ or when the max memory limit has been reached.
+*/
+static inline int
+pymalloc_alloc(void *ctx, void **ptr_p, size_t nbytes)
+{
+#ifdef WITH_VALGRIND
+ if (UNLIKELY(running_on_valgrind == -1)) {
+ running_on_valgrind = RUNNING_ON_VALGRIND;
}
+ if (UNLIKELY(running_on_valgrind)) {
+ return 0;
+ }
+#endif
- /* Carve off a new pool. */
- assert(usable_arenas->nfreepools > 0);
- assert(usable_arenas->freepools == NULL);
- pool = (poolp)usable_arenas->pool_address;
- assert((block*)pool <= (block*)usable_arenas->address +
- ARENA_SIZE - POOL_SIZE);
- pool->arenaindex = (uint)(usable_arenas - arenas);
- assert(&arenas[pool->arenaindex] == usable_arenas);
- pool->szidx = DUMMY_SIZE_IDX;
- usable_arenas->pool_address += POOL_SIZE;
- --usable_arenas->nfreepools;
-
- if (usable_arenas->nfreepools == 0) {
- assert(usable_arenas->nextarena == NULL ||
- usable_arenas->nextarena->prevarena ==
- usable_arenas);
- /* Unlink the arena: it is completely allocated. */
- usable_arenas = usable_arenas->nextarena;
- if (usable_arenas != NULL) {
- usable_arenas->prevarena = NULL;
- assert(usable_arenas->address != 0);
- }
+ if (UNLIKELY(nbytes == 0)) {
+ return 0;
+ }
+ if (UNLIKELY(nbytes > SMALL_REQUEST_THRESHOLD)) {
+ return 0;
}
- goto init_pool;
+ uint size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
+ poolp pool = usedpools[size + size];
+ block *bp;
+
+ if (LIKELY(pool != pool->nextpool)) {
+ /*
+ * There is a used pool for this size class.
+ * Pick up the head block of its free list.
+ */
+ ++pool->ref.count;
+ bp = pool->freeblock;
+
+ if (UNLIKELY((pool->freeblock = *(block **)bp) == NULL)) {
+ // Reached the end of the free list, try to extend it.
+ pymalloc_pool_extend(pool, size);
+ }
+ }
+ else {
+ /* There isn't a pool of the right size class immediately
+ * available: use a free pool.
+ */
+ bp = allocate_from_new_pool(size);
+ if (UNLIKELY(bp == NULL)) {
+ return 0;
+ }
+ }
-success:
assert(bp != NULL);
*ptr_p = (void *)bp;
return 1;
-
-failed:
- return 0;
}
@@ -1638,7 +1642,7 @@ static void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
void* ptr;
- if (pymalloc_alloc(ctx, &ptr, nbytes)) {
+ if (LIKELY(pymalloc_alloc(ctx, &ptr, nbytes))) {
return ptr;
}
@@ -1658,7 +1662,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
size_t nbytes = nelem * elsize;
- if (pymalloc_alloc(ctx, &ptr, nbytes)) {
+ if (LIKELY(pymalloc_alloc(ctx, &ptr, nbytes))) {
memset(ptr, 0, nbytes);
return ptr;
}
@@ -1671,88 +1675,37 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
}
-/* Free a memory block allocated by pymalloc_alloc().
- Return 1 if it was freed.
- Return 0 if the block was not allocated by pymalloc_alloc(). */
-static int
-pymalloc_free(void *ctx, void *p)
+static void
+insert_to_usedpool(poolp pool)
{
- poolp pool;
- block *lastfree;
- poolp next, prev;
- uint size;
-
- assert(p != NULL);
-
-#ifdef WITH_VALGRIND
- if (UNLIKELY(running_on_valgrind > 0)) {
- return 0;
- }
-#endif
+ assert(pool->ref.count > 0); /* else the pool is empty */
- pool = POOL_ADDR(p);
- if (!address_in_range(p, pool)) {
- return 0;
- }
- /* We allocated this address. */
-
- /* Link p to the start of the pool's freeblock list. Since
- * the pool had at least the p block outstanding, the pool
- * wasn't empty (so it's already in a usedpools[] list, or
- * was full and is in no list -- it's not in the freeblocks
- * list in any case).
- */
- assert(pool->ref.count > 0); /* else it was empty */
- *(block **)p = lastfree = pool->freeblock;
- pool->freeblock = (block *)p;
- if (!lastfree) {
- /* Pool was full, so doesn't currently live in any list:
- * link it to the front of the appropriate usedpools[] list.
- * This mimics LRU pool usage for new allocations and
- * targets optimal filling when several pools contain
- * blocks of the same size class.
- */
- --pool->ref.count;
- assert(pool->ref.count > 0); /* else the pool is empty */
- size = pool->szidx;
- next = usedpools[size + size];
- prev = next->prevpool;
-
- /* insert pool before next: prev <-> pool <-> next */
- pool->nextpool = next;
- pool->prevpool = prev;
- next->prevpool = pool;
- prev->nextpool = pool;
- goto success;
- }
+ uint size = pool->szidx;
+ poolp next = usedpools[size + size];
+ poolp prev = next->prevpool;
- struct arena_object* ao;
- uint nf; /* ao->nfreepools */
+ /* insert pool before next: prev <-> pool <-> next */
+ pool->nextpool = next;
+ pool->prevpool = prev;
+ next->prevpool = pool;
+ prev->nextpool = pool;
+}
- /* freeblock wasn't NULL, so the pool wasn't full,
- * and the pool is in a usedpools[] list.
- */
- if (--pool->ref.count != 0) {
- /* pool isn't empty: leave it in usedpools */
- goto success;
- }
- /* Pool is now empty: unlink from usedpools, and
- * link to the front of freepools. This ensures that
- * previously freed pools will be allocated later
- * (being not referenced, they are perhaps paged out).
- */
- next = pool->nextpool;
- prev = pool->prevpool;
+static void
+insert_to_freepool(poolp pool)
+{
+ poolp next = pool->nextpool;
+ poolp prev = pool->prevpool;
next->prevpool = prev;
prev->nextpool = next;
/* Link the pool to freepools. This is a singly-linked
* list, and pool->prevpool isn't used there.
*/
- ao = &arenas[pool->arenaindex];
+ struct arena_object *ao = &arenas[pool->arenaindex];
pool->nextpool = ao->freepools;
ao->freepools = pool;
- nf = ao->nfreepools;
+ uint nf = ao->nfreepools;
/* If this is the rightmost arena with this number of free pools,
* nfp2lasta[nf] needs to change. Caution: if nf is 0, there
* are no arenas in usable_arenas with that value.
@@ -1826,7 +1779,7 @@ pymalloc_free(void *ctx, void *p)
ao->address = 0; /* mark unassociated */
--narenas_currently_allocated;
- goto success;
+ return;
}
if (nf == 1) {
@@ -1845,7 +1798,7 @@ pymalloc_free(void *ctx, void *p)
nfp2lasta[1] = ao;
}
- goto success;
+ return;
}
/* If this arena is now out of order, we need to keep
@@ -1862,7 +1815,7 @@ pymalloc_free(void *ctx, void *p)
/* If this was the rightmost of the old size, it remains in place. */
if (ao == lastnf) {
/* Case 4. Nothing to do. */
- goto success;
+ return;
}
/* If ao were the only arena in the list, the last block would have
* gotten us out.
@@ -1898,10 +1851,65 @@ pymalloc_free(void *ctx, void *p)
assert(ao->nextarena == NULL || ao->nextarena->prevarena == ao);
assert((usable_arenas == ao && ao->prevarena == NULL)
|| ao->prevarena->nextarena == ao);
+}
+
+/* Free a memory block allocated by pymalloc_alloc().
+ Return 1 if it was freed.
+ Return 0 if the block was not allocated by pymalloc_alloc(). */
+static inline int
+pymalloc_free(void *ctx, void *p)
+{
+ assert(p != NULL);
+
+#ifdef WITH_VALGRIND
+ if (UNLIKELY(running_on_valgrind > 0)) {
+ return 0;
+ }
+#endif
+
+ poolp pool = POOL_ADDR(p);
+ if (UNLIKELY(!address_in_range(p, pool))) {
+ return 0;
+ }
+ /* We allocated this address. */
+
+ /* Link p to the start of the pool's freeblock list. Since
+ * the pool had at least the p block outstanding, the pool
+ * wasn't empty (so it's already in a usedpools[] list, or
+ * was full and is in no list -- it's not in the freeblocks
+ * list in any case).
+ */
+ assert(pool->ref.count > 0); /* else it was empty */
+ block *lastfree = pool->freeblock;
+ *(block **)p = lastfree;
+ pool->freeblock = (block *)p;
+ pool->ref.count--;
+
+ if (UNLIKELY(lastfree == NULL)) {
+ /* Pool was full, so doesn't currently live in any list:
+ * link it to the front of the appropriate usedpools[] list.
+ * This mimics LRU pool usage for new allocations and
+ * targets optimal filling when several pools contain
+ * blocks of the same size class.
+ */
+ insert_to_usedpool(pool);
+ return 1;
+ }
- goto success;
+ /* freeblock wasn't NULL, so the pool wasn't full,
+ * and the pool is in a usedpools[] list.
+ */
+ if (LIKELY(pool->ref.count != 0)) {
+ /* pool isn't empty: leave it in usedpools */
+ return 1;
+ }
-success:
+ /* Pool is now empty: unlink from usedpools, and
+ * link to the front of freepools. This ensures that
+ * previously freed pools will be allocated later
+ * (being not referenced, they are perhaps paged out).
+ */
+ insert_to_freepool(pool);
return 1;
}
@@ -1914,7 +1922,7 @@ _PyObject_Free(void *ctx, void *p)
return;
}
- if (!pymalloc_free(ctx, p)) {
+ if (UNLIKELY(!pymalloc_free(ctx, p))) {
/* pymalloc didn't allocate this address */
PyMem_RawFree(p);
raw_allocated_blocks--;