summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeil Schemenauer <nas-github@arctrix.com>2021-03-30 02:51:15 (GMT)
committerGitHub <noreply@github.com>2021-03-30 02:51:15 (GMT)
commit85b6b70589c187639aeebc560d67e9cc04abb4d8 (patch)
treed8142d0876fd549e4b25743f869676d7dfe95be7
parenta54fc683f237d8f0b6e999a63aa9b8c0a45b7fef (diff)
downloadcpython-85b6b70589c187639aeebc560d67e9cc04abb4d8.zip
cpython-85b6b70589c187639aeebc560d67e9cc04abb4d8.tar.gz
cpython-85b6b70589c187639aeebc560d67e9cc04abb4d8.tar.bz2
bpo-37448: Use radix tree for pymalloc address_in_range(). (GH-14474)
The radix tree approach is a relatively simple and memory sanitary alternative to the old (slightly) unsanitary address_in_range(). To disable the radix tree map, set a preprocessor flag as follows: -DWITH_PYMALLOC_RADIX_TREE=0. Co-authored-by: Tim Peters <tim.peters@gmail.com>
-rw-r--r--Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst15
-rw-r--r--Objects/obmalloc.c341
2 files changed, 351 insertions, 5 deletions
diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst
new file mode 100644
index 0000000..fe771a5
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst
@@ -0,0 +1,15 @@
+Add a radix tree based memory map to track in-use obmalloc arenas. Use to
+replace the old implementation of address_in_range(). The radix tree
+approach makes it easy to increase pool sizes beyond the OS page size.
+Boosting the pool and arena size allows obmalloc to handle a significantly
+higher percentage of requests from its ultra-fast paths.
+
+It also has the advantage of eliminating the memory unsanitary behavior of
+the previous address_in_range(). The old address_in_range() was marked with
+the annotations _Py_NO_SANITIZE_ADDRESS, _Py_NO_SANITIZE_THREAD, and
+_Py_NO_SANITIZE_MEMORY. Those annotations are no longer needed.
+
+To disable the radix tree map, set a preprocessor flag as follows:
+`-DWITH_PYMALLOC_RADIX_TREE=0`.
+
+Co-authored-by: Tim Peters <tim.peters@gmail.com>
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 03d0e8e..76ff6f9 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -894,6 +894,22 @@ static int running_on_valgrind = -1;
#endif
#endif
+#if !defined(WITH_PYMALLOC_RADIX_TREE)
+/* Use radix-tree to track arena memory regions, for address_in_range().
+ * Enable by default since it allows larger pool sizes. Can be disabled
+ * using -DWITH_PYMALLOC_RADIX_TREE=0 */
+#define WITH_PYMALLOC_RADIX_TREE 1
+#endif
+
+#if SIZEOF_VOID_P > 4
+/* on 64-bit platforms use larger pools and arenas if we can */
+#define USE_LARGE_ARENAS
+#if WITH_PYMALLOC_RADIX_TREE
+/* large pools only supported if radix-tree is enabled */
+#define USE_LARGE_POOLS
+#endif
+#endif
+
/*
* The allocator sub-allocates <Big> blocks of memory (called arenas) aligned
* on a page boundary. This is a reserved virtual address space for the
@@ -907,18 +923,34 @@ static int running_on_valgrind = -1;
* Arenas are allocated with mmap() on systems supporting anonymous memory
* mappings to reduce heap fragmentation.
*/
-#define ARENA_SIZE (256 << 10) /* 256KB */
+#ifdef USE_LARGE_ARENAS
+#define ARENA_BITS 20 /* 1 MiB */
+#else
+#define ARENA_BITS 18 /* 256 KiB */
+#endif
+#define ARENA_SIZE (1 << ARENA_BITS)
+#define ARENA_SIZE_MASK (ARENA_SIZE - 1)
#ifdef WITH_MEMORY_LIMITS
#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE)
#endif
/*
- * Size of the pools used for small blocks. Should be a power of 2,
- * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k.
+ * Size of the pools used for small blocks. Must be a power of 2.
*/
-#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */
-#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK
+#ifdef USE_LARGE_POOLS
+#define POOL_BITS 14 /* 16 KiB */
+#else
+#define POOL_BITS 12 /* 4 KiB */
+#endif
+#define POOL_SIZE (1 << POOL_BITS)
+#define POOL_SIZE_MASK (POOL_SIZE - 1)
+
+#if !WITH_PYMALLOC_RADIX_TREE
+#if POOL_SIZE != SYSTEM_PAGE_SIZE
+# error "pool size must be equal to system page size"
+#endif
+#endif
#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE)
#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE
@@ -1233,6 +1265,264 @@ _Py_GetAllocatedBlocks(void)
return n;
}
+#if WITH_PYMALLOC_RADIX_TREE
+/*==========================================================================*/
+/* radix tree for tracking arena usage
+
+ bit allocation for keys
+
+ 64-bit pointers and 2^20 arena size:
+ 16 -> ignored (POINTER_BITS - ADDRESS_BITS)
+ 10 -> MAP_TOP
+ 10 -> MAP_MID
+ 8 -> MAP_BOT
+ 20 -> ideal aligned arena
+ ----
+ 64
+
+ 32-bit pointers and 2^18 arena size:
+ 14 -> MAP_BOT
+ 18 -> ideal aligned arena
+ ----
+ 32
+
+*/
+
+#if SIZEOF_VOID_P == 8
+
+/* number of bits in a pointer */
+#define POINTER_BITS 64
+
+/* Current 64-bit processors are limited to 48-bit physical addresses. For
+ * now, the top 17 bits of addresses will all be equal to bit 2**47. If that
+ * changes in the future, this must be adjusted upwards.
+ */
+#define ADDRESS_BITS 48
+
+/* use the top and mid layers of the radix tree */
+#define USE_INTERIOR_NODES
+
+#elif SIZEOF_VOID_P == 4
+
+#define POINTER_BITS 32
+#define ADDRESS_BITS 32
+
+#else
+
+ /* Currently this code works for 64-bit or 32-bit pointers only. */
+#error "obmalloc radix tree requires 64-bit or 32-bit pointers."
+
+#endif /* SIZEOF_VOID_P */
+
+/* arena_coverage_t members require this to be true */
+#if ARENA_BITS >= 32
+# error "arena size must be < 2^32"
+#endif
+
+#ifdef USE_INTERIOR_NODES
+/* number of bits used for MAP_TOP and MAP_MID nodes */
+#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
+#else
+#define INTERIOR_BITS 0
+#endif
+
+#define MAP_TOP_BITS INTERIOR_BITS
+#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
+#define MAP_TOP_MASK (MAP_BOT_LENGTH - 1)
+
+#define MAP_MID_BITS INTERIOR_BITS
+#define MAP_MID_LENGTH (1 << MAP_MID_BITS)
+#define MAP_MID_MASK (MAP_MID_LENGTH - 1)
+
+#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
+#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
+#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
+
+#define MAP_BOT_SHIFT ARENA_BITS
+#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
+#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
+
+#define AS_UINT(p) ((uintptr_t)(p))
+#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
+#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
+#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
+
+#if ADDRESS_BITS > POINTER_BITS
+/* Return non-physical address bits of a pointer. Those bits should be same
+ * for all valid pointers if ADDRESS_BITS set correctly. Linux has support for
+ * 57-bit address space (Intel 5-level paging) but will not currently give
+ * those addresses to user space.
+ */
+#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
+#else
+#define HIGH_BITS(p) 0
+#endif
+
+
+/* This is the leaf of the radix tree. See arena_map_mark_used() for the
+ * meaning of these members. */
+typedef struct {
+ int32_t tail_hi;
+ int32_t tail_lo;
+} arena_coverage_t;
+
+typedef struct arena_map_bot {
+ /* The members tail_hi and tail_lo are accessed together. So, it
+ * better to have them as an array of structs, rather than two
+ * arrays.
+ */
+ arena_coverage_t arenas[MAP_BOT_LENGTH];
+} arena_map_bot_t;
+
+#ifdef USE_INTERIOR_NODES
+typedef struct arena_map_mid {
+ struct arena_map_bot *ptrs[MAP_MID_LENGTH];
+} arena_map_mid_t;
+
+typedef struct arena_map_top {
+ struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
+} arena_map_top_t;
+#endif
+
+/* The root of radix tree. Note that by initializing like this, the memory
+ * should be in the BSS. The OS will only memory map pages as the MAP_MID
+ * nodes get used (OS pages are demand loaded as needed).
+ */
+#ifdef USE_INTERIOR_NODES
+static arena_map_top_t arena_map_root;
+/* accounting for number of used interior nodes */
+static int arena_map_mid_count;
+static int arena_map_bot_count;
+#else
+static arena_map_bot_t arena_map_root;
+#endif
+
+/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
+ * it cannot be created */
+static arena_map_bot_t *
+arena_map_get(block *p, int create)
+{
+#ifdef USE_INTERIOR_NODES
+ /* sanity check that ADDRESS_BITS is correct */
+ assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root));
+ int i1 = MAP_TOP_INDEX(p);
+ if (arena_map_root.ptrs[i1] == NULL) {
+ if (!create) {
+ return NULL;
+ }
+ arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t));
+ if (n == NULL) {
+ return NULL;
+ }
+ arena_map_root.ptrs[i1] = n;
+ arena_map_mid_count++;
+ }
+ int i2 = MAP_MID_INDEX(p);
+ if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) {
+ if (!create) {
+ return NULL;
+ }
+ arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t));
+ if (n == NULL) {
+ return NULL;
+ }
+ arena_map_root.ptrs[i1]->ptrs[i2] = n;
+ arena_map_bot_count++;
+ }
+ return arena_map_root.ptrs[i1]->ptrs[i2];
+#else
+ return &arena_map_root;
+#endif
+}
+
+
+/* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw
+ * away 24 bits of the address. That reduces the space requirement of
+ * the tree compared to similar radix tree page-map schemes. In
+ * exchange for slashing the space requirement, it needs more
+ * computation to check an address.
+ *
+ * Tracking coverage is done by "ideal" arena address. It is easier to
+ * explain in decimal so let's say that the arena size is 100 bytes.
+ * Then, ideal addresses are 100, 200, 300, etc. For checking if a
+ * pointer address is inside an actual arena, we have to check two ideal
+ * arena addresses. E.g. if pointer is 357, we need to check 200 and
+ * 300. In the rare case that an arena is aligned in the ideal way
+ * (e.g. base address of arena is 200) then we only have to check one
+ * ideal address.
+ *
+ * The tree nodes for 200 and 300 both store the address of arena.
+ * There are two cases: the arena starts at a lower ideal arena and
+ * extends to this one, or the arena starts in this arena and extends to
+ * the next ideal arena. The tail_lo and tail_hi members correspond to
+ * these two cases.
+ */
+
+
+/* mark or unmark addresses covered by arena */
+static int
+arena_map_mark_used(uintptr_t arena_base, int is_used)
+{
+ /* sanity check that ADDRESS_BITS is correct */
+ assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
+ arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used);
+ if (n_hi == NULL) {
+ assert(is_used); /* otherwise node should already exist */
+ return 0; /* failed to allocate space for node */
+ }
+ int i3 = MAP_BOT_INDEX((block *)arena_base);
+ int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK);
+ if (tail == 0) {
+ /* is ideal arena address */
+ n_hi->arenas[i3].tail_hi = is_used ? -1 : 0;
+ }
+ else {
+ /* arena_base address is not ideal (aligned to arena size) and
+ * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node
+ * for the next arena. Note that it might be in different MAP_TOP
+ * and MAP_MID nodes as well so we need to call arena_map_get()
+ * again (do the full tree traversal).
+ */
+ n_hi->arenas[i3].tail_hi = is_used ? tail : 0;
+ uintptr_t arena_base_next = arena_base + ARENA_SIZE;
+ /* If arena_base is a legit arena address, so is arena_base_next - 1
+ * (last address in arena). If arena_base_next overflows then it
+ * must overflow to 0. However, that would mean arena_base was
+ * "ideal" and we should not be in this case. */
+ assert(arena_base < arena_base_next);
+ arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used);
+ if (n_lo == NULL) {
+ assert(is_used); /* otherwise should already exist */
+ n_hi->arenas[i3].tail_hi = 0;
+ return 0; /* failed to allocate space for node */
+ }
+ int i3_next = MAP_BOT_INDEX(arena_base_next);
+ n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0;
+ }
+ return 1;
+}
+
+/* Return true if 'p' is a pointer inside an obmalloc arena.
+ * _PyObject_Free() calls this so it needs to be very fast. */
+static int
+arena_map_is_used(block *p)
+{
+ arena_map_bot_t *n = arena_map_get(p, 0);
+ if (n == NULL) {
+ return 0;
+ }
+ int i3 = MAP_BOT_INDEX(p);
+ /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */
+ int32_t hi = n->arenas[i3].tail_hi;
+ int32_t lo = n->arenas[i3].tail_lo;
+ int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK);
+ return (tail < lo) || (tail >= hi && hi != 0);
+}
+
+/* end of radix tree logic */
+/*==========================================================================*/
+#endif /* WITH_PYMALLOC_RADIX_TREE */
+
/* Allocate a new arena. If we run out of memory, return NULL. Else
* allocate a new arena, and return the address of an arena_object
@@ -1302,6 +1592,15 @@ new_arena(void)
unused_arena_objects = arenaobj->nextarena;
assert(arenaobj->address == 0);
address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
+#if WITH_PYMALLOC_RADIX_TREE
+ if (address != NULL) {
+ if (!arena_map_mark_used((uintptr_t)address, 1)) {
+ /* marking arena in radix tree failed, abort */
+ _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
+ address = NULL;
+ }
+ }
+#endif
if (address == NULL) {
/* The allocation failed: return NULL after putting the
* arenaobj back.
@@ -1332,6 +1631,17 @@ new_arena(void)
}
+
+#if WITH_PYMALLOC_RADIX_TREE
+/* Return true if and only if P is an address that was allocated by
+ pymalloc. When the radix tree is used, 'poolp' is unused.
+ */
+static bool
+address_in_range(void *p, poolp pool)
+{
+ return arena_map_is_used(p);
+}
+#else
/*
address_in_range(P, POOL)
@@ -1423,6 +1733,7 @@ address_in_range(void *p, poolp pool)
arenas[arenaindex].address != 0;
}
+#endif /* !WITH_PYMALLOC_RADIX_TREE */
/*==========================================================================*/
@@ -1768,6 +2079,11 @@ insert_to_freepool(poolp pool)
ao->nextarena = unused_arena_objects;
unused_arena_objects = ao;
+#if WITH_PYMALLOC_RADIX_TREE
+ /* mark arena region as not under control of obmalloc */
+ arena_map_mark_used(ao->address, 0);
+#endif
+
/* Free the entire arena. */
_PyObject_Arena.free(_PyObject_Arena.ctx,
(void *)ao->address, ARENA_SIZE);
@@ -2711,6 +3027,12 @@ _PyObject_DebugMallocStats(FILE *out)
(void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas);
(void)printone(out, "# arenas highwater mark", narenas_highwater);
(void)printone(out, "# arenas allocated current", narenas);
+#ifdef USE_INTERIOR_NODES
+ (void)printone(out, "# arena map mid nodes", arena_map_mid_count);
+ (void)printone(out, "# arena map bot nodes", arena_map_bot_count);
+ fputc('\n', out);
+#endif
+
PyOS_snprintf(buf, sizeof(buf),
"%zu arenas * %d bytes/arena",
@@ -2729,6 +3051,15 @@ _PyObject_DebugMallocStats(FILE *out)
total += printone(out, "# bytes lost to pool headers", pool_header_bytes);
total += printone(out, "# bytes lost to quantization", quantization);
total += printone(out, "# bytes lost to arena alignment", arena_alignment);
+#ifdef WITH_PYMALLOC_RADIX_TREE
+ total += printone(out, "# bytes lost to arena map root", sizeof(arena_map_root));
+#endif
+#ifdef USE_INTERIOR_NODES
+ total += printone(out, "# bytes lost to arena map mid",
+ sizeof(arena_map_mid_t) * arena_map_mid_count);
+ total += printone(out, "# bytes lost to arena map bot",
+ sizeof(arena_map_bot_t) * arena_map_bot_count);
+#endif
(void)printone(out, "Total", total);
return 1;
}