author    | Eric Snow <ericsnowcurrently@gmail.com> | 2023-04-24 23:23:57 (GMT)
committer | GitHub <noreply@github.com> | 2023-04-24 23:23:57 (GMT)
commit    | df3173d28ef25a0f97d2cca8cf4e64e062a08d06 (patch)
tree      | f2b6f378f81ceee48a9e710154b9d6c4b0f959a2 /Objects/obmalloc.c
parent    | 01be52e42eac468b6511b56ee60cd1b99baf3848 (diff)
gh-101659: Isolate "obmalloc" State to Each Interpreter (gh-101660)
This is strictly about moving the "obmalloc" runtime state from
`_PyRuntimeState` to `PyInterpreterState`. Doing so improves isolation
between interpreters, specifically for most of the memory (including
objects) allocated for each interpreter's use. This is important for a
per-interpreter GIL, but such isolation is valuable even without it.
FWIW, a per-interpreter obmalloc is the proverbial canary in the coal
mine when it comes to the isolation of objects between interpreters.
Any object that (unintentionally) leaks into another interpreter is
highly likely to cause a crash, at least on debug builds. That is a
useful signal when assessing interpreter isolation.
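
For readers skimming the patch, the heart of the change is a small per-interpreter state lookup: every allocator entry point now resolves an `OMState` from the current `PyInterpreterState`, falling back to the main interpreter for interpreters that share the main obmalloc. The sketch below is condensed from the diff further down (CPython-internal API, so it only builds inside the CPython tree); the comments are added here for orientation.

```c
/* Condensed from Objects/obmalloc.c as changed by this patch. */
typedef struct _obmalloc_state OMState;

/* Does this interpreter manage its own obmalloc state?  It does unless it
   was created with Py_RTFLAGS_USE_MAIN_OBMALLOC (i.e. it shares the main
   interpreter's allocator). */
static inline int
has_own_state(PyInterpreterState *interp)
{
    return (_Py_IsMainInterpreter(interp) ||
            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
            _Py_IsMainInterpreterFinalizing(interp));
}

/* Resolve the obmalloc state for the current interpreter, falling back to
   the main interpreter's state for interpreters that don't own one. */
static inline OMState *
get_state(void)
{
    PyInterpreterState *interp = _PyInterpreterState_GET();
    if (!has_own_state(interp)) {
        interp = _PyInterpreterState_Main();
    }
    return &interp->obmalloc;
}
```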
Diffstat (limited to 'Objects/obmalloc.c')
-rw-r--r-- | Objects/obmalloc.c | 223
1 file changed, 179 insertions, 44 deletions
diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c
index 5e1bcda..de62aeb 100644
--- a/Objects/obmalloc.c
+++ b/Objects/obmalloc.c
@@ -725,20 +725,51 @@ PyObject_Free(void *ptr)
 static int running_on_valgrind = -1;
 #endif
 
+typedef struct _obmalloc_state OMState;
 
-#define allarenas (_PyRuntime.obmalloc.mgmt.arenas)
-#define maxarenas (_PyRuntime.obmalloc.mgmt.maxarenas)
-#define unused_arena_objects (_PyRuntime.obmalloc.mgmt.unused_arena_objects)
-#define usable_arenas (_PyRuntime.obmalloc.mgmt.usable_arenas)
-#define nfp2lasta (_PyRuntime.obmalloc.mgmt.nfp2lasta)
-#define narenas_currently_allocated (_PyRuntime.obmalloc.mgmt.narenas_currently_allocated)
-#define ntimes_arena_allocated (_PyRuntime.obmalloc.mgmt.ntimes_arena_allocated)
-#define narenas_highwater (_PyRuntime.obmalloc.mgmt.narenas_highwater)
-#define raw_allocated_blocks (_PyRuntime.obmalloc.mgmt.raw_allocated_blocks)
+static inline int
+has_own_state(PyInterpreterState *interp)
+{
+    return (_Py_IsMainInterpreter(interp) ||
+            !(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
+            _Py_IsMainInterpreterFinalizing(interp));
+}
+
+static inline OMState *
+get_state(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    if (!has_own_state(interp)) {
+        interp = _PyInterpreterState_Main();
+    }
+    return &interp->obmalloc;
+}
+
+// These macros all rely on a local "state" variable.
+#define usedpools (state->pools.used)
+#define allarenas (state->mgmt.arenas)
+#define maxarenas (state->mgmt.maxarenas)
+#define unused_arena_objects (state->mgmt.unused_arena_objects)
+#define usable_arenas (state->mgmt.usable_arenas)
+#define nfp2lasta (state->mgmt.nfp2lasta)
+#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
+#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
+#define narenas_highwater (state->mgmt.narenas_highwater)
+#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
 
 Py_ssize_t
-_Py_GetAllocatedBlocks(void)
+_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
 {
+#ifdef Py_DEBUG
+    assert(has_own_state(interp));
+#else
+    if (!has_own_state(interp)) {
+        _Py_FatalErrorFunc(__func__,
+                           "the interpreter doesn't have its own allocator");
+    }
+#endif
+    OMState *state = &interp->obmalloc;
+
     Py_ssize_t n = raw_allocated_blocks;
     /* add up allocated blocks for used pools */
     for (uint i = 0; i < maxarenas; ++i) {
@@ -759,20 +790,100 @@ _Py_GetAllocatedBlocks(void)
     return n;
 }
 
+void
+_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
+{
+    if (has_own_state(interp)) {
+        Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
+        assert(has_own_state(interp) || leaked == 0);
+        interp->runtime->obmalloc.interpreter_leaks += leaked;
+    }
+}
+
+static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
+
+/* We preserve the number of blockss leaked during runtime finalization,
+   so they can be reported if the runtime is initialized again. */
+// XXX We don't lose any information by dropping this,
+// so we should consider doing so.
+static Py_ssize_t last_final_leaks = 0;
+
+void
+_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
+{
+    last_final_leaks = get_num_global_allocated_blocks(runtime);
+    runtime->obmalloc.interpreter_leaks = 0;
+}
+
+static Py_ssize_t
+get_num_global_allocated_blocks(_PyRuntimeState *runtime)
+{
+    Py_ssize_t total = 0;
+    if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
+        PyInterpreterState *interp = _PyInterpreterState_Main();
+        if (interp == NULL) {
+            /* We are at the very end of runtime finalization.
+               We can't rely on finalizing->interp since that thread
+               state is probably already freed, so we don't worry
+               about it. */
+            assert(PyInterpreterState_Head() == NULL);
+        }
+        else {
+            assert(interp != NULL);
+            /* It is probably the last interpreter but not necessarily. */
+            assert(PyInterpreterState_Next(interp) == NULL);
+            total += _PyInterpreterState_GetAllocatedBlocks(interp);
+        }
+    }
+    else {
+        HEAD_LOCK(runtime);
+        PyInterpreterState *interp = PyInterpreterState_Head();
+        assert(interp != NULL);
+#ifdef Py_DEBUG
+        int got_main = 0;
+#endif
+        for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
+#ifdef Py_DEBUG
+            if (_Py_IsMainInterpreter(interp)) {
+                assert(!got_main);
+                got_main = 1;
+                assert(has_own_state(interp));
+            }
+#endif
+            if (has_own_state(interp)) {
+                total += _PyInterpreterState_GetAllocatedBlocks(interp);
+            }
+        }
+        HEAD_UNLOCK(runtime);
+#ifdef Py_DEBUG
+        assert(got_main);
+#endif
+    }
+    total += runtime->obmalloc.interpreter_leaks;
+    total += last_final_leaks;
+    return total;
+}
+
+Py_ssize_t
+_Py_GetGlobalAllocatedBlocks(void)
+{
+    return get_num_global_allocated_blocks(&_PyRuntime);
+}
+
 #if WITH_PYMALLOC_RADIX_TREE
 /*==========================================================================*/
 /* radix tree for tracking arena usage. */
 
-#define arena_map_root (_PyRuntime.obmalloc.usage.arena_map_root)
+#define arena_map_root (state->usage.arena_map_root)
 #ifdef USE_INTERIOR_NODES
-#define arena_map_mid_count (_PyRuntime.obmalloc.usage.arena_map_mid_count)
-#define arena_map_bot_count (_PyRuntime.obmalloc.usage.arena_map_bot_count)
+#define arena_map_mid_count (state->usage.arena_map_mid_count)
+#define arena_map_bot_count (state->usage.arena_map_bot_count)
 #endif
 
 /* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
  * it cannot be created */
 static Py_ALWAYS_INLINE arena_map_bot_t *
-arena_map_get(pymem_block *p, int create)
+arena_map_get(OMState *state, pymem_block *p, int create)
 {
 #ifdef USE_INTERIOR_NODES
     /* sanity check that IGNORE_BITS is correct */
@@ -833,11 +944,12 @@ arena_map_get(pymem_block *p, int create)
 
 /* mark or unmark addresses covered by arena */
 static int
-arena_map_mark_used(uintptr_t arena_base, int is_used)
+arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
 {
     /* sanity check that IGNORE_BITS is correct */
     assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
-    arena_map_bot_t *n_hi = arena_map_get((pymem_block *)arena_base, is_used);
+    arena_map_bot_t *n_hi = arena_map_get(
+            state, (pymem_block *)arena_base, is_used);
     if (n_hi == NULL) {
         assert(is_used);  /* otherwise node should already exist */
         return 0;  /* failed to allocate space for node */
@@ -862,7 +974,8 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
          * must overflow to 0.  However, that would mean arena_base was
         * "ideal" and we should not be in this case.
         */
        assert(arena_base < arena_base_next);
-        arena_map_bot_t *n_lo = arena_map_get((pymem_block *)arena_base_next, is_used);
+        arena_map_bot_t *n_lo = arena_map_get(
+                state, (pymem_block *)arena_base_next, is_used);
         if (n_lo == NULL) {
             assert(is_used);  /* otherwise should already exist */
             n_hi->arenas[i3].tail_hi = 0;
@@ -877,9 +990,9 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
 /* Return true if 'p' is a pointer inside an obmalloc arena.
  * _PyObject_Free() calls this so it needs to be very fast. */
 static int
-arena_map_is_used(pymem_block *p)
+arena_map_is_used(OMState *state, pymem_block *p)
 {
-    arena_map_bot_t *n = arena_map_get(p, 0);
+    arena_map_bot_t *n = arena_map_get(state, p, 0);
     if (n == NULL) {
         return 0;
     }
@@ -902,7 +1015,7 @@ arena_map_is_used(pymem_block *p)
  * `usable_arenas` to the return value.
 */
 static struct arena_object*
-new_arena(void)
+new_arena(OMState *state)
 {
     struct arena_object* arenaobj;
     uint excess;        /* number of bytes above pool alignment */
@@ -968,7 +1081,7 @@ new_arena(void)
     address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
 #if WITH_PYMALLOC_RADIX_TREE
     if (address != NULL) {
-        if (!arena_map_mark_used((uintptr_t)address, 1)) {
+        if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
            /* marking arena in radix tree failed, abort */
            _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
            address = NULL;
@@ -1011,9 +1124,9 @@ new_arena(void)
    pymalloc. When the radix tree is used, 'poolp' is unused. */
 static bool
-address_in_range(void *p, poolp Py_UNUSED(pool))
+address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
 {
-    return arena_map_is_used(p);
+    return arena_map_is_used(state, p);
 }
 #else
 /*
@@ -1094,7 +1207,7 @@ extremely desirable that it be this fast.
 static bool _Py_NO_SANITIZE_ADDRESS
             _Py_NO_SANITIZE_THREAD
             _Py_NO_SANITIZE_MEMORY
-address_in_range(void *p, poolp pool)
+address_in_range(OMState *state, void *p, poolp pool)
 {
     // Since address_in_range may be reading from memory which was not allocated
     // by Python, it is important that pool->arenaindex is read only once, as
@@ -1111,8 +1224,6 @@ address_in_range(void *p, poolp pool)
 
 /*==========================================================================*/
 
-#define usedpools (_PyRuntime.obmalloc.pools.used)
-
 // Called when freelist is exhausted.  Extend the freelist if there is
 // space for a block.  Otherwise, remove this pool from usedpools.
 static void
@@ -1138,7 +1249,7 @@ pymalloc_pool_extend(poolp pool, uint size)
  * This function takes new pool and allocate a block from it.
 */
 static void*
-allocate_from_new_pool(uint size)
+allocate_from_new_pool(OMState *state, uint size)
 {
     /* There isn't a pool of the right size class immediately
      * available:  use a free pool.
@@ -1150,7 +1261,7 @@ allocate_from_new_pool(uint size)
         return NULL;
     }
 #endif
-    usable_arenas = new_arena();
+    usable_arenas = new_arena(state);
     if (usable_arenas == NULL) {
         return NULL;
     }
@@ -1274,7 +1385,7 @@ allocate_from_new_pool(uint size)
    or when the max memory limit has been reached.
 */
 static inline void*
-pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
+pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
 {
 #ifdef WITH_VALGRIND
     if (UNLIKELY(running_on_valgrind == -1)) {
@@ -1314,7 +1425,7 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
         /* There isn't a pool of the right size class immediately
          * available:  use a free pool.
         */
-        bp = allocate_from_new_pool(size);
+        bp = allocate_from_new_pool(state, size);
     }
 
     return (void *)bp;
@@ -1324,7 +1435,8 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
 void *
 _PyObject_Malloc(void *ctx, size_t nbytes)
 {
-    void* ptr = pymalloc_alloc(ctx, nbytes);
+    OMState *state = get_state();
+    void* ptr = pymalloc_alloc(state, ctx, nbytes);
     if (LIKELY(ptr != NULL)) {
         return ptr;
     }
@@ -1343,7 +1455,8 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
     assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
     size_t nbytes = nelem * elsize;
 
-    void* ptr = pymalloc_alloc(ctx, nbytes);
+    OMState *state = get_state();
+    void* ptr = pymalloc_alloc(state, ctx, nbytes);
     if (LIKELY(ptr != NULL)) {
         memset(ptr, 0, nbytes);
         return ptr;
@@ -1358,7 +1471,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
 
 
 static void
-insert_to_usedpool(poolp pool)
+insert_to_usedpool(OMState *state, poolp pool)
 {
     assert(pool->ref.count > 0);            /* else the pool is empty */
 
@@ -1374,7 +1487,7 @@
 }
 
 static void
-insert_to_freepool(poolp pool)
+insert_to_freepool(OMState *state, poolp pool)
 {
     poolp next = pool->nextpool;
     poolp prev = pool->prevpool;
@@ -1457,7 +1570,7 @@ insert_to_freepool(poolp pool)
 
 #if WITH_PYMALLOC_RADIX_TREE
         /* mark arena region as not under control of obmalloc */
-        arena_map_mark_used(ao->address, 0);
+        arena_map_mark_used(state, ao->address, 0);
 #endif
 
         /* Free the entire arena. */
@@ -1544,7 +1657,7 @@ insert_to_freepool(poolp pool)
    Return 1 if it was freed.
   Return 0 if the block was not allocated by pymalloc_alloc(). */
 static inline int
-pymalloc_free(void *Py_UNUSED(ctx), void *p)
+pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
 {
     assert(p != NULL);
 
@@ -1555,7 +1668,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
 #endif
 
     poolp pool = POOL_ADDR(p);
-    if (UNLIKELY(!address_in_range(p, pool))) {
+    if (UNLIKELY(!address_in_range(state, p, pool))) {
         return 0;
     }
     /* We allocated this address. */
@@ -1579,7 +1692,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
          * targets optimal filling when several pools contain
         * blocks of the same size class.
         */
-        insert_to_usedpool(pool);
+        insert_to_usedpool(state, pool);
         return 1;
     }
 
@@ -1596,7 +1709,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
     * previously freed pools will be allocated later
     * (being not referenced, they are perhaps paged out).
     */
-    insert_to_freepool(pool);
+    insert_to_freepool(state, pool);
     return 1;
 }
 
@@ -1609,7 +1722,8 @@ _PyObject_Free(void *ctx, void *p)
         return;
     }
 
-    if (UNLIKELY(!pymalloc_free(ctx, p))) {
+    OMState *state = get_state();
+    if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
         /* pymalloc didn't allocate this address */
         PyMem_RawFree(p);
         raw_allocated_blocks--;
@@ -1627,7 +1741,8 @@ _PyObject_Free(void *ctx, void *p)
   Return 0 if pymalloc didn't allocated p. */
 static int
-pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
+pymalloc_realloc(OMState *state, void *ctx,
+                 void **newptr_p, void *p, size_t nbytes)
 {
     void *bp;
     poolp pool;
@@ -1643,7 +1758,7 @@ pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
 #endif
 
     pool = POOL_ADDR(p);
-    if (!address_in_range(p, pool)) {
+    if (!address_in_range(state, p, pool)) {
        /* pymalloc is not managing this block.
 
           If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
@@ -1696,7 +1811,8 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
         return _PyObject_Malloc(ctx, nbytes);
     }
 
-    if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
+    OMState *state = get_state();
+    if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
         return ptr2;
     }
 
@@ -1710,11 +1826,29 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
  * only be used by extensions that are compiled with pymalloc enabled. */
 
 Py_ssize_t
-_Py_GetAllocatedBlocks(void)
+_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
+{
+    return 0;
+}
+
+Py_ssize_t
+_Py_GetGlobalAllocatedBlocks(void)
 {
     return 0;
 }
 
+void
+_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
+{
+    return;
+}
+
+void
+_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
+{
+    return;
+}
+
 #endif /* WITH_PYMALLOC */
@@ -2289,6 +2423,7 @@ _PyObject_DebugMallocStats(FILE *out)
     if (!_PyMem_PymallocEnabled()) {
         return 0;
     }
+    OMState *state = get_state();
 
     uint i;
     const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
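
Taken together, the pattern at each allocator entry point is: look up the interpreter's obmalloc state once, then thread it through the static helpers so that macros such as `usedpools` and `raw_allocated_blocks` expand against the local `state` variable (e.g. `state->mgmt.raw_allocated_blocks`) instead of `_PyRuntime`. A condensed sketch of that call shape, based on the `_PyObject_Malloc` hunk above (the raw-malloc fallback shown here is the pre-existing path that the patch leaves unchanged):

```c
/* Condensed from _PyObject_Malloc() in Objects/obmalloc.c after this patch. */
void *
_PyObject_Malloc(void *ctx, size_t nbytes)
{
    OMState *state = get_state();           /* per-interpreter state lookup */
    void *ptr = pymalloc_alloc(state, ctx, nbytes);
    if (LIKELY(ptr != NULL)) {
        return ptr;                         /* served from this interpreter's pools */
    }

    /* Large (or failed) requests fall back to the raw allocator and are
       counted against this interpreter via the raw_allocated_blocks macro. */
    ptr = PyMem_RawMalloc(nbytes);
    if (ptr != NULL) {
        raw_allocated_blocks++;   /* expands to state->mgmt.raw_allocated_blocks */
    }
    return ptr;
}
```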