path: root/Include
author     Sam Gross <colesbury@gmail.com>    2024-03-06 14:42:11 (GMT)
committer  GitHub <noreply@github.com>        2024-03-06 14:42:11 (GMT)
commit     c012c8ab7bb72a733bd98be5df32e262b9045f1a (patch)
tree       c3f39b0baae9a5b28816e768d788f0ec8d60ee0c /Include
parent     02ee475ee3ce9468d44758df2cd79df9f0926303 (diff)
gh-115103: Delay reuse of mimalloc pages that store PyObjects (#115435)
This implements the delayed reuse of mimalloc pages that contain Python objects in the free-threaded build.

Allocations of the same size class are grouped in data structures called pages. These are distinct from operating system pages. For thread-safety, we want to ensure that memory used to store PyObjects remains valid as long as there may be concurrent lock-free readers; we want to delay reusing it for other size classes or other heaps, and delay returning it to the operating system.

When a mimalloc page becomes empty, instead of immediately freeing it, we tag it with a QSBR goal and insert it into a per-thread-state linked list of pages to be freed. When mimalloc needs a fresh page, we process the queue and free any still-empty pages that are now deemed safe to free. Pages waiting to be freed remain available for allocations of the same size class, and allocating from a page prevents it from being freed. There is additional logic to handle abandoned pages when threads exit.
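As a rough illustration of the enqueue step, here is a minimal sketch in C. The helper name `page_defer_free` and its parameters are hypothetical; `_Py_qsbr_advance()` and the `llist_*` helpers are CPython internals that the diffs below build on, and the `qsbr_goal`/`qsbr_node` fields exist only under Py_GIL_DISABLED.

#include "pycore_llist.h"
#include "pycore_mimalloc.h"
#include "pycore_qsbr.h"

// Sketch (not the actual CPython code): instead of freeing an empty
// mimalloc page immediately, tag it with a QSBR goal and park it on
// the per-thread-state list.  `page_defer_free` is an illustrative name.
static void
page_defer_free(struct _mimalloc_thread_state *m,
                struct _qsbr_thread_state *qsbr, mi_page_t *page)
{
    // Advance the shared write sequence; all readers must pass this
    // goal before the page's memory may be reused elsewhere.
    page->qsbr_goal = _Py_qsbr_advance(qsbr->shared);
    llist_insert_tail(&m->page_list, &page->qsbr_node);
}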
Diffstat (limited to 'Include')
-rw-r--r--   Include/internal/mimalloc/mimalloc/types.h    9
-rw-r--r--   Include/internal/pycore_mimalloc.h             1
-rw-r--r--   Include/internal/pycore_qsbr.h                15
3 files changed, 24 insertions, 1 deletion
diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h
index ed93e45..17e4408 100644
--- a/Include/internal/mimalloc/mimalloc/types.h
+++ b/Include/internal/mimalloc/mimalloc/types.h
@@ -311,6 +311,7 @@ typedef struct mi_page_s {
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
uint8_t is_committed : 1; // `true` if the page virtual memory is committed
uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized
+ uint8_t use_qsbr : 1; // delay page freeing using qsbr
uint8_t tag : 4; // tag from the owning heap
uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory
@@ -336,8 +337,13 @@ typedef struct mi_page_s {
struct mi_page_s* next; // next page owned by this thread with the same `block_size`
struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
+#ifdef Py_GIL_DISABLED
+ struct llist_node qsbr_node;
+ uint64_t qsbr_goal;
+#endif
+
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
- #if MI_INTPTR_SIZE==8
+ #if MI_INTPTR_SIZE==8 && !defined(Py_GIL_DISABLED)
uintptr_t padding[1];
#endif
} mi_page_t;
@@ -555,6 +561,7 @@ struct mi_heap_s {
bool no_reclaim; // `true` if this heap should not reclaim abandoned pages
uint8_t tag; // custom identifier for this heap
uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory
+ bool page_use_qsbr; // should freeing pages be delayed using QSBR
};
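The new `qsbr_node` member is an intrusive link: list traversal recovers the enclosing page from the embedded node. A minimal sketch, assuming CPython's `llist_data()` macro from `pycore_llist.h` (a `container_of`-style offset cast); the helper name is hypothetical:

#include "pycore_llist.h"
#include "pycore_mimalloc.h"

// Sketch: recover the enclosing mi_page_t from its embedded list node.
// llist_data() subtracts offsetof(mi_page_t, qsbr_node) from the node
// pointer, container_of-style.
static mi_page_t *
page_from_qsbr_node(struct llist_node *node)
{
    return llist_data(node, mi_page_t, qsbr_node);
}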
diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h
index 44c160b..3ef0154 100644
--- a/Include/internal/pycore_mimalloc.h
+++ b/Include/internal/pycore_mimalloc.h
@@ -48,6 +48,7 @@ struct _mimalloc_thread_state {
mi_heap_t *current_object_heap;
mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT];
mi_tld_t tld;
+ struct llist_node page_list;
};
#endif
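When mimalloc needs a fresh page, the per-thread `page_list` added above is drained. A sketch of that step under the same assumptions as before: the name `page_list_process` is hypothetical, `mi_page_all_free()` is mimalloc's emptiness check, and the final free call is elided because it goes through mimalloc internals.

#include "pycore_llist.h"
#include "pycore_mimalloc.h"
#include "pycore_qsbr.h"

// Sketch: walk the queued pages and release the ones whose QSBR goal
// has been reached.  `page_list_process` is an illustrative name.
static void
page_list_process(struct _mimalloc_thread_state *m,
                  struct _qsbr_thread_state *qsbr)
{
    struct llist_node *node = m->page_list.next;
    while (node != &m->page_list) {
        struct llist_node *next = node->next;  // node may be unlinked below
        mi_page_t *page = llist_data(node, mi_page_t, qsbr_node);
        if (_Py_qbsr_goal_reached(qsbr, page->qsbr_goal)) {
            llist_remove(node);
            if (mi_page_all_free(page)) {
                // Still empty and past the goal: safe to hand back to
                // mimalloc (elided; goes through mimalloc internals).
            }
            // A page that picked up same-size-class allocations while
            // queued simply leaves the list and stays live.
        }
        node = next;
    }
}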
diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h
index 475f00d..c3680a2 100644
--- a/Include/internal/pycore_qsbr.h
+++ b/Include/internal/pycore_qsbr.h
@@ -29,6 +29,12 @@ extern "C" {
#define QSBR_INITIAL 1
#define QSBR_INCR 2
+// Wrap-around safe comparison. This is a holdover from the FreeBSD
+// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
+// sequence numbers, so wrap-around is unlikely.
+#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
+#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
+
struct _qsbr_shared;
struct _PyThreadStateImpl; // forward declare to avoid circular dependency
@@ -89,6 +95,15 @@ _Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
_Py_atomic_store_uint64_release(&qsbr->seq, seq);
}
+// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
+// but does not perform a scan of threads.
+static inline bool
+_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
+{
+ uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
+ return QSBR_LEQ(goal, rd_seq);
+}
+
// Advance the write sequence and return the new goal. This should be called
// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
// determine when it is safe to reclaim (free) the memory.
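To illustrate why the subtraction form of `QSBR_LT`/`QSBR_LEQ` is wrap-around safe, here is a small standalone demo with hypothetical sequence values near the wrap point (the macro is repeated locally so the example compiles on its own):

#include <assert.h>
#include <stdint.h>

#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)

int main(void)
{
    // Ordinary case: 10 precedes 12.
    assert(QSBR_LT(UINT64_C(10), UINT64_C(12)));

    // Near wrap-around: `a` is just below UINT64_MAX and `b` has
    // wrapped past zero.  A plain `a < b` claims b precedes a; the
    // signed difference still orders them correctly.
    uint64_t a = UINT64_MAX - 2;   // written just before the wrap
    uint64_t b = a + 6;            // wraps around to 3
    assert(!(a < b));              // naive comparison gets it backwards
    assert(QSBR_LT(a, b));         // subtraction form is still correct
    return 0;
}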