author     Jason Evans <jasone@canonware.com>  2010-03-15 00:36:10 (GMT)
committer  Jason Evans <jasone@canonware.com>  2010-03-15 02:41:18 (GMT)
commit     05b21be347d66b4afd5147ecd26e8cb22f6ea3fe (patch)
tree       760b559866775d2a0960d688b92f4bb3011b98ba /jemalloc
parent     86815df9dc7d2418a21c87b3dc9747ab42dea73d (diff)
Purge dirty pages without arena->lock.
Diffstat (limited to 'jemalloc')
-rw-r--r--  jemalloc/include/jemalloc/internal/arena.h |  33
-rw-r--r--  jemalloc/src/arena.c                       | 298
2 files changed, 255 insertions(+), 76 deletions(-)
diff --git a/jemalloc/include/jemalloc/internal/arena.h b/jemalloc/include/jemalloc/internal/arena.h
index 06b60ca..f4186a3 100644
--- a/jemalloc/include/jemalloc/internal/arena.h
+++ b/jemalloc/include/jemalloc/internal/arena.h
@@ -95,14 +95,23 @@ typedef struct arena_s arena_t;
/* Each element of the chunk map corresponds to one page within the chunk. */
struct arena_chunk_map_s {
- /*
- * Linkage for run trees. There are two disjoint uses:
- *
- * 1) arena_t's runs_avail tree.
- * 2) arena_run_t conceptually uses this linkage for in-use non-full
- * runs, rather than directly embedding linkage.
- */
- rb_node(arena_chunk_map_t) link;
+ union {
+ /*
+ * Linkage for run trees. There are two disjoint uses:
+ *
+ * 1) arena_t's runs_avail tree.
+ * 2) arena_run_t conceptually uses this linkage for in-use
+ * non-full runs, rather than directly embedding linkage.
+ */
+ rb_node(arena_chunk_map_t) rb_link;
+ /*
+ * List of runs currently in purgatory. arena_chunk_purge()
+ * temporarily allocates runs that contain dirty pages while
+ * purging, so that other threads cannot use the runs while the
+ * purging thread is operating without the arena lock held.
+ */
+ ql_elm(arena_chunk_map_t) ql_link;
+ } u;
#ifdef JEMALLOC_PROF
/* Profile counters, used for large object runs. */
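(The union introduced in the hunk above lets the tree linkage and the purge-list linkage share storage, since a map element needs at most one of the two linkages at any given time. A minimal sketch of the pattern, using simplified hypothetical types rather than jemalloc's rb_node()/ql_elm() macros:)

/*
 * Simplified illustration only: map_elm_t and its link fields are
 * hypothetical stand-ins for arena_chunk_map_t and the rb/ql macros
 * used in the real header.
 */
#include <stddef.h>

typedef struct map_elm_s map_elm_t;
struct map_elm_s {
	union {
		/* Tree linkage, used while the run sits in runs_avail. */
		struct { map_elm_t *left, *right; } rb_link;
		/* List linkage, used while the run sits in purgatory. */
		struct { map_elm_t *next; } ql_link;
	} u;
	size_t bits;	/* Page flags, as in the real chunk map. */
};

(Ownership only changes hands while arena->lock is held, so reusing the storage cannot corrupt either structure.)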
@@ -312,6 +321,14 @@ struct arena_s {
size_t ndirty;
/*
+ * Approximate number of pages being purged. It is possible for
+ * multiple threads to purge dirty pages concurrently, and they use
+ * npurgatory to indicate the total number of pages all threads are
+ * attempting to purge.
+ */
+ size_t npurgatory;
+
+ /*
* Size/address-ordered tree of this arena's available runs. This tree
* is used for first-best-fit run allocation.
*/
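(The purge threshold acts on ndirty - npurgatory, i.e. the dirty pages not yet claimed by any purging thread. A worked example of the ratio check, mirroring arena_maybe_purge() in the arena.c hunks below; every value here is illustrative, not a jemalloc default:)

#include <assert.h>
#include <stddef.h>

int
main(void)
{
	/* Hypothetical snapshot of one arena. */
	size_t nactive = 8192;		/* pages backing active runs */
	size_t ndirty = 300;		/* unused, still-dirty pages */
	size_t npurgatory = 60;		/* already claimed by another purger */
	int opt_lg_dirty_mult = 5;	/* tolerate 1 dirty page per 32 active */

	size_t allowance = nactive >> opt_lg_dirty_mult;	/* 8192/32 == 256 */
	size_t unclaimed = ndirty - npurgatory;			/* 300-60 == 240 */

	/* 240 <= 256, so no additional purge would be started here. */
	assert(!(allowance < unclaimed));
	return (0);
}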
diff --git a/jemalloc/src/arena.c b/jemalloc/src/arena.c
index 3981cbb..98e41d0 100644
--- a/jemalloc/src/arena.c
+++ b/jemalloc/src/arena.c
@@ -27,9 +27,6 @@ size_t medium_max;
size_t lg_mspace;
size_t mspace_mask;
-/* Used to prevent threads from concurrently calling madvise(2). */
-static malloc_mutex_t purge_lock;
-
/*
* const_small_size2bin is a static constant lookup table that in the common
* case can be used as-is for small_size2bin. For dynamically linked programs,
@@ -215,7 +212,7 @@ arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
/* Generate red-black tree functions. */
rb_gen(static JEMALLOC_ATTR(unused), arena_run_tree_, arena_run_tree_t,
- arena_chunk_map_t, link, arena_run_comp)
+ arena_chunk_map_t, u.rb_link, arena_run_comp)
static inline int
arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
@@ -247,7 +244,7 @@ arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
/* Generate red-black tree functions. */
rb_gen(static JEMALLOC_ATTR(unused), arena_avail_tree_, arena_avail_tree_t,
- arena_chunk_map_t, link, arena_avail_comp)
+ arena_chunk_map_t, u.rb_link, arena_avail_comp)
static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
@@ -480,11 +477,183 @@ arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
return (run);
}
+static inline void
+arena_maybe_purge(arena_t *arena)
+{
+
+ /* Enforce opt_lg_dirty_mult. */
+ if (opt_lg_dirty_mult >= 0 && arena->ndirty > arena->npurgatory &&
+ (arena->ndirty - arena->npurgatory) > chunk_npages &&
+ (arena->nactive >> opt_lg_dirty_mult) < (arena->ndirty -
+ arena->npurgatory))
+ arena_purge(arena);
+}
+
+static inline void
+arena_chunk_purge(arena_t *arena, arena_chunk_t *chunk)
+{
+ ql_head(arena_chunk_map_t) mapelms;
+ arena_chunk_map_t *mapelm;
+ size_t pageind;
+#ifdef JEMALLOC_DEBUG
+ size_t ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+ size_t nmadvise;
+#endif
+
+ ql_new(&mapelms);
+
+ /*
+ * If chunk is the spare, temporarily re-allocate it, 1) so that its
+ * run is reinserted into runs_avail, and 2) so that it cannot be
+ * completely discarded by another thread while arena->lock is dropped
+ * by this thread. Note that the arena_run_dalloc() call will
+ * implicitly deallocate the chunk, so no explicit action is required
+ * in this function to deallocate the chunk.
+ */
+ if (chunk == arena->spare)
+ arena_chunk_alloc(arena);
+
+ /* Temporarily allocate all free runs that contain dirty pages. */
+ for (pageind = arena_chunk_header_npages; pageind < chunk_npages;) {
+ mapelm = &chunk->map[pageind];
+ if ((mapelm->bits & CHUNK_MAP_ALLOCATED) == 0) {
+ size_t npages, i;
+
+ npages = (mapelm->bits & CHUNK_MAP_PG_MASK) >>
+ CHUNK_MAP_PG_SHIFT;
+ for (i = 0; i < npages; i++) {
+ if (chunk->map[pageind + i].bits &
+ CHUNK_MAP_DIRTY) {
+ /*
+ * This run contains at least one dirty
+ * page. Temporarily allocate it.
+ * Don't bother setting the
+ * CHUNK_MAP_ALLOCATED bit for interior
+ * pages yet. The bit will be set
+ * during the loop that actually does
+ * the madvise() calls, so that the run
+ * looks like a normal large run by the
+ * time it is passed to
+ * arena_run_dalloc().
+ */
+ arena_avail_tree_remove(
+ &arena->runs_avail, mapelm);
+ mapelm->bits |= (CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED);
+ chunk->map[pageind + npages - 1].bits |=
+ (CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED);
+ /*
+ * Append to list for later processing.
+ */
+ ql_elm_new(mapelm, u.ql_link);
+ ql_tail_insert(&mapelms, mapelm,
+ u.ql_link);
+ break;
+ }
+ }
+ pageind += npages;
+ } else {
+ /* Skip allocated run. */
+ if (mapelm->bits & CHUNK_MAP_LARGE) {
+ pageind += (mapelm->bits & CHUNK_MAP_PG_MASK)
+ >> CHUNK_MAP_PG_SHIFT;
+ } else {
+ arena_run_t *run = (arena_run_t *)((uintptr_t)
+ chunk + (uintptr_t)((pageind -
+ ((mapelm->bits & CHUNK_MAP_PG_MASK) >>
+ CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
+ pageind += run->bin->run_size >> PAGE_SHIFT;
+ }
+ }
+ }
+
+#ifdef JEMALLOC_DEBUG
+ ndirty = chunk->ndirty;
+#endif
+#ifdef JEMALLOC_STATS
+ arena->stats.purged += chunk->ndirty;
+#endif
+ arena->ndirty -= chunk->ndirty;
+ chunk->ndirty = 0;
+ ql_remove(&arena->chunks_dirty, chunk, link_dirty);
+ chunk->dirtied = false;
+
+ malloc_mutex_unlock(&arena->lock);
+#ifdef JEMALLOC_STATS
+ nmadvise = 0;
+#endif
+ ql_foreach(mapelm, &mapelms, u.ql_link) {
+ size_t i, j;
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t);
+ size_t npages = (mapelm->bits & CHUNK_MAP_PG_MASK) >>
+ CHUNK_MAP_PG_SHIFT;
+
+ for (i = 0; i < npages;) {
+ if (chunk->map[pageind + i].bits & CHUNK_MAP_DIRTY) {
+ chunk->map[pageind + i].bits ^= CHUNK_MAP_DIRTY;
+ chunk->map[pageind + i].bits |=
+ (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+
+ /* Find adjacent dirty page(s). */
+ for (j = 1; i + j < npages; j++) {
+ if ((chunk->map[pageind + i + j].bits &
+ CHUNK_MAP_DIRTY) == 0)
+ break;
+ chunk->map[pageind + i + j].bits ^=
+ CHUNK_MAP_DIRTY;
+ chunk->map[pageind + i + j].bits |=
+ (CHUNK_MAP_LARGE |
+ CHUNK_MAP_ALLOCATED);
+ }
+#ifdef JEMALLOC_DEBUG
+ assert(ndirty >= j);
+ ndirty -= j;
+#endif
+
+ madvise((void *)((uintptr_t)chunk + ((pageind +
+ i) << PAGE_SHIFT)), (j << PAGE_SHIFT),
+ MADV_DONTNEED);
+#ifdef JEMALLOC_STATS
+ nmadvise++;
+#endif
+ i += j;
+ } else {
+ chunk->map[pageind + i].bits |=
+ (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
+ i++;
+ }
+ }
+ }
+#ifdef JEMALLOC_DEBUG
+ assert(ndirty == 0);
+#endif
+ malloc_mutex_lock(&arena->lock);
+#ifdef JEMALLOC_STATS
+ arena->stats.nmadvise += nmadvise;
+#endif
+
+ /* Deallocate runs. */
+ for (mapelm = ql_first(&mapelms); mapelm != NULL;
+ mapelm = ql_first(&mapelms)) {
+ size_t pageind = ((uintptr_t)mapelm - (uintptr_t)chunk->map) /
+ sizeof(arena_chunk_map_t);
+ arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
+ (uintptr_t)(pageind << PAGE_SHIFT));
+
+ ql_remove(&mapelms, mapelm, u.ql_link);
+ arena_run_dalloc(arena, run, false);
+ }
+}
+
static void
arena_purge(arena_t *arena)
{
arena_chunk_t *chunk;
- size_t i, j, npages;
+ size_t npurgatory;
#ifdef JEMALLOC_DEBUG
size_t ndirty = 0;
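(The hunk above adds arena_chunk_purge(), which claims dirty runs while holding arena->lock, drops the lock for the madvise(2) calls, then retakes it to return the runs. A minimal sketch of just that locking pattern, with hypothetical simplified types: a pthread mutex instead of malloc_mutex_t, a plain list instead of the ql macros.)

#include <pthread.h>
#include <stddef.h>
#include <sys/mman.h>

/* Hypothetical stand-in for a run already claimed for purging. */
typedef struct purge_run_s {
	void *addr;
	size_t len;			/* multiple of the page size */
	struct purge_run_s *next;
} purge_run_t;

/*
 * Caller holds arena_lock and has already marked the runs allocated, so no
 * other thread can hand these pages out while the lock is dropped.
 */
static void
purge_claimed_runs(pthread_mutex_t *arena_lock, purge_run_t *claimed)
{
	purge_run_t *r;

	pthread_mutex_unlock(arena_lock);
	for (r = claimed; r != NULL; r = r->next)
		madvise(r->addr, r->len, MADV_DONTNEED);
	pthread_mutex_lock(arena_lock);
	/* ...return the runs to runs_avail via the normal dalloc path... */
}

(Treating the claimed runs as allocated for the duration is what makes it safe to drop the lock around the system calls.)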
@@ -494,6 +663,7 @@ arena_purge(arena_t *arena)
}
assert(ndirty == arena->ndirty);
#endif
+ assert(arena->ndirty > arena->npurgatory);
assert(arena->ndirty > chunk_npages);
assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
@@ -502,64 +672,65 @@ arena_purge(arena_t *arena)
#endif
/*
- * Only allow one thread at a time to purge dirty pages. madvise(2)
- * causes the kernel to modify virtual memory data structures that are
- * typically protected by a lock, and purging isn't important enough to
- * suffer lock contention in the kernel. The result of failing to
- * acquire purge_lock here is that this arena will operate with ndirty
- * above the threshold until some dirty pages are re-used, or the
- * creation of more dirty pages causes this function to be called
- * again.
+ * Compute the minimum number of pages that this thread should try to
+ * purge, and add the result to arena->npurgatory. This will keep
+ * multiple threads from racing to reduce ndirty below the threshold.
*/
- if (malloc_mutex_trylock(&purge_lock))
- return;
+ npurgatory = (arena->ndirty - arena->npurgatory) - (arena->nactive >>
+ opt_lg_dirty_mult);
+ arena->npurgatory += npurgatory;
- /*
- * Iterate through chunks until enough dirty memory has been
- * purged (all dirty pages in one chunk, or enough pages to drop to
- * threshold, whichever is greater). Terminate as soon as possible in
- * order to minimize the number of system calls, even if a chunk has
- * only been partially purged.
- */
- for (i = 0; (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty;
- i++) {
+ while (npurgatory > 0) {
+ /* Get next chunk with dirty pages. */
chunk = ql_first(&arena->chunks_dirty);
- assert(chunk != NULL);
-
- /* Purge pages from high to low within each chunk. */
- for (j = chunk_npages - 1; chunk->ndirty > 0; j--) {
- assert(j >= arena_chunk_header_npages);
- if (chunk->map[j].bits & CHUNK_MAP_DIRTY) {
- chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
- /* Find adjacent dirty run(s). */
- for (npages = 1; j > arena_chunk_header_npages
- && (chunk->map[j - 1].bits &
- CHUNK_MAP_DIRTY); npages++) {
- j--;
- chunk->map[j].bits ^= CHUNK_MAP_DIRTY;
- }
- chunk->ndirty -= npages;
- arena->ndirty -= npages;
-
- madvise((void *)((uintptr_t)chunk + (j <<
- PAGE_SHIFT)), (npages << PAGE_SHIFT),
- MADV_DONTNEED);
-#ifdef JEMALLOC_STATS
- arena->stats.nmadvise++;
- arena->stats.purged += npages;
-#endif
- if ((arena->nactive >> opt_lg_dirty_mult) >=
- arena->ndirty && i > 0)
- break;
- }
+ if (chunk == NULL) {
+ /*
+ * This thread was unable to purge as many pages as
+ * originally intended, due to races with other threads
+ * that either did some of the purging work, or re-used
+ * dirty pages.
+ */
+ arena->npurgatory -= npurgatory;
+ return;
}
-
- if (chunk->ndirty == 0) {
+ while (chunk->ndirty == 0) {
ql_remove(&arena->chunks_dirty, chunk, link_dirty);
chunk->dirtied = false;
+ chunk = ql_first(&arena->chunks_dirty);
+ if (chunk == NULL) {
+ /* Same logic as for above. */
+ arena->npurgatory -= npurgatory;
+ return;
+ }
}
+
+ if (chunk->ndirty > npurgatory) {
+ /*
+ * This thread will, at a minimum, purge all the dirty
+ * pages in chunk, so set npurgatory to reflect this
+ * thread's commitment to purge the pages. This tends
+ * to reduce the chances of the following scenario:
+ *
+ * 1) This thread sets arena->npurgatory such that
+ * (arena->ndirty - arena->npurgatory) is at the
+ * threshold.
+ * 2) This thread drops arena->lock.
+ * 3) Another thread causes one or more pages to be
+ * dirtied, and immediately determines that it must
+ * purge dirty pages.
+ *
+ * If this scenario *does* play out, that's okay,
+ * because all of the purging work being done really
+ * needs to happen.
+ */
+ arena->npurgatory += chunk->ndirty - npurgatory;
+ npurgatory = chunk->ndirty;
+ }
+
+ arena->npurgatory -= chunk->ndirty;
+ npurgatory -= chunk->ndirty;
+ arena_chunk_purge(arena, chunk);
}
- malloc_mutex_unlock(&purge_lock);
}
static void
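(The npurgatory bookkeeping in the loop above can be traced with concrete numbers, all illustrative: a thread that set out to purge 100 pages but picks a chunk holding 150 dirty pages commits to the whole chunk, then settles the accounts before purging it without the lock.)

#include <assert.h>
#include <stddef.h>

int
main(void)
{
	/* Illustrative values; assume this is the only purging thread. */
	size_t arena_npurgatory = 100;	/* arena-wide claimed pages */
	size_t npurgatory = 100;	/* this call's remaining target */
	size_t chunk_ndirty = 150;	/* dirty pages in the chosen chunk */

	/* Commit to the whole chunk, as in the hunk above. */
	if (chunk_ndirty > npurgatory) {
		arena_npurgatory += chunk_ndirty - npurgatory;	/* 100 -> 150 */
		npurgatory = chunk_ndirty;			/* 100 -> 150 */
	}

	/* Settle the counters before the unlocked chunk purge. */
	arena_npurgatory -= chunk_ndirty;	/* back to 0 */
	npurgatory -= chunk_ndirty;		/* 0: the while loop exits */

	assert(arena_npurgatory == 0 && npurgatory == 0);
	return (0);
}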
@@ -685,11 +856,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
chunk->dirtied = true;
}
-
- /* Enforce opt_lg_dirty_mult. */
- if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
- (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
- arena_purge(arena);
+ arena_maybe_purge(arena);
}
}
@@ -1426,10 +1593,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
ql_tail_insert(&arena->chunks_dirty, chunk, link_dirty);
chunk->dirtied = true;
}
- /* Enforce opt_lg_dirty_mult. */
- if (opt_lg_dirty_mult >= 0 && arena->ndirty > chunk_npages &&
- (arena->nactive >> opt_lg_dirty_mult) < arena->ndirty)
- arena_purge(arena);
+ arena_maybe_purge(arena);
malloc_mutex_unlock(&arena->lock);
#ifdef JEMALLOC_STATS
@@ -1842,6 +2006,7 @@ arena_new(arena_t *arena, unsigned ind)
arena->nactive = 0;
arena->ndirty = 0;
+ arena->npurgatory = 0;
arena_avail_tree_new(&arena->runs_avail);
@@ -2155,8 +2320,5 @@ arena_boot(void)
((header_size & PAGE_MASK) != 0);
arena_maxclass = chunksize - (arena_chunk_header_npages << PAGE_SHIFT);
- if (malloc_mutex_init(&purge_lock))
- return (true);
-
return (false);
}