Diffstat (limited to 'src/H5Cmpio.c')
 src/H5Cmpio.c | 1541 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1541 insertions(+), 0 deletions(-)
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c
new file mode 100644
index 0000000..6af346c
--- /dev/null
+++ b/src/H5Cmpio.c
@@ -0,0 +1,1541 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*-------------------------------------------------------------------------
+ *
+ * Created: H5Cmpio.c
+ * June 20 2015
+ * Quincey Koziol
+ *
+ * Purpose: Functions in this file implement support for parallel I/O for
+ * generic cache code.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#include "H5Cmodule.h" /* This source code file is part of the H5C module */
+#define H5F_FRIEND /*suppress error about including H5Fpkg */
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h" /* Generic Functions */
+#include "H5ACprivate.h" /* Metadata cache */
+#include "H5Cpkg.h" /* Cache */
+#include "H5CXprivate.h" /* API Contexts */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5Fpkg.h" /* Files */
+#include "H5FDprivate.h" /* File drivers */
+#include "H5MMprivate.h" /* Memory management */
+
+#ifdef H5_HAVE_PARALLEL
+/****************/
+/* Local Macros */
+/****************/
+#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+/********************/
+/* Local Prototypes */
+/********************/
+static herr_t H5C__collective_write(H5F_t *f);
+static herr_t H5C__flush_candidate_entries(H5F_t *f, unsigned entries_to_flush[H5C_RING_NTYPES],
+ unsigned entries_to_clear[H5C_RING_NTYPES]);
+static herr_t H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring, unsigned entries_to_flush,
+ unsigned entries_to_clear);
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_apply_candidate_list
+ *
+ * Purpose: Apply the supplied candidate list.
+ *
+ * We used to do this by simply having each process write
+ * every mpi_size-th entry in the candidate list, starting
+ * at index mpi_rank, and mark all the others clean.
+ *
+ * However, this can cause unnecessary contention in a file
+ * system by increasing the number of processes writing to
+ * adjacent locations in the HDF5 file.
+ *
+ * To attempt to minimize this, we now arrange matters such
+ * that each process writes n adjacent entries in the
+ * candidate list, and marks all others clean. We must do
+ * this in such a fashion as to guarantee that each entry
+ * on the candidate list is written by exactly one process,
+ * and marked clean by all others.
+ *
+ * To do this, first construct a table mapping mpi_rank
+ * to the index of the first entry in the candidate list to
+ * be written by the process of that mpi_rank, and then use
+ * the table to control which entries are written and which
+ * are marked as clean as a function of the mpi_rank.
+ *
+ * Note that the table must be identical on all processes, as
+ * all see the same candidate list and mpi_size -- the inputs
+ * used to construct the table.
+ *
+ * We construct the table as follows. Let:
+ *
+ * n = num_candidates / mpi_size;
+ *
+ * m = num_candidates % mpi_size;
+ *
+ * Now allocate an array of integers of length mpi_size + 1,
+ * and call this array candidate_assignment_table.
+ *
+ * Conceptually, if the number of candidates is a multiple
+ * of the mpi_size, we simply pass through the candidate list
+ * and assign n entries to each process to flush, with the
+ * index of the first entry to flush in the location in
+ * the candidate_assignment_table indicated by the mpi_rank
+ * of the process.
+ *
+ * In the more common case, in which the number of candidates
+ * isn't a multiple of the mpi_size, we pretend it is, and
+ * give num_candidates % mpi_size processes one extra entry
+ * each to make things work out.
+ *
+ * Once the table is constructed, we determine the first and
+ * last entry this process is to flush as follows:
+ *
+ * first_entry_to_flush = candidate_assignment_table[mpi_rank]
+ *
+ * last_entry_to_flush =
+ * candidate_assignment_table[mpi_rank + 1] - 1;
+ *
+ * With these values determined, we simply scan through the
+ * candidate list, marking all entries in the range
+ * [first_entry_to_flush, last_entry_to_flush] for flush,
+ * and all others to be cleaned.
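+ *
+ * For example (illustrative values only, not taken from the
+ * code): with num_candidates = 10 and mpi_size = 4 we get
+ * n = 2 and m = 2, so the table is
+ *
+ * candidate_assignment_table[] = {0, 3, 6, 8, 10}
+ *
+ * Ranks 0 and 1 each flush three entries, and ranks 2 and 3
+ * each flush two. Rank 2, for instance, flushes candidate
+ * list entries [6, 7] and marks all other candidates clean.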
+ *
+ * Finally, we scan the LRU from tail to head, flushing
+ * or marking clean the candidate entries as indicated.
+ * If necessary, we scan the pinned list as well.
+ *
+ * Note that this function will fail if any protected or
+ * clean entries appear on the candidate list.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, haddr_t *candidates_list_ptr,
+ int mpi_rank, int mpi_size)
+{
+ unsigned first_entry_to_flush;
+ unsigned last_entry_to_flush;
+ unsigned total_entries_to_clear = 0;
+ unsigned total_entries_to_flush = 0;
+ unsigned *candidate_assignment_table = NULL;
+ unsigned entries_to_flush[H5C_RING_NTYPES];
+ unsigned entries_to_clear[H5C_RING_NTYPES];
+ haddr_t addr;
+ H5C_cache_entry_t *entry_ptr = NULL;
+#ifdef H5C_DO_SANITY_CHECKS
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ char tbl_buf[1024];
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+ unsigned m, n;
+ unsigned u; /* Local index variable */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ HDassert(num_candidates > 0);
+ HDassert((!cache_ptr->slist_enabled) || (num_candidates <= cache_ptr->slist_len));
+ HDassert(candidates_list_ptr != NULL);
+ HDassert(0 <= mpi_rank);
+ HDassert(mpi_rank < mpi_size);
+
+ /* Initialize the entries_to_flush and entries_to_clear arrays */
+ HDmemset(entries_to_flush, 0, sizeof(entries_to_flush));
+ HDmemset(entries_to_clear, 0, sizeof(entries_to_clear));
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n", __func__, mpi_rank);
+
+ HDmemset(tbl_buf, 0, sizeof(tbl_buf));
+
+ HDsnprintf(tbl_buf, sizeof(tbl_buf), "candidate list = ");
+ for (u = 0; u < num_candidates; u++)
+ HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx", (long long)(*(candidates_list_ptr + u)));
+ HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
+
+ HDfprintf(stdout, "%s", tbl_buf);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if (f->shared->coll_md_write) {
+ /* Sanity check */
+ HDassert(NULL == cache_ptr->coll_write_list);
+
+ /* Create skip list of entries for collective write */
+ if (NULL == (cache_ptr->coll_write_list = H5SL_create(H5SL_TYPE_HADDR, NULL)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCREATE, FAIL, "can't create skip list for entries")
+ } /* end if */
+
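+ /* Compute the base number of candidates assigned to each rank (n) and
+ * the number of ranks that receive one extra candidate (m).
+ */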
+ n = num_candidates / (unsigned)mpi_size;
+ m = num_candidates % (unsigned)mpi_size;
+
+ if (NULL ==
+ (candidate_assignment_table = (unsigned *)H5MM_malloc(sizeof(unsigned) * (size_t)(mpi_size + 1))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+ "memory allocation failed for candidate assignment table")
+
+ candidate_assignment_table[0] = 0;
+ candidate_assignment_table[mpi_size] = num_candidates;
+
+ if (m == 0) { /* mpi_size evenly divides num_candidates */
+ for (u = 1; u < (unsigned)mpi_size; u++)
+ candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n;
+ } /* end if */
+ else {
+ for (u = 1; u <= m; u++)
+ candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n + 1;
+
+ if (num_candidates < (unsigned)mpi_size) {
+ for (u = m + 1; u < (unsigned)mpi_size; u++)
+ candidate_assignment_table[u] = num_candidates;
+ } /* end if */
+ else {
+ for (u = m + 1; u < (unsigned)mpi_size; u++)
+ candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n;
+ } /* end else */
+ } /* end else */
+ HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates);
+
+#ifdef H5C_DO_SANITY_CHECKS
+ /* Verify that the candidate assignment table has the expected form */
+ for (u = 1; u < (unsigned)(mpi_size - 1); u++) {
+ unsigned a, b;
+
+ a = candidate_assignment_table[u] - candidate_assignment_table[u - 1];
+ b = candidate_assignment_table[u + 1] - candidate_assignment_table[u];
+
+ HDassert(n + 1 >= a);
+ HDassert(a >= b);
+ HDassert(b >= n);
+ }
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ first_entry_to_flush = candidate_assignment_table[mpi_rank];
+ last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1;
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ for (u = 0; u < 1024; u++)
+ tbl_buf[u] = '\0';
+ HDsnprintf(tbl_buf, sizeof(tbl_buf), "candidate assignment table = ");
+ for (u = 0; u <= (unsigned)mpi_size; u++)
+ HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %u", candidate_assignment_table[u]);
+ HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
+ HDfprintf(stdout, "%s", tbl_buf);
+
+ HDfprintf(stdout, "%s:%d: flush entries [%u, %u].\n", __func__, mpi_rank, first_entry_to_flush,
+ last_entry_to_flush);
+
+ HDfprintf(stdout, "%s:%d: marking entries.\n", __func__, mpi_rank);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
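+ /* Scan the candidate list and mark each entry for either flush or
+ * clear, according to the assignment table computed above.
+ */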
+ for (u = 0; u < num_candidates; u++) {
+ addr = candidates_list_ptr[u];
+ HDassert(H5F_addr_defined(addr));
+
+#ifdef H5C_DO_SANITY_CHECKS
+ if (u > 0) {
+ if (last_addr == addr)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "duplicate entry in cleaned list")
+ else if (last_addr > addr)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted")
+ } /* end if */
+
+ last_addr = addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+ if (entry_ptr == NULL)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "listed candidate entry not in cache?!?!?")
+ if (!entry_ptr->is_dirty)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?")
+ if (entry_ptr->is_protected)
+ /* For now at least, we can't deal with protected entries.
+ * If we encounter one, scream and die. If it becomes an
+ * issue, we should be able to work around this.
+ */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?")
+
+ /* Sanity checks */
+ HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->ring >= H5C_RING_USER);
+ HDassert(entry_ptr->ring <= H5C_RING_SB);
+ HDassert(!entry_ptr->flush_immediately);
+ HDassert(!entry_ptr->clear_on_unprotect);
+
+ /* Determine whether the entry is to be cleared or flushed,
+ * and mark it accordingly. We will scan the protected and
+ * pinned list shortly, and clear or flush according to these
+ * markings.
+ */
+ if (u >= first_entry_to_flush && u <= last_entry_to_flush) {
+ total_entries_to_flush++;
+ entries_to_flush[entry_ptr->ring]++;
+ entry_ptr->flush_immediately = TRUE;
+ } /* end if */
+ else {
+ total_entries_to_clear++;
+ entries_to_clear[entry_ptr->ring]++;
+ entry_ptr->clear_on_unprotect = TRUE;
+ } /* end else */
+
+ /* Entries that are marked as collectively accessed and are on the
+ * candidate list to be cleared from the cache have to be removed
+ * from the coll list. This is OK since the candidate list is
+ * collective and uniform across all ranks.
+ */
+ if (entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ } /* end if */
+ } /* end for */
+
+#ifdef H5C_DO_SANITY_CHECKS
+ m = 0;
+ n = 0;
+ for (u = 0; u < H5C_RING_NTYPES; u++) {
+ m += entries_to_flush[u];
+ n += entries_to_clear[u];
+ } /* end for */
+
+ HDassert((unsigned)m == total_entries_to_flush);
+ HDassert(n == total_entries_to_clear);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %u/%u/%u.\n", __func__, mpi_rank,
+ num_candidates, total_entries_to_clear, total_entries_to_flush);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* We have now marked all the entries on the candidate list for
+ * either flush or clear -- now scan the LRU and the pinned list
+ * for these entries and do the deed. Do this via a call to
+ * H5C__flush_candidate_entries().
+ *
+ * Note that we are doing things in this roundabout manner so as
+ * to preserve the order of the LRU list to the best of our ability.
+ * If we don't do this, my experiments indicate that we will have a
+ * noticeably poorer hit ratio as a result.
+ */
+ if (H5C__flush_candidate_entries(f, entries_to_flush, entries_to_clear) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "flush candidates failed")
+
+ /* If we've deferred writing to do it collectively, take care of that now */
+ if (f->shared->coll_md_write) {
+ /* Sanity check */
+ HDassert(cache_ptr->coll_write_list);
+
+ /* Write collective list */
+ if (H5C__collective_write(f) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_WRITEERROR, FAIL, "can't write metadata collectively")
+ } /* end if */
+
+done:
+ if (candidate_assignment_table != NULL)
+ candidate_assignment_table = (unsigned *)H5MM_xfree((void *)candidate_assignment_table);
+ if (cache_ptr->coll_write_list) {
+ if (H5SL_close(cache_ptr->coll_write_list) < 0)
+ HDONE_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "failed to destroy skip list")
+ cache_ptr->coll_write_list = NULL;
+ } /* end if */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_apply_candidate_list() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_construct_candidate_list__clean_cache
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * clean all entries in the cache.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ * Changes: With the slist optimization, the slist is not maintained
+ * unless a flush is in progress. Thus we can no longer use
+ * cache_ptr->slist_size to determine the total size of
+ * the entries we must insert in the candidate list.
+ *
+ * To address this, we now use cache_ptr->dirty_index_size
+ * instead.
+ *
+ * JRM -- 7/27/20
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr)
+{
+ size_t space_needed;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+
+ /* As a sanity check, set space needed to the dirty_index_size. This
+ * should be the sum total of the sizes of all the dirty entries
+ * in the metadata cache. Note that if the slist is enabled,
+ * cache_ptr->slist_size should equal cache_ptr->dirty_index_size.
+ */
+ space_needed = cache_ptr->dirty_index_size;
+
+ HDassert((!cache_ptr->slist_enabled) || (space_needed == cache_ptr->slist_size));
+
+ /* Recall that while we shouldn't have any protected entries at this
+ * point, it is possible that some dirty entries may reside on the
+ * pinned list.
+ */
+ HDassert(cache_ptr->dirty_index_size <= (cache_ptr->dLRU_list_size + cache_ptr->pel_size));
+ HDassert((!cache_ptr->slist_enabled) ||
+ (cache_ptr->slist_len <= (cache_ptr->dLRU_list_len + cache_ptr->pel_len)));
+
+ if (space_needed > 0) { /* we have work to do */
+
+ H5C_cache_entry_t *entry_ptr;
+ unsigned nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+ haddr_t nominated_addr;
+
+ HDassert((!cache_ptr->slist_enabled) || (cache_ptr->slist_len > 0));
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+
+ while ((nominated_entries_size < space_needed) &&
+ ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) &&
+ (entry_ptr != NULL)) {
+
+ HDassert(!(entry_ptr->is_protected));
+ HDassert(!(entry_ptr->is_read_only));
+ HDassert(entry_ptr->ro_ref_count == 0);
+ HDassert(entry_ptr->is_dirty);
+ HDassert((!cache_ptr->slist_enabled) || (entry_ptr->in_slist));
+
+ nominated_addr = entry_ptr->addr;
+
+ if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+
+ } /* end while */
+
+ HDassert(entry_ptr == NULL);
+
+ /* it is possible that there are some dirty entries on the
+ * pinned entry list as well -- scan it too if necessary
+ */
+ entry_ptr = cache_ptr->pel_head_ptr;
+
+ while ((nominated_entries_size < space_needed) &&
+ ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) &&
+ (entry_ptr != NULL)) {
+
+ if (entry_ptr->is_dirty) {
+
+ HDassert(!(entry_ptr->is_protected));
+ HDassert(!(entry_ptr->is_read_only));
+ HDassert(entry_ptr->ro_ref_count == 0);
+ HDassert(entry_ptr->is_dirty);
+ HDassert(entry_ptr->in_slist);
+
+ nominated_addr = entry_ptr->addr;
+
+ if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+
+ } /* end if */
+
+ entry_ptr = entry_ptr->next;
+
+ } /* end while */
+
+ HDassert((!cache_ptr->slist_enabled) || (nominated_entries_count == cache_ptr->slist_len));
+ HDassert(nominated_entries_size == space_needed);
+
+ } /* end if */
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_construct_candidate_list__clean_cache() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_construct_candidate_list__min_clean
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * get the cache back within its min clean constraints.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ * Changes: With the slist optimization, the slist is not maintained
+ * unless a flush is in progress. Updated sanity checks to
+ * reflect this.
+ *
+ * JRM -- 7/27/20
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr)
+{
+ size_t space_needed = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+
+ /* compute the number of bytes (if any) that must be flushed to get the
+ * cache back within its min clean constraints.
+ */
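+ /* In summary (comment added for clarity):
+ *
+ * space_needed = MAX(0, min_clean_size - (free space + clean LRU size))
+ *
+ * where free space is (max_cache_size - index_size) when positive and
+ * zero otherwise.
+ */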
+ if (cache_ptr->max_cache_size > cache_ptr->index_size) {
+
+ if (((cache_ptr->max_cache_size - cache_ptr->index_size) + cache_ptr->cLRU_list_size) >=
+ cache_ptr->min_clean_size) {
+
+ space_needed = 0;
+ }
+ else {
+
+ space_needed = cache_ptr->min_clean_size -
+ ((cache_ptr->max_cache_size - cache_ptr->index_size) + cache_ptr->cLRU_list_size);
+ }
+ } /* end if */
+ else {
+
+ if (cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) {
+
+ space_needed = 0;
+ }
+ else {
+
+ space_needed = cache_ptr->min_clean_size - cache_ptr->cLRU_list_size;
+ }
+ } /* end else */
+
+ if (space_needed > 0) { /* we have work to do */
+
+ H5C_cache_entry_t *entry_ptr;
+ unsigned nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+
+ HDassert((!cache_ptr->slist_enabled) || (cache_ptr->slist_len > 0));
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+
+ while ((nominated_entries_size < space_needed) &&
+ ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) &&
+ (entry_ptr != NULL) && (!entry_ptr->flush_me_last)) {
+
+ haddr_t nominated_addr;
+
+ HDassert(!(entry_ptr->is_protected));
+ HDassert(!(entry_ptr->is_read_only));
+ HDassert(entry_ptr->ro_ref_count == 0);
+ HDassert(entry_ptr->is_dirty);
+ HDassert((!cache_ptr->slist_enabled) || (entry_ptr->in_slist));
+
+ nominated_addr = entry_ptr->addr;
+
+ if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+
+ } /* end while */
+
+ HDassert((!cache_ptr->slist_enabled) || (nominated_entries_count <= cache_ptr->slist_len));
+ HDassert(nominated_entries_size <= cache_ptr->dirty_index_size);
+ HDassert(nominated_entries_size >= space_needed);
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_construct_candidate_list__min_clean() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_mark_entries_as_clean
+ *
+ * Purpose: When the H5C code is used to implement the metadata caches
+ * in PHDF5, only the cache with MPI_rank 0 is allowed to
+ * actually write entries to disk -- all other caches must
+ * retain dirty entries until they are advised that the
+ * entries are clean.
+ *
+ * This function exists to allow the H5C code to receive these
+ * notifications.
+ *
+ * The function receives a list of entry base addresses
+ * which must refer to dirty entries in the cache. If any
+ * of the entries are either clean or don't exist, the
+ * function flags an error.
+ *
+ * The function scans the list of entries and flushes all
+ * those that are currently unprotected with the
+ * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently
+ * protected are flagged for clearing when they are
+ * unprotected.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 7/5/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_mark_entries_as_clean(H5F_t *f, unsigned ce_array_len, haddr_t *ce_array_ptr)
+{
+ H5C_t *cache_ptr;
+ unsigned entries_cleared;
+ unsigned pinned_entries_cleared;
+ hbool_t progress;
+ unsigned entries_examined;
+ unsigned initial_list_len;
+ haddr_t addr;
+ unsigned pinned_entries_marked = 0;
+#ifdef H5C_DO_SANITY_CHECKS
+ unsigned protected_entries_marked = 0;
+ unsigned other_entries_marked = 0;
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+ H5C_cache_entry_t *clear_ptr = NULL;
+ H5C_cache_entry_t *entry_ptr = NULL;
+ unsigned u;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert(f);
+ HDassert(f->shared);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+
+ HDassert(ce_array_len > 0);
+ HDassert(ce_array_ptr != NULL);
+
+#ifdef H5C_DO_EXTREME_SANITY_CHECKS
+ if (H5C_validate_protected_entry_list(cache_ptr) < 0 || H5C_validate_pinned_entry_list(cache_ptr) < 0 ||
+ H5C_validate_lru_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on entry")
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ for (u = 0; u < ce_array_len; u++) {
+ addr = ce_array_ptr[u];
+
+#ifdef H5C_DO_SANITY_CHECKS
+ if (u == 0)
+ last_addr = addr;
+ else {
+ if (last_addr == addr)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list")
+ if (last_addr > addr)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "cleaned list not sorted")
+ } /* end else */
+
+#ifdef H5C_DO_EXTREME_SANITY_CHECKS
+ if (H5C_validate_protected_entry_list(cache_ptr) < 0 ||
+ H5C_validate_pinned_entry_list(cache_ptr) < 0 || H5C_validate_lru_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed in for loop")
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ HDassert(H5F_addr_defined(addr));
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+
+ if (entry_ptr == NULL) {
+#ifdef H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout, "H5C_mark_entries_as_clean: entry[%u] = %" PRIuHADDR " not in cache.\n", u,
+ addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not in cache?!?!?")
+ } /* end if */
+ else if (!entry_ptr->is_dirty) {
+#ifdef H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout, "H5C_mark_entries_as_clean: entry %" PRIuHADDR " is not dirty!?!\n", addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?")
+ } /* end else-if */
+ else {
+ /* Mark the entry to be cleared on unprotect. We will
+ * scan the LRU list shortly, and clear all those entries
+ * not currently protected.
+ */
+
+ /* First, clear the entry's collective access flag so that
+ the entry itself can be cleared */
+ if (TRUE == entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ } /* end if */
+
+ entry_ptr->clear_on_unprotect = TRUE;
+ if (entry_ptr->is_pinned)
+ pinned_entries_marked++;
+#ifdef H5C_DO_SANITY_CHECKS
+ else if (entry_ptr->is_protected)
+ protected_entries_marked++;
+ else
+ other_entries_marked++;
+#endif /* H5C_DO_SANITY_CHECKS */
+ }
+ }
+
+ /* Scan through the LRU list from back to front, and flush the
+ * entries whose clear_on_unprotect flags are set. Observe that
+ * any protected entries will not be on the LRU, and therefore
+ * will not be flushed at this time.
+ *
+ * Note that unlike H5C_apply_candidate_list(),
+ * H5C_mark_entries_as_clean() makes all its calls to
+ * H5C__flush_single_entry() with the H5C__FLUSH_CLEAR_ONLY_FLAG
+ * set. As a result, the pre_serialize() and serialize calls are
+ * not made.
+ *
+ * This then implies that (assuming such actions were
+ * permitted in the parallel case) no loads, dirties,
+ * resizes, or removals of other entries can occur as
+ * a side effect of the flush. Hence, there is no need
+ * for the checks for entry removal / status change
+ * that I ported to H5C_apply_candidate_list().
+ *
+ * However, if (in addition to allowing such operations
+ * in the parallel case), we allow such operations outside
+ * of the pre_serialize / serialize routines, this may
+ * cease to be the case -- requiring a review of this
+ * point.
+ * JRM -- 4/7/15
+ */
+ entries_cleared = 0;
+ entries_examined = 0;
+ initial_list_len = cache_ptr->LRU_list_len;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+ while (entry_ptr != NULL && entries_examined <= initial_list_len && entries_cleared < ce_array_len) {
+ if (entry_ptr->clear_on_unprotect) {
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_cleared++;
+
+ if (H5C__flush_single_entry(f, clear_ptr,
+ (H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__GENERATE_IMAGE_FLAG |
+ H5C__UPDATE_PAGE_BUFFER_FLAG)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear entry")
+ } /* end if */
+ else
+ entry_ptr = entry_ptr->prev;
+ entries_examined++;
+ } /* end while */
+
+#ifdef H5C_DO_SANITY_CHECKS
+ HDassert(entries_cleared == other_entries_marked);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ /* It is also possible that some of the cleared entries are on the
+ * pinned list. Must scan that also.
+ */
+ pinned_entries_cleared = 0;
+ progress = TRUE;
+ while ((pinned_entries_cleared < pinned_entries_marked) && progress) {
+ progress = FALSE;
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while (entry_ptr != NULL) {
+ if (entry_ptr->clear_on_unprotect && entry_ptr->flush_dep_ndirty_children == 0) {
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_cleared++;
+ pinned_entries_cleared++;
+ progress = TRUE;
+
+ if (H5C__flush_single_entry(f, clear_ptr,
+ (H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__GENERATE_IMAGE_FLAG |
+ H5C__UPDATE_PAGE_BUFFER_FLAG)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear entry")
+ } /* end if */
+ else
+ entry_ptr = entry_ptr->next;
+ } /* end while */
+ } /* end while */
+
+#ifdef H5C_DO_SANITY_CHECKS
+ HDassert(entries_cleared == pinned_entries_marked + other_entries_marked);
+ HDassert(entries_cleared + protected_entries_marked == ce_array_len);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ HDassert((entries_cleared == ce_array_len) || ((ce_array_len - entries_cleared) <= cache_ptr->pl_len));
+
+#ifdef H5C_DO_SANITY_CHECKS
+ u = 0;
+ entry_ptr = cache_ptr->pl_head_ptr;
+ while (entry_ptr != NULL) {
+ if (entry_ptr->clear_on_unprotect) {
+
+ u++;
+ }
+ entry_ptr = entry_ptr->next;
+ }
+ HDassert((entries_cleared + u) == ce_array_len);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+done:
+#ifdef H5C_DO_EXTREME_SANITY_CHECKS
+ if (H5C_validate_protected_entry_list(cache_ptr) < 0 || H5C_validate_pinned_entry_list(cache_ptr) < 0 ||
+ H5C_validate_lru_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on exit")
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_mark_entries_as_clean() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_clear_coll_entries
+ *
+ * Purpose: Clear half or the entire list of collective entries and
+ * mark them as independent.
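+ *
+ * The collective entry list is scanned from its tail toward
+ * its head; when partial is TRUE, only half of the entries
+ * (rounded down) are cleared.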
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: Mohamad Chaarawi
+ * April, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_clear_coll_entries(H5C_t *cache_ptr, hbool_t partial)
+{
+ uint32_t clear_cnt;
+ H5C_cache_entry_t *entry_ptr = NULL;
+ herr_t ret_value = SUCCEED;
+
+#ifdef H5C_DO_SANITY_CHECKS
+ FUNC_ENTER_NOAPI_NOINIT
+#else
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+#endif
+
+ entry_ptr = cache_ptr->coll_tail_ptr;
+ clear_cnt = (partial ? cache_ptr->coll_list_len / 2 : cache_ptr->coll_list_len);
+ while (entry_ptr && clear_cnt > 0) {
+ H5C_cache_entry_t *prev_ptr = entry_ptr->coll_prev;
+
+ /* Sanity check */
+ HDassert(entry_ptr->coll_access);
+
+ /* Mark entry as independent */
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+
+ /* Decrement entry count */
+ clear_cnt--;
+
+ /* Advance to next entry */
+ entry_ptr = prev_ptr;
+ } /* end while */
+
+#ifdef H5C_DO_SANITY_CHECKS
+done:
+#endif /* H5C_DO_SANITY_CHECKS */
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_clear_coll_entries */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C__collective_write
+ *
+ * Purpose: Perform a collective write of a list of metadata entries.
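+ *
+ * The entries on the cache's collective write skip list
+ * are gathered into parallel arrays of memory types,
+ * addresses, sizes, and image buffers, and written with a
+ * single vector write call in collective MPI-I/O mode.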
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: Mohamad Chaarawi
+ * February, 2016
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5C__collective_write(H5F_t *f)
+{
+ H5AC_t *cache_ptr;
+ H5FD_mpio_xfer_t orig_xfer_mode = H5FD_MPIO_COLLECTIVE;
+ const void **bufs = NULL;
+ H5FD_mem_t *types = NULL;
+ haddr_t *addrs = NULL;
+ size_t *sizes = NULL;
+ uint32_t count32;
+ size_t count;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->coll_write_list != NULL);
+
+ /* Get original transfer mode */
+ if (H5CX_get_io_xfer_mode(&orig_xfer_mode) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
+
+ /* Set transfer mode */
+ if (H5CX_set_io_xfer_mode(H5FD_MPIO_COLLECTIVE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
+
+ /* Get number of entries in collective write list */
+ count = H5SL_count(cache_ptr->coll_write_list);
+ H5_CHECKED_ASSIGN(count32, uint32_t, count, size_t);
+
+ if (count > 0) {
+ H5SL_node_t *node;
+ H5C_cache_entry_t *entry_ptr;
+ void *base_buf;
+ int i;
+
+ if (NULL == (addrs = H5MM_malloc(count * sizeof(*addrs))))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "couldn't allocate address array")
+ if (NULL == (sizes = H5MM_malloc(count * sizeof(*sizes))))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "couldn't allocate sizes array")
+ if (NULL == (bufs = H5MM_malloc(count * sizeof(*bufs))))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "couldn't allocate buffers array")
+ if (NULL == (types = H5MM_malloc(count * sizeof(*types))))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "couldn't allocate types array")
+
+ /* Fill arrays */
+ node = H5SL_first(cache_ptr->coll_write_list);
+ HDassert(node);
+ if (NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node)))
+ HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item")
+
+ /* Set up initial array position & buffer base address */
+ HDassert(entry_ptr->type);
+ base_buf = entry_ptr->image_ptr;
+ addrs[0] = entry_ptr->addr;
+ sizes[0] = entry_ptr->size;
+ bufs[0] = base_buf;
+ types[0] = entry_ptr->type->mem_type;
+
+ node = H5SL_next(node);
+ i = 1;
+ while (node) {
+ if (NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node)))
+ HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item")
+
+ /* Set up array position */
+ HDassert(entry_ptr->type);
+ addrs[i] = entry_ptr->addr;
+ sizes[i] = entry_ptr->size;
+ bufs[i] = entry_ptr->image_ptr;
+ types[i] = entry_ptr->type->mem_type;
+
+ /* Advance to next node & array location */
+ node = H5SL_next(node);
+ i++;
+ } /* end while */
+ } /* end if */
+
+ /* Pass buf type, file type to the file driver */
+ if (H5CX_set_mpi_coll_datatypes(MPI_BYTE, MPI_BYTE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties")
+
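+ /* Note that the vector write below is issued even when count is zero,
+ * so ranks with an empty collective write list still participate in
+ * the collective operation.
+ */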
+ /* Make vector write call */
+ if (H5F_shared_vector_write(H5F_SHARED(f), count32, types, addrs, sizes, bufs) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_WRITEERROR, FAIL, "unable to write entries")
+
+done:
+ H5MM_xfree(types);
+ H5MM_xfree(bufs);
+ H5MM_xfree(sizes);
+ H5MM_xfree(addrs);
+
+ /* Reset transfer mode in API context, if changed */
+ if (orig_xfer_mode != H5FD_MPIO_COLLECTIVE)
+ if (H5CX_set_io_xfer_mode(orig_xfer_mode) < 0)
+ HDONE_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
+
+ FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5C__collective_write() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5C__flush_candidate_entries
+ *
+ * Purpose: Flush or clear (as indicated) the candidate entries that
+ * have been marked in the metadata cache. In so doing,
+ * observe rings and flush dependencies.
+ *
+ * Note that this function presumes that:
+ *
+ * 1) no candidate entries are protected,
+ *
+ * 2) all candidate entries are dirty, and
+ *
+ * 3) if a candidate entry has a dirty flush dependency
+ * child, that child is also a candidate entry.
+ *
+ * The function will fail if any of these preconditions are
+ * not met.
+ *
+ * Candidate entries are marked by setting either the
+ * flush_immediately or the clear_on_unprotect flags in the
+ * cache entry (but not both). Entries marked flush_immediately
+ * will be flushed, those marked clear_on_unprotect will be
+ * cleared.
+ *
+ * Note that this function is a modified version of
+ * H5C_flush_cache() -- any changes there may need to be
+ * reflected here and vice versa.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer
+ * 2/10/17
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5C__flush_candidate_entries(H5F_t *f, unsigned entries_to_flush[H5C_RING_NTYPES],
+ unsigned entries_to_clear[H5C_RING_NTYPES])
+{
+#ifdef H5C_DO_SANITY_CHECKS
+ int i;
+ uint32_t index_len = 0;
+ size_t index_size = (size_t)0;
+ size_t clean_index_size = (size_t)0;
+ size_t dirty_index_size = (size_t)0;
+ size_t slist_size = (size_t)0;
+ uint32_t slist_len = 0;
+#endif /* H5C_DO_SANITY_CHECKS */
+ H5C_ring_t ring;
+ H5C_t *cache_ptr;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_PACKAGE
+
+ HDassert(f);
+ HDassert(f->shared);
+
+ cache_ptr = f->shared->cache;
+
+ HDassert(cache_ptr);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ HDassert(cache_ptr->slist_ptr);
+
+ HDassert(entries_to_flush[H5C_RING_UNDEFINED] == 0);
+ HDassert(entries_to_clear[H5C_RING_UNDEFINED] == 0);
+
+#ifdef H5C_DO_SANITY_CHECKS
+ HDassert(cache_ptr->index_ring_len[H5C_RING_UNDEFINED] == 0);
+ HDassert(cache_ptr->index_ring_size[H5C_RING_UNDEFINED] == (size_t)0);
+ HDassert(cache_ptr->clean_index_ring_size[H5C_RING_UNDEFINED] == (size_t)0);
+ HDassert(cache_ptr->dirty_index_ring_size[H5C_RING_UNDEFINED] == (size_t)0);
+ HDassert(cache_ptr->slist_ring_len[H5C_RING_UNDEFINED] == 0);
+ HDassert(cache_ptr->slist_ring_size[H5C_RING_UNDEFINED] == (size_t)0);
+
+ for (i = H5C_RING_USER; i < H5C_RING_NTYPES; i++) {
+ index_len += cache_ptr->index_ring_len[i];
+ index_size += cache_ptr->index_ring_size[i];
+ clean_index_size += cache_ptr->clean_index_ring_size[i];
+ dirty_index_size += cache_ptr->dirty_index_ring_size[i];
+
+ slist_len += cache_ptr->slist_ring_len[i];
+ slist_size += cache_ptr->slist_ring_size[i];
+ } /* end for */
+
+ HDassert(cache_ptr->index_len == index_len);
+ HDassert(cache_ptr->index_size == index_size);
+ HDassert(cache_ptr->clean_index_size == clean_index_size);
+ HDassert(cache_ptr->dirty_index_size == dirty_index_size);
+ HDassert(cache_ptr->slist_len == slist_len);
+ HDassert(cache_ptr->slist_size == slist_size);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+#ifdef H5C_DO_EXTREME_SANITY_CHECKS
+ if (H5C_validate_protected_entry_list(cache_ptr) < 0 || H5C_validate_pinned_entry_list(cache_ptr) < 0 ||
+ H5C_validate_lru_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on entry")
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ cache_ptr->flush_in_progress = TRUE;
+
+ /* flush each ring, starting from the outermost ring and
+ * working inward.
+ */
+ ring = H5C_RING_USER;
+ while (ring < H5C_RING_NTYPES) {
+ if (H5C__flush_candidates_in_ring(f, ring, entries_to_flush[ring], entries_to_clear[ring]) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "flush candidates in ring failed")
+
+ ring++;
+ } /* end while */
+
+done:
+ cache_ptr->flush_in_progress = FALSE;
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C__flush_candidate_entries() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5C__flush_candidates_in_ring
+ *
+ * Purpose: Flush or clear (as indicated) the candidate entries
+ * contained in the specified cache and ring. All candidate
+ * entries in rings outside the specified ring must have been
+ * flushed (or cleared) on entry.
+ *
+ * Note that this function presumes that:
+ *
+ * 1) no candidate entries are protected,
+ *
+ * 2) all candidate entries are dirty, and
+ *
+ * 3) if a candidate entry has a dirty flush dependency
+ * child, that child is also a candidate entry.
+ *
+ * The function will fail if any of these preconditions are
+ * not met.
+ *
+ * Candidate entries are marked by setting either the
+ * flush_immediately or the clear_on_unprotect flags in the
+ * cache entry (but not both). Entries marked flush_immediately
+ * will be flushed, those marked clear_on_unprotect will be
+ * cleared.
+ *
+ * Candidate entries residing in the LRU must be flushed
+ * (or cleared) in LRU order to avoid performance issues.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer
+ * 2/10/17
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring, unsigned entries_to_flush, unsigned entries_to_clear)
+{
+ H5C_t *cache_ptr;
+ hbool_t progress;
+ hbool_t restart_scan = FALSE;
+ unsigned entries_flushed = 0;
+ unsigned entries_cleared = 0;
+#ifdef H5C_DO_SANITY_CHECKS
+ unsigned init_index_len;
+#endif /* H5C_DO_SANITY_CHECKS */
+ unsigned clear_flags =
+ H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__GENERATE_IMAGE_FLAG | H5C__UPDATE_PAGE_BUFFER_FLAG;
+ unsigned flush_flags = H5C__NO_FLAGS_SET;
+ unsigned op_flags;
+ H5C_cache_entry_t *op_ptr;
+ H5C_cache_entry_t *entry_ptr;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(f);
+ HDassert(f->shared);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ HDassert(cache_ptr->slist_ptr);
+ HDassert(ring > H5C_RING_UNDEFINED);
+ HDassert(ring < H5C_RING_NTYPES);
+
+#ifdef H5C_DO_EXTREME_SANITY_CHECKS
+ if ((H5C_validate_protected_entry_list(cache_ptr) < 0) ||
+ (H5C_validate_pinned_entry_list(cache_ptr) < 0) || (H5C_validate_lru_list(cache_ptr) < 0))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on entry")
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+#ifdef H5C_DO_SANITY_CHECKS
+ /* index len should not change */
+ init_index_len = cache_ptr->index_len;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ /* Examine entries in the LRU list, and flush or clear all entries
+ * so marked in the target ring.
+ *
+ * With the current implementation of flush dependencies, no entry
+ * in the LRU can have flush dependency children -- thus one pass
+ * through the LRU will be sufficient.
+ *
+ * It is possible that this will change -- hence the assertion.
+ */
+ restart_scan = FALSE;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+ while (((entries_flushed < entries_to_flush) || (entries_cleared < entries_to_clear)) &&
+ (entry_ptr != NULL)) {
+ hbool_t prev_is_dirty = FALSE;
+ H5C_cache_entry_t *next_ptr;
+
+ /* Entries in the LRU must not have flush dependency children */
+ HDassert(entry_ptr->flush_dep_nchildren == 0);
+
+ /* Remember dirty state of entry to advance to */
+ if (entry_ptr->prev != NULL)
+ prev_is_dirty = entry_ptr->prev->is_dirty;
+
+ /* If the entry is in the ring */
+ if (entry_ptr->ring == ring) {
+ /* If this process needs to clear this entry. */
+ if (entry_ptr->clear_on_unprotect) {
+ HDassert(entry_ptr->is_dirty);
+
+ /* Set entry and flags for operation */
+ op_ptr = entry_ptr;
+ op_flags = clear_flags;
+
+ /* Set next entry appropriately */
+ next_ptr = entry_ptr->next;
+
+ /* Reset entry flag */
+ entry_ptr->clear_on_unprotect = FALSE;
+ entries_cleared++;
+ } /* end if */
+ else if (entry_ptr->flush_immediately) {
+ HDassert(entry_ptr->is_dirty);
+
+ /* Set entry and flags for operation */
+ op_ptr = entry_ptr;
+ op_flags = flush_flags;
+
+ /* Set next entry appropriately */
+ next_ptr = entry_ptr->next;
+
+ /* Reset entry flag */
+ entry_ptr->flush_immediately = FALSE;
+ entries_flushed++;
+ } /* end else-if */
+ else {
+ /* No operation for this entry */
+ op_ptr = NULL;
+
+ /* Set next entry appropriately */
+ next_ptr = entry_ptr;
+ } /* end else */
+
+ /* Advance to next entry */
+ entry_ptr = entry_ptr->prev;
+
+ /* Check for operation */
+ if (op_ptr) {
+ /* reset entries_removed_counter and
+ * last_entry_removed_ptr prior to the call to
+ * H5C__flush_single_entry() so that we can spot
+ * unexpected removals of entries from the cache,
+ * and set the restart_scan flag if proceeding
+ * would be likely to cause us to scan an entry
+ * that is no longer in the cache.
+ *
+ * Note that as of this writing, this
+ * case cannot occur in the parallel case.
+ *
+ * Note also that there is no test code to verify
+ * that this code actually works (although similar code
+ * in the serial version exists and is tested).
+ */
+ cache_ptr->entries_removed_counter = 0;
+ cache_ptr->last_entry_removed_ptr = NULL;
+
+ if (H5C__flush_single_entry(f, op_ptr, op_flags) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't flush entry")
+
+ if (cache_ptr->entries_removed_counter != 0 || cache_ptr->last_entry_removed_ptr != NULL)
+ restart_scan = TRUE;
+ } /* end if */
+ } /* end if */
+ else {
+ /* Remember "next" pointer (after advancing entries) */
+ next_ptr = entry_ptr;
+
+ /* Advance to next entry */
+ entry_ptr = entry_ptr->prev;
+ } /* end else */
+
+ /* Check for restarts, etc. */
+ if ((entry_ptr != NULL) &&
+ (restart_scan || (entry_ptr->is_dirty != prev_is_dirty) || (entry_ptr->next != next_ptr) ||
+ entry_ptr->is_protected || entry_ptr->is_pinned)) {
+
+ /* Something has happened to the LRU -- start over
+ * from the tail.
+ *
+ * Recall that this code should be un-reachable at present,
+ * as all the operations by entries on flush that could cause
+ * it to be reachable are disallowed in the parallel case at
+ * present. Hence the following assertion which should be
+ * removed if the above changes.
+ */
+ HDassert(!restart_scan);
+ HDassert(entry_ptr->is_dirty == prev_is_dirty);
+ HDassert(entry_ptr->next == next_ptr);
+ HDassert(!entry_ptr->is_protected);
+ HDassert(!entry_ptr->is_pinned);
+
+ HDassert(FALSE); /* see comment above */
+
+ restart_scan = FALSE;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+
+ H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr)
+ } /* end if */
+ } /* end while */
+
+ /* It is also possible that some of the cleared entries are on the
+ * pinned list. Must scan that also.
+ *
+ * Observe that in the case of the pinned entry list, most of the
+ * entries will have flush dependency children. As entries with
+ * flush dependency children may not be flushed until all of their
+ * children are clean, multiple passes through the pinned entry list
+ * may be required.
+ *
+ * WARNING:
+ *
+ * As we now allow unpinning, and removal of other entries as a side
+ * effect of flushing an entry, it is possible that the next entry
+ * in a PEL scan could either be no longer pinned, or no longer in
+ * the cache by the time we get to it.
+ *
+ * At present, this should not be possible in this case, as we disallow
+ * such operations in the parallel version of the library. However,
+ * this may change, and to that end, I have included code to detect
+ * such changes and cause this function to fail if they are detected.
+ */
+ progress = TRUE;
+ while (progress && ((entries_flushed < entries_to_flush) || (entries_cleared < entries_to_clear))) {
+ progress = FALSE;
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while ((entry_ptr != NULL) &&
+ ((entries_flushed < entries_to_flush) || (entries_cleared < entries_to_clear))) {
+ H5C_cache_entry_t *prev_ptr;
+ hbool_t next_is_dirty = FALSE;
+
+ HDassert(entry_ptr->is_pinned);
+
+ /* Remember dirty state of entry to advance to */
+ if (entry_ptr->next != NULL)
+ next_is_dirty = entry_ptr->next->is_dirty;
+
+ if (entry_ptr->ring == ring && entry_ptr->flush_dep_ndirty_children == 0) {
+ if (entry_ptr->clear_on_unprotect) {
+ HDassert(entry_ptr->is_dirty);
+
+ /* Set entry and flags for operation */
+ op_ptr = entry_ptr;
+ op_flags = clear_flags;
+
+ /* Reset entry flag */
+ entry_ptr->clear_on_unprotect = FALSE;
+ entries_cleared++;
+ progress = TRUE;
+ } /* end if */
+ else if (entry_ptr->flush_immediately) {
+ HDassert(entry_ptr->is_dirty);
+
+ /* Set entry and flags for operation */
+ op_ptr = entry_ptr;
+ op_flags = flush_flags;
+
+ /* Reset entry flag */
+ entry_ptr->flush_immediately = FALSE;
+ entries_flushed++;
+ progress = TRUE;
+ } /* end else-if */
+ else
+ /* No operation for this entry */
+ op_ptr = NULL;
+
+ /* Check for operation */
+ if (op_ptr) {
+ /* reset entries_removed_counter and
+ * last_entry_removed_ptr prior to the call to
+ * H5C__flush_single_entry() so that we can spot
+ * unexpected removals of entries from the cache,
+ * and set the restart_scan flag if proceeding
+ * would be likely to cause us to scan an entry
+ * that is no longer in the cache.
+ *
+ * Note that as of this writing, this
+ * case cannot occur in the parallel case.
+ *
+ * Note also that there is no test code to verify
+ * that this code actually works (although similar code
+ * in the serial version exists and is tested).
+ */
+ cache_ptr->entries_removed_counter = 0;
+ cache_ptr->last_entry_removed_ptr = NULL;
+
+ /* Add this entry to the list of entries to collectively write
+ *
+ * This comment is misleading -- the entry will be added to the
+ * collective write list only if said list exists.
+ *
+ * JRM -- 2/9/17
+ */
+ if (H5C__flush_single_entry(f, op_ptr, op_flags) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't flush entry")
+
+ if (cache_ptr->entries_removed_counter != 0 || cache_ptr->last_entry_removed_ptr != NULL)
+ restart_scan = TRUE;
+ } /* end if */
+ } /* end if */
+
+ /* Remember "previous" pointer (after advancing entries) */
+ prev_ptr = entry_ptr;
+
+ /* Advance to next entry */
+ entry_ptr = entry_ptr->next;
+
+ /* Check for restarts, etc. */
+ if ((entry_ptr != NULL) &&
+ (restart_scan || (entry_ptr->is_dirty != next_is_dirty) || (entry_ptr->prev != prev_ptr) ||
+ entry_ptr->is_protected || !entry_ptr->is_pinned)) {
+ /* Something has happened to the pinned entry list -- start
+ * over from the head.
+ *
+ * Recall that this code should be un-reachable at present,
+ * as all the operations by entries on flush that could cause
+ * it to be reachable are disallowed in the parallel case at
+ * present. Hence the following assertion which should be
+ * removed if the above changes.
+ */
+
+ HDassert(!restart_scan);
+ HDassert(entry_ptr->is_dirty == next_is_dirty);
+ HDassert(entry_ptr->prev == prev_ptr);
+ HDassert(!entry_ptr->is_protected);
+ HDassert(entry_ptr->is_pinned);
+
+ HDassert(FALSE); /* see comment above */
+
+ restart_scan = FALSE;
+
+ entry_ptr = cache_ptr->pel_head_ptr;
+
+ /* we don't keep stats for pinned entry list scan
+ * restarts. If this code ever becomes reachable,
+ * define the necessary field, and implement the
+ * following macro:
+ *
+ * H5C__UPDATE_STATS_FOR_PEL_SCAN_RESTART(cache_ptr)
+ */
+ } /* end if */
+ } /* end while ( ( entry_ptr != NULL ) &&
+ * ( ( entries_flushed < entries_to_flush ) ||
+ * ( entries_cleared < entries_to_clear ) ) )
+ */
+ } /* end while ( ( ( entries_flushed < entries_to_flush ) ||
+ * ( entries_cleared < entries_to_clear ) ) &&
+ * ( progress ) )
+ */
+
+#ifdef H5C_DO_SANITY_CHECKS
+ HDassert(init_index_len == cache_ptr->index_len);
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ if (entries_flushed != entries_to_flush || entries_cleared != entries_to_clear) {
+ entry_ptr = cache_ptr->il_head;
+ while (entry_ptr != NULL) {
+ HDassert(!entry_ptr->clear_on_unprotect || (entry_ptr->ring > ring));
+ HDassert(!entry_ptr->flush_immediately || (entry_ptr->ring > ring));
+ entry_ptr = entry_ptr->il_next;
+ } /* end while */
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't flush/clear all entries")
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C__flush_candidates_in_ring() */
+#endif /* H5_HAVE_PARALLEL */