summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2015-06-27 16:45:21 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2015-06-27 16:45:21 (GMT)
commit8e94745298c2ceacf80b21f0d16e887217527916 (patch)
treeddd75b1badbe9053c60e214673feffa382f4a4f4 /src
parent58a95d0d4a593157bb7e77f71347ee50783a11a0 (diff)
downloadhdf5-8e94745298c2ceacf80b21f0d16e887217527916.zip
hdf5-8e94745298c2ceacf80b21f0d16e887217527916.tar.gz
hdf5-8e94745298c2ceacf80b21f0d16e887217527916.tar.bz2
[svn-r27293] Description:
Split parallel metadata cache code into separate source code modules. Tested on: MacOSX/64 10.10.3 (amazon) w/serial & parallel (too minor for h5committest)
Diffstat (limited to 'src')
-rw-r--r--src/H5AC.c2256
-rw-r--r--src/H5ACmpio.c2295
-rw-r--r--src/H5ACpkg.h25
-rw-r--r--src/H5C.c1304
-rw-r--r--src/H5Cmpio.c1260
-rw-r--r--src/H5Cpkg.h4
-rw-r--r--src/H5Cprivate.h4
-rw-r--r--src/Makefile.am6
-rw-r--r--src/Makefile.in10
9 files changed, 3658 insertions, 3506 deletions
diff --git a/src/H5AC.c b/src/H5AC.c
index c6b0b47..e6218d1 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -44,13 +44,10 @@
#include "H5private.h" /* Generic Functions */
#include "H5ACpkg.h" /* Metadata cache */
#include "H5Cpkg.h" /* Cache */
-#include "H5Dprivate.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
#include "H5FDprivate.h" /* File drivers */
-#include "H5FLprivate.h" /* Free Lists */
#include "H5Iprivate.h" /* IDs */
-#include "H5MMprivate.h" /* Memory management */
#include "H5Pprivate.h" /* Property lists */
@@ -63,38 +60,6 @@
/* Local Typedefs */
/******************/
-#ifdef H5_HAVE_PARALLEL
-/****************************************************************************
- *
- * structure H5AC_slist_entry_t
- *
- * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t
- * and the cleaned entry list maintained via the c_slist_ptr field of
- * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned
- * entries. Unfortunately, the slist code makes us define a dynamically
- * allocated structure to store these offsets in. This structure serves
- * that purpose. Its fields are as follows:
- *
- * addr: file offset of a metadata entry. Entries are added to this
- * list (if they aren't there already) when they are marked
- * dirty in an unprotect, inserted, or moved. They are
- * removed when they appear in a clean entries broadcast.
- *
- ****************************************************************************/
-typedef struct H5AC_slist_entry_t
-{
- haddr_t addr;
-} H5AC_slist_entry_t;
-
-/* User data for address list building callbacks */
-typedef struct H5AC_addr_list_ud_t
-{
- H5AC_aux_t * aux_ptr; /* 'Auxiliary' parallel cache info */
- haddr_t * addr_buf_ptr; /* Array to store addresses */
- int i; /* Counter for position in array */
-} H5AC_addr_list_ud_t;
-#endif /* H5_HAVE_PARALLEL */
-
/********************/
/* Local Prototypes */
@@ -105,39 +70,6 @@ static herr_t H5AC__check_if_write_permitted(const H5F_t *f,
static herr_t H5AC__ext_config_2_int_config(H5AC_cache_config_t *ext_conf_ptr,
H5C_auto_size_ctl_t *int_conf_ptr);
-#ifdef H5_HAVE_PARALLEL
-static herr_t H5AC__broadcast_candidate_list(H5AC_t *cache_ptr,
- int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
-static herr_t H5AC__broadcast_clean_list(H5AC_t *cache_ptr);
-static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr,
- H5AC_aux_t *aux_ptr, int sync_point_op);
-static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr,
- int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
-static herr_t H5AC__flush_entries(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr);
-static herr_t H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr);
-static herr_t H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr,
- hbool_t was_dirty, unsigned flags);
-static herr_t H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr);
-static herr_t H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr,
- haddr_t new_addr);
-static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f,
- hid_t dxpl_id);
-static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr,
- haddr_t **haddr_buf_ptr_ptr);
-static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr,
- int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
-static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates,
- haddr_t *candidates_list_ptr);
-static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
-static herr_t H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op);
-#endif /* H5_HAVE_PARALLEL */
-
/*********************/
/* Package Variables */
@@ -164,14 +96,6 @@ hid_t H5AC_ind_dxpl_id = (-1);
/* Local Variables */
/*******************/
-#ifdef H5_HAVE_PARALLEL
-/* Declare a free list to manage the H5AC_aux_t struct */
-H5FL_DEFINE_STATIC(H5AC_aux_t);
-
-/* Declare a free list to manage the H5AC_slist_entry_t struct */
-H5FL_DEFINE_STATIC(H5AC_slist_entry_t);
-#endif /* H5_HAVE_PARALLEL */
-
static const char *H5AC_entry_type_names[H5AC_NTYPES] =
{
"B-tree nodes",
@@ -1490,78 +1414,6 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC_unprotect() */
-
-/*-------------------------------------------------------------------------
- * Function: HA5C_set_sync_point_done_callback
- *
- * Purpose: Set the value of the sync_point_done callback. This
- * callback is used by the parallel test code to verify
- * that the expected writes and only the expected writes
- * take place during a sync point.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: John Mainzer
- * 5/9/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5AC_set_sync_point_done_callback(H5C_t * cache_ptr,
- void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl))
-{
- H5AC_aux_t * aux_ptr;
-
- FUNC_ENTER_NOAPI_NOINIT_NOERR
-
- /* Sanity checks */
- HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
-
- aux_ptr->sync_point_done = sync_point_done;
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC_set_sync_point_done_callback() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: HA5C_set_write_done_callback
- *
- * Purpose: Set the value of the write_done callback. This callback
- * is used to improve performance of the parallel test bed
- * for the cache.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: John Mainzer
- * 5/11/06
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5AC_set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void))
-{
- H5AC_aux_t * aux_ptr;
-
- FUNC_ENTER_NOAPI_NOINIT_NOERR
-
- /* Sanity checks */
- HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert( aux_ptr != NULL );
- HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
-
- aux_ptr->write_done = write_done;
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC_set_write_done_callback() */
-#endif /* H5_HAVE_PARALLEL */
-
#ifndef NDEBUG /* debugging functions */
/*-------------------------------------------------------------------------
@@ -2119,64 +1971,6 @@ done:
} /* H5AC_open_trace_file() */
-/*-------------------------------------------------------------------------
- * Function: H5AC_add_candidate()
- *
- * Purpose: Add the supplied metadata entry address to the candidate
- * list. Verify that each entry added does not appear in
- * the list prior to its insertion.
- *
- * This function is intended for used in constructing list
- * of entried to be flushed during sync points. It shouldn't
- * be called anywhere else.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr)
-{
- H5AC_aux_t * aux_ptr;
- H5AC_slist_entry_t * slist_entry_ptr = NULL;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI(FAIL)
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert(aux_ptr->candidate_slist_ptr != NULL);
-
- /* Construct an entry for the supplied address, and insert
- * it into the candidate slist.
- */
- if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate candidate slist entry")
- slist_entry_ptr->addr = addr;
-
- if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist")
-
-done:
- /* Clean up on error */
- if(ret_value < 0)
- if(slist_entry_ptr)
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC_add_candidate() */
-#endif /* H5_HAVE_PARALLEL */
-
-
/*************************************************************************/
/*************************** Debugging Functions: ************************/
/*************************************************************************/
@@ -2288,236 +2082,6 @@ done:
/**************************** Private Functions: *************************/
/*************************************************************************/
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__broadcast_candidate_list()
- *
- * Purpose: Broadcast the contents of the process 0 candidate entry
- * slist. In passing, also remove all entries from said
- * list. As the application of this will be handled by
- * the same functions on all processes, construct and
- * return a copy of the list in the same format as that
- * received by the other processes. Note that if this
- * copy is returned in *haddr_buf_ptr_ptr, the caller
- * must free it.
- *
- * This function must only be called by the process with
- * MPI_rank 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 7/1/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, int *num_entries_ptr,
- haddr_t **haddr_buf_ptr_ptr)
-{
- H5AC_aux_t * aux_ptr = NULL;
- haddr_t * haddr_buf_ptr = NULL;
- int mpi_result;
- int num_entries;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank == 0);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert(aux_ptr->candidate_slist_ptr != NULL);
- HDassert(num_entries_ptr != NULL);
- HDassert(*num_entries_ptr == 0);
- HDassert(haddr_buf_ptr_ptr != NULL);
- HDassert(*haddr_buf_ptr_ptr == NULL);
-
- /* First broadcast the number of entries in the list so that the
- * receivers can set up buffers to receive them. If there aren't
- * any, we are done.
- */
- num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr);
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
-
- if(num_entries > 0) {
- size_t buf_size = 0;
- int chk_num_entries = 0;
-
- /* convert the candidate list into the format we
- * are used to receiving from process 0, and also load it
- * into a buffer for transmission.
- */
- if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
- HDassert(chk_num_entries == num_entries);
- HDassert(haddr_buf_ptr != NULL);
-
- /* Now broadcast the list of candidate entries */
- buf_size = sizeof(haddr_t) * (size_t)num_entries;
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
- } /* end if */
-
- /* Pass the number of entries and the buffer pointer
- * back to the caller. Do this so that we can use the same code
- * to apply the candidate list to all the processes.
- */
- *num_entries_ptr = num_entries;
- *haddr_buf_ptr_ptr = haddr_buf_ptr;
-
-done:
- if(ret_value < 0)
- if(haddr_buf_ptr)
- haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__broadcast_candidate_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__broadcast_clean_list_cb()
- *
- * Purpose: Skip list callback for building array of addresses for
- * broadcasting the clean list.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: Quincey Koziol, 6/12/15
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__broadcast_clean_list_cb(void *_item, void H5_ATTR_UNUSED *_key,
- void *_udata)
-{
- H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
- H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
- haddr_t addr;
-
- FUNC_ENTER_STATIC_NOERR
-
- /* Sanity checks */
- HDassert(slist_entry_ptr);
- HDassert(udata);
-
- /* Store the entry's address in the buffer */
- addr = slist_entry_ptr->addr;
- udata->addr_buf_ptr[udata->i] = addr;
- udata->i++;
-
- /* now release the entry */
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- /* and also remove the matching entry from the dirtied list
- * if it exists.
- */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(udata->aux_ptr->d_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC__broadcast_clean_list_cb() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__broadcast_clean_list()
- *
- * Purpose: Broadcast the contents of the process 0 cleaned entry
- * slist. In passing, also remove all entries from said
- * list, and also remove any matching entries from the dirtied
- * slist.
- *
- * This function must only be called by the process with
- * MPI_rank 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 7/1/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__broadcast_clean_list(H5AC_t * cache_ptr)
-{
- haddr_t * addr_buf_ptr = NULL;
- H5AC_aux_t * aux_ptr;
- int mpi_result;
- int num_entries = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr;
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank == 0);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- /* First broadcast the number of entries in the list so that the
- * receives can set up a buffer to receive them. If there aren't
- * any, we are done.
- */
- num_entries = (int)H5SL_count(aux_ptr->c_slist_ptr);
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
-
- if(num_entries > 0) {
- H5AC_addr_list_ud_t udata;
- size_t buf_size;
-
- /* allocate a buffer to store the list of entry base addresses in */
- buf_size = sizeof(haddr_t) * (size_t)num_entries;
- if(NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer")
-
- /* Set up user data for callback */
- udata.aux_ptr = aux_ptr;
- udata.addr_buf_ptr = addr_buf_ptr;
- udata.i = 0;
-
- /* Free all the clean list entries, building the address list in the callback */
- /* (Callback also removes the matching entries from the dirtied list) */
- if(H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries")
-
- /* Now broadcast the list of cleaned entries */
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)addr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
- } /* end if */
-
- /* if it is defined, call the sync point done callback. Note
- * that this callback is defined purely for testing purposes,
- * and should be undefined under normal operating circumstances.
- */
- if(aux_ptr->sync_point_done)
- (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr);
-
-done:
- if(addr_buf_ptr)
- addr_buf_ptr = (haddr_t *)H5MM_xfree((void *)addr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__broadcast_clean_list() */
-#endif /* H5_HAVE_PARALLEL */
-
/*-------------------------------------------------------------------------
*
@@ -2575,202 +2139,6 @@ H5_ATTR_UNUSED
/*-------------------------------------------------------------------------
- * Function: H5AC__construct_candidate_list()
- *
- * Purpose: In the parallel case when the metadata_write_strategy is
- * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses
- * this function to construct the list of cache entries to
- * be flushed. This list is then propagated to the other
- * caches, and then flushed in a distributed fashion.
- *
- * The sync_point_op parameter is used to determine the extent
- * of the flush.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr,
- int sync_point_op)
-{
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE) || (aux_ptr->mpi_rank == 0));
- HDassert(aux_ptr->d_slist_ptr != NULL);
- HDassert(aux_ptr->c_slist_ptr != NULL);
- HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
- HDassert(aux_ptr->candidate_slist_ptr != NULL);
- HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
- HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || (sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE));
-
- switch(sync_point_op) {
- case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
- if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.")
- break;
-
- case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
- if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.")
- break;
-
- default:
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.")
- break;
- } /* end switch */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__construct_candidate_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__copy_candidate_list_to_buffer_cb
- *
- * Purpose: Skip list callback for building array of addresses for
- * broadcasting the candidate list.
- *
- * Return: Return SUCCEED on success, and FAIL on failure.
- *
- * Programmer: Quincey Koziol, 6/12/15
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key,
- void *_udata)
-{
- H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
- H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
-
- FUNC_ENTER_STATIC_NOERR
-
- /* Sanity checks */
- HDassert(slist_entry_ptr);
- HDassert(udata);
-
- /* Store the entry's address in the buffer */
- udata->addr_buf_ptr[udata->i] = slist_entry_ptr->addr;
- udata->i++;
-
- /* now release the entry */
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC__copy_candidate_list_to_buffer_cb() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__copy_candidate_list_to_buffer
- *
- * Purpose: Allocate buffer(s) and copy the contents of the candidate
- * entry slist into it (them). In passing, remove all
- * entries from the candidate slist. Note that the
- * candidate slist must not be empty.
- *
- * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer
- * of MPI_Offset, copy the contents of the candidate
- * entry list into it with the appropriate conversions,
- * and return the base address of the buffer in
- * *MPI_Offset_buf_ptr. Note that this is the buffer
- * used by process 0 to transmit the list of entries to
- * be flushed to all other processes (in this file group).
- *
- * Similarly, allocate a buffer of haddr_t, load the contents
- * of the candidate list into this buffer, and return its
- * base address in *haddr_buf_ptr_ptr. Note that this
- * latter buffer is constructed unconditionally.
- *
- * In passing, also remove all entries from the candidate
- * entry slist.
- *
- * Return: Return SUCCEED on success, and FAIL on failure.
- *
- * Programmer: John Mainzer, 4/19/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, int *num_entries_ptr,
- haddr_t **haddr_buf_ptr_ptr)
-{
- H5AC_aux_t * aux_ptr = NULL;
- H5AC_addr_list_ud_t udata;
- haddr_t * haddr_buf_ptr = NULL;
- size_t buf_size;
- int num_entries = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert(aux_ptr->candidate_slist_ptr != NULL);
- HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) > 0);
- HDassert(num_entries_ptr != NULL);
- HDassert(*num_entries_ptr == 0);
- HDassert(haddr_buf_ptr_ptr != NULL);
- HDassert(*haddr_buf_ptr_ptr == NULL);
-
- num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr);
-
- /* allocate a buffer(s) to store the list of candidate entry
- * base addresses in
- */
- buf_size = sizeof(haddr_t) * (size_t)num_entries;
- if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
-
- /* Set up user data for callback */
- udata.aux_ptr = aux_ptr;
- udata.addr_buf_ptr = haddr_buf_ptr;
- udata.i = 0;
-
- /* Free all the candidate list entries, building the address list in the callback */
- if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries")
-
- /* Pass the number of entries and the buffer pointer
- * back to the caller.
- */
- *num_entries_ptr = num_entries;
- *haddr_buf_ptr_ptr = haddr_buf_ptr;
-
-done:
- if(ret_value < 0)
- if(haddr_buf_ptr)
- haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__copy_candidate_list_to_buffer() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
* Function: H5AC__ext_config_2_int_config()
*
* Purpose: Utility function to translate an instance of
@@ -2836,1630 +2204,6 @@ done:
} /* H5AC__ext_config_2_int_config() */
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__log_deleted_entry()
- *
- * Purpose: Log an entry which has been deleted.
- *
- * Only called for mpi_rank 0. We must make sure that the entry
- * doesn't appear in the cleaned or dirty entry lists.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 6/29/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- H5AC_slist_entry_t * slist_entry_ptr = NULL;
- haddr_t addr;
-
- FUNC_ENTER_STATIC_NOERR
-
- /* Sanity checks */
- HDassert(entry_ptr);
- addr = entry_ptr->addr;
- cache_ptr = entry_ptr->cache_ptr;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank == 0);
- HDassert(aux_ptr->d_slist_ptr != NULL);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- /* if the entry appears in the dirtied entry slist, remove it. */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- /* if the entry appears in the cleaned entry slist, remove it. */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC__log_deleted_entry() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__log_dirtied_entry()
- *
- * Purpose: Update the dirty_bytes count for a newly dirtied entry.
- *
- * If mpi_rank isn't 0, this simply means adding the size
- * of the entries to the dirty_bytes count.
- *
- * If mpi_rank is 0, we must first check to see if the entry
- * appears in the dirty entries slist. If it is, do nothing.
- * If it isn't, add the size to th dirty_bytes count, add the
- * entry to the dirty entries slist, and remove it from the
- * cleaned list (if it is present there).
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 6/29/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(entry_ptr);
- HDassert(entry_ptr->is_dirty == FALSE);
- cache_ptr = entry_ptr->cache_ptr;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
-
- if(aux_ptr->mpi_rank == 0) {
- H5AC_slist_entry_t *slist_entry_ptr;
- haddr_t addr = entry_ptr->addr;
-
- /* Sanity checks */
- HDassert(aux_ptr->d_slist_ptr != NULL);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- if(NULL == H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) {
- /* insert the address of the entry in the dirty entry list, and
- * add its size to the dirty_bytes count.
- */
- if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
- slist_entry_ptr->addr = addr;
-
- if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
-
- aux_ptr->dirty_bytes += entry_ptr->size;
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->unprotect_dirty_bytes += entry_ptr->size;
- aux_ptr->unprotect_dirty_bytes_updates += 1;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
- } /* end if */
-
- /* the entry is dirty. If it exists on the cleaned entries list,
- * remove it.
- */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
- } /* end if */
- else {
- aux_ptr->dirty_bytes += entry_ptr->size;
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->unprotect_dirty_bytes += entry_size;
- aux_ptr->unprotect_dirty_bytes_updates += 1;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
- } /* end else */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__log_dirtied_entry() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__log_flushed_entry()
- *
- * Purpose: Update the clean entry slist for the flush of an entry --
- * specifically, if the entry has been cleared, remove it
- * from both the cleaned and dirtied lists if it is present.
- * Otherwise, if the entry was dirty, insert the indicated
- * entry address in the clean slist if it isn't there already.
- *
- * This function is only used in PHDF5, and should only
- * be called for the process with mpi rank 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 6/29/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty,
- unsigned flags)
-{
- hbool_t cleared;
- H5AC_aux_t * aux_ptr;
- H5AC_slist_entry_t * slist_entry_ptr = NULL;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity check */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank == 0);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- /* Set local flags */
- cleared = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0);
-
- if(cleared) {
- /* If the entry has been cleared, must remove it from both the
- * cleaned list and the dirtied list.
- */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
- } /* end if */
- else if(was_dirty) {
- if(NULL == H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) {
- /* insert the address of the entry in the clean entry list. */
- if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate clean slist entry .")
- slist_entry_ptr->addr = addr;
-
- if(H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into clean entry slist.")
- } /* end if */
- } /* end else-if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__log_flushed_entry() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__log_inserted_entry()
- *
- * Purpose: Update the dirty_bytes count for a newly inserted entry.
- *
- * If mpi_rank isnt 0, this simply means adding the size
- * of the entry to the dirty_bytes count.
- *
- * If mpi_rank is 0, we must also add the entry to the
- * dirty entries slist.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 6/30/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(entry_ptr);
- cache_ptr = entry_ptr->cache_ptr;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
-
- if(aux_ptr->mpi_rank == 0) {
- H5AC_slist_entry_t *slist_entry_ptr;
-
- HDassert(aux_ptr->d_slist_ptr != NULL);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- /* Entry to insert should not be in dirty list currently */
- if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (const void *)(&entry_ptr->addr)))
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.")
-
- /* insert the address of the entry in the dirty entry list, and
- * add its size to the dirty_bytes count.
- */
- if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
- slist_entry_ptr->addr = entry_ptr->addr;
- if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
-
- /* Entry to insert should not be in clean list either */
- if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (const void *)(&entry_ptr->addr)))
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.")
- } /* end if */
-
- aux_ptr->dirty_bytes += entry_ptr->size;
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->insert_dirty_bytes += size;
- aux_ptr->insert_dirty_bytes_updates += 1;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__log_inserted_entry() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__log_moved_entry()
- *
- * Purpose: Update the dirty_bytes count for a moved entry.
- *
- * WARNING
- *
- * At present, the way that the move call is used ensures
- * that the moved entry is present in all caches by
- * moving in a collective operation and immediately after
- * unprotecting the target entry.
- *
- * This function uses this invariant, and will cause arcane
- * failures if it is not met. If maintaining this invariant
- * becomes impossible, we will have to rework this function
- * extensively, and likely include a bit of IPC for
- * synchronization. A better option might be to subsume
- * move in the unprotect operation.
- *
- * Given that the target entry is in all caches, the function
- * proceeds as follows:
- *
- * For processes with mpi rank other 0, it simply checks to
- * see if the entry was dirty prior to the move, and adds
- * the entries size to the dirty bytes count.
- *
- * In the process with mpi rank 0, the function first checks
- * to see if the entry was dirty prior to the move. If it
- * was, and if the entry doesn't appear in the dirtied list
- * under its old address, it adds the entry's size to the
- * dirty bytes count.
- *
- * The rank 0 process then removes any references to the
- * entry under its old address from the cleands and dirtied
- * lists, and inserts an entry in the dirtied list under the
- * new address.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 6/30/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, haddr_t new_addr)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- hbool_t entry_in_cache;
- hbool_t entry_dirty;
- size_t entry_size;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f);
- HDassert(f->shared);
- cache_ptr = (H5AC_t *)f->shared->cache;
- HDassert(cache_ptr);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
-
- /* get entry status, size, etc here */
- if(H5C_get_entry_status(f, old_addr, &entry_size, &entry_in_cache,
- &entry_dirty, NULL, NULL, NULL, NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.")
- if(!entry_in_cache)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.")
-
- if(aux_ptr->mpi_rank == 0) {
- H5AC_slist_entry_t * slist_entry_ptr;
-
- HDassert(aux_ptr->d_slist_ptr != NULL);
- HDassert(aux_ptr->c_slist_ptr != NULL);
-
- /* if the entry appears in the cleaned entry slist, under its old
- * address, remove it.
- */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr))))
- slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
-
- /* if the entry appears in the dirtied entry slist under its old
- * address, remove it, but don't free it. Set addr to new_addr.
- */
- if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr))))
- slist_entry_ptr->addr = new_addr;
- else {
- /* otherwise, allocate a new entry that is ready
- * for insertion, and increment dirty_bytes.
- *
- * Note that the fact that the entry wasn't in the dirtied
- * list under its old address implies that it must have
- * been clean to start with.
- */
- HDassert(!entry_dirty);
- if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
- slist_entry_ptr->addr = new_addr;
-
- aux_ptr->dirty_bytes += entry_size;
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->move_dirty_bytes += entry_size;
- aux_ptr->move_dirty_bytes_updates += 1;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
- } /* end else */
-
- /* insert / reinsert the entry in the dirty slist */
- if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
- } /* end if */
- else if(!entry_dirty) {
- aux_ptr->dirty_bytes += entry_size;
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->move_dirty_bytes += entry_size;
- aux_ptr->move_dirty_bytes_updates += 1;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
- } /* end else-if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__log_moved_entry() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__propagate_and_apply_candidate_list
- *
- * Purpose: Prior to the addition of support for multiple metadata
- * write strategies, in PHDF5, only the metadata cache with
- * mpi rank 0 was allowed to write to file. All other
- * metadata caches on processes with rank greater than 0
- * were required to retain dirty entries until they were
- * notified that the entry was clean.
- *
- * This constraint is relaxed with the distributed
- * metadata write strategy, in which a list of candidate
- * metadata cache entries is constructed by the process 0
- * cache and then distributed to the caches of all the other
- * processes. Once the listed is distributed, many (if not
- * all) processes writing writing a unique subset of the
- * entries, and marking the remainder clean. The subsets
- * are chosen so that each entry in the list of candidates
- * is written by exactly one cache, and all entries are
- * marked as being clean in all caches.
- *
- * While the list of candidate cache entries is prepared
- * elsewhere, this function is the main routine for distributing
- * and applying the list. It must be run simultaniously on
- * all processes that have the relevant file open. To ensure
- * proper synchronization, there is a barrier at the beginning
- * of this function.
- *
- * At present, this function is called under one of two
- * circumstances:
- *
- * 1) Dirty byte creation exceeds some user specified value.
- *
- * While metadata reads may occur independently, all
- * operations writing metadata must be collective. Thus
- * all metadata caches see the same sequence of operations,
- * and therefore the same dirty data creation.
- *
- * This fact is used to synchronize the caches for purposes
- * of propagating the list of candidate entries, by simply
- * calling this function from all caches whenever some user
- * specified threshold on dirty data is exceeded. (the
- * process 0 cache creates the candidate list just before
- * calling this function).
- *
- * 2) Under direct user control -- this operation must be
- * collective.
- *
- * The operations to be managed by this function are as
- * follows:
- *
- * All processes:
- *
- * 1) Participate in an opening barrier.
- *
- * For the process with mpi rank 0:
- *
- * 1) Load the contents of the candidate list
- * (candidate_slist_ptr) into a buffer, and broadcast that
- * buffer to all the other caches. Clear the candidate
- * list in passing.
- *
- * If there is a positive number of candidates, proceed with
- * the following:
- *
- * 2) Apply the candidate entry list.
- *
- * 3) Particpate in a closing barrier.
- *
- * 4) Remove from the dirty list (d_slist_ptr) and from the
- * flushed and still clean entries list (c_slist_ptr),
- * all addresses that appeared in the candidate list, as
- * these entries are now clean.
- *
- *
- * For all processes with mpi rank greater than 0:
- *
- * 1) Receive the candidate entry list broadcast
- *
- * If there is a positive number of candidates, proceed with
- * the following:
- *
- * 2) Apply the candidate entry list.
- *
- * 3) Particpate in a closing barrier.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- haddr_t * candidates_list_ptr = NULL;
- int mpi_result;
- int num_candidates = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
-
- /* to prevent "messages from the future" we must synchronize all
- * processes before we write any entries.
- */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- if(aux_ptr->mpi_rank == 0) {
- if(H5AC__broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.")
-
- HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
- } /* end if */
- else {
- if(H5AC__receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.")
- } /* end else */
-
- if(num_candidates > 0) {
- herr_t result;
-
- /* all processes apply the candidate list.
- * H5C_apply_candidate_list() handles the details of
- * distributing the writes across the processes.
- */
-
- /* Enable writes during this operation */
- aux_ptr->write_permitted = TRUE;
-
- /* Apply the candidate list */
- result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates,
- candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
-
- /* Disable writes again */
- aux_ptr->write_permitted = FALSE;
-
- /* Check for error on the write operation */
- if(result < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
-
- /* this code exists primarily for the test bed -- it allows us to
- * enforce posix semantics on the server that pretends to be a
- * file system in our parallel tests.
- */
- if(aux_ptr->write_done)
- (aux_ptr->write_done)();
-
- /* to prevent "messages from the past" we must synchronize all
- * processes again before we go on.
- */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- /* if this is process zero, tidy up the dirtied,
- * and flushed and still clean lists.
- */
- if(aux_ptr->mpi_rank == 0)
- if(H5AC__tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
- } /* end if */
-
- /* if it is defined, call the sync point done callback. Note
- * that this callback is defined purely for testing purposes,
- * and should be undefined under normal operating circumstances.
- */
- if(aux_ptr->sync_point_done)
- (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr);
-
-done:
- if(candidates_list_ptr)
- candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__propagate_and_apply_candidate_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__propagate_flushed_and_still_clean_entries_list
- *
- * Purpose: In PHDF5, if the process 0 only metadata write strategy
- * is selected, only the metadata cache with mpi rank 0 is
- * allowed to write to file. All other metadata caches on
- * processes with rank greater than 0 must retain dirty
- * entries until they are notified that the entry is now
- * clean.
- *
- * This function is the main routine for handling this
- * notification proceedure. It must be called
- * simultaniously on all processes that have the relevant
- * file open. To this end, it is called only during a
- * sync point, with a barrier prior to the call.
- *
- * Note that any metadata entry writes by process 0 will
- * occur after the barrier and just before this call.
- *
- * Typicaly, calls to this function will be triggered in
- * one of two ways:
- *
- * 1) Dirty byte creation exceeds some user specified value.
- *
- * While metadata reads may occur independently, all
- * operations writing metadata must be collective. Thus
- * all metadata caches see the same sequence of operations,
- * and therefore the same dirty data creation.
- *
- * This fact is used to synchronize the caches for purposes
- * of propagating the list of flushed and still clean
- * entries, by simply calling this function from all
- * caches whenever some user specified threshold on dirty
- * data is exceeded.
- *
- * 2) Under direct user control -- this operation must be
- * collective.
- *
- * The operations to be managed by this function are as
- * follows:
- *
- * For the process with mpi rank 0:
- *
- * 1) Load the contents of the flushed and still clean entries
- * list (c_slist_ptr) into a buffer, and broadcast that
- * buffer to all the other caches.
- *
- * 2) Clear the flushed and still clean entries list
- * (c_slist_ptr).
- *
- *
- * For all processes with mpi rank greater than 0:
- *
- * 1) Receive the flushed and still clean entries list broadcast
- *
- * 2) Mark the specified entries as clean.
- *
- *
- * For all processes:
- *
- * 1) Reset the dirtied bytes count to 0.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * July 5, 2005
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
-
- if(aux_ptr->mpi_rank == 0) {
- if(H5AC__broadcast_clean_list(cache_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.")
- HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
- } /* end if */
- else {
- if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.")
- } /* end else */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__propagate_flushed_and_still_clean_entries_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC_receive_haddr_list()
- *
- * Purpose: Receive the list of entry addresses from process 0,
- * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
- * Note that the caller must free this buffer if it is
- * returned.
- *
- * This function must only be called by the process with
- * MPI_rank greater than 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: Quincey Koziol, 6/11/2015
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr,
- haddr_t **haddr_buf_ptr_ptr)
-{
- haddr_t * haddr_buf_ptr = NULL;
- int mpi_result;
- int num_entries;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(num_entries_ptr != NULL);
- HDassert(*num_entries_ptr == 0);
- HDassert(haddr_buf_ptr_ptr != NULL);
- HDassert(*haddr_buf_ptr_ptr == NULL);
-
- /* First receive the number of entries in the list so that we
- * can set up a buffer to receive them. If there aren't
- * any, we are done.
- */
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
-
- if(num_entries > 0) {
- size_t buf_size;
-
- /* allocate buffers to store the list of entry base addresses in */
- buf_size = sizeof(haddr_t) * (size_t)num_entries;
- if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
-
- /* Now receive the list of candidate entries */
- if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
- } /* end if */
-
- /* finally, pass the number of entries and the buffer pointer
- * back to the caller.
- */
- *num_entries_ptr = num_entries;
- *haddr_buf_ptr_ptr = haddr_buf_ptr;
-
-done:
- if(ret_value < 0)
- if(haddr_buf_ptr)
- haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC_receive_haddr_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__receive_and_apply_clean_list()
- *
- * Purpose: Receive the list of cleaned entries from process 0,
- * and mark the specified entries as clean.
- *
- * This function must only be called by the process with
- * MPI_rank greater than 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 7/4/05
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- haddr_t * haddr_buf_ptr = NULL;
- int num_entries = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity check */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank != 0);
-
- /* Retrieve the clean list from process 0 */
- if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, &num_entries, &haddr_buf_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
-
- if(num_entries > 0)
- /* mark the indicated entries as clean */
- if(H5C_mark_entries_as_clean(f, dxpl_id, (int32_t)num_entries, haddr_buf_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.")
-
- /* if it is defined, call the sync point done callback. Note
- * that this callback is defined purely for testing purposes,
- * and should be undefined under normal operating circumstances.
- */
- if(aux_ptr->sync_point_done)
- (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
-
-done:
- if(haddr_buf_ptr)
- haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__receive_and_apply_clean_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- *
- * Function: H5AC__receive_candidate_list()
- *
- * Purpose: Receive the list of candidate entries from process 0,
- * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
- * Note that the caller must free this buffer if it is
- * returned.
- *
- * This function must only be called by the process with
- * MPI_rank greater than 0.
- *
- * Return SUCCEED on success, and FAIL on failure.
- *
- * Return: Non-negative on success/Negative on failure.
- *
- * Programmer: John Mainzer, 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__receive_candidate_list(const H5AC_t *cache_ptr, int *num_entries_ptr,
- haddr_t **haddr_buf_ptr_ptr)
-{
- H5AC_aux_t * aux_ptr;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->mpi_rank != 0);
- HDassert(aux_ptr-> metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert(num_entries_ptr != NULL);
- HDassert(*num_entries_ptr == 0);
- HDassert(haddr_buf_ptr_ptr != NULL);
- HDassert(*haddr_buf_ptr_ptr == NULL);
-
- /* Retrieve the candidate list from process 0 */
- if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, num_entries_ptr, haddr_buf_ptr_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__receive_candidate_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__rsp__dist_md_write__flush
- *
- * Purpose: Routine for handling the details of running a sync point
- * that is triggered by a flush -- which in turn must have been
- * triggered by either a flush API call or a file close --
- * when the distributed metadata write strategy is selected.
- *
- * Upon entry, each process generates it own candidate list,
- * being a sorted list of all dirty metadata entries currently
- * in the metadata cache. Note that this list must be idendical
- * across all processes, as all processes see the same stream
- * of dirty metadata coming in, and use the same lists of
- * candidate entries at each sync point. (At first glance, this
- * argument sounds circular, but think of it in the sense of
- * a recursive proof).
- *
- * If this this list is empty, we are done, and the function
- * returns
- *
- * Otherwise, after the sorted list dirty metadata entries is
- * constructed, each process uses the same algorithm to assign
- * each entry on the candidate list to exactly one process for
- * flushing.
- *
- * At this point, all processes participate in a barrier to
- * avoid messages from the past/future bugs.
- *
- * Each process then flushes the entries assigned to it, and
- * marks all other entries on the candidate list as clean.
- *
- * Finally, all processes participate in a second barrier to
- * avoid messages from the past/future bugs.
- *
- * At the end of this process, process 0 and only process 0
- * must tidy up its lists of dirtied and cleaned entries.
- * These lists are not used in the distributed metadata write
- * strategy, but they must be maintained should we shift
- * to a strategy that uses them.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * April 28, 2010
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- haddr_t * haddr_buf_ptr = NULL;
- int mpi_result;
- int num_entries = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
-
- /* first construct the candidate list -- initially, this will be in the
- * form of a skip list. We will convert it later.
- */
- if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
-
- if(H5SL_count(aux_ptr->candidate_slist_ptr) > 0) {
- herr_t result;
-
- /* convert the candidate list into the format we
- * are used to receiving from process 0.
- */
- if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
-
- /* initial sync point barrier */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- /* Enable writes during this operation */
- aux_ptr->write_permitted = TRUE;
-
- /* Apply the candidate list */
- result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries,
- haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
-
- /* Disable writes again */
- aux_ptr->write_permitted = FALSE;
-
- /* Check for error on the write operation */
- if(result < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
-
- /* this code exists primarily for the test bed -- it allows us to
- * enforce posix semantics on the server that pretends to be a
- * file system in our parallel tests.
- */
- if(aux_ptr->write_done)
- (aux_ptr->write_done)();
-
- /* final sync point barrier */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- /* if this is process zero, tidy up the dirtied,
- * and flushed and still clean lists.
- */
- if(aux_ptr->mpi_rank == 0)
- if(H5AC__tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
- } /* end if */
-
- /* if it is defined, call the sync point done callback. Note
- * that this callback is defined purely for testing purposes,
- * and should be undefined under normal operating circumstances.
- */
- if(aux_ptr->sync_point_done)
- (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
-
-done:
- if(haddr_buf_ptr)
- haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__rsp__dist_md_write__flush() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__rsp__dist_md_write__flush_to_min_clean
- *
- * Purpose: Routine for handling the details of running a sync point
- * triggered by the accumulation of dirty metadata (as
- * opposed to a flush call to the API) when the distributed
- * metadata write strategy is selected.
- *
- * After invocation and initial sanity checking this function
- * first checks to see if evictions are enabled -- if they
- * are not, the function does nothing and returns.
- *
- * Otherwise, process zero constructs a list of entries to
- * be flushed in order to bring the process zero cache back
- * within its min clean requirement. Note that this list
- * (the candidate list) may be empty.
- *
- * Then, all processes participate in a barrier.
- *
- * After the barrier, process 0 broadcasts the number of
- * entries in the candidate list prepared above, and all
- * other processes receive this number.
- *
- * If this number is zero, we are done, and the function
- * returns without further action.
- *
- * Otherwise, process 0 broadcasts the sorted list of
- * candidate entries, and all other processes receive it.
- *
- * Then, each process uses the same algorithm to assign
- * each entry on the candidate list to exactly one process
- * for flushing.
- *
- * Each process then flushes the entries assigned to it, and
- * marks all other entries on the candidate list as clean.
- *
- * Finally, all processes participate in a second barrier to
- * avoid messages from the past/future bugs.
- *
- * At the end of this process, process 0 and only process 0
- * must tidy up its lists of dirtied and cleaned entries.
- * These lists are not used in the distributed metadata write
- * strategy, but they must be maintained should we shift
- * to a strategy that uses them.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * April 28, 2010
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- hbool_t evictions_enabled;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
-
- /* Query if evictions are allowed */
- if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
-
- if(evictions_enabled) {
- /* construct candidate list -- process 0 only */
- if(aux_ptr->mpi_rank == 0)
- if(H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
-
- /* propagate and apply candidate list -- all processes */
- if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.")
- } /* evictions enabled */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__rsp__dist_md_write__flush_to_min_clean() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__rsp__p0_only__flush
- *
- * Purpose: Routine for handling the details of running a sync point
- * that is triggered a flush -- which in turn must have been
- * triggered by either a flush API call or a file close --
- * when the process 0 only metadata write strategy is selected.
- *
- * First, all processes participate in a barrier.
- *
- * Then process zero flushes all dirty entries, and broadcasts
- * they number of clean entries (if any) to all the other
- * caches.
- *
- * If this number is zero, we are done.
- *
- * Otherwise, process 0 broadcasts the list of cleaned
- * entries, and all other processes which are part of this
- * file group receive it, and mark the listed entries as
- * clean in their caches.
- *
- * Since all processes have the same set of dirty
- * entries at the beginning of the sync point, and all
- * entries that will be written are written before
- * process zero broadcasts the number of cleaned entries,
- * there is no need for a closing barrier.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * April 28, 2010
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- int mpi_result;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
-
- /* to prevent "messages from the future" we must
- * synchronize all processes before we start the flush.
- * Hence the following barrier.
- */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- /* Flush data to disk, from rank 0 process */
- if(aux_ptr->mpi_rank == 0) {
- herr_t result;
-
- /* Enable writes during this operation */
- aux_ptr->write_permitted = TRUE;
-
- /* Flush the cache */
- result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET);
-
- /* Disable writes again */
- aux_ptr->write_permitted = FALSE;
-
- /* Check for error on the write operation */
- if(result < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
-
- /* this code exists primarily for the test bed -- it allows us to
- * enforce posix semantics on the server that pretends to be a
- * file system in our parallel tests.
- */
- if(aux_ptr->write_done)
- (aux_ptr->write_done)();
- } /* end if */
-
- /* Propagate cleaned entries to other ranks. */
- if(H5AC__propagate_flushed_and_still_clean_entries_list(f, H5AC_dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__rsp__p0_only__flush() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__rsp__p0_only__flush_to_min_clean
- *
- * Purpose: Routine for handling the details of running a sync point
- * triggered by the accumulation of dirty metadata (as
- * opposed to a flush call to the API) when the process 0
- * only metadata write strategy is selected.
- *
- * After invocation and initial sanity checking this function
- * first checks to see if evictions are enabled -- if they
- * are not, the function does nothing and returns.
- *
- * Otherwise, all processes participate in a barrier.
- *
- * After the barrier, if this is process 0, the function
- * causes the cache to flush sufficient entries to get the
- * cache back within its minimum clean fraction, and broadcast
- * the number of entries which have been flushed since
- * the last sync point, and are still clean.
- *
- * If this number is zero, we are done.
- *
- * Otherwise, process 0 broadcasts the list of cleaned
- * entries, and all other processes which are part of this
- * file group receive it, and mark the listed entries as
- * clean in their caches.
- *
- * Since all processes have the same set of dirty
- * entries at the beginning of the sync point, and all
- * entries that will be written are written before
- * process zero broadcasts the number of cleaned entries,
- * there is no need for a closing barrier.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * April 28, 2010
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- hbool_t evictions_enabled;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
-
- /* Query if evictions are allowed */
- if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
-
- /* Flush if evictions are allowed -- following call
- * will cause process 0 to flush to min clean size,
- * and then propagate the newly clean entries to the
- * other processes.
- *
- * Otherwise, do nothing.
- */
- if(evictions_enabled) {
- int mpi_result;
-
- /* to prevent "messages from the future" we must synchronize all
- * processes before we start the flush. This synchronization may
- * already be done -- hence the do_barrier parameter.
- */
- if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
-
- if(0 == aux_ptr->mpi_rank) {
- herr_t result;
-
- /* here, process 0 flushes as many entries as necessary to
- * comply with the currently specified min clean size.
- * Note that it is quite possible that no entries will be
- * flushed.
- */
-
- /* Enable writes during this operation */
- aux_ptr->write_permitted = TRUE;
-
- /* Flush the cache */
- result = H5C_flush_to_min_clean(f, dxpl_id);
-
- /* Disable writes again */
- aux_ptr->write_permitted = FALSE;
-
- /* Check for error on the write operation */
- if(result < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.")
-
- /* this call exists primarily for the test code -- it is used
- * to enforce POSIX semantics on the process used to simulate
- * reads and writes in t_cache.c.
- */
- if(aux_ptr->write_done)
- (aux_ptr->write_done)();
- } /* end if */
-
- if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
- } /* end if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__rsp__p0_only__flush_to_min_clean() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__run_sync_point
- *
- * Purpose: Top level routine for managing a sync point between all
- * meta data caches in the parallel case. Since all caches
- * see the same sequence of dirty metadata, we simply count
- * bytes of dirty metadata, and run a sync point whenever the
- * number of dirty bytes of metadata seen since the last
- * sync point exceeds a threshold that is common across all
- * processes. We also run sync points in response to
- * HDF5 API calls triggering either a flush or a file close.
- *
- * In earlier versions of PHDF5, only the metadata cache with
- * mpi rank 0 was allowed to write to file. All other
- * metadata caches on processes with rank greater than 0 were
- * required to retain dirty entries until they were notified
- * that the entry is was clean.
- *
- * This function was created to make it easier for us to
- * experiment with other options, as it is a single point
- * for the execution of sync points.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * March 11, 2010
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op)
-{
- H5AC_t * cache_ptr;
- H5AC_aux_t * aux_ptr;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f != NULL);
- cache_ptr = f->shared->cache;
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) ||
- (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED));
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
-HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n",
- aux_ptr->mpi_rank,
- aux_ptr->dirty_bytes_propagations,
- aux_ptr->unprotect_dirty_bytes,
- aux_ptr->unprotect_dirty_bytes_updates,
- aux_ptr->insert_dirty_bytes,
- aux_ptr->insert_dirty_bytes_updates,
- aux_ptr->rename_dirty_bytes,
- aux_ptr->rename_dirty_bytes_updates);
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
-
- switch(aux_ptr->metadata_write_strategy) {
- case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
- switch(sync_point_op) {
- case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
- if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.")
- break;
-
- case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
- if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.")
- break;
-
- default:
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
- break;
- } /* end switch */
- break;
-
- case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
- switch(sync_point_op) {
- case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
- if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.")
- break;
-
- case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
- if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.")
- break;
-
- default:
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
- break;
- } /* end switch */
- break;
-
- default:
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.")
- break;
- } /* end switch */
-
- /* reset the dirty bytes count */
- aux_ptr->dirty_bytes = 0;
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->dirty_bytes_propagations += 1;
- aux_ptr->unprotect_dirty_bytes = 0;
- aux_ptr->unprotect_dirty_bytes_updates = 0;
- aux_ptr->insert_dirty_bytes = 0;
- aux_ptr->insert_dirty_bytes_updates = 0;
- aux_ptr->rename_dirty_bytes = 0;
- aux_ptr->rename_dirty_bytes_updates = 0;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__run_sync_point() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__tidy_cache_0_lists()
- *
- * Purpose: In the distributed metadata write strategy, not all dirty
- * entries are written by process 0 -- thus we must tidy
- * up the dirtied, and flushed and still clean lists
- * maintained by process zero after each sync point.
- *
- * This procedure exists to tend to this issue.
- *
- * At this point, all entries that process 0 cleared should
- * have been removed from both the dirty and flushed and
- * still clean lists, and entries that process 0 has flushed
- * should have been removed from the dirtied list and added
- * to the flushed and still clean list.
- *
- * However, since the distributed metadata write strategy
- * doesn't make use of these lists, the objective is simply
- * to maintain these lists in consistent state that allows
- * them to be used should the metadata write strategy change
- * to one that uses these lists.
- *
- * Thus for our purposes, all we need to do is remove from
- * the dirtied and flushed and still clean lists all
- * references to entries that appear in the candidate list.
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: John Mainzer
- * 4/20/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates,
- haddr_t *candidates_list_ptr)
-{
- H5AC_aux_t * aux_ptr;
- int i;
-
- FUNC_ENTER_STATIC_NOERR
-
- /* Sanity checks */
- HDassert(cache_ptr != NULL);
- HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert(aux_ptr != NULL);
- HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
- HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
- HDassert(aux_ptr->mpi_rank == 0);
- HDassert(num_candidates > 0);
- HDassert(candidates_list_ptr != NULL);
-
- /* clean up dirtied and flushed and still clean lists by removing
- * all entries on the candidate list. Cleared entries should
- * have been removed from both the dirty and cleaned lists at
- * this point, flushed entries should have been added to the
- * cleaned list. However, for this metadata write strategy,
- * we just want to remove all references to the candidate entries.
- */
- for(i = 0; i < num_candidates; i++) {
- H5AC_slist_entry_t * d_slist_entry_ptr;
- H5AC_slist_entry_t * c_slist_entry_ptr;
- haddr_t addr;
-
- addr = candidates_list_ptr[i];
-
- /* addr may be either on the dirtied list, or on the flushed
- * and still clean list. Remove it.
- */
- if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr)))
- d_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr);
- if(NULL != (c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)&addr)))
- c_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr);
- } /* end for */
-
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5AC__tidy_cache_0_lists() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5AC__flush_entries
- *
- * Purpose: Flush the metadata cache associated with the specified file,
- * only writing from rank 0, but propagating the cleaned entries
- * to all ranks.
- *
- * Return: Non-negative on success/Negative on failure if there was a
- * request to flush all items and something was protected.
- *
- * Programmer: Quincey Koziol
- * koziol@hdfgroup.org
- * Aug 22 2009
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5AC__flush_entries(H5F_t *f, hid_t dxpl_id)
-{
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Sanity checks */
- HDassert(f);
- HDassert(f->shared->cache);
-
- /* Check if we have >1 ranks */
- if(f->shared->cache->aux_ptr)
- if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC__flush_entries() */
-#endif /* H5_HAVE_PARALLEL */
-
-
/*------------------------------------------------------------------------------
* Function: H5AC_ignore_tags()
*
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c
new file mode 100644
index 0000000..64c27ab
--- /dev/null
+++ b/src/H5ACmpio.c
@@ -0,0 +1,2295 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*-------------------------------------------------------------------------
+ *
+ * Created: H5ACmpio.c
+ * Jun 20 2015
+ * Quincey Koziol <koziol@hdfgroup.org>
+ *
+ * Purpose: Functions in this file implement support for parallel
+ * I/O cache functionality
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#define H5AC_PACKAGE /*suppress error about including H5ACpkg */
+#define H5C_PACKAGE /*suppress error about including H5Cpkg */
+#define H5F_PACKAGE /*suppress error about including H5Fpkg */
+
+/* Interface initialization */
+#define H5_INTERFACE_INIT_FUNC H5AC__init_mpio_interface
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h" /* Generic Functions */
+#include "H5ACpkg.h" /* Metadata cache */
+#include "H5Cpkg.h" /* Cache */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5Fpkg.h" /* Files */
+#include "H5MMprivate.h" /* Memory management */
+
+#ifdef H5_HAVE_PARALLEL
+
+/****************/
+/* Local Macros */
+/****************/
+
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+/****************************************************************************
+ *
+ * structure H5AC_slist_entry_t
+ *
+ * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t
+ * and the cleaned entry list maintained via the c_slist_ptr field of
+ * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned
+ * entries. Unfortunately, the slist code makes us define a dynamically
+ * allocated structure to store these offsets in. This structure serves
+ * that purpose. Its fields are as follows:
+ *
+ * addr: file offset of a metadata entry. Entries are added to this
+ * list (if they aren't there already) when they are marked
+ * dirty in an unprotect, inserted, or moved. They are
+ * removed when they appear in a clean entries broadcast.
+ *
+ ****************************************************************************/
+typedef struct H5AC_slist_entry_t
+{
+ haddr_t addr;
+} H5AC_slist_entry_t;
+
+/* User data for address list building callbacks */
+typedef struct H5AC_addr_list_ud_t
+{
+ H5AC_aux_t * aux_ptr; /* 'Auxiliary' parallel cache info */
+ haddr_t * addr_buf_ptr; /* Array to store addresses */
+ int i; /* Counter for position in array */
+} H5AC_addr_list_ud_t;
+
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+static herr_t H5AC__broadcast_candidate_list(H5AC_t *cache_ptr,
+ int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
+static herr_t H5AC__broadcast_clean_list(H5AC_t *cache_ptr);
+static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr,
+ H5AC_aux_t *aux_ptr, int sync_point_op);
+static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr,
+ int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
+static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id);
+static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f,
+ hid_t dxpl_id);
+static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr,
+ haddr_t **haddr_buf_ptr_ptr);
+static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr,
+ int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr);
+static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id);
+static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates,
+ haddr_t *candidates_list_ptr);
+static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id);
+static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
+static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id);
+static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id);
+
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+/* Declare a free list to manage the H5AC_aux_t struct */
+H5FL_DEFINE(H5AC_aux_t);
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+/* Declare a free list to manage the H5AC_slist_entry_t struct */
+H5FL_DEFINE_STATIC(H5AC_slist_entry_t);
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__init_mpio_interface
+ *
+ * Purpose: Initialize interface-specific information
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Quincey Koziol
+ * 6/20/15
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__init_mpio_interface(void)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Funnel all work to H5AC_init() */
+ if(H5AC_init() < 0)
+ HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "interface initialization failed")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__init_mpio_interface() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__set_sync_point_done_callback
+ *
+ * Purpose: Set the value of the sync_point_done callback. This
+ * callback is used by the parallel test code to verify
+ * that the expected writes and only the expected writes
+ * take place during a sync point.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 5/9/10
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__set_sync_point_done_callback(H5C_t * cache_ptr,
+ void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl))
+{
+ H5AC_aux_t * aux_ptr;
+
+ FUNC_ENTER_PACKAGE_NOERR
+
+ /* Sanity checks */
+ HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+
+ aux_ptr->sync_point_done = sync_point_done;
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__set_sync_point_done_callback() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__set_write_done_callback
+ *
+ * Purpose: Set the value of the write_done callback. This callback
+ * is used to improve performance of the parallel test bed
+ * for the cache.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 5/11/06
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void))
+{
+ H5AC_aux_t * aux_ptr;
+
+ FUNC_ENTER_PACKAGE_NOERR
+
+ /* Sanity checks */
+ HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ aux_ptr->write_done = write_done;
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__set_write_done_callback() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_add_candidate()
+ *
+ * Purpose: Add the supplied metadata entry address to the candidate
+ * list. Verify that each entry added does not appear in
+ * the list prior to its insertion.
+ *
+ * This function is intended for used in constructing list
+ * of entried to be flushed during sync points. It shouldn't
+ * be called anywhere else.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr)
+{
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert(aux_ptr->candidate_slist_ptr != NULL);
+
+ /* Construct an entry for the supplied address, and insert
+ * it into the candidate slist.
+ */
+ if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate candidate slist entry")
+ slist_entry_ptr->addr = addr;
+
+ if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist")
+
+done:
+ /* Clean up on error */
+ if(ret_value < 0)
+ if(slist_entry_ptr)
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_add_candidate() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__broadcast_candidate_list()
+ *
+ * Purpose: Broadcast the contents of the process 0 candidate entry
+ * slist. In passing, also remove all entries from said
+ * list. As the application of this will be handled by
+ * the same functions on all processes, construct and
+ * return a copy of the list in the same format as that
+ * received by the other processes. Note that if this
+ * copy is returned in *haddr_buf_ptr_ptr, the caller
+ * must free it.
+ *
+ * This function must only be called by the process with
+ * MPI_rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/1/05
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, int *num_entries_ptr,
+ haddr_t **haddr_buf_ptr_ptr)
+{
+ H5AC_aux_t * aux_ptr = NULL;
+ haddr_t * haddr_buf_ptr = NULL;
+ int mpi_result;
+ int num_entries;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank == 0);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert(aux_ptr->candidate_slist_ptr != NULL);
+ HDassert(num_entries_ptr != NULL);
+ HDassert(*num_entries_ptr == 0);
+ HDassert(haddr_buf_ptr_ptr != NULL);
+ HDassert(*haddr_buf_ptr_ptr == NULL);
+
+ /* First broadcast the number of entries in the list so that the
+ * receivers can set up buffers to receive them. If there aren't
+ * any, we are done.
+ */
+ num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr);
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+
+ if(num_entries > 0) {
+ size_t buf_size = 0;
+ int chk_num_entries = 0;
+
+ /* convert the candidate list into the format we
+ * are used to receiving from process 0, and also load it
+ * into a buffer for transmission.
+ */
+ if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
+ HDassert(chk_num_entries == num_entries);
+ HDassert(haddr_buf_ptr != NULL);
+
+ /* Now broadcast the list of candidate entries */
+ buf_size = sizeof(haddr_t) * (size_t)num_entries;
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+ } /* end if */
+
+ /* Pass the number of entries and the buffer pointer
+ * back to the caller. Do this so that we can use the same code
+ * to apply the candidate list to all the processes.
+ */
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+
+done:
+ if(ret_value < 0)
+ if(haddr_buf_ptr)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__broadcast_candidate_list() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__broadcast_clean_list_cb()
+ *
+ * Purpose: Skip list callback for building array of addresses for
+ * broadcasting the clean list.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: Quincey Koziol, 6/12/15
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__broadcast_clean_list_cb(void *_item, void H5_ATTR_UNUSED *_key,
+ void *_udata)
+{
+ H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
+ H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
+ haddr_t addr;
+
+ FUNC_ENTER_STATIC_NOERR
+
+ /* Sanity checks */
+ HDassert(slist_entry_ptr);
+ HDassert(udata);
+
+ /* Store the entry's address in the buffer */
+ addr = slist_entry_ptr->addr;
+ udata->addr_buf_ptr[udata->i] = addr;
+ udata->i++;
+
+ /* now release the entry */
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ /* and also remove the matching entry from the dirtied list
+ * if it exists.
+ */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(udata->aux_ptr->d_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__broadcast_clean_list_cb() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__broadcast_clean_list()
+ *
+ * Purpose: Broadcast the contents of the process 0 cleaned entry
+ * slist. In passing, also remove all entries from said
+ * list, and also remove any matching entries from the dirtied
+ * slist.
+ *
+ * This function must only be called by the process with
+ * MPI_rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/1/05
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__broadcast_clean_list(H5AC_t * cache_ptr)
+{
+ haddr_t * addr_buf_ptr = NULL;
+ H5AC_aux_t * aux_ptr;
+ int mpi_result;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr;
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank == 0);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ /* First broadcast the number of entries in the list so that the
+ * receives can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ num_entries = (int)H5SL_count(aux_ptr->c_slist_ptr);
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+
+ if(num_entries > 0) {
+ H5AC_addr_list_ud_t udata;
+ size_t buf_size;
+
+ /* allocate a buffer to store the list of entry base addresses in */
+ buf_size = sizeof(haddr_t) * (size_t)num_entries;
+ if(NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer")
+
+ /* Set up user data for callback */
+ udata.aux_ptr = aux_ptr;
+ udata.addr_buf_ptr = addr_buf_ptr;
+ udata.i = 0;
+
+ /* Free all the clean list entries, building the address list in the callback */
+ /* (Callback also removes the matching entries from the dirtied list) */
+ if(H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries")
+
+ /* Now broadcast the list of cleaned entries */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)addr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+ } /* end if */
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done)
+ (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr);
+
+done:
+ if(addr_buf_ptr)
+ addr_buf_ptr = (haddr_t *)H5MM_xfree((void *)addr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__broadcast_clean_list() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__construct_candidate_list()
+ *
+ * Purpose: In the parallel case when the metadata_write_strategy is
+ * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses
+ * this function to construct the list of cache entries to
+ * be flushed. This list is then propagated to the other
+ * caches, and then flushed in a distributed fashion.
+ *
+ * The sync_point_op parameter is used to determine the extent
+ * of the flush.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr,
+ int sync_point_op)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE) || (aux_ptr->mpi_rank == 0));
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+ HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
+ HDassert(aux_ptr->candidate_slist_ptr != NULL);
+ HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
+ HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || (sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE));
+
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.")
+ break;
+ } /* end switch */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__construct_candidate_list() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__copy_candidate_list_to_buffer_cb
+ *
+ * Purpose: Skip list callback for building array of addresses for
+ * broadcasting the candidate list.
+ *
+ * Return: Return SUCCEED on success, and FAIL on failure.
+ *
+ * Programmer: Quincey Koziol, 6/12/15
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key,
+ void *_udata)
+{
+ H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */
+ H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */
+
+ FUNC_ENTER_STATIC_NOERR
+
+ /* Sanity checks */
+ HDassert(slist_entry_ptr);
+ HDassert(udata);
+
+ /* Store the entry's address in the buffer */
+ udata->addr_buf_ptr[udata->i] = slist_entry_ptr->addr;
+ udata->i++;
+
+ /* now release the entry */
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__copy_candidate_list_to_buffer_cb() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__copy_candidate_list_to_buffer
+ *
+ * Purpose: Allocate buffer(s) and copy the contents of the candidate
+ * entry slist into it (them). In passing, remove all
+ * entries from the candidate slist. Note that the
+ * candidate slist must not be empty.
+ *
+ * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer
+ * of MPI_Offset, copy the contents of the candidate
+ * entry list into it with the appropriate conversions,
+ * and return the base address of the buffer in
+ * *MPI_Offset_buf_ptr. Note that this is the buffer
+ * used by process 0 to transmit the list of entries to
+ * be flushed to all other processes (in this file group).
+ *
+ * Similarly, allocate a buffer of haddr_t, load the contents
+ * of the candidate list into this buffer, and return its
+ * base address in *haddr_buf_ptr_ptr. Note that this
+ * latter buffer is constructed unconditionally.
+ *
+ * In passing, also remove all entries from the candidate
+ * entry slist.
+ *
+ * Return: Return SUCCEED on success, and FAIL on failure.
+ *
+ * Programmer: John Mainzer, 4/19/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, int *num_entries_ptr,
+ haddr_t **haddr_buf_ptr_ptr)
+{
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_addr_list_ud_t udata;
+ haddr_t * haddr_buf_ptr = NULL;
+ size_t buf_size;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert(aux_ptr->candidate_slist_ptr != NULL);
+ HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) > 0);
+ HDassert(num_entries_ptr != NULL);
+ HDassert(*num_entries_ptr == 0);
+ HDassert(haddr_buf_ptr_ptr != NULL);
+ HDassert(*haddr_buf_ptr_ptr == NULL);
+
+ num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr);
+
+ /* allocate a buffer(s) to store the list of candidate entry
+ * base addresses in
+ */
+ buf_size = sizeof(haddr_t) * (size_t)num_entries;
+ if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
+
+ /* Set up user data for callback */
+ udata.aux_ptr = aux_ptr;
+ udata.addr_buf_ptr = haddr_buf_ptr;
+ udata.i = 0;
+
+ /* Free all the candidate list entries, building the address list in the callback */
+ if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries")
+
+ /* Pass the number of entries and the buffer pointer
+ * back to the caller.
+ */
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+
+done:
+ if(ret_value < 0)
+ if(haddr_buf_ptr)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__copy_candidate_list_to_buffer() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__log_deleted_entry()
+ *
+ * Purpose: Log an entry which has been deleted.
+ *
+ * Only called for mpi_rank 0. We must make sure that the entry
+ * doesn't appear in the cleaned or dirty entry lists.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ haddr_t addr;
+
+ FUNC_ENTER_PACKAGE_NOERR
+
+ /* Sanity checks */
+ HDassert(entry_ptr);
+ addr = entry_ptr->addr;
+ cache_ptr = entry_ptr->cache_ptr;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank == 0);
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ /* if the entry appears in the dirtied entry slist, remove it. */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ /* if the entry appears in the cleaned entry slist, remove it. */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__log_deleted_entry() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__log_dirtied_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly dirtied entry.
+ *
+ * If mpi_rank isn't 0, this simply means adding the size
+ * of the entries to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must first check to see if the entry
+ * appears in the dirty entries slist. If it is, do nothing.
+ * If it isn't, add the size to th dirty_bytes count, add the
+ * entry to the dirty entries slist, and remove it from the
+ * cleaned list (if it is present there).
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->is_dirty == FALSE);
+ cache_ptr = entry_ptr->cache_ptr;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+
+ if(aux_ptr->mpi_rank == 0) {
+ H5AC_slist_entry_t *slist_entry_ptr;
+ haddr_t addr = entry_ptr->addr;
+
+ /* Sanity checks */
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ if(NULL == H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) {
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
+ slist_entry_ptr->addr = addr;
+
+ if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
+
+ aux_ptr->dirty_bytes += entry_ptr->size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_ptr->size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ } /* end if */
+
+ /* the entry is dirty. If it exists on the cleaned entries list,
+ * remove it.
+ */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ } /* end if */
+ else {
+ aux_ptr->dirty_bytes += entry_ptr->size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ } /* end else */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__log_dirtied_entry() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__log_flushed_entry()
+ *
+ * Purpose: Update the clean entry slist for the flush of an entry --
+ * specifically, if the entry has been cleared, remove it
+ * from both the cleaned and dirtied lists if it is present.
+ * Otherwise, if the entry was dirty, insert the indicated
+ * entry address in the clean slist if it isn't there already.
+ *
+ * This function is only used in PHDF5, and should only
+ * be called for the process with mpi rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty,
+ unsigned flags)
+{
+ hbool_t cleared;
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity check */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank == 0);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ /* Set local flags */
+ cleared = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0);
+
+ if(cleared) {
+ /* If the entry has been cleared, must remove it from both the
+ * cleaned list and the dirtied list.
+ */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ } /* end if */
+ else if(was_dirty) {
+ if(NULL == H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) {
+ /* insert the address of the entry in the clean entry list. */
+ if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate clean slist entry .")
+ slist_entry_ptr->addr = addr;
+
+ if(H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into clean entry slist.")
+ } /* end if */
+ } /* end else-if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__log_flushed_entry() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__log_inserted_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly inserted entry.
+ *
+ * If mpi_rank isnt 0, this simply means adding the size
+ * of the entry to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must also add the entry to the
+ * dirty entries slist.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(entry_ptr);
+ cache_ptr = entry_ptr->cache_ptr;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+
+ if(aux_ptr->mpi_rank == 0) {
+ H5AC_slist_entry_t *slist_entry_ptr;
+
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ /* Entry to insert should not be in dirty list currently */
+ if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (const void *)(&entry_ptr->addr)))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.")
+
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
+ slist_entry_ptr->addr = entry_ptr->addr;
+ if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
+
+ /* Entry to insert should not be in clean list either */
+ if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (const void *)(&entry_ptr->addr)))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.")
+ } /* end if */
+
+ aux_ptr->dirty_bytes += entry_ptr->size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->insert_dirty_bytes += size;
+ aux_ptr->insert_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__log_inserted_entry() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__log_moved_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a moved entry.
+ *
+ * WARNING
+ *
+ * At present, the way that the move call is used ensures
+ * that the moved entry is present in all caches by
+ * moving in a collective operation and immediately after
+ * unprotecting the target entry.
+ *
+ * This function uses this invariant, and will cause arcane
+ * failures if it is not met. If maintaining this invariant
+ * becomes impossible, we will have to rework this function
+ * extensively, and likely include a bit of IPC for
+ * synchronization. A better option might be to subsume
+ * move in the unprotect operation.
+ *
+ * Given that the target entry is in all caches, the function
+ * proceeds as follows:
+ *
+ * For processes with mpi rank other 0, it simply checks to
+ * see if the entry was dirty prior to the move, and adds
+ * the entries size to the dirty bytes count.
+ *
+ * In the process with mpi rank 0, the function first checks
+ * to see if the entry was dirty prior to the move. If it
+ * was, and if the entry doesn't appear in the dirtied list
+ * under its old address, it adds the entry's size to the
+ * dirty bytes count.
+ *
+ * The rank 0 process then removes any references to the
+ * entry under its old address from the cleands and dirtied
+ * lists, and inserts an entry in the dirtied list under the
+ * new address.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, haddr_t new_addr)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ hbool_t entry_in_cache;
+ hbool_t entry_dirty;
+ size_t entry_size;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(f);
+ HDassert(f->shared);
+ cache_ptr = (H5AC_t *)f->shared->cache;
+ HDassert(cache_ptr);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+
+ /* get entry status, size, etc here */
+ if(H5C_get_entry_status(f, old_addr, &entry_size, &entry_in_cache,
+ &entry_dirty, NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.")
+ if(!entry_in_cache)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.")
+
+ if(aux_ptr->mpi_rank == 0) {
+ H5AC_slist_entry_t * slist_entry_ptr;
+
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
+
+ /* if the entry appears in the cleaned entry slist, under its old
+ * address, remove it.
+ */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr))))
+ slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+
+ /* if the entry appears in the dirtied entry slist under its old
+ * address, remove it, but don't free it. Set addr to new_addr.
+ */
+ if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr))))
+ slist_entry_ptr->addr = new_addr;
+ else {
+ /* otherwise, allocate a new entry that is ready
+ * for insertion, and increment dirty_bytes.
+ *
+ * Note that the fact that the entry wasn't in the dirtied
+ * list under its old address implies that it must have
+ * been clean to start with.
+ */
+ HDassert(!entry_dirty);
+ if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .")
+ slist_entry_ptr->addr = new_addr;
+
+ aux_ptr->dirty_bytes += entry_size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->move_dirty_bytes += entry_size;
+ aux_ptr->move_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ } /* end else */
+
+ /* insert / reinsert the entry in the dirty slist */
+ if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
+ } /* end if */
+ else if(!entry_dirty) {
+ aux_ptr->dirty_bytes += entry_size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->move_dirty_bytes += entry_size;
+ aux_ptr->move_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ } /* end else-if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__log_moved_entry() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__propagate_and_apply_candidate_list
+ *
+ * Purpose: Prior to the addition of support for multiple metadata
+ * write strategies, in PHDF5, only the metadata cache with
+ * mpi rank 0 was allowed to write to file. All other
+ * metadata caches on processes with rank greater than 0
+ * were required to retain dirty entries until they were
+ * notified that the entry was clean.
+ *
+ * This constraint is relaxed with the distributed
+ * metadata write strategy, in which a list of candidate
+ * metadata cache entries is constructed by the process 0
+ * cache and then distributed to the caches of all the other
+ * processes. Once the listed is distributed, many (if not
+ * all) processes writing writing a unique subset of the
+ * entries, and marking the remainder clean. The subsets
+ * are chosen so that each entry in the list of candidates
+ * is written by exactly one cache, and all entries are
+ * marked as being clean in all caches.
+ *
+ * While the list of candidate cache entries is prepared
+ * elsewhere, this function is the main routine for distributing
+ * and applying the list. It must be run simultaniously on
+ * all processes that have the relevant file open. To ensure
+ * proper synchronization, there is a barrier at the beginning
+ * of this function.
+ *
+ * At present, this function is called under one of two
+ * circumstances:
+ *
+ * 1) Dirty byte creation exceeds some user specified value.
+ *
+ * While metadata reads may occur independently, all
+ * operations writing metadata must be collective. Thus
+ * all metadata caches see the same sequence of operations,
+ * and therefore the same dirty data creation.
+ *
+ * This fact is used to synchronize the caches for purposes
+ * of propagating the list of candidate entries, by simply
+ * calling this function from all caches whenever some user
+ * specified threshold on dirty data is exceeded. (the
+ * process 0 cache creates the candidate list just before
+ * calling this function).
+ *
+ * 2) Under direct user control -- this operation must be
+ * collective.
+ *
+ * The operations to be managed by this function are as
+ * follows:
+ *
+ * All processes:
+ *
+ * 1) Participate in an opening barrier.
+ *
+ * For the process with mpi rank 0:
+ *
+ * 1) Load the contents of the candidate list
+ * (candidate_slist_ptr) into a buffer, and broadcast that
+ * buffer to all the other caches. Clear the candidate
+ * list in passing.
+ *
+ * If there is a positive number of candidates, proceed with
+ * the following:
+ *
+ * 2) Apply the candidate entry list.
+ *
+ * 3) Particpate in a closing barrier.
+ *
+ * 4) Remove from the dirty list (d_slist_ptr) and from the
+ * flushed and still clean entries list (c_slist_ptr),
+ * all addresses that appeared in the candidate list, as
+ * these entries are now clean.
+ *
+ *
+ * For all processes with mpi rank greater than 0:
+ *
+ * 1) Receive the candidate entry list broadcast
+ *
+ * If there is a positive number of candidates, proceed with
+ * the following:
+ *
+ * 2) Apply the candidate entry list.
+ *
+ * 3) Particpate in a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ haddr_t * candidates_list_ptr = NULL;
+ int mpi_result;
+ int num_candidates = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we write any entries.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC__broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.")
+
+ HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0);
+ } /* end if */
+ else {
+ if(H5AC__receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.")
+ } /* end else */
+
+ if(num_candidates > 0) {
+ herr_t result;
+
+ /* all processes apply the candidate list.
+ * H5C_apply_candidate_list() handles the details of
+ * distributing the writes across the processes.
+ */
+
+ /* Enable writes during this operation */
+ aux_ptr->write_permitted = TRUE;
+
+ /* Apply the candidate list */
+ result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates,
+ candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
+
+ /* Disable writes again */
+ aux_ptr->write_permitted = FALSE;
+
+ /* Check for error on the write operation */
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
+
+ /* this code exists primarily for the test bed -- it allows us to
+ * enforce posix semantics on the server that pretends to be a
+ * file system in our parallel tests.
+ */
+ if(aux_ptr->write_done)
+ (aux_ptr->write_done)();
+
+ /* to prevent "messages from the past" we must synchronize all
+ * processes again before we go on.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ /* if this is process zero, tidy up the dirtied,
+ * and flushed and still clean lists.
+ */
+ if(aux_ptr->mpi_rank == 0)
+ if(H5AC__tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
+ } /* end if */
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done)
+ (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr);
+
+done:
+ if(candidates_list_ptr)
+ candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__propagate_and_apply_candidate_list() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__propagate_flushed_and_still_clean_entries_list
+ *
+ * Purpose: In PHDF5, if the process 0 only metadata write strategy
+ * is selected, only the metadata cache with mpi rank 0 is
+ * allowed to write to file. All other metadata caches on
+ * processes with rank greater than 0 must retain dirty
+ * entries until they are notified that the entry is now
+ * clean.
+ *
+ * This function is the main routine for handling this
+ * notification proceedure. It must be called
+ * simultaniously on all processes that have the relevant
+ * file open. To this end, it is called only during a
+ * sync point, with a barrier prior to the call.
+ *
+ * Note that any metadata entry writes by process 0 will
+ * occur after the barrier and just before this call.
+ *
+ * Typicaly, calls to this function will be triggered in
+ * one of two ways:
+ *
+ * 1) Dirty byte creation exceeds some user specified value.
+ *
+ * While metadata reads may occur independently, all
+ * operations writing metadata must be collective. Thus
+ * all metadata caches see the same sequence of operations,
+ * and therefore the same dirty data creation.
+ *
+ * This fact is used to synchronize the caches for purposes
+ * of propagating the list of flushed and still clean
+ * entries, by simply calling this function from all
+ * caches whenever some user specified threshold on dirty
+ * data is exceeded.
+ *
+ * 2) Under direct user control -- this operation must be
+ * collective.
+ *
+ * The operations to be managed by this function are as
+ * follows:
+ *
+ * For the process with mpi rank 0:
+ *
+ * 1) Load the contents of the flushed and still clean entries
+ * list (c_slist_ptr) into a buffer, and broadcast that
+ * buffer to all the other caches.
+ *
+ * 2) Clear the flushed and still clean entries list
+ * (c_slist_ptr).
+ *
+ *
+ * For all processes with mpi rank greater than 0:
+ *
+ * 1) Receive the flushed and still clean entries list broadcast
+ *
+ * 2) Mark the specified entries as clean.
+ *
+ *
+ * For all processes:
+ *
+ * 1) Reset the dirtied bytes count to 0.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * July 5, 2005
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC__broadcast_clean_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.")
+ HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0);
+ } /* end if */
+ else {
+ if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.")
+ } /* end else */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__propagate_flushed_and_still_clean_entries_list() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_receive_haddr_list()
+ *
+ * Purpose: Receive the list of entry addresses from process 0,
+ * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
+ * Note that the caller must free this buffer if it is
+ * returned.
+ *
+ * This function must only be called by the process with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: Quincey Koziol, 6/11/2015
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr,
+ haddr_t **haddr_buf_ptr_ptr)
+{
+ haddr_t * haddr_buf_ptr = NULL;
+ int mpi_result;
+ int num_entries;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(num_entries_ptr != NULL);
+ HDassert(*num_entries_ptr == 0);
+ HDassert(haddr_buf_ptr_ptr != NULL);
+ HDassert(*haddr_buf_ptr_ptr == NULL);
+
+ /* First receive the number of entries in the list so that we
+ * can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+
+ if(num_entries > 0) {
+ size_t buf_size;
+
+ /* allocate buffers to store the list of entry base addresses in */
+ buf_size = sizeof(haddr_t) * (size_t)num_entries;
+ if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer")
+
+ /* Now receive the list of candidate entries */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result)
+ } /* end if */
+
+ /* finally, pass the number of entries and the buffer pointer
+ * back to the caller.
+ */
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+
+done:
+ if(ret_value < 0)
+ if(haddr_buf_ptr)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_receive_haddr_list() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__receive_and_apply_clean_list()
+ *
+ * Purpose: Receive the list of cleaned entries from process 0,
+ * and mark the specified entries as clean.
+ *
+ * This function must only be called by the process with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/4/05
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ haddr_t * haddr_buf_ptr = NULL;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity check */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank != 0);
+
+ /* Retrieve the clean list from process 0 */
+ if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, &num_entries, &haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
+
+ if(num_entries > 0)
+ /* mark the indicated entries as clean */
+ if(H5C_mark_entries_as_clean(f, dxpl_id, (int32_t)num_entries, haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.")
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done)
+ (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
+
+done:
+ if(haddr_buf_ptr)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__receive_and_apply_clean_list() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC__receive_candidate_list()
+ *
+ * Purpose: Receive the list of candidate entries from process 0,
+ * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
+ * Note that the caller must free this buffer if it is
+ * returned.
+ *
+ * This function must only be called by the process with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__receive_candidate_list(const H5AC_t *cache_ptr, int *num_entries_ptr,
+ haddr_t **haddr_buf_ptr_ptr)
+{
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->mpi_rank != 0);
+ HDassert(aux_ptr-> metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert(num_entries_ptr != NULL);
+ HDassert(*num_entries_ptr == 0);
+ HDassert(haddr_buf_ptr_ptr != NULL);
+ HDassert(*haddr_buf_ptr_ptr == NULL);
+
+ /* Retrieve the candidate list from process 0 */
+ if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, num_entries_ptr, haddr_buf_ptr_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__receive_candidate_list() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__rsp__dist_md_write__flush
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * that is triggered by a flush -- which in turn must have been
+ * triggered by either a flush API call or a file close --
+ * when the distributed metadata write strategy is selected.
+ *
+ * Upon entry, each process generates it own candidate list,
+ * being a sorted list of all dirty metadata entries currently
+ * in the metadata cache. Note that this list must be idendical
+ * across all processes, as all processes see the same stream
+ * of dirty metadata coming in, and use the same lists of
+ * candidate entries at each sync point. (At first glance, this
+ * argument sounds circular, but think of it in the sense of
+ * a recursive proof).
+ *
+ * If this this list is empty, we are done, and the function
+ * returns
+ *
+ * Otherwise, after the sorted list dirty metadata entries is
+ * constructed, each process uses the same algorithm to assign
+ * each entry on the candidate list to exactly one process for
+ * flushing.
+ *
+ * At this point, all processes participate in a barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * Each process then flushes the entries assigned to it, and
+ * marks all other entries on the candidate list as clean.
+ *
+ * Finally, all processes participate in a second barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * At the end of this process, process 0 and only process 0
+ * must tidy up its lists of dirtied and cleaned entries.
+ * These lists are not used in the distributed metadata write
+ * strategy, but they must be maintained should we shift
+ * to a strategy that uses them.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ haddr_t * haddr_buf_ptr = NULL;
+ int mpi_result;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+
+ /* first construct the candidate list -- initially, this will be in the
+ * form of a skip list. We will convert it later.
+ */
+ if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
+
+ if(H5SL_count(aux_ptr->candidate_slist_ptr) > 0) {
+ herr_t result;
+
+ /* convert the candidate list into the format we
+ * are used to receiving from process 0.
+ */
+ if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
+
+ /* initial sync point barrier */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ /* Enable writes during this operation */
+ aux_ptr->write_permitted = TRUE;
+
+ /* Apply the candidate list */
+ result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries,
+ haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size);
+
+ /* Disable writes again */
+ aux_ptr->write_permitted = FALSE;
+
+ /* Check for error on the write operation */
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
+
+ /* this code exists primarily for the test bed -- it allows us to
+ * enforce posix semantics on the server that pretends to be a
+ * file system in our parallel tests.
+ */
+ if(aux_ptr->write_done)
+ (aux_ptr->write_done)();
+
+ /* final sync point barrier */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ /* if this is process zero, tidy up the dirtied,
+ * and flushed and still clean lists.
+ */
+ if(aux_ptr->mpi_rank == 0)
+ if(H5AC__tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
+ } /* end if */
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done)
+ (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
+
+done:
+ if(haddr_buf_ptr)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__rsp__dist_md_write__flush() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__rsp__dist_md_write__flush_to_min_clean
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * triggered by the accumulation of dirty metadata (as
+ * opposed to a flush call to the API) when the distributed
+ * metadata write strategy is selected.
+ *
+ * After invocation and initial sanity checking this function
+ * first checks to see if evictions are enabled -- if they
+ * are not, the function does nothing and returns.
+ *
+ * Otherwise, process zero constructs a list of entries to
+ * be flushed in order to bring the process zero cache back
+ * within its min clean requirement. Note that this list
+ * (the candidate list) may be empty.
+ *
+ * Then, all processes participate in a barrier.
+ *
+ * After the barrier, process 0 broadcasts the number of
+ * entries in the candidate list prepared above, and all
+ * other processes receive this number.
+ *
+ * If this number is zero, we are done, and the function
+ * returns without further action.
+ *
+ * Otherwise, process 0 broadcasts the sorted list of
+ * candidate entries, and all other processes receive it.
+ *
+ * Then, each process uses the same algorithm to assign
+ * each entry on the candidate list to exactly one process
+ * for flushing.
+ *
+ * Each process then flushes the entries assigned to it, and
+ * marks all other entries on the candidate list as clean.
+ *
+ * Finally, all processes participate in a second barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * At the end of this process, process 0 and only process 0
+ * must tidy up its lists of dirtied and cleaned entries.
+ * These lists are not used in the distributed metadata write
+ * strategy, but they must be maintained should we shift
+ * to a strategy that uses them.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ hbool_t evictions_enabled;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+
+ /* Query if evictions are allowed */
+ if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
+
+ if(evictions_enabled) {
+ /* construct candidate list -- process 0 only */
+ if(aux_ptr->mpi_rank == 0)
+ if(H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
+
+ /* propagate and apply candidate list -- all processes */
+ if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.")
+ } /* evictions enabled */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__rsp__dist_md_write__flush_to_min_clean() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__rsp__p0_only__flush
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * that is triggered a flush -- which in turn must have been
+ * triggered by either a flush API call or a file close --
+ * when the process 0 only metadata write strategy is selected.
+ *
+ * First, all processes participate in a barrier.
+ *
+ * Then process zero flushes all dirty entries, and broadcasts
+ * they number of clean entries (if any) to all the other
+ * caches.
+ *
+ * If this number is zero, we are done.
+ *
+ * Otherwise, process 0 broadcasts the list of cleaned
+ * entries, and all other processes which are part of this
+ * file group receive it, and mark the listed entries as
+ * clean in their caches.
+ *
+ * Since all processes have the same set of dirty
+ * entries at the beginning of the sync point, and all
+ * entries that will be written are written before
+ * process zero broadcasts the number of cleaned entries,
+ * there is no need for a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ int mpi_result;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+
+ /* to prevent "messages from the future" we must
+ * synchronize all processes before we start the flush.
+ * Hence the following barrier.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ /* Flush data to disk, from rank 0 process */
+ if(aux_ptr->mpi_rank == 0) {
+ herr_t result;
+
+ /* Enable writes during this operation */
+ aux_ptr->write_permitted = TRUE;
+
+ /* Flush the cache */
+ result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET);
+
+ /* Disable writes again */
+ aux_ptr->write_permitted = FALSE;
+
+ /* Check for error on the write operation */
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
+
+ /* this code exists primarily for the test bed -- it allows us to
+ * enforce posix semantics on the server that pretends to be a
+ * file system in our parallel tests.
+ */
+ if(aux_ptr->write_done)
+ (aux_ptr->write_done)();
+ } /* end if */
+
+ /* Propagate cleaned entries to other ranks. */
+ if(H5AC__propagate_flushed_and_still_clean_entries_list(f, H5AC_dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__rsp__p0_only__flush() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__rsp__p0_only__flush_to_min_clean
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * triggered by the accumulation of dirty metadata (as
+ * opposed to a flush call to the API) when the process 0
+ * only metadata write strategy is selected.
+ *
+ * After invocation and initial sanity checking this function
+ * first checks to see if evictions are enabled -- if they
+ * are not, the function does nothing and returns.
+ *
+ * Otherwise, all processes participate in a barrier.
+ *
+ * After the barrier, if this is process 0, the function
+ * causes the cache to flush sufficient entries to get the
+ * cache back within its minimum clean fraction, and broadcast
+ * the number of entries which have been flushed since
+ * the last sync point, and are still clean.
+ *
+ * If this number is zero, we are done.
+ *
+ * Otherwise, process 0 broadcasts the list of cleaned
+ * entries, and all other processes which are part of this
+ * file group receive it, and mark the listed entries as
+ * clean in their caches.
+ *
+ * Since all processes have the same set of dirty
+ * entries at the beginning of the sync point, and all
+ * entries that will be written are written before
+ * process zero broadcasts the number of cleaned entries,
+ * there is no need for a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ hbool_t evictions_enabled;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+
+ /* Query if evictions are allowed */
+ if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
+
+ /* Flush if evictions are allowed -- following call
+ * will cause process 0 to flush to min clean size,
+ * and then propagate the newly clean entries to the
+ * other processes.
+ *
+ * Otherwise, do nothing.
+ */
+ if(evictions_enabled) {
+ int mpi_result;
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we start the flush. This synchronization may
+ * already be done -- hence the do_barrier parameter.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result)
+
+ if(0 == aux_ptr->mpi_rank) {
+ herr_t result;
+
+ /* here, process 0 flushes as many entries as necessary to
+ * comply with the currently specified min clean size.
+ * Note that it is quite possible that no entries will be
+ * flushed.
+ */
+
+ /* Enable writes during this operation */
+ aux_ptr->write_permitted = TRUE;
+
+ /* Flush the cache */
+ result = H5C_flush_to_min_clean(f, dxpl_id);
+
+ /* Disable writes again */
+ aux_ptr->write_permitted = FALSE;
+
+ /* Check for error on the write operation */
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.")
+
+ /* this call exists primarily for the test code -- it is used
+ * to enforce POSIX semantics on the process used to simulate
+ * reads and writes in t_cache.c.
+ */
+ if(aux_ptr->write_done)
+ (aux_ptr->write_done)();
+ } /* end if */
+
+ if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__rsp__p0_only__flush_to_min_clean() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__run_sync_point
+ *
+ * Purpose: Top level routine for managing a sync point between all
+ * meta data caches in the parallel case. Since all caches
+ * see the same sequence of dirty metadata, we simply count
+ * bytes of dirty metadata, and run a sync point whenever the
+ * number of dirty bytes of metadata seen since the last
+ * sync point exceeds a threshold that is common across all
+ * processes. We also run sync points in response to
+ * HDF5 API calls triggering either a flush or a file close.
+ *
+ * In earlier versions of PHDF5, only the metadata cache with
+ * mpi rank 0 was allowed to write to file. All other
+ * metadata caches on processes with rank greater than 0 were
+ * required to retain dirty entries until they were notified
+ * that the entry is was clean.
+ *
+ * This function was created to make it easier for us to
+ * experiment with other options, as it is a single point
+ * for the execution of sync points.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * March 11, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(f != NULL);
+ cache_ptr = f->shared->cache;
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) ||
+ (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED));
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n",
+ aux_ptr->mpi_rank,
+ aux_ptr->dirty_bytes_propagations,
+ aux_ptr->unprotect_dirty_bytes,
+ aux_ptr->unprotect_dirty_bytes_updates,
+ aux_ptr->insert_dirty_bytes,
+ aux_ptr->insert_dirty_bytes_updates,
+ aux_ptr->rename_dirty_bytes,
+ aux_ptr->rename_dirty_bytes_updates);
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ switch(aux_ptr->metadata_write_strategy) {
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
+ break;
+ } /* end switch */
+ break;
+
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
+ break;
+ } /* end switch */
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.")
+ break;
+ } /* end switch */
+
+ /* reset the dirty bytes count */
+ aux_ptr->dirty_bytes = 0;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations += 1;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__run_sync_point() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__tidy_cache_0_lists()
+ *
+ * Purpose: In the distributed metadata write strategy, not all dirty
+ * entries are written by process 0 -- thus we must tidy
+ * up the dirtied, and flushed and still clean lists
+ * maintained by process zero after each sync point.
+ *
+ * This procedure exists to tend to this issue.
+ *
+ * At this point, all entries that process 0 cleared should
+ * have been removed from both the dirty and flushed and
+ * still clean lists, and entries that process 0 has flushed
+ * should have been removed from the dirtied list and added
+ * to the flushed and still clean list.
+ *
+ * However, since the distributed metadata write strategy
+ * doesn't make use of these lists, the objective is simply
+ * to maintain these lists in consistent state that allows
+ * them to be used should the metadata write strategy change
+ * to one that uses these lists.
+ *
+ * Thus for our purposes, all we need to do is remove from
+ * the dirtied and flushed and still clean lists all
+ * references to entries that appear in the candidate list.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * 4/20/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates,
+ haddr_t *candidates_list_ptr)
+{
+ H5AC_aux_t * aux_ptr;
+ int i;
+
+ FUNC_ENTER_STATIC_NOERR
+
+ /* Sanity checks */
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
+ HDassert(aux_ptr->mpi_rank == 0);
+ HDassert(num_candidates > 0);
+ HDassert(candidates_list_ptr != NULL);
+
+ /* clean up dirtied and flushed and still clean lists by removing
+ * all entries on the candidate list. Cleared entries should
+ * have been removed from both the dirty and cleaned lists at
+ * this point, flushed entries should have been added to the
+ * cleaned list. However, for this metadata write strategy,
+ * we just want to remove all references to the candidate entries.
+ */
+ for(i = 0; i < num_candidates; i++) {
+ H5AC_slist_entry_t * d_slist_entry_ptr;
+ H5AC_slist_entry_t * c_slist_entry_ptr;
+ haddr_t addr;
+
+ addr = candidates_list_ptr[i];
+
+ /* addr may be either on the dirtied list, or on the flushed
+ * and still clean list. Remove it.
+ */
+ if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr)))
+ d_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr);
+ if(NULL != (c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)&addr)))
+ c_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr);
+ } /* end for */
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC__tidy_cache_0_lists() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC__flush_entries
+ *
+ * Purpose: Flush the metadata cache associated with the specified file,
+ * only writing from rank 0, but propagating the cleaned entries
+ * to all ranks.
+ *
+ * Return: Non-negative on success/Negative on failure if there was a
+ * request to flush all items and something was protected.
+ *
+ * Programmer: Quincey Koziol
+ * koziol@hdfgroup.org
+ * Aug 22 2009
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5AC__flush_entries(H5F_t *f, hid_t dxpl_id)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_PACKAGE
+
+ /* Sanity checks */
+ HDassert(f);
+ HDassert(f->shared->cache);
+
+ /* Check if we have >1 ranks */
+ if(f->shared->cache->aux_ptr)
+ if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC__flush_entries() */
+#endif /* H5_HAVE_PARALLEL */
+
diff --git a/src/H5ACpkg.h b/src/H5ACpkg.h
index 28965d2..74bf079 100644
--- a/src/H5ACpkg.h
+++ b/src/H5ACpkg.h
@@ -41,8 +41,20 @@
/* Get needed headers */
#include "H5Cprivate.h" /* Cache */
+#include "H5FLprivate.h" /* Free Lists */
#include "H5SLprivate.h" /* Skip lists */
+/*****************************/
+/* Package Private Variables */
+/*****************************/
+
+/* Declare extern the free list to manage the H5AC_aux_t struct */
+H5FL_EXTERN(H5AC_aux_t);
+
+
+/**************************/
+/* Package Private Macros */
+/**************************/
#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0
@@ -393,9 +405,18 @@ typedef struct H5AC_aux_t
} H5AC_aux_t; /* struct H5AC_aux_t */
/* Package scoped functions */
-H5_DLL herr_t H5AC_set_sync_point_done_callback(H5C_t *cache_ptr,
+H5_DLL herr_t H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr);
+H5_DLL herr_t H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr);
+H5_DLL herr_t H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr,
+ hbool_t was_dirty, unsigned flags);
+H5_DLL herr_t H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr);
+H5_DLL herr_t H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr,
+ haddr_t new_addr);
+H5_DLL herr_t H5AC__flush_entries(H5F_t *f, hid_t dxpl_id);
+H5_DLL herr_t H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op);
+H5_DLL herr_t H5AC__set_sync_point_done_callback(H5C_t *cache_ptr,
void (*sync_point_done)(int num_writes, haddr_t *written_entries_tbl));
-H5_DLL herr_t H5AC_set_write_done_callback(H5C_t * cache_ptr,
+H5_DLL herr_t H5AC__set_write_done_callback(H5C_t * cache_ptr,
void (* write_done)(void));
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5C.c b/src/H5C.c
index fc4e8a5..5c96593 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -41,7 +41,7 @@
*
* Code Changes:
*
- * - Remove extra functionality in H5C_flush_single_entry()?
+ * - Remove extra functionality in H5C__flush_single_entry()?
*
* - Change protect/unprotect to lock/unlock.
*
@@ -70,16 +70,22 @@
*
**************************************************************************/
+/****************/
+/* Module Setup */
+/****************/
+
#define H5C_PACKAGE /*suppress error about including H5Cpkg */
#define H5F_PACKAGE /*suppress error about including H5Fpkg */
+/***********/
+/* Headers */
+/***********/
#include "H5private.h" /* Generic Functions */
#ifdef H5_HAVE_PARALLEL
#include "H5ACprivate.h" /* Metadata cache */
#endif /* H5_HAVE_PARALLEL */
#include "H5Cpkg.h" /* Cache */
-#include "H5Dprivate.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
#include "H5FDprivate.h" /* File drivers */
@@ -91,9 +97,9 @@
#include "H5SLprivate.h" /* Skip lists */
-/*
- * Private macros.
- */
+/****************/
+/* Local Macros */
+/****************/
#if H5C_DO_MEMORY_SANITY_CHECKS
#define H5C_IMAGE_EXTRA_SPACE 8
#define H5C_IMAGE_SANITY_VALUE "DeadBeef"
@@ -101,17 +107,14 @@
#define H5C_IMAGE_EXTRA_SPACE 0
#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
-/*
- * Private file-scope variables.
- */
-
-/* Declare a free list to manage the H5C_t struct */
-H5FL_DEFINE_STATIC(H5C_t);
+/******************/
+/* Local Typedefs */
+/******************/
-/*
- * Private file-scope function declarations:
- */
+/********************/
+/* Local Prototypes */
+/********************/
static herr_t H5C__auto_adjust_cache_size(H5F_t * f,
hid_t dxpl_id,
@@ -140,13 +143,6 @@ static herr_t H5C__flash_increase_cache_size(H5C_t * cache_ptr,
size_t old_entry_size,
size_t new_entry_size);
-static herr_t H5C_flush_single_entry(const H5F_t * f,
- hid_t dxpl_id,
- haddr_t addr,
- unsigned flags,
- hbool_t del_entry_from_slist_on_destroy,
- int64_t *entry_size_change_ptr);
-
static herr_t H5C_flush_invalidate_cache(const H5F_t * f,
hid_t dxpl_id,
unsigned flags);
@@ -198,6 +194,24 @@ herr_t H5C_dump_cache(H5C_t * cache_ptr, const char * cache_name);
herr_t H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn);
#endif /* debugging routines */
+/*********************/
+/* Package Variables */
+/*********************/
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+/* Declare a free list to manage the H5C_t struct */
+H5FL_DEFINE_STATIC(H5C_t);
+
+
/****************************************************************************
*
@@ -386,882 +400,6 @@ H5C__epoch_marker_fsf_size(const void H5_ATTR_UNUSED * thing, size_t H5_ATTR_UNU
/*-------------------------------------------------------------------------
- * Function: H5C_apply_candidate_list
- *
- * Purpose: Apply the supplied candidate list.
- *
- * We used to do this by simply having each process write
- * every mpi_size-th entry in the candidate list, starting
- * at index mpi_rank, and mark all the others clean.
- *
- * However, this can cause unnecessary contention in a file
- * system by increasing the number of processes writing to
- * adjacent locations in the HDF5 file.
- *
- * To attempt to minimize this, we now arange matters such
- * that each process writes n adjacent entries in the
- * candidate list, and marks all others clean. We must do
- * this in such a fashion as to guarantee that each entry
- * on the candidate list is written by exactly one process,
- * and marked clean by all others.
- *
- * To do this, first construct a table mapping mpi_rank
- * to the index of the first entry in the candidate list to
- * be written by the process of that mpi_rank, and then use
- * the table to control which entries are written and which
- * are marked as clean as a function of the mpi_rank.
- *
- * Note that the table must be identical on all processes, as
- * all see the same candidate list, mpi_size, and mpi_rank --
- * the inputs used to construct the table.
- *
- * We construct the table as follows. Let:
- *
- * n = num_candidates / mpi_size;
- *
- * m = num_candidates % mpi_size;
- *
- * Now allocate an array of integers of length mpi_size + 1,
- * and call this array candidate_assignment_table.
- *
- * Conceptually, if the number of candidates is a multiple
- * of the mpi_size, we simply pass through the candidate list
- * and assign n entries to each process to flush, with the
- * index of the first entry to flush in the location in
- * the candidate_assignment_table indicated by the mpi_rank
- * of the process.
- *
- * In the more common case in which the candidate list isn't
- * isn't a multiple of the mpi_size, we pretend it is, and
- * give num_candidates % mpi_size processes one extra entry
- * each to make things work out.
- *
- * Once the table is constructed, we determine the first and
- * last entry this process is to flush as follows:
- *
- * first_entry_to_flush = candidate_assignment_table[mpi_rank]
- *
- * last_entry_to_flush =
- * candidate_assignment_table[mpi_rank + 1] - 1;
- *
- * With these values determined, we simply scan through the
- * candidate list, marking all entries in the range
- * [first_entry_to_flush, last_entry_to_flush] for flush,
- * and all others to be cleaned.
- *
- * Finally, we scan the LRU from tail to head, flushing
- * or marking clean the candidate entries as indicated.
- * If necessary, we scan the pinned list as well.
- *
- * Note that this function will fail if any protected or
- * clean entries appear on the candidate list.
- *
- * This function is used in managing sync points, and
- * shouldn't be used elsewhere.
- *
- * Return: Success: SUCCEED
- *
- * Failure: FAIL
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- * Changes: Ported code to detect next entry status changes as the
- * the result of a flush from the serial code in the scan of
- * the LRU. Also added code to detect and adapt to the
- * removal from the cache of the next entry in the scan of
- * the LRU.
- *
- * Note that at present, all of these changes should not
- * be required as the operations on entries as they are
- * flushed that can cause these condiditions are not premitted
- * in the parallel case. However, Quincey indicates that
- * this may change, and thus has requested the modification.
- *
- * Note the assert(FALSE) in the if statement whose body
- * restarts the scan of the LRU. As the body of the if
- * statement should be unreachable, it should never be
- * triggered until the constraints on the parallel case
- * are relaxed. Please remove the assertion at that time.
- *
- * Also added warning on the Pinned Entry List scan, as it
- * is potentially subject to the same issue. As there is
- * no cognate of this scan in the serial code, I don't have
- * a fix to port to it.
- *
- * JRM -- 4/10/19
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0
-herr_t
-H5C_apply_candidate_list(H5F_t * f,
- hid_t dxpl_id,
- H5C_t * cache_ptr,
- int num_candidates,
- haddr_t * candidates_list_ptr,
- int mpi_rank,
- int mpi_size)
-{
- hbool_t restart_scan;
- hbool_t prev_is_dirty;
- int i;
- int m;
- int n;
- int first_entry_to_flush;
- int last_entry_to_flush;
- int entries_to_clear = 0;
- int entries_to_flush = 0;
- int entries_to_flush_or_clear_last = 0;
- int entries_to_flush_collectively = 0;
- int entries_cleared = 0;
- int entries_flushed = 0;
- int entries_delayed = 0;
- int entries_flushed_or_cleared_last = 0;
- int entries_flushed_collectively = 0;
- int entries_examined = 0;
- int initial_list_len;
- int * candidate_assignment_table = NULL;
- haddr_t addr;
- H5C_cache_entry_t * clear_ptr = NULL;
- H5C_cache_entry_t * next_ptr = NULL;
- H5C_cache_entry_t * entry_ptr = NULL;
- H5C_cache_entry_t * flush_ptr = NULL;
- H5C_cache_entry_t * delayed_ptr = NULL;
-#if H5C_DO_SANITY_CHECKS
- haddr_t last_addr;
-#endif /* H5C_DO_SANITY_CHECKS */
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- char tbl_buf[1024];
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI(FAIL)
-
- HDassert( cache_ptr != NULL );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- HDassert( num_candidates > 0 );
- HDassert( num_candidates <= cache_ptr->slist_len );
- HDassert( candidates_list_ptr != NULL );
- HDassert( 0 <= mpi_rank );
- HDassert( mpi_rank < mpi_size );
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n",
- FUNC, mpi_rank);
- for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0';
- sprintf(&(tbl_buf[0]), "candidate list = ");
- for ( i = 0; i < num_candidates; i++ )
- {
- sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx",
- (long long)(*(candidates_list_ptr + i)));
- }
- sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
- HDfprintf(stdout, "%s", tbl_buf);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- n = num_candidates / mpi_size;
- m = num_candidates % mpi_size;
- HDassert(n >= 0);
-
- if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1))))
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table")
-
- candidate_assignment_table[0] = 0;
- candidate_assignment_table[mpi_size] = num_candidates;
-
- if(m == 0) { /* mpi_size is an even divisor of num_candidates */
- HDassert(n > 0);
- for(i = 1; i < mpi_size; i++)
- candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
- } /* end if */
- else {
- for(i = 1; i <= m; i++)
- candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1;
-
- if(num_candidates < mpi_size) {
- for(i = m + 1; i < mpi_size; i++)
- candidate_assignment_table[i] = num_candidates;
- } /* end if */
- else {
- for(i = m + 1; i < mpi_size; i++)
- candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
- } /* end else */
- } /* end else */
- HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates);
-
-#if H5C_DO_SANITY_CHECKS
- /* verify that the candidate assignment table has the expected form */
- for ( i = 1; i < mpi_size - 1; i++ )
- {
- int a, b;
-
- a = candidate_assignment_table[i] - candidate_assignment_table[i - 1];
- b = candidate_assignment_table[i + 1] - candidate_assignment_table[i];
-
- HDassert( n + 1 >= a );
- HDassert( a >= b );
- HDassert( b >= n );
- }
-#endif /* H5C_DO_SANITY_CHECKS */
-
- first_entry_to_flush = candidate_assignment_table[mpi_rank];
- last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1;
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- for ( i = 0; i < 1024; i++ )
- tbl_buf[i] = '\0';
- sprintf(&(tbl_buf[0]), "candidate assignment table = ");
- for(i = 0; i <= mpi_size; i++)
- sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %d", candidate_assignment_table[i]);
- sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
- HDfprintf(stdout, "%s", tbl_buf);
-
- HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n",
- FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush);
-
- HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- for(i = 0; i < num_candidates; i++) {
- addr = candidates_list_ptr[i];
- HDassert( H5F_addr_defined(addr) );
-
-#if H5C_DO_SANITY_CHECKS
- if ( i > 0 ) {
- if ( last_addr == addr ) {
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n")
- } else if ( last_addr > addr ) {
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n")
- }
- }
-
- last_addr = addr;
-#endif /* H5C_DO_SANITY_CHECKS */
-
- H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
- if(entry_ptr == NULL) {
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.")
- } else if(!entry_ptr->is_dirty) {
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.")
- } else if ( entry_ptr->is_protected ) {
- /* For now at least, we can't deal with protected entries.
- * If we encounter one, scream and die. If it becomes an
- * issue, we should be able to work around this.
- */
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.")
- } else {
- /* determine whether the entry is to be cleared or flushed,
- * and mark it accordingly. We will scan the protected and
- * pinned list shortly, and clear or flush according to these
- * markings.
- */
- if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) {
- entries_to_flush++;
- entry_ptr->flush_immediately = TRUE;
- } /* end if */
- else {
- entries_to_clear++;
- entry_ptr->clear_on_unprotect = TRUE;
- } /* end else */
- } /* end else */
- } /* end for */
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n",
- FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear,
- (int)entries_to_flush);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
-
- /* We have now marked all the entries on the candidate list for
- * either flush or clear -- now scan the LRU and the pinned list
- * for these entries and do the deed.
- *
- * Note that we are doing things in this round about manner so as
- * to preserve the order of the LRU list to the best of our ability.
- * If we don't do this, my experiments indicate that we will have a
- * noticably poorer hit ratio as a result.
- */
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank,
- (int)(cache_ptr->LRU_list_len));
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- /* ===================================================================== *
- * Now scan the LRU and PEL lists, flushing or clearing entries as
- * needed.
- *
- * The flush_me_last and flush_me_collectively flags may dictate how or
- * when some entries can be flushed, and should be addressed here.
- * However, in their initial implementation, these flags only apply to the
- * superblock, so there's only a relatively small change to this function
- * to account for this one case where they come into play. If these flags
- * are ever expanded upon, this function and the following flushing steps
- * should be reworked to account for additional cases.
- * ===================================================================== */
-
- HDassert(entries_to_flush >= 0);
-
- restart_scan = FALSE;
- entries_examined = 0;
- initial_list_len = cache_ptr->LRU_list_len;
- entry_ptr = cache_ptr->LRU_tail_ptr;
-
- /* Examine each entry in the LRU list */
- while ( ( entry_ptr != NULL )
- &&
- ( entries_examined <= (entries_to_flush + 1) * initial_list_len )
- &&
- ( (entries_cleared + entries_flushed) < num_candidates ) ) {
-
- if ( entry_ptr->prev != NULL )
- prev_is_dirty = entry_ptr->prev->is_dirty;
-
- /* If this process needs to clear this entry. */
- if(entry_ptr->clear_on_unprotect) {
-
- HDassert(entry_ptr->is_dirty);
-
- next_ptr = entry_ptr->next;
- entry_ptr->clear_on_unprotect = FALSE;
- clear_ptr = entry_ptr;
- entry_ptr = entry_ptr->prev;
- entries_cleared++;
-
-#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
- HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
- (long long)clear_ptr->addr);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- /* No need to check for the next entry in the scan being
- * removed from the cache, as this call to H5C_flush_single_entry()
- * will not call either the pre_serialize or serialize callbacks.
- */
-
- if(H5C_flush_single_entry(f,
- dxpl_id,
- clear_ptr->addr,
- H5C__FLUSH_CLEAR_ONLY_FLAG,
- TRUE,
- NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
- } /* end if */
-
- /* Else, if this process needs to flush this entry. */
- else if (entry_ptr->flush_immediately) {
-
- HDassert(entry_ptr->is_dirty);
-
- next_ptr = entry_ptr->next;
- entry_ptr->flush_immediately = FALSE;
- flush_ptr = entry_ptr;
- entry_ptr = entry_ptr->prev;
- entries_flushed++;
-
-#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
- HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
- (long long)flush_ptr->addr);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- /* reset entries_removed_counter and
- * last_entry_removed_ptr prior to the call to
- * H5C_flush_single_entry() so that we can spot
- * unexpected removals of entries from the cache,
- * and set the restart_scan flag if proceeding
- * would be likely to cause us to scan an entry
- * that is no longer in the cache.
- *
- * Note that as of this writing (April 2015) this
- * case cannot occur in the parallel case. However
- * Quincey is making noises about changing this, hence
- * the insertion of this test.
- *
- * Note also that there is no test code to verify
- * that this code actually works (although similar code
- * in the serial version exists and is tested).
- *
- * Implementing a test will likely require implementing
- * flush op like facilities in the parallel tests. At
- * a guess this will not be terribly painful, but it
- * will take a bit of time.
- */
- cache_ptr->entries_removed_counter = 0;
- cache_ptr->last_entry_removed_ptr = NULL;
-
- if(H5C_flush_single_entry(f,
- dxpl_id,
- flush_ptr->addr,
- H5C__NO_FLAGS_SET,
- TRUE,
- NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.")
-
- if ( ( cache_ptr->entries_removed_counter > 1 ) ||
- ( cache_ptr->last_entry_removed_ptr == entry_ptr ) )
-
- restart_scan = TRUE;
-
- } /* end else-if */
-
- /* Otherwise, no action to be taken on this entry. Grab the next. */
- else {
- entry_ptr = entry_ptr->prev;
-
- if ( entry_ptr != NULL )
- next_ptr = entry_ptr->next;
-
- } /* end else */
-
- if ( ( entry_ptr != NULL )
- &&
- ( ( restart_scan )
- ||
- ( entry_ptr->is_dirty != prev_is_dirty )
- ||
- ( entry_ptr->next != next_ptr )
- ||
- ( entry_ptr->is_protected )
- ||
- ( entry_ptr->is_pinned )
- )
- ) {
-
- /* something has happened to the LRU -- start over
- * from the tail.
- *
- * Recall that this code should be un-reachable at present,
- * as all the operations by entries on flush that could cause
- * it to be reachable are disallowed in the parallel case at
- * present. Hence the following assertion which should be
- * removed if the above changes.
- */
-
- HDassert( ! restart_scan );
- HDassert( entry_ptr->is_dirty == prev_is_dirty );
- HDassert( entry_ptr->next == next_ptr );
- HDassert( ! entry_ptr->is_protected );
- HDassert( ! entry_ptr->is_pinned );
-
- HDassert(FALSE); /* see comment above */
-
- restart_scan = FALSE;
- entry_ptr = cache_ptr->LRU_tail_ptr;
-/*
- H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr)
-*/
- }
-
- entries_examined++;
- } /* end while */
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n",
- FUNC, mpi_rank, entries_examined,
- entries_cleared, entries_flushed);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- /* It is also possible that some of the cleared entries are on the
- * pinned list. Must scan that also.
- *
- * WARNING:
- *
- * As we now allow unpinning, and removal of other entries as a side
- * effect of flushing an entry, it is possible that the next entry
- * in a PEL scan could either be no longer pinned, or no longer in
- * the cache by the time we get to it.
- *
- * At present, this is not possible in this case, as we disallow such
- * operations in the parallel version of the library. However, Quincey
- * has been making noises about relaxing this. If and when he does,
- * we have a potential problem here.
- *
- * The same issue exists in the serial cache, and there are tests
- * to detect this problem when it occurs, and adjust to it. As seen
- * above in the LRU scan, I have ported such tests to the parallel
- * code where a close cognate exists in the serial code.
- *
- * I haven't done so here, as there are no PEL scans where the problem
- * can occur in the serial code. Needless to say, this will have to
- * be repaired if the constraints on pre_serialize and serialize
- * callbacks are relaxed in the parallel version of the metadata cache.
- *
- * JRM -- 4/1/15
- */
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n",
- FUNC, mpi_rank, (int)(cache_ptr->pel_len));
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- entry_ptr = cache_ptr->pel_head_ptr;
- while((entry_ptr != NULL) &&
- ((entries_cleared + entries_flushed + entries_delayed)
- < num_candidates)) {
-
- /* If entry is marked for flush or for clear */
- if((entry_ptr->clear_on_unprotect||entry_ptr->flush_immediately)) {
-
- /* If this entry needs to be flushed last */
- if (entry_ptr->flush_me_last) {
-
- /* At this time, only the superblock supports being
- flushed last. Conveniently, it also happens to be the only
- entry that supports being flushed collectively, as well. Also
- conveniently, it's always pinned, so we only need to check
- for it while scanning the PEL here. Finally, it's never
- included in a candidate list that excludes other dirty
- entries in a cache, so we can handle this relatively simple
- case here.
-
- For now, this function asserts this and saves the entry
- to flush it after scanning the rest of the PEL list.
-
- If there are ever more entries that either need to be
- flushed last and/or flushed collectively, this whole routine
- will need to be reworked to handle all additional cases. As
- it is the simple case of a single pinned entry needing
- flushed last and collectively is just a minor addition to
- this routine, but signficantly buffing up the usage of
- flush_me_last or flush_me_collectively will require a more
- intense rework of this function and potentially the function
- of candidate lists as a whole. */
-
- HDassert(entry_ptr->flush_me_collectively);
- entries_to_flush_or_clear_last++;
- entries_to_flush_collectively++;
- HDassert(entries_to_flush_or_clear_last == 1);
- HDassert(entries_to_flush_collectively == 1);
-
- /* Delay the entry. It will be flushed later. */
- delayed_ptr = entry_ptr;
- entries_delayed++;
- HDassert(entries_delayed == 1);
-
- } /* end if */
-
- /* Else, this process needs to clear this entry. */
- else if (entry_ptr->clear_on_unprotect) {
- HDassert(!entry_ptr->flush_immediately);
- entry_ptr->clear_on_unprotect = FALSE;
- clear_ptr = entry_ptr;
- entry_ptr = entry_ptr->next;
- entries_cleared++;
-
-#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
- HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
- (long long)clear_ptr->addr);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- if(H5C_flush_single_entry(f,
- dxpl_id,
- clear_ptr->addr,
- H5C__FLUSH_CLEAR_ONLY_FLAG,
- TRUE,
- NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
- } /* end else-if */
-
- /* Else, if this process needs to independently flush this entry. */
- else if (entry_ptr->flush_immediately) {
- entry_ptr->flush_immediately = FALSE;
- flush_ptr = entry_ptr;
- entry_ptr = entry_ptr->next;
- entries_flushed++;
-
-#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
- HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
- (long long)flush_ptr->addr);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- if(H5C_flush_single_entry(f,
- dxpl_id,
- flush_ptr->addr,
- H5C__NO_FLAGS_SET,
- TRUE,
- NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
- } /* end else-if */
- } /* end if */
-
- /* Otherwise, this entry is not marked for flush or clear. Grab the next. */
- else {
- entry_ptr = entry_ptr->next;
- } /* end else */
-
- } /* end while */
-
-#if H5C_APPLY_CANDIDATE_LIST__DEBUG
- HDfprintf(stdout,
- "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n",
- FUNC, mpi_rank, entries_examined,
- entries_cleared, entries_flushed);
- HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank);
-
- HDfsync(stdout);
-#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
-
- /* ====================================================================== *
- * Now, handle all delayed entries. *
- * *
- * This can *only* be the superblock at this time, so it's relatively *
- * easy to deal with. We're collectively flushing the entry saved from *
- * above. This will need to be handled differently if there are ever more *
- * than one entry needing this special treatment.) *
- * ====================================================================== */
-
- if (delayed_ptr) {
-
- if (delayed_ptr->clear_on_unprotect) {
- entry_ptr->clear_on_unprotect = FALSE;
- entries_cleared++;
- } else if (delayed_ptr->flush_immediately) {
- entry_ptr->flush_immediately = FALSE;
- entries_flushed++;
- } /* end if */
-
- if(H5C_flush_single_entry(f,
- dxpl_id,
- delayed_ptr->addr,
- H5C__NO_FLAGS_SET,
- TRUE,
- NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL,
- "Can't flush entry collectively.")
-
- entries_flushed_collectively++;
- entries_flushed_or_cleared_last++;
- } /* end if */
-
- /* ====================================================================== *
- * Finished flushing everything. *
- * ====================================================================== */
-
- HDassert((entries_flushed == entries_to_flush));
- HDassert((entries_cleared == entries_to_clear));
- HDassert((entries_flushed_or_cleared_last == entries_to_flush_or_clear_last));
- HDassert((entries_flushed_collectively == entries_to_flush_collectively));
-
- if((entries_flushed != entries_to_flush) ||
- (entries_cleared != entries_to_clear) ||
- (entries_flushed_or_cleared_last != entries_to_flush_or_clear_last) ||
- (entries_flushed_collectively != entries_to_flush_collectively))
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.")
-
-done:
- if(candidate_assignment_table != NULL)
- candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table);
-
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5C_apply_candidate_list() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5C_construct_candidate_list__clean_cache
- *
- * Purpose: Construct the list of entries that should be flushed to
- * clean all entries in the cache.
- *
- * This function is used in managing sync points, and
- * shouldn't be used elsewhere.
- *
- * Return: Success: SUCCEED
- *
- * Failure: FAIL
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr)
-{
- size_t space_needed;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI(FAIL)
-
- HDassert( cache_ptr != NULL );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
-
- /* As a sanity check, set space needed to the size of the skip list.
- * This should be the sum total of the sizes of all the dirty entries
- * in the metadata cache.
- */
- space_needed = cache_ptr->slist_size;
-
- /* Recall that while we shouldn't have any protected entries at this
- * point, it is possible that some dirty entries may reside on the
- * pinned list at this point.
- */
- HDassert( cache_ptr->slist_size <=
- (cache_ptr->dLRU_list_size + cache_ptr->pel_size) );
- HDassert( cache_ptr->slist_len <=
- (cache_ptr->dLRU_list_len + cache_ptr->pel_len) );
-
- if(space_needed > 0) { /* we have work to do */
- H5C_cache_entry_t *entry_ptr;
- int nominated_entries_count = 0;
- size_t nominated_entries_size = 0;
- haddr_t nominated_addr;
-
- HDassert( cache_ptr->slist_len > 0 );
-
- /* Scan the dirty LRU list from tail forward and nominate sufficient
- * entries to free up the necessary space.
- */
- entry_ptr = cache_ptr->dLRU_tail_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL)) {
- HDassert( ! (entry_ptr->is_protected) );
- HDassert( ! (entry_ptr->is_read_only) );
- HDassert( entry_ptr->ro_ref_count == 0 );
- HDassert( entry_ptr->is_dirty );
- HDassert( entry_ptr->in_slist );
-
- nominated_addr = entry_ptr->addr;
- if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).")
-
- nominated_entries_size += entry_ptr->size;
- nominated_entries_count++;
- entry_ptr = entry_ptr->aux_prev;
- } /* end while */
- HDassert( entry_ptr == NULL );
-
- /* it is possible that there are some dirty entries on the
- * protected entry list as well -- scan it too if necessary
- */
- entry_ptr = cache_ptr->pel_head_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL)) {
- if(entry_ptr->is_dirty) {
- HDassert( ! (entry_ptr->is_protected) );
- HDassert( ! (entry_ptr->is_read_only) );
- HDassert( entry_ptr->ro_ref_count == 0 );
- HDassert( entry_ptr->is_dirty );
- HDassert( entry_ptr->in_slist );
-
- nominated_addr = entry_ptr->addr;
- if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).")
-
- nominated_entries_size += entry_ptr->size;
- nominated_entries_count++;
- } /* end if */
-
- entry_ptr = entry_ptr->next;
- } /* end while */
-
- HDassert( nominated_entries_count == cache_ptr->slist_len );
- HDassert( nominated_entries_size == space_needed );
- } /* end if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5C_construct_candidate_list__clean_cache() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5C_construct_candidate_list__min_clean
- *
- * Purpose: Construct the list of entries that should be flushed to
- * get the cache back within its min clean constraints.
- *
- * This function is used in managing sync points, and
- * shouldn't be used elsewhere.
- *
- * Return: Success: SUCCEED
- *
- * Failure: FAIL
- *
- * Programmer: John Mainzer
- * 3/17/10
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr)
-{
- size_t space_needed = 0;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI(FAIL)
-
- HDassert( cache_ptr != NULL );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
-
- /* compute the number of bytes (if any) that must be flushed to get the
- * cache back within its min clean constraints.
- */
- if(cache_ptr->max_cache_size > cache_ptr->index_size) {
- if(((cache_ptr->max_cache_size - cache_ptr->index_size) +
- cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size)
- space_needed = 0;
- else
- space_needed = cache_ptr->min_clean_size -
- ((cache_ptr->max_cache_size - cache_ptr->index_size) +
- cache_ptr->cLRU_list_size);
- } /* end if */
- else {
- if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size)
- space_needed = 0;
- else
- space_needed = cache_ptr->min_clean_size -
- cache_ptr->cLRU_list_size;
- } /* end else */
-
- if(space_needed > 0) { /* we have work to do */
- H5C_cache_entry_t *entry_ptr;
- int nominated_entries_count = 0;
- size_t nominated_entries_size = 0;
-
- HDassert( cache_ptr->slist_len > 0 );
-
- /* Scan the dirty LRU list from tail forward and nominate sufficient
- * entries to free up the necessary space.
- */
- entry_ptr = cache_ptr->dLRU_tail_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL) &&
- (!entry_ptr->flush_me_last)) {
- haddr_t nominated_addr;
-
- HDassert( ! (entry_ptr->is_protected) );
- HDassert( ! (entry_ptr->is_read_only) );
- HDassert( entry_ptr->ro_ref_count == 0 );
- HDassert( entry_ptr->is_dirty );
- HDassert( entry_ptr->in_slist );
-
- nominated_addr = entry_ptr->addr;
- if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.")
-
- nominated_entries_size += entry_ptr->size;
- nominated_entries_count++;
- entry_ptr = entry_ptr->aux_prev;
- } /* end while */
- HDassert( nominated_entries_count <= cache_ptr->slist_len );
- HDassert( nominated_entries_size >= space_needed );
- } /* end if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5C_construct_candidate_list__min_clean() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
* Function: H5C_create
*
* Purpose: Allocate, initialize, and return the address of a new
@@ -1818,7 +956,7 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type,
if(entry_ptr->is_pinned)
HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is pinned.")
- /* If we get this far, call H5C_flush_single_entry() with the
+ /* If we get this far, call H5C__flush_single_entry() with the
* H5C__FLUSH_INVALIDATE_FLAG and the H5C__FLUSH_CLEAR_ONLY_FLAG.
* This will clear the entry, and then delete it from the cache.
*/
@@ -1831,8 +969,8 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type,
entry_size = entry_ptr->size;
#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
- if(H5C_flush_single_entry(f, dxpl_id, entry_ptr->addr, flush_flags, TRUE, NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C_flush_single_entry() failed.")
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, flush_flags, TRUE, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C__flush_single_entry() failed.")
#if H5C_DO_SANITY_CHECKS
if ( entry_was_dirty )
@@ -1982,7 +1120,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags)
/* set the cache_ptr->slist_change_in_pre_serialize and
* cache_ptr->slist_change_in_serialize to false.
*
- * These flags are set to TRUE by H5C_flush_single_entry if the
+ * These flags are set to TRUE by H5C__flush_single_entry if the
* slist is modified by a pre_serialize or serialize call respectively.
* H5C_flush_cache uses these flags to detect any modifications
* to the slist that might corrupt the scan of the slist -- and
@@ -2156,7 +1294,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags)
flushed_entries_size += (int64_t)entry_ptr->size;
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- status = H5C_flush_single_entry(f,
+ status = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
flags,
@@ -2216,7 +1354,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags)
flushed_entries_size += (int64_t)entry_ptr->size;
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- status = H5C_flush_single_entry(f,
+ status = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
flags,
@@ -3198,318 +2336,6 @@ done:
/*-------------------------------------------------------------------------
- *
- * Function: H5C_mark_entries_as_clean
- *
- * Purpose: When the H5C code is used to implement the metadata caches
- * in PHDF5, only the cache with MPI_rank 0 is allowed to
- * actually write entries to disk -- all other caches must
- * retain dirty entries until they are advised that the
- * entries are clean.
- *
- * This function exists to allow the H5C code to receive these
- * notifications.
- *
- * The function receives a list of entry base addresses
- * which must refer to dirty entries in the cache. If any
- * of the entries are either clean or don't exist, the
- * function flags an error.
- *
- * The function scans the list of entries and flushes all
- * those that are currently unprotected with the
- * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently
- * protected are flagged for clearing when they are
- * unprotected.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: John Mainzer
- * 7/5/05
- *
- * Changes: Tidied up code, removeing some old commented out
- * code that had been left in pending success of the
- * new version.
- *
- * Note that unlike H5C_apply_candidate_list(),
- * H5C_mark_entries_as_clean() makes all its calls to
- * H6C_flush_single_entry() with the
- * H5C__FLUSH_CLEAR_ONLY_FLAG set. As a result,
- * the pre_serialize() and serialize calls are not made.
- *
- * This then implies that (assuming such actions were
- * permitted in the parallel case) no loads, dirties,
- * resizes, or removals of other entries can occur as
- * a side effect of the flush. Hence, there is no need
- * for the checks for entry removal / status change
- * that I ported to H5C_apply_candidate_list().
- *
- * However, if (in addition to allowing such operations
- * in the parallel case), we allow such operations outside
- * of the pre_serialize / serialize routines, this may
- * cease to be the case -- requiring a review of this
- * function.
- *
- *-------------------------------------------------------------------------
- */
-#ifdef H5_HAVE_PARALLEL
-herr_t
-H5C_mark_entries_as_clean(H5F_t * f,
- hid_t dxpl_id,
- int32_t ce_array_len,
- haddr_t * ce_array_ptr)
-{
- H5C_t * cache_ptr;
- int entries_cleared;
- int entries_examined;
- int i;
- int initial_list_len;
- haddr_t addr;
-#if H5C_DO_SANITY_CHECKS
- int pinned_entries_marked = 0;
- int protected_entries_marked = 0;
- int other_entries_marked = 0;
- haddr_t last_addr;
-#endif /* H5C_DO_SANITY_CHECKS */
- H5C_cache_entry_t * clear_ptr = NULL;
- H5C_cache_entry_t * entry_ptr = NULL;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI(FAIL)
-
- HDassert( f );
- HDassert( f->shared );
- cache_ptr = f->shared->cache;
- HDassert( cache_ptr );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
-
- HDassert( ce_array_len > 0 );
- HDassert( ce_array_ptr != NULL );
-
-#if H5C_DO_EXTREME_SANITY_CHECKS
- if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "an extreme sanity check failed on entry.\n");
- }
-#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
-
- for ( i = 0; i < ce_array_len; i++ )
- {
- addr = ce_array_ptr[i];
-
-#if H5C_DO_SANITY_CHECKS
- if ( i == 0 ) {
-
- last_addr = addr;
-
- } else {
-
- if ( last_addr == addr ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Duplicate entry in cleaned list.\n");
-
- } else if ( last_addr > addr ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "cleaned list not sorted.\n");
- }
- }
-
-#if H5C_DO_EXTREME_SANITY_CHECKS
- if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "an extreme sanity check failed in for loop.\n");
- }
-#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
-#endif /* H5C_DO_SANITY_CHECKS */
-
- HDassert( H5F_addr_defined(addr) );
-
- H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
-
- if ( entry_ptr == NULL ) {
-#if H5C_DO_SANITY_CHECKS
- HDfprintf(stdout,
- "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n",
- (int)i,
- (long)addr);
-#endif /* H5C_DO_SANITY_CHECKS */
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Listed entry not in cache?!?!?.")
-
- } else if ( ! entry_ptr->is_dirty ) {
-
-#if H5C_DO_SANITY_CHECKS
- HDfprintf(stdout,
- "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n",
- (long)addr);
-#endif /* H5C_DO_SANITY_CHECKS */
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Listed entry not dirty?!?!?.")
-
- } else {
-
- /* Mark the entry to be cleared on unprotect. We will
- * scan the LRU list shortly, and clear all those entries
- * not currently protected.
- */
- entry_ptr->clear_on_unprotect = TRUE;
-#if H5C_DO_SANITY_CHECKS
- if ( entry_ptr->is_protected ) {
-
- protected_entries_marked++;
-
- } else if ( entry_ptr->is_pinned ) {
-
- pinned_entries_marked++;
-
- } else {
-
- other_entries_marked++;
- }
-#endif /* H5C_DO_SANITY_CHECKS */
- }
- }
-
- /* Scan through the LRU list from back to front, and flush the
- * entries whose clear_on_unprotect flags are set. Observe that
- * any protected entries will not be on the LRU, and therefore
- * will not be flushed at this time.
- *
- * Note that unlike H5C_apply_candidate_list(),
- * H5C_mark_entries_as_clean() makes all its calls to
- * H6C_flush_single_entry() with the H5C__FLUSH_CLEAR_ONLY_FLAG
- * set. As a result, the pre_serialize() and serialize calls are
- * not made.
- *
- * This then implies that (assuming such actions were
- * permitted in the parallel case) no loads, dirties,
- * resizes, or removals of other entries can occur as
- * a side effect of the flush. Hence, there is no need
- * for the checks for entry removal / status change
- * that I ported to H5C_apply_candidate_list().
- *
- * However, if (in addition to allowing such operations
- * in the parallel case), we allow such operations outside
- * of the pre_serialize / serialize routines, this may
- * cease to be the case -- requiring a review of this
- * point.
- * JRM -- 4/7/15
- */
-
- entries_cleared = 0;
- entries_examined = 0;
- initial_list_len = cache_ptr->LRU_list_len;
- entry_ptr = cache_ptr->LRU_tail_ptr;
-
- while ( ( entry_ptr != NULL ) &&
- ( entries_examined <= initial_list_len ) &&
- ( entries_cleared < ce_array_len ) )
- {
- if ( entry_ptr->clear_on_unprotect ) {
-
- entry_ptr->clear_on_unprotect = FALSE;
- clear_ptr = entry_ptr;
- entry_ptr = entry_ptr->prev;
- entries_cleared++;
-
- if ( H5C_flush_single_entry(f,
- dxpl_id,
- clear_ptr->addr,
- H5C__FLUSH_CLEAR_ONLY_FLAG,
- TRUE,
- NULL) < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
- }
- } else {
-
- entry_ptr = entry_ptr->prev;
- }
- entries_examined++;
- }
-
-#if H5C_DO_SANITY_CHECKS
- HDassert( entries_cleared == other_entries_marked );
-#endif /* H5C_DO_SANITY_CHECKS */
-
- /* It is also possible that some of the cleared entries are on the
- * pinned list. Must scan that also.
- */
-
- entry_ptr = cache_ptr->pel_head_ptr;
-
- while ( entry_ptr != NULL )
- {
- if ( entry_ptr->clear_on_unprotect ) {
-
- entry_ptr->clear_on_unprotect = FALSE;
- clear_ptr = entry_ptr;
- entry_ptr = entry_ptr->next;
- entries_cleared++;
-
- if ( H5C_flush_single_entry(f,
- dxpl_id,
- clear_ptr->addr,
- H5C__FLUSH_CLEAR_ONLY_FLAG,
- TRUE,
- NULL) < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
- }
- } else {
-
- entry_ptr = entry_ptr->next;
- }
- }
-
-#if H5C_DO_SANITY_CHECKS
- HDassert( entries_cleared == pinned_entries_marked + other_entries_marked );
- HDassert( entries_cleared + protected_entries_marked == ce_array_len );
-#endif /* H5C_DO_SANITY_CHECKS */
-
- HDassert( ( entries_cleared == ce_array_len ) ||
- ( (ce_array_len - entries_cleared) <= cache_ptr->pl_len ) );
-
-#if H5C_DO_SANITY_CHECKS
- i = 0;
- entry_ptr = cache_ptr->pl_head_ptr;
- while ( entry_ptr != NULL )
- {
- if ( entry_ptr->clear_on_unprotect ) {
-
- i++;
- }
- entry_ptr = entry_ptr->next;
- }
- HDassert( (entries_cleared + i) == ce_array_len );
-#endif /* H5C_DO_SANITY_CHECKS */
-
-done:
-
-#if H5C_DO_EXTREME_SANITY_CHECKS
- if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
- ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "an extreme sanity check failed on exit.\n");
- }
-#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
-
- FUNC_LEAVE_NOAPI(ret_value)
-
-} /* H5C_mark_entries_as_clean() */
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
* Function: H5C_mark_entry_dirty
*
* Purpose: Mark a pinned or protected entry as dirty. The target entry
@@ -5891,7 +4717,7 @@ done:
* writes (secondary_dxpl_id). Since an uprotect cannot
* occasion a write at present, all this is moot for now.
* However, things change, and in any case,
- * H5C_flush_single_entry() needs primary_dxpl_id and
+ * H5C__flush_single_entry() needs primary_dxpl_id and
* secondary_dxpl_id in its parameter list.
*
* The function can't cause a read either, so the dxpl_id
@@ -6133,7 +4959,7 @@ H5C_unprotect(H5F_t * f,
if(take_ownership)
flush_flags |= H5C__TAKE_OWNERSHIP_FLAG;
- if ( H5C_flush_single_entry(f,
+ if ( H5C__flush_single_entry(f,
dxpl_id,
addr,
flush_flags,
@@ -6174,7 +5000,7 @@ H5C_unprotect(H5F_t * f,
"hash table contains multiple entries for addr?!?.")
}
- if ( H5C_flush_single_entry(f,
+ if ( H5C__flush_single_entry(f,
dxpl_id,
addr,
H5C__FLUSH_CLEAR_ONLY_FLAG,
@@ -7504,7 +6330,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
/* reset entries_removed_counter and
* last_entry_removed_ptr prior to the call to
- * H5C_flush_single_entry() so that we can spot
+ * H5C__flush_single_entry() so that we can spot
* unexpected removals of entries from the cache,
* and set the restart_scan flag if proceeding
* would be likely to cause us to scan an entry
@@ -7513,7 +6339,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
cache_ptr->entries_removed_counter = 0;
cache_ptr->last_entry_removed_ptr = NULL;
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__NO_FLAGS_SET,
@@ -7529,7 +6355,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
bytes_evicted += entry_ptr->size;
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__FLUSH_INVALIDATE_FLAG,
@@ -7620,7 +6446,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
if ( ! (entry_ptr->is_dirty) ) {
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__FLUSH_INVALIDATE_FLAG,
@@ -8126,7 +6952,7 @@ done:
* was largely removed from the cache data structures before
* the flush proper. However, re-entrant calls to the cache
* in the parallel case required a re-factoring of the
- * H5C_flush_single_entry() function to keep entries fully
+ * H5C__flush_single_entry() function to keep entries fully
* in the cache until after the pre-serialize and serialize
* calls.
* JRM -- 12/25/14
@@ -8239,7 +7065,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
*/
#if H5C_DO_SANITY_CHECKS
- /* Depending on circumstances, H5C_flush_single_entry() will
+ /* Depending on circumstances, H5C__flush_single_entry() will
* remove dirty entries from the slist as it flushes them.
* Thus for sanity checks we must make note of the initial
* slist length and size before we do any flushes.
@@ -8272,7 +7098,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
/* set the cache_ptr->slist_change_in_pre_serialize and
* cache_ptr->slist_change_in_serialize to false.
*
- * These flags are set to TRUE by H5C_flush_single_entry if the
+ * These flags are set to TRUE by H5C__flush_single_entry if the
* slist is modified by a pre_serialize or serialize call
* respectively.
*
@@ -8381,7 +7207,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
/* Test to see if we are can flush the entry now.
* If we can, go ahead and flush, but don't tell
- * H5C_flush_single_entry() to destroy the entry
+ * H5C__flush_single_entry() to destroy the entry
* as pinned entries can't be evicted.
*/
if(entry_ptr->flush_dep_height == curr_flush_dep_height ) {
@@ -8399,7 +7225,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- status = H5C_flush_single_entry(f,
+ status = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__NO_FLAGS_SET,
@@ -8461,7 +7287,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- status = H5C_flush_single_entry(f,
+ status = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
(cooked_flags | H5C__FLUSH_INVALIDATE_FLAG),
@@ -8603,7 +7429,7 @@ H5C_flush_invalidate_cache(const H5F_t * f,
entry_was_dirty = entry_ptr->is_dirty;
- status = H5C_flush_single_entry(f,
+ status = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
(cooked_flags | H5C__FLUSH_INVALIDATE_FLAG),
@@ -8755,7 +7581,7 @@ done:
/*-------------------------------------------------------------------------
*
- * Function: H5C_flush_single_entry
+ * Function: H5C__flush_single_entry
*
* Purpose: Flush or clear (and evict if requested) the cache entry
* with the specified address and type. If the type is NULL,
@@ -8814,8 +7640,8 @@ done:
*
*-------------------------------------------------------------------------
*/
-static herr_t
-H5C_flush_single_entry(const H5F_t * f,
+herr_t
+H5C__flush_single_entry(const H5F_t * f,
hid_t dxpl_id,
haddr_t addr,
unsigned flags,
@@ -8838,7 +7664,7 @@ H5C_flush_single_entry(const H5F_t * f,
H5C_cache_entry_t * entry_ptr = NULL;
herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_NOAPI_NOINIT
+ FUNC_ENTER_PACKAGE
HDassert( f );
HDassert( cache_ptr );
@@ -9620,7 +8446,7 @@ done:
( ( entry_ptr ) && ( ! entry_ptr->is_dirty ) ) );
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5C_flush_single_entry() */
+} /* H5C__flush_single_entry() */
/*-------------------------------------------------------------------------
@@ -10099,7 +8925,7 @@ done:
* attempts to load another entry.
*
* This error was exposed by a re-factor of the
- * H5C_flush_single_entry() routine. However, it was
+ * H5C__flush_single_entry() routine. However, it was
* a potential bug from the moment that entries were
* allowed to load other entries on flush.
*
@@ -10221,7 +9047,7 @@ H5C_make_space_in_cache(H5F_t * f,
/* reset entries_removed_counter and
* last_entry_removed_ptr prior to the call to
- * H5C_flush_single_entry() so that we can spot
+ * H5C__flush_single_entry() so that we can spot
* unexpected removals of entries from the cache,
* and set the restart_scan flag if proceeding
* would be likely to cause us to scan an entry
@@ -10230,7 +9056,7 @@ H5C_make_space_in_cache(H5F_t * f,
cache_ptr->entries_removed_counter = 0;
cache_ptr->last_entry_removed_ptr = NULL;
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__NO_FLAGS_SET,
@@ -10249,7 +9075,7 @@ H5C_make_space_in_cache(H5F_t * f,
cache_ptr->entries_scanned_to_make_space++;
#endif /* H5C_COLLECT_CACHE_STATS */
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__FLUSH_INVALIDATE_FLAG,
@@ -10404,7 +9230,7 @@ H5C_make_space_in_cache(H5F_t * f,
prev_ptr = entry_ptr->aux_prev;
- result = H5C_flush_single_entry(f,
+ result = H5C__flush_single_entry(f,
dxpl_id,
entry_ptr->addr,
H5C__FLUSH_INVALIDATE_FLAG,
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c
new file mode 100644
index 0000000..3abaf8b
--- /dev/null
+++ b/src/H5Cmpio.c
@@ -0,0 +1,1260 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*-------------------------------------------------------------------------
+ *
+ * Created: H5Cmpio.c
+ * June 20 2015
+ * Quincey Koziol
+ *
+ * Purpose: Functions in this file implement support for parallel I/O for
+ * generic cache code.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/****************/
+/* Module Setup */
+/****************/
+
+#define H5C_PACKAGE /*suppress error about including H5Cpkg */
+#define H5F_PACKAGE /*suppress error about including H5Fpkg */
+
+
+/***********/
+/* Headers */
+/***********/
+#include "H5private.h" /* Generic Functions */
+#include "H5ACprivate.h" /* Metadata cache */
+#include "H5Cpkg.h" /* Cache */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5Fpkg.h" /* Files */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /* Memory management */
+
+
+#ifdef H5_HAVE_PARALLEL
+
+/****************/
+/* Local Macros */
+/****************/
+#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0
+
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_apply_candidate_list
+ *
+ * Purpose: Apply the supplied candidate list.
+ *
+ * We used to do this by simply having each process write
+ * every mpi_size-th entry in the candidate list, starting
+ * at index mpi_rank, and mark all the others clean.
+ *
+ * However, this can cause unnecessary contention in a file
+ * system by increasing the number of processes writing to
+ * adjacent locations in the HDF5 file.
+ *
+ * To attempt to minimize this, we now arange matters such
+ * that each process writes n adjacent entries in the
+ * candidate list, and marks all others clean. We must do
+ * this in such a fashion as to guarantee that each entry
+ * on the candidate list is written by exactly one process,
+ * and marked clean by all others.
+ *
+ * To do this, first construct a table mapping mpi_rank
+ * to the index of the first entry in the candidate list to
+ * be written by the process of that mpi_rank, and then use
+ * the table to control which entries are written and which
+ * are marked as clean as a function of the mpi_rank.
+ *
+ * Note that the table must be identical on all processes, as
+ * all see the same candidate list, mpi_size, and mpi_rank --
+ * the inputs used to construct the table.
+ *
+ * We construct the table as follows. Let:
+ *
+ * n = num_candidates / mpi_size;
+ *
+ * m = num_candidates % mpi_size;
+ *
+ * Now allocate an array of integers of length mpi_size + 1,
+ * and call this array candidate_assignment_table.
+ *
+ * Conceptually, if the number of candidates is a multiple
+ * of the mpi_size, we simply pass through the candidate list
+ * and assign n entries to each process to flush, with the
+ * index of the first entry to flush in the location in
+ * the candidate_assignment_table indicated by the mpi_rank
+ * of the process.
+ *
+ * In the more common case in which the candidate list isn't
+ * isn't a multiple of the mpi_size, we pretend it is, and
+ * give num_candidates % mpi_size processes one extra entry
+ * each to make things work out.
+ *
+ * Once the table is constructed, we determine the first and
+ * last entry this process is to flush as follows:
+ *
+ * first_entry_to_flush = candidate_assignment_table[mpi_rank]
+ *
+ * last_entry_to_flush =
+ * candidate_assignment_table[mpi_rank + 1] - 1;
+ *
+ * With these values determined, we simply scan through the
+ * candidate list, marking all entries in the range
+ * [first_entry_to_flush, last_entry_to_flush] for flush,
+ * and all others to be cleaned.
+ *
+ * Finally, we scan the LRU from tail to head, flushing
+ * or marking clean the candidate entries as indicated.
+ * If necessary, we scan the pinned list as well.
+ *
+ * Note that this function will fail if any protected or
+ * clean entries appear on the candidate list.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ * Changes: Ported code to detect next entry status changes as the
+ * the result of a flush from the serial code in the scan of
+ * the LRU. Also added code to detect and adapt to the
+ * removal from the cache of the next entry in the scan of
+ * the LRU.
+ *
+ * Note that at present, all of these changes should not
+ * be required as the operations on entries as they are
+ * flushed that can cause these condiditions are not premitted
+ * in the parallel case. However, Quincey indicates that
+ * this may change, and thus has requested the modification.
+ *
+ * Note the assert(FALSE) in the if statement whose body
+ * restarts the scan of the LRU. As the body of the if
+ * statement should be unreachable, it should never be
+ * triggered until the constraints on the parallel case
+ * are relaxed. Please remove the assertion at that time.
+ *
+ * Also added warning on the Pinned Entry List scan, as it
+ * is potentially subject to the same issue. As there is
+ * no cognate of this scan in the serial code, I don't have
+ * a fix to port to it.
+ *
+ * JRM -- 4/10/19
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_apply_candidate_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5C_t * cache_ptr,
+ int num_candidates,
+ haddr_t * candidates_list_ptr,
+ int mpi_rank,
+ int mpi_size)
+{
+ hbool_t restart_scan;
+ hbool_t prev_is_dirty;
+ int i;
+ int m;
+ int n;
+ int first_entry_to_flush;
+ int last_entry_to_flush;
+ int entries_to_clear = 0;
+ int entries_to_flush = 0;
+ int entries_to_flush_or_clear_last = 0;
+ int entries_to_flush_collectively = 0;
+ int entries_cleared = 0;
+ int entries_flushed = 0;
+ int entries_delayed = 0;
+ int entries_flushed_or_cleared_last = 0;
+ int entries_flushed_collectively = 0;
+ int entries_examined = 0;
+ int initial_list_len;
+ int * candidate_assignment_table = NULL;
+ haddr_t addr;
+ H5C_cache_entry_t * clear_ptr = NULL;
+ H5C_cache_entry_t * next_ptr = NULL;
+ H5C_cache_entry_t * entry_ptr = NULL;
+ H5C_cache_entry_t * flush_ptr = NULL;
+ H5C_cache_entry_t * delayed_ptr = NULL;
+#if H5C_DO_SANITY_CHECKS
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ char tbl_buf[1024];
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( num_candidates > 0 );
+ HDassert( num_candidates <= cache_ptr->slist_len );
+ HDassert( candidates_list_ptr != NULL );
+ HDassert( 0 <= mpi_rank );
+ HDassert( mpi_rank < mpi_size );
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n",
+ FUNC, mpi_rank);
+ for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0';
+ sprintf(&(tbl_buf[0]), "candidate list = ");
+ for ( i = 0; i < num_candidates; i++ )
+ {
+ sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx",
+ (long long)(*(candidates_list_ptr + i)));
+ }
+ sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
+ HDfprintf(stdout, "%s", tbl_buf);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ n = num_candidates / mpi_size;
+ m = num_candidates % mpi_size;
+ HDassert(n >= 0);
+
+ if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table")
+
+ candidate_assignment_table[0] = 0;
+ candidate_assignment_table[mpi_size] = num_candidates;
+
+ if(m == 0) { /* mpi_size is an even divisor of num_candidates */
+ HDassert(n > 0);
+ for(i = 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
+ } /* end if */
+ else {
+ for(i = 1; i <= m; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1;
+
+ if(num_candidates < mpi_size) {
+ for(i = m + 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = num_candidates;
+ } /* end if */
+ else {
+ for(i = m + 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
+ } /* end else */
+ } /* end else */
+ HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates);
+
+#if H5C_DO_SANITY_CHECKS
+ /* verify that the candidate assignment table has the expected form */
+ for ( i = 1; i < mpi_size - 1; i++ )
+ {
+ int a, b;
+
+ a = candidate_assignment_table[i] - candidate_assignment_table[i - 1];
+ b = candidate_assignment_table[i + 1] - candidate_assignment_table[i];
+
+ HDassert( n + 1 >= a );
+ HDassert( a >= b );
+ HDassert( b >= n );
+ }
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ first_entry_to_flush = candidate_assignment_table[mpi_rank];
+ last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1;
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ for ( i = 0; i < 1024; i++ )
+ tbl_buf[i] = '\0';
+ sprintf(&(tbl_buf[0]), "candidate assignment table = ");
+ for(i = 0; i <= mpi_size; i++)
+ sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %d", candidate_assignment_table[i]);
+ sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n");
+ HDfprintf(stdout, "%s", tbl_buf);
+
+ HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n",
+ FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush);
+
+ HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ for(i = 0; i < num_candidates; i++) {
+ addr = candidates_list_ptr[i];
+ HDassert( H5F_addr_defined(addr) );
+
+#if H5C_DO_SANITY_CHECKS
+ if ( i > 0 ) {
+ if ( last_addr == addr ) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n")
+ } else if ( last_addr > addr ) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n")
+ }
+ }
+
+ last_addr = addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+ if(entry_ptr == NULL) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.")
+ } else if(!entry_ptr->is_dirty) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.")
+ } else if ( entry_ptr->is_protected ) {
+ /* For now at least, we can't deal with protected entries.
+ * If we encounter one, scream and die. If it becomes an
+ * issue, we should be able to work around this.
+ */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.")
+ } else {
+ /* determine whether the entry is to be cleared or flushed,
+ * and mark it accordingly. We will scan the protected and
+ * pinned list shortly, and clear or flush according to these
+ * markings.
+ */
+ if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) {
+ entries_to_flush++;
+ entry_ptr->flush_immediately = TRUE;
+ } /* end if */
+ else {
+ entries_to_clear++;
+ entry_ptr->clear_on_unprotect = TRUE;
+ } /* end else */
+ } /* end else */
+ } /* end for */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n",
+ FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear,
+ (int)entries_to_flush);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+
+ /* We have now marked all the entries on the candidate list for
+ * either flush or clear -- now scan the LRU and the pinned list
+ * for these entries and do the deed.
+ *
+ * Note that we are doing things in this round about manner so as
+ * to preserve the order of the LRU list to the best of our ability.
+ * If we don't do this, my experiments indicate that we will have a
+ * noticably poorer hit ratio as a result.
+ */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank,
+ (int)(cache_ptr->LRU_list_len));
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* ===================================================================== *
+ * Now scan the LRU and PEL lists, flushing or clearing entries as
+ * needed.
+ *
+ * The flush_me_last and flush_me_collectively flags may dictate how or
+ * when some entries can be flushed, and should be addressed here.
+ * However, in their initial implementation, these flags only apply to the
+ * superblock, so there's only a relatively small change to this function
+ * to account for this one case where they come into play. If these flags
+ * are ever expanded upon, this function and the following flushing steps
+ * should be reworked to account for additional cases.
+ * ===================================================================== */
+
+ HDassert(entries_to_flush >= 0);
+
+ restart_scan = FALSE;
+ entries_examined = 0;
+ initial_list_len = cache_ptr->LRU_list_len;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+
+ /* Examine each entry in the LRU list */
+ while ( ( entry_ptr != NULL )
+ &&
+ ( entries_examined <= (entries_to_flush + 1) * initial_list_len )
+ &&
+ ( (entries_cleared + entries_flushed) < num_candidates ) ) {
+
+ if ( entry_ptr->prev != NULL )
+ prev_is_dirty = entry_ptr->prev->is_dirty;
+
+ /* If this process needs to clear this entry. */
+ if(entry_ptr->clear_on_unprotect) {
+
+ HDassert(entry_ptr->is_dirty);
+
+ next_ptr = entry_ptr->next;
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_cleared++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)clear_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* No need to check for the next entry in the scan being
+ * removed from the cache, as this call to H5C__flush_single_entry()
+ * will not call either the pre_serialize or serialize callbacks.
+ */
+
+ if(H5C__flush_single_entry(f,
+ dxpl_id,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ TRUE,
+ NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } /* end if */
+
+ /* Else, if this process needs to flush this entry. */
+ else if (entry_ptr->flush_immediately) {
+
+ HDassert(entry_ptr->is_dirty);
+
+ next_ptr = entry_ptr->next;
+ entry_ptr->flush_immediately = FALSE;
+ flush_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_flushed++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)flush_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* reset entries_removed_counter and
+ * last_entry_removed_ptr prior to the call to
+ * H5C__flush_single_entry() so that we can spot
+ * unexpected removals of entries from the cache,
+ * and set the restart_scan flag if proceeding
+ * would be likely to cause us to scan an entry
+ * that is no longer in the cache.
+ *
+ * Note that as of this writing (April 2015) this
+ * case cannot occur in the parallel case. However
+ * Quincey is making noises about changing this, hence
+ * the insertion of this test.
+ *
+ * Note also that there is no test code to verify
+ * that this code actually works (although similar code
+ * in the serial version exists and is tested).
+ *
+ * Implementing a test will likely require implementing
+ * flush op like facilities in the parallel tests. At
+ * a guess this will not be terribly painful, but it
+ * will take a bit of time.
+ */
+ cache_ptr->entries_removed_counter = 0;
+ cache_ptr->last_entry_removed_ptr = NULL;
+
+ if(H5C__flush_single_entry(f,
+ dxpl_id,
+ flush_ptr->addr,
+ H5C__NO_FLAGS_SET,
+ TRUE,
+ NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.")
+
+ if ( ( cache_ptr->entries_removed_counter > 1 ) ||
+ ( cache_ptr->last_entry_removed_ptr == entry_ptr ) )
+
+ restart_scan = TRUE;
+
+ } /* end else-if */
+
+ /* Otherwise, no action to be taken on this entry. Grab the next. */
+ else {
+ entry_ptr = entry_ptr->prev;
+
+ if ( entry_ptr != NULL )
+ next_ptr = entry_ptr->next;
+
+ } /* end else */
+
+ if ( ( entry_ptr != NULL )
+ &&
+ ( ( restart_scan )
+ ||
+ ( entry_ptr->is_dirty != prev_is_dirty )
+ ||
+ ( entry_ptr->next != next_ptr )
+ ||
+ ( entry_ptr->is_protected )
+ ||
+ ( entry_ptr->is_pinned )
+ )
+ ) {
+
+ /* something has happened to the LRU -- start over
+ * from the tail.
+ *
+ * Recall that this code should be un-reachable at present,
+ * as all the operations by entries on flush that could cause
+ * it to be reachable are disallowed in the parallel case at
+ * present. Hence the following assertion which should be
+ * removed if the above changes.
+ */
+
+ HDassert( ! restart_scan );
+ HDassert( entry_ptr->is_dirty == prev_is_dirty );
+ HDassert( entry_ptr->next == next_ptr );
+ HDassert( ! entry_ptr->is_protected );
+ HDassert( ! entry_ptr->is_pinned );
+
+ HDassert(FALSE); /* see comment above */
+
+ restart_scan = FALSE;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+/*
+ H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr)
+*/
+ }
+
+ entries_examined++;
+ } /* end while */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n",
+ FUNC, mpi_rank, entries_examined,
+ entries_cleared, entries_flushed);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* It is also possible that some of the cleared entries are on the
+ * pinned list. Must scan that also.
+ *
+ * WARNING:
+ *
+ * As we now allow unpinning, and removal of other entries as a side
+ * effect of flushing an entry, it is possible that the next entry
+ * in a PEL scan could either be no longer pinned, or no longer in
+ * the cache by the time we get to it.
+ *
+ * At present, this is not possible in this case, as we disallow such
+ * operations in the parallel version of the library. However, Quincey
+ * has been making noises about relaxing this. If and when he does,
+ * we have a potential problem here.
+ *
+ * The same issue exists in the serial cache, and there are tests
+ * to detect this problem when it occurs, and adjust to it. As seen
+ * above in the LRU scan, I have ported such tests to the parallel
+ * code where a close cognate exists in the serial code.
+ *
+ * I haven't done so here, as there are no PEL scans where the problem
+ * can occur in the serial code. Needless to say, this will have to
+ * be repaired if the constraints on pre_serialize and serialize
+ * callbacks are relaxed in the parallel version of the metadata cache.
+ *
+ * JRM -- 4/1/15
+ */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n",
+ FUNC, mpi_rank, (int)(cache_ptr->pel_len));
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while((entry_ptr != NULL) &&
+ ((entries_cleared + entries_flushed + entries_delayed)
+ < num_candidates)) {
+
+ /* If entry is marked for flush or for clear */
+ if((entry_ptr->clear_on_unprotect||entry_ptr->flush_immediately)) {
+
+ /* If this entry needs to be flushed last */
+ if (entry_ptr->flush_me_last) {
+
+ /* At this time, only the superblock supports being
+ flushed last. Conveniently, it also happens to be the only
+ entry that supports being flushed collectively, as well. Also
+ conveniently, it's always pinned, so we only need to check
+ for it while scanning the PEL here. Finally, it's never
+ included in a candidate list that excludes other dirty
+ entries in a cache, so we can handle this relatively simple
+ case here.
+
+ For now, this function asserts this and saves the entry
+ to flush it after scanning the rest of the PEL list.
+
+ If there are ever more entries that either need to be
+ flushed last and/or flushed collectively, this whole routine
+ will need to be reworked to handle all additional cases. As
+ it is the simple case of a single pinned entry needing
+ flushed last and collectively is just a minor addition to
+ this routine, but signficantly buffing up the usage of
+ flush_me_last or flush_me_collectively will require a more
+ intense rework of this function and potentially the function
+ of candidate lists as a whole. */
+
+ HDassert(entry_ptr->flush_me_collectively);
+ entries_to_flush_or_clear_last++;
+ entries_to_flush_collectively++;
+ HDassert(entries_to_flush_or_clear_last == 1);
+ HDassert(entries_to_flush_collectively == 1);
+
+ /* Delay the entry. It will be flushed later. */
+ delayed_ptr = entry_ptr;
+ entries_delayed++;
+ HDassert(entries_delayed == 1);
+
+ } /* end if */
+
+ /* Else, this process needs to clear this entry. */
+ else if (entry_ptr->clear_on_unprotect) {
+ HDassert(!entry_ptr->flush_immediately);
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_cleared++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)clear_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C__flush_single_entry(f,
+ dxpl_id,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ TRUE,
+ NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } /* end else-if */
+
+ /* Else, if this process needs to independently flush this entry. */
+ else if (entry_ptr->flush_immediately) {
+ entry_ptr->flush_immediately = FALSE;
+ flush_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_flushed++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)flush_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C__flush_single_entry(f,
+ dxpl_id,
+ flush_ptr->addr,
+ H5C__NO_FLAGS_SET,
+ TRUE,
+ NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } /* end else-if */
+ } /* end if */
+
+ /* Otherwise, this entry is not marked for flush or clear. Grab the next. */
+ else {
+ entry_ptr = entry_ptr->next;
+ } /* end else */
+
+ } /* end while */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout,
+ "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n",
+ FUNC, mpi_rank, entries_examined,
+ entries_cleared, entries_flushed);
+ HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank);
+
+ HDfsync(stdout);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* ====================================================================== *
+ * Now, handle all delayed entries. *
+ * *
+ * This can *only* be the superblock at this time, so it's relatively *
+ * easy to deal with. We're collectively flushing the entry saved from *
+ * above. This will need to be handled differently if there are ever more *
+ * than one entry needing this special treatment.) *
+ * ====================================================================== */
+
+ if (delayed_ptr) {
+
+ if (delayed_ptr->clear_on_unprotect) {
+ entry_ptr->clear_on_unprotect = FALSE;
+ entries_cleared++;
+ } else if (delayed_ptr->flush_immediately) {
+ entry_ptr->flush_immediately = FALSE;
+ entries_flushed++;
+ } /* end if */
+
+ if(H5C__flush_single_entry(f,
+ dxpl_id,
+ delayed_ptr->addr,
+ H5C__NO_FLAGS_SET,
+ TRUE,
+ NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL,
+ "Can't flush entry collectively.")
+
+ entries_flushed_collectively++;
+ entries_flushed_or_cleared_last++;
+ } /* end if */
+
+ /* ====================================================================== *
+ * Finished flushing everything. *
+ * ====================================================================== */
+
+ HDassert((entries_flushed == entries_to_flush));
+ HDassert((entries_cleared == entries_to_clear));
+ HDassert((entries_flushed_or_cleared_last == entries_to_flush_or_clear_last));
+ HDassert((entries_flushed_collectively == entries_to_flush_collectively));
+
+ if((entries_flushed != entries_to_flush) ||
+ (entries_cleared != entries_to_clear) ||
+ (entries_flushed_or_cleared_last != entries_to_flush_or_clear_last) ||
+ (entries_flushed_collectively != entries_to_flush_collectively))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.")
+
+done:
+ if(candidate_assignment_table != NULL)
+ candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_apply_candidate_list() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_construct_candidate_list__clean_cache
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * clean all entries in the cache.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr)
+{
+ size_t space_needed;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ /* As a sanity check, set space needed to the size of the skip list.
+ * This should be the sum total of the sizes of all the dirty entries
+ * in the metadata cache.
+ */
+ space_needed = cache_ptr->slist_size;
+
+ /* Recall that while we shouldn't have any protected entries at this
+ * point, it is possible that some dirty entries may reside on the
+ * pinned list at this point.
+ */
+ HDassert( cache_ptr->slist_size <=
+ (cache_ptr->dLRU_list_size + cache_ptr->pel_size) );
+ HDassert( cache_ptr->slist_len <=
+ (cache_ptr->dLRU_list_len + cache_ptr->pel_len) );
+
+ if(space_needed > 0) { /* we have work to do */
+ H5C_cache_entry_t *entry_ptr;
+ int nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+ haddr_t nominated_addr;
+
+ HDassert( cache_ptr->slist_len > 0 );
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL)) {
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+ } /* end while */
+ HDassert( entry_ptr == NULL );
+
+ /* it is possible that there are some dirty entries on the
+ * protected entry list as well -- scan it too if necessary
+ */
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL)) {
+ if(entry_ptr->is_dirty) {
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ } /* end if */
+
+ entry_ptr = entry_ptr->next;
+ } /* end while */
+
+ HDassert( nominated_entries_count == cache_ptr->slist_len );
+ HDassert( nominated_entries_size == space_needed );
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_construct_candidate_list__clean_cache() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_construct_candidate_list__min_clean
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * get the cache back within its min clean constraints.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr)
+{
+ size_t space_needed = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ /* compute the number of bytes (if any) that must be flushed to get the
+ * cache back within its min clean constraints.
+ */
+ if(cache_ptr->max_cache_size > cache_ptr->index_size) {
+ if(((cache_ptr->max_cache_size - cache_ptr->index_size) +
+ cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size)
+ space_needed = 0;
+ else
+ space_needed = cache_ptr->min_clean_size -
+ ((cache_ptr->max_cache_size - cache_ptr->index_size) +
+ cache_ptr->cLRU_list_size);
+ } /* end if */
+ else {
+ if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size)
+ space_needed = 0;
+ else
+ space_needed = cache_ptr->min_clean_size -
+ cache_ptr->cLRU_list_size;
+ } /* end else */
+
+ if(space_needed > 0) { /* we have work to do */
+ H5C_cache_entry_t *entry_ptr;
+ int nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+
+ HDassert( cache_ptr->slist_len > 0 );
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL) &&
+ (!entry_ptr->flush_me_last)) {
+ haddr_t nominated_addr;
+
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+ } /* end while */
+ HDassert( nominated_entries_count <= cache_ptr->slist_len );
+ HDassert( nominated_entries_size >= space_needed );
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_construct_candidate_list__min_clean() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_mark_entries_as_clean
+ *
+ * Purpose: When the H5C code is used to implement the metadata caches
+ * in PHDF5, only the cache with MPI_rank 0 is allowed to
+ * actually write entries to disk -- all other caches must
+ * retain dirty entries until they are advised that the
+ * entries are clean.
+ *
+ * This function exists to allow the H5C code to receive these
+ * notifications.
+ *
+ * The function receives a list of entry base addresses
+ * which must refer to dirty entries in the cache. If any
+ * of the entries are either clean or don't exist, the
+ * function flags an error.
+ *
+ * The function scans the list of entries and flushes all
+ * those that are currently unprotected with the
+ * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently
+ * protected are flagged for clearing when they are
+ * unprotected.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 7/5/05
+ *
+ * Changes: Tidied up code, removeing some old commented out
+ * code that had been left in pending success of the
+ * new version.
+ *
+ * Note that unlike H5C_apply_candidate_list(),
+ * H5C_mark_entries_as_clean() makes all its calls to
+ * H6C_flush_single_entry() with the
+ * H5C__FLUSH_CLEAR_ONLY_FLAG set. As a result,
+ * the pre_serialize() and serialize calls are not made.
+ *
+ * This then implies that (assuming such actions were
+ * permitted in the parallel case) no loads, dirties,
+ * resizes, or removals of other entries can occur as
+ * a side effect of the flush. Hence, there is no need
+ * for the checks for entry removal / status change
+ * that I ported to H5C_apply_candidate_list().
+ *
+ * However, if (in addition to allowing such operations
+ * in the parallel case), we allow such operations outside
+ * of the pre_serialize / serialize routines, this may
+ * cease to be the case -- requiring a review of this
+ * function.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_mark_entries_as_clean(H5F_t * f,
+ hid_t dxpl_id,
+ int32_t ce_array_len,
+ haddr_t * ce_array_ptr)
+{
+ H5C_t * cache_ptr;
+ int entries_cleared;
+ int entries_examined;
+ int i;
+ int initial_list_len;
+ haddr_t addr;
+#if H5C_DO_SANITY_CHECKS
+ int pinned_entries_marked = 0;
+ int protected_entries_marked = 0;
+ int other_entries_marked = 0;
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+ H5C_cache_entry_t * clear_ptr = NULL;
+ H5C_cache_entry_t * entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ HDassert( f );
+ HDassert( f->shared );
+ cache_ptr = f->shared->cache;
+ HDassert( cache_ptr );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ HDassert( ce_array_len > 0 );
+ HDassert( ce_array_ptr != NULL );
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "an extreme sanity check failed on entry.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ for ( i = 0; i < ce_array_len; i++ )
+ {
+ addr = ce_array_ptr[i];
+
+#if H5C_DO_SANITY_CHECKS
+ if ( i == 0 ) {
+
+ last_addr = addr;
+
+ } else {
+
+ if ( last_addr == addr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Duplicate entry in cleaned list.\n");
+
+ } else if ( last_addr > addr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "cleaned list not sorted.\n");
+ }
+ }
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "an extreme sanity check failed in for loop.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ HDassert( H5F_addr_defined(addr) );
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+
+ if ( entry_ptr == NULL ) {
+#if H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout,
+ "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n",
+ (int)i,
+ (long)addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Listed entry not in cache?!?!?.")
+
+ } else if ( ! entry_ptr->is_dirty ) {
+
+#if H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout,
+ "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n",
+ (long)addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Listed entry not dirty?!?!?.")
+
+ } else {
+
+ /* Mark the entry to be cleared on unprotect. We will
+ * scan the LRU list shortly, and clear all those entries
+ * not currently protected.
+ */
+ entry_ptr->clear_on_unprotect = TRUE;
+#if H5C_DO_SANITY_CHECKS
+ if ( entry_ptr->is_protected ) {
+
+ protected_entries_marked++;
+
+ } else if ( entry_ptr->is_pinned ) {
+
+ pinned_entries_marked++;
+
+ } else {
+
+ other_entries_marked++;
+ }
+#endif /* H5C_DO_SANITY_CHECKS */
+ }
+ }
+
+ /* Scan through the LRU list from back to front, and flush the
+ * entries whose clear_on_unprotect flags are set. Observe that
+ * any protected entries will not be on the LRU, and therefore
+ * will not be flushed at this time.
+ *
+ * Note that unlike H5C_apply_candidate_list(),
+ * H5C_mark_entries_as_clean() makes all its calls to
+ * H6C_flush_single_entry() with the H5C__FLUSH_CLEAR_ONLY_FLAG
+ * set. As a result, the pre_serialize() and serialize calls are
+ * not made.
+ *
+ * This then implies that (assuming such actions were
+ * permitted in the parallel case) no loads, dirties,
+ * resizes, or removals of other entries can occur as
+ * a side effect of the flush. Hence, there is no need
+ * for the checks for entry removal / status change
+ * that I ported to H5C_apply_candidate_list().
+ *
+ * However, if (in addition to allowing such operations
+ * in the parallel case), we allow such operations outside
+ * of the pre_serialize / serialize routines, this may
+ * cease to be the case -- requiring a review of this
+ * point.
+ * JRM -- 4/7/15
+ */
+
+ entries_cleared = 0;
+ entries_examined = 0;
+ initial_list_len = cache_ptr->LRU_list_len;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+
+ while ( ( entry_ptr != NULL ) &&
+ ( entries_examined <= initial_list_len ) &&
+ ( entries_cleared < ce_array_len ) )
+ {
+ if ( entry_ptr->clear_on_unprotect ) {
+
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_cleared++;
+
+ if ( H5C__flush_single_entry(f,
+ dxpl_id,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ TRUE,
+ NULL) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ }
+ } else {
+
+ entry_ptr = entry_ptr->prev;
+ }
+ entries_examined++;
+ }
+
+#if H5C_DO_SANITY_CHECKS
+ HDassert( entries_cleared == other_entries_marked );
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ /* It is also possible that some of the cleared entries are on the
+ * pinned list. Must scan that also.
+ */
+
+ entry_ptr = cache_ptr->pel_head_ptr;
+
+ while ( entry_ptr != NULL )
+ {
+ if ( entry_ptr->clear_on_unprotect ) {
+
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_cleared++;
+
+ if ( H5C__flush_single_entry(f,
+ dxpl_id,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ TRUE,
+ NULL) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ }
+ } else {
+
+ entry_ptr = entry_ptr->next;
+ }
+ }
+
+#if H5C_DO_SANITY_CHECKS
+ HDassert( entries_cleared == pinned_entries_marked + other_entries_marked );
+ HDassert( entries_cleared + protected_entries_marked == ce_array_len );
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ HDassert( ( entries_cleared == ce_array_len ) ||
+ ( (ce_array_len - entries_cleared) <= cache_ptr->pl_len ) );
+
+#if H5C_DO_SANITY_CHECKS
+ i = 0;
+ entry_ptr = cache_ptr->pl_head_ptr;
+ while ( entry_ptr != NULL )
+ {
+ if ( entry_ptr->clear_on_unprotect ) {
+
+ i++;
+ }
+ entry_ptr = entry_ptr->next;
+ }
+ HDassert( (entries_cleared + i) == ce_array_len );
+#endif /* H5C_DO_SANITY_CHECKS */
+
+done:
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) ||
+ ( H5C_validate_lru_list(cache_ptr) < 0 ) ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "an extreme sanity check failed on exit.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_mark_entries_as_clean() */
+#endif /* H5_HAVE_PARALLEL */
+
diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h
index 988dfff..6d75222 100644
--- a/src/H5Cpkg.h
+++ b/src/H5Cpkg.h
@@ -3769,7 +3769,9 @@ struct H5C_t {
/******************************/
/* Package Private Prototypes */
/******************************/
-
+H5_DLL herr_t H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id,
+ haddr_t addr, unsigned flags, hbool_t del_entry_from_slist_on_destroy,
+ int64_t *entry_size_change_ptr);
#endif /* _H5Cpkg_H */
diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h
index a9ffb70..2dc611d 100644
--- a/src/H5Cprivate.h
+++ b/src/H5Cprivate.h
@@ -1883,8 +1883,6 @@ H5_DLL FILE *H5C_get_trace_file_ptr(const H5C_t *cache_ptr);
H5_DLL FILE *H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr);
H5_DLL herr_t H5C_insert_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type,
haddr_t addr, void *thing, unsigned int flags);
-H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len,
- haddr_t *ce_array_ptr);
H5_DLL herr_t H5C_mark_entry_dirty(void *thing);
H5_DLL herr_t H5C_move_entry(H5C_t *cache_ptr, const H5C_class_t *type,
haddr_t old_addr, haddr_t new_addr);
@@ -1918,6 +1916,8 @@ H5_DLL herr_t H5C_apply_candidate_list(H5F_t *f, hid_t dxpl_id,
int mpi_rank, int mpi_size);
H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr);
H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr);
+H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len,
+ haddr_t *ce_array_ptr);
#endif /* H5_HAVE_PARALLEL */
#ifndef NDEBUG /* debugging functions */
diff --git a/src/Makefile.am b/src/Makefile.am
index 2df095e..b6c4dad 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,9 +42,11 @@ DISTCLEANFILES=H5pubconf.h
# library sources
libhdf5_la_SOURCES= H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \
H5A.c H5Abtree2.c H5Adense.c H5Adeprec.c H5Aint.c H5Atest.c \
- H5AC.c H5B.c H5Bcache.c H5Bdbg.c \
+ H5AC.c H5ACmpio.c \
+ H5B.c H5Bcache.c H5Bdbg.c \
H5B2.c H5B2cache.c H5B2dbg.c H5B2hdr.c H5B2int.c H5B2stat.c H5B2test.c \
- H5C.c H5CS.c \
+ H5C.c H5Cmpio.c \
+ H5CS.c \
H5D.c H5Dbtree.c H5Dchunk.c H5Dcompact.c H5Dcontig.c H5Ddbg.c \
H5Ddeprec.c H5Defl.c H5Dfill.c H5Dint.c \
H5Dio.c H5Dlayout.c \
diff --git a/src/Makefile.in b/src/Makefile.in
index d795677..5ea0a3e 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -150,9 +150,9 @@ LTLIBRARIES = $(lib_LTLIBRARIES)
libhdf5_la_LIBADD =
am_libhdf5_la_OBJECTS = H5.lo H5checksum.lo H5dbg.lo H5system.lo \
H5timer.lo H5trace.lo H5A.lo H5Abtree2.lo H5Adense.lo \
- H5Adeprec.lo H5Aint.lo H5Atest.lo H5AC.lo H5B.lo H5Bcache.lo \
+ H5Adeprec.lo H5Aint.lo H5Atest.lo H5AC.lo H5ACmpio.lo H5B.lo H5Bcache.lo \
H5Bdbg.lo H5B2.lo H5B2cache.lo H5B2dbg.lo H5B2hdr.lo \
- H5B2int.lo H5B2stat.lo H5B2test.lo H5C.lo H5CS.lo H5D.lo \
+ H5B2int.lo H5B2stat.lo H5B2test.lo H5C.lo H5Cmpio.lo H5CS.lo H5D.lo \
H5Dbtree.lo H5Dchunk.lo H5Dcompact.lo H5Dcontig.lo H5Ddbg.lo \
H5Ddeprec.lo H5Defl.lo H5Dfill.lo H5Dint.lo H5Dio.lo \
H5Dlayout.lo H5Dmpio.lo H5Doh.lo H5Dscatgath.lo H5Dselect.lo \
@@ -740,9 +740,9 @@ DISTCLEANFILES = H5pubconf.h
# library sources
libhdf5_la_SOURCES = H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \
H5A.c H5Abtree2.c H5Adense.c H5Adeprec.c H5Aint.c H5Atest.c \
- H5AC.c H5B.c H5Bcache.c H5Bdbg.c \
+ H5AC.c H5ACmpio.c H5B.c H5Bcache.c H5Bdbg.c \
H5B2.c H5B2cache.c H5B2dbg.c H5B2hdr.c H5B2int.c H5B2stat.c H5B2test.c \
- H5C.c H5CS.c \
+ H5C.c H5Cmpio.c H5CS.c \
H5D.c H5Dbtree.c H5Dchunk.c H5Dcompact.c H5Dcontig.c H5Ddbg.c \
H5Ddeprec.c H5Defl.c H5Dfill.c H5Dint.c \
H5Dio.c H5Dlayout.c \
@@ -963,6 +963,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5A.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5AC.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5ACmpio.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Abtree2.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Adense.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Adeprec.Plo@am__quote@
@@ -979,6 +980,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Bcache.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Bdbg.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5C.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Cmpio.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5CS.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5D.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Dbtree.Plo@am__quote@