summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5AC.c2685
-rw-r--r--src/H5ACpkg.h66
-rw-r--r--src/H5ACprivate.h18
-rw-r--r--src/H5ACpublic.h76
-rw-r--r--src/H5C.c863
-rw-r--r--src/H5Cpkg.h41
-rw-r--r--src/H5Cprivate.h26
-rw-r--r--src/H5FDmpio.c2
-rw-r--r--src/H5FDmpiposix.c2
-rw-r--r--test/cache_api.c235
-rw-r--r--test/cache_common.h66
-rw-r--r--testpar/t_cache.c2204
12 files changed, 4844 insertions, 1440 deletions
diff --git a/src/H5AC.c b/src/H5AC.c
index b0ad9b5..b4b0f0c 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -125,14 +125,28 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f,
hid_t dxpl_id,
hbool_t * write_permitted_ptr);
-#ifdef H5_HAVE_PARALLEL
-static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr);
-#endif /* JRM */
-
static herr_t H5AC_ext_config_2_int_config(H5AC_cache_config_t * ext_conf_ptr,
H5C_auto_size_ctl_t * int_conf_ptr);
#ifdef H5_HAVE_PARALLEL
+static herr_t H5AC_broadcast_candidate_list(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr);
+
+static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr);
+
+static herr_t H5AC_construct_candidate_list(H5AC_t * cache_ptr,
+ H5AC_aux_t * aux_ptr,
+ int sync_point_op);
+
+static herr_t H5AC_copy_candidate_list_to_buffer(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr,
+ size_t * MPI_Offset_buf_size_ptr,
+ MPI_Offset ** MPI_Offset_buf_ptr_ptr);
+
+static herr_t H5AC_flush_entries(H5F_t *f);
+
static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr,
H5AC_info_t * entry_ptr,
haddr_t addr,
@@ -147,33 +161,55 @@ static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr,
unsigned flags,
int type_id);
-#if 0 /* this is useful debugging code -- JRM */
-static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
- haddr_t addr,
- hbool_t was_dirty,
- unsigned flags,
- int type_id);
-#endif /* JRM */
+static herr_t H5AC_log_moved_entry(const H5F_t * f,
+ haddr_t old_addr,
+ haddr_t new_addr);
static herr_t H5AC_log_inserted_entry(H5F_t * f,
H5AC_t * cache_ptr,
H5AC_info_t * entry_ptr);
+static herr_t H5AC_propagate_and_apply_candidate_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr);
+
static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
hid_t dxpl_id,
- H5AC_t * cache_ptr,
- hbool_t do_barrier);
+ H5AC_t * cache_ptr);
+
+static herr_t H5AC_receive_candidate_list(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr);
static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f,
hid_t primary_dxpl_id,
hid_t secondary_dxpl_id,
H5AC_t * cache_ptr);
-static herr_t H5AC_log_moved_entry(const H5F_t * f,
- haddr_t old_addr,
- haddr_t new_addr);
+static herr_t H5AC_tidy_cache_0_lists(H5AC_t * cache_ptr,
+ int num_candidates,
+ haddr_t * candidates_list_ptr);
+
+herr_t H5AC_rsp__dist_md_write__flush(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr);
+
+herr_t H5AC_rsp__dist_md_write__flush_to_min_clean(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr);
+
+herr_t H5AC_rsp__p0_only__flush(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr);
+
+herr_t H5AC_rsp__p0_only__flush_to_min_clean(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr);
+
+static herr_t H5AC_run_sync_point(H5F_t *f,
+ hid_t dxpl_id,
+ int sync_point_op);
-static herr_t H5AC_flush_entries(H5F_t *f);
#endif /* H5_HAVE_PARALLEL */
@@ -377,26 +413,6 @@ H5AC_term_interface(void)
FUNC_LEAVE_NOAPI(n)
} /* end H5AC_term_interface() */
-
-/*-------------------------------------------------------------------------
- * Function: H5AC_create
- *
- * Purpose: Initialize the cache just after a file is opened. The
- * SIZE_HINT is the number of cache slots desired. If you
- * pass an invalid value then H5AC_NSLOTS is used. You can
- * turn off caching by using 1 for the SIZE_HINT value.
- *
- * Return: Success: Number of slots actually used.
- *
- * Failure: Negative
- *
- * Programmer: Robb Matzke
- * matzke@llnl.gov
- * Jul 9 1997
- *
- *-------------------------------------------------------------------------
- */
-
static const char * H5AC_entry_type_names[H5AC_NTYPES] =
{
"B-tree nodes",
@@ -428,19 +444,34 @@ static const char * H5AC_entry_type_names[H5AC_NTYPES] =
"test entry" /* for testing only -- not used for actual files */
};
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_create
+ *
+ * Purpose: Initialize the cache just after a file is opened. The
+ * SIZE_HINT is the number of cache slots desired. If you
+ * pass an invalid value then H5AC_NSLOTS is used. You can
+ * turn off caching by using 1 for the SIZE_HINT value.
+ *
+ * Return: Success: Number of slots actually used.
+ *
+ * Failure: Negative
+ *
+ * Programmer: Robb Matzke
+ * matzke@llnl.gov
+ * Jul 9 1997
+ *
+ *-------------------------------------------------------------------------
+ */
herr_t
H5AC_create(const H5F_t *f,
H5AC_cache_config_t *config_ptr)
{
- herr_t ret_value = SUCCEED; /* Return value */
- herr_t result;
#ifdef H5_HAVE_PARALLEL
char prefix[H5C__PREFIX_LEN] = "";
- MPI_Comm mpi_comm = MPI_COMM_NULL;
- int mpi_rank = -1;
- int mpi_size = -1;
H5AC_aux_t * aux_ptr = NULL;
#endif /* H5_HAVE_PARALLEL */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5AC_create, FAIL)
@@ -450,97 +481,78 @@ H5AC_create(const H5F_t *f,
HDcompile_assert(NELMTS(H5AC_entry_type_names) == H5AC_NTYPES);
HDcompile_assert(H5C__MAX_NUM_TYPE_IDS == H5AC_NTYPES);
- result = H5AC_validate_config(config_ptr);
-
- if ( result != SUCCEED ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration");
- }
+ if(H5AC_validate_config(config_ptr) < 0)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration")
#ifdef H5_HAVE_PARALLEL
- if ( IS_H5FD_MPI(f) ) {
+ if(IS_H5FD_MPI(f)) {
+ MPI_Comm mpi_comm;
+ int mpi_rank;
+ int mpi_size;
- if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) {
-
- HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \
- "can't get MPI communicator")
- }
-
- if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) {
+ if(MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f)))
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI communicator")
+ if((mpi_rank = H5F_mpi_get_rank(f)) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank")
- }
-
- if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) {
+ if((mpi_size = H5F_mpi_get_size(f)) < 0)
HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size")
- }
/* There is no point in setting up the auxilary structure if size
* is less than or equal to 1, as there will never be any processes
* to broadcast the clean lists to.
*/
- if ( mpi_size > 1 ) {
-
- if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) {
-
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "Can't allocate H5AC auxilary structure.")
-
- } else {
-
- aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC;
- aux_ptr->mpi_comm = mpi_comm;
- aux_ptr->mpi_rank = mpi_rank;
- aux_ptr->mpi_size = mpi_size;
- aux_ptr->write_permitted = FALSE;
- aux_ptr->dirty_bytes_threshold =
- H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD;
- aux_ptr->dirty_bytes = 0;
+ if(mpi_size > 1) {
+ if(NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate H5AC auxilary structure.")
+
+ aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC;
+ aux_ptr->mpi_comm = mpi_comm;
+ aux_ptr->mpi_rank = mpi_rank;
+ aux_ptr->mpi_size = mpi_size;
+ aux_ptr->write_permitted = FALSE;
+ aux_ptr->dirty_bytes_threshold = H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD;
+ aux_ptr->dirty_bytes = 0;
+ aux_ptr->metadata_write_strategy = H5AC__DEFAULT_METADATA_WRITE_STRATEGY;
#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->dirty_bytes_propagations = 0;
- aux_ptr->unprotect_dirty_bytes = 0;
- aux_ptr->unprotect_dirty_bytes_updates = 0;
- aux_ptr->insert_dirty_bytes = 0;
- aux_ptr->insert_dirty_bytes_updates = 0;
- aux_ptr->move_dirty_bytes = 0;
- aux_ptr->move_dirty_bytes_updates = 0;
+ aux_ptr->dirty_bytes_propagations = 0;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->move_dirty_bytes = 0;
+ aux_ptr->move_dirty_bytes_updates = 0;
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
- aux_ptr->d_slist_ptr = NULL;
- aux_ptr->d_slist_len = 0;
- aux_ptr->c_slist_ptr = NULL;
- aux_ptr->c_slist_len = 0;
- aux_ptr->write_done = NULL;
-
- sprintf(prefix, "%d:", mpi_rank);
- }
-
- if ( mpi_rank == 0 ) {
-
- aux_ptr->d_slist_ptr =
- H5SL_create(H5SL_TYPE_HADDR);
-
- if ( aux_ptr->d_slist_ptr == NULL ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
- "can't create dirtied entry list.")
- }
-
- aux_ptr->c_slist_ptr =
- H5SL_create(H5SL_TYPE_HADDR);
-
- if ( aux_ptr->c_slist_ptr == NULL ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
- "can't create cleaned entry list.")
- }
- }
- }
-
- if ( aux_ptr != NULL ) {
+ aux_ptr->d_slist_ptr = NULL;
+ aux_ptr->d_slist_len = 0;
+ aux_ptr->c_slist_ptr = NULL;
+ aux_ptr->c_slist_len = 0;
+ aux_ptr->candidate_slist_ptr = NULL;
+ aux_ptr->candidate_slist_len = 0;
+ aux_ptr->write_done = NULL;
+ aux_ptr->sync_point_done = NULL;
+
+ sprintf(prefix, "%d:", mpi_rank);
+
+ if(mpi_rank == 0) {
+ if(NULL == (aux_ptr->d_slist_ptr = H5SL_create(H5SL_TYPE_HADDR)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create dirtied entry list.")
+
+ if(NULL == (aux_ptr->c_slist_ptr = H5SL_create(H5SL_TYPE_HADDR)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create cleaned entry list.")
+ } /* end if */
- if ( aux_ptr->mpi_rank == 0 ) {
+ /* construct the candidate slist for all processes.
+ * when the distributed strategy is selected as all processes
+ * will use it in the case of a flush.
+ */
+ if(NULL == (aux_ptr->candidate_slist_ptr = H5SL_create(H5SL_TYPE_HADDR)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create candidate entry list.")
+ } /* end if */
+ if(aux_ptr != NULL) {
+ if(aux_ptr->mpi_rank == 0) {
f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
H5AC__DEFAULT_MIN_CLEAN_SIZE,
(H5AC_NTYPES - 1),
@@ -549,25 +561,17 @@ H5AC_create(const H5F_t *f,
TRUE,
H5AC_log_flushed_entry,
(void *)aux_ptr);
-
} else {
-
f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
H5AC__DEFAULT_MIN_CLEAN_SIZE,
(H5AC_NTYPES - 1),
(const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
NULL,
- FALSE,
-#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */
- H5AC_log_flushed_entry_dummy,
-#else /* JRM */
- NULL,
-#endif /* JRM */
(void *)aux_ptr);
}
-
} else {
-
f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
H5AC__DEFAULT_MIN_CLEAN_SIZE,
(H5AC_NTYPES - 1),
@@ -595,61 +599,40 @@ H5AC_create(const H5F_t *f,
}
#endif /* H5_HAVE_PARALLEL */
- if ( NULL == f->shared->cache ) {
-
+ if(NULL == f->shared->cache)
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed")
- }
#ifdef H5_HAVE_PARALLEL
- else if ( aux_ptr != NULL ) {
-
- result = H5C_set_prefix(f->shared->cache, prefix);
-
- if ( result != SUCCEED ) {
-
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "H5C_set_prefix() failed")
- }
- }
+ if(aux_ptr != NULL) {
+ if(H5C_set_prefix(f->shared->cache, prefix) < 0)
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "H5C_set_prefix() failed")
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
- result = H5AC_set_cache_auto_resize_config(f->shared->cache, config_ptr);
-
- if ( result != SUCCEED ) {
-
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "auto resize configuration failed")
- }
+ if(H5AC_set_cache_auto_resize_config(f->shared->cache, config_ptr) < 0)
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "auto resize configuration failed")
done:
-
#ifdef H5_HAVE_PARALLEL
-
/* if there is a failure, try to tidy up the auxilary structure */
-
- if ( ret_value != SUCCEED ) {
-
- if ( aux_ptr != NULL ) {
-
- if ( aux_ptr->d_slist_ptr != NULL ) {
-
+ if(ret_value < 0) {
+ if(aux_ptr != NULL) {
+ if(aux_ptr->d_slist_ptr != NULL)
H5SL_close(aux_ptr->d_slist_ptr);
- }
-
- if ( aux_ptr->c_slist_ptr != NULL ) {
+ if(aux_ptr->c_slist_ptr != NULL)
H5SL_close(aux_ptr->c_slist_ptr);
- }
+
+ if(aux_ptr->candidate_slist_ptr != NULL)
+ H5SL_close(aux_ptr->candidate_slist_ptr);
aux_ptr->magic = 0;
- H5FL_FREE(H5AC_aux_t, aux_ptr);
- aux_ptr = NULL;
- }
- }
+ aux_ptr = H5FL_FREE(H5AC_aux_t, aux_ptr);
+ } /* end if */
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_create() */
@@ -693,7 +676,7 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
#endif /* H5AC__TRACE_FILE_ENABLED */
#ifdef H5_HAVE_PARALLEL
- aux_ptr = f->shared->cache->aux_ptr;
+ aux_ptr = (struct H5AC_aux_t *)(f->shared->cache->aux_ptr);
if(aux_ptr)
/* Sanity check */
HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
@@ -714,6 +697,8 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
H5SL_close(aux_ptr->d_slist_ptr);
if(aux_ptr->c_slist_ptr != NULL)
H5SL_close(aux_ptr->c_slist_ptr);
+ if(aux_ptr->candidate_slist_ptr != NULL)
+ H5SL_close(aux_ptr->candidate_slist_ptr);
aux_ptr->magic = 0;
H5FL_FREE(H5AC_aux_t, aux_ptr);
aux_ptr = NULL;
@@ -902,8 +887,6 @@ H5AC_get_entry_status(const H5F_t *f,
haddr_t addr,
unsigned * status_ptr)
{
- H5C_t *cache_ptr = f->shared->cache;
- herr_t result;
hbool_t in_cache;
hbool_t is_dirty;
hbool_t is_protected;
@@ -916,50 +899,31 @@ H5AC_get_entry_status(const H5F_t *f,
FUNC_ENTER_NOAPI(H5AC_get_entry_status, FAIL)
- if ( ( cache_ptr == NULL ) ||
- ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ||
- ( ! H5F_addr_defined(addr) ) ||
- ( status_ptr == NULL ) ) {
-
+ if((f == NULL) || (!H5F_addr_defined(addr)) || (status_ptr == NULL))
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad param(s) on entry.")
- }
-
- result = H5C_get_entry_status(f, addr, &entry_size, &in_cache,
- &is_dirty, &is_protected, &is_pinned, &is_flush_dep_parent,
- &is_flush_dep_child);
-
- if ( result < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "H5C_get_entry_status() failed.")
- }
- if ( in_cache ) {
+ if(H5C_get_entry_status(f, addr, &entry_size, &in_cache, &is_dirty,
+ &is_protected, &is_pinned, &is_flush_dep_parent, &is_flush_dep_child) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_get_entry_status() failed.")
+ if(in_cache) {
status |= H5AC_ES__IN_CACHE;
-
- if ( is_dirty )
+ if(is_dirty)
status |= H5AC_ES__IS_DIRTY;
-
- if ( is_protected )
+ if(is_protected)
status |= H5AC_ES__IS_PROTECTED;
-
- if ( is_pinned )
+ if(is_pinned)
status |= H5AC_ES__IS_PINNED;
-
- if ( is_flush_dep_parent )
+ if(is_flush_dep_parent)
status |= H5AC_ES__IS_FLUSH_DEP_PARENT;
-
- if ( is_flush_dep_child )
+ if(is_flush_dep_child)
status |= H5AC_ES__IS_FLUSH_DEP_CHILD;
- }
+ } /* end if */
*status_ptr = status;
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_get_entry_status() */
@@ -982,9 +946,6 @@ herr_t
H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
void *thing, unsigned int flags)
{
-#ifdef H5_HAVE_PARALLEL
- H5AC_aux_t * aux_ptr = NULL;
-#endif /* H5_HAVE_PARALLEL */
#if H5AC__TRACE_FILE_ENABLED
char trace[128] = "";
size_t trace_entry_size = 0;
@@ -1040,26 +1001,20 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
#endif /* H5AC__TRACE_FILE_ENABLED */
#ifdef H5_HAVE_PARALLEL
- if(NULL != (aux_ptr = f->shared->cache->aux_ptr)) {
+{
+ H5AC_aux_t *aux_ptr;
+
+ if(NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr)) {
+ /* Log the new entry */
if(H5AC_log_inserted_entry(f, f->shared->cache, (H5AC_info_t *)thing) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5AC_log_inserted_entry() failed")
/* Check if we should try to flush */
- if(aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold) {
- hbool_t evictions_enabled;
-
- /* Query if evictions are allowed */
- if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
-
- /* Flush if evictions are allowed */
- if(evictions_enabled) {
- if(H5AC_propagate_flushed_and_still_clean_entries_list(f,
- H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0 )
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
- } /* end if */
- } /* end if */
+ if(aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)
+ if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
} /* end if */
+}
#endif /* H5_HAVE_PARALLEL */
done:
@@ -1108,7 +1063,7 @@ H5AC_mark_entry_dirty(void *thing)
* occult errors.
*/
if((H5C_get_trace_file_ptr_from_entry(thing, &trace_file_ptr) >= 0) &&
- (NULL != trace_file_ptr))
+ (NULL != trace_file_ptr))
sprintf(trace, "%s 0x%lx", FUNC,
(unsigned long)(((H5C_cache_entry_t *)thing)->addr));
#endif /* H5AC__TRACE_FILE_ENABLED */
@@ -1159,15 +1114,14 @@ done:
herr_t
H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_addr)
{
- herr_t result;
- herr_t ret_value=SUCCEED; /* Return value */
-#ifdef H5_HAVE_PARALLEL
- H5AC_aux_t * aux_ptr = NULL;
-#endif /* H5_HAVE_PARALLEL */
#if H5AC__TRACE_FILE_ENABLED
char trace[128] = "";
FILE * trace_file_ptr = NULL;
#endif /* H5AC__TRACE_FILE_ENABLED */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr;
+#endif /* H5_HAVE_PARALLEL */
+ herr_t ret_value=SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5AC_move_entry, FAIL)
@@ -1197,52 +1151,31 @@ H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t ne
#endif /* H5AC__TRACE_FILE_ENABLED */
#ifdef H5_HAVE_PARALLEL
- if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) {
+ /* Log moving the entry */
+ if(NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr)) {
if(H5AC_log_moved_entry(f, old_addr, new_addr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log moved entry")
- }
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
- result = H5C_move_entry(f->shared->cache,
- type,
- old_addr,
- new_addr);
-
- if ( result < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, \
- "H5C_move_entry() failed.")
- }
+ if(H5C_move_entry(f->shared->cache, type, old_addr, new_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "H5C_move_entry() failed.")
#ifdef H5_HAVE_PARALLEL
/* Check if we should try to flush */
- if(aux_ptr && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) {
- hbool_t evictions_enabled;
-
- /* Query if evictions are allowed */
- if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
-
- /* Flush if evictions are allowed */
- if(evictions_enabled) {
- if(H5AC_propagate_flushed_and_still_clean_entries_list(f,
- H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
- } /* end if */
+ if(NULL != aux_ptr && aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold) {
+ if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
} /* end if */
#endif /* H5_HAVE_PARALLEL */
done:
-
#if H5AC__TRACE_FILE_ENABLED
- if ( trace_file_ptr != NULL ) {
-
+ if(trace_file_ptr != NULL)
HDfprintf(trace_file_ptr, "%s %d\n", trace, (int)ret_value);
- }
#endif /* H5AC__TRACE_FILE_ENABLED */
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_move_entry() */
@@ -1331,7 +1264,6 @@ H5AC_create_flush_dependency(void * parent_thing, void * child_thing)
FUNC,
(unsigned long)(((H5C_cache_entry_t *)parent_thing)->addr),
(unsigned long)(((H5C_cache_entry_t *)child_thing)->addr));
- } /* end if */
#endif /* H5AC__TRACE_FILE_ENABLED */
if(H5C_create_flush_dependency(parent_thing, child_thing) < 0)
@@ -1381,12 +1313,12 @@ H5AC_protect(H5F_t *f,
{
unsigned protect_flags = H5C__NO_FLAGS_SET;
void * thing = (void *)NULL;
- void * ret_value; /* Return value */
#if H5AC__TRACE_FILE_ENABLED
char trace[128] = "";
size_t trace_entry_size = 0;
FILE * trace_file_ptr = NULL;
#endif /* H5AC__TRACE_FILE_ENABLED */
+ void * ret_value; /* Return value */
FUNC_ENTER_NOAPI(H5AC_protect, NULL)
@@ -1415,7 +1347,7 @@ H5AC_protect(H5F_t *f,
( H5C_get_trace_file_ptr(f->shared->cache, &trace_file_ptr) >= 0) &&
( trace_file_ptr != NULL ) ) {
- char * rw_string;
+ const char * rw_string;
if ( rw == H5AC_WRITE ) {
@@ -1627,12 +1559,11 @@ H5AC_destroy_flush_dependency(void * parent_thing, void * child_thing)
#if H5AC__TRACE_FILE_ENABLED
if((H5C_get_trace_file_ptr_from_entry(parent_thing, &trace_file_ptr) >= 0) &&
- (NULL != trace_file_ptr))
- sprintf(trace, "%s %lx",
+ (NULL != trace_file_ptr))
+ sprintf(trace, "%s %llx %llx",
FUNC,
- (unsigned long)(((H5C_cache_entry_t *)parent_thing)->addr),
- (unsigned long)(((H5C_cache_entry_t *)child_thing)->addr));
- } /* end if */
+ (unsigned long long)(((H5C_cache_entry_t *)parent_thing)->addr),
+ (unsigned long long)(((H5C_cache_entry_t *)child_thing)->addr));
#endif /* H5AC__TRACE_FILE_ENABLED */
if(H5C_destroy_flush_dependency(parent_thing, child_thing) < 0)
@@ -1640,7 +1571,7 @@ H5AC_destroy_flush_dependency(void * parent_thing, void * child_thing)
done:
#if H5AC__TRACE_FILE_ENABLED
- if( trace_file_ptr != NULL )
+ if(trace_file_ptr != NULL)
HDfprintf(trace_file_ptr, "%s %d\n", trace, (int)ret_value);
#endif /* H5AC__TRACE_FILE_ENABLED */
@@ -1690,7 +1621,6 @@ herr_t
H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
void *thing, unsigned flags)
{
- herr_t result;
hbool_t dirtied;
hbool_t deleted;
#ifdef H5_HAVE_PARALLEL
@@ -1740,93 +1670,92 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
/* Check if the size changed out from underneath us, if we're not deleting
* the entry.
*/
- if ( dirtied && !deleted ) {
+ if(dirtied && !deleted) {
size_t curr_size = 0;
- if ( (type->size)(f, thing, &curr_size) < 0 ) {
-
- HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \
- "Can't get size of thing")
- }
+ if((type->size)(f, thing, &curr_size) < 0)
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, "Can't get size of thing")
if(((H5AC_info_t *)thing)->size != curr_size)
HGOTO_ERROR(H5E_CACHE, H5E_BADSIZE, FAIL, "size of entry changed")
- }
+ } /* end if */
#ifdef H5_HAVE_PARALLEL
- if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) &&
- ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) {
+ if((dirtied) && (((H5AC_info_t *)thing)->is_dirty == FALSE) &&
+ (NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr))) {
if(H5AC_log_dirtied_entry((H5AC_info_t *)thing, addr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log dirtied entry")
- }
-
- if ( ( (flags & H5C__DELETED_FLAG) != 0 ) &&
- ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) &&
- ( aux_ptr->mpi_rank == 0 ) ) {
-
- result = H5AC_log_deleted_entry(f->shared->cache,
- (H5AC_info_t *)thing,
- addr,
- flags);
-
- if ( result < 0 ) {
+ } /* end if */
- HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
- "H5AC_log_deleted_entry() failed.")
- }
- }
+ if((deleted) &&
+ (NULL != (aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr))) &&
+ (aux_ptr->mpi_rank == 0)) {
+ if(H5AC_log_deleted_entry(f->shared->cache, (H5AC_info_t *)thing, addr, flags) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC_log_deleted_entry() failed.")
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
- result = H5C_unprotect(f,
- dxpl_id,
- H5AC_noblock_dxpl_id,
- type,
- addr,
- thing,
- flags);
-
- if ( result < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
- "H5C_unprotect() failed.")
- }
+ if(H5C_unprotect(f, dxpl_id, H5AC_noblock_dxpl_id, type, addr, thing, flags) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5C_unprotect() failed.")
#ifdef H5_HAVE_PARALLEL
/* Check if we should try to flush */
- if(aux_ptr && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) {
- hbool_t evictions_enabled;
-
- /* Query if evictions are allowed */
- if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
-
- /* Flush if evictions are allowed */
- if(evictions_enabled) {
- if(H5AC_propagate_flushed_and_still_clean_entries_list(f,
- H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
- } /* end if */
+ if((aux_ptr != NULL) && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) {
+ if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
} /* end if */
#endif /* H5_HAVE_PARALLEL */
done:
-
#if H5AC__TRACE_FILE_ENABLED
- if ( trace_file_ptr != NULL ) {
-
+ if(trace_file_ptr != NULL)
HDfprintf(trace_file_ptr, "%s %x %d\n",
- trace,
- (unsigned)flags,
- (int)ret_value);
- }
+ trace, (unsigned)flags, (int)ret_value);
#endif /* H5AC__TRACE_FILE_ENABLED */
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_unprotect() */
/*-------------------------------------------------------------------------
+ * Function: HA5C_set_sync_point_done_callback
+ *
+ * Purpose: Set the value of the sync_point_done callback. This
+ * callback is used by the parallel test code to verify
+ * that the expected writes and only the expected writes
+ * take place during a sync point.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 5/9/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_set_sync_point_done_callback(H5C_t * cache_ptr,
+ void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl))
+{
+ H5AC_aux_t * aux_ptr;
+
+ FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5AC_set_sync_point_done_callback)
+
+ HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ aux_ptr->sync_point_done = sync_point_done;
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5AC_set_sync_point_done_callback() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
* Function: HA5C_set_write_done_callback
*
* Purpose: Set the value of the write_done callback. This callback
@@ -1845,29 +1774,20 @@ herr_t
H5AC_set_write_done_callback(H5C_t * cache_ptr,
void (* write_done)(void))
{
- herr_t ret_value = SUCCEED; /* Return value */
- H5AC_aux_t * aux_ptr = NULL;
+ H5AC_aux_t * aux_ptr;
- FUNC_ENTER_NOAPI(H5AC_set_write_done_callback, FAIL)
+ FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5AC_set_write_done_callback)
- /* This would normally be an assert, but we need to use an HGOTO_ERROR
- * call to shut up the compiler.
- */
- if ( ( ! cache_ptr ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) {
+ HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC));
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr")
- }
-
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
aux_ptr->write_done = write_done;
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-
+ FUNC_LEAVE_NOAPI(SUCCEED)
} /* H5AC_set_write_done_callback() */
#endif /* H5_HAVE_PARALLEL */
@@ -1900,7 +1820,6 @@ H5AC_stats(const H5F_t *f)
done:
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_stats() */
@@ -1958,13 +1877,8 @@ H5AC_get_cache_auto_resize_config(const H5AC_t * cache_ptr,
"H5C_get_cache_auto_resize_config() failed.")
}
- result = H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled);
-
- if ( result < 0 ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "H5C_get_resize_enabled() failed.")
- }
+ if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_get_resize_enabled() failed.")
if ( internal_config.rpt_fcn == NULL ) {
@@ -2008,11 +1922,16 @@ H5AC_get_cache_auto_resize_config(const H5AC_t * cache_ptr,
config_ptr->dirty_bytes_threshold =
((H5AC_aux_t *)(cache_ptr->aux_ptr))->dirty_bytes_threshold;
+ config_ptr->metadata_write_strategy =
+ ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy;
} else {
#endif /* H5_HAVE_PARALLEL */
- config_ptr->dirty_bytes_threshold = H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD;
+ config_ptr->dirty_bytes_threshold =
+ H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD;
+ config_ptr->metadata_write_strategy =
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY;
#ifdef H5_HAVE_PARALLEL
}
@@ -2211,24 +2130,6 @@ H5AC_set_cache_auto_resize_config(H5AC_t *cache_ptr,
}
}
- if (
- (
- config_ptr->dirty_bytes_threshold
- <
- H5AC__MIN_DIRTY_BYTES_THRESHOLD
- )
- ||
- (
- config_ptr->dirty_bytes_threshold
- >
- H5AC__MAX_DIRTY_BYTES_THRESHOLD
- )
- ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
- "config_ptr->dirty_bytes_threshold out of range.")
- }
-
if ( config_ptr->close_trace_file ) {
if ( H5AC_close_trace_file(cache_ptr) < 0 ) {
@@ -2262,6 +2163,9 @@ H5AC_set_cache_auto_resize_config(H5AC_t *cache_ptr,
((H5AC_aux_t *)(cache_ptr->aux_ptr))->dirty_bytes_threshold =
config_ptr->dirty_bytes_threshold;
+
+ ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy =
+ config_ptr->metadata_write_strategy;
}
#endif /* H5_HAVE_PARALLEL */
@@ -2277,7 +2181,7 @@ done:
( trace_file_ptr != NULL ) ) {
HDfprintf(trace_file_ptr,
- "%s %d %d %d %d \"%s\" %d %d %d %f %d %d %ld %d %f %f %d %f %f %d %d %d %f %f %d %d %d %d %f %d %d\n",
+ "%s %d %d %d %d \"%s\" %d %d %d %f %d %d %ld %d %f %f %d %f %f %d %d %d %f %f %d %d %d %d %f %d %d %d\n",
"H5AC_set_cache_auto_resize_config",
trace_config.version,
(int)(trace_config.rpt_fcn_enabled),
@@ -2308,6 +2212,7 @@ done:
(int)(trace_config.apply_empty_reserve),
trace_config.empty_reserve,
trace_config.dirty_bytes_threshold,
+ trace_config.metadata_write_strategy,
(int)ret_value);
}
#endif /* H5AC__TRACE_FILE_ENABLED */
@@ -2342,45 +2247,28 @@ done:
herr_t
H5AC_validate_config(H5AC_cache_config_t * config_ptr)
{
- herr_t result;
H5C_auto_size_ctl_t internal_config;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5AC_validate_config, FAIL)
- if ( config_ptr == NULL ) {
-
+ if(config_ptr == NULL)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "NULL config_ptr on entry.")
- }
-
- if ( config_ptr->version != H5AC__CURR_CACHE_CONFIG_VERSION ) {
+ if(config_ptr->version != H5AC__CURR_CACHE_CONFIG_VERSION)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown config version.")
- }
- if ( ( config_ptr->rpt_fcn_enabled != TRUE ) &&
- ( config_ptr->rpt_fcn_enabled != FALSE ) ) {
+ if((config_ptr->rpt_fcn_enabled != TRUE) && (config_ptr->rpt_fcn_enabled != FALSE))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->rpt_fcn_enabled must be either TRUE or FALSE.")
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \
- "config_ptr->rpt_fcn_enabled must be either TRUE or FALSE.")
- }
-
- if ( ( config_ptr->open_trace_file != TRUE ) &&
- ( config_ptr->open_trace_file != FALSE ) ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \
- "config_ptr->open_trace_file must be either TRUE or FALSE.")
- }
+ if((config_ptr->open_trace_file != TRUE) && (config_ptr->open_trace_file != FALSE))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->open_trace_file must be either TRUE or FALSE.")
- if ( ( config_ptr->close_trace_file != TRUE ) &&
- ( config_ptr->close_trace_file != FALSE ) ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \
- "config_ptr->close_trace_file must be either TRUE or FALSE.")
- }
+ if((config_ptr->close_trace_file != TRUE) && (config_ptr->close_trace_file != FALSE))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->close_trace_file must be either TRUE or FALSE.")
/* don't bother to test trace_file_name unless open_trace_file is TRUE */
- if ( config_ptr->open_trace_file ) {
+ if(config_ptr->open_trace_file) {
size_t name_len;
/* Can't really test the trace_file_name field without trying to
@@ -2389,15 +2277,10 @@ H5AC_validate_config(H5AC_cache_config_t * config_ptr)
*/
name_len = HDstrlen(config_ptr->trace_file_name);
- if ( name_len == 0 ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \
- "config_ptr->trace_file_name is empty.")
-
- } else if ( name_len > H5AC__MAX_TRACE_FILE_NAME_LEN ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \
- "config_ptr->trace_file_name too long.")
+ if(name_len == 0) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->trace_file_name is empty.")
+ } else if(name_len > H5AC__MAX_TRACE_FILE_NAME_LEN) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->trace_file_name too long.")
}
}
@@ -2417,36 +2300,24 @@ H5AC_validate_config(H5AC_cache_config_t * config_ptr)
"Can't disable evictions while auto-resize is enabled.")
}
- if ( config_ptr->dirty_bytes_threshold < H5AC__MIN_DIRTY_BYTES_THRESHOLD ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
- "dirty_bytes_threshold too small.")
- } else
- if ( config_ptr->dirty_bytes_threshold > H5AC__MAX_DIRTY_BYTES_THRESHOLD ) {
-
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
- "dirty_bytes_threshold too big.")
- }
-
- if ( H5AC_ext_config_2_int_config(config_ptr, &internal_config) !=
- SUCCEED ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "H5AC_ext_config_2_int_config() failed.")
+ if(config_ptr->dirty_bytes_threshold < H5AC__MIN_DIRTY_BYTES_THRESHOLD) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dirty_bytes_threshold too small.")
+ } else if(config_ptr->dirty_bytes_threshold > H5AC__MAX_DIRTY_BYTES_THRESHOLD) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dirty_bytes_threshold too big.")
}
- result = H5C_validate_resize_config(&internal_config,
- H5C_RESIZE_CFG__VALIDATE_ALL);
+ if((config_ptr->metadata_write_strategy != H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY) &&
+ (config_ptr->metadata_write_strategy != H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->metadata_write_strategy out of range.")
- if ( result != SUCCEED ) {
+ if(H5AC_ext_config_2_int_config(config_ptr, &internal_config) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_ext_config_2_int_config() failed.")
+ if(H5C_validate_resize_config(&internal_config, H5C_RESIZE_CFG__VALIDATE_ALL) < 0)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "error(s) in new config.")
- }
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_validate_config() */
@@ -2619,12 +2490,192 @@ done:
} /* H5AC_open_trace_file() */
+/*-------------------------------------------------------------------------
+ * Function: H5AC_add_candidate()
+ *
+ * Purpose: Add the supplied metadata entry address to the candidate
+ * list. Verify that each entry added does not appear in
+ * the list prior to its insertion.
+ *
+ * This function is intended for used in constructing list
+ * of entried to be flushed during sync points. It shouldn't
+ * be called anywhere else.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_add_candidate(H5AC_t * cache_ptr,
+ haddr_t addr)
+{
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_add_candidate, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+ HDassert( aux_ptr->candidate_slist_ptr != NULL );
+
+ /* If the supplied address appears in the candidate list, scream and die. */
+ if(NULL != H5SL_search(aux_ptr->candidate_slist_ptr, (void *)(&addr)))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry already in candidate slist.")
+
+ /* otherwise, construct an entry for the supplied address, and insert
+ * it into the candidate slist.
+ */
+ if(NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "Can't allocate candidate slist entry .")
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
+
+ aux_ptr->candidate_slist_len += 1;
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_add_candidate() */
+#endif /* H5_HAVE_PARALLEL */
+
+
/*************************************************************************/
/**************************** Private Functions: *************************/
/*************************************************************************/
/*-------------------------------------------------------------------------
*
+ * Function: H5AC_broadcast_candidate_list()
+ *
+ * Purpose: Broadcast the contents of the process 0 candidate entry
+ * slist. In passing, also remove all entries from said
+ * list. As the application of this will be handled by
+ * the same functions on all processes, construct and
+ * return a copy of the list in the same format as that
+ * received by the other processes. Note that if this
+ * copy is returned in *haddr_buf_ptr_ptr, the caller
+ * must free it.
+ *
+ * This function must only be called by the process with
+ * MPI_rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/1/05
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_broadcast_candidate_list(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr)
+{
+ herr_t result;
+ hbool_t success = FALSE;
+ H5AC_aux_t * aux_ptr = NULL;
+ haddr_t * haddr_buf_ptr = NULL;
+ MPI_Offset * MPI_Offset_buf_ptr = NULL;
+ size_t buf_size = 0;
+ int mpi_result;
+ int chk_num_entries = 0;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_broadcast_candidate_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+ HDassert( aux_ptr->candidate_slist_ptr != NULL );
+ HDassert( H5SL_count(aux_ptr->candidate_slist_ptr) ==
+ (size_t)(aux_ptr->candidate_slist_len) );
+ HDassert( num_entries_ptr != NULL );
+ HDassert( *num_entries_ptr == 0 );
+ HDassert( haddr_buf_ptr_ptr != NULL );
+ HDassert( *haddr_buf_ptr_ptr == NULL );
+
+ /* First broadcast the number of entries in the list so that the
+ * receivers can set up buffers to receive them. If there aren't
+ * any, we are done.
+ */
+ num_entries = aux_ptr->candidate_slist_len;
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
+
+ if(num_entries > 0) {
+ /* convert the candidate list into the format we
+ * are used to receiving from process 0, and also load it
+ * into a buffer for transmission.
+ */
+ if(H5AC_copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries,
+ &haddr_buf_ptr, &buf_size, &MPI_Offset_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
+
+ HDassert( chk_num_entries == num_entries );
+ HDassert( haddr_buf_ptr != NULL );
+ HDassert( MPI_Offset_buf_ptr != NULL );
+ HDassert( aux_ptr->candidate_slist_len == 0 );
+
+ /* Now broadcast the list of candidate entries -- if there is one.
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
+ } /* end if */
+
+ success = TRUE;
+
+done:
+ if(MPI_Offset_buf_ptr != NULL)
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
+
+ if(success) {
+ /* Pass the number of entries and the buffer pointer
+ * back to the caller. Do this so that we can use the same code
+ * to apply the candidate list to all the processes.
+ */
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+ } else if(haddr_buf_ptr != NULL) {
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_broadcast_candidate_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5AC_broadcast_clean_list()
*
* Purpose: Broadcast the contents of the process 0 cleaned entry
@@ -2649,6 +2700,7 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
{
herr_t ret_value = SUCCEED; /* Return value */
haddr_t addr;
+ haddr_t * addr_buf_ptr = NULL;
H5AC_aux_t * aux_ptr = NULL;
H5SL_node_t * slist_node_ptr = NULL;
H5AC_slist_entry_t * slist_entry_ptr = NULL;
@@ -2656,14 +2708,14 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
size_t buf_size;
int i = 0;
int mpi_result;
- int num_entries;
+ int num_entries = 0;
FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL)
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+ aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr;
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
@@ -2701,13 +2753,28 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
"memory allocation failed for clean entry buffer")
}
+ /* if the sync_point_done callback is defined, allocate the
+ * addr buffer as well.
+ */
+ if ( aux_ptr->sync_point_done != NULL ) {
+
+ addr_buf_ptr = H5MM_malloc((size_t)(num_entries * sizeof(haddr_t)));
+
+ if ( addr_buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for addr buffer")
+ }
+ }
+
+
/* now load the entry base addresses into the buffer, emptying the
* cleaned entry list in passing
*/
while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) )
{
- slist_entry_ptr = H5SL_item(slist_node_ptr);
+ slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_item(slist_node_ptr);
HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
@@ -2715,6 +2782,11 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
addr = slist_entry_ptr->addr;
+ if ( addr_buf_ptr != NULL ) {
+
+ addr_buf_ptr[i] = addr;
+ }
+
if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) {
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
@@ -2742,19 +2814,12 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
/* and also remove the matching entry from the dirtied list
* if it exists.
*/
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
- (void *)(&addr))) != NULL ) {
-
- HDassert( slist_entry_ptr->magic ==
- H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ if((slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL) {
+ HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
HDassert( slist_entry_ptr->addr == addr );
- if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
- != slist_entry_ptr ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
- "Can't delete entry from dirty entry slist.")
- }
+ if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.")
slist_entry_ptr->magic = 0;
H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
@@ -2763,8 +2828,7 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
aux_ptr->d_slist_len -= 1;
HDassert( aux_ptr->d_slist_len >= 0 );
- }
-
+ } /* end if */
} /* while */
@@ -2785,15 +2849,16 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
}
}
-done:
-
- if ( buf_ptr != NULL ) {
+ if(aux_ptr->sync_point_done != NULL)
+ (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr);
+done:
+ if(buf_ptr != NULL)
buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr);
- }
+ if(addr_buf_ptr != NULL)
+ addr_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)addr_buf_ptr);
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_broadcast_clean_list() */
#endif /* H5_HAVE_PARALLEL */
@@ -2849,7 +2914,9 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f,
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
- if ( aux_ptr->mpi_rank == 0 ) {
+ if ( ( aux_ptr->mpi_rank == 0 ) ||
+ ( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ) ) {
write_permitted = aux_ptr->write_permitted;
@@ -2870,6 +2937,222 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5AC_construct_candidate_list()
+ *
+ * Purpose: In the parallel case when the metadata_write_strategy is
+ * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses
+ * this function to construct the list of cache entries to
+ * be flushed. This list is then propagated to the other
+ * caches, and then flushed in a distributed fashion.
+ *
+ * The sync_point_op parameter is used to determine the extent
+ * of the flush.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_construct_candidate_list(H5AC_t * cache_ptr,
+ H5AC_aux_t * aux_ptr,
+ int sync_point_op)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_construct_candidate_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+ HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE ) ||
+ ( aux_ptr->mpi_rank == 0 ) );
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_len == 0 );
+ HDassert( aux_ptr->candidate_slist_ptr != NULL );
+ HDassert( aux_ptr->candidate_slist_len == 0 );
+ HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN ) ||
+ ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE ) );
+
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.")
+ break;
+ } /* end switch */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_construct_candidate_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_copy_candidate_list_to_buffer
+ *
+ * Purpose: Allocate buffer(s) and copy the contents of the candidate
+ * entry slist into it (them). In passing, remove all
+ * entries from the candidate slist. Note that the
+ * candidate slist must not be empty.
+ *
+ * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer
+ * of MPI_Offset, copy the contents of the candidate
+ * entry list into it with the appropriate conversions,
+ * and return the base address of the buffer in
+ * *MPI_Offset_buf_ptr. Note that this is the buffer
+ * used by process 0 to transmit the list of entries to
+ * be flushed to all other processes (in this file group).
+ *
+ * Similarly, allocate a buffer of haddr_t, load the contents
+ * of the candidate list into this buffer, and return its
+ * base address in *haddr_buf_ptr_ptr. Note that this
+ * latter buffer is constructed unconditionally.
+ *
+ * In passing, also remove all entries from the candidate
+ * entry slist.
+ *
+ * Return: Return SUCCEED on success, and FAIL on failure.
+ *
+ * Programmer: John Mainzer, 4/19/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_copy_candidate_list_to_buffer(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr,
+ size_t * MPI_Offset_buf_size_ptr,
+ MPI_Offset ** MPI_Offset_buf_ptr_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t success = FALSE;
+ haddr_t addr;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5SL_node_t * slist_node_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ MPI_Offset * MPI_Offset_buf_ptr = NULL;
+ haddr_t * haddr_buf_ptr = NULL;
+ size_t buf_size;
+ int i = 0;
+ int num_entries = 0;
+
+ FUNC_ENTER_NOAPI(H5AC_copy_candidate_list_to_buffer, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+ HDassert( aux_ptr->candidate_slist_ptr != NULL );
+ HDassert( H5SL_count(aux_ptr->candidate_slist_ptr) ==
+ (size_t)(aux_ptr->candidate_slist_len) );
+ HDassert( aux_ptr->candidate_slist_len > 0 );
+ HDassert( num_entries_ptr != NULL );
+ HDassert( *num_entries_ptr == 0 );
+ HDassert( haddr_buf_ptr_ptr != NULL );
+ HDassert( *haddr_buf_ptr_ptr == NULL );
+
+ num_entries = aux_ptr->candidate_slist_len;
+
+ /* allocate a buffer(s) to store the list of candidate entry
+ * base addresses in
+ */
+ if(MPI_Offset_buf_ptr_ptr != NULL) {
+ HDassert( MPI_Offset_buf_size_ptr != NULL );
+
+ /* allocate a buffer of MPI_Offset */
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+ if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for MPI_Offset buffer")
+ } /* end if */
+
+ /* allocate a buffer of haddr_t */
+ if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer")
+
+ /* now load the entry base addresses into the buffer, emptying the
+ * candidate entry list in passing
+ */
+ while(NULL != (slist_node_ptr = H5SL_first(aux_ptr->candidate_slist_ptr))) {
+ slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_item(slist_node_ptr);
+
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert( i < num_entries );
+
+ addr = slist_entry_ptr->addr;
+ haddr_buf_ptr[i] = addr;
+ if(MPI_Offset_buf_ptr != NULL) {
+ if(H5FD_mpi_haddr_to_MPIOff(addr, &(MPI_Offset_buf_ptr[i])) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
+ } /* end if */
+
+ i++;
+
+ /* now remove the entry from the cleaned entry list */
+ if(H5SL_remove(aux_ptr->candidate_slist_ptr, (void *)(&addr)) != slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from candidate entry slist.")
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->candidate_slist_len -= 1;
+
+ HDassert( aux_ptr->candidate_slist_len >= 0 );
+ } /* while */
+ HDassert( aux_ptr->candidate_slist_len == 0 );
+
+ success = TRUE;
+
+done:
+ if(success) {
+ /* Pass the number of entries and the buffer pointer
+ * back to the caller.
+ */
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+
+ if(MPI_Offset_buf_ptr_ptr != NULL) {
+ HDassert( MPI_Offset_buf_ptr != NULL);
+ *MPI_Offset_buf_size_ptr = buf_size;
+ *MPI_Offset_buf_ptr_ptr = MPI_Offset_buf_ptr;
+ } /* end if */
+ } /* end if */
+ else {
+ if(MPI_Offset_buf_ptr != NULL)
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
+ if(haddr_buf_ptr != NULL)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+ } /* end else */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_copy_candidate_list_to_buffer() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
* Function: H5AC_ext_config_2_int_config()
*
* Purpose: Utility function to translate an instance of
@@ -2968,16 +3251,16 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr,
haddr_t addr,
unsigned int flags)
{
- herr_t ret_value = SUCCEED; /* Return value */
- H5AC_aux_t * aux_ptr = NULL;
+ H5AC_aux_t * aux_ptr;
H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL)
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
@@ -2987,25 +3270,17 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr,
HDassert( (flags & H5C__DELETED_FLAG) != 0 );
- if ( aux_ptr->mpi_rank == 0 ) {
-
+ if(aux_ptr->mpi_rank == 0) {
HDassert( aux_ptr->d_slist_ptr != NULL );
HDassert( aux_ptr->c_slist_ptr != NULL );
/* if the entry appears in the dirtied entry slist, remove it. */
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
- (void *)(&addr))) != NULL ) {
-
- HDassert( slist_entry_ptr->magic ==
- H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
- HDassert( slist_entry_ptr->addr == addr );
-
- if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
- != slist_entry_ptr ) {
+ if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL) {
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert(slist_entry_ptr->addr == addr);
- HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
- "Can't delete entry from dirty entry slist.")
- }
+ if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.")
slist_entry_ptr->magic = 0;
H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
@@ -3014,22 +3289,15 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr,
aux_ptr->d_slist_len -= 1;
HDassert( aux_ptr->d_slist_len >= 0 );
- }
+ } /* end if */
/* if the entry appears in the cleaned entry slist, remove it. */
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
- (void *)(&addr))) != NULL ) {
-
- HDassert( slist_entry_ptr->magic ==
- H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
- HDassert( slist_entry_ptr->addr == addr );
-
- if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
- != slist_entry_ptr ) {
+ if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) != NULL) {
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert(slist_entry_ptr->addr == addr);
- HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
- "Can't delete entry from cleaned entry slist.")
- }
+ if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from cleaned entry slist.")
slist_entry_ptr->magic = 0;
H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
@@ -3038,13 +3306,11 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr,
aux_ptr->c_slist_len -= 1;
HDassert( aux_ptr->c_slist_len >= 0 );
- }
- }
+ } /* end if */
+ } /* if */
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_log_deleted_entry() */
#endif /* H5_HAVE_PARALLEL */
@@ -3092,7 +3358,7 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr,
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
@@ -3132,24 +3398,16 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr,
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
}
- if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) {
-
+ if(H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL) {
/* the entry is dirty. If it exists on the cleaned entries list,
* remove it.
*/
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
- (void *)(&addr))) != NULL ) {
-
- HDassert( slist_entry_ptr->magic ==
- H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
- HDassert( slist_entry_ptr->addr == addr );
+ if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) != NULL) {
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert(slist_entry_ptr->addr == addr);
- if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
- != slist_entry_ptr ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
- "Can't delete entry from clean entry slist.")
- }
+ if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from clean entry slist.")
slist_entry_ptr->magic = 0;
H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
@@ -3158,8 +3416,8 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr,
aux_ptr->c_slist_len -= 1;
HDassert( aux_ptr->c_slist_len >= 0 );
- }
- }
+ } /* end if */
+ } /* end if */
} else {
aux_ptr->dirty_bytes += entry_ptr->size;
@@ -3199,34 +3457,6 @@ done:
*-------------------------------------------------------------------------
*/
#ifdef H5_HAVE_PARALLEL
-#if 0 /* This is useful debugging code. -- JRM */
-static herr_t
-H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
- haddr_t addr,
- hbool_t was_dirty,
- unsigned flags,
- int type_id)
-{
- herr_t ret_value = SUCCEED; /* Return value */
- H5AC_aux_t * aux_ptr = NULL;
-
- FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL)
-
- aux_ptr = cache_ptr->aux_ptr;
-
- if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) {
-
- HDfprintf(stdout,
- "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n",
- (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id);
- }
-done:
-
- FUNC_LEAVE_NOAPI(ret_value)
-
-} /* H5AC_log_flushed_entry_dummy() */
-#endif /* JRM */
-
static herr_t
H5AC_log_flushed_entry(H5C_t * cache_ptr,
haddr_t addr,
@@ -3245,7 +3475,7 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr,
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
@@ -3260,7 +3490,8 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr,
* cleaned list and the dirtied list.
*/
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ if ( (slist_entry_ptr = (H5AC_slist_entry_t *)
+ H5SL_search(aux_ptr->c_slist_ptr,
(void *)(&addr))) != NULL ) {
HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
@@ -3282,8 +3513,8 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr,
HDassert( aux_ptr->c_slist_len >= 0 );
}
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
- (void *)(&addr))) != NULL ) {
+ if ( (slist_entry_ptr = (H5AC_slist_entry_t *)
+ H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL ) {
HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
HDassert( slist_entry_ptr->addr == addr );
@@ -3363,62 +3594,47 @@ H5AC_log_inserted_entry(H5F_t * f,
H5AC_t * cache_ptr,
H5AC_info_t * entry_ptr)
{
+ H5AC_aux_t * aux_ptr;
herr_t ret_value = SUCCEED; /* Return value */
- H5AC_aux_t * aux_ptr = NULL;
- H5AC_slist_entry_t * slist_entry_ptr = NULL;
FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL)
- HDassert( cache_ptr != NULL );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert( aux_ptr != NULL );
- HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
HDassert( entry_ptr != NULL );
- if ( aux_ptr->mpi_rank == 0 ) {
-
- HDassert( aux_ptr->d_slist_ptr != NULL );
- HDassert( aux_ptr->c_slist_ptr != NULL );
-
- if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&entry_ptr->addr)) == NULL ) {
-
- /* insert the address of the entry in the dirty entry list, and
- * add its size to the dirty_bytes count.
- */
- if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
-
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "Can't allocate dirty slist entry .")
- }
-
- slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
- slist_entry_ptr->addr = entry_ptr->addr;
+ if(aux_ptr->mpi_rank == 0) {
+ H5AC_slist_entry_t * slist_entry_ptr;
- if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
- &(slist_entry_ptr->addr)) < 0 ) {
+ HDassert(aux_ptr->d_slist_ptr != NULL);
+ HDassert(aux_ptr->c_slist_ptr != NULL);
- HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
- "can't insert entry into dirty entry slist.")
- }
+ if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (void *)(&entry_ptr->addr)))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.")
- aux_ptr->d_slist_len += 1;
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if(NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "Can't allocate dirty slist entry .")
- } else {
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = entry_ptr->addr;
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Inserted entry already in dirty slist.")
- }
+ if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0 )
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.")
- if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&entry_ptr->addr)) != NULL ) {
+ aux_ptr->d_slist_len += 1;
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Inserted entry in clean slist.")
- }
- }
+ if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (void *)(&entry_ptr->addr)))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.")
+ } /* end if */
aux_ptr->dirty_bytes += entry_ptr->size;
@@ -3428,9 +3644,7 @@ H5AC_log_inserted_entry(H5F_t * f,
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_log_inserted_entry() */
#endif /* H5_HAVE_PARALLEL */
@@ -3505,7 +3719,7 @@ H5AC_log_moved_entry(const H5F_t *f,
HDassert( cache_ptr );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- aux_ptr = cache_ptr->aux_ptr;
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
@@ -3530,8 +3744,8 @@ H5AC_log_moved_entry(const H5F_t *f,
/* if the entry appears in the cleaned entry slist, under its old
* address, remove it.
*/
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
- (void *)(&old_addr))) != NULL ) {
+ if ( (slist_entry_ptr = (H5AC_slist_entry_t *)
+ H5SL_search(aux_ptr->c_slist_ptr, (void *)(&old_addr))) != NULL ) {
HDassert( slist_entry_ptr->magic ==
H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
@@ -3556,8 +3770,8 @@ H5AC_log_moved_entry(const H5F_t *f,
/* if the entry appears in the dirtied entry slist under its old
* address, remove it, but don't free it. Set addr to new_addr.
*/
- if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
- (void *)(&old_addr))) != NULL ) {
+ if ( (slist_entry_ptr = (H5AC_slist_entry_t *)
+ H5SL_search(aux_ptr->d_slist_ptr, (void *)(&old_addr))) != NULL ) {
HDassert( slist_entry_ptr->magic ==
H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
@@ -3633,27 +3847,223 @@ H5AC_log_moved_entry(const H5F_t *f,
}
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_log_moved_entry() */
#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
+ * Function: H5AC_propagate_and_apply_candidate_list
+ *
+ * Purpose: Prior to the addition of support for multiple metadata
+ * write strategies, in PHDF5, only the metadata cache with
+ * mpi rank 0 was allowed to write to file. All other
+ * metadata caches on processes with rank greater than 0
+ * were required to retain dirty entries until they were
+ * notified that the entry was clean.
+ *
+ * This constraint is relaxed with the distributed
+ * metadata write strategy, in which a list of candidate
+ * metadata cache entries is constructed by the process 0
+ * cache and then distributed to the caches of all the other
+ * processes. Once the listed is distributed, many (if not
+ * all) processes writing writing a unique subset of the
+ * entries, and marking the remainder clean. The subsets
+ * are chosen so that each entry in the list of candidates
+ * is written by exactly one cache, and all entries are
+ * marked as being clean in all caches.
+ *
+ * While the list of candidate cache entries is prepared
+ * elsewhere, this function is the main routine for distributing
+ * and applying the list. It must be run simultaniously on
+ * all processes that have the relevant file open. To ensure
+ * proper synchronization, there is a barrier at the beginning
+ * of this function.
+ *
+ * At present, this function is called under one of two
+ * circumstances:
+ *
+ * 1) Dirty byte creation exceeds some user specified value.
+ *
+ * While metadata reads may occur independently, all
+ * operations writing metadata must be collective. Thus
+ * all metadata caches see the same sequence of operations,
+ * and therefore the same dirty data creation.
+ *
+ * This fact is used to synchronize the caches for purposes
+ * of propagating the list of candidate entries, by simply
+ * calling this function from all caches whenever some user
+ * specified threshold on dirty data is exceeded. (the
+ * process 0 cache creates the candidate list just before
+ * calling this function).
+ *
+ * 2) Under direct user control -- this operation must be
+ * collective.
+ *
+ * The operations to be managed by this function are as
+ * follows:
+ *
+ * All processes:
+ *
+ * 1) Participate in an opening barrier.
+ *
+ * For the process with mpi rank 0:
+ *
+ * 1) Load the contents of the candidate list
+ * (candidate_slist_ptr) into a buffer, and broadcast that
+ * buffer to all the other caches. Clear the candidate
+ * list in passing.
+ *
+ * If there is a positive number of candidates, proceed with
+ * the following:
+ *
+ * 2) Apply the candidate entry list.
+ *
+ * 3) Particpate in a closing barrier.
+ *
+ * 4) Remove from the dirty list (d_slist_ptr) and from the
+ * flushed and still clean entries list (c_slist_ptr),
+ * all addresses that appeared in the candidate list, as
+ * these entries are now clean.
+ *
+ *
+ * For all processes with mpi rank greater than 0:
+ *
+ * 1) Receive the candidate entry list broadcast
+ *
+ * If there is a positive number of candidates, proceed with
+ * the following:
+ *
+ * 2) Apply the candidate entry list.
+ *
+ * 3) Particpate in a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_propagate_and_apply_candidate_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ int mpi_code;
+ int num_candidates = 0;
+ haddr_t * candidates_list_ptr = NULL;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_propagate_and_apply_candidate_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we write any entries.
+ */
+ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code)
+
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC_broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.")
+
+ HDassert( aux_ptr->candidate_slist_len == 0 );
+ } /* end if */
+ else {
+ if(H5AC_receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.")
+ } /* end else */
+
+ if(num_candidates > 0) {
+ herr_t result;
+
+ /* all processes apply the candidate list.
+ * H5C_apply_candidate_list() handles the details of
+ * distributing the writes across the processes.
+ */
+
+ aux_ptr->write_permitted = TRUE;
+
+ result = H5C_apply_candidate_list(f,
+ dxpl_id,
+ dxpl_id,
+ cache_ptr,
+ num_candidates,
+ candidates_list_ptr,
+ aux_ptr->mpi_rank,
+ aux_ptr->mpi_size);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
+
+ if(aux_ptr->write_done != NULL)
+ (aux_ptr->write_done)();
+
+ /* to prevent "messages from the past" we must synchronize all
+ * processes again before we go on.
+ */
+ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 2", mpi_code)
+
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC_tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
+ } /* end if */
+ } /* end if */
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done != NULL)
+ (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr);
+
+done:
+ if(candidates_list_ptr != NULL)
+ candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_propagate_and_apply_candidate_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
* Function: H5AC_propagate_flushed_and_still_clean_entries_list
*
- * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed
- * to write to file. All other metadata caches on processes
- * with rank greater than 0 must retain dirty entries until
- * they are notified that the entry is now clean.
+ * Purpose: In PHDF5, if the process 0 only metadata write strategy
+ * is selected, only the metadata cache with mpi rank 0 is
+ * allowed to write to file. All other metadata caches on
+ * processes with rank greater than 0 must retain dirty
+ * entries until they are notified that the entry is now
+ * clean.
*
- * This function is the main routine for that proceedure.
- * It must be called simultaniously on all processes that
- * have the relevant file open. To this end, there must
- * be a barrier immediately prior to this call.
+ * This function is the main routine for handling this
+ * notification proceedure. It must be called
+ * simultaniously on all processes that have the relevant
+ * file open. To this end, it is called only during a
+ * sync point, with a barrier prior to the call.
*
- * Typicaly, this will be done one of two ways:
+ * Note that any metadata entry writes by process 0 will
+ * occur after the barrier and just before this call.
+ *
+ * Typicaly, calls to this function will be triggered in
+ * one of two ways:
*
* 1) Dirty byte creation exceeds some user specified value.
*
@@ -3676,14 +4086,11 @@ done:
*
* For the process with mpi rank 0:
*
- * 1) Enable writes, flush the cache to its min clean size,
- * and then disable writes again.
- *
- * 2) Load the contents of the flushed and still clean entries
+ * 1) Load the contents of the flushed and still clean entries
* list (c_slist_ptr) into a buffer, and broadcast that
* buffer to all the other caches.
*
- * 3) Clear the flushed and still clean entries list
+ * 2) Clear the flushed and still clean entries list
* (c_slist_ptr).
*
*
@@ -3711,113 +4118,156 @@ done:
herr_t
H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
hid_t dxpl_id,
- H5AC_t * cache_ptr,
- hbool_t do_barrier)
+ H5AC_t * cache_ptr)
{
+ H5AC_aux_t * aux_ptr;
herr_t ret_value = SUCCEED; /* Return value */
- herr_t result;
- int mpi_code;
- H5AC_aux_t * aux_ptr = NULL;
FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL)
- HDassert( cache_ptr != NULL );
- HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert(cache_ptr != NULL);
+ HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HDassert( aux_ptr != NULL );
- HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
-
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- HDfprintf(stdout,
- "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
- (int)(aux_ptr->mpi_rank),
- (int)(aux_ptr->dirty_bytes_propagations),
- (int)(aux_ptr->unprotect_dirty_bytes),
- (int)(aux_ptr->unprotect_dirty_bytes_updates),
- (int)(aux_ptr->insert_dirty_bytes),
- (int)(aux_ptr->insert_dirty_bytes_updates),
- (int)(aux_ptr->move_dirty_bytes),
- (int)(aux_ptr->move_dirty_bytes_updates));
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
-
- if ( do_barrier ) {
+ HDassert(aux_ptr != NULL);
+ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
+ HDassert(aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
- /* to prevent "messages from the future" we must synchronize all
- * processes before we start the flush. This synchronization may
- * already be done -- hence the do_barrier parameter.
- */
-
- if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) {
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC_broadcast_clean_list(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.")
+ HDassert( aux_ptr->c_slist_len == 0 );
+ } /* end if */
+ else {
+ if(H5AC_receive_and_apply_clean_list(f, dxpl_id, H5AC_noblock_dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.")
+ } /* end else */
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
- }
- }
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_propagate_flushed_and_still_clean_entries_list() */
+#endif /* H5_HAVE_PARALLEL */
- if ( aux_ptr->mpi_rank == 0 ) {
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_receive_and_apply_clean_list()
+ *
+ * Purpose: Receive the list of cleaned entries from process 0,
+ * and mark the specified entries as clean.
+ *
+ * This function must only be called by the process with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/4/05
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_receive_and_apply_clean_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ H5AC_aux_t * aux_ptr;
+ haddr_t * haddr_buf_ptr = NULL;
+ MPI_Offset * MPI_Offset_buf_ptr = NULL;
+ int mpi_result;
+ int num_entries = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
- aux_ptr->write_permitted = TRUE;
+ FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL)
- result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id);
+ HDassert( f != NULL );
+ HDassert( f->shared->cache == cache_ptr );
- aux_ptr->write_permitted = FALSE;
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- if ( result < 0 ) {
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "H5C_flush_to_min_clean() failed.")
- }
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank != 0 );
- if ( aux_ptr->write_done != NULL ) {
+ /* First receive the number of entries in the list so that we
+ * can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
- (aux_ptr->write_done)();
- }
+ if(num_entries > 0) {
+ size_t buf_size;
+ int i;
- if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) {
+ /* allocate buffers to store the list of entry base addresses in */
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+ if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for receive buffer")
+ if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer")
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Can't broadcast clean slist.")
- }
+ /* Now receive the list of cleaned entries
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
- HDassert( aux_ptr->c_slist_len == 0 );
+ /* translate the MPI_Offsets to haddr_t */
+ i = 0;
+ while(i < num_entries) {
+ haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]);
- } else {
+ if(haddr_buf_ptr[i] == HADDR_UNDEF)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert MPI off to haddr")
- if ( H5AC_receive_and_apply_clean_list(f, dxpl_id,
- H5AC_noblock_dxpl_id,
- cache_ptr) < 0 ) {
+ i++;
+ } /* end while */
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Can't receive and/or process clean slist broadcast.")
- }
- }
+ /* mark the indicated entries as clean */
+ if(H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id,
+ (int32_t)num_entries, &(haddr_buf_ptr[0])) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.")
+ } /* end if */
- aux_ptr->dirty_bytes = 0;
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- aux_ptr->dirty_bytes_propagations += 1;
- aux_ptr->unprotect_dirty_bytes = 0;
- aux_ptr->unprotect_dirty_bytes_updates = 0;
- aux_ptr->insert_dirty_bytes = 0;
- aux_ptr->insert_dirty_bytes_updates = 0;
- aux_ptr->move_dirty_bytes = 0;
- aux_ptr->move_dirty_bytes_updates = 0;
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done != NULL)
+ (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
done:
+ if(MPI_Offset_buf_ptr != NULL)
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
+ if(haddr_buf_ptr != NULL)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
FUNC_LEAVE_NOAPI(ret_value)
-
-} /* H5AC_propagate_flushed_and_still_clean_entries_list() */
+} /* H5AC_receive_and_apply_clean_list() */
#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
*
- * Function: H5AC_receive_and_apply_clean_list()
+ * Function: H5AC_receive_candidate_list()
*
- * Purpose: Receive the list of cleaned entries from process 0,
- * and mark the specified entries as clean.
+ * Purpose: Receive the list of candidate entries from process 0,
+ * and return it in a buffer pointed to by *haddr_buf_ptr_ptr.
+ * Note that the caller must free this buffer if it is
+ * returned.
*
* This function must only be called by the process with
* MPI_rank greater than 0.
@@ -3826,27 +4276,25 @@ done:
*
* Return: Non-negative on success/Negative on failure.
*
- * Programmer: John Mainzer, 7/4/05
+ * Programmer: John Mainzer, 3/17/10
*
*-------------------------------------------------------------------------
*/
#ifdef H5_HAVE_PARALLEL
static herr_t
-H5AC_receive_and_apply_clean_list(H5F_t * f,
- hid_t primary_dxpl_id,
- hid_t secondary_dxpl_id,
- H5AC_t * cache_ptr)
+H5AC_receive_candidate_list(H5AC_t * cache_ptr,
+ int * num_entries_ptr,
+ haddr_t ** haddr_buf_ptr_ptr)
{
- herr_t ret_value = SUCCEED; /* Return value */
- H5AC_aux_t * aux_ptr = NULL;
+ hbool_t success = FALSE;
+ H5AC_aux_t * aux_ptr;
haddr_t * haddr_buf_ptr = NULL;
MPI_Offset * MPI_Offset_buf_ptr = NULL;
- size_t buf_size;
- int i = 0;
int mpi_result;
int num_entries;
+ herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL)
+ FUNC_ENTER_NOAPI(H5AC_receive_candidate_list, FAIL)
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
@@ -3856,180 +4304,830 @@ H5AC_receive_and_apply_clean_list(H5F_t * f,
HDassert( aux_ptr != NULL );
HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
HDassert( aux_ptr->mpi_rank != 0 );
+ HDassert( aux_ptr-> metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+
+ HDassert( num_entries_ptr != NULL );
+ HDassert( *num_entries_ptr == 0 );
+
+ HDassert( haddr_buf_ptr_ptr != NULL );
+ HDassert( *haddr_buf_ptr_ptr == NULL );
+
/* First receive the number of entries in the list so that we
* can set up a buffer to receive them. If there aren't
* any, we are done.
*/
- mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm);
-
- if ( mpi_result != MPI_SUCCESS ) {
-
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
- }
- if ( num_entries > 0 )
- {
- /* allocate a buffers to store the list of entry base addresses in */
+ if(num_entries > 0) {
+ size_t buf_size;
+ int i;
+ /* allocate buffers to store the list of entry base addresses in */
buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
- MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size);
+ if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for receive buffer")
+ if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer")
- if ( MPI_Offset_buf_ptr == NULL ) {
+ /* Now receive the list of candidate entries
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+ if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "memory allocation failed for receive buffer")
- }
+ /* translate the MPI_Offsets to haddr_t */
+ i = 0;
+ while(i < num_entries) {
+ haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]);
- haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) *
- (size_t)num_entries);
+ if(haddr_buf_ptr[i] == HADDR_UNDEF)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert MPI off to haddr")
- if ( haddr_buf_ptr == NULL ) {
+ i++;
+ } /* end while */
+ } /* end if */
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
- "memory allocation failed for haddr buffer")
- }
+ success = TRUE;
+done:
+ if(MPI_Offset_buf_ptr != NULL)
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
- /* Now receive the list of cleaned entries
- *
- * The peculiar structure of the following call to MPI_Bcast is
- * due to MPI's (?) failure to believe in the MPI_Offset type.
- * Thus the element type is MPI_BYTE, with size equal to the
- * buf_size computed above.
+ if(success) {
+ /* finally, pass the number of entries and the buffer pointer
+ * back to the caller. Do this so that we can use the same code
+ * to apply the candidate list to all the processes.
*/
+ *num_entries_ptr = num_entries;
+ *haddr_buf_ptr_ptr = haddr_buf_ptr;
+ } /* end if */
+ else {
+ if(haddr_buf_ptr != NULL)
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+ } /* end else */
- mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size,
- MPI_BYTE, 0, aux_ptr->mpi_comm);
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_receive_candidate_list() */
+#endif /* H5_HAVE_PARALLEL */
- if ( mpi_result != MPI_SUCCESS ) {
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_rsp__dist_md_write__flush
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * that is triggered by a flush -- which in turn must have been
+ * triggered by either a flush API call or a file close --
+ * when the distributed metadata write strategy is selected.
+ *
+ * Upon entry, each process generates it own candidate list,
+ * being a sorted list of all dirty metadata entries currently
+ * in the metadata cache. Note that this list must be idendical
+ * across all processes, as all processes see the same stream
+ * of dirty metadata coming in, and use the same lists of
+ * candidate entries at each sync point. (At first glance, this
+ * argument sounds circular, but think of it in the sense of
+ * a recursive proof).
+ *
+ * If this this list is empty, we are done, and the function
+ * returns
+ *
+ * Otherwise, after the sorted list dirty metadata entries is
+ * constructed, each process uses the same algorithm to assign
+ * each entry on the candidate list to exactly one process for
+ * flushing.
+ *
+ * At this point, all processes participate in a barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * Each process then flushes the entries assigned to it, and
+ * marks all other entries on the candidate list as clean.
+ *
+ * Finally, all processes participate in a second barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * At the end of this process, process 0 and only process 0
+ * must tidy up its lists of dirtied and cleaned entries.
+ * These lists are not used in the distributed metadata write
+ * strategy, but they must be maintained should we shift
+ * to a strategy that uses them.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_rsp__dist_md_write__flush(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ int mpi_code;
+ int num_entries = 0;
+ haddr_t * haddr_buf_ptr = NULL;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
- }
+ FUNC_ENTER_NOAPI(H5AC_rsp__dist_md_write__flush, FAIL)
+ HDassert( f != NULL );
+ HDassert( f->shared->cache == cache_ptr );
- /* translate the MPI_Offsets to haddr_t */
- i = 0;
- while ( i < num_entries )
- {
- haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]);
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- if ( haddr_buf_ptr[i] == HADDR_UNDEF ) {
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
- "can't convert MPI off to haddr")
- }
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
- i++;
- }
+ /* first construct the candidate list -- initially, this will be in the
+ * form of a skip list. We will convert it later.
+ */
+ if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
+ if(aux_ptr->candidate_slist_len > 0) {
+ herr_t result;
- /* mark the indicated entries as clean */
- if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id,
- (int32_t)num_entries, &(haddr_buf_ptr[0])) < 0 ) {
+ /* convert the candidate list into the format we
+ * are used to receiving from process 0.
+ */
+ if(H5AC_copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.")
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
- "Can't mark entries clean.")
+ /* initial sync point barrier */
+ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code)
- }
- }
+ /* apply the candidate list */
+ aux_ptr->write_permitted = TRUE;
-done:
+ result = H5C_apply_candidate_list(f,
+ dxpl_id,
+ dxpl_id,
+ cache_ptr,
+ num_entries,
+ haddr_buf_ptr,
+ aux_ptr->mpi_rank,
+ aux_ptr->mpi_size);
- if ( MPI_Offset_buf_ptr != NULL ) {
+ aux_ptr->write_permitted = FALSE;
- MPI_Offset_buf_ptr =
- (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
- }
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.")
- if ( haddr_buf_ptr != NULL ) {
+ /* this code exists primarily for the test bed -- it allows us to
+ * enforce posix semantics on the server that pretends to be a
+ * file system in our parallel tests.
+ */
+ if(aux_ptr->write_done != NULL)
+ (aux_ptr->write_done)();
+ /* final sync point barrier */
+ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code)
+
+ /* if this is process zero, tidy up the dirtied,
+ * and flushed and still clean lists.
+ */
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC_tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.")
+ } /* end if */
+ } /* end if */
+
+ /* if it is defined, call the sync point done callback. Note
+ * that this callback is defined purely for testing purposes,
+ * and should be undefined under normal operating circumstances.
+ */
+ if(aux_ptr->sync_point_done != NULL)
+ (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr);
+
+done:
+ if(haddr_buf_ptr != NULL)
haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
- }
FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_rsp__dist_md_write__flush() */
+#endif /* H5_HAVE_PARALLEL */
-} /* H5AC_receive_and_apply_clean_list() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_rsp__dist_md_write__flush_to_min_clean
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * triggered by the accumulation of dirty metadata (as
+ * opposed to a flush call to the API) when the distributed
+ * metadata write strategy is selected.
+ *
+ * After invocation and initial sanity checking this function
+ * first checks to see if evictions are enabled -- if they
+ * are not, the function does nothing and returns.
+ *
+ * Otherwise, process zero constructs a list of entries to
+ * be flushed in order to bring the process zero cache back
+ * within its min clean requirement. Note that this list
+ * (the candidate list) may be empty.
+ *
+ * Then, all processes participate in a barrier.
+ *
+ * After the barrier, process 0 broadcasts the number of
+ * entries in the candidate list prepared above, and all
+ * other processes receive this number.
+ *
+ * If this number is zero, we are done, and the function
+ * returns without further action.
+ *
+ * Otherwise, process 0 broadcasts the sorted list of
+ * candidate entries, and all other processes receive it.
+ *
+ * Then, each process uses the same algorithm to assign
+ * each entry on the candidate list to exactly one process
+ * for flushing.
+ *
+ * Each process then flushes the entries assigned to it, and
+ * marks all other entries on the candidate list as clean.
+ *
+ * Finally, all processes participate in a second barrier to
+ * avoid messages from the past/future bugs.
+ *
+ * At the end of this process, process 0 and only process 0
+ * must tidy up its lists of dirtied and cleaned entries.
+ * These lists are not used in the distributed metadata write
+ * strategy, but they must be maintained should we shift
+ * to a strategy that uses them.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_rsp__dist_md_write__flush_to_min_clean(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ hbool_t evictions_enabled;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_rsp__dist_md_write__flush_to_min_clean, FAIL)
+
+ HDassert( f != NULL );
+ HDassert( f->shared->cache == cache_ptr );
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+
+ /* Query if evictions are allowed */
+ if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
+
+ if(evictions_enabled) {
+ /* construct candidate list -- process 0 only */
+ if(aux_ptr->mpi_rank == 0) {
+ if(H5AC_construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.")
+ } /* mpi rank == 0 */
+
+ /* propagate and apply candidate list -- all processes */
+ if(H5AC_propagate_and_apply_candidate_list(f, dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.")
+ } /* evictions enabled */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_rsp__dist_md_write__flush_to_min_clean() */
+#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
- * Function: H5AC_flush_entries
+ * Function: H5AC_rsp__p0_only__flush
*
- * Purpose: Flush the metadata cache associated with the specified file,
- * only writing from rank 0, but propagating the cleaned entries
- * to all ranks.
+ * Purpose: Routine for handling the details of running a sync point
+ * that is triggered a flush -- which in turn must have been
+ * triggered by either a flush API call or a file close --
+ * when the process 0 only metadata write strategy is selected.
*
- * Return: Non-negative on success/Negative on failure if there was a
- * request to flush all items and something was protected.
+ * First, all processes participate in a barrier.
*
- * Programmer: Quincey Koziol
- * koziol@hdfgroup.org
- * Aug 22 2009
+ * Then process zero flushes all dirty entries, and broadcasts
+ * they number of clean entries (if any) to all the other
+ * caches.
+ *
+ * If this number is zero, we are done.
+ *
+ * Otherwise, process 0 broadcasts the list of cleaned
+ * entries, and all other processes which are part of this
+ * file group receive it, and mark the listed entries as
+ * clean in their caches.
+ *
+ * Since all processes have the same set of dirty
+ * entries at the beginning of the sync point, and all
+ * entries that will be written are written before
+ * process zero broadcasts the number of cleaned entries,
+ * there is no need for a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
*
*-------------------------------------------------------------------------
*/
+#ifdef H5_HAVE_PARALLEL
herr_t
-H5AC_flush_entries(H5F_t *f)
+H5AC_rsp__p0_only__flush(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr)
{
- herr_t ret_value = SUCCEED; /* Return value */
+ int mpi_code;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_NOAPI_NOINIT(H5AC_flush_entries)
+ FUNC_ENTER_NOAPI(H5AC_rsp__p0_only__flush, FAIL)
- HDassert(f);
- HDassert(f->shared->cache);
+ HDassert( f != NULL );
+ HDassert( f->shared->cache == cache_ptr );
- /* Check if we have >1 ranks */
- if(f->shared->cache->aux_ptr) {
- H5AC_aux_t * aux_ptr = f->shared->cache->aux_ptr;
- int mpi_code;
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
-#if H5AC_DEBUG_DIRTY_BYTES_CREATION
- HDfprintf(stdout,
- "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
- (int)(aux_ptr->mpi_rank),
- (int)(aux_ptr->unprotect_dirty_bytes),
- (int)(aux_ptr->unprotect_dirty_bytes_updates),
- (int)(aux_ptr->insert_dirty_bytes),
- (int)(aux_ptr->insert_dirty_bytes_updates),
- (int)(aux_ptr->move_dirty_bytes),
- (int)(aux_ptr->move_dirty_bytes_updates));
-#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY );
+
+
+ /* to prevent "messages from the future" we must
+ * synchronize all processes before we start the flush.
+ * Hence the following barrier.
+ */
+ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code)
+
+ /* Flush data to disk, from rank 0 process */
+ if(aux_ptr->mpi_rank == 0) {
+ herr_t result;
+
+ aux_ptr->write_permitted = TRUE;
+
+ result = H5C_flush_cache(f, dxpl_id, dxpl_id, H5AC__NO_FLAGS_SET);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
+
+ if(aux_ptr->write_done != NULL)
+ (aux_ptr->write_done)();
+ } /* end if */
+
+ /* Propagate cleaned entries to other ranks. */
+ if(H5AC_propagate_flushed_and_still_clean_entries_list(f, H5AC_noblock_dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_rsp__p0_only__flush() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_rsp__p0_only__flush_to_min_clean
+ *
+ * Purpose: Routine for handling the details of running a sync point
+ * triggered by the accumulation of dirty metadata (as
+ * opposed to a flush call to the API) when the process 0
+ * only metadata write strategy is selected.
+ *
+ * After invocation and initial sanity checking this function
+ * first checks to see if evictions are enabled -- if they
+ * are not, the function does nothing and returns.
+ *
+ * Otherwise, all processes participate in a barrier.
+ *
+ * After the barrier, if this is process 0, the function
+ * causes the cache to flush sufficient entries to get the
+ * cache back within its minimum clean fraction, and broadcast
+ * the number of entries which have been flushed since
+ * the last sync point, and are still clean.
+ *
+ * If this number is zero, we are done.
+ *
+ * Otherwise, process 0 broadcasts the list of cleaned
+ * entries, and all other processes which are part of this
+ * file group receive it, and mark the listed entries as
+ * clean in their caches.
+ *
+ * Since all processes have the same set of dirty
+ * entries at the beginning of the sync point, and all
+ * entries that will be written are written before
+ * process zero broadcasts the number of cleaned entries,
+ * there is no need for a closing barrier.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * April 28, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_rsp__p0_only__flush_to_min_clean(H5F_t *f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ hbool_t evictions_enabled;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_rsp__p0_only__flush_to_min_clean, FAIL)
+
+ HDassert( f != NULL );
+ HDassert( f->shared->cache == cache_ptr );
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY );
+
+ /* Query if evictions are allowed */
+ if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.")
+
+ /* Flush if evictions are allowed -- following call
+ * will cause process 0 to flush to min clean size,
+ * and then propagate the newly clean entries to the
+ * other processes.
+ *
+ * Otherwise, do nothing.
+ */
+ if(evictions_enabled) {
+ int mpi_code;
/* to prevent "messages from the future" we must synchronize all
- * processes before we start the flush. Hence the following
- * barrier.
+ * processes before we start the flush. This synchronization may
+ * already be done -- hence the do_barrier parameter.
*/
if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
- /* Flush data to disk, from rank 0 process */
- if(aux_ptr->mpi_rank == 0 ) {
- herr_t status;
+ if(0 == aux_ptr->mpi_rank) {
+ herr_t result;
- aux_ptr->write_permitted = TRUE;
+ /* here, process 0 flushes as many entries as necessary to
+ * comply with the currently specified min clean size.
+ * Note that it is quite possible that no entries will be
+ * flushed.
+ */
+ aux_ptr->write_permitted = TRUE;
- status = H5C_flush_cache(f,
- H5AC_noblock_dxpl_id,
- H5AC_noblock_dxpl_id,
- H5AC__NO_FLAGS_SET);
+ result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id);
- aux_ptr->write_permitted = FALSE;
+ aux_ptr->write_permitted = FALSE;
- if(status < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
+ if(result < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.")
+ /* this call exists primarily for the test code -- it is used
+ * to enforce POSIX semantics on the process used to simulate
+ * reads and writes in t_cache.c.
+ */
if(aux_ptr->write_done != NULL)
(aux_ptr->write_done)();
- } /* end if ( aux_ptr->mpi_rank == 0 ) */
+ } /* end if */
- /* Propagate cleaned entries to other ranks */
- if(H5AC_propagate_flushed_and_still_clean_entries_list(f,
- H5AC_noblock_dxpl_id,
- f->shared->cache,
- FALSE) < 0 )
+ if(H5AC_propagate_flushed_and_still_clean_entries_list(f, dxpl_id, cache_ptr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.")
- } /* end if ( aux_ptr != NULL ) */
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_rsp__p0_only__flush_to_min_clean() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_run_sync_point
+ *
+ * Purpose: Top level routine for managing a sync point between all
+ * meta data caches in the parallel case. Since all caches
+ * see the same sequence of dirty metadata, we simply count
+ * bytes of dirty metadata, and run a sync point whenever the
+ * number of dirty bytes of metadata seen since the last
+ * sync point exceeds a threshold that is common across all
+ * processes. We also run sync points in response to
+ * HDF5 API calls triggering either a flush or a file close.
+ *
+ * In earlier versions of PHDF5, only the metadata cache with
+ * mpi rank 0 was allowed to write to file. All other
+ * metadata caches on processes with rank greater than 0 were
+ * required to retain dirty entries until they were notified
+ * that the entry is was clean.
+ *
+ * This function was created to make it easier for us to
+ * experiment with other options, as it is a single point
+ * for the execution of sync points.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * March 11, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_run_sync_point(H5F_t *f,
+ hid_t dxpl_id,
+ int sync_point_op)
+{
+ H5AC_t * cache_ptr;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_run_sync_point, FAIL)
+
+ HDassert( f != NULL );
+
+ cache_ptr = f->shared->cache;
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN ) ||
+ ( sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ) );
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ HDfprintf(stdout,
+ "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
+ (int)(aux_ptr->mpi_rank),
+ (int)(aux_ptr->dirty_bytes_propagations),
+ (int)(aux_ptr->unprotect_dirty_bytes),
+ (int)(aux_ptr->unprotect_dirty_bytes_updates),
+ (int)(aux_ptr->insert_dirty_bytes),
+ (int)(aux_ptr->insert_dirty_bytes_updates),
+ (int)(aux_ptr->rename_dirty_bytes),
+ (int)(aux_ptr->rename_dirty_bytes_updates));
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ switch(aux_ptr->metadata_write_strategy) {
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5AC_rsp__p0_only__flush_to_min_clean(f, dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__p0_only__flush_to_min_clean() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5AC_rsp__p0_only__flush(f, dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__p0_only__flush() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
+ break;
+ } /* end switch */
+ break;
+
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ switch(sync_point_op) {
+ case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN:
+ if(H5AC_rsp__dist_md_write__flush_to_min_clean(f, dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__dist_md_write__flush() failed.")
+ break;
+
+ case H5AC_SYNC_POINT_OP__FLUSH_CACHE:
+ if(H5AC_rsp__dist_md_write__flush(f, dxpl_id, cache_ptr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__dist_md_write__flush() failed.")
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op");
+ break;
+ } /* end switch */
+ break;
+
+ default:
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.")
+ break;
+ } /* end switch */
+
+ /* reset the dirty bytes count */
+ aux_ptr->dirty_bytes = 0;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations += 1;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_run_sync_point() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_tidy_cache_0_lists()
+ *
+ * Purpose: In the distributed metadata write strategy, not all dirty
+ * entries are written by process 0 -- thus we must tidy
+ * up the dirtied, and flushed and still clean lists
+ * maintained by process zero after each sync point.
+ *
+ * This procedure exists to tend to this issue.
+ *
+ * At this point, all entries that process 0 cleared should
+ * have been removed from both the dirty and flushed and
+ * still clean lists, and entries that process 0 has flushed
+ * should have been removed from the dirtied list and added
+ * to the flushed and still clean list.
+ *
+ * However, since the distributed metadata write strategy
+ * doesn't make use of these lists, the objective is simply
+ * to maintain these lists in consistent state that allows
+ * them to be used should the metadata write strategy change
+ * to one that uses these lists.
+ *
+ * Thus for our purposes, all we need to do is remove from
+ * the dirtied and flushed and still clean lists all
+ * references to entries that appear in the candidate list.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * 4/20/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_tidy_cache_0_lists(H5AC_t * cache_ptr,
+ int num_candidates,
+ haddr_t * candidates_list_ptr)
+
+{
+ int i;
+ H5AC_aux_t * aux_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5AC_tidy_cache_0_lists, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr);
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( num_candidates > 0 );
+ HDassert( candidates_list_ptr != NULL );
+
+ /* clean up dirtied and flushed and still clean lists by removing
+ * all entries on the candidate list. Cleared entries should
+ * have been removed from both the dirty and cleaned lists at
+ * this point, flushed entries should have been added to the
+ * cleaned list. However, for this metadata write strategy,
+ * we just want to remove all references to the candidate entries.
+ */
+ for(i = 0; i < num_candidates; i++) {
+ H5AC_slist_entry_t * d_slist_entry_ptr;
+ H5AC_slist_entry_t * c_slist_entry_ptr;
+ haddr_t addr;
+
+ addr = candidates_list_ptr[i];
+
+ /* addr must be either on the dirtied list, or on the flushed
+ * and still clean list. Remove it.
+ */
+ d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->d_slist_ptr, (void *)&addr);
+ if(d_slist_entry_ptr != NULL) {
+ HDassert(d_slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert(d_slist_entry_ptr->addr == addr);
+
+ if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != d_slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.")
+
+ d_slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr);
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert(aux_ptr->d_slist_len >= 0);
+ } /* end if */
+
+ c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)&addr);
+ if(c_slist_entry_ptr != NULL) {
+ HDassert(c_slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert(c_slist_entry_ptr->addr == addr);
+
+ if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != c_slist_entry_ptr)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from clean entry slist.")
+
+ c_slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr);
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ } /* end if */
+ } /* end for */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5AC_tidy_cache_0_lists() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_flush_entries
+ *
+ * Purpose: Flush the metadata cache associated with the specified file,
+ * only writing from rank 0, but propagating the cleaned entries
+ * to all ranks.
+ *
+ * Return: Non-negative on success/Negative on failure if there was a
+ * request to flush all items and something was protected.
+ *
+ * Programmer: Quincey Koziol
+ * koziol@hdfgroup.org
+ * Aug 22 2009
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_flush_entries(H5F_t *f)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI_NOINIT(H5AC_flush_entries)
+
+ HDassert(f);
+ HDassert(f->shared->cache);
+
+ /* Check if we have >1 ranks */
+ if(f->shared->cache->aux_ptr) {
+ if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
+ } /* end if */
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -4056,7 +5154,6 @@ herr_t
H5AC_ignore_tags(H5F_t * f)
{
/* Variable Declarations */
- H5AC_t * cache_ptr = NULL;
herr_t ret_value = SUCCEED;
/* Function Enter Macro */
@@ -4067,18 +5164,12 @@ H5AC_ignore_tags(H5F_t * f)
HDassert(f->shared);
HDassert(f->shared->cache);
- /* Get cache pointer */
- cache_ptr = f->shared->cache;
-
/* Set up a new metadata tag */
- if (H5C_ignore_tags(cache_ptr) < 0)
+ if(H5C_ignore_tags(f->shared->cache) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "H5C_ignore_tags() failed.")
done:
-
- /* Function Leave Macro */
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5AC_ignore_tags() */
@@ -4102,27 +5193,24 @@ H5AC_tag(hid_t dxpl_id, haddr_t metadata_tag, haddr_t * prev_tag)
herr_t ret_value = SUCCEED;
/* Function Enter Macro */
- FUNC_ENTER_NOAPI_NOINIT(H5AC_tag)
+ FUNC_ENTER_NOAPI(H5AC_tag, FAIL)
/* Check Arguments */
if(NULL == (dxpl = (H5P_genplist_t *)H5I_object_verify(dxpl_id, H5I_GENPROP_LST)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list");
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list")
/* Get the current tag value and return that (if prev_tag is NOT null)*/
- if (prev_tag) {
- if( (H5P_get(dxpl, "H5AC_metadata_tag", prev_tag)) < 0 )
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to query dxpl");
- }
+ if(prev_tag) {
+ if((H5P_get(dxpl, "H5AC_metadata_tag", prev_tag)) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to query dxpl")
+ } /* end if */
/* Set the provided tag value in the dxpl_id. */
if(H5P_set(dxpl, "H5AC_metadata_tag", &metadata_tag) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set property in dxpl")
done:
-
- /* Function Leave Macro */
- FUNC_LEAVE_NOAPI(ret_value);
-
+ FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC_tag */
@@ -4143,22 +5231,19 @@ done:
herr_t
H5AC_retag_copied_metadata(H5F_t * f, haddr_t metadata_tag)
{
- /* Variable Declarations */
herr_t ret_value = SUCCEED;
/* Function Enter Macro */
- FUNC_ENTER_NOAPI_NOINIT(H5AC_retag_copied_metadata)
+ FUNC_ENTER_NOAPI(H5AC_retag_copied_metadata, FAIL)
/* Assertions */
HDassert(f);
HDassert(f->shared);
/* Call cache-level function to retag entries */
- H5C_retag_copied_metadata(f->shared->cache, metadata_tag);
+ H5C_retag_copied_metadata(f->shared->cache, metadata_tag);
done:
-
- /* Function Leave Macro */
- FUNC_LEAVE_NOAPI(ret_value);
-
+ FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC_retag_copied_metadata */
+
diff --git a/src/H5ACpkg.h b/src/H5ACpkg.h
index d5346f5..3060a70 100644
--- a/src/H5ACpkg.h
+++ b/src/H5ACpkg.h
@@ -46,6 +46,17 @@
#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0
+#ifdef H5_HAVE_PARALLEL
+
+/* the following #defined are used to specify the operation required
+ * at a sync point.
+ */
+
+#define H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN 0
+#define H5AC_SYNC_POINT_OP__FLUSH_CACHE 1
+
+#endif /* H5_HAVE_PARALLEL */
+
/*-------------------------------------------------------------------------
* It is a bit difficult to set ranges of allowable values on the
* dirty_bytes_threshold field of H5AC_aux_t. The following are
@@ -59,6 +70,9 @@
#define H5AC__MAX_DIRTY_BYTES_THRESHOLD (int32_t) \
(H5C__MAX_MAX_CACHE_SIZE / 4)
+#define H5AC__DEFAULT_METADATA_WRITE_STRATEGY \
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED
+
/****************************************************************************
*
* structure H5AC_aux_t
@@ -162,6 +176,12 @@
* broadcast. This field is reset to zero after each such
* broadcast.
*
+ * metadata_write_strategy: Integer code indicating how we will be
+ * writing the metadata. In the first incarnation of
+ * this code, all writes were done from process 0. This
+ * field exists to facilitate experiments with other
+ * strategies.
+ *
* dirty_bytes_propagations: This field only exists when the
* H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
*
@@ -211,6 +231,19 @@
* been created via move operations since the last time
* the cleaned list was propagated.
*
+ * Things have changed a bit since the following four fields were defined.
+ * If metadata_write_strategy is H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY,
+ * all comments hold as before -- with the caviate that pending further
+ * coding, the process 0 metadata cache is forbidden to flush entries outside
+ * of a sync point.
+ *
+ * However, for different metadata write strategies, these fields are used
+ * only to maintain the correct dirty byte count on process zero -- and in
+ * most if not all cases, this is redundant, as process zero will be barred
+ * from flushing entries outside of a sync point.
+ *
+ * JRM -- 3/16/10
+ *
* d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
* of entries that have been dirtied since the last time they
* were listed in a clean entries broadcast. This list is
@@ -259,6 +292,17 @@
* contain the value 0 on all processes other than process 0.
* It exists primarily for sanity checking.
*
+ * The following two fields are used only when metadata_write_strategy
+ * is H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED.
+ *
+ * candidate_slist_ptr: Pointer to an instance of H5SL_t used by process 0
+ * to construct a list of entries to be flushed at this sync
+ * point. This list is then broadcast to the other processes,
+ * which then either flush or mark clean all entries on it.
+ *
+ * candidate_slist_len: Integer field containing the number of entries on the
+ * candidate list. It exists primarily for sanity checking.
+ *
* write_done: In the parallel test bed, it is necessary to ensure that
* all writes to the server process from cache 0 complete
* before it enters the barrier call with the other caches.
@@ -271,6 +315,19 @@
* This field must be set to NULL when the callback is not
* needed.
*
+ * Note: This field has been extended for use by all processes
+ * with the addition of support for the distributed
+ * metadata write strategy.
+ * JRM -- 5/9/10
+ *
+ * sync_point_done: In the parallel test bed, it is necessary to verify
+ * that the expected writes, and only the expected writes,
+ * have taken place at the end of each sync point.
+ *
+ * The sync_point_done callback allows t_cache to perform
+ * this verification. The field is set to NULL when the
+ * callback is not needed.
+ *
****************************************************************************/
#ifdef H5_HAVE_PARALLEL
@@ -293,6 +350,8 @@ typedef struct H5AC_aux_t
int32_t dirty_bytes;
+ int32_t metadata_write_strategy;
+
#if H5AC_DEBUG_DIRTY_BYTES_CREATION
int32_t dirty_bytes_propagations;
@@ -316,8 +375,15 @@ typedef struct H5AC_aux_t
int32_t c_slist_len;
+ H5SL_t * candidate_slist_ptr;
+
+ int32_t candidate_slist_len;
+
void (* write_done)(void);
+ void (* sync_point_done)(int num_writes,
+ haddr_t * written_entries_tbl);
+
} H5AC_aux_t; /* struct H5AC_aux_t */
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h
index ef19480..7fa5cf4 100644
--- a/src/H5ACprivate.h
+++ b/src/H5ACprivate.h
@@ -222,6 +222,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id;
/* Default cache configuration. */
+#define H5AC__DEFAULT_METADATA_WRITE_STRATEGY \
+ H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED
+
#ifdef H5_HAVE_PARALLEL
#define H5AC__DEFAULT_CACHE_CONFIG \
{ \
@@ -254,7 +257,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id;
/* int epochs_before_eviction = */ 3, \
/* hbool_t apply_empty_reserve = */ TRUE, \
/* double empty_reserve = */ 0.1, \
- /* int dirty_bytes_threshold = */ (256 * 1024) \
+ /* int dirty_bytes_threshold = */ (256 * 1024), \
+ /* int metadata_write_strategy = */ \
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY \
}
#else /* H5_HAVE_PARALLEL */
#define H5AC__DEFAULT_CACHE_CONFIG \
@@ -288,7 +293,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id;
/* int epochs_before_eviction = */ 3, \
/* hbool_t apply_empty_reserve = */ TRUE, \
/* double empty_reserve = */ 0.1, \
- /* int dirty_bytes_threshold = */ (256 * 1024) \
+ /* int dirty_bytes_threshold = */ (256 * 1024), \
+ /* int metadata_write_strategy = */ \
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY \
}
#endif /* H5_HAVE_PARALLEL */
@@ -358,6 +365,9 @@ H5_DLL herr_t H5AC_expunge_entry(H5F_t *f, hid_t dxpl_id,
const H5AC_class_t *type, haddr_t addr,
unsigned flags);
+H5_DLL herr_t H5AC_set_sync_point_done_callback(H5C_t *cache_ptr,
+ void (*sync_point_done)(int num_writes, haddr_t *written_entries_tbl));
+
H5_DLL herr_t H5AC_set_write_done_callback(H5C_t * cache_ptr,
void (* write_done)(void));
H5_DLL herr_t H5AC_stats(const H5F_t *f);
@@ -392,5 +402,9 @@ H5_DLL herr_t H5AC_retag_copied_metadata(H5F_t * f, haddr_t metadata_tag);
H5_DLL herr_t H5AC_ignore_tags(H5F_t * f);
+#ifdef H5_HAVE_PARALLEL
+H5_DLL herr_t H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr);
+#endif /* H5_HAVE_PARALLEL */
+
#endif /* !_H5ACprivate_H */
diff --git a/src/H5ACpublic.h b/src/H5ACpublic.h
index 02941b6..639179c 100644
--- a/src/H5ACpublic.h
+++ b/src/H5ACpublic.h
@@ -354,21 +354,22 @@ extern "C" {
* Parallel Configuration Fields:
*
* In PHDF5, all operations that modify metadata must be executed collectively.
+ *
* We used to think that this was enough to ensure consistency across the
* metadata caches, but since we allow processes to read metadata individually,
* the order of dirty entries in the LRU list can vary across processes,
* which can result in inconsistencies between the caches.
*
- * To prevent this, only the metadata cache on process 0 is allowed to write
- * to file, and then only after synchronizing with the other caches. After
- * it writes entries to file, it sends the base addresses of the now clean
- * entries to the other caches, so they can mark these entries clean as well.
+ * PHDF5 uses several strategies to prevent such inconsistencies in metadata,
+ * all of which use the fact that the same stream of dirty metadata is seen
+ * by all processes for purposes of synchronization. This is done by
+ * having each process count the number of bytes of dirty metadata generated,
+ * and then running a "sync point" whenever this count exceeds a user
+ * specified threshold (see dirty_bytes_threshold below).
*
- * The different caches know when to synchronize caches by counting the
- * number of bytes of dirty metadata created by the collective operations
- * modifying metadata. Whenever this count exceeds a user specified
- * threshold (see below), process 0 flushes down to its minimum clean size,
- * and then sends the list of newly cleaned entries to the other caches.
+ * The current metadata write strategy is indicated by the
+ * metadata_write_strategy field. The possible values of this field, along
+ * with the associated metadata write strategies are discussed below.
*
* dirty_bytes_threshold: Threshold of dirty byte creation used to
* synchronize updates between caches. (See above for outline and
@@ -378,11 +379,67 @@ extern "C" {
* file. This field is ignored unless HDF5 has been compiled for
* parallel.
*
+ * metadata_write_strategy: Integer field containing a code indicating the
+ * desired metadata write strategy. The valid values of this field
+ * are enumerated and discussed below:
+ *
+ *
+ * H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ *
+ * When metadata_write_strategy is set to this value, only process
+ * zero is allowed to write dirty metadata to disk. All other
+ * processes must retain dirty metadata until they are informed at
+ * a sync point that the dirty metadata in question has been written
+ * to disk.
+ *
+ * When the sync point is reached (or when there is a user generated
+ * flush), process zero flushes sufficient entries to bring it into
+ * complience with its min clean size (or flushes all dirty entries in
+ * the case of a user generated flush), broad casts the list of
+ * entries just cleaned to all the other processes, and then exits
+ * the sync point.
+ *
+ * Upon receipt of the broadcast, the other processes mark the indicated
+ * entries as clean, and leave the sync point as well.
+ *
+ *
+ * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ *
+ * In the distributed metadata write strategy, process zero still makes
+ * the decisions as to what entries should be flushed, but the actual
+ * flushes are distributed across the processes in the computation to
+ * the extent possible.
+ *
+ * In this strategy, when a sync point is triggered (either by dirty
+ * metadata creation or manual flush), all processes enter a barrier.
+ *
+ * On the other side of the barrier, process 0 constructs an ordered
+ * list of the entries to be flushed, and then broadcasts this list
+ * to the caches in all the processes.
+ *
+ * All processes then scan the list of entries to be flushed, flushing
+ * some, and marking the rest as clean. The algorithm for this purpose
+ * ensures that each entry in the list is flushed exactly once, and
+ * all are marked clean in each cache.
+ *
+ * Note that in the case of a flush of the cache, no message passing
+ * is necessary, as all processes have the same list of dirty entries,
+ * and all of these entries must be flushed. Thus in this case it is
+ * sufficient for each process to sort its list of dirty entries after
+ * leaving the initial barrier, and use this list as if it had been
+ * received from process zero.
+ *
+ * To avoid possible messages from the past/future, all caches must
+ * wait until all caches are done before leaving the sync point.
+ *
****************************************************************************/
#define H5AC__CURR_CACHE_CONFIG_VERSION 1
#define H5AC__MAX_TRACE_FILE_NAME_LEN 1024
+#define H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY 0
+#define H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED 1
+
typedef struct H5AC_cache_config_t
{
/* general configuration fields: */
@@ -440,6 +497,7 @@ typedef struct H5AC_cache_config_t
/* parallel configuration fields: */
int dirty_bytes_threshold;
+ int metadata_write_strategy;
} H5AC_cache_config_t;
diff --git a/src/H5C.c b/src/H5C.c
index bfb7e05..11ad8bb 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -335,6 +335,624 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5C_apply_candidate_list
+ *
+ * Purpose: Apply the supplied candidate list.
+ *
+ * We used to do this by simply having each process write
+ * every mpi_size-th entry in the candidate list, starting
+ * at index mpi_rank, and mark all the others clean.
+ *
+ * However, this can cause unnecessary contention in a file
+ * system by increasing the number of processes writing to
+ * adjacent locations in the HDF5 file.
+ *
+ * To attempt to minimize this, we now arange matters such
+ * that each process writes n adjacent entries in the
+ * candidate list, and marks all others clean. We must do
+ * this in such a fashion as to guarantee that each entry
+ * on the candidate list is written by exactly one process,
+ * and marked clean by all others.
+ *
+ * To do this, first construct a table mapping mpi_rank
+ * to the index of the first entry in the candidate list to
+ * be written by the process of that mpi_rank, and then use
+ * the table to control which entries are written and which
+ * are marked as clean as a function of the mpi_rank.
+ *
+ * Note that the table must be identical on all processes, as
+ * all see the same candidate list, mpi_size, and mpi_rank --
+ * the inputs used to construct the table.
+ *
+ * We construct the table as follows. Let:
+ *
+ * n = num_candidates / mpi_size;
+ *
+ * m = num_candidates % mpi_size;
+ *
+ * Now allocate an array of integers of length mpi_size + 1,
+ * and call this array candidate_assignment_table.
+ *
+ * Conceptually, if the number of candidates is a multiple
+ * of the mpi_size, we simply pass through the candidate list
+ * and assign n entries to each process to flush, with the
+ * index of the first entry to flush in the location in
+ * the candidate_assignment_table indicated by the mpi_rank
+ * of the process.
+ *
+ * In the more common case in which the candidate list isn't
+ * isn't a multiple of the mpi_size, we pretend it is, and
+ * give num_candidates % mpi_size processes one extra entry
+ * each to make things work out.
+ *
+ * Once the table is constructed, we determine the first and
+ * last entry this process is to flush as follows:
+ *
+ * first_entry_to_flush = candidate_assignment_table[mpi_rank]
+ *
+ * last_entry_to_flush =
+ * candidate_assignment_table[mpi_rank + 1] - 1;
+ *
+ * With these values determined, we simply scan through the
+ * candidate list, marking all entries in the range
+ * [first_entry_to_flush, last_entry_to_flush] for flush,
+ * and all others to be cleaned.
+ *
+ * Finally, we scan the LRU from tail to head, flushing
+ * or marking clean the candidate entries as indicated.
+ * If necessary, we scan the pinned list as well.
+ *
+ * Note that this function will fail if any protected or
+ * clean entries appear on the candidate list.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ * Modifications:
+ *
+ * Heavily reworked to have each process flush a group of
+ * adjacent entries.
+ * JRM -- 4/15/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0
+herr_t
+H5C_apply_candidate_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr,
+ int num_candidates,
+ haddr_t * candidates_list_ptr,
+ int mpi_rank,
+ int mpi_size)
+{
+ hbool_t first_flush = FALSE;
+ int i;
+ int m;
+ int n;
+ int first_entry_to_flush;
+ int last_entry_to_flush;
+ int entries_to_clear = 0;
+ int entries_to_flush = 0;
+ int entries_cleared = 0;
+ int entries_flushed = 0;
+ int entries_examined = 0;
+ int initial_list_len;
+ int * candidate_assignment_table = NULL;
+ haddr_t addr;
+ H5C_cache_entry_t * clear_ptr = NULL;
+ H5C_cache_entry_t * entry_ptr = NULL;
+ H5C_cache_entry_t * flush_ptr = NULL;
+#if H5C_DO_SANITY_CHECKS
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ char tbl_buf[1024];
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5C_apply_candidate_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( num_candidates > 0 );
+ HDassert( num_candidates <= cache_ptr->slist_len );
+ HDassert( candidates_list_ptr != NULL );
+ HDassert( 0 <= mpi_rank );
+ HDassert( mpi_rank < mpi_size );
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n",
+ FUNC, mpi_rank);
+ for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0';
+ sprintf(&(tbl_buf[0]), "candidate list = ");
+ for ( i = 0; i < num_candidates; i++ )
+ {
+ sprintf(&(tbl_buf[strlen(tbl_buf)]), " 0x%llx",
+ (long long)(*(candidates_list_ptr + i)));
+ }
+ sprintf(&(tbl_buf[strlen(tbl_buf)]), "\n");
+ HDfprintf(stdout, "%s", tbl_buf);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ n = num_candidates / mpi_size;
+ m = num_candidates % mpi_size;
+ HDassert(n >= 0);
+
+ if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table")
+
+ candidate_assignment_table[0] = 0;
+ candidate_assignment_table[mpi_size] = num_candidates;
+
+ if(m == 0) { /* mpi_size is an even divisor of num_candidates */
+ HDassert(n > 0);
+ for(i = 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
+ } /* end if */
+ else {
+ for(i = 1; i <= m; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1;
+
+ if(num_candidates < mpi_size) {
+ for(i = m + 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = num_candidates;
+ } /* end if */
+ else {
+ for(i = m + 1; i < mpi_size; i++)
+ candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n;
+ } /* end else */
+ } /* end else */
+ HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates);
+
+#if H5C_DO_SANITY_CHECKS
+ /* verify that the candidate assignment table has the expected form */
+ for ( i = 1; i < mpi_size - 1; i++ )
+ {
+ int a, b;
+
+ a = candidate_assignment_table[i] - candidate_assignment_table[i - 1];
+ b = candidate_assignment_table[i + 1] - candidate_assignment_table[i];
+
+ HDassert( n + 1 >= a );
+ HDassert( a >= b );
+ HDassert( b >= n );
+ }
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ first_entry_to_flush = candidate_assignment_table[mpi_rank];
+ last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1;
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ for ( i = 0; i < 1024; i++ )
+ tbl_buf[i] = '\0';
+ sprintf(&(tbl_buf[0]), "candidate assignment table = ");
+ for(i = 0; i <= mpi_size; i++)
+ sprintf(&(tbl_buf[strlen(tbl_buf)]), " %d", candidate_assignment_table[i]);
+ sprintf(&(tbl_buf[strlen(tbl_buf)]), "\n");
+ HDfprintf(stdout, "%s", tbl_buf);
+
+ HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n",
+ FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush);
+
+ HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ for(i = 0; i < num_candidates; i++) {
+ addr = candidates_list_ptr[i];
+ HDassert( H5F_addr_defined(addr) );
+
+#if H5C_DO_SANITY_CHECKS
+ if ( i > 0 ) {
+ if ( last_addr == addr ) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n")
+ } else if ( last_addr > addr ) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n")
+ }
+ }
+
+ last_addr = addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+ if(entry_ptr == NULL) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.")
+ } else if(!entry_ptr->is_dirty) {
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.")
+ } else if ( entry_ptr->is_protected ) {
+ /* For now at least, we can't deal with protected entries.
+ * If we encounter one, scream and die. If it becomes an
+ * issue, we should be able to work around this.
+ */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.")
+ } else {
+ /* determine whether the entry is to be cleared or flushed,
+ * and mark it accordingly. We will scan the protected and
+ * pinned list shortly, and clear or flush according to these
+ * markings.
+ */
+ if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) {
+ entries_to_flush++;
+ entry_ptr->flush_immediately = TRUE;
+ } /* end if */
+ else {
+ entries_to_clear++;
+ entry_ptr->clear_on_unprotect = TRUE;
+ } /* end else */
+ } /* end else */
+ } /* end for */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n",
+ FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear,
+ (int)entries_to_flush);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+
+ /* We have now marked all the entries on the candidate list for
+ * either flush or clear -- now scan the LRU and the pinned list
+ * for these entries and do the deed.
+ *
+ * Note that we are doing things in this round about manner so as
+ * to preserve the order of the LRU list to the best of our ability.
+ * If we don't do this, my experiments indicate that we will have a
+ * noticably poorer hit ratio as a result.
+ */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank,
+ (int)(cache_ptr->LRU_list_len));
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ entries_examined = 0;
+ initial_list_len = cache_ptr->LRU_list_len;
+ entry_ptr = cache_ptr->LRU_tail_ptr;
+
+ while((entry_ptr != NULL) && (entries_examined <= initial_list_len) &&
+ ((entries_cleared + entries_flushed) < num_candidates)) {
+ if(entry_ptr->clear_on_unprotect) {
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_cleared++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)clear_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ clear_ptr->type,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ &first_flush,
+ TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } else if(entry_ptr->flush_immediately) {
+ entry_ptr->flush_immediately = FALSE;
+ flush_ptr = entry_ptr;
+ entry_ptr = entry_ptr->prev;
+ entries_flushed++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)flush_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ flush_ptr->type,
+ flush_ptr->addr,
+ H5C__NO_FLAGS_SET,
+ &first_flush,
+ TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } else {
+ entry_ptr = entry_ptr->prev;
+ }
+
+ entries_examined++;
+ } /* end while */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n",
+ FUNC, mpi_rank, entries_examined,
+ entries_cleared, entries_flushed);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ /* It is also possible that some of the cleared entries are on the
+ * pinned list. Must scan that also.
+ */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n",
+ FUNC, mpi_rank, (int)(cache_ptr->pel_len));
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while((entry_ptr != NULL) &&
+ ((entries_cleared + entries_flushed) < num_candidates)) {
+ if(entry_ptr->clear_on_unprotect) {
+ entry_ptr->clear_on_unprotect = FALSE;
+ clear_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_cleared++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)clear_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ clear_ptr->type,
+ clear_ptr->addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ &first_flush,
+ TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } else if(entry_ptr->flush_immediately) {
+ entry_ptr->flush_immediately = FALSE;
+ flush_ptr = entry_ptr;
+ entry_ptr = entry_ptr->next;
+ entries_flushed++;
+
+#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 )
+ HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank,
+ (long long)flush_ptr->addr);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if(H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ flush_ptr->type,
+ flush_ptr->addr,
+ H5C__NO_FLAGS_SET,
+ &first_flush,
+ TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ } else {
+ entry_ptr = entry_ptr->next;
+ }
+ } /* end while */
+
+#if H5C_APPLY_CANDIDATE_LIST__DEBUG
+ HDfprintf(stdout,
+ "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n",
+ FUNC, mpi_rank, entries_examined,
+ entries_cleared, entries_flushed);
+ HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank);
+
+ fsync(stdout);
+#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
+ if((entries_flushed != entries_to_flush) || (entries_cleared != entries_to_clear))
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.")
+
+done:
+ if(candidate_assignment_table != NULL)
+ candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table);
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_apply_candidate_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_construct_candidate_list__clean_cache
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * clean all entries in the cache.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr)
+{
+ size_t space_needed;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5C_construct_candidate_list__clean_cache, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ /* As a sanity check, set space needed to the size of the skip list.
+ * This should be the sum total of the sizes of all the dirty entries
+ * in the metadata cache.
+ */
+ space_needed = cache_ptr->slist_size;
+
+ /* Recall that while we shouldn't have any protected entries at this
+ * point, it is possible that some dirty entries may reside on the
+ * pinned list at this point.
+ */
+ HDassert( cache_ptr->slist_size <=
+ (cache_ptr->dLRU_list_size + cache_ptr->pel_size) );
+ HDassert( cache_ptr->slist_len <=
+ (cache_ptr->dLRU_list_len + cache_ptr->pel_len) );
+
+ if(space_needed > 0) { /* we have work to do */
+ H5C_cache_entry_t *entry_ptr;
+ int nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+ haddr_t nominated_addr;
+
+ HDassert( cache_ptr->slist_len > 0 );
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL)) {
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+ } /* end while */
+ HDassert( entry_ptr == NULL );
+
+ /* it is possible that there are some dirty entries on the
+ * protected entry list as well -- scan it too if necessary
+ */
+ entry_ptr = cache_ptr->pel_head_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL)) {
+ if(entry_ptr->is_dirty) {
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ } /* end if */
+
+ entry_ptr = entry_ptr->next;
+ } /* end while */
+
+ HDassert( nominated_entries_count == cache_ptr->slist_len );
+ HDassert( nominated_entries_size == space_needed );
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_construct_candidate_list__clean_cache() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5C_construct_candidate_list__min_clean
+ *
+ * Purpose: Construct the list of entries that should be flushed to
+ * get the cache back within its min clean constraints.
+ *
+ * This function is used in managing sync points, and
+ * shouldn't be used elsewhere.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: John Mainzer
+ * 3/17/10
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr)
+{
+ size_t space_needed = 0;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5C_construct_candidate_list__min_clean, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ /* compute the number of bytes (if any) that must be flushed to get the
+ * cache back within its min clean constraints.
+ */
+ if(cache_ptr->max_cache_size > cache_ptr->index_size) {
+ if(((cache_ptr->max_cache_size - cache_ptr->index_size) +
+ cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size)
+ space_needed = 0;
+ else
+ space_needed = cache_ptr->min_clean_size -
+ ((cache_ptr->max_cache_size - cache_ptr->index_size) +
+ cache_ptr->cLRU_list_size);
+ } /* end if */
+ else {
+ if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size)
+ space_needed = 0;
+ else
+ space_needed = cache_ptr->min_clean_size -
+ cache_ptr->cLRU_list_size;
+ } /* end else */
+
+ if(space_needed > 0) { /* we have work to do */
+ H5C_cache_entry_t *entry_ptr;
+ int nominated_entries_count = 0;
+ size_t nominated_entries_size = 0;
+
+ HDassert( cache_ptr->slist_len > 0 );
+
+ /* Scan the dirty LRU list from tail forward and nominate sufficient
+ * entries to free up the necessary space.
+ */
+ entry_ptr = cache_ptr->dLRU_tail_ptr;
+ while((nominated_entries_size < space_needed) &&
+ (nominated_entries_count < cache_ptr->slist_len) &&
+ (entry_ptr != NULL)) {
+ haddr_t nominated_addr;
+
+ HDassert( ! (entry_ptr->is_protected) );
+ HDassert( ! (entry_ptr->is_read_only) );
+ HDassert( entry_ptr->ro_ref_count == 0 );
+ HDassert( entry_ptr->is_dirty );
+ HDassert( entry_ptr->in_slist );
+
+ nominated_addr = entry_ptr->addr;
+ if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.")
+
+ nominated_entries_size += entry_ptr->size;
+ nominated_entries_count++;
+ entry_ptr = entry_ptr->aux_prev;
+ } /* end while */
+ HDassert( nominated_entries_count <= cache_ptr->slist_len );
+ HDassert( nominated_entries_size >= space_needed );
+ } /* end if */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_construct_candidate_list__min_clean() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
* Function: H5C_create
*
* Purpose: Allocate, initialize, and return the address of a new
@@ -356,10 +974,6 @@ done:
* Programmer: John Mainzer
* 6/2/04
*
- * JRM -- 11/5/08
- * Added initialization for the new clean_index_size and
- * dirty_index_size fields of H5C_t.
- *
*-------------------------------------------------------------------------
*/
H5C_t *
@@ -1502,9 +2116,7 @@ H5C_flush_to_min_clean(H5F_t * f,
#endif /* end modified code -- commented out for now */
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5C_flush_to_min_clean() */
@@ -1903,33 +2515,6 @@ H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr,
* Programmer: John Mainzer
* 6/2/04
*
- * QAK -- 1/31/08
- * Added initialization for the new free_file_space_on_destroy
- * field.
- *
- * JRM -- 11/13/08
- * Moved test to see if we already have an entry with the
- * specified address in the cache. This was necessary as
- * we used to modify some fields in the entry to be inserted
- * priort to this test, which got the cache confused if the
- * insertion failed because the entry was already present.
- *
- * Also revised the function to call H5C_make_space_in_cache()
- * if the min_clean_size is not met at present, not just if
- * there is insufficient space in the cache for the new
- * entry.
- *
- * The purpose of this modification is to avoid "metadata
- * blizzards" in the write only case. In such instances,
- * the cache was allowed to fill with dirty metadata. When
- * we finally needed to evict an entry to make space, we had
- * to flush out a whole cache full of metadata -- which has
- * interesting performance effects. We hope to avoid (or
- * perhaps more accurately hide) this effect by maintaining
- * the min_clean_size, which should force us to start flushing
- * entries long before we actually have to evict something
- * to make space.
- *
*-------------------------------------------------------------------------
*/
herr_t
@@ -2042,6 +2627,7 @@ H5C_insert_entry(H5F_t * f,
#ifdef H5_HAVE_PARALLEL
entry_ptr->clear_on_unprotect = FALSE;
+ entry_ptr->flush_immediately = FALSE;
#endif /* H5_HAVE_PARALLEL */
entry_ptr->flush_in_progress = FALSE;
@@ -3813,29 +4399,17 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5C_set_prefix(H5C_t * cache_ptr,
- char * prefix)
+H5C_set_prefix(H5C_t * cache_ptr, char * prefix)
{
- herr_t ret_value = SUCCEED; /* Return value */
+ FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5C_set_prefix)
- FUNC_ENTER_NOAPI(H5C_set_prefix, FAIL)
-
- /* This would normally be an assert, but we need to use an HGOTO_ERROR
- * call to shut up the compiler.
- */
- if ( ( ! cache_ptr ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) {
-
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr")
- }
-
- HDassert( prefix );
- HDassert( HDstrlen(prefix) < H5C__PREFIX_LEN ) ;
+ HDassert((cache_ptr) && (cache_ptr->magic == H5C__H5C_T_MAGIC));
+ HDassert(prefix);
+ HDassert(HDstrlen(prefix) < H5C__PREFIX_LEN);
HDstrcpy(&(cache_ptr->prefix[0]), prefix);
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-
+ FUNC_LEAVE_NOAPI(SUCCEED)
} /* H5C_set_prefix() */
@@ -7441,17 +8015,6 @@ H5C_flush_single_entry(H5F_t * f,
}
}
}
-#if 0
- /* this should be useful for debugging from time to time.
- * lets leave it in for now. -- JRM 12/15/04
- */
- else {
- HDfprintf(stdout,
- "H5C_flush_single_entry(): non-existant entry. addr = %a\n",
- addr);
- HDfflush(stdout);
- }
-#endif
#endif /* H5C_DO_SANITY_CHECKS */
if ( ( entry_ptr != NULL ) && ( entry_ptr->is_protected ) )
@@ -7572,133 +8135,6 @@ H5C_flush_single_entry(H5F_t * f,
*/
if ( destroy ) { /* AKA eviction */
-#if 0 /* JRM */
- /* This test code may come in handy -- lets keep it for a while.
- *
- * Note that it will cause spurious errors in the serial case
- * unless we are maintaining the clean and dirty LRU lists.
- */
- {
- if ( entry_ptr->is_dirty )
- {
- if ( cache_ptr->dLRU_head_ptr == NULL )
- HDfprintf(stdout,
- "%s: cache_ptr->dLRU_head_ptr == NULL.\n",
- FUNC);
-
- if ( cache_ptr->dLRU_tail_ptr == NULL )
- HDfprintf(stdout,
- "%s: cache_ptr->dLRU_tail_ptr == NULL.\n",
- FUNC);
-
- if ( cache_ptr->dLRU_list_len <= 0 )
- HDfprintf(stdout,
- "%s: cache_ptr->dLRU_list_len <= 0.\n",
- FUNC);
-
- if ( cache_ptr->dLRU_list_size <= 0 )
- HDfprintf(stdout,
- "%s: cache_ptr->dLRU_list_size <= 0.\n",
- FUNC);
-
- if ( cache_ptr->dLRU_list_size < entry_ptr->size )
- HDfprintf(stdout,
- "%s: cache_ptr->dLRU_list_size < entry_ptr->size.\n",
- FUNC);
-
- if ( ( (cache_ptr->dLRU_list_size) == entry_ptr->size ) &&
- ( ! ( (cache_ptr->dLRU_list_len) == 1 ) ) )
- HDfprintf(stdout,
- "%s: dLRU_list_size == size && dLRU_list_len != 1\n",
- FUNC);
-
- if ( ( entry_ptr->aux_prev == NULL ) &&
- ( cache_ptr->dLRU_head_ptr != entry_ptr ) )
- HDfprintf(stdout,
- "%s: entry_ptr->aux_prev == NULL && dLRU_head_ptr != entry_ptr\n",
- FUNC);
-
- if ( ( entry_ptr->aux_next == NULL ) &&
- ( cache_ptr->dLRU_tail_ptr != entry_ptr ) )
- HDfprintf(stdout,
- "%s: entry_ptr->aux_next == NULL && dLRU_tail_ptr != entry_ptr\n",
- FUNC);
-
- if ( ( cache_ptr->dLRU_list_len == 1 ) &&
- ( ! ( ( cache_ptr->dLRU_head_ptr == entry_ptr ) &&
- ( cache_ptr->dLRU_tail_ptr == entry_ptr ) &&
- ( entry_ptr->aux_next == NULL ) &&
- ( entry_ptr->aux_prev == NULL ) &&
- ( cache_ptr->dLRU_list_size == entry_ptr->size )
- )
- )
- )
- {
- HDfprintf(stdout,
- "%s: single entry dlru sanity check fails\n",
- FUNC);
- }
-
- }
- else
- {
- if ( cache_ptr->cLRU_head_ptr == NULL )
- HDfprintf(stdout,
- "%s: cache_ptr->cLRU_head_ptr == NULL.\n",
- FUNC);
-
- if ( cache_ptr->cLRU_tail_ptr == NULL )
- HDfprintf(stdout,
- "%s: cache_ptr->cLRU_tail_ptr == NULL.\n",
- FUNC);
-
- if ( cache_ptr->cLRU_list_len <= 0 )
- HDfprintf(stdout,
- "%s: cache_ptr->cLRU_list_len <= 0.\n",
- FUNC);
-
- if ( cache_ptr->cLRU_list_size <= 0 )
- HDfprintf(stdout,
- "%s: cache_ptr->cLRU_list_size <= 0.\n",
- FUNC);
-
- if ( cache_ptr->cLRU_list_size < entry_ptr->size )
- HDfprintf(stdout,
- "%s: cache_ptr->cLRU_list_size < entry_ptr->size.\n",
- FUNC);
-
- if ( ( (cache_ptr->cLRU_list_size) == entry_ptr->size ) &&
- ( ! ( (cache_ptr->cLRU_list_len) == 1 ) ) )
- HDfprintf(stdout,
- "%s: cLRU_list_size == size && cLRU_list_len != 1\n",
- FUNC);
-
- if ( ( entry_ptr->aux_prev == NULL ) &&
- ( cache_ptr->cLRU_head_ptr != entry_ptr ) )
- HDfprintf(stdout, "%s: entry_ptr->aux_prev == NULL && cLRU_head_ptr != entry_ptr\n", FUNC);
-
- if ( ( entry_ptr->aux_next == NULL ) &&
- ( cache_ptr->cLRU_tail_ptr != entry_ptr ) )
- HDfprintf(stdout, "%s: entry_ptr->aux_next == NULL && cLRU_tail_ptr != entry_ptr\n", FUNC);
-
- if ( ( cache_ptr->cLRU_list_len == 1 ) &&
- ( ! ( ( cache_ptr->cLRU_head_ptr == entry_ptr ) &&
- ( cache_ptr->cLRU_tail_ptr == entry_ptr ) &&
- ( entry_ptr->aux_next == NULL ) &&
- ( entry_ptr->aux_prev == NULL ) &&
- ( cache_ptr->cLRU_list_size == entry_ptr->size )
- )
- )
- )
- {
- HDfprintf(stdout,
- "%s: single entry clru sanity check fails\n",
- FUNC);
- }
- }
- }
-#endif /* JRM */
-
H5C__UPDATE_RP_FOR_EVICTION(cache_ptr, entry_ptr, FAIL)
} else {
@@ -7864,10 +8300,10 @@ H5C_flush_single_entry(H5F_t * f,
* H5C__UPDATE_INDEX_FOR_ENTRY_CLEAN()).
*/
H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), \
- (entry_ptr->size),\
+ (entry_ptr->size), \
(new_size), \
(entry_ptr), \
- (TRUE));
+ (TRUE))
/* The entry can't be protected since we just flushed it.
* Thus we must update the replacement policy data
@@ -7923,9 +8359,7 @@ H5C_flush_single_entry(H5F_t * f,
}
done:
-
FUNC_LEAVE_NOAPI(ret_value)
-
} /* H5C_flush_single_entry() */
@@ -8023,6 +8457,7 @@ H5C_load_entry(H5F_t * f,
entry->flush_marker = FALSE;
#ifdef H5_HAVE_PARALLEL
entry->clear_on_unprotect = FALSE;
+ entry->flush_immediately = FALSE;
#endif /* H5_HAVE_PARALLEL */
entry->flush_in_progress = FALSE;
entry->destroy_in_progress = FALSE;
@@ -8862,20 +9297,17 @@ H5C_flush_marked_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
/* Flush all marked entries */
- if (H5C_flush_cache(f,
+ if(H5C_flush_cache(f,
primary_dxpl_id,
secondary_dxpl_id,
H5C__FLUSH_MARKED_ENTRIES_FLAG |
H5C__FLUSH_IGNORE_PROTECTED_FLAG) < 0) {
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush cache")
-
} /* end if */
done:
-
- FUNC_LEAVE_NOAPI(ret_value);
-
+ FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_flush_marked_entries */
#if H5C_DO_TAGGING_SANITY_CHECKS
@@ -8891,8 +9323,6 @@ done:
* Programmer: Mike McGreevy
* January 14, 2010
*
- * Modifications:
- *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -8959,10 +9389,7 @@ H5C_verify_tag(int id, haddr_t tag)
}
done:
-
- /* Function Leave Macro */
- FUNC_LEAVE_NOAPI(ret_value);
-
+ FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_verify_tag */
#endif
@@ -8980,43 +9407,35 @@ done:
* Programmer: Mike McGreevy
* March 17, 2010
*
- * Modifications:
- *
*-------------------------------------------------------------------------
*/
-herr_t
+void
H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag)
{
/* Variable Declarations */
- herr_t ret_value = SUCCEED; /* Return Value */
int i = 0; /* Iterator */
- H5C_cache_entry_t *next_entry_ptr = NULL; /* entry pointer */
/* Assertions */
HDassert(cache_ptr);
/* Function Enter Macro */
- FUNC_ENTER_NOAPI(H5C_retag_copied_metadata, FAIL)
+ FUNC_ENTER_NOAPI_NOFUNC(H5C_retag_copied_metadata)
/* Iterate through entries, retagging those with the H5AC__COPIED_TAG tag */
- for (i = 0; i < H5C__HASH_TABLE_LEN; i++) {
+ for(i = 0; i < H5C__HASH_TABLE_LEN; i++) {
+ H5C_cache_entry_t *next_entry_ptr; /* entry pointer */
next_entry_ptr = cache_ptr->index[i];
-
- while ( next_entry_ptr != NULL ) {
- if (cache_ptr->index[i] != NULL) {
- if ((cache_ptr->index[i])->tag == H5AC__COPIED_TAG) {
+ while(next_entry_ptr != NULL) {
+ if(cache_ptr->index[i] != NULL) {
+ if((cache_ptr->index[i])->tag == H5AC__COPIED_TAG)
(cache_ptr->index[i])->tag = metadata_tag;
- } /* end if */
} /* end if */
+
next_entry_ptr = next_entry_ptr->ht_next;
} /* end while */
-
} /* end for */
-done:
-
- /* Function Leave Macro */
- FUNC_LEAVE_NOAPI(ret_value);
-
+ FUNC_LEAVE_NOAPI_VOID
} /* H5C_retag_copied_metadata */
+
diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h
index 22d3514..71fb405 100644
--- a/src/H5Cpkg.h
+++ b/src/H5Cpkg.h
@@ -1875,7 +1875,7 @@ if ( ( (cache_ptr) == NULL ) || \
( ( !( was_clean ) || \
( (cache_ptr)->clean_index_size < (old_size) ) ) && \
( ( (was_clean) ) || \
- ( (cache_ptr)->dirty_index_size < (old_size) ) ) ) \
+ ( (cache_ptr)->dirty_index_size < (old_size) ) ) ) || \
( (entry_ptr) == NULL ) ) { \
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
"Pre HT entry size change SC failed") \
@@ -1893,7 +1893,7 @@ if ( ( (cache_ptr) == NULL ) || \
( ( !((entry_ptr)->is_dirty ) || \
( (cache_ptr)->dirty_index_size < (new_size) ) ) && \
( ( ((entry_ptr)->is_dirty) ) || \
- ( (cache_ptr)->clean_index_size < (new_size) ) ) ) \
+ ( (cache_ptr)->clean_index_size < (new_size) ) ) ) || \
( ( (cache_ptr)->index_len == 1 ) && \
( (cache_ptr)->index_size != (new_size) ) ) ) { \
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
@@ -2098,24 +2098,25 @@ if ( (cache_ptr)->index_size != \
H5C__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(cache_ptr, entry_ptr); \
}
-#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size, \
- entry_ptr, was_clean) \
-{ \
- H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \
- entry_ptr, was_clean) \
- (cache_ptr)->index_size -= (old_size); \
- (cache_ptr)->index_size += (new_size); \
- if ( was_clean ) { \
- (cache_ptr)->clean_index_size -= (old_size); \
- } else { \
- (cache_ptr)->dirty_index_size -= (old_size); \
- } \
- if ( (entry_ptr)->is_dirty ) { \
- (cache_ptr)->dirty_index_size += (new_size); \
- } else { \
- (cache_ptr)->clean_index_size += (new_size); \
- } \
- H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, entry_ptr) \
+#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size, \
+ entry_ptr, was_clean) \
+{ \
+ H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \
+ entry_ptr, was_clean) \
+ (cache_ptr)->index_size -= (old_size); \
+ (cache_ptr)->index_size += (new_size); \
+ if ( was_clean ) { \
+ (cache_ptr)->clean_index_size -= (old_size); \
+ } else { \
+ (cache_ptr)->dirty_index_size -= (old_size); \
+ } \
+ if ( (entry_ptr)->is_dirty ) { \
+ (cache_ptr)->dirty_index_size += (new_size); \
+ } else { \
+ (cache_ptr)->clean_index_size += (new_size); \
+ } \
+ H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \
+ entry_ptr) \
}
diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h
index 3f38500..7e14872 100644
--- a/src/H5Cprivate.h
+++ b/src/H5Cprivate.h
@@ -383,6 +383,14 @@ typedef herr_t (*H5C_log_flush_func_t)(H5C_t * cache_ptr,
* the unprotect, the entry's is_dirty flag is reset by flushing
* it with the H5C__FLUSH_CLEAR_ONLY_FLAG.
*
+ * flush_immediately: Boolean flag used only in Phdf5 -- and then only
+ * for H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED.
+ *
+ * When a destributed metadata write is triggered at a
+ * sync point, this field is used to mark entries that
+ * must be flushed before leaving the sync point. At all
+ * other times, this field should be set to FALSE.
+ *
* flush_in_progress: Boolean flag that is set to true iff the entry
* is in the process of being flushed. This allows the cache
* to detect when a call is the result of a flush callback.
@@ -581,6 +589,7 @@ typedef struct H5C_cache_entry_t
hbool_t flush_marker;
#ifdef H5_HAVE_PARALLEL
hbool_t clear_on_unprotect;
+ hbool_t flush_immediately;
#endif /* H5_HAVE_PARALLEL */
hbool_t flush_in_progress;
hbool_t destroy_in_progress;
@@ -1034,6 +1043,21 @@ typedef struct H5C_auto_size_ctl_t
#define H5C__FREE_FILE_SPACE_FLAG 0x0800
#define H5C__TAKE_OWNERSHIP_FLAG 0x1000
+#ifdef H5_HAVE_PARALLEL
+H5_DLL herr_t H5C_apply_candidate_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr,
+ int num_candidates,
+ haddr_t * candidates_list_ptr,
+ int mpi_rank,
+ int mpi_size);
+
+H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr);
+
+H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr);
+#endif /* H5_HAVE_PARALLEL */
+
H5_DLL H5C_t * H5C_create(size_t max_cache_size,
size_t min_clean_size,
int max_type_id,
@@ -1177,7 +1201,7 @@ H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t * config_ptr,
H5_DLL herr_t H5C_ignore_tags(H5C_t * cache_ptr);
-H5_DLL herr_t H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag);
+H5_DLL void H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag);
#endif /* !_H5Cprivate_H */
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 117dfc7..b382fb4 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1771,6 +1771,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
} /* end if */
} /* end if */
else {
+#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */
/* Only one process can do the actual metadata write */
if(file->mpi_rank != H5_PAR_META_WRITE)
#ifdef LATER
@@ -1778,6 +1779,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
#else /* LATER */
HGOTO_DONE(SUCCEED) /* skip the actual write */
#endif /* LATER */
+#endif /* JRM */
} /* end if */
/* Write the data. */
diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c
index d5e58e9..5bea8fe 100644
--- a/src/H5FDmpiposix.c
+++ b/src/H5FDmpiposix.c
@@ -1274,9 +1274,11 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
#endif /* JRM */
+#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */
/* Only one process will do the actual write if all procs in comm write same metadata */
if (file->mpi_rank != H5_PAR_META_WRITE)
HGOTO_DONE(SUCCEED) /* skip the actual write */
+#endif /* JRM */
} /* end if */
#ifdef REPORT_IO
diff --git a/test/cache_api.c b/test/cache_api.c
index 8fd2912..c6e27c6 100644
--- a/test/cache_api.c
+++ b/test/cache_api.c
@@ -65,11 +65,8 @@ static void check_file_mdc_api_errs(void);
* Programmer: John Mainzer
* 4/12/04
*
- * Modifications:
- *
*-------------------------------------------------------------------------
*/
-
static void
check_fapl_mdc_api_calls(void)
{
@@ -113,7 +110,9 @@ check_fapl_mdc_api_calls(void)
/* int epochs_before_eviction = */ 4,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t scratch;
H5C_auto_size_ctl_t default_auto_size_ctl;
@@ -560,7 +559,9 @@ check_file_mdc_api_calls(void)
/* int epochs_before_eviction = */ 4,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t mod_config_2 =
{
@@ -593,7 +594,9 @@ check_file_mdc_api_calls(void)
/* int epochs_before_eviction = */ 4,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t mod_config_3 =
{
@@ -626,7 +629,9 @@ check_file_mdc_api_calls(void)
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ FALSE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t mod_config_4 =
{
@@ -660,7 +665,9 @@ check_file_mdc_api_calls(void)
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
TESTING("MDC/FILE related API calls");
@@ -915,7 +922,9 @@ mdc_api_call_smoke_check(int express_test)
/* int epochs_before_eviction = */ 2,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t mod_config_2 =
{
@@ -948,7 +957,9 @@ mdc_api_call_smoke_check(int express_test)
/* int epochs_before_eviction = */ 2,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
H5AC_cache_config_t mod_config_3 =
{
@@ -981,7 +992,9 @@ mdc_api_call_smoke_check(int express_test)
/* int epochs_before_eviction = */ 2,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.05,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
};
TESTING("MDC API smoke check");
@@ -1514,7 +1527,7 @@ mdc_api_call_smoke_check(int express_test)
* used to test error rejection in the MDC related API calls.
*/
-#define NUM_INVALID_CONFIGS 41
+#define NUM_INVALID_CONFIGS 42
H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
{
@@ -1549,7 +1562,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 1 -- bad rpt_fcn_enabled */
@@ -1582,7 +1597,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 2 -- bad open_trace_file */
@@ -1615,7 +1632,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 3 -- bad close_trace_file */
@@ -1648,7 +1667,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 4 -- open_trace_file == TRUE and empty trace_file_name */
@@ -1681,7 +1702,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 5 -- bad set_initial_size */
@@ -1714,7 +1737,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 6 -- max_size too big */
@@ -1747,7 +1772,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 7 -- min_size too small */
@@ -1780,7 +1807,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 8 -- min_size > max_size */
@@ -1813,7 +1842,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 9 -- initial size out of range (too big) */
@@ -1846,7 +1877,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 10 -- initial_size out of range (too small) */
@@ -1879,7 +1912,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 11 -- min_clean_fraction too big */
@@ -1912,7 +1947,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 12 -- min_clean_fraction too small */
@@ -1945,7 +1982,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 13 -- epoch_length too small */
@@ -1978,7 +2017,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 14 -- epoch_length too big */
@@ -2011,7 +2052,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 15 -- invalid incr_mode */
@@ -2044,7 +2087,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 16 -- lower_hr_threshold too small */
@@ -2077,7 +2122,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 17 -- lower_hr_threshold too big */
@@ -2110,7 +2157,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 18 -- increment too small */
@@ -2143,7 +2192,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 19 -- bad apply_max_increment */
@@ -2176,7 +2227,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 20 -- invalid flash_incr_mode */
@@ -2209,7 +2262,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 21 -- flash_multiple too small */
@@ -2242,7 +2297,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 22 -- flash_multiple too big */
@@ -2275,7 +2332,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 23 -- flash_threshold too small */
@@ -2308,7 +2367,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 24 -- flash_threshold too big */
@@ -2341,7 +2402,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 25 -- bad decr_mode */
@@ -2374,7 +2437,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 26 -- upper_hr_threshold too big */
@@ -2407,7 +2472,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 27 -- decrement too small */
@@ -2440,7 +2507,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 28 -- decrement too big */
@@ -2473,7 +2542,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 29 -- epochs_before_eviction too small */
@@ -2506,7 +2577,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 0,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 30 -- epochs_before_eviction too big */
@@ -2539,7 +2612,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ H5C__MAX_EPOCH_MARKERS + 1,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 31 -- invalid apply_empty_reserve */
@@ -2572,7 +2647,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ 2,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 32 -- empty_reserve too small */
@@ -2605,7 +2682,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ -0.0000000001,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 33 -- empty_reserve too big */
@@ -2638,7 +2717,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 1.00000000001,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 34 -- upper_hr_threshold too small */
@@ -2671,7 +2752,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 35 -- upper_hr_threshold too big */
@@ -2704,7 +2787,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 36 -- upper_hr_threshold <= lower_hr_threshold */
@@ -2737,7 +2822,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 37 -- dirty_bytes_threshold too small */
@@ -2770,7 +2857,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (H5C__MIN_MAX_CACHE_SIZE / 2) - 1
+ /* int dirty_bytes_threshold = */ (H5C__MIN_MAX_CACHE_SIZE / 2) - 1,
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 38 -- dirty_bytes_threshold too big */
@@ -2803,7 +2892,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (H5C__MAX_MAX_CACHE_SIZE / 4) + 1
+ /* int dirty_bytes_threshold = */ (H5C__MAX_MAX_CACHE_SIZE / 4) + 1,
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 39 -- attempt to disable evictions when auto incr enabled */
@@ -2836,7 +2927,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
},
{
/* 40 -- attempt to disable evictions when auto decr enabled */
@@ -2869,7 +2962,43 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] =
/* int epochs_before_eviction = */ 3,
/* hbool_t apply_empty_reserve = */ TRUE,
/* double empty_reserve = */ 0.1,
- /* int dirty_bytes_threshold = */ (256 * 1024)
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */
+ H5AC__DEFAULT_METADATA_WRITE_STRATEGY
+ },
+ {
+ /* 41 -- unknown metadata write strategy */
+ /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER,
+ /* hbool_t rpt_fcn_enabled = */ FALSE,
+ /* hbool_t open_trace_file = */ FALSE,
+ /* hbool_t close_trace_file = */ FALSE,
+ /* char trace_file_name[] = */ "",
+ /* hbool_t evictions_enabled = */ TRUE,
+ /* hbool_t set_initial_size = */ TRUE,
+ /* size_t initial_size = */ (1 * 1024 * 1024),
+ /* double min_clean_fraction = */ 0.25,
+ /* size_t max_size = */ (16 * 1024 * 1024),
+ /* size_t min_size = */ ( 1 * 1024 * 1024),
+ /* long int epoch_length = */ 50000,
+ /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold,
+ /* double lower_hr_threshold = */ 0.9,
+ /* double increment = */ 2.0,
+ /* hbool_t apply_max_increment = */ TRUE,
+ /* size_t max_increment = */ (4 * 1024 * 1024),
+ /* enum H5C_cache_flash_incr_mode */
+ /* flash_incr_mode = */ H5C_flash_incr__off,
+ /* double flash_multiple = */ 2.0,
+ /* double flash_threshold = */ 0.5,
+ /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold,
+ /* double upper_hr_threshold = */ 0.999,
+ /* double decrement = */ 0.9,
+ /* hbool_t apply_max_decrement = */ TRUE,
+ /* size_t max_decrement = */ (1 * 1024 * 1024),
+ /* int epochs_before_eviction = */ 3,
+ /* hbool_t apply_empty_reserve = */ TRUE,
+ /* double empty_reserve = */ 0.1,
+ /* int dirty_bytes_threshold = */ (256 * 1024),
+ /* int metadata_write_strategy = */ -1
}
};
diff --git a/test/cache_common.h b/test/cache_common.h
index f493239..d7e7f1a 100644
--- a/test/cache_common.h
+++ b/test/cache_common.h
@@ -412,38 +412,40 @@ if ( ( (cache_ptr) == NULL ) || \
/* Macros used in H5AC level tests */
-#define CACHE_CONFIGS_EQUAL(a, b, cmp_set_init, cmp_init_size) \
- ( ( (a).version == (b).version ) && \
- ( (a).rpt_fcn_enabled == (b).rpt_fcn_enabled ) && \
- ( (a).open_trace_file == (b).open_trace_file ) && \
- ( (a).close_trace_file == (b).close_trace_file ) && \
- ( ( (a).open_trace_file == FALSE ) || \
- ( strcmp((a).trace_file_name, (b).trace_file_name) == 0 ) ) && \
- ( (a).evictions_enabled == (b).evictions_enabled ) && \
- ( ( ! cmp_set_init ) || \
- ( (a).set_initial_size == (b).set_initial_size ) ) && \
- ( ( ! cmp_init_size ) || \
- ( (a).initial_size == (b).initial_size ) ) && \
- ( (a).min_clean_fraction == (b).min_clean_fraction ) && \
- ( (a).max_size == (b).max_size ) && \
- ( (a).min_size == (b).min_size ) && \
- ( (a).epoch_length == (b).epoch_length ) && \
- ( (a).incr_mode == (b).incr_mode ) && \
- ( (a).lower_hr_threshold == (b).lower_hr_threshold ) && \
- ( (a).increment == (b).increment ) && \
- ( (a).apply_max_increment == (b).apply_max_increment ) && \
- ( (a).max_increment == (b).max_increment ) && \
- ( (a).flash_incr_mode == (b).flash_incr_mode ) && \
- ( (a).flash_multiple == (b).flash_multiple ) && \
- ( (a).flash_threshold == (b).flash_threshold ) && \
- ( (a).decr_mode == (b).decr_mode ) && \
- ( (a).upper_hr_threshold == (b).upper_hr_threshold ) && \
- ( (a).decrement == (b).decrement ) && \
- ( (a).apply_max_decrement == (b).apply_max_decrement ) && \
- ( (a).max_decrement == (b).max_decrement ) && \
- ( (a).epochs_before_eviction == (b).epochs_before_eviction ) && \
- ( (a).apply_empty_reserve == (b).apply_empty_reserve ) && \
- ( (a).empty_reserve == (b).empty_reserve ) )
+#define CACHE_CONFIGS_EQUAL(a, b, cmp_set_init, cmp_init_size) \
+ ( ( (a).version == (b).version ) && \
+ ( (a).rpt_fcn_enabled == (b).rpt_fcn_enabled ) && \
+ ( (a).open_trace_file == (b).open_trace_file ) && \
+ ( (a).close_trace_file == (b).close_trace_file ) && \
+ ( ( (a).open_trace_file == FALSE ) || \
+ ( strcmp((a).trace_file_name, (b).trace_file_name) == 0 ) ) && \
+ ( (a).evictions_enabled == (b).evictions_enabled ) && \
+ ( ( ! cmp_set_init ) || \
+ ( (a).set_initial_size == (b).set_initial_size ) ) && \
+ ( ( ! cmp_init_size ) || \
+ ( (a).initial_size == (b).initial_size ) ) && \
+ ( (a).min_clean_fraction == (b).min_clean_fraction ) && \
+ ( (a).max_size == (b).max_size ) && \
+ ( (a).min_size == (b).min_size ) && \
+ ( (a).epoch_length == (b).epoch_length ) && \
+ ( (a).incr_mode == (b).incr_mode ) && \
+ ( (a).lower_hr_threshold == (b).lower_hr_threshold ) && \
+ ( (a).increment == (b).increment ) && \
+ ( (a).apply_max_increment == (b).apply_max_increment ) && \
+ ( (a).max_increment == (b).max_increment ) && \
+ ( (a).flash_incr_mode == (b).flash_incr_mode ) && \
+ ( (a).flash_multiple == (b).flash_multiple ) && \
+ ( (a).flash_threshold == (b).flash_threshold ) && \
+ ( (a).decr_mode == (b).decr_mode ) && \
+ ( (a).upper_hr_threshold == (b).upper_hr_threshold ) && \
+ ( (a).decrement == (b).decrement ) && \
+ ( (a).apply_max_decrement == (b).apply_max_decrement ) && \
+ ( (a).max_decrement == (b).max_decrement ) && \
+ ( (a).epochs_before_eviction == (b).epochs_before_eviction ) && \
+ ( (a).apply_empty_reserve == (b).apply_empty_reserve ) && \
+ ( (a).empty_reserve == (b).empty_reserve ) && \
+ ( (a).dirty_bytes_threshold == (b).dirty_bytes_threshold ) && \
+ ( (a).metadata_write_strategy == (b).metadata_write_strategy ) )
#define XLATE_EXT_TO_INT_MDC_CONFIG(i, e) \
{ \
diff --git a/testpar/t_cache.c b/testpar/t_cache.c
index 554a8cc..0579829 100644
--- a/testpar/t_cache.c
+++ b/testpar/t_cache.c
@@ -77,6 +77,11 @@ long global_dirty_pins = 0;
long local_pins = 0;
+/* the following fields are used by the server process only */
+int total_reads = 0;
+int total_writes = 0;
+
+
/*****************************************************************************
* struct datum
*
@@ -135,6 +140,14 @@ long local_pins = 0;
* flushed: Boolean flag that is set to true whenever the entry is
* dirty, and is flushed via a call to flush_datum().
*
+ * reads: Integer field used to maintain a count of the number of
+ * times this entry has been read from the server since
+ * the last time the read and write counts were reset.
+ *
+ * writes: Integer field used to maintain a count of the number of
+ * times this entry has been written to the server since
+ * the last time the read and write counts were reset.
+ *
* index: Index of this instance of datum in the data_index[] array
* discussed below.
*
@@ -154,6 +167,8 @@ struct datum
hbool_t local_pinned;
hbool_t cleared;
hbool_t flushed;
+ int reads;
+ int writes;
int index;
};
@@ -217,6 +232,38 @@ int data_index[NUM_DATA_ENTRIES];
/*****************************************************************************
+ * The following two #defines are used to control code that is in turn used
+ * to force "POSIX" semantics on the server process used to simulate metadata
+ * reads and writes. Without some such mechanism, the test code contains
+ * race conditions that will frequently cause spurious failures.
+ *
+ * When set to TRUE, DO_WRITE_REQ_ACK forces the server to send an ack after
+ * each write request, and the client to wait until the ack is received
+ * before proceeding. This was my first solution to the problem, and at
+ * first glance, it would seem to have a lot of unnecessary overhead.
+ *
+ * In an attempt to reduce the overhead, I implemented a second solution
+ * in which no acks are sent after writes. Instead, the metadata cache is
+ * provided with a callback function to call after each sequence of writes.
+ * This callback simply causes the client to send the server process a
+ * "sync" message and and await an ack in reply.
+ *
+ * Strangely, at least on Phoenix, the first solution runs faster by a
+ * rather large margin. However, I can imagine this changing with
+ * different OS's and MPI implementatins.
+ *
+ * Thus I have left code supporting the second solution in place.
+ *
+ * Note that while one of these two #defines must be set to TRUE, there
+ * should never be any need to set both of them to TRUE (although the
+ * tests will still function with this setting).
+ *****************************************************************************/
+
+#define DO_WRITE_REQ_ACK TRUE
+#define DO_SYNC_AFTER_WRITE FALSE
+
+
+/*****************************************************************************
* struct mssg
*
* The mssg structure is used as a generic container for messages to
@@ -236,22 +283,32 @@ int data_index[NUM_DATA_ENTRIES];
*
* ver: Version number of a datum. Not used in all mssgs.
*
+ * count: Reported number of total/entry reads/writes. Not used
+ * in all mssgs.
+ *
* magic: Magic number for error detection. Must be set to
* MSSG_MAGIC.
*
*****************************************************************************/
-#define DO_WRITE_REQ_ACK FALSE
-#define DO_SYNC_AFTER_WRITE TRUE
-
-#define WRITE_REQ_CODE 0
-#define WRITE_REQ_ACK_CODE 1
-#define READ_REQ_CODE 2
-#define READ_REQ_REPLY_CODE 3
-#define SYNC_REQ_CODE 4
-#define SYNC_ACK_CODE 5
-#define DONE_REQ_CODE 6
-#define MAX_REQ_CODE 6
+#define WRITE_REQ_CODE 0
+#define WRITE_REQ_ACK_CODE 1
+#define READ_REQ_CODE 2
+#define READ_REQ_REPLY_CODE 3
+#define SYNC_REQ_CODE 4
+#define SYNC_ACK_CODE 5
+#define REQ_TTL_WRITES_CODE 6
+#define REQ_TTL_WRITES_RPLY_CODE 7
+#define REQ_TTL_READS_CODE 8
+#define REQ_TTL_READS_RPLY_CODE 9
+#define REQ_ENTRY_WRITES_CODE 10
+#define REQ_ENTRY_WRITES_RPLY_CODE 11
+#define REQ_ENTRY_READS_CODE 12
+#define REQ_ENTRY_READS_RPLY_CODE 13
+#define REQ_RW_COUNT_RESET_CODE 14
+#define REQ_RW_COUNT_RESET_RPLY_CODE 15
+#define DONE_REQ_CODE 16
+#define MAX_REQ_CODE 16
#define MSSG_MAGIC 0x1248
@@ -262,8 +319,9 @@ struct mssg_t
int dest;
long int mssg_num;
haddr_t base_addr;
- int len;
+ unsigned len;
int ver;
+ int count;
unsigned magic;
};
@@ -306,10 +364,16 @@ static hbool_t takedown_derived_types(void);
/* server functions */
+static hbool_t reset_server_counters(void);
static hbool_t server_main(void);
static hbool_t serve_read_request(struct mssg_t * mssg_ptr);
static hbool_t serve_sync_request(struct mssg_t * mssg_ptr);
static hbool_t serve_write_request(struct mssg_t * mssg_ptr);
+static hbool_t serve_total_writes_request(struct mssg_t * mssg_ptr);
+static hbool_t serve_total_reads_request(struct mssg_t * mssg_ptr);
+static hbool_t serve_entry_writes_request(struct mssg_t * mssg_ptr);
+static hbool_t serve_entry_reads_request(struct mssg_t * mssg_ptr);
+static hbool_t serve_rw_count_reset_request(struct mssg_t * mssg_ptr);
/* call back functions & related data structures */
@@ -361,11 +425,19 @@ void mark_entry_dirty(int32_t idx);
void pin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global, hbool_t dirty);
void pin_protected_entry(int32_t idx, hbool_t global);
void move_entry(H5F_t * file_ptr, int32_t old_idx, int32_t new_idx);
+static hbool_t reset_server_counts(void);
void resize_entry(int32_t idx, size_t new_size);
-hbool_t setup_cache_for_test(hid_t * fid_ptr, H5F_t ** file_ptr_ptr,
- H5C_t ** cache_ptr_ptr);
+hbool_t setup_cache_for_test(hid_t * fid_ptr,
+ H5F_t ** file_ptr_ptr,
+ H5C_t ** cache_ptr_ptr,
+ int metadata_write_strategy);
void setup_rand(void);
hbool_t take_down_cache(hid_t fid);
+static hbool_t verify_entry_reads(haddr_t addr, int expected_entry_reads);
+static hbool_t verify_entry_writes(haddr_t addr, int expected_entry_writes);
+static hbool_t verify_total_reads(int expected_total_reads);
+static hbool_t verify_total_writes(int expected_total_writes);
+void verify_writes(int num_writes, haddr_t * written_entries_tbl);
void unlock_entry(H5F_t * file_ptr, int32_t type, unsigned int flags);
void unpin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global,
hbool_t dirty, hbool_t via_unprotect);
@@ -374,12 +446,12 @@ void unpin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global,
/* test functions */
hbool_t server_smoke_check(void);
-hbool_t smoke_check_1(void);
-hbool_t smoke_check_2(void);
-hbool_t smoke_check_3(void);
-hbool_t smoke_check_4(void);
-hbool_t smoke_check_5(void);
-hbool_t trace_file_check(void);
+hbool_t smoke_check_1(int metadata_write_strategy);
+hbool_t smoke_check_2(int metadata_write_strategy);
+hbool_t smoke_check_3(int metadata_write_strategy);
+hbool_t smoke_check_4(int metadata_write_strategy);
+hbool_t smoke_check_5(int metadata_write_strategy);
+hbool_t trace_file_check(int metadata_write_strategy);
/*****************************************************************************/
@@ -631,12 +703,7 @@ set_up_file_communicator(void)
*
* Programmer: JRM -- 12/20/05
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
static int
addr_to_datum_index(haddr_t base_addr)
{
@@ -684,20 +751,7 @@ addr_to_datum_index(haddr_t base_addr)
*
* Programmer: JRM -- 12/20/05
*
- * Modifications:
- *
- * JRM -- 7/11/06
- * Added support for the local_len field.
- *
- * JRM -- 2/4/09
- * Added initialization for the cleared and flushed fields.
- *
- * Mike McGreevy, July 2, 2009
- * Changed base address from 0 to 512 since the superblock will
- * always be at address 0.
- *
*****************************************************************************/
-
static void
init_data(void)
{
@@ -735,6 +789,8 @@ init_data(void)
data[i].local_pinned = FALSE;
data[i].cleared = FALSE;
data[i].flushed = FALSE;
+ data[i].reads = 0;
+ data[i].writes = 0;
data[i].index = i;
data_index[i] = i;
@@ -773,12 +829,7 @@ init_data(void)
*
* Programmer: JRM -- 4/25/06
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
static int
do_express_test(void)
{
@@ -850,6 +901,7 @@ do_sync(void)
mssg.base_addr = 0;
mssg.len = 0;
mssg.ver = 0;
+ mssg.count = 0;
mssg.magic = MSSG_MAGIC;
if ( ! send_mssg(&mssg, FALSE) ) {
@@ -1103,7 +1155,7 @@ send_mssg(struct mssg_t *mssg_ptr,
} /* send_mssg() */
-
+
/*****************************************************************************
*
* Function: setup_derived_types()
@@ -1117,12 +1169,7 @@ send_mssg(struct mssg_t *mssg_ptr,
*
* Programmer: JRM -- 12/22/05
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
static hbool_t
setup_derived_types(void)
{
@@ -1130,11 +1177,11 @@ setup_derived_types(void)
hbool_t success = TRUE;
int i;
int result;
- MPI_Datatype mpi_types[8] = {MPI_INT, MPI_INT, MPI_INT, MPI_LONG,
+ MPI_Datatype mpi_types[9] = {MPI_INT, MPI_INT, MPI_INT, MPI_LONG,
HADDR_AS_MPI_TYPE, MPI_INT, MPI_INT,
- MPI_UNSIGNED};
- int block_len[8] = {1, 1, 1, 1, 1, 1, 1, 1};
- MPI_Aint displs[8];
+ MPI_INT, MPI_UNSIGNED};
+ int block_len[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1};
+ MPI_Aint displs[9];
struct mssg_t sample; /* used to compute displacements */
/* setup the displacements array */
@@ -1145,7 +1192,8 @@ setup_derived_types(void)
( MPI_SUCCESS != MPI_Address(&sample.base_addr, &displs[4]) ) ||
( MPI_SUCCESS != MPI_Address(&sample.len, &displs[5]) ) ||
( MPI_SUCCESS != MPI_Address(&sample.ver, &displs[6]) ) ||
- ( MPI_SUCCESS != MPI_Address(&sample.magic, &displs[7]) ) ) {
+ ( MPI_SUCCESS != MPI_Address(&sample.count, &displs[7]) ) ||
+ ( MPI_SUCCESS != MPI_Address(&sample.magic, &displs[8]) ) ) {
nerrors++;
success = FALSE;
@@ -1157,7 +1205,7 @@ setup_derived_types(void)
} else {
/* Now calculate the actual displacements */
- for ( i = 7; i >= 0; --i)
+ for ( i = 8; i >= 0; --i)
{
displs[i] -= displs[0];
}
@@ -1165,7 +1213,7 @@ setup_derived_types(void)
if ( success ) {
- result = MPI_Type_struct(8, block_len, displs, mpi_types, &mpi_mssg_t);
+ result = MPI_Type_struct(9, block_len, displs, mpi_types, &mpi_mssg_t);
if ( result != MPI_SUCCESS ) {
@@ -1247,6 +1295,79 @@ takedown_derived_types(void)
/*****************************************************************************
*
+ * Function: reset_server_counters()
+ *
+ * Purpose: Reset the counters maintained by the server, doing a
+ * sanity check in passing.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *****************************************************************************/
+
+static hbool_t
+reset_server_counters(void)
+{
+ const char * fcn_name = "reset_server_counters()";
+ hbool_t success = TRUE;
+ int i;
+ long actual_total_reads = 0;
+ long actual_total_writes = 0;
+
+ for ( i = 0; i < NUM_DATA_ENTRIES; i++ )
+ {
+ if ( data[i].reads > 0 ) {
+
+ actual_total_reads += data[i].reads;
+ data[i].reads = 0;
+ }
+
+ if ( data[i].writes > 0 ) {
+
+ actual_total_writes += data[i].writes;
+ data[i].writes = 0;
+ }
+ }
+
+ if ( actual_total_reads != total_reads ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: actual/total reads mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name,
+ actual_total_reads, total_reads);
+ }
+ }
+
+ if ( actual_total_writes != total_writes ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: actual/total writes mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name,
+ actual_total_writes, total_writes);
+ }
+ }
+
+ total_reads = 0;
+ total_writes = 0;
+
+ return(success);
+
+} /* reset_server_counters() */
+
+
+/*****************************************************************************
+ *
* Function: server_main()
*
* Purpose: Main function for the server process. This process exists
@@ -1303,7 +1424,8 @@ server_main(void)
case WRITE_REQ_ACK_CODE:
success = FALSE;
- HDfprintf(stdout, "%s: Received write ack?!?.\n", fcn_name);
+ if(verbose)
+ HDfprintf(stdout, "%s: Received write ack?!?.\n", fcn_name);
break;
case READ_REQ_CODE:
@@ -1312,8 +1434,8 @@ server_main(void)
case READ_REQ_REPLY_CODE:
success = FALSE;
- HDfprintf(stdout, "%s: Received read req reply?!?.\n",
- fcn_name);
+ if(verbose)
+ HDfprintf(stdout, "%s: Received read req reply?!?.\n", fcn_name);
break;
case SYNC_REQ_CODE:
@@ -1322,27 +1444,71 @@ server_main(void)
case SYNC_ACK_CODE:
success = FALSE;
- HDfprintf(stdout, "%s: Received sync ack?!?.\n",
- fcn_name);
+ if(verbose)
+ HDfprintf(stdout, "%s: Received sync ack?!?.\n", fcn_name);
+ break;
+
+ case REQ_TTL_WRITES_CODE:
+ success = serve_total_writes_request(&mssg);
+ break;
+
+ case REQ_TTL_WRITES_RPLY_CODE:
+ success = FALSE;
+ if(verbose)
+ HDfprintf(stdout, "%s: Received total writes reply?!?.\n", fcn_name);
+ break;
+
+ case REQ_TTL_READS_CODE:
+ success = serve_total_reads_request(&mssg);
+ break;
+
+ case REQ_TTL_READS_RPLY_CODE:
+ success = FALSE;
+ if(verbose)
+ HDfprintf(stdout, "%s: Received total reads reply?!?.\n", fcn_name);
+ break;
+
+ case REQ_ENTRY_WRITES_CODE:
+ success = serve_entry_writes_request(&mssg);
+ break;
+
+ case REQ_ENTRY_WRITES_RPLY_CODE:
+ success = FALSE;
+ if(verbose)
+ HDfprintf(stdout, "%s: Received entry writes reply?!?.\n", fcn_name);
+ break;
+
+ case REQ_ENTRY_READS_CODE:
+ success = serve_entry_reads_request(&mssg);
+ break;
+
+ case REQ_ENTRY_READS_RPLY_CODE:
+ success = FALSE;
+ if(verbose)
+ HDfprintf(stdout, "%s: Received entry reads reply?!?.\n", fcn_name);
+ break;
+
+ case REQ_RW_COUNT_RESET_CODE:
+ success = serve_rw_count_reset_request(&mssg);
+ break;
+
+ case REQ_RW_COUNT_RESET_RPLY_CODE:
+ success = FALSE;
+ if(verbose)
+ HDfprintf(stdout, "%s: Received RW count reset reply?!?.\n", fcn_name);
break;
case DONE_REQ_CODE:
done_count++;
- /* HDfprintf(stdout, "%d:%s: done_count = %d.\n",
- world_mpi_rank, fcn_name, done_count); */
- if ( done_count >= file_mpi_size ) {
-
+ if(done_count >= file_mpi_size)
done = TRUE;
- }
break;
default:
nerrors++;
success = FALSE;
- if ( verbose ) {
- HDfprintf(stdout, "%d:%s: Unknown request code.\n",
- world_mpi_rank, fcn_name);
- }
+ if(verbose)
+ HDfprintf(stdout, "%d:%s: Unknown request code.\n", world_mpi_rank, fcn_name);
break;
}
}
@@ -1352,7 +1518,7 @@ server_main(void)
} /* server_main() */
-
+
/*****************************************************************************
*
* Function: serve_read_request()
@@ -1370,16 +1536,12 @@ server_main(void)
*
* Programmer: JRM -- 12/22/05
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
static hbool_t
serve_read_request(struct mssg_t * mssg_ptr)
{
const char * fcn_name = "serve_read_request()";
+ hbool_t report_mssg = FALSE;
hbool_t success = TRUE;
int target_index;
haddr_t target_addr;
@@ -1426,11 +1588,11 @@ serve_read_request(struct mssg_t * mssg_ptr)
success = FALSE;
if ( verbose ) {
HDfprintf(stdout,
- "%d:%s: proc %d read invalid entry. idx/base_addr = %d/%a.\n",
- world_mpi_rank, fcn_name,
- mssg_ptr->src,
+ "%d:%s: proc %d read invalid entry. idx/base_addr = %d/0x%llx.\n",
+ world_mpi_rank, fcn_name,
+ mssg_ptr->src,
target_index,
- data[target_index].base_addr);
+ (long long)(data[target_index].base_addr));
}
} else {
@@ -1442,7 +1604,12 @@ serve_read_request(struct mssg_t * mssg_ptr)
reply.base_addr = data[target_index].base_addr;
reply.len = data[target_index].len;
reply.ver = data[target_index].ver;
+ reply.count = 0;
reply.magic = MSSG_MAGIC;
+
+ /* and update the counters */
+ total_reads++;
+ (data[target_index].reads)++;
}
}
@@ -1451,6 +1618,27 @@ serve_read_request(struct mssg_t * mssg_ptr)
success = send_mssg(&reply, TRUE);
}
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d read 0x%llx. len = %d. ver = %d.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (int)(data[target_index].len),
+ (int)(data[target_index].ver));
+
+ } else {
+
+ HDfprintf(stdout, "%d read 0x%llx FAILED. len = %d. ver = %d.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (int)(data[target_index].len),
+ (int)(data[target_index].ver));
+
+ }
+ }
+
return(success);
} /* serve_read_request() */
@@ -1486,6 +1674,7 @@ static hbool_t
serve_sync_request(struct mssg_t * mssg_ptr)
{
const char * fcn_name = "serve_sync_request()";
+ hbool_t report_mssg = FALSE;
hbool_t success = TRUE;
struct mssg_t reply;
@@ -1511,6 +1700,7 @@ serve_sync_request(struct mssg_t * mssg_ptr)
reply.base_addr = 0;
reply.len = 0;
reply.ver = 0;
+ reply.count = 0;
reply.magic = MSSG_MAGIC;
}
@@ -1519,11 +1709,24 @@ serve_sync_request(struct mssg_t * mssg_ptr)
success = send_mssg(&reply, TRUE);
}
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d sync.\n", (int)(mssg_ptr->src));
+
+ } else {
+
+ HDfprintf(stdout, "%d sync FAILED.\n", (int)(mssg_ptr->src));
+
+ }
+ }
+
return(success);
} /* serve_sync_request() */
-
+
/*****************************************************************************
*
* Function: serve_write_request()
@@ -1541,19 +1744,12 @@ serve_sync_request(struct mssg_t * mssg_ptr)
*
* Programmer: JRM -- 12/21/05
*
- * Modifications:
- *
- * JRM -- 5/9/06
- * Added code supporting a write ack message. This is a
- * speculative fix to a bug observed on Cobalt. If it
- * doesn't work, it will help narrow down the possibilities.
- *
*****************************************************************************/
-
static hbool_t
serve_write_request(struct mssg_t * mssg_ptr)
{
const char * fcn_name = "serve_write_request()";
+ hbool_t report_mssg = FALSE;
hbool_t success = TRUE;
int target_index;
int new_ver_num;
@@ -1604,6 +1800,7 @@ serve_write_request(struct mssg_t * mssg_ptr)
new_ver_num = mssg_ptr->ver;
+ /* this check should catch duplicate writes */
if ( new_ver_num <= data[target_index].ver ) {
nerrors++;
@@ -1622,6 +1819,10 @@ serve_write_request(struct mssg_t * mssg_ptr)
data[target_index].ver = new_ver_num;
data[target_index].valid = TRUE;
+ /* and update the counters */
+ total_writes++;
+ (data[target_index].writes)++;
+
#if DO_WRITE_REQ_ACK
/* compose the reply message */
@@ -1632,6 +1833,7 @@ serve_write_request(struct mssg_t * mssg_ptr)
reply.base_addr = data[target_index].base_addr;
reply.len = data[target_index].len;
reply.ver = data[target_index].ver;
+ reply.count = 0;
reply.magic = MSSG_MAGIC;
/* and send it */
@@ -1641,10 +1843,469 @@ serve_write_request(struct mssg_t * mssg_ptr)
}
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d write 0x%llx. len = %d. ver = %d.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (int)(data[target_index].len),
+ (int)(data[target_index].ver));
+
+ } else {
+
+ HDfprintf(stdout, "%d write 0x%llx FAILED. len = %d. ver = %d.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (int)(data[target_index].len),
+ (int)(data[target_index].ver));
+
+ }
+ }
+
return(success);
} /* serve_write_request() */
+
+/*****************************************************************************
+ *
+ * Function: serve_total_writes_request()
+ *
+ * Purpose: Serve a request for the total number of writes recorded since
+ * the last reset.
+ *
+ * The function accepts a pointer to an instance of struct
+ * mssg_t as input. If all sanity checks pass, it sends
+ * the current value of the total_writes global variable to
+ * the requesting process.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ *****************************************************************************/
+static hbool_t
+serve_total_writes_request(struct mssg_t * mssg_ptr)
+{
+ const char * fcn_name = "serve_total_writes_request()";
+ hbool_t report_mssg = FALSE;
+ hbool_t success = TRUE;
+ struct mssg_t reply;
+
+ if ( ( mssg_ptr == NULL ) ||
+ ( mssg_ptr->req != REQ_TTL_WRITES_CODE ) ||
+ ( mssg_ptr->magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ if ( success ) {
+
+ /* compose the reply message */
+ reply.req = REQ_TTL_WRITES_RPLY_CODE;
+ reply.src = world_mpi_rank;
+ reply.dest = mssg_ptr->src;
+ reply.mssg_num = -1; /* set by send function */
+ reply.base_addr = 0;
+ reply.len = 0;
+ reply.ver = 0;
+ reply.count = total_writes;
+ reply.magic = MSSG_MAGIC;
+ }
+
+ if ( success ) {
+
+ success = send_mssg(&reply, TRUE);
+ }
+
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d request total writes %ld.\n",
+ (int)(mssg_ptr->src),
+ total_writes);
+
+ } else {
+
+ HDfprintf(stdout, "%d request total writes %ld -- FAILED.\n",
+ (int)(mssg_ptr->src),
+ total_writes);
+
+ }
+ }
+
+ return(success);
+
+} /* serve_total_writes_request() */
+
+
+/*****************************************************************************
+ *
+ * Function: serve_total_reads_request()
+ *
+ * Purpose: Serve a request for the total number of reads recorded since
+ * the last reset.
+ *
+ * The function accepts a pointer to an instance of struct
+ * mssg_t as input. If all sanity checks pass, it sends
+ * the current value of the total_reads global variable to
+ * the requesting process.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ *****************************************************************************/
+static hbool_t
+serve_total_reads_request(struct mssg_t * mssg_ptr)
+{
+ const char * fcn_name = "serve_total_reads_request()";
+ hbool_t report_mssg = FALSE;
+ hbool_t success = TRUE;
+ struct mssg_t reply;
+
+ if ( ( mssg_ptr == NULL ) ||
+ ( mssg_ptr->req != REQ_TTL_READS_CODE ) ||
+ ( mssg_ptr->magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ if ( success ) {
+
+ /* compose the reply message */
+ reply.req = REQ_TTL_READS_RPLY_CODE;
+ reply.src = world_mpi_rank;
+ reply.dest = mssg_ptr->src;
+ reply.mssg_num = -1; /* set by send function */
+ reply.base_addr = 0;
+ reply.len = 0;
+ reply.ver = 0;
+ reply.count = total_reads;
+ reply.magic = MSSG_MAGIC;
+ }
+
+ if ( success ) {
+
+ success = send_mssg(&reply, TRUE);
+ }
+
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d request total reads %ld.\n",
+ (int)(mssg_ptr->src),
+ total_reads);
+
+ } else {
+
+ HDfprintf(stdout, "%d request total reads %ld -- FAILED.\n",
+ (int)(mssg_ptr->src),
+ total_reads);
+
+ }
+ }
+
+ return(success);
+
+} /* serve_total_reads_request() */
+
+
+/*****************************************************************************
+ *
+ * Function: serve_entry_writes_request()
+ *
+ * Purpose: Serve an entry writes request.
+ *
+ * The function accepts a pointer to an instance of struct
+ * mssg_t as input. If all sanity checks pass, it sends
+ * the number of times that the indicated datum has been
+ * written since the last counter reset to the requesting
+ * process.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ *****************************************************************************/
+static hbool_t
+serve_entry_writes_request(struct mssg_t * mssg_ptr)
+{
+ const char * fcn_name = "serve_entry_writes_request()";
+ hbool_t report_mssg = FALSE;
+ hbool_t success = TRUE;
+ int target_index;
+ haddr_t target_addr;
+ struct mssg_t reply;
+
+ if ( ( mssg_ptr == NULL ) ||
+ ( mssg_ptr->req != REQ_ENTRY_WRITES_CODE ) ||
+ ( mssg_ptr->magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ if ( success ) {
+
+ target_addr = mssg_ptr->base_addr;
+ target_index = addr_to_datum_index(target_addr);
+
+ if ( target_index < 0 ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: addr lookup failed for %a.\n",
+ world_mpi_rank, fcn_name, target_addr);
+ }
+ } else {
+
+ /* compose the reply message */
+ reply.req = REQ_ENTRY_WRITES_RPLY_CODE;
+ reply.src = world_mpi_rank;
+ reply.dest = mssg_ptr->src;
+ reply.mssg_num = -1; /* set by send function */
+ reply.base_addr = target_addr;
+ reply.len = 0;
+ reply.ver = 0;
+ reply.count = data[target_index].writes;
+ reply.magic = MSSG_MAGIC;
+ }
+ }
+
+ if ( success ) {
+
+ success = send_mssg(&reply, TRUE);
+ }
+
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d request entry 0x%llx writes = %ld.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (long)(data[target_index].writes));
+
+ } else {
+
+ HDfprintf(stdout, "%d request entry 0x%llx writes = %ld FAILED.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (long)(data[target_index].writes));
+
+ }
+ }
+
+ return(success);
+
+} /* serve_entry_writes_request() */
+
+
+/*****************************************************************************
+ *
+ * Function: serve_entry_reads_request()
+ *
+ * Purpose: Serve an entry reads request.
+ *
+ * The function accepts a pointer to an instance of struct
+ * mssg_t as input. If all sanity checks pass, it sends
+ * the number of times that the indicated datum has been
+ * read since the last counter reset to the requesting
+ * process.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ *****************************************************************************/
+static hbool_t
+serve_entry_reads_request(struct mssg_t * mssg_ptr)
+{
+ const char * fcn_name = "serve_entry_reads_request()";
+ hbool_t report_mssg = FALSE;
+ hbool_t success = TRUE;
+ int target_index;
+ haddr_t target_addr;
+ struct mssg_t reply;
+
+ if ( ( mssg_ptr == NULL ) ||
+ ( mssg_ptr->req != REQ_ENTRY_READS_CODE ) ||
+ ( mssg_ptr->magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ if ( success ) {
+
+ target_addr = mssg_ptr->base_addr;
+ target_index = addr_to_datum_index(target_addr);
+
+ if ( target_index < 0 ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: addr lookup failed for %a.\n",
+ world_mpi_rank, fcn_name, target_addr);
+ }
+ } else {
+
+ /* compose the reply message */
+ reply.req = REQ_ENTRY_READS_RPLY_CODE;
+ reply.src = world_mpi_rank;
+ reply.dest = mssg_ptr->src;
+ reply.mssg_num = -1; /* set by send function */
+ reply.base_addr = target_addr;
+ reply.len = 0;
+ reply.ver = 0;
+ reply.count = (long)(data[target_index].reads);
+ reply.magic = MSSG_MAGIC;
+ }
+ }
+
+ if ( success ) {
+
+ success = send_mssg(&reply, TRUE);
+ }
+
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d request entry 0x%llx reads = %ld.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (long)(data[target_index].reads));
+
+ } else {
+
+ HDfprintf(stdout, "%d request entry 0x%llx reads = %ld FAILED.\n",
+ (int)(mssg_ptr->src),
+ (long long)(data[target_index].base_addr),
+ (long)(data[target_index].reads));
+
+ }
+ }
+
+ return(success);
+
+} /* serve_entry_reads_request() */
+
+
+/*****************************************************************************
+ *
+ * Function: serve_rw_count_reset_request()
+ *
+ * Purpose: Serve read/write count reset request.
+ *
+ * The function accepts a pointer to an instance of struct
+ * mssg_t as input. If all sanity checks pass, it resets the
+ * read/write counters, and sends a confirmation message to
+ * the calling process.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/5/10
+ *
+ *****************************************************************************/
+static hbool_t
+serve_rw_count_reset_request(struct mssg_t * mssg_ptr)
+{
+ const char * fcn_name = "serve_rw_count_reset_request()";
+ hbool_t report_mssg = FALSE;
+ hbool_t success = TRUE;
+ struct mssg_t reply;
+
+ if ( ( mssg_ptr == NULL ) ||
+ ( mssg_ptr->req != REQ_RW_COUNT_RESET_CODE ) ||
+ ( mssg_ptr->magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ if ( success ) {
+
+ success = reset_server_counters();
+ }
+
+ if ( success ) {
+
+ /* compose the reply message */
+ reply.req = REQ_RW_COUNT_RESET_RPLY_CODE;
+ reply.src = world_mpi_rank;
+ reply.dest = mssg_ptr->src;
+ reply.mssg_num = -1; /* set by send function */
+ reply.base_addr = 0;
+ reply.len = 0;
+ reply.ver = 0;
+ reply.count = 0;
+ reply.magic = MSSG_MAGIC;
+ }
+
+ if ( success ) {
+
+ success = send_mssg(&reply, TRUE);
+ }
+
+ if ( report_mssg ) {
+
+ if ( success ) {
+
+ HDfprintf(stdout, "%d request R/W counter reset.\n",
+ (int)(mssg_ptr->src));
+
+ } else {
+
+ HDfprintf(stdout, "%d request R/w counter reset FAILED.\n",
+ (int)(mssg_ptr->src));
+
+ }
+ }
+
+ return(success);
+
+} /* serve_rw_count_reset_request() */
+
/*****************************************************************************/
/**************************** Call back functions ****************************/
@@ -1662,21 +2323,8 @@ serve_write_request(struct mssg_t * mssg_ptr)
* Programmer: John Mainzer
* 12/29/05
*
- * Modifications:
- *
- * JRM -- 7/11/06
- * Modified code to support the local_len field of datum.
- * This field allow us to track the cache's value for the
- * length of the entry, while retaining the original
- * value for communications with the server.
- *
- * JRM -- 2/4/09
- * Added code to set the cleared flag when a dirty entry is
- * cleared.
- *
*-------------------------------------------------------------------------
*/
-
static herr_t
clear_datum(H5F_t * f,
void * thing,
@@ -1728,6 +2376,7 @@ clear_datum(H5F_t * f,
} /* clear_datum() */
+
/*-------------------------------------------------------------------------
* Function: destroy_datum()
*
@@ -1740,17 +2389,8 @@ clear_datum(H5F_t * f,
* Programmer: John Mainzer
* 12/29/05
*
- * Modifications:
- *
- * JRM -- 7/11/06
- * Modified code to support the local_len field of datum.
- * This field allow us to track the cache's value for the
- * length of the entry, while retaining the original
- * value for communications with the server.
- *
*-------------------------------------------------------------------------
*/
-
static herr_t
destroy_datum(H5F_t UNUSED * f,
void * thing)
@@ -1785,6 +2425,7 @@ destroy_datum(H5F_t UNUSED * f,
} /* destroy_datum() */
+
/*-------------------------------------------------------------------------
* Function: flush_datum
*
@@ -1796,27 +2437,8 @@ destroy_datum(H5F_t UNUSED * f,
* Programmer: John Mainzer
* 12/29/05
*
- * Modifications:
- *
- * JRM -- 5/9/06
- * Added code to receive the write request ack messages
- * from the server. This is part of a speculative fix to
- * a bug spotted on Cobalt. If it doesn't fix the problem,
- * it will narrow down the possibilities.
- *
- * JRM -- 7/11/06
- * Modified code to support the local_len field of datum.
- * This field allow us to track the cache's value for the
- * length of the entry, while retaining the original
- * value for communications with the server.
- *
- * JRM -- 2/4/09
- * Added code to set the flushed flag when a dirty entry
- * is flushed.
- *
*-------------------------------------------------------------------------
*/
-
static herr_t
flush_datum(H5F_t *f,
hid_t UNUSED dxpl_id,
@@ -1825,15 +2447,31 @@ flush_datum(H5F_t *f,
void *thing)
{
const char * fcn_name = "flush_datum()";
+ hbool_t was_dirty = FALSE;
herr_t ret_value = SUCCEED;
int idx;
struct datum * entry_ptr;
struct mssg_t mssg;
+ H5C_t * cache_ptr;
+ struct H5AC_aux_t * aux_ptr;
HDassert( thing );
entry_ptr = (struct datum *)thing;
+ HDassert( f );
+ HDassert( f->shared );
+ HDassert( f->shared->cache );
+
+ cache_ptr = f->shared->cache;
+
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( cache_ptr->aux_ptr );
+
+ aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr);
+
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
idx = addr_to_datum_index(entry_ptr->base_addr);
HDassert( idx >= 0 );
@@ -1847,7 +2485,10 @@ flush_datum(H5F_t *f,
HDassert( entry_ptr->header.is_dirty == entry_ptr->dirty );
- if ( ( file_mpi_rank != 0 ) && ( entry_ptr->dirty ) ) {
+ if ( ( file_mpi_rank != 0 ) &&
+ ( entry_ptr->dirty ) &&
+ ( aux_ptr->metadata_write_strategy ==
+ H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY ) ) {
ret_value = FAIL;
HDfprintf(stdout,
@@ -1859,6 +2500,8 @@ flush_datum(H5F_t *f,
if ( entry_ptr->header.is_dirty ) {
+ was_dirty = TRUE; /* so we will receive the ack if requested */
+
/* compose the message */
mssg.req = WRITE_REQ_CODE;
mssg.src = world_mpi_rank;
@@ -1867,6 +2510,7 @@ flush_datum(H5F_t *f,
mssg.base_addr = entry_ptr->base_addr;
mssg.len = entry_ptr->len;
mssg.ver = entry_ptr->ver;
+ mssg.count = 0;
mssg.magic = MSSG_MAGIC;
if ( ! send_mssg(&mssg, FALSE) ) {
@@ -1889,7 +2533,7 @@ flush_datum(H5F_t *f,
#if DO_WRITE_REQ_ACK
- if ( ( ret_value == SUCCEED ) && ( entry_ptr->header.is_dirty ) ) {
+ if ( ( ret_value == SUCCEED ) && ( was_dirty ) ) {
if ( ! recv_mssg(&mssg, WRITE_REQ_ACK_CODE) ) {
@@ -1986,6 +2630,7 @@ load_datum(H5F_t UNUSED *f,
mssg.base_addr = entry_ptr->base_addr;
mssg.len = entry_ptr->len;
mssg.ver = 0; /* bogus -- should be corrected by server */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( ! send_mssg(&mssg, FALSE) ) {
@@ -2420,7 +3065,7 @@ local_pin_and_unpin_random_entries(H5F_t * file_ptr,
} /* local_pin_and_unpin_random_entries() */
-
+
/*****************************************************************************
* Function: local_pin_random_entry()
*
@@ -2436,10 +3081,7 @@ local_pin_and_unpin_random_entries(H5F_t * file_ptr,
* Programmer: John Mainzer
* 4/12/06
*
- * Modifications:
- *
*****************************************************************************/
-
void
local_pin_random_entry(H5F_t * file_ptr,
int min_idx,
@@ -2940,7 +3582,7 @@ pin_protected_entry(int32_t idx,
} /* pin_protected_entry() */
-
+
/*****************************************************************************
* Function: move_entry()
*
@@ -2956,13 +3598,7 @@ pin_protected_entry(int32_t idx,
* Programmer: John Mainzer
* 1/10/06
*
- * Modifications:
- *
- * 7/11/06 -- JRM
- * Added support for the phony_len field in datum.
- *
*****************************************************************************/
-
void
move_entry(H5F_t * file_ptr,
int32_t old_idx,
@@ -2996,7 +3632,35 @@ move_entry(H5F_t * file_ptr,
old_addr = old_entry_ptr->base_addr;
new_addr = new_entry_ptr->base_addr;
- result = H5AC_move_entry(file_ptr, &(types[0]), old_addr, new_addr);
+ /* Moving will mark the entry dirty if it is not already */
+ old_entry_ptr->dirty = TRUE;
+
+ /* touch up versions, base_addrs, and data_index. Do this
+ * now as it is possible that the rename will trigger a
+ * sync point.
+ */
+ if(old_entry_ptr->ver < new_entry_ptr->ver)
+ old_entry_ptr->ver = new_entry_ptr->ver;
+ else
+ (old_entry_ptr->ver)++;
+
+ old_entry_ptr->base_addr = new_addr;
+ new_entry_ptr->base_addr = old_addr;
+
+ data_index[old_entry_ptr->index] = new_idx;
+ data_index[new_entry_ptr->index] = old_idx;
+
+ tmp = old_entry_ptr->index;
+ old_entry_ptr->index = new_entry_ptr->index;
+ new_entry_ptr->index = tmp;
+
+ if(old_entry_ptr->local_len != new_entry_ptr->local_len) {
+ tmp_len = old_entry_ptr->local_len;
+ old_entry_ptr->local_len = new_entry_ptr->local_len;
+ new_entry_ptr->local_len = tmp_len;
+ } /* end if */
+
+ result = H5AC_move_entry(file_ptr, &(types[0]), old_addr, new_addr);
if ( ( result < 0 ) || ( old_entry_ptr->header.addr != new_addr ) ) {
@@ -3009,43 +3673,118 @@ move_entry(H5F_t * file_ptr,
} else {
HDassert( ((old_entry_ptr->header).type)->id == DATUM_ENTRY_TYPE );
- HDassert( old_entry_ptr->header.is_dirty );
- old_entry_ptr->dirty = TRUE;
- /* touch up versions, base_addrs, and data_index */
+ if ( ! (old_entry_ptr->header.is_dirty) ) {
- if ( old_entry_ptr->ver < new_entry_ptr->ver ) {
+ /* it is possible that we just exceeded the dirty bytes
+ * threshold, triggering a write of the newly inserted
+ * entry. Test for this, and only flag an error if this
+ * is not the case.
+ */
- old_entry_ptr->ver = new_entry_ptr->ver;
+ struct H5AC_aux_t * aux_ptr;
- } else {
+ aux_ptr = ((H5AC_aux_t *)(file_ptr->shared->cache->aux_ptr));
- (old_entry_ptr->ver)++;
+ if ( ! ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ) &&
+ ( aux_ptr->dirty_bytes == 0 ) ) ) {
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: data[%d].header.is_dirty = %d.\n",
+ world_mpi_rank, fcn_name, new_idx,
+ (int)(data[new_idx].header.is_dirty));
+ }
+ }
+ } else {
+
+ HDassert( old_entry_ptr->header.is_dirty );
}
+ }
+ }
- old_entry_ptr->base_addr = new_addr;
- new_entry_ptr->base_addr = old_addr;
+} /* move_entry() */
- data_index[old_entry_ptr->index] = new_idx;
- data_index[new_entry_ptr->index] = old_idx;
+
+/*****************************************************************************
+ *
+ * Function: reset_server_counts()
+ *
+ * Purpose: Send a message to the server process requesting it to reset
+ * its counters. Await confirmation message.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/6/10
+ *
+ *****************************************************************************/
+static hbool_t
+reset_server_counts(void)
+{
+ const char * fcn_name = "reset_server_counts()";
+ hbool_t success = TRUE; /* will set to FALSE if appropriate. */
+ struct mssg_t mssg;
+
+ if ( success ) {
- tmp = old_entry_ptr->index;
- old_entry_ptr->index = new_entry_ptr->index;
- new_entry_ptr->index = tmp;
+ /* compose the message */
+ mssg.req = REQ_RW_COUNT_RESET_CODE;
+ mssg.src = world_mpi_rank;
+ mssg.dest = world_server_mpi_rank;
+ mssg.mssg_num = -1; /* set by send function */
+ mssg.base_addr = 0;
+ mssg.len = 0;
+ mssg.ver = 0;
+ mssg.count = 0;
+ mssg.magic = MSSG_MAGIC;
- if ( old_entry_ptr->local_len != new_entry_ptr->local_len ) {
+ if ( ! send_mssg(&mssg, FALSE) ) {
- tmp_len = old_entry_ptr->local_len;
- old_entry_ptr->local_len = new_entry_ptr->local_len;
- new_entry_ptr->local_len = tmp_len;
- }
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: send_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
}
}
- return;
+ if ( success ) {
-} /* move_entry() */
+ if ( ! recv_mssg(&mssg, REQ_RW_COUNT_RESET_RPLY_CODE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else if ( ( mssg.req != REQ_RW_COUNT_RESET_RPLY_CODE ) ||
+ ( mssg.src != world_server_mpi_rank ) ||
+ ( mssg.dest != world_mpi_rank ) ||
+ ( mssg.base_addr != 0 ) ||
+ ( mssg.len != 0 ) ||
+ ( mssg.ver != 0 ) ||
+ ( mssg.count != 0 ) ||
+ ( mssg.magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: Bad data in req r/w counter reset reply.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ return(success);
+
+} /* reset_server_counts() */
/*****************************************************************************
@@ -3121,7 +3860,7 @@ resize_entry(int32_t idx,
} /* resize_entry() */
-
+
/*****************************************************************************
*
* Function: setup_cache_for_test()
@@ -3140,22 +3879,19 @@ resize_entry(int32_t idx,
*
* Programmer: JRM -- 1/4/06
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
hbool_t
setup_cache_for_test(hid_t * fid_ptr,
H5F_t ** file_ptr_ptr,
- H5C_t ** cache_ptr_ptr)
+ H5C_t ** cache_ptr_ptr,
+ int metadata_write_strategy)
{
const char * fcn_name = "setup_cache_for_test()";
hbool_t success = FALSE; /* will set to TRUE if appropriate. */
hbool_t enable_rpt_fcn = FALSE;
hid_t fid = -1;
H5AC_cache_config_t config;
+ H5AC_cache_config_t test_config;
H5F_t * file_ptr = NULL;
H5C_t * cache_ptr = NULL;
@@ -3213,7 +3949,7 @@ setup_cache_for_test(hid_t * fid_ptr,
success = TRUE;
}
- if ( ( success ) && ( enable_rpt_fcn ) ) {
+ if ( success ) {
config.version = H5AC__CURR_CACHE_CONFIG_VERSION;
@@ -3221,12 +3957,13 @@ setup_cache_for_test(hid_t * fid_ptr,
!= SUCCEED ) {
HDfprintf(stdout,
- "%d:%s: H5AC_get_cache_auto_resize_config() failed.\n",
+ "%d:%s: H5AC_get_cache_auto_resize_config(1) failed.\n",
world_mpi_rank, fcn_name);
} else {
- config.rpt_fcn_enabled = TRUE;
+ config.rpt_fcn_enabled = enable_rpt_fcn;
+ config.metadata_write_strategy = metadata_write_strategy;
if ( H5AC_set_cache_auto_resize_config(cache_ptr, &config)
!= SUCCEED ) {
@@ -3234,7 +3971,8 @@ setup_cache_for_test(hid_t * fid_ptr,
HDfprintf(stdout,
"%d:%s: H5AC_set_cache_auto_resize_config() failed.\n",
world_mpi_rank, fcn_name);
- } else {
+
+ } else if ( enable_rpt_fcn ) {
HDfprintf(stdout, "%d:%s: rpt_fcn enabled.\n",
world_mpi_rank, fcn_name);
@@ -3242,6 +3980,71 @@ setup_cache_for_test(hid_t * fid_ptr,
}
}
+ /* verify that the metadata write strategy is set as expected. Must
+ * do this here, as this field is only set in the parallel case. Hence
+ * we can't do our usual checks in the serial case.
+ */
+
+ if ( success ) /* verify that the metadata write strategy is as expected */
+ {
+ if ( cache_ptr->aux_ptr == NULL ) {
+
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: cache_ptr->aux_ptr == NULL.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else if ( ((H5AC_aux_t *)(cache_ptr->aux_ptr))->magic !=
+ H5AC__H5AC_AUX_T_MAGIC ) {
+
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: cache_ptr->aux_ptr->magic != H5AC__H5AC_AUX_T_MAGIC.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else if( ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy
+ != metadata_write_strategy ) {
+
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: bad cache_ptr->aux_ptr->metadata_write_strategy\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ /* also verify that the expected metadata write strategy is reported
+ * when we get the current configuration.
+ */
+
+ if ( success ) {
+
+ test_config.version = H5AC__CURR_CACHE_CONFIG_VERSION;
+
+ if ( H5AC_get_cache_auto_resize_config(cache_ptr, &test_config)
+ != SUCCEED ) {
+
+ HDfprintf(stdout,
+ "%d:%s: H5AC_get_cache_auto_resize_config(2) failed.\n",
+ world_mpi_rank, fcn_name);
+
+ } else if ( test_config.metadata_write_strategy !=
+ metadata_write_strategy ) {
+
+ nerrors++;
+
+ if ( verbose ) {
+
+ HDfprintf(stdout,
+ "%d:%s: unexpected metadata_write_strategy.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+
#if DO_SYNC_AFTER_WRITE
if ( success ) {
@@ -3259,10 +4062,153 @@ setup_cache_for_test(hid_t * fid_ptr,
#endif /* DO_SYNC_AFTER_WRITE */
+ if ( success ) {
+
+ if ( H5AC_set_sync_point_done_callback(cache_ptr, verify_writes) !=
+ SUCCEED ) {
+
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: H5AC_set_sync_point_done_callback failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
return(success);
} /* setup_cache_for_test() */
+
+/*****************************************************************************
+ *
+ * Function: verify_writes()
+ *
+ * Purpose: Verify that the indicated entries have been written exactly
+ * once each, and that the indicated total number of writes
+ * has been processed by the server process. Flag an error if
+ * discrepency is noted. Finally reset the counters maintained
+ * by the server process.
+ *
+ * This function should only be called by the metadata cache
+ * as the "sync point done" function, as it must do some
+ * synchronization to avoid false positives.
+ *
+ * Note that at present, this function does not allow for the
+ * case in which one or more of the indicated entries should
+ * have been written more than once since the last time the
+ * server process's counters were reset. That is fine for now,
+ * as with the current metadata write strategies, no entry
+ * should be written more than once per sync point. If this
+ * changes this limitation will have to be revisited.
+ *
+ * Return: void.
+ *
+ * Programmer: JRM -- 5/9/10
+ *
+ *****************************************************************************/
+void
+verify_writes(int num_writes,
+ haddr_t * written_entries_tbl)
+{
+ const char * fcn_name = "verify_writes()";
+ const hbool_t report = FALSE;
+ hbool_t proceed = TRUE;
+ int i;
+
+ HDassert( world_mpi_rank != world_server_mpi_rank );
+ HDassert( num_writes >= 0 );
+ HDassert( ( num_writes == 0 ) ||
+ ( written_entries_tbl != NULL ) );
+
+ /* barrier to ensure that all other processes are ready to leave
+ * the sync point as well.
+ */
+ if ( proceed ) {
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ proceed = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: barrier 1 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( proceed ) {
+
+ proceed = verify_total_writes(num_writes);
+ }
+
+ while ( ( proceed ) && ( i < num_writes ) )
+ {
+ proceed = verify_entry_writes(written_entries_tbl[i], 1);
+ i++;
+ }
+
+ /* barrier to ensure that all other processes have finished verifying
+ * the number of writes before we reset the counters.
+ */
+ if ( proceed ) {
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ proceed = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: barrier 2 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( proceed ) {
+
+ proceed = reset_server_counts();
+ }
+
+ /* if requested, display status of check to stdout */
+ if ( ( report ) && ( file_mpi_rank == 0 ) ) {
+
+ if ( proceed ) {
+
+ HDfprintf(stdout, "%d:%s: verified %d writes.\n",
+ world_mpi_rank, fcn_name, num_writes);
+
+ } else {
+
+ HDfprintf(stdout, "%d:%s: FAILED to verify %d writes.\n",
+ world_mpi_rank, fcn_name, num_writes);
+
+ }
+ }
+
+ /* final barrier to ensure that all processes think that the server
+ * counters have been reset before we leave the sync point. This
+ * barrier is probaby not necessary at this point in time (5/9/10),
+ * but I can think of at least one likely change to the metadata write
+ * strategies that will require it -- hence its insertion now.
+ */
+ if ( proceed ) {
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ proceed = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: barrier 3 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ return;
+
+} /* verify_writes() */
+
/*****************************************************************************
*
@@ -3509,6 +4455,425 @@ take_down_cache(hid_t fid)
} /* take_down_cache() */
+
+/*****************************************************************************
+ * Function: verify_entry_reads
+ *
+ * Purpose: Query the server to determine the number of times the
+ * indicated entry has been read since the last time the
+ * server counters were reset.
+ *
+ * Return TRUE if successful, and if the supplied expected
+ * number of reads matches the number of reads reported by
+ * the server process.
+ *
+ * Return FALSE and flag an error otherwise.
+ *
+ * Return: TRUE if successful, FALSE otherwise.
+ *
+ * Programmer: John Mainzer
+ * 5/6/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static hbool_t
+verify_entry_reads(haddr_t addr,
+ int expected_entry_reads)
+{
+ const char * fcn_name = "verify_entry_reads()";
+ hbool_t success = TRUE;
+ int reported_entry_reads;
+ struct mssg_t mssg;
+
+ if ( success ) {
+
+ /* compose the message */
+ mssg.req = REQ_ENTRY_READS_CODE;
+ mssg.src = world_mpi_rank;
+ mssg.dest = world_server_mpi_rank;
+ mssg.mssg_num = -1; /* set by send function */
+ mssg.base_addr = addr;
+ mssg.len = 0; /* not used */
+ mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
+ mssg.magic = MSSG_MAGIC;
+
+ if ( ! send_mssg(&mssg, FALSE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: send_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ! recv_mssg(&mssg, REQ_ENTRY_READS_RPLY_CODE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ( mssg.req != REQ_ENTRY_READS_RPLY_CODE ) ||
+ ( mssg.src != world_server_mpi_rank ) ||
+ ( mssg.dest != world_mpi_rank ) ||
+ ( mssg.base_addr != addr ) ||
+ ( mssg.len != 0 ) ||
+ ( mssg.ver != 0 ) ||
+ ( mssg.magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad data in req entry reads reply.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else {
+
+ reported_entry_reads = mssg.count;
+ }
+ }
+
+ if ( ! success ) {
+
+ if ( reported_entry_reads != expected_entry_reads ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: rep/exp entry 0x%llx reads mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name, (long long)addr,
+ reported_entry_reads, expected_entry_reads);
+ }
+ }
+ }
+
+ return(success);
+
+} /* verify_entry_reads() */
+
+
+/*****************************************************************************
+ * Function: verify_entry_writes
+ *
+ * Purpose: Query the server to determine the number of times the
+ * indicated entry has been written since the last time the
+ * server counters were reset.
+ *
+ * Return TRUE if successful, and if the supplied expected
+ * number of reads matches the number of reads reported by
+ * the server process.
+ *
+ * Return FALSE and flag an error otherwise.
+ *
+ * Return: TRUE if successful, FALSE otherwise.
+ *
+ * Programmer: John Mainzer
+ * 5/6/10
+ *
+ *-------------------------------------------------------------------------
+ */
+static hbool_t
+verify_entry_writes(haddr_t addr,
+ int expected_entry_writes)
+{
+ const char * fcn_name = "verify_entry_writes()";
+ hbool_t success = TRUE;
+ int reported_entry_writes;
+ struct mssg_t mssg;
+
+ if ( success ) {
+
+ /* compose the message */
+ mssg.req = REQ_ENTRY_WRITES_CODE;
+ mssg.src = world_mpi_rank;
+ mssg.dest = world_server_mpi_rank;
+ mssg.mssg_num = -1; /* set by send function */
+ mssg.base_addr = addr;
+ mssg.len = 0; /* not used */
+ mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
+ mssg.magic = MSSG_MAGIC;
+
+ if ( ! send_mssg(&mssg, FALSE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: send_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ! recv_mssg(&mssg, REQ_ENTRY_WRITES_RPLY_CODE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ( mssg.req != REQ_ENTRY_WRITES_RPLY_CODE ) ||
+ ( mssg.src != world_server_mpi_rank ) ||
+ ( mssg.dest != world_mpi_rank ) ||
+ ( mssg.base_addr != addr ) ||
+ ( mssg.len != 0 ) ||
+ ( mssg.ver != 0 ) ||
+ ( mssg.magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad data in req entry writes reply.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else {
+
+ reported_entry_writes = mssg.count;
+ }
+ }
+
+ if ( ! success ) {
+
+ if ( reported_entry_writes != expected_entry_writes ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: rep/exp entry 0x%llx writes mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name, (long long)addr,
+ reported_entry_writes, expected_entry_writes);
+ }
+ }
+ }
+
+ return(success);
+
+} /* verify_entry_writes() */
+
+
+/*****************************************************************************
+ *
+ * Function: verify_total_reads()
+ *
+ * Purpose: Query the server to obtain the total reads since the last
+ * server counter reset, and compare this value with the supplied
+ * expected value.
+ *
+ * If the values match, return TRUE.
+ *
+ * If the values don't match, flag an error and return FALSE.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/6/10
+ *
+ *****************************************************************************/
+static hbool_t
+verify_total_reads(int expected_total_reads)
+{
+ const char * fcn_name = "verify_total_reads()";
+ hbool_t success = TRUE; /* will set to FALSE if appropriate. */
+ long reported_total_reads;
+ struct mssg_t mssg;
+
+ if ( success ) {
+
+ /* compose the message */
+ mssg.req = REQ_TTL_READS_CODE;
+ mssg.src = world_mpi_rank;
+ mssg.dest = world_server_mpi_rank;
+ mssg.mssg_num = -1; /* set by send function */
+ mssg.base_addr = 0;
+ mssg.len = 0;
+ mssg.ver = 0;
+ mssg.count = 0;
+ mssg.magic = MSSG_MAGIC;
+
+ if ( ! send_mssg(&mssg, FALSE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: send_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ! recv_mssg(&mssg, REQ_TTL_READS_RPLY_CODE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else if ( ( mssg.req != REQ_TTL_READS_RPLY_CODE ) ||
+ ( mssg.src != world_server_mpi_rank ) ||
+ ( mssg.dest != world_mpi_rank ) ||
+ ( mssg.base_addr != 0 ) ||
+ ( mssg.len != 0 ) ||
+ ( mssg.ver != 0 ) ||
+ ( mssg.magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad data in req total reads reply.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else {
+
+ reported_total_reads = mssg.count;
+ }
+ }
+
+ if ( success ) {
+
+ if ( reported_total_reads != expected_total_reads ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: reported/expected total reads mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name,
+ reported_total_reads, expected_total_reads);
+
+ }
+ }
+ }
+
+ return(success);
+
+} /* verify_total_reads() */
+
+
+/*****************************************************************************
+ *
+ * Function: verify_total_writes()
+ *
+ * Purpose: Query the server to obtain the total writes since the last
+ * server counter reset, and compare this value with the supplied
+ * expected value.
+ *
+ * If the values match, return TRUE.
+ *
+ * If the values don't match, flag an error and return FALSE.
+ *
+ * Return: Success: TRUE
+ *
+ * Failure: FALSE
+ *
+ * Programmer: JRM -- 5/6/10
+ *
+ *****************************************************************************/
+static hbool_t
+verify_total_writes(int expected_total_writes)
+{
+ const char * fcn_name = "verify_total_writes()";
+ hbool_t success = TRUE; /* will set to FALSE if appropriate. */
+ long reported_total_writes;
+ struct mssg_t mssg;
+
+ if ( success ) {
+
+ /* compose the message */
+ mssg.req = REQ_TTL_WRITES_CODE;
+ mssg.src = world_mpi_rank;
+ mssg.dest = world_server_mpi_rank;
+ mssg.mssg_num = -1; /* set by send function */
+ mssg.base_addr = 0;
+ mssg.len = 0;
+ mssg.ver = 0;
+ mssg.count = 0;
+ mssg.magic = MSSG_MAGIC;
+
+ if ( ! send_mssg(&mssg, FALSE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: send_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+ }
+
+ if ( success ) {
+
+ if ( ! recv_mssg(&mssg, REQ_TTL_WRITES_RPLY_CODE) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else if ( ( mssg.req != REQ_TTL_WRITES_RPLY_CODE ) ||
+ ( mssg.src != world_server_mpi_rank ) ||
+ ( mssg.dest != world_mpi_rank ) ||
+ ( mssg.base_addr != 0 ) ||
+ ( mssg.len != 0 ) ||
+ ( mssg.ver != 0 ) ||
+ ( mssg.magic != MSSG_MAGIC ) ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: Bad data in req total reads reply.\n",
+ world_mpi_rank, fcn_name);
+ }
+ } else {
+
+ reported_total_writes = mssg.count;
+ }
+ }
+
+ if ( success ) {
+
+ if ( reported_total_writes != expected_total_writes ) {
+
+ nerrors++;
+ success = FALSE;
+ if ( verbose ) {
+ HDfprintf(stdout,
+ "%d:%s: reported/expected total writes mismatch (%ld/%ld).\n",
+ world_mpi_rank, fcn_name,
+ reported_total_writes, expected_total_writes);
+ }
+ }
+ }
+
+ return(success);
+
+} /* verify_total_writes() */
+
/*****************************************************************************
* Function: unlock_entry()
@@ -3695,6 +5060,7 @@ unpin_entry(H5F_t * file_ptr,
/****************************** test functions *******************************/
/*****************************************************************************/
+
/*****************************************************************************
*
* Function: server_smoke_check()
@@ -3707,21 +5073,7 @@ unpin_entry(H5F_t * file_ptr,
*
* Programmer: JRM -- 12/21/05
*
- * Modifications:
- *
- * JRM -- 5/9/06
- * Added code supporting the write request ack message. This
- * message was added to eliminate one possible cause of a
- * bug spotted on cobalt. If this doesn't fix the problem,
- * it will narrow things down a bit.
- *
- * JRM -- 5/10/06
- * Added call to do_sync(). This is part of an attempt to
- * optimize out the slowdown caused by the addition of the
- * write request ack message.
- *
*****************************************************************************/
-
hbool_t
server_smoke_check(void)
{
@@ -3761,6 +5113,7 @@ server_smoke_check(void)
mssg.base_addr = data[world_mpi_rank].base_addr;
mssg.len = data[world_mpi_rank].len;
mssg.ver = ++(data[world_mpi_rank].ver);
+ mssg.count = 0;
mssg.magic = MSSG_MAGIC;
if ( ! ( success = send_mssg(&mssg, FALSE) ) ) {
@@ -3813,6 +5166,50 @@ server_smoke_check(void)
do_sync();
+ /* barrier to allow all writes to complete */
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: barrier 1 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ /* verify that the expected entries have been written, the total */
+ if ( success ) {
+
+ success = verify_entry_writes(data[world_mpi_rank].base_addr, 1);
+ }
+
+ if ( success ) {
+
+ success = verify_entry_reads(data[world_mpi_rank].base_addr, 0);
+ }
+
+ if ( success ) {
+
+ success = verify_total_writes(world_mpi_size - 1);
+ }
+
+ if ( success ) {
+
+ success = verify_total_reads(0);
+ }
+
+ /* barrier to allow all writes to complete */
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+
+ HDfprintf(stdout, "%d:%s: barrier 2 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
/* compose the read message */
mssg.req = READ_REQ_CODE;
mssg.src = world_mpi_rank;
@@ -3821,6 +5218,7 @@ server_smoke_check(void)
mssg.base_addr = data[world_mpi_rank].base_addr;
mssg.len = data[world_mpi_rank].len;
mssg.ver = 0; /* bogus -- should be corrected by server */
+ mssg.count = 0;
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -3872,6 +5270,98 @@ server_smoke_check(void)
}
}
+ /* barrier to allow all writes to complete */
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+ HDfprintf(stdout, "%d:%s: barrier 3 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ /* verify that the expected entries have been read, and the total */
+ if ( success ) {
+
+ success = verify_entry_writes(data[world_mpi_rank].base_addr, 1);
+ }
+
+ if ( success ) {
+
+ success = verify_entry_reads(data[world_mpi_rank].base_addr, 1);
+ }
+
+ if ( success ) {
+
+ success = verify_total_writes(world_mpi_size - 1);
+ }
+
+ if ( success ) {
+
+ success = verify_total_reads(world_mpi_size - 1);
+ }
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+
+ HDfprintf(stdout, "%d:%s: barrier 4 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ /* reset the counters */
+ if ( success ) {
+
+ success = reset_server_counts();
+ }
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+
+ HDfprintf(stdout, "%d:%s: barrier 5 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
+ /* verify that the counters have been reset */
+ if ( success ) {
+
+ success = verify_entry_writes(data[world_mpi_rank].base_addr, 0);
+ }
+
+ if ( success ) {
+
+ success = verify_entry_reads(data[world_mpi_rank].base_addr, 0);
+ }
+
+ if ( success ) {
+
+ success = verify_total_writes(0);
+ }
+
+ if ( success ) {
+
+ success = verify_total_reads(0);
+ }
+
+ if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) {
+
+ success = FALSE;
+ nerrors++;
+ if ( verbose ) {
+
+ HDfprintf(stdout, "%d:%s: barrier 6 failed.\n",
+ world_mpi_rank, fcn_name);
+ }
+ }
+
/* compose the done message */
mssg.req = DONE_REQ_CODE;
mssg.src = world_mpi_rank;
@@ -3880,6 +5370,7 @@ server_smoke_check(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0;
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -3918,6 +5409,7 @@ server_smoke_check(void)
} /* server_smoke_check() */
+
/*****************************************************************************
*
* Function: smoke_check_1()
@@ -3930,14 +5422,9 @@ server_smoke_check(void)
*
* Programmer: JRM -- 1/4/06
*
- * Modifications:
- *
- * None.
- *
*****************************************************************************/
-
hbool_t
-smoke_check_1(void)
+smoke_check_1(int metadata_write_strategy)
{
const char * fcn_name = "smoke_check_1()";
hbool_t success = TRUE;
@@ -3948,9 +5435,25 @@ smoke_check_1(void)
H5C_t * cache_ptr = NULL;
struct mssg_t mssg;
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
+
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #1 -- process 0 only md write strategy");
+ }
+ break;
- TESTING("smoke check #1");
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #1 -- distributed md write strategy");
+ }
+ break;
+
+ default:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #1 -- unknown md write strategy");
+ }
+ break;
}
nerrors = 0;
@@ -3971,7 +5474,8 @@ smoke_check_1(void)
}
else /* run the clients */
{
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -4039,6 +5543,7 @@ smoke_check_1(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -4077,7 +5582,7 @@ smoke_check_1(void)
} /* smoke_check_1() */
-
+
/*****************************************************************************
*
* Function: smoke_check_2()
@@ -4093,18 +5598,9 @@ smoke_check_1(void)
*
* Programmer: JRM -- 1/12/06
*
- * Modifications:
- *
- * JRM -- 4/13/06
- * Added pinned entry tests.
- *
- * JRM -- 4/28/06
- * Modified test to move pinned entries.
- *
*****************************************************************************/
-
hbool_t
-smoke_check_2(void)
+smoke_check_2(int metadata_write_strategy)
{
const char * fcn_name = "smoke_check_2()";
hbool_t success = TRUE;
@@ -4115,9 +5611,25 @@ smoke_check_2(void)
H5C_t * cache_ptr = NULL;
struct mssg_t mssg;
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
- TESTING("smoke check #2");
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #2 -- process 0 only md write strategy");
+ }
+ break;
+
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #2 -- distributed md write strategy");
+ }
+ break;
+
+ default:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #2 -- unknown md write strategy");
+ }
+ break;
}
nerrors = 0;
@@ -4138,7 +5650,8 @@ smoke_check_2(void)
}
else /* run the clients */
{
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -4253,6 +5766,7 @@ smoke_check_2(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -4291,7 +5805,7 @@ smoke_check_2(void)
} /* smoke_check_2() */
-
+
/*****************************************************************************
*
* Function: smoke_check_3()
@@ -4310,18 +5824,9 @@ smoke_check_2(void)
*
* Programmer: JRM -- 1/13/06
*
- * Modifications:
- *
- * Added code intended to ensure correct operation with large
- * numbers of processors.
- * JRM - 1/31/06
- *
- * Added pinned entry tests. JRM - 4/14/06
- *
*****************************************************************************/
-
hbool_t
-smoke_check_3(void)
+smoke_check_3(int metadata_write_strategy)
{
const char * fcn_name = "smoke_check_3()";
hbool_t success = TRUE;
@@ -4338,9 +5843,25 @@ smoke_check_3(void)
H5C_t * cache_ptr = NULL;
struct mssg_t mssg;
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
+
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #3 -- process 0 only md write strategy");
+ }
+ break;
+
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #3 -- distributed md write strategy");
+ }
+ break;
- TESTING("smoke check #3");
+ default:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #3 -- unknown md write strategy");
+ }
+ break;
}
/* 0 */
@@ -4373,7 +5894,8 @@ smoke_check_3(void)
/* 1 */
if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);}
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -4612,6 +6134,7 @@ smoke_check_3(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -4654,7 +6177,7 @@ smoke_check_3(void)
} /* smoke_check_3() */
-
+
/*****************************************************************************
*
* Function: smoke_check_4()
@@ -4673,20 +6196,9 @@ smoke_check_3(void)
*
* Programmer: JRM -- 1/13/06
*
- * Modifications:
- *
- * Added code intended to insure correct operation with large
- * numbers of processors.
- * JRM - 1/31/06
- *
- * Added code testing pinned insertion of entries.
- *
- * JRM - 8/15/06
- *
*****************************************************************************/
-
hbool_t
-smoke_check_4(void)
+smoke_check_4(int metadata_write_strategy)
{
const char * fcn_name = "smoke_check_4()";
hbool_t success = TRUE;
@@ -4701,9 +6213,25 @@ smoke_check_4(void)
H5C_t * cache_ptr = NULL;
struct mssg_t mssg;
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
+
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #4 -- process 0 only md write strategy");
+ }
+ break;
+
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #4 -- distributed md write strategy");
+ }
+ break;
- TESTING("smoke check #4");
+ default:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #4 -- unknown md write strategy");
+ }
+ break;
}
nerrors = 0;
@@ -4724,7 +6252,8 @@ smoke_check_4(void)
}
else /* run the clients */
{
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -4926,6 +6455,7 @@ smoke_check_4(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -4965,7 +6495,7 @@ smoke_check_4(void)
} /* smoke_check_4() */
-
+
/*****************************************************************************
*
* Function: smoke_check_5()
@@ -4979,16 +6509,9 @@ smoke_check_4(void)
*
* Programmer: JRM -- 5/18/06
*
- * Modifications:
- *
- * JRM -- 7/12/06
- * Added test code for H5AC_expunge_entry() and
- * H5AC_resize_entry().
- *
*****************************************************************************/
-
hbool_t
-smoke_check_5(void)
+smoke_check_5(int metadata_write_strategy)
{
const char * fcn_name = "smoke_check_5()";
hbool_t success = TRUE;
@@ -5001,11 +6524,28 @@ smoke_check_5(void)
H5C_t * cache_ptr = NULL;
struct mssg_t mssg;
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
+
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #5 -- process 0 only md write strategy");
+ }
+ break;
- TESTING("smoke check #5");
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #5 -- distributed md write strategy");
+ }
+ break;
+
+ default:
+ if ( world_mpi_rank == 0 ) {
+ TESTING("smoke check #5 -- unknown md write strategy");
+ }
+ break;
}
+
/* 0 */
if ( verbose ) { HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++); }
@@ -5043,7 +6583,8 @@ smoke_check_5(void)
HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);
}
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -5180,6 +6721,7 @@ smoke_check_5(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -5223,7 +6765,7 @@ smoke_check_5(void)
} /* smoke_check_5() */
-
+
/*****************************************************************************
*
* Function: trace_file_check()
@@ -5245,7 +6787,7 @@ smoke_check_5(void)
* - H5AC_flush()
* - H5AC_set()
* - H5AC_mark_entry_dirty()
- * H5AC_move_entry()
+ * - H5AC_move_entry()
* - H5AC_pin_protected_entry()
* - H5AC_protect()
* - H5AC_unpin_entry()
@@ -5262,47 +6804,69 @@ smoke_check_5(void)
*
* Programmer: JRM -- 6/13/06
*
- * Modifications:
- *
- * JRM -- 7/11/06
- * Updated for H5AC_expunge_entry() and
- * H5AC_resize_entry().
- *
*****************************************************************************/
-
hbool_t
-trace_file_check(void)
+trace_file_check(int metadata_write_strategy)
{
hbool_t success = TRUE;
#ifdef H5_METADATA_TRACE_FILE
const char * fcn_name = "trace_file_check()";
- const char * expected_output[] =
+ const char *((* expected_output)[]) = NULL;
+ const char * expected_output_0[] =
{
"### HDF5 metadata cache trace file version 1 ###\n",
- "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 1048576 0.500000 16777216 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 0\n",
- "H5AC_set 0x0 15 0x0 2 0\n",
- "H5AC_set 0x2 15 0x0 2 0\n",
- "H5AC_set 0x4 15 0x0 4 0\n",
- "H5AC_set 0x8 15 0x0 6 0\n",
- "H5AC_protect 0 15 H5AC_WRITE 2 1\n",
- "H5AC_mark_entry_dirty 0 0\n",
- "H5AC_unprotect 0 15 0 0 0\n",
- "H5AC_protect 2 15 H5AC_WRITE 2 1\n",
- "H5AC_pin_protected_entry 2 0\n",
- "H5AC_unprotect 2 15 0 0 0\n",
- "H5AC_unpin_entry 2 0\n",
- "H5AC_expunge_entry 2 15 0\n",
- "H5AC_protect 4 15 H5AC_WRITE 4 1\n",
- "H5AC_pin_protected_entry 4 0\n",
- "H5AC_unprotect 4 15 0 0 0\n",
- "H5AC_mark_entry_dirty 0x4 0 0 0\n",
- "H5AC_resize_entry 0x4 2 0\n",
- "H5AC_resize_entry 0x4 4 0\n",
- "H5AC_unpin_entry 4 0\n",
- "H5AC_move_entry 0 8a65 15 0\n",
- "H5AC_move_entry 8a65 0 15 0\n",
+ "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 2097152 0.300000 33554432 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 0 0\n",
+ "H5AC_set 0x200 25 0x0 2 0\n",
+ "H5AC_set 0x202 25 0x0 2 0\n",
+ "H5AC_set 0x204 25 0x0 4 0\n",
+ "H5AC_set 0x208 25 0x0 6 0\n",
+ "H5AC_protect 0x200 25 H5AC_WRITE 2 1\n",
+ "H5AC_mark_entry_dirty 0x200 0\n",
+ "H5AC_unprotect 0x200 25 0 0 0\n",
+ "H5AC_protect 0x202 25 H5AC_WRITE 2 1\n",
+ "H5AC_pin_protected_entry 0x202 0\n",
+ "H5AC_unprotect 0x202 25 0 0 0\n",
+ "H5AC_unpin_entry 0x202 0\n",
+ "H5AC_expunge_entry 0x202 25 0\n",
+ "H5AC_protect 0x204 25 H5AC_WRITE 4 1\n",
+ "H5AC_pin_protected_entry 0x204 0\n",
+ "H5AC_unprotect 0x204 25 0 0 0\n",
+ "H5AC_mark_entry_dirty 0x204 0 0 0\n",
+ "H5AC_resize_entry 0x204 2 0\n",
+ "H5AC_resize_entry 0x204 4 0\n",
+ "H5AC_unpin_entry 0x204 0\n",
+ "H5AC_move_entry 0x200 0x8c65 25 0\n",
+ "H5AC_move_entry 0x8c65 0x200 25 0\n",
+ "H5AC_flush 0\n",
+ NULL
+ };
+ const char * expected_output_1[] =
+ {
+ "### HDF5 metadata cache trace file version 1 ###\n",
+ "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 2097152 0.300000 33554432 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 1 0\n",
+ "H5AC_set 0x200 25 0x0 2 0\n",
+ "H5AC_set 0x202 25 0x0 2 0\n",
+ "H5AC_set 0x204 25 0x0 4 0\n",
+ "H5AC_set 0x208 25 0x0 6 0\n",
+ "H5AC_protect 0x200 25 H5AC_WRITE 2 1\n",
+ "H5AC_mark_entry_dirty 0x200 0\n",
+ "H5AC_unprotect 0x200 25 0 0 0\n",
+ "H5AC_protect 0x202 25 H5AC_WRITE 2 1\n",
+ "H5AC_pin_protected_entry 0x202 0\n",
+ "H5AC_unprotect 0x202 25 0 0 0\n",
+ "H5AC_unpin_entry 0x202 0\n",
+ "H5AC_expunge_entry 0x202 25 0\n",
+ "H5AC_protect 0x204 25 H5AC_WRITE 4 1\n",
+ "H5AC_pin_protected_entry 0x204 0\n",
+ "H5AC_unprotect 0x204 25 0 0 0\n",
+ "H5AC_mark_entry_dirty 0x204 0 0 0\n",
+ "H5AC_resize_pinned_entry 0x204 2 0\n",
+ "H5AC_resize_pinned_entry 0x204 4 0\n",
+ "H5AC_unpin_entry 0x204 0\n",
+ "H5AC_move_entry 0x200 0x8c65 25 0\n",
+ "H5AC_move_entry 0x8c65 0x200 25 0\n",
"H5AC_flush 0\n",
NULL
};
@@ -5322,9 +6886,39 @@ trace_file_check(void)
#endif /* H5_METADATA_TRACE_FILE */
- if ( world_mpi_rank == 0 ) {
+ switch ( metadata_write_strategy ) {
+
+ case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
+#ifdef H5_METADATA_TRACE_FILE
+ expected_output = &expected_output_0;
+#endif /* H5_METADATA_TRACE_FILE */
+ if ( world_mpi_rank == 0 ) {
+ TESTING(
+ "trace file collection -- process 0 only md write strategy");
+ }
+ break;
- TESTING("trace file collection");
+ case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED:
+#ifdef H5_METADATA_TRACE_FILE
+ expected_output = &expected_output_1;
+#endif /* H5_METADATA_TRACE_FILE */
+ if ( world_mpi_rank == 0 ) {
+ TESTING(
+ "trace file collection -- distributed md write strategy");
+ }
+ break;
+
+ default:
+#ifdef H5_METADATA_TRACE_FILE
+ /* this will almost certainly cause a failure, but it keeps us
+ * from de-referenceing a NULL pointer.
+ */
+ expected_output = &expected_output_0;
+#endif /* H5_METADATA_TRACE_FILE */
+ if ( world_mpi_rank == 0 ) {
+ TESTING("trace file collection -- unknown md write strategy");
+ }
+ break;
}
#ifdef H5_METADATA_TRACE_FILE
@@ -5348,7 +6942,8 @@ trace_file_check(void)
else /* run the clients */
{
- if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) {
+ if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr,
+ metadata_write_strategy) ) {
nerrors++;
fid = -1;
@@ -5481,6 +7076,7 @@ trace_file_check(void)
mssg.base_addr = 0; /* not used */
mssg.len = 0; /* not used */
mssg.ver = 0; /* not used */
+ mssg.count = 0; /* not used */
mssg.magic = MSSG_MAGIC;
if ( success ) {
@@ -5515,13 +7111,13 @@ trace_file_check(void)
i = 0;
while ( ( nerrors == 0 ) && ( ! done ) )
{
- if ( expected_output[i] == NULL ) {
+ if ( (*expected_output)[i] == NULL ) {
expected_line_len = 0;
} else {
- expected_line_len = HDstrlen(expected_output[i]);
+ expected_line_len = HDstrlen((*expected_output)[i]);
}
if ( HDfgets(buffer, 255, trace_file_ptr) != NULL ) {
@@ -5538,7 +7134,7 @@ trace_file_check(void)
done = TRUE;
} else if ( ( actual_line_len != expected_line_len ) ||
- ( HDstrcmp(buffer, expected_output[i]) != 0 ) ) {
+ ( HDstrcmp(buffer, (*expected_output)[i]) != 0 ) ) {
nerrors++;
if ( verbose ) {
@@ -5546,7 +7142,7 @@ trace_file_check(void)
"%d:%s: Unexpected data in trace file line %d.\n",
world_mpi_rank, fcn_name, i);
HDfprintf(stdout, "%d:%s: expected = \"%s\" %d\n",
- world_mpi_rank, fcn_name, expected_output[i],
+ world_mpi_rank, fcn_name, (*expected_output)[i],
expected_line_len);
HDfprintf(stdout, "%d:%s: actual = \"%s\" %d\n",
world_mpi_rank, fcn_name, buffer,
@@ -5770,22 +7366,28 @@ main(int argc, char **argv)
server_smoke_check();
#endif
#if 1
- smoke_check_1();
+ smoke_check_1(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ smoke_check_1(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
#if 1
- smoke_check_2();
+ smoke_check_2(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ smoke_check_2(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
#if 1
- smoke_check_3();
+ smoke_check_3(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ smoke_check_3(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
#if 1
- smoke_check_4();
+ smoke_check_4(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ smoke_check_4(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
#if 1
- smoke_check_5();
+ smoke_check_5(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ smoke_check_5(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
#if 1
- trace_file_check();
+ trace_file_check(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY);
+ trace_file_check(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
#endif
finish: