diff options
-rw-r--r-- | src/H5AC.c | 2685 | ||||
-rw-r--r-- | src/H5ACpkg.h | 66 | ||||
-rw-r--r-- | src/H5ACprivate.h | 18 | ||||
-rw-r--r-- | src/H5ACpublic.h | 76 | ||||
-rw-r--r-- | src/H5C.c | 863 | ||||
-rw-r--r-- | src/H5Cpkg.h | 41 | ||||
-rw-r--r-- | src/H5Cprivate.h | 26 | ||||
-rw-r--r-- | src/H5FDmpio.c | 2 | ||||
-rw-r--r-- | src/H5FDmpiposix.c | 2 | ||||
-rw-r--r-- | test/cache_api.c | 235 | ||||
-rw-r--r-- | test/cache_common.h | 66 | ||||
-rw-r--r-- | testpar/t_cache.c | 2204 |
12 files changed, 4844 insertions, 1440 deletions
@@ -125,14 +125,28 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f, hid_t dxpl_id, hbool_t * write_permitted_ptr); -#ifdef H5_HAVE_PARALLEL -static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr); -#endif /* JRM */ - static herr_t H5AC_ext_config_2_int_config(H5AC_cache_config_t * ext_conf_ptr, H5C_auto_size_ctl_t * int_conf_ptr); #ifdef H5_HAVE_PARALLEL +static herr_t H5AC_broadcast_candidate_list(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr); + +static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr); + +static herr_t H5AC_construct_candidate_list(H5AC_t * cache_ptr, + H5AC_aux_t * aux_ptr, + int sync_point_op); + +static herr_t H5AC_copy_candidate_list_to_buffer(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr, + size_t * MPI_Offset_buf_size_ptr, + MPI_Offset ** MPI_Offset_buf_ptr_ptr); + +static herr_t H5AC_flush_entries(H5F_t *f); + static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr, H5AC_info_t * entry_ptr, haddr_t addr, @@ -147,33 +161,55 @@ static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr, unsigned flags, int type_id); -#if 0 /* this is useful debugging code -- JRM */ -static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, - haddr_t addr, - hbool_t was_dirty, - unsigned flags, - int type_id); -#endif /* JRM */ +static herr_t H5AC_log_moved_entry(const H5F_t * f, + haddr_t old_addr, + haddr_t new_addr); static herr_t H5AC_log_inserted_entry(H5F_t * f, H5AC_t * cache_ptr, H5AC_info_t * entry_ptr); +static herr_t H5AC_propagate_and_apply_candidate_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr); + static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, hid_t dxpl_id, - H5AC_t * cache_ptr, - hbool_t do_barrier); + H5AC_t * cache_ptr); + +static herr_t H5AC_receive_candidate_list(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr); static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, H5AC_t * cache_ptr); -static herr_t H5AC_log_moved_entry(const H5F_t * f, - haddr_t old_addr, - haddr_t new_addr); +static herr_t H5AC_tidy_cache_0_lists(H5AC_t * cache_ptr, + int num_candidates, + haddr_t * candidates_list_ptr); + +herr_t H5AC_rsp__dist_md_write__flush(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr); + +herr_t H5AC_rsp__dist_md_write__flush_to_min_clean(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr); + +herr_t H5AC_rsp__p0_only__flush(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr); + +herr_t H5AC_rsp__p0_only__flush_to_min_clean(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr); + +static herr_t H5AC_run_sync_point(H5F_t *f, + hid_t dxpl_id, + int sync_point_op); -static herr_t H5AC_flush_entries(H5F_t *f); #endif /* H5_HAVE_PARALLEL */ @@ -377,26 +413,6 @@ H5AC_term_interface(void) FUNC_LEAVE_NOAPI(n) } /* end H5AC_term_interface() */ - -/*------------------------------------------------------------------------- - * Function: H5AC_create - * - * Purpose: Initialize the cache just after a file is opened. The - * SIZE_HINT is the number of cache slots desired. If you - * pass an invalid value then H5AC_NSLOTS is used. You can - * turn off caching by using 1 for the SIZE_HINT value. - * - * Return: Success: Number of slots actually used. - * - * Failure: Negative - * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Jul 9 1997 - * - *------------------------------------------------------------------------- - */ - static const char * H5AC_entry_type_names[H5AC_NTYPES] = { "B-tree nodes", @@ -428,19 +444,34 @@ static const char * H5AC_entry_type_names[H5AC_NTYPES] = "test entry" /* for testing only -- not used for actual files */ }; + +/*------------------------------------------------------------------------- + * Function: H5AC_create + * + * Purpose: Initialize the cache just after a file is opened. The + * SIZE_HINT is the number of cache slots desired. If you + * pass an invalid value then H5AC_NSLOTS is used. You can + * turn off caching by using 1 for the SIZE_HINT value. + * + * Return: Success: Number of slots actually used. + * + * Failure: Negative + * + * Programmer: Robb Matzke + * matzke@llnl.gov + * Jul 9 1997 + * + *------------------------------------------------------------------------- + */ herr_t H5AC_create(const H5F_t *f, H5AC_cache_config_t *config_ptr) { - herr_t ret_value = SUCCEED; /* Return value */ - herr_t result; #ifdef H5_HAVE_PARALLEL char prefix[H5C__PREFIX_LEN] = ""; - MPI_Comm mpi_comm = MPI_COMM_NULL; - int mpi_rank = -1; - int mpi_size = -1; H5AC_aux_t * aux_ptr = NULL; #endif /* H5_HAVE_PARALLEL */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5AC_create, FAIL) @@ -450,97 +481,78 @@ H5AC_create(const H5F_t *f, HDcompile_assert(NELMTS(H5AC_entry_type_names) == H5AC_NTYPES); HDcompile_assert(H5C__MAX_NUM_TYPE_IDS == H5AC_NTYPES); - result = H5AC_validate_config(config_ptr); - - if ( result != SUCCEED ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration"); - } + if(H5AC_validate_config(config_ptr) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration") #ifdef H5_HAVE_PARALLEL - if ( IS_H5FD_MPI(f) ) { + if(IS_H5FD_MPI(f)) { + MPI_Comm mpi_comm; + int mpi_rank; + int mpi_size; - if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) { - - HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \ - "can't get MPI communicator") - } - - if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) { + if(MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f))) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(f)) < 0) HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank") - } - - if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) { + if((mpi_size = H5F_mpi_get_size(f)) < 0) HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size") - } /* There is no point in setting up the auxilary structure if size * is less than or equal to 1, as there will never be any processes * to broadcast the clean lists to. */ - if ( mpi_size > 1 ) { - - if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) { - - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "Can't allocate H5AC auxilary structure.") - - } else { - - aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC; - aux_ptr->mpi_comm = mpi_comm; - aux_ptr->mpi_rank = mpi_rank; - aux_ptr->mpi_size = mpi_size; - aux_ptr->write_permitted = FALSE; - aux_ptr->dirty_bytes_threshold = - H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD; - aux_ptr->dirty_bytes = 0; + if(mpi_size > 1) { + if(NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate H5AC auxilary structure.") + + aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC; + aux_ptr->mpi_comm = mpi_comm; + aux_ptr->mpi_rank = mpi_rank; + aux_ptr->mpi_size = mpi_size; + aux_ptr->write_permitted = FALSE; + aux_ptr->dirty_bytes_threshold = H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD; + aux_ptr->dirty_bytes = 0; + aux_ptr->metadata_write_strategy = H5AC__DEFAULT_METADATA_WRITE_STRATEGY; #if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->dirty_bytes_propagations = 0; - aux_ptr->unprotect_dirty_bytes = 0; - aux_ptr->unprotect_dirty_bytes_updates = 0; - aux_ptr->insert_dirty_bytes = 0; - aux_ptr->insert_dirty_bytes_updates = 0; - aux_ptr->move_dirty_bytes = 0; - aux_ptr->move_dirty_bytes_updates = 0; + aux_ptr->dirty_bytes_propagations = 0; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->move_dirty_bytes = 0; + aux_ptr->move_dirty_bytes_updates = 0; #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - aux_ptr->d_slist_ptr = NULL; - aux_ptr->d_slist_len = 0; - aux_ptr->c_slist_ptr = NULL; - aux_ptr->c_slist_len = 0; - aux_ptr->write_done = NULL; - - sprintf(prefix, "%d:", mpi_rank); - } - - if ( mpi_rank == 0 ) { - - aux_ptr->d_slist_ptr = - H5SL_create(H5SL_TYPE_HADDR); - - if ( aux_ptr->d_slist_ptr == NULL ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, - "can't create dirtied entry list.") - } - - aux_ptr->c_slist_ptr = - H5SL_create(H5SL_TYPE_HADDR); - - if ( aux_ptr->c_slist_ptr == NULL ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, - "can't create cleaned entry list.") - } - } - } - - if ( aux_ptr != NULL ) { + aux_ptr->d_slist_ptr = NULL; + aux_ptr->d_slist_len = 0; + aux_ptr->c_slist_ptr = NULL; + aux_ptr->c_slist_len = 0; + aux_ptr->candidate_slist_ptr = NULL; + aux_ptr->candidate_slist_len = 0; + aux_ptr->write_done = NULL; + aux_ptr->sync_point_done = NULL; + + sprintf(prefix, "%d:", mpi_rank); + + if(mpi_rank == 0) { + if(NULL == (aux_ptr->d_slist_ptr = H5SL_create(H5SL_TYPE_HADDR))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create dirtied entry list.") + + if(NULL == (aux_ptr->c_slist_ptr = H5SL_create(H5SL_TYPE_HADDR))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create cleaned entry list.") + } /* end if */ - if ( aux_ptr->mpi_rank == 0 ) { + /* construct the candidate slist for all processes. + * when the distributed strategy is selected as all processes + * will use it in the case of a flush. + */ + if(NULL == (aux_ptr->candidate_slist_ptr = H5SL_create(H5SL_TYPE_HADDR))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "can't create candidate entry list.") + } /* end if */ + if(aux_ptr != NULL) { + if(aux_ptr->mpi_rank == 0) { f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, H5AC__DEFAULT_MIN_CLEAN_SIZE, (H5AC_NTYPES - 1), @@ -549,25 +561,17 @@ H5AC_create(const H5F_t *f, TRUE, H5AC_log_flushed_entry, (void *)aux_ptr); - } else { - f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, H5AC__DEFAULT_MIN_CLEAN_SIZE, (H5AC_NTYPES - 1), (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, NULL, - FALSE, -#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */ - H5AC_log_flushed_entry_dummy, -#else /* JRM */ - NULL, -#endif /* JRM */ (void *)aux_ptr); } - } else { - f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, H5AC__DEFAULT_MIN_CLEAN_SIZE, (H5AC_NTYPES - 1), @@ -595,61 +599,40 @@ H5AC_create(const H5F_t *f, } #endif /* H5_HAVE_PARALLEL */ - if ( NULL == f->shared->cache ) { - + if(NULL == f->shared->cache) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed") - } #ifdef H5_HAVE_PARALLEL - else if ( aux_ptr != NULL ) { - - result = H5C_set_prefix(f->shared->cache, prefix); - - if ( result != SUCCEED ) { - - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "H5C_set_prefix() failed") - } - } + if(aux_ptr != NULL) { + if(H5C_set_prefix(f->shared->cache, prefix) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "H5C_set_prefix() failed") + } /* end if */ #endif /* H5_HAVE_PARALLEL */ - result = H5AC_set_cache_auto_resize_config(f->shared->cache, config_ptr); - - if ( result != SUCCEED ) { - - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "auto resize configuration failed") - } + if(H5AC_set_cache_auto_resize_config(f->shared->cache, config_ptr) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "auto resize configuration failed") done: - #ifdef H5_HAVE_PARALLEL - /* if there is a failure, try to tidy up the auxilary structure */ - - if ( ret_value != SUCCEED ) { - - if ( aux_ptr != NULL ) { - - if ( aux_ptr->d_slist_ptr != NULL ) { - + if(ret_value < 0) { + if(aux_ptr != NULL) { + if(aux_ptr->d_slist_ptr != NULL) H5SL_close(aux_ptr->d_slist_ptr); - } - - if ( aux_ptr->c_slist_ptr != NULL ) { + if(aux_ptr->c_slist_ptr != NULL) H5SL_close(aux_ptr->c_slist_ptr); - } + + if(aux_ptr->candidate_slist_ptr != NULL) + H5SL_close(aux_ptr->candidate_slist_ptr); aux_ptr->magic = 0; - H5FL_FREE(H5AC_aux_t, aux_ptr); - aux_ptr = NULL; - } - } + aux_ptr = H5FL_FREE(H5AC_aux_t, aux_ptr); + } /* end if */ + } /* end if */ #endif /* H5_HAVE_PARALLEL */ FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_create() */ @@ -693,7 +676,7 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) #endif /* H5AC__TRACE_FILE_ENABLED */ #ifdef H5_HAVE_PARALLEL - aux_ptr = f->shared->cache->aux_ptr; + aux_ptr = (struct H5AC_aux_t *)(f->shared->cache->aux_ptr); if(aux_ptr) /* Sanity check */ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); @@ -714,6 +697,8 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) H5SL_close(aux_ptr->d_slist_ptr); if(aux_ptr->c_slist_ptr != NULL) H5SL_close(aux_ptr->c_slist_ptr); + if(aux_ptr->candidate_slist_ptr != NULL) + H5SL_close(aux_ptr->candidate_slist_ptr); aux_ptr->magic = 0; H5FL_FREE(H5AC_aux_t, aux_ptr); aux_ptr = NULL; @@ -902,8 +887,6 @@ H5AC_get_entry_status(const H5F_t *f, haddr_t addr, unsigned * status_ptr) { - H5C_t *cache_ptr = f->shared->cache; - herr_t result; hbool_t in_cache; hbool_t is_dirty; hbool_t is_protected; @@ -916,50 +899,31 @@ H5AC_get_entry_status(const H5F_t *f, FUNC_ENTER_NOAPI(H5AC_get_entry_status, FAIL) - if ( ( cache_ptr == NULL ) || - ( cache_ptr->magic != H5C__H5C_T_MAGIC ) || - ( ! H5F_addr_defined(addr) ) || - ( status_ptr == NULL ) ) { - + if((f == NULL) || (!H5F_addr_defined(addr)) || (status_ptr == NULL)) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad param(s) on entry.") - } - - result = H5C_get_entry_status(f, addr, &entry_size, &in_cache, - &is_dirty, &is_protected, &is_pinned, &is_flush_dep_parent, - &is_flush_dep_child); - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "H5C_get_entry_status() failed.") - } - if ( in_cache ) { + if(H5C_get_entry_status(f, addr, &entry_size, &in_cache, &is_dirty, + &is_protected, &is_pinned, &is_flush_dep_parent, &is_flush_dep_child) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_get_entry_status() failed.") + if(in_cache) { status |= H5AC_ES__IN_CACHE; - - if ( is_dirty ) + if(is_dirty) status |= H5AC_ES__IS_DIRTY; - - if ( is_protected ) + if(is_protected) status |= H5AC_ES__IS_PROTECTED; - - if ( is_pinned ) + if(is_pinned) status |= H5AC_ES__IS_PINNED; - - if ( is_flush_dep_parent ) + if(is_flush_dep_parent) status |= H5AC_ES__IS_FLUSH_DEP_PARENT; - - if ( is_flush_dep_child ) + if(is_flush_dep_child) status |= H5AC_ES__IS_FLUSH_DEP_CHILD; - } + } /* end if */ *status_ptr = status; done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_get_entry_status() */ @@ -982,9 +946,6 @@ herr_t H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned int flags) { -#ifdef H5_HAVE_PARALLEL - H5AC_aux_t * aux_ptr = NULL; -#endif /* H5_HAVE_PARALLEL */ #if H5AC__TRACE_FILE_ENABLED char trace[128] = ""; size_t trace_entry_size = 0; @@ -1040,26 +1001,20 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, #endif /* H5AC__TRACE_FILE_ENABLED */ #ifdef H5_HAVE_PARALLEL - if(NULL != (aux_ptr = f->shared->cache->aux_ptr)) { +{ + H5AC_aux_t *aux_ptr; + + if(NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr)) { + /* Log the new entry */ if(H5AC_log_inserted_entry(f, f->shared->cache, (H5AC_info_t *)thing) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5AC_log_inserted_entry() failed") /* Check if we should try to flush */ - if(aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold) { - hbool_t evictions_enabled; - - /* Query if evictions are allowed */ - if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") - - /* Flush if evictions are allowed */ - if(evictions_enabled) { - if(H5AC_propagate_flushed_and_still_clean_entries_list(f, - H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0 ) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - } /* end if */ - } /* end if */ + if(aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold) + if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") } /* end if */ +} #endif /* H5_HAVE_PARALLEL */ done: @@ -1108,7 +1063,7 @@ H5AC_mark_entry_dirty(void *thing) * occult errors. */ if((H5C_get_trace_file_ptr_from_entry(thing, &trace_file_ptr) >= 0) && - (NULL != trace_file_ptr)) + (NULL != trace_file_ptr)) sprintf(trace, "%s 0x%lx", FUNC, (unsigned long)(((H5C_cache_entry_t *)thing)->addr)); #endif /* H5AC__TRACE_FILE_ENABLED */ @@ -1159,15 +1114,14 @@ done: herr_t H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_addr) { - herr_t result; - herr_t ret_value=SUCCEED; /* Return value */ -#ifdef H5_HAVE_PARALLEL - H5AC_aux_t * aux_ptr = NULL; -#endif /* H5_HAVE_PARALLEL */ #if H5AC__TRACE_FILE_ENABLED char trace[128] = ""; FILE * trace_file_ptr = NULL; #endif /* H5AC__TRACE_FILE_ENABLED */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr; +#endif /* H5_HAVE_PARALLEL */ + herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5AC_move_entry, FAIL) @@ -1197,52 +1151,31 @@ H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t ne #endif /* H5AC__TRACE_FILE_ENABLED */ #ifdef H5_HAVE_PARALLEL - if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) { + /* Log moving the entry */ + if(NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr)) { if(H5AC_log_moved_entry(f, old_addr, new_addr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log moved entry") - } + } /* end if */ #endif /* H5_HAVE_PARALLEL */ - result = H5C_move_entry(f->shared->cache, - type, - old_addr, - new_addr); - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, \ - "H5C_move_entry() failed.") - } + if(H5C_move_entry(f->shared->cache, type, old_addr, new_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "H5C_move_entry() failed.") #ifdef H5_HAVE_PARALLEL /* Check if we should try to flush */ - if(aux_ptr && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) { - hbool_t evictions_enabled; - - /* Query if evictions are allowed */ - if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") - - /* Flush if evictions are allowed */ - if(evictions_enabled) { - if(H5AC_propagate_flushed_and_still_clean_entries_list(f, - H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - } /* end if */ + if(NULL != aux_ptr && aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold) { + if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") } /* end if */ #endif /* H5_HAVE_PARALLEL */ done: - #if H5AC__TRACE_FILE_ENABLED - if ( trace_file_ptr != NULL ) { - + if(trace_file_ptr != NULL) HDfprintf(trace_file_ptr, "%s %d\n", trace, (int)ret_value); - } #endif /* H5AC__TRACE_FILE_ENABLED */ FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_move_entry() */ @@ -1331,7 +1264,6 @@ H5AC_create_flush_dependency(void * parent_thing, void * child_thing) FUNC, (unsigned long)(((H5C_cache_entry_t *)parent_thing)->addr), (unsigned long)(((H5C_cache_entry_t *)child_thing)->addr)); - } /* end if */ #endif /* H5AC__TRACE_FILE_ENABLED */ if(H5C_create_flush_dependency(parent_thing, child_thing) < 0) @@ -1381,12 +1313,12 @@ H5AC_protect(H5F_t *f, { unsigned protect_flags = H5C__NO_FLAGS_SET; void * thing = (void *)NULL; - void * ret_value; /* Return value */ #if H5AC__TRACE_FILE_ENABLED char trace[128] = ""; size_t trace_entry_size = 0; FILE * trace_file_ptr = NULL; #endif /* H5AC__TRACE_FILE_ENABLED */ + void * ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5AC_protect, NULL) @@ -1415,7 +1347,7 @@ H5AC_protect(H5F_t *f, ( H5C_get_trace_file_ptr(f->shared->cache, &trace_file_ptr) >= 0) && ( trace_file_ptr != NULL ) ) { - char * rw_string; + const char * rw_string; if ( rw == H5AC_WRITE ) { @@ -1627,12 +1559,11 @@ H5AC_destroy_flush_dependency(void * parent_thing, void * child_thing) #if H5AC__TRACE_FILE_ENABLED if((H5C_get_trace_file_ptr_from_entry(parent_thing, &trace_file_ptr) >= 0) && - (NULL != trace_file_ptr)) - sprintf(trace, "%s %lx", + (NULL != trace_file_ptr)) + sprintf(trace, "%s %llx %llx", FUNC, - (unsigned long)(((H5C_cache_entry_t *)parent_thing)->addr), - (unsigned long)(((H5C_cache_entry_t *)child_thing)->addr)); - } /* end if */ + (unsigned long long)(((H5C_cache_entry_t *)parent_thing)->addr), + (unsigned long long)(((H5C_cache_entry_t *)child_thing)->addr)); #endif /* H5AC__TRACE_FILE_ENABLED */ if(H5C_destroy_flush_dependency(parent_thing, child_thing) < 0) @@ -1640,7 +1571,7 @@ H5AC_destroy_flush_dependency(void * parent_thing, void * child_thing) done: #if H5AC__TRACE_FILE_ENABLED - if( trace_file_ptr != NULL ) + if(trace_file_ptr != NULL) HDfprintf(trace_file_ptr, "%s %d\n", trace, (int)ret_value); #endif /* H5AC__TRACE_FILE_ENABLED */ @@ -1690,7 +1621,6 @@ herr_t H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned flags) { - herr_t result; hbool_t dirtied; hbool_t deleted; #ifdef H5_HAVE_PARALLEL @@ -1740,93 +1670,92 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, /* Check if the size changed out from underneath us, if we're not deleting * the entry. */ - if ( dirtied && !deleted ) { + if(dirtied && !deleted) { size_t curr_size = 0; - if ( (type->size)(f, thing, &curr_size) < 0 ) { - - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ - "Can't get size of thing") - } + if((type->size)(f, thing, &curr_size) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, "Can't get size of thing") if(((H5AC_info_t *)thing)->size != curr_size) HGOTO_ERROR(H5E_CACHE, H5E_BADSIZE, FAIL, "size of entry changed") - } + } /* end if */ #ifdef H5_HAVE_PARALLEL - if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) && - ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) { + if((dirtied) && (((H5AC_info_t *)thing)->is_dirty == FALSE) && + (NULL != (aux_ptr = (H5AC_aux_t *)f->shared->cache->aux_ptr))) { if(H5AC_log_dirtied_entry((H5AC_info_t *)thing, addr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log dirtied entry") - } - - if ( ( (flags & H5C__DELETED_FLAG) != 0 ) && - ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) && - ( aux_ptr->mpi_rank == 0 ) ) { - - result = H5AC_log_deleted_entry(f->shared->cache, - (H5AC_info_t *)thing, - addr, - flags); - - if ( result < 0 ) { + } /* end if */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "H5AC_log_deleted_entry() failed.") - } - } + if((deleted) && + (NULL != (aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr))) && + (aux_ptr->mpi_rank == 0)) { + if(H5AC_log_deleted_entry(f->shared->cache, (H5AC_info_t *)thing, addr, flags) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5AC_log_deleted_entry() failed.") + } /* end if */ #endif /* H5_HAVE_PARALLEL */ - result = H5C_unprotect(f, - dxpl_id, - H5AC_noblock_dxpl_id, - type, - addr, - thing, - flags); - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "H5C_unprotect() failed.") - } + if(H5C_unprotect(f, dxpl_id, H5AC_noblock_dxpl_id, type, addr, thing, flags) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "H5C_unprotect() failed.") #ifdef H5_HAVE_PARALLEL /* Check if we should try to flush */ - if(aux_ptr && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) { - hbool_t evictions_enabled; - - /* Query if evictions are allowed */ - if(H5C_get_evictions_enabled((const H5C_t *)f->shared->cache, &evictions_enabled) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") - - /* Flush if evictions are allowed */ - if(evictions_enabled) { - if(H5AC_propagate_flushed_and_still_clean_entries_list(f, - H5AC_noblock_dxpl_id, f->shared->cache, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - } /* end if */ + if((aux_ptr != NULL) && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)) { + if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") } /* end if */ #endif /* H5_HAVE_PARALLEL */ done: - #if H5AC__TRACE_FILE_ENABLED - if ( trace_file_ptr != NULL ) { - + if(trace_file_ptr != NULL) HDfprintf(trace_file_ptr, "%s %x %d\n", - trace, - (unsigned)flags, - (int)ret_value); - } + trace, (unsigned)flags, (int)ret_value); #endif /* H5AC__TRACE_FILE_ENABLED */ FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_unprotect() */ /*------------------------------------------------------------------------- + * Function: HA5C_set_sync_point_done_callback + * + * Purpose: Set the value of the sync_point_done callback. This + * callback is used by the parallel test code to verify + * that the expected writes and only the expected writes + * take place during a sync point. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 5/9/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_set_sync_point_done_callback(H5C_t * cache_ptr, + void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl)) +{ + H5AC_aux_t * aux_ptr; + + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5AC_set_sync_point_done_callback) + + HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + aux_ptr->sync_point_done = sync_point_done; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC_set_sync_point_done_callback() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- * Function: HA5C_set_write_done_callback * * Purpose: Set the value of the write_done callback. This callback @@ -1845,29 +1774,20 @@ herr_t H5AC_set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)) { - herr_t ret_value = SUCCEED; /* Return value */ - H5AC_aux_t * aux_ptr = NULL; + H5AC_aux_t * aux_ptr; - FUNC_ENTER_NOAPI(H5AC_set_write_done_callback, FAIL) + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5AC_set_write_done_callback) - /* This would normally be an assert, but we need to use an HGOTO_ERROR - * call to shut up the compiler. - */ - if ( ( ! cache_ptr ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) { + HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr") - } - - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); aux_ptr->write_done = write_done; -done: - FUNC_LEAVE_NOAPI(ret_value) - + FUNC_LEAVE_NOAPI(SUCCEED) } /* H5AC_set_write_done_callback() */ #endif /* H5_HAVE_PARALLEL */ @@ -1900,7 +1820,6 @@ H5AC_stats(const H5F_t *f) done: FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_stats() */ @@ -1958,13 +1877,8 @@ H5AC_get_cache_auto_resize_config(const H5AC_t * cache_ptr, "H5C_get_cache_auto_resize_config() failed.") } - result = H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled); - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "H5C_get_resize_enabled() failed.") - } + if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_get_resize_enabled() failed.") if ( internal_config.rpt_fcn == NULL ) { @@ -2008,11 +1922,16 @@ H5AC_get_cache_auto_resize_config(const H5AC_t * cache_ptr, config_ptr->dirty_bytes_threshold = ((H5AC_aux_t *)(cache_ptr->aux_ptr))->dirty_bytes_threshold; + config_ptr->metadata_write_strategy = + ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy; } else { #endif /* H5_HAVE_PARALLEL */ - config_ptr->dirty_bytes_threshold = H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD; + config_ptr->dirty_bytes_threshold = + H5AC__DEFAULT_DIRTY_BYTES_THRESHOLD; + config_ptr->metadata_write_strategy = + H5AC__DEFAULT_METADATA_WRITE_STRATEGY; #ifdef H5_HAVE_PARALLEL } @@ -2211,24 +2130,6 @@ H5AC_set_cache_auto_resize_config(H5AC_t *cache_ptr, } } - if ( - ( - config_ptr->dirty_bytes_threshold - < - H5AC__MIN_DIRTY_BYTES_THRESHOLD - ) - || - ( - config_ptr->dirty_bytes_threshold - > - H5AC__MAX_DIRTY_BYTES_THRESHOLD - ) - ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, - "config_ptr->dirty_bytes_threshold out of range.") - } - if ( config_ptr->close_trace_file ) { if ( H5AC_close_trace_file(cache_ptr) < 0 ) { @@ -2262,6 +2163,9 @@ H5AC_set_cache_auto_resize_config(H5AC_t *cache_ptr, ((H5AC_aux_t *)(cache_ptr->aux_ptr))->dirty_bytes_threshold = config_ptr->dirty_bytes_threshold; + + ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy = + config_ptr->metadata_write_strategy; } #endif /* H5_HAVE_PARALLEL */ @@ -2277,7 +2181,7 @@ done: ( trace_file_ptr != NULL ) ) { HDfprintf(trace_file_ptr, - "%s %d %d %d %d \"%s\" %d %d %d %f %d %d %ld %d %f %f %d %f %f %d %d %d %f %f %d %d %d %d %f %d %d\n", + "%s %d %d %d %d \"%s\" %d %d %d %f %d %d %ld %d %f %f %d %f %f %d %d %d %f %f %d %d %d %d %f %d %d %d\n", "H5AC_set_cache_auto_resize_config", trace_config.version, (int)(trace_config.rpt_fcn_enabled), @@ -2308,6 +2212,7 @@ done: (int)(trace_config.apply_empty_reserve), trace_config.empty_reserve, trace_config.dirty_bytes_threshold, + trace_config.metadata_write_strategy, (int)ret_value); } #endif /* H5AC__TRACE_FILE_ENABLED */ @@ -2342,45 +2247,28 @@ done: herr_t H5AC_validate_config(H5AC_cache_config_t * config_ptr) { - herr_t result; H5C_auto_size_ctl_t internal_config; herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5AC_validate_config, FAIL) - if ( config_ptr == NULL ) { - + if(config_ptr == NULL) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "NULL config_ptr on entry.") - } - - if ( config_ptr->version != H5AC__CURR_CACHE_CONFIG_VERSION ) { + if(config_ptr->version != H5AC__CURR_CACHE_CONFIG_VERSION) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown config version.") - } - if ( ( config_ptr->rpt_fcn_enabled != TRUE ) && - ( config_ptr->rpt_fcn_enabled != FALSE ) ) { + if((config_ptr->rpt_fcn_enabled != TRUE) && (config_ptr->rpt_fcn_enabled != FALSE)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->rpt_fcn_enabled must be either TRUE or FALSE.") - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \ - "config_ptr->rpt_fcn_enabled must be either TRUE or FALSE.") - } - - if ( ( config_ptr->open_trace_file != TRUE ) && - ( config_ptr->open_trace_file != FALSE ) ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \ - "config_ptr->open_trace_file must be either TRUE or FALSE.") - } + if((config_ptr->open_trace_file != TRUE) && (config_ptr->open_trace_file != FALSE)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->open_trace_file must be either TRUE or FALSE.") - if ( ( config_ptr->close_trace_file != TRUE ) && - ( config_ptr->close_trace_file != FALSE ) ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \ - "config_ptr->close_trace_file must be either TRUE or FALSE.") - } + if((config_ptr->close_trace_file != TRUE) && (config_ptr->close_trace_file != FALSE)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->close_trace_file must be either TRUE or FALSE.") /* don't bother to test trace_file_name unless open_trace_file is TRUE */ - if ( config_ptr->open_trace_file ) { + if(config_ptr->open_trace_file) { size_t name_len; /* Can't really test the trace_file_name field without trying to @@ -2389,15 +2277,10 @@ H5AC_validate_config(H5AC_cache_config_t * config_ptr) */ name_len = HDstrlen(config_ptr->trace_file_name); - if ( name_len == 0 ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \ - "config_ptr->trace_file_name is empty.") - - } else if ( name_len > H5AC__MAX_TRACE_FILE_NAME_LEN ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, \ - "config_ptr->trace_file_name too long.") + if(name_len == 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->trace_file_name is empty.") + } else if(name_len > H5AC__MAX_TRACE_FILE_NAME_LEN) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->trace_file_name too long.") } } @@ -2417,36 +2300,24 @@ H5AC_validate_config(H5AC_cache_config_t * config_ptr) "Can't disable evictions while auto-resize is enabled.") } - if ( config_ptr->dirty_bytes_threshold < H5AC__MIN_DIRTY_BYTES_THRESHOLD ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, - "dirty_bytes_threshold too small.") - } else - if ( config_ptr->dirty_bytes_threshold > H5AC__MAX_DIRTY_BYTES_THRESHOLD ) { - - HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, - "dirty_bytes_threshold too big.") - } - - if ( H5AC_ext_config_2_int_config(config_ptr, &internal_config) != - SUCCEED ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "H5AC_ext_config_2_int_config() failed.") + if(config_ptr->dirty_bytes_threshold < H5AC__MIN_DIRTY_BYTES_THRESHOLD) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dirty_bytes_threshold too small.") + } else if(config_ptr->dirty_bytes_threshold > H5AC__MAX_DIRTY_BYTES_THRESHOLD) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "dirty_bytes_threshold too big.") } - result = H5C_validate_resize_config(&internal_config, - H5C_RESIZE_CFG__VALIDATE_ALL); + if((config_ptr->metadata_write_strategy != H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY) && + (config_ptr->metadata_write_strategy != H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "config_ptr->metadata_write_strategy out of range.") - if ( result != SUCCEED ) { + if(H5AC_ext_config_2_int_config(config_ptr, &internal_config) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_ext_config_2_int_config() failed.") + if(H5C_validate_resize_config(&internal_config, H5C_RESIZE_CFG__VALIDATE_ALL) < 0) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "error(s) in new config.") - } done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_validate_config() */ @@ -2619,12 +2490,192 @@ done: } /* H5AC_open_trace_file() */ +/*------------------------------------------------------------------------- + * Function: H5AC_add_candidate() + * + * Purpose: Add the supplied metadata entry address to the candidate + * list. Verify that each entry added does not appear in + * the list prior to its insertion. + * + * This function is intended for used in constructing list + * of entried to be flushed during sync points. It shouldn't + * be called anywhere else. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_add_candidate(H5AC_t * cache_ptr, + haddr_t addr) +{ + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_add_candidate, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + HDassert( aux_ptr->candidate_slist_ptr != NULL ); + + /* If the supplied address appears in the candidate list, scream and die. */ + if(NULL != H5SL_search(aux_ptr->candidate_slist_ptr, (void *)(&addr))) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry already in candidate slist.") + + /* otherwise, construct an entry for the supplied address, and insert + * it into the candidate slist. + */ + if(NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "Can't allocate candidate slist entry .") + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") + + aux_ptr->candidate_slist_len += 1; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_add_candidate() */ +#endif /* H5_HAVE_PARALLEL */ + + /*************************************************************************/ /**************************** Private Functions: *************************/ /*************************************************************************/ /*------------------------------------------------------------------------- * + * Function: H5AC_broadcast_candidate_list() + * + * Purpose: Broadcast the contents of the process 0 candidate entry + * slist. In passing, also remove all entries from said + * list. As the application of this will be handled by + * the same functions on all processes, construct and + * return a copy of the list in the same format as that + * received by the other processes. Note that if this + * copy is returned in *haddr_buf_ptr_ptr, the caller + * must free it. + * + * This function must only be called by the process with + * MPI_rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/1/05 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_broadcast_candidate_list(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr) +{ + herr_t result; + hbool_t success = FALSE; + H5AC_aux_t * aux_ptr = NULL; + haddr_t * haddr_buf_ptr = NULL; + MPI_Offset * MPI_Offset_buf_ptr = NULL; + size_t buf_size = 0; + int mpi_result; + int chk_num_entries = 0; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_broadcast_candidate_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + HDassert( aux_ptr->candidate_slist_ptr != NULL ); + HDassert( H5SL_count(aux_ptr->candidate_slist_ptr) == + (size_t)(aux_ptr->candidate_slist_len) ); + HDassert( num_entries_ptr != NULL ); + HDassert( *num_entries_ptr == 0 ); + HDassert( haddr_buf_ptr_ptr != NULL ); + HDassert( *haddr_buf_ptr_ptr == NULL ); + + /* First broadcast the number of entries in the list so that the + * receivers can set up buffers to receive them. If there aren't + * any, we are done. + */ + num_entries = aux_ptr->candidate_slist_len; + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) + + if(num_entries > 0) { + /* convert the candidate list into the format we + * are used to receiving from process 0, and also load it + * into a buffer for transmission. + */ + if(H5AC_copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, + &haddr_buf_ptr, &buf_size, &MPI_Offset_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + + HDassert( chk_num_entries == num_entries ); + HDassert( haddr_buf_ptr != NULL ); + HDassert( MPI_Offset_buf_ptr != NULL ); + HDassert( aux_ptr->candidate_slist_len == 0 ); + + /* Now broadcast the list of candidate entries -- if there is one. + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) + } /* end if */ + + success = TRUE; + +done: + if(MPI_Offset_buf_ptr != NULL) + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); + + if(success) { + /* Pass the number of entries and the buffer pointer + * back to the caller. Do this so that we can use the same code + * to apply the candidate list to all the processes. + */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + } else if(haddr_buf_ptr != NULL) { + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_broadcast_candidate_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * * Function: H5AC_broadcast_clean_list() * * Purpose: Broadcast the contents of the process 0 cleaned entry @@ -2649,6 +2700,7 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) { herr_t ret_value = SUCCEED; /* Return value */ haddr_t addr; + haddr_t * addr_buf_ptr = NULL; H5AC_aux_t * aux_ptr = NULL; H5SL_node_t * slist_node_ptr = NULL; H5AC_slist_entry_t * slist_entry_ptr = NULL; @@ -2656,14 +2708,14 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) size_t buf_size; int i = 0; int mpi_result; - int num_entries; + int num_entries = 0; FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL) HDassert( cache_ptr != NULL ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr; HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); @@ -2701,13 +2753,28 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) "memory allocation failed for clean entry buffer") } + /* if the sync_point_done callback is defined, allocate the + * addr buffer as well. + */ + if ( aux_ptr->sync_point_done != NULL ) { + + addr_buf_ptr = H5MM_malloc((size_t)(num_entries * sizeof(haddr_t))); + + if ( addr_buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for addr buffer") + } + } + + /* now load the entry base addresses into the buffer, emptying the * cleaned entry list in passing */ while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) ) { - slist_entry_ptr = H5SL_item(slist_node_ptr); + slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_item(slist_node_ptr); HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); @@ -2715,6 +2782,11 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) addr = slist_entry_ptr->addr; + if ( addr_buf_ptr != NULL ) { + + addr_buf_ptr[i] = addr; + } + if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) { HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ @@ -2742,19 +2814,12 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) /* and also remove the matching entry from the dirtied list * if it exists. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, - (void *)(&addr))) != NULL ) { - - HDassert( slist_entry_ptr->magic == - H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + if((slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL) { + HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); HDassert( slist_entry_ptr->addr == addr ); - if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) - != slist_entry_ptr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ - "Can't delete entry from dirty entry slist.") - } + if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.") slist_entry_ptr->magic = 0; H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); @@ -2763,8 +2828,7 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) aux_ptr->d_slist_len -= 1; HDassert( aux_ptr->d_slist_len >= 0 ); - } - + } /* end if */ } /* while */ @@ -2785,15 +2849,16 @@ H5AC_broadcast_clean_list(H5AC_t * cache_ptr) } } -done: - - if ( buf_ptr != NULL ) { + if(aux_ptr->sync_point_done != NULL) + (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr); +done: + if(buf_ptr != NULL) buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr); - } + if(addr_buf_ptr != NULL) + addr_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)addr_buf_ptr); FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_broadcast_clean_list() */ #endif /* H5_HAVE_PARALLEL */ @@ -2849,7 +2914,9 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f, HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); - if ( aux_ptr->mpi_rank == 0 ) { + if ( ( aux_ptr->mpi_rank == 0 ) || + ( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ) ) { write_permitted = aux_ptr->write_permitted; @@ -2870,6 +2937,222 @@ done: /*------------------------------------------------------------------------- + * Function: H5AC_construct_candidate_list() + * + * Purpose: In the parallel case when the metadata_write_strategy is + * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses + * this function to construct the list of cache entries to + * be flushed. This list is then propagated to the other + * caches, and then flushed in a distributed fashion. + * + * The sync_point_op parameter is used to determine the extent + * of the flush. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_construct_candidate_list(H5AC_t * cache_ptr, + H5AC_aux_t * aux_ptr, + int sync_point_op) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_construct_candidate_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE ) || + ( aux_ptr->mpi_rank == 0 ) ); + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_len == 0 ); + HDassert( aux_ptr->candidate_slist_ptr != NULL ); + HDassert( aux_ptr->candidate_slist_len == 0 ); + HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN ) || + ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE ) ); + + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.") + break; + } /* end switch */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_construct_candidate_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_copy_candidate_list_to_buffer + * + * Purpose: Allocate buffer(s) and copy the contents of the candidate + * entry slist into it (them). In passing, remove all + * entries from the candidate slist. Note that the + * candidate slist must not be empty. + * + * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer + * of MPI_Offset, copy the contents of the candidate + * entry list into it with the appropriate conversions, + * and return the base address of the buffer in + * *MPI_Offset_buf_ptr. Note that this is the buffer + * used by process 0 to transmit the list of entries to + * be flushed to all other processes (in this file group). + * + * Similarly, allocate a buffer of haddr_t, load the contents + * of the candidate list into this buffer, and return its + * base address in *haddr_buf_ptr_ptr. Note that this + * latter buffer is constructed unconditionally. + * + * In passing, also remove all entries from the candidate + * entry slist. + * + * Return: Return SUCCEED on success, and FAIL on failure. + * + * Programmer: John Mainzer, 4/19/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_copy_candidate_list_to_buffer(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr, + size_t * MPI_Offset_buf_size_ptr, + MPI_Offset ** MPI_Offset_buf_ptr_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t success = FALSE; + haddr_t addr; + H5AC_aux_t * aux_ptr = NULL; + H5SL_node_t * slist_node_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + MPI_Offset * MPI_Offset_buf_ptr = NULL; + haddr_t * haddr_buf_ptr = NULL; + size_t buf_size; + int i = 0; + int num_entries = 0; + + FUNC_ENTER_NOAPI(H5AC_copy_candidate_list_to_buffer, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + HDassert( aux_ptr->candidate_slist_ptr != NULL ); + HDassert( H5SL_count(aux_ptr->candidate_slist_ptr) == + (size_t)(aux_ptr->candidate_slist_len) ); + HDassert( aux_ptr->candidate_slist_len > 0 ); + HDassert( num_entries_ptr != NULL ); + HDassert( *num_entries_ptr == 0 ); + HDassert( haddr_buf_ptr_ptr != NULL ); + HDassert( *haddr_buf_ptr_ptr == NULL ); + + num_entries = aux_ptr->candidate_slist_len; + + /* allocate a buffer(s) to store the list of candidate entry + * base addresses in + */ + if(MPI_Offset_buf_ptr_ptr != NULL) { + HDassert( MPI_Offset_buf_size_ptr != NULL ); + + /* allocate a buffer of MPI_Offset */ + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for MPI_Offset buffer") + } /* end if */ + + /* allocate a buffer of haddr_t */ + if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer") + + /* now load the entry base addresses into the buffer, emptying the + * candidate entry list in passing + */ + while(NULL != (slist_node_ptr = H5SL_first(aux_ptr->candidate_slist_ptr))) { + slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_item(slist_node_ptr); + + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert( i < num_entries ); + + addr = slist_entry_ptr->addr; + haddr_buf_ptr[i] = addr; + if(MPI_Offset_buf_ptr != NULL) { + if(H5FD_mpi_haddr_to_MPIOff(addr, &(MPI_Offset_buf_ptr[i])) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") + } /* end if */ + + i++; + + /* now remove the entry from the cleaned entry list */ + if(H5SL_remove(aux_ptr->candidate_slist_ptr, (void *)(&addr)) != slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from candidate entry slist.") + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->candidate_slist_len -= 1; + + HDassert( aux_ptr->candidate_slist_len >= 0 ); + } /* while */ + HDassert( aux_ptr->candidate_slist_len == 0 ); + + success = TRUE; + +done: + if(success) { + /* Pass the number of entries and the buffer pointer + * back to the caller. + */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + + if(MPI_Offset_buf_ptr_ptr != NULL) { + HDassert( MPI_Offset_buf_ptr != NULL); + *MPI_Offset_buf_size_ptr = buf_size; + *MPI_Offset_buf_ptr_ptr = MPI_Offset_buf_ptr; + } /* end if */ + } /* end if */ + else { + if(MPI_Offset_buf_ptr != NULL) + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); + if(haddr_buf_ptr != NULL) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + } /* end else */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_copy_candidate_list_to_buffer() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- * Function: H5AC_ext_config_2_int_config() * * Purpose: Utility function to translate an instance of @@ -2968,16 +3251,16 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr, haddr_t addr, unsigned int flags) { - herr_t ret_value = SUCCEED; /* Return value */ - H5AC_aux_t * aux_ptr = NULL; + H5AC_aux_t * aux_ptr; H5AC_slist_entry_t * slist_entry_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL) HDassert( cache_ptr != NULL ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); @@ -2987,25 +3270,17 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr, HDassert( (flags & H5C__DELETED_FLAG) != 0 ); - if ( aux_ptr->mpi_rank == 0 ) { - + if(aux_ptr->mpi_rank == 0) { HDassert( aux_ptr->d_slist_ptr != NULL ); HDassert( aux_ptr->c_slist_ptr != NULL ); /* if the entry appears in the dirtied entry slist, remove it. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, - (void *)(&addr))) != NULL ) { - - HDassert( slist_entry_ptr->magic == - H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); - HDassert( slist_entry_ptr->addr == addr ); - - if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) - != slist_entry_ptr ) { + if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL) { + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert(slist_entry_ptr->addr == addr); - HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ - "Can't delete entry from dirty entry slist.") - } + if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.") slist_entry_ptr->magic = 0; H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); @@ -3014,22 +3289,15 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr, aux_ptr->d_slist_len -= 1; HDassert( aux_ptr->d_slist_len >= 0 ); - } + } /* end if */ /* if the entry appears in the cleaned entry slist, remove it. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, - (void *)(&addr))) != NULL ) { - - HDassert( slist_entry_ptr->magic == - H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); - HDassert( slist_entry_ptr->addr == addr ); - - if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) - != slist_entry_ptr ) { + if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) != NULL) { + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert(slist_entry_ptr->addr == addr); - HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ - "Can't delete entry from cleaned entry slist.") - } + if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from cleaned entry slist.") slist_entry_ptr->magic = 0; H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); @@ -3038,13 +3306,11 @@ H5AC_log_deleted_entry(H5AC_t * cache_ptr, aux_ptr->c_slist_len -= 1; HDassert( aux_ptr->c_slist_len >= 0 ); - } - } + } /* end if */ + } /* if */ done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_log_deleted_entry() */ #endif /* H5_HAVE_PARALLEL */ @@ -3092,7 +3358,7 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr, HDassert( cache_ptr != NULL ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); @@ -3132,24 +3398,16 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr, #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ } - if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) { - + if(H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL) { /* the entry is dirty. If it exists on the cleaned entries list, * remove it. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, - (void *)(&addr))) != NULL ) { - - HDassert( slist_entry_ptr->magic == - H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); - HDassert( slist_entry_ptr->addr == addr ); + if((slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) != NULL) { + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert(slist_entry_ptr->addr == addr); - if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) - != slist_entry_ptr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ - "Can't delete entry from clean entry slist.") - } + if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from clean entry slist.") slist_entry_ptr->magic = 0; H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); @@ -3158,8 +3416,8 @@ H5AC_log_dirtied_entry(const H5AC_info_t * entry_ptr, aux_ptr->c_slist_len -= 1; HDassert( aux_ptr->c_slist_len >= 0 ); - } - } + } /* end if */ + } /* end if */ } else { aux_ptr->dirty_bytes += entry_ptr->size; @@ -3199,34 +3457,6 @@ done: *------------------------------------------------------------------------- */ #ifdef H5_HAVE_PARALLEL -#if 0 /* This is useful debugging code. -- JRM */ -static herr_t -H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, - haddr_t addr, - hbool_t was_dirty, - unsigned flags, - int type_id) -{ - herr_t ret_value = SUCCEED; /* Return value */ - H5AC_aux_t * aux_ptr = NULL; - - FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL) - - aux_ptr = cache_ptr->aux_ptr; - - if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) { - - HDfprintf(stdout, - "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n", - (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id); - } -done: - - FUNC_LEAVE_NOAPI(ret_value) - -} /* H5AC_log_flushed_entry_dummy() */ -#endif /* JRM */ - static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr, haddr_t addr, @@ -3245,7 +3475,7 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr, HDassert( cache_ptr != NULL ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); @@ -3260,7 +3490,8 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr, * cleaned list and the dirtied list. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + if ( (slist_entry_ptr = (H5AC_slist_entry_t *) + H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) != NULL ) { HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); @@ -3282,8 +3513,8 @@ H5AC_log_flushed_entry(H5C_t * cache_ptr, HDassert( aux_ptr->c_slist_len >= 0 ); } - if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, - (void *)(&addr))) != NULL ) { + if ( (slist_entry_ptr = (H5AC_slist_entry_t *) + H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) != NULL ) { HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); HDassert( slist_entry_ptr->addr == addr ); @@ -3363,62 +3594,47 @@ H5AC_log_inserted_entry(H5F_t * f, H5AC_t * cache_ptr, H5AC_info_t * entry_ptr) { + H5AC_aux_t * aux_ptr; herr_t ret_value = SUCCEED; /* Return value */ - H5AC_aux_t * aux_ptr = NULL; - H5AC_slist_entry_t * slist_entry_ptr = NULL; FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL) - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert( aux_ptr != NULL ); - HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); HDassert( entry_ptr != NULL ); - if ( aux_ptr->mpi_rank == 0 ) { - - HDassert( aux_ptr->d_slist_ptr != NULL ); - HDassert( aux_ptr->c_slist_ptr != NULL ); - - if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&entry_ptr->addr)) == NULL ) { - - /* insert the address of the entry in the dirty entry list, and - * add its size to the dirty_bytes count. - */ - if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { - - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "Can't allocate dirty slist entry .") - } - - slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; - slist_entry_ptr->addr = entry_ptr->addr; + if(aux_ptr->mpi_rank == 0) { + H5AC_slist_entry_t * slist_entry_ptr; - if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, - &(slist_entry_ptr->addr)) < 0 ) { + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ - "can't insert entry into dirty entry slist.") - } + if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (void *)(&entry_ptr->addr))) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.") - aux_ptr->d_slist_len += 1; + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if(NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "Can't allocate dirty slist entry .") - } else { + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = entry_ptr->addr; - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Inserted entry already in dirty slist.") - } + if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") - if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&entry_ptr->addr)) != NULL ) { + aux_ptr->d_slist_len += 1; - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Inserted entry in clean slist.") - } - } + if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (void *)(&entry_ptr->addr))) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.") + } /* end if */ aux_ptr->dirty_bytes += entry_ptr->size; @@ -3428,9 +3644,7 @@ H5AC_log_inserted_entry(H5F_t * f, #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_log_inserted_entry() */ #endif /* H5_HAVE_PARALLEL */ @@ -3505,7 +3719,7 @@ H5AC_log_moved_entry(const H5F_t *f, HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - aux_ptr = cache_ptr->aux_ptr; + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); @@ -3530,8 +3744,8 @@ H5AC_log_moved_entry(const H5F_t *f, /* if the entry appears in the cleaned entry slist, under its old * address, remove it. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, - (void *)(&old_addr))) != NULL ) { + if ( (slist_entry_ptr = (H5AC_slist_entry_t *) + H5SL_search(aux_ptr->c_slist_ptr, (void *)(&old_addr))) != NULL ) { HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); @@ -3556,8 +3770,8 @@ H5AC_log_moved_entry(const H5F_t *f, /* if the entry appears in the dirtied entry slist under its old * address, remove it, but don't free it. Set addr to new_addr. */ - if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, - (void *)(&old_addr))) != NULL ) { + if ( (slist_entry_ptr = (H5AC_slist_entry_t *) + H5SL_search(aux_ptr->d_slist_ptr, (void *)(&old_addr))) != NULL ) { HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); @@ -3633,27 +3847,223 @@ H5AC_log_moved_entry(const H5F_t *f, } done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_log_moved_entry() */ #endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- + * Function: H5AC_propagate_and_apply_candidate_list + * + * Purpose: Prior to the addition of support for multiple metadata + * write strategies, in PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other + * metadata caches on processes with rank greater than 0 + * were required to retain dirty entries until they were + * notified that the entry was clean. + * + * This constraint is relaxed with the distributed + * metadata write strategy, in which a list of candidate + * metadata cache entries is constructed by the process 0 + * cache and then distributed to the caches of all the other + * processes. Once the listed is distributed, many (if not + * all) processes writing writing a unique subset of the + * entries, and marking the remainder clean. The subsets + * are chosen so that each entry in the list of candidates + * is written by exactly one cache, and all entries are + * marked as being clean in all caches. + * + * While the list of candidate cache entries is prepared + * elsewhere, this function is the main routine for distributing + * and applying the list. It must be run simultaniously on + * all processes that have the relevant file open. To ensure + * proper synchronization, there is a barrier at the beginning + * of this function. + * + * At present, this function is called under one of two + * circumstances: + * + * 1) Dirty byte creation exceeds some user specified value. + * + * While metadata reads may occur independently, all + * operations writing metadata must be collective. Thus + * all metadata caches see the same sequence of operations, + * and therefore the same dirty data creation. + * + * This fact is used to synchronize the caches for purposes + * of propagating the list of candidate entries, by simply + * calling this function from all caches whenever some user + * specified threshold on dirty data is exceeded. (the + * process 0 cache creates the candidate list just before + * calling this function). + * + * 2) Under direct user control -- this operation must be + * collective. + * + * The operations to be managed by this function are as + * follows: + * + * All processes: + * + * 1) Participate in an opening barrier. + * + * For the process with mpi rank 0: + * + * 1) Load the contents of the candidate list + * (candidate_slist_ptr) into a buffer, and broadcast that + * buffer to all the other caches. Clear the candidate + * list in passing. + * + * If there is a positive number of candidates, proceed with + * the following: + * + * 2) Apply the candidate entry list. + * + * 3) Particpate in a closing barrier. + * + * 4) Remove from the dirty list (d_slist_ptr) and from the + * flushed and still clean entries list (c_slist_ptr), + * all addresses that appeared in the candidate list, as + * these entries are now clean. + * + * + * For all processes with mpi rank greater than 0: + * + * 1) Receive the candidate entry list broadcast + * + * If there is a positive number of candidates, proceed with + * the following: + * + * 2) Apply the candidate entry list. + * + * 3) Particpate in a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_propagate_and_apply_candidate_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr) +{ + int mpi_code; + int num_candidates = 0; + haddr_t * candidates_list_ptr = NULL; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_propagate_and_apply_candidate_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + + /* to prevent "messages from the future" we must synchronize all + * processes before we write any entries. + */ + if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code) + + if(aux_ptr->mpi_rank == 0) { + if(H5AC_broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.") + + HDassert( aux_ptr->candidate_slist_len == 0 ); + } /* end if */ + else { + if(H5AC_receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.") + } /* end else */ + + if(num_candidates > 0) { + herr_t result; + + /* all processes apply the candidate list. + * H5C_apply_candidate_list() handles the details of + * distributing the writes across the processes. + */ + + aux_ptr->write_permitted = TRUE; + + result = H5C_apply_candidate_list(f, + dxpl_id, + dxpl_id, + cache_ptr, + num_candidates, + candidates_list_ptr, + aux_ptr->mpi_rank, + aux_ptr->mpi_size); + + aux_ptr->write_permitted = FALSE; + + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") + + if(aux_ptr->write_done != NULL) + (aux_ptr->write_done)(); + + /* to prevent "messages from the past" we must synchronize all + * processes again before we go on. + */ + if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 2", mpi_code) + + if(aux_ptr->mpi_rank == 0) { + if(H5AC_tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") + } /* end if */ + } /* end if */ + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done != NULL) + (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr); + +done: + if(candidates_list_ptr != NULL) + candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_propagate_and_apply_candidate_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- * Function: H5AC_propagate_flushed_and_still_clean_entries_list * - * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed - * to write to file. All other metadata caches on processes - * with rank greater than 0 must retain dirty entries until - * they are notified that the entry is now clean. + * Purpose: In PHDF5, if the process 0 only metadata write strategy + * is selected, only the metadata cache with mpi rank 0 is + * allowed to write to file. All other metadata caches on + * processes with rank greater than 0 must retain dirty + * entries until they are notified that the entry is now + * clean. * - * This function is the main routine for that proceedure. - * It must be called simultaniously on all processes that - * have the relevant file open. To this end, there must - * be a barrier immediately prior to this call. + * This function is the main routine for handling this + * notification proceedure. It must be called + * simultaniously on all processes that have the relevant + * file open. To this end, it is called only during a + * sync point, with a barrier prior to the call. * - * Typicaly, this will be done one of two ways: + * Note that any metadata entry writes by process 0 will + * occur after the barrier and just before this call. + * + * Typicaly, calls to this function will be triggered in + * one of two ways: * * 1) Dirty byte creation exceeds some user specified value. * @@ -3676,14 +4086,11 @@ done: * * For the process with mpi rank 0: * - * 1) Enable writes, flush the cache to its min clean size, - * and then disable writes again. - * - * 2) Load the contents of the flushed and still clean entries + * 1) Load the contents of the flushed and still clean entries * list (c_slist_ptr) into a buffer, and broadcast that * buffer to all the other caches. * - * 3) Clear the flushed and still clean entries list + * 2) Clear the flushed and still clean entries list * (c_slist_ptr). * * @@ -3711,113 +4118,156 @@ done: herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, hid_t dxpl_id, - H5AC_t * cache_ptr, - hbool_t do_barrier) + H5AC_t * cache_ptr) { + H5AC_aux_t * aux_ptr; herr_t ret_value = SUCCEED; /* Return value */ - herr_t result; - int mpi_code; - H5AC_aux_t * aux_ptr = NULL; FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL) - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert( aux_ptr != NULL ); - HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - HDfprintf(stdout, - "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", - (int)(aux_ptr->mpi_rank), - (int)(aux_ptr->dirty_bytes_propagations), - (int)(aux_ptr->unprotect_dirty_bytes), - (int)(aux_ptr->unprotect_dirty_bytes_updates), - (int)(aux_ptr->insert_dirty_bytes), - (int)(aux_ptr->insert_dirty_bytes_updates), - (int)(aux_ptr->move_dirty_bytes), - (int)(aux_ptr->move_dirty_bytes_updates)); -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - - if ( do_barrier ) { + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); - /* to prevent "messages from the future" we must synchronize all - * processes before we start the flush. This synchronization may - * already be done -- hence the do_barrier parameter. - */ - - if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) { + if(aux_ptr->mpi_rank == 0) { + if(H5AC_broadcast_clean_list(cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.") + HDassert( aux_ptr->c_slist_len == 0 ); + } /* end if */ + else { + if(H5AC_receive_and_apply_clean_list(f, dxpl_id, H5AC_noblock_dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.") + } /* end else */ - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) - } - } +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_propagate_flushed_and_still_clean_entries_list() */ +#endif /* H5_HAVE_PARALLEL */ - if ( aux_ptr->mpi_rank == 0 ) { + +/*------------------------------------------------------------------------- + * + * Function: H5AC_receive_and_apply_clean_list() + * + * Purpose: Receive the list of cleaned entries from process 0, + * and mark the specified entries as clean. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/4/05 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_receive_and_apply_clean_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5AC_t * cache_ptr) +{ + H5AC_aux_t * aux_ptr; + haddr_t * haddr_buf_ptr = NULL; + MPI_Offset * MPI_Offset_buf_ptr = NULL; + int mpi_result; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ - aux_ptr->write_permitted = TRUE; + FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL) - result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id); + HDassert( f != NULL ); + HDassert( f->shared->cache == cache_ptr ); - aux_ptr->write_permitted = FALSE; + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - if ( result < 0 ) { + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "H5C_flush_to_min_clean() failed.") - } + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank != 0 ); - if ( aux_ptr->write_done != NULL ) { + /* First receive the number of entries in the list so that we + * can set up a buffer to receive them. If there aren't + * any, we are done. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) - (aux_ptr->write_done)(); - } + if(num_entries > 0) { + size_t buf_size; + int i; - if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) { + /* allocate buffers to store the list of entry base addresses in */ + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for receive buffer") + if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer") - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Can't broadcast clean slist.") - } + /* Now receive the list of cleaned entries + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) - HDassert( aux_ptr->c_slist_len == 0 ); + /* translate the MPI_Offsets to haddr_t */ + i = 0; + while(i < num_entries) { + haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]); - } else { + if(haddr_buf_ptr[i] == HADDR_UNDEF) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert MPI off to haddr") - if ( H5AC_receive_and_apply_clean_list(f, dxpl_id, - H5AC_noblock_dxpl_id, - cache_ptr) < 0 ) { + i++; + } /* end while */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Can't receive and/or process clean slist broadcast.") - } - } + /* mark the indicated entries as clean */ + if(H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id, + (int32_t)num_entries, &(haddr_buf_ptr[0])) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.") + } /* end if */ - aux_ptr->dirty_bytes = 0; -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->dirty_bytes_propagations += 1; - aux_ptr->unprotect_dirty_bytes = 0; - aux_ptr->unprotect_dirty_bytes_updates = 0; - aux_ptr->insert_dirty_bytes = 0; - aux_ptr->insert_dirty_bytes_updates = 0; - aux_ptr->move_dirty_bytes = 0; - aux_ptr->move_dirty_bytes_updates = 0; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done != NULL) + (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); done: + if(MPI_Offset_buf_ptr != NULL) + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); + if(haddr_buf_ptr != NULL) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); FUNC_LEAVE_NOAPI(ret_value) - -} /* H5AC_propagate_flushed_and_still_clean_entries_list() */ +} /* H5AC_receive_and_apply_clean_list() */ #endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- * - * Function: H5AC_receive_and_apply_clean_list() + * Function: H5AC_receive_candidate_list() * - * Purpose: Receive the list of cleaned entries from process 0, - * and mark the specified entries as clean. + * Purpose: Receive the list of candidate entries from process 0, + * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. + * Note that the caller must free this buffer if it is + * returned. * * This function must only be called by the process with * MPI_rank greater than 0. @@ -3826,27 +4276,25 @@ done: * * Return: Non-negative on success/Negative on failure. * - * Programmer: John Mainzer, 7/4/05 + * Programmer: John Mainzer, 3/17/10 * *------------------------------------------------------------------------- */ #ifdef H5_HAVE_PARALLEL static herr_t -H5AC_receive_and_apply_clean_list(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - H5AC_t * cache_ptr) +H5AC_receive_candidate_list(H5AC_t * cache_ptr, + int * num_entries_ptr, + haddr_t ** haddr_buf_ptr_ptr) { - herr_t ret_value = SUCCEED; /* Return value */ - H5AC_aux_t * aux_ptr = NULL; + hbool_t success = FALSE; + H5AC_aux_t * aux_ptr; haddr_t * haddr_buf_ptr = NULL; MPI_Offset * MPI_Offset_buf_ptr = NULL; - size_t buf_size; - int i = 0; int mpi_result; int num_entries; + herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL) + FUNC_ENTER_NOAPI(H5AC_receive_candidate_list, FAIL) HDassert( cache_ptr != NULL ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); @@ -3856,180 +4304,830 @@ H5AC_receive_and_apply_clean_list(H5F_t * f, HDassert( aux_ptr != NULL ); HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); HDassert( aux_ptr->mpi_rank != 0 ); + HDassert( aux_ptr-> metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + + HDassert( num_entries_ptr != NULL ); + HDassert( *num_entries_ptr == 0 ); + + HDassert( haddr_buf_ptr_ptr != NULL ); + HDassert( *haddr_buf_ptr_ptr == NULL ); + /* First receive the number of entries in the list so that we * can set up a buffer to receive them. If there aren't * any, we are done. */ - mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm); - - if ( mpi_result != MPI_SUCCESS ) { - + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) - } - if ( num_entries > 0 ) - { - /* allocate a buffers to store the list of entry base addresses in */ + if(num_entries > 0) { + size_t buf_size; + int i; + /* allocate buffers to store the list of entry base addresses in */ buf_size = sizeof(MPI_Offset) * (size_t)num_entries; - MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size); + if(NULL == (MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for receive buffer") + if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)num_entries))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for haddr buffer") - if ( MPI_Offset_buf_ptr == NULL ) { + /* Now receive the list of candidate entries + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "memory allocation failed for receive buffer") - } + /* translate the MPI_Offsets to haddr_t */ + i = 0; + while(i < num_entries) { + haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]); - haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * - (size_t)num_entries); + if(haddr_buf_ptr[i] == HADDR_UNDEF) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert MPI off to haddr") - if ( haddr_buf_ptr == NULL ) { + i++; + } /* end while */ + } /* end if */ - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ - "memory allocation failed for haddr buffer") - } + success = TRUE; +done: + if(MPI_Offset_buf_ptr != NULL) + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); - /* Now receive the list of cleaned entries - * - * The peculiar structure of the following call to MPI_Bcast is - * due to MPI's (?) failure to believe in the MPI_Offset type. - * Thus the element type is MPI_BYTE, with size equal to the - * buf_size computed above. + if(success) { + /* finally, pass the number of entries and the buffer pointer + * back to the caller. Do this so that we can use the same code + * to apply the candidate list to all the processes. */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + } /* end if */ + else { + if(haddr_buf_ptr != NULL) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + } /* end else */ - mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, - MPI_BYTE, 0, aux_ptr->mpi_comm); + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_receive_candidate_list() */ +#endif /* H5_HAVE_PARALLEL */ - if ( mpi_result != MPI_SUCCESS ) { + +/*------------------------------------------------------------------------- + * Function: H5AC_rsp__dist_md_write__flush + * + * Purpose: Routine for handling the details of running a sync point + * that is triggered by a flush -- which in turn must have been + * triggered by either a flush API call or a file close -- + * when the distributed metadata write strategy is selected. + * + * Upon entry, each process generates it own candidate list, + * being a sorted list of all dirty metadata entries currently + * in the metadata cache. Note that this list must be idendical + * across all processes, as all processes see the same stream + * of dirty metadata coming in, and use the same lists of + * candidate entries at each sync point. (At first glance, this + * argument sounds circular, but think of it in the sense of + * a recursive proof). + * + * If this this list is empty, we are done, and the function + * returns + * + * Otherwise, after the sorted list dirty metadata entries is + * constructed, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process for + * flushing. + * + * At this point, all processes participate in a barrier to + * avoid messages from the past/future bugs. + * + * Each process then flushes the entries assigned to it, and + * marks all other entries on the candidate list as clean. + * + * Finally, all processes participate in a second barrier to + * avoid messages from the past/future bugs. + * + * At the end of this process, process 0 and only process 0 + * must tidy up its lists of dirtied and cleaned entries. + * These lists are not used in the distributed metadata write + * strategy, but they must be maintained should we shift + * to a strategy that uses them. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_rsp__dist_md_write__flush(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr) +{ + int mpi_code; + int num_entries = 0; + haddr_t * haddr_buf_ptr = NULL; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) - } + FUNC_ENTER_NOAPI(H5AC_rsp__dist_md_write__flush, FAIL) + HDassert( f != NULL ); + HDassert( f->shared->cache == cache_ptr ); - /* translate the MPI_Offsets to haddr_t */ - i = 0; - while ( i < num_entries ) - { - haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]); + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - if ( haddr_buf_ptr[i] == HADDR_UNDEF ) { + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ - "can't convert MPI off to haddr") - } + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); - i++; - } + /* first construct the candidate list -- initially, this will be in the + * form of a skip list. We will convert it later. + */ + if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + if(aux_ptr->candidate_slist_len > 0) { + herr_t result; - /* mark the indicated entries as clean */ - if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id, - (int32_t)num_entries, &(haddr_buf_ptr[0])) < 0 ) { + /* convert the candidate list into the format we + * are used to receiving from process 0. + */ + if(H5AC_copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr, NULL, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Can't mark entries clean.") + /* initial sync point barrier */ + if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code) - } - } + /* apply the candidate list */ + aux_ptr->write_permitted = TRUE; -done: + result = H5C_apply_candidate_list(f, + dxpl_id, + dxpl_id, + cache_ptr, + num_entries, + haddr_buf_ptr, + aux_ptr->mpi_rank, + aux_ptr->mpi_size); - if ( MPI_Offset_buf_ptr != NULL ) { + aux_ptr->write_permitted = FALSE; - MPI_Offset_buf_ptr = - (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); - } + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") - if ( haddr_buf_ptr != NULL ) { + /* this code exists primarily for the test bed -- it allows us to + * enforce posix semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if(aux_ptr->write_done != NULL) + (aux_ptr->write_done)(); + /* final sync point barrier */ + if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code) + + /* if this is process zero, tidy up the dirtied, + * and flushed and still clean lists. + */ + if(aux_ptr->mpi_rank == 0) { + if(H5AC_tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") + } /* end if */ + } /* end if */ + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done != NULL) + (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); + +done: + if(haddr_buf_ptr != NULL) haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - } FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_rsp__dist_md_write__flush() */ +#endif /* H5_HAVE_PARALLEL */ -} /* H5AC_receive_and_apply_clean_list() */ + +/*------------------------------------------------------------------------- + * Function: H5AC_rsp__dist_md_write__flush_to_min_clean + * + * Purpose: Routine for handling the details of running a sync point + * triggered by the accumulation of dirty metadata (as + * opposed to a flush call to the API) when the distributed + * metadata write strategy is selected. + * + * After invocation and initial sanity checking this function + * first checks to see if evictions are enabled -- if they + * are not, the function does nothing and returns. + * + * Otherwise, process zero constructs a list of entries to + * be flushed in order to bring the process zero cache back + * within its min clean requirement. Note that this list + * (the candidate list) may be empty. + * + * Then, all processes participate in a barrier. + * + * After the barrier, process 0 broadcasts the number of + * entries in the candidate list prepared above, and all + * other processes receive this number. + * + * If this number is zero, we are done, and the function + * returns without further action. + * + * Otherwise, process 0 broadcasts the sorted list of + * candidate entries, and all other processes receive it. + * + * Then, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process + * for flushing. + * + * Each process then flushes the entries assigned to it, and + * marks all other entries on the candidate list as clean. + * + * Finally, all processes participate in a second barrier to + * avoid messages from the past/future bugs. + * + * At the end of this process, process 0 and only process 0 + * must tidy up its lists of dirtied and cleaned entries. + * These lists are not used in the distributed metadata write + * strategy, but they must be maintained should we shift + * to a strategy that uses them. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_rsp__dist_md_write__flush_to_min_clean(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr) +{ + hbool_t evictions_enabled; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_rsp__dist_md_write__flush_to_min_clean, FAIL) + + HDassert( f != NULL ); + HDassert( f->shared->cache == cache_ptr ); + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + + /* Query if evictions are allowed */ + if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") + + if(evictions_enabled) { + /* construct candidate list -- process 0 only */ + if(aux_ptr->mpi_rank == 0) { + if(H5AC_construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + } /* mpi rank == 0 */ + + /* propagate and apply candidate list -- all processes */ + if(H5AC_propagate_and_apply_candidate_list(f, dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.") + } /* evictions enabled */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_rsp__dist_md_write__flush_to_min_clean() */ +#endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- - * Function: H5AC_flush_entries + * Function: H5AC_rsp__p0_only__flush * - * Purpose: Flush the metadata cache associated with the specified file, - * only writing from rank 0, but propagating the cleaned entries - * to all ranks. + * Purpose: Routine for handling the details of running a sync point + * that is triggered a flush -- which in turn must have been + * triggered by either a flush API call or a file close -- + * when the process 0 only metadata write strategy is selected. * - * Return: Non-negative on success/Negative on failure if there was a - * request to flush all items and something was protected. + * First, all processes participate in a barrier. * - * Programmer: Quincey Koziol - * koziol@hdfgroup.org - * Aug 22 2009 + * Then process zero flushes all dirty entries, and broadcasts + * they number of clean entries (if any) to all the other + * caches. + * + * If this number is zero, we are done. + * + * Otherwise, process 0 broadcasts the list of cleaned + * entries, and all other processes which are part of this + * file group receive it, and mark the listed entries as + * clean in their caches. + * + * Since all processes have the same set of dirty + * entries at the beginning of the sync point, and all + * entries that will be written are written before + * process zero broadcasts the number of cleaned entries, + * there is no need for a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 * *------------------------------------------------------------------------- */ +#ifdef H5_HAVE_PARALLEL herr_t -H5AC_flush_entries(H5F_t *f) +H5AC_rsp__p0_only__flush(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr) { - herr_t ret_value = SUCCEED; /* Return value */ + int mpi_code; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT(H5AC_flush_entries) + FUNC_ENTER_NOAPI(H5AC_rsp__p0_only__flush, FAIL) - HDassert(f); - HDassert(f->shared->cache); + HDassert( f != NULL ); + HDassert( f->shared->cache == cache_ptr ); - /* Check if we have >1 ranks */ - if(f->shared->cache->aux_ptr) { - H5AC_aux_t * aux_ptr = f->shared->cache->aux_ptr; - int mpi_code; + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - HDfprintf(stdout, - "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", - (int)(aux_ptr->mpi_rank), - (int)(aux_ptr->unprotect_dirty_bytes), - (int)(aux_ptr->unprotect_dirty_bytes_updates), - (int)(aux_ptr->insert_dirty_bytes), - (int)(aux_ptr->insert_dirty_bytes_updates), - (int)(aux_ptr->move_dirty_bytes), - (int)(aux_ptr->move_dirty_bytes_updates)); -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY ); + + + /* to prevent "messages from the future" we must + * synchronize all processes before we start the flush. + * Hence the following barrier. + */ + if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed 1", mpi_code) + + /* Flush data to disk, from rank 0 process */ + if(aux_ptr->mpi_rank == 0) { + herr_t result; + + aux_ptr->write_permitted = TRUE; + + result = H5C_flush_cache(f, dxpl_id, dxpl_id, H5AC__NO_FLAGS_SET); + + aux_ptr->write_permitted = FALSE; + + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + + if(aux_ptr->write_done != NULL) + (aux_ptr->write_done)(); + } /* end if */ + + /* Propagate cleaned entries to other ranks. */ + if(H5AC_propagate_flushed_and_still_clean_entries_list(f, H5AC_noblock_dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_rsp__p0_only__flush() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_rsp__p0_only__flush_to_min_clean + * + * Purpose: Routine for handling the details of running a sync point + * triggered by the accumulation of dirty metadata (as + * opposed to a flush call to the API) when the process 0 + * only metadata write strategy is selected. + * + * After invocation and initial sanity checking this function + * first checks to see if evictions are enabled -- if they + * are not, the function does nothing and returns. + * + * Otherwise, all processes participate in a barrier. + * + * After the barrier, if this is process 0, the function + * causes the cache to flush sufficient entries to get the + * cache back within its minimum clean fraction, and broadcast + * the number of entries which have been flushed since + * the last sync point, and are still clean. + * + * If this number is zero, we are done. + * + * Otherwise, process 0 broadcasts the list of cleaned + * entries, and all other processes which are part of this + * file group receive it, and mark the listed entries as + * clean in their caches. + * + * Since all processes have the same set of dirty + * entries at the beginning of the sync point, and all + * entries that will be written are written before + * process zero broadcasts the number of cleaned entries, + * there is no need for a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_rsp__p0_only__flush_to_min_clean(H5F_t *f, + hid_t dxpl_id, + H5AC_t * cache_ptr) +{ + hbool_t evictions_enabled; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_rsp__p0_only__flush_to_min_clean, FAIL) + + HDassert( f != NULL ); + HDassert( f->shared->cache == cache_ptr ); + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY ); + + /* Query if evictions are allowed */ + if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") + + /* Flush if evictions are allowed -- following call + * will cause process 0 to flush to min clean size, + * and then propagate the newly clean entries to the + * other processes. + * + * Otherwise, do nothing. + */ + if(evictions_enabled) { + int mpi_code; /* to prevent "messages from the future" we must synchronize all - * processes before we start the flush. Hence the following - * barrier. + * processes before we start the flush. This synchronization may + * already be done -- hence the do_barrier parameter. */ if(MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) - /* Flush data to disk, from rank 0 process */ - if(aux_ptr->mpi_rank == 0 ) { - herr_t status; + if(0 == aux_ptr->mpi_rank) { + herr_t result; - aux_ptr->write_permitted = TRUE; + /* here, process 0 flushes as many entries as necessary to + * comply with the currently specified min clean size. + * Note that it is quite possible that no entries will be + * flushed. + */ + aux_ptr->write_permitted = TRUE; - status = H5C_flush_cache(f, - H5AC_noblock_dxpl_id, - H5AC_noblock_dxpl_id, - H5AC__NO_FLAGS_SET); + result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id); - aux_ptr->write_permitted = FALSE; + aux_ptr->write_permitted = FALSE; - if(status < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") + /* this call exists primarily for the test code -- it is used + * to enforce POSIX semantics on the process used to simulate + * reads and writes in t_cache.c. + */ if(aux_ptr->write_done != NULL) (aux_ptr->write_done)(); - } /* end if ( aux_ptr->mpi_rank == 0 ) */ + } /* end if */ - /* Propagate cleaned entries to other ranks */ - if(H5AC_propagate_flushed_and_still_clean_entries_list(f, - H5AC_noblock_dxpl_id, - f->shared->cache, - FALSE) < 0 ) + if(H5AC_propagate_flushed_and_still_clean_entries_list(f, dxpl_id, cache_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - } /* end if ( aux_ptr != NULL ) */ + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_rsp__p0_only__flush_to_min_clean() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_run_sync_point + * + * Purpose: Top level routine for managing a sync point between all + * meta data caches in the parallel case. Since all caches + * see the same sequence of dirty metadata, we simply count + * bytes of dirty metadata, and run a sync point whenever the + * number of dirty bytes of metadata seen since the last + * sync point exceeds a threshold that is common across all + * processes. We also run sync points in response to + * HDF5 API calls triggering either a flush or a file close. + * + * In earlier versions of PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other + * metadata caches on processes with rank greater than 0 were + * required to retain dirty entries until they were notified + * that the entry is was clean. + * + * This function was created to make it easier for us to + * experiment with other options, as it is a single point + * for the execution of sync points. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * March 11, 2010 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_run_sync_point(H5F_t *f, + hid_t dxpl_id, + int sync_point_op) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_run_sync_point, FAIL) + + HDassert( f != NULL ); + + cache_ptr = f->shared->cache; + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( ( sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN ) || + ( sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ) ); + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + HDfprintf(stdout, + "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", + (int)(aux_ptr->mpi_rank), + (int)(aux_ptr->dirty_bytes_propagations), + (int)(aux_ptr->unprotect_dirty_bytes), + (int)(aux_ptr->unprotect_dirty_bytes_updates), + (int)(aux_ptr->insert_dirty_bytes), + (int)(aux_ptr->insert_dirty_bytes_updates), + (int)(aux_ptr->rename_dirty_bytes), + (int)(aux_ptr->rename_dirty_bytes_updates)); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + switch(aux_ptr->metadata_write_strategy) { + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5AC_rsp__p0_only__flush_to_min_clean(f, dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__p0_only__flush_to_min_clean() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5AC_rsp__p0_only__flush(f, dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__p0_only__flush() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); + break; + } /* end switch */ + break; + + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5AC_rsp__dist_md_write__flush_to_min_clean(f, dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__dist_md_write__flush() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5AC_rsp__dist_md_write__flush(f, dxpl_id, cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC_rsp__dist_md_write__flush() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); + break; + } /* end switch */ + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.") + break; + } /* end switch */ + + /* reset the dirty bytes count */ + aux_ptr->dirty_bytes = 0; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations += 1; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_run_sync_point() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_tidy_cache_0_lists() + * + * Purpose: In the distributed metadata write strategy, not all dirty + * entries are written by process 0 -- thus we must tidy + * up the dirtied, and flushed and still clean lists + * maintained by process zero after each sync point. + * + * This procedure exists to tend to this issue. + * + * At this point, all entries that process 0 cleared should + * have been removed from both the dirty and flushed and + * still clean lists, and entries that process 0 has flushed + * should have been removed from the dirtied list and added + * to the flushed and still clean list. + * + * However, since the distributed metadata write strategy + * doesn't make use of these lists, the objective is simply + * to maintain these lists in consistent state that allows + * them to be used should the metadata write strategy change + * to one that uses these lists. + * + * Thus for our purposes, all we need to do is remove from + * the dirtied and flushed and still clean lists all + * references to entries that appear in the candidate list. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * 4/20/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_tidy_cache_0_lists(H5AC_t * cache_ptr, + int num_candidates, + haddr_t * candidates_list_ptr) + +{ + int i; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5AC_tidy_cache_0_lists, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( num_candidates > 0 ); + HDassert( candidates_list_ptr != NULL ); + + /* clean up dirtied and flushed and still clean lists by removing + * all entries on the candidate list. Cleared entries should + * have been removed from both the dirty and cleaned lists at + * this point, flushed entries should have been added to the + * cleaned list. However, for this metadata write strategy, + * we just want to remove all references to the candidate entries. + */ + for(i = 0; i < num_candidates; i++) { + H5AC_slist_entry_t * d_slist_entry_ptr; + H5AC_slist_entry_t * c_slist_entry_ptr; + haddr_t addr; + + addr = candidates_list_ptr[i]; + + /* addr must be either on the dirtied list, or on the flushed + * and still clean list. Remove it. + */ + d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->d_slist_ptr, (void *)&addr); + if(d_slist_entry_ptr != NULL) { + HDassert(d_slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert(d_slist_entry_ptr->addr == addr); + + if(H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) != d_slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from dirty entry slist.") + + d_slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr); + + aux_ptr->d_slist_len -= 1; + + HDassert(aux_ptr->d_slist_len >= 0); + } /* end if */ + + c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_search(aux_ptr->c_slist_ptr, (void *)&addr); + if(c_slist_entry_ptr != NULL) { + HDassert(c_slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert(c_slist_entry_ptr->addr == addr); + + if(H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) != c_slist_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, "Can't delete entry from clean entry slist.") + + c_slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr); + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } /* end if */ + } /* end for */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_tidy_cache_0_lists() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_flush_entries + * + * Purpose: Flush the metadata cache associated with the specified file, + * only writing from rank 0, but propagating the cleaned entries + * to all ranks. + * + * Return: Non-negative on success/Negative on failure if there was a + * request to flush all items and something was protected. + * + * Programmer: Quincey Koziol + * koziol@hdfgroup.org + * Aug 22 2009 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_flush_entries(H5F_t *f) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT(H5AC_flush_entries) + + HDassert(f); + HDassert(f->shared->cache); + + /* Check if we have >1 ranks */ + if(f->shared->cache->aux_ptr) { + if(H5AC_run_sync_point(f, H5AC_noblock_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") + } /* end if */ done: FUNC_LEAVE_NOAPI(ret_value) @@ -4056,7 +5154,6 @@ herr_t H5AC_ignore_tags(H5F_t * f) { /* Variable Declarations */ - H5AC_t * cache_ptr = NULL; herr_t ret_value = SUCCEED; /* Function Enter Macro */ @@ -4067,18 +5164,12 @@ H5AC_ignore_tags(H5F_t * f) HDassert(f->shared); HDassert(f->shared->cache); - /* Get cache pointer */ - cache_ptr = f->shared->cache; - /* Set up a new metadata tag */ - if (H5C_ignore_tags(cache_ptr) < 0) + if(H5C_ignore_tags(f->shared->cache) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "H5C_ignore_tags() failed.") done: - - /* Function Leave Macro */ FUNC_LEAVE_NOAPI(ret_value) - } /* H5AC_ignore_tags() */ @@ -4102,27 +5193,24 @@ H5AC_tag(hid_t dxpl_id, haddr_t metadata_tag, haddr_t * prev_tag) herr_t ret_value = SUCCEED; /* Function Enter Macro */ - FUNC_ENTER_NOAPI_NOINIT(H5AC_tag) + FUNC_ENTER_NOAPI(H5AC_tag, FAIL) /* Check Arguments */ if(NULL == (dxpl = (H5P_genplist_t *)H5I_object_verify(dxpl_id, H5I_GENPROP_LST))) - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list"); + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list") /* Get the current tag value and return that (if prev_tag is NOT null)*/ - if (prev_tag) { - if( (H5P_get(dxpl, "H5AC_metadata_tag", prev_tag)) < 0 ) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to query dxpl"); - } + if(prev_tag) { + if((H5P_get(dxpl, "H5AC_metadata_tag", prev_tag)) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to query dxpl") + } /* end if */ /* Set the provided tag value in the dxpl_id. */ if(H5P_set(dxpl, "H5AC_metadata_tag", &metadata_tag) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set property in dxpl") done: - - /* Function Leave Macro */ - FUNC_LEAVE_NOAPI(ret_value); - + FUNC_LEAVE_NOAPI(ret_value) } /* H5AC_tag */ @@ -4143,22 +5231,19 @@ done: herr_t H5AC_retag_copied_metadata(H5F_t * f, haddr_t metadata_tag) { - /* Variable Declarations */ herr_t ret_value = SUCCEED; /* Function Enter Macro */ - FUNC_ENTER_NOAPI_NOINIT(H5AC_retag_copied_metadata) + FUNC_ENTER_NOAPI(H5AC_retag_copied_metadata, FAIL) /* Assertions */ HDassert(f); HDassert(f->shared); /* Call cache-level function to retag entries */ - H5C_retag_copied_metadata(f->shared->cache, metadata_tag); + H5C_retag_copied_metadata(f->shared->cache, metadata_tag); done: - - /* Function Leave Macro */ - FUNC_LEAVE_NOAPI(ret_value); - + FUNC_LEAVE_NOAPI(ret_value) } /* H5AC_retag_copied_metadata */ + diff --git a/src/H5ACpkg.h b/src/H5ACpkg.h index d5346f5..3060a70 100644 --- a/src/H5ACpkg.h +++ b/src/H5ACpkg.h @@ -46,6 +46,17 @@ #define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 +#ifdef H5_HAVE_PARALLEL + +/* the following #defined are used to specify the operation required + * at a sync point. + */ + +#define H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN 0 +#define H5AC_SYNC_POINT_OP__FLUSH_CACHE 1 + +#endif /* H5_HAVE_PARALLEL */ + /*------------------------------------------------------------------------- * It is a bit difficult to set ranges of allowable values on the * dirty_bytes_threshold field of H5AC_aux_t. The following are @@ -59,6 +70,9 @@ #define H5AC__MAX_DIRTY_BYTES_THRESHOLD (int32_t) \ (H5C__MAX_MAX_CACHE_SIZE / 4) +#define H5AC__DEFAULT_METADATA_WRITE_STRATEGY \ + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED + /**************************************************************************** * * structure H5AC_aux_t @@ -162,6 +176,12 @@ * broadcast. This field is reset to zero after each such * broadcast. * + * metadata_write_strategy: Integer code indicating how we will be + * writing the metadata. In the first incarnation of + * this code, all writes were done from process 0. This + * field exists to facilitate experiments with other + * strategies. + * * dirty_bytes_propagations: This field only exists when the * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. * @@ -211,6 +231,19 @@ * been created via move operations since the last time * the cleaned list was propagated. * + * Things have changed a bit since the following four fields were defined. + * If metadata_write_strategy is H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY, + * all comments hold as before -- with the caviate that pending further + * coding, the process 0 metadata cache is forbidden to flush entries outside + * of a sync point. + * + * However, for different metadata write strategies, these fields are used + * only to maintain the correct dirty byte count on process zero -- and in + * most if not all cases, this is redundant, as process zero will be barred + * from flushing entries outside of a sync point. + * + * JRM -- 3/16/10 + * * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list * of entries that have been dirtied since the last time they * were listed in a clean entries broadcast. This list is @@ -259,6 +292,17 @@ * contain the value 0 on all processes other than process 0. * It exists primarily for sanity checking. * + * The following two fields are used only when metadata_write_strategy + * is H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED. + * + * candidate_slist_ptr: Pointer to an instance of H5SL_t used by process 0 + * to construct a list of entries to be flushed at this sync + * point. This list is then broadcast to the other processes, + * which then either flush or mark clean all entries on it. + * + * candidate_slist_len: Integer field containing the number of entries on the + * candidate list. It exists primarily for sanity checking. + * * write_done: In the parallel test bed, it is necessary to ensure that * all writes to the server process from cache 0 complete * before it enters the barrier call with the other caches. @@ -271,6 +315,19 @@ * This field must be set to NULL when the callback is not * needed. * + * Note: This field has been extended for use by all processes + * with the addition of support for the distributed + * metadata write strategy. + * JRM -- 5/9/10 + * + * sync_point_done: In the parallel test bed, it is necessary to verify + * that the expected writes, and only the expected writes, + * have taken place at the end of each sync point. + * + * The sync_point_done callback allows t_cache to perform + * this verification. The field is set to NULL when the + * callback is not needed. + * ****************************************************************************/ #ifdef H5_HAVE_PARALLEL @@ -293,6 +350,8 @@ typedef struct H5AC_aux_t int32_t dirty_bytes; + int32_t metadata_write_strategy; + #if H5AC_DEBUG_DIRTY_BYTES_CREATION int32_t dirty_bytes_propagations; @@ -316,8 +375,15 @@ typedef struct H5AC_aux_t int32_t c_slist_len; + H5SL_t * candidate_slist_ptr; + + int32_t candidate_slist_len; + void (* write_done)(void); + void (* sync_point_done)(int num_writes, + haddr_t * written_entries_tbl); + } H5AC_aux_t; /* struct H5AC_aux_t */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h index ef19480..7fa5cf4 100644 --- a/src/H5ACprivate.h +++ b/src/H5ACprivate.h @@ -222,6 +222,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id; /* Default cache configuration. */ +#define H5AC__DEFAULT_METADATA_WRITE_STRATEGY \ + H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED + #ifdef H5_HAVE_PARALLEL #define H5AC__DEFAULT_CACHE_CONFIG \ { \ @@ -254,7 +257,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id; /* int epochs_before_eviction = */ 3, \ /* hbool_t apply_empty_reserve = */ TRUE, \ /* double empty_reserve = */ 0.1, \ - /* int dirty_bytes_threshold = */ (256 * 1024) \ + /* int dirty_bytes_threshold = */ (256 * 1024), \ + /* int metadata_write_strategy = */ \ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY \ } #else /* H5_HAVE_PARALLEL */ #define H5AC__DEFAULT_CACHE_CONFIG \ @@ -288,7 +293,9 @@ H5_DLLVAR hid_t H5AC_ind_dxpl_id; /* int epochs_before_eviction = */ 3, \ /* hbool_t apply_empty_reserve = */ TRUE, \ /* double empty_reserve = */ 0.1, \ - /* int dirty_bytes_threshold = */ (256 * 1024) \ + /* int dirty_bytes_threshold = */ (256 * 1024), \ + /* int metadata_write_strategy = */ \ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY \ } #endif /* H5_HAVE_PARALLEL */ @@ -358,6 +365,9 @@ H5_DLL herr_t H5AC_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, unsigned flags); +H5_DLL herr_t H5AC_set_sync_point_done_callback(H5C_t *cache_ptr, + void (*sync_point_done)(int num_writes, haddr_t *written_entries_tbl)); + H5_DLL herr_t H5AC_set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)); H5_DLL herr_t H5AC_stats(const H5F_t *f); @@ -392,5 +402,9 @@ H5_DLL herr_t H5AC_retag_copied_metadata(H5F_t * f, haddr_t metadata_tag); H5_DLL herr_t H5AC_ignore_tags(H5F_t * f); +#ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr); +#endif /* H5_HAVE_PARALLEL */ + #endif /* !_H5ACprivate_H */ diff --git a/src/H5ACpublic.h b/src/H5ACpublic.h index 02941b6..639179c 100644 --- a/src/H5ACpublic.h +++ b/src/H5ACpublic.h @@ -354,21 +354,22 @@ extern "C" { * Parallel Configuration Fields: * * In PHDF5, all operations that modify metadata must be executed collectively. + * * We used to think that this was enough to ensure consistency across the * metadata caches, but since we allow processes to read metadata individually, * the order of dirty entries in the LRU list can vary across processes, * which can result in inconsistencies between the caches. * - * To prevent this, only the metadata cache on process 0 is allowed to write - * to file, and then only after synchronizing with the other caches. After - * it writes entries to file, it sends the base addresses of the now clean - * entries to the other caches, so they can mark these entries clean as well. + * PHDF5 uses several strategies to prevent such inconsistencies in metadata, + * all of which use the fact that the same stream of dirty metadata is seen + * by all processes for purposes of synchronization. This is done by + * having each process count the number of bytes of dirty metadata generated, + * and then running a "sync point" whenever this count exceeds a user + * specified threshold (see dirty_bytes_threshold below). * - * The different caches know when to synchronize caches by counting the - * number of bytes of dirty metadata created by the collective operations - * modifying metadata. Whenever this count exceeds a user specified - * threshold (see below), process 0 flushes down to its minimum clean size, - * and then sends the list of newly cleaned entries to the other caches. + * The current metadata write strategy is indicated by the + * metadata_write_strategy field. The possible values of this field, along + * with the associated metadata write strategies are discussed below. * * dirty_bytes_threshold: Threshold of dirty byte creation used to * synchronize updates between caches. (See above for outline and @@ -378,11 +379,67 @@ extern "C" { * file. This field is ignored unless HDF5 has been compiled for * parallel. * + * metadata_write_strategy: Integer field containing a code indicating the + * desired metadata write strategy. The valid values of this field + * are enumerated and discussed below: + * + * + * H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + * + * When metadata_write_strategy is set to this value, only process + * zero is allowed to write dirty metadata to disk. All other + * processes must retain dirty metadata until they are informed at + * a sync point that the dirty metadata in question has been written + * to disk. + * + * When the sync point is reached (or when there is a user generated + * flush), process zero flushes sufficient entries to bring it into + * complience with its min clean size (or flushes all dirty entries in + * the case of a user generated flush), broad casts the list of + * entries just cleaned to all the other processes, and then exits + * the sync point. + * + * Upon receipt of the broadcast, the other processes mark the indicated + * entries as clean, and leave the sync point as well. + * + * + * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + * + * In the distributed metadata write strategy, process zero still makes + * the decisions as to what entries should be flushed, but the actual + * flushes are distributed across the processes in the computation to + * the extent possible. + * + * In this strategy, when a sync point is triggered (either by dirty + * metadata creation or manual flush), all processes enter a barrier. + * + * On the other side of the barrier, process 0 constructs an ordered + * list of the entries to be flushed, and then broadcasts this list + * to the caches in all the processes. + * + * All processes then scan the list of entries to be flushed, flushing + * some, and marking the rest as clean. The algorithm for this purpose + * ensures that each entry in the list is flushed exactly once, and + * all are marked clean in each cache. + * + * Note that in the case of a flush of the cache, no message passing + * is necessary, as all processes have the same list of dirty entries, + * and all of these entries must be flushed. Thus in this case it is + * sufficient for each process to sort its list of dirty entries after + * leaving the initial barrier, and use this list as if it had been + * received from process zero. + * + * To avoid possible messages from the past/future, all caches must + * wait until all caches are done before leaving the sync point. + * ****************************************************************************/ #define H5AC__CURR_CACHE_CONFIG_VERSION 1 #define H5AC__MAX_TRACE_FILE_NAME_LEN 1024 +#define H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY 0 +#define H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED 1 + typedef struct H5AC_cache_config_t { /* general configuration fields: */ @@ -440,6 +497,7 @@ typedef struct H5AC_cache_config_t /* parallel configuration fields: */ int dirty_bytes_threshold; + int metadata_write_strategy; } H5AC_cache_config_t; @@ -335,6 +335,624 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_apply_candidate_list + * + * Purpose: Apply the supplied candidate list. + * + * We used to do this by simply having each process write + * every mpi_size-th entry in the candidate list, starting + * at index mpi_rank, and mark all the others clean. + * + * However, this can cause unnecessary contention in a file + * system by increasing the number of processes writing to + * adjacent locations in the HDF5 file. + * + * To attempt to minimize this, we now arange matters such + * that each process writes n adjacent entries in the + * candidate list, and marks all others clean. We must do + * this in such a fashion as to guarantee that each entry + * on the candidate list is written by exactly one process, + * and marked clean by all others. + * + * To do this, first construct a table mapping mpi_rank + * to the index of the first entry in the candidate list to + * be written by the process of that mpi_rank, and then use + * the table to control which entries are written and which + * are marked as clean as a function of the mpi_rank. + * + * Note that the table must be identical on all processes, as + * all see the same candidate list, mpi_size, and mpi_rank -- + * the inputs used to construct the table. + * + * We construct the table as follows. Let: + * + * n = num_candidates / mpi_size; + * + * m = num_candidates % mpi_size; + * + * Now allocate an array of integers of length mpi_size + 1, + * and call this array candidate_assignment_table. + * + * Conceptually, if the number of candidates is a multiple + * of the mpi_size, we simply pass through the candidate list + * and assign n entries to each process to flush, with the + * index of the first entry to flush in the location in + * the candidate_assignment_table indicated by the mpi_rank + * of the process. + * + * In the more common case in which the candidate list isn't + * isn't a multiple of the mpi_size, we pretend it is, and + * give num_candidates % mpi_size processes one extra entry + * each to make things work out. + * + * Once the table is constructed, we determine the first and + * last entry this process is to flush as follows: + * + * first_entry_to_flush = candidate_assignment_table[mpi_rank] + * + * last_entry_to_flush = + * candidate_assignment_table[mpi_rank + 1] - 1; + * + * With these values determined, we simply scan through the + * candidate list, marking all entries in the range + * [first_entry_to_flush, last_entry_to_flush] for flush, + * and all others to be cleaned. + * + * Finally, we scan the LRU from tail to head, flushing + * or marking clean the candidate entries as indicated. + * If necessary, we scan the pinned list as well. + * + * Note that this function will fail if any protected or + * clean entries appear on the candidate list. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + * Modifications: + * + * Heavily reworked to have each process flush a group of + * adjacent entries. + * JRM -- 4/15/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0 +herr_t +H5C_apply_candidate_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int num_candidates, + haddr_t * candidates_list_ptr, + int mpi_rank, + int mpi_size) +{ + hbool_t first_flush = FALSE; + int i; + int m; + int n; + int first_entry_to_flush; + int last_entry_to_flush; + int entries_to_clear = 0; + int entries_to_flush = 0; + int entries_cleared = 0; + int entries_flushed = 0; + int entries_examined = 0; + int initial_list_len; + int * candidate_assignment_table = NULL; + haddr_t addr; + H5C_cache_entry_t * clear_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; + H5C_cache_entry_t * flush_ptr = NULL; +#if H5C_DO_SANITY_CHECKS + haddr_t last_addr; +#endif /* H5C_DO_SANITY_CHECKS */ +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + char tbl_buf[1024]; +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5C_apply_candidate_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( num_candidates > 0 ); + HDassert( num_candidates <= cache_ptr->slist_len ); + HDassert( candidates_list_ptr != NULL ); + HDassert( 0 <= mpi_rank ); + HDassert( mpi_rank < mpi_size ); + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n", + FUNC, mpi_rank); + for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0'; + sprintf(&(tbl_buf[0]), "candidate list = "); + for ( i = 0; i < num_candidates; i++ ) + { + sprintf(&(tbl_buf[strlen(tbl_buf)]), " 0x%llx", + (long long)(*(candidates_list_ptr + i))); + } + sprintf(&(tbl_buf[strlen(tbl_buf)]), "\n"); + HDfprintf(stdout, "%s", tbl_buf); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + n = num_candidates / mpi_size; + m = num_candidates % mpi_size; + HDassert(n >= 0); + + if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table") + + candidate_assignment_table[0] = 0; + candidate_assignment_table[mpi_size] = num_candidates; + + if(m == 0) { /* mpi_size is an even divisor of num_candidates */ + HDassert(n > 0); + for(i = 1; i < mpi_size; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + } /* end if */ + else { + for(i = 1; i <= m; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1; + + if(num_candidates < mpi_size) { + for(i = m + 1; i < mpi_size; i++) + candidate_assignment_table[i] = num_candidates; + } /* end if */ + else { + for(i = m + 1; i < mpi_size; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + } /* end else */ + } /* end else */ + HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates); + +#if H5C_DO_SANITY_CHECKS + /* verify that the candidate assignment table has the expected form */ + for ( i = 1; i < mpi_size - 1; i++ ) + { + int a, b; + + a = candidate_assignment_table[i] - candidate_assignment_table[i - 1]; + b = candidate_assignment_table[i + 1] - candidate_assignment_table[i]; + + HDassert( n + 1 >= a ); + HDassert( a >= b ); + HDassert( b >= n ); + } +#endif /* H5C_DO_SANITY_CHECKS */ + + first_entry_to_flush = candidate_assignment_table[mpi_rank]; + last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1; + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + for ( i = 0; i < 1024; i++ ) + tbl_buf[i] = '\0'; + sprintf(&(tbl_buf[0]), "candidate assignment table = "); + for(i = 0; i <= mpi_size; i++) + sprintf(&(tbl_buf[strlen(tbl_buf)]), " %d", candidate_assignment_table[i]); + sprintf(&(tbl_buf[strlen(tbl_buf)]), "\n"); + HDfprintf(stdout, "%s", tbl_buf); + + HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n", + FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush); + + HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + for(i = 0; i < num_candidates; i++) { + addr = candidates_list_ptr[i]; + HDassert( H5F_addr_defined(addr) ); + +#if H5C_DO_SANITY_CHECKS + if ( i > 0 ) { + if ( last_addr == addr ) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n") + } else if ( last_addr > addr ) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n") + } + } + + last_addr = addr; +#endif /* H5C_DO_SANITY_CHECKS */ + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + if(entry_ptr == NULL) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.") + } else if(!entry_ptr->is_dirty) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.") + } else if ( entry_ptr->is_protected ) { + /* For now at least, we can't deal with protected entries. + * If we encounter one, scream and die. If it becomes an + * issue, we should be able to work around this. + */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.") + } else { + /* determine whether the entry is to be cleared or flushed, + * and mark it accordingly. We will scan the protected and + * pinned list shortly, and clear or flush according to these + * markings. + */ + if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) { + entries_to_flush++; + entry_ptr->flush_immediately = TRUE; + } /* end if */ + else { + entries_to_clear++; + entry_ptr->clear_on_unprotect = TRUE; + } /* end else */ + } /* end else */ + } /* end for */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n", + FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear, + (int)entries_to_flush); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + + /* We have now marked all the entries on the candidate list for + * either flush or clear -- now scan the LRU and the pinned list + * for these entries and do the deed. + * + * Note that we are doing things in this round about manner so as + * to preserve the order of the LRU list to the best of our ability. + * If we don't do this, my experiments indicate that we will have a + * noticably poorer hit ratio as a result. + */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank, + (int)(cache_ptr->LRU_list_len)); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + entries_examined = 0; + initial_list_len = cache_ptr->LRU_list_len; + entry_ptr = cache_ptr->LRU_tail_ptr; + + while((entry_ptr != NULL) && (entries_examined <= initial_list_len) && + ((entries_cleared + entries_flushed) < num_candidates)) { + if(entry_ptr->clear_on_unprotect) { + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->prev; + entries_cleared++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, + (long long)clear_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + clear_ptr->type, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &first_flush, + TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } else if(entry_ptr->flush_immediately) { + entry_ptr->flush_immediately = FALSE; + flush_ptr = entry_ptr; + entry_ptr = entry_ptr->prev; + entries_flushed++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, + (long long)flush_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + flush_ptr->type, + flush_ptr->addr, + H5C__NO_FLAGS_SET, + &first_flush, + TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } else { + entry_ptr = entry_ptr->prev; + } + + entries_examined++; + } /* end while */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n", + FUNC, mpi_rank, entries_examined, + entries_cleared, entries_flushed); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* It is also possible that some of the cleared entries are on the + * pinned list. Must scan that also. + */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n", + FUNC, mpi_rank, (int)(cache_ptr->pel_len)); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + entry_ptr = cache_ptr->pel_head_ptr; + while((entry_ptr != NULL) && + ((entries_cleared + entries_flushed) < num_candidates)) { + if(entry_ptr->clear_on_unprotect) { + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->next; + entries_cleared++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, + (long long)clear_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + clear_ptr->type, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &first_flush, + TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } else if(entry_ptr->flush_immediately) { + entry_ptr->flush_immediately = FALSE; + flush_ptr = entry_ptr; + entry_ptr = entry_ptr->next; + entries_flushed++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, + (long long)flush_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + flush_ptr->type, + flush_ptr->addr, + H5C__NO_FLAGS_SET, + &first_flush, + TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } else { + entry_ptr = entry_ptr->next; + } + } /* end while */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, + "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n", + FUNC, mpi_rank, entries_examined, + entries_cleared, entries_flushed); + HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank); + + fsync(stdout); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if((entries_flushed != entries_to_flush) || (entries_cleared != entries_to_clear)) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.") + +done: + if(candidate_assignment_table != NULL) + candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_apply_candidate_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5C_construct_candidate_list__clean_cache + * + * Purpose: Construct the list of entries that should be flushed to + * clean all entries in the cache. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr) +{ + size_t space_needed; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5C_construct_candidate_list__clean_cache, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + /* As a sanity check, set space needed to the size of the skip list. + * This should be the sum total of the sizes of all the dirty entries + * in the metadata cache. + */ + space_needed = cache_ptr->slist_size; + + /* Recall that while we shouldn't have any protected entries at this + * point, it is possible that some dirty entries may reside on the + * pinned list at this point. + */ + HDassert( cache_ptr->slist_size <= + (cache_ptr->dLRU_list_size + cache_ptr->pel_size) ); + HDassert( cache_ptr->slist_len <= + (cache_ptr->dLRU_list_len + cache_ptr->pel_len) ); + + if(space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; + int nominated_entries_count = 0; + size_t nominated_entries_size = 0; + haddr_t nominated_addr; + + HDassert( cache_ptr->slist_len > 0 ); + + /* Scan the dirty LRU list from tail forward and nominate sufficient + * entries to free up the necessary space. + */ + entry_ptr = cache_ptr->dLRU_tail_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL)) { + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + entry_ptr = entry_ptr->aux_prev; + } /* end while */ + HDassert( entry_ptr == NULL ); + + /* it is possible that there are some dirty entries on the + * protected entry list as well -- scan it too if necessary + */ + entry_ptr = cache_ptr->pel_head_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL)) { + if(entry_ptr->is_dirty) { + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + } /* end if */ + + entry_ptr = entry_ptr->next; + } /* end while */ + + HDassert( nominated_entries_count == cache_ptr->slist_len ); + HDassert( nominated_entries_size == space_needed ); + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_construct_candidate_list__clean_cache() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5C_construct_candidate_list__min_clean + * + * Purpose: Construct the list of entries that should be flushed to + * get the cache back within its min clean constraints. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +herr_t +H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr) +{ + size_t space_needed = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5C_construct_candidate_list__min_clean, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + /* compute the number of bytes (if any) that must be flushed to get the + * cache back within its min clean constraints. + */ + if(cache_ptr->max_cache_size > cache_ptr->index_size) { + if(((cache_ptr->max_cache_size - cache_ptr->index_size) + + cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size) + space_needed = 0; + else + space_needed = cache_ptr->min_clean_size - + ((cache_ptr->max_cache_size - cache_ptr->index_size) + + cache_ptr->cLRU_list_size); + } /* end if */ + else { + if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) + space_needed = 0; + else + space_needed = cache_ptr->min_clean_size - + cache_ptr->cLRU_list_size; + } /* end else */ + + if(space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; + int nominated_entries_count = 0; + size_t nominated_entries_size = 0; + + HDassert( cache_ptr->slist_len > 0 ); + + /* Scan the dirty LRU list from tail forward and nominate sufficient + * entries to free up the necessary space. + */ + entry_ptr = cache_ptr->dLRU_tail_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL)) { + haddr_t nominated_addr; + + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + entry_ptr = entry_ptr->aux_prev; + } /* end while */ + HDassert( nominated_entries_count <= cache_ptr->slist_len ); + HDassert( nominated_entries_size >= space_needed ); + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_construct_candidate_list__min_clean() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- * Function: H5C_create * * Purpose: Allocate, initialize, and return the address of a new @@ -356,10 +974,6 @@ done: * Programmer: John Mainzer * 6/2/04 * - * JRM -- 11/5/08 - * Added initialization for the new clean_index_size and - * dirty_index_size fields of H5C_t. - * *------------------------------------------------------------------------- */ H5C_t * @@ -1502,9 +2116,7 @@ H5C_flush_to_min_clean(H5F_t * f, #endif /* end modified code -- commented out for now */ done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5C_flush_to_min_clean() */ @@ -1903,33 +2515,6 @@ H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr, * Programmer: John Mainzer * 6/2/04 * - * QAK -- 1/31/08 - * Added initialization for the new free_file_space_on_destroy - * field. - * - * JRM -- 11/13/08 - * Moved test to see if we already have an entry with the - * specified address in the cache. This was necessary as - * we used to modify some fields in the entry to be inserted - * priort to this test, which got the cache confused if the - * insertion failed because the entry was already present. - * - * Also revised the function to call H5C_make_space_in_cache() - * if the min_clean_size is not met at present, not just if - * there is insufficient space in the cache for the new - * entry. - * - * The purpose of this modification is to avoid "metadata - * blizzards" in the write only case. In such instances, - * the cache was allowed to fill with dirty metadata. When - * we finally needed to evict an entry to make space, we had - * to flush out a whole cache full of metadata -- which has - * interesting performance effects. We hope to avoid (or - * perhaps more accurately hide) this effect by maintaining - * the min_clean_size, which should force us to start flushing - * entries long before we actually have to evict something - * to make space. - * *------------------------------------------------------------------------- */ herr_t @@ -2042,6 +2627,7 @@ H5C_insert_entry(H5F_t * f, #ifdef H5_HAVE_PARALLEL entry_ptr->clear_on_unprotect = FALSE; + entry_ptr->flush_immediately = FALSE; #endif /* H5_HAVE_PARALLEL */ entry_ptr->flush_in_progress = FALSE; @@ -3813,29 +4399,17 @@ done: *------------------------------------------------------------------------- */ herr_t -H5C_set_prefix(H5C_t * cache_ptr, - char * prefix) +H5C_set_prefix(H5C_t * cache_ptr, char * prefix) { - herr_t ret_value = SUCCEED; /* Return value */ + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5C_set_prefix) - FUNC_ENTER_NOAPI(H5C_set_prefix, FAIL) - - /* This would normally be an assert, but we need to use an HGOTO_ERROR - * call to shut up the compiler. - */ - if ( ( ! cache_ptr ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr") - } - - HDassert( prefix ); - HDassert( HDstrlen(prefix) < H5C__PREFIX_LEN ) ; + HDassert((cache_ptr) && (cache_ptr->magic == H5C__H5C_T_MAGIC)); + HDassert(prefix); + HDassert(HDstrlen(prefix) < H5C__PREFIX_LEN); HDstrcpy(&(cache_ptr->prefix[0]), prefix); -done: - FUNC_LEAVE_NOAPI(ret_value) - + FUNC_LEAVE_NOAPI(SUCCEED) } /* H5C_set_prefix() */ @@ -7441,17 +8015,6 @@ H5C_flush_single_entry(H5F_t * f, } } } -#if 0 - /* this should be useful for debugging from time to time. - * lets leave it in for now. -- JRM 12/15/04 - */ - else { - HDfprintf(stdout, - "H5C_flush_single_entry(): non-existant entry. addr = %a\n", - addr); - HDfflush(stdout); - } -#endif #endif /* H5C_DO_SANITY_CHECKS */ if ( ( entry_ptr != NULL ) && ( entry_ptr->is_protected ) ) @@ -7572,133 +8135,6 @@ H5C_flush_single_entry(H5F_t * f, */ if ( destroy ) { /* AKA eviction */ -#if 0 /* JRM */ - /* This test code may come in handy -- lets keep it for a while. - * - * Note that it will cause spurious errors in the serial case - * unless we are maintaining the clean and dirty LRU lists. - */ - { - if ( entry_ptr->is_dirty ) - { - if ( cache_ptr->dLRU_head_ptr == NULL ) - HDfprintf(stdout, - "%s: cache_ptr->dLRU_head_ptr == NULL.\n", - FUNC); - - if ( cache_ptr->dLRU_tail_ptr == NULL ) - HDfprintf(stdout, - "%s: cache_ptr->dLRU_tail_ptr == NULL.\n", - FUNC); - - if ( cache_ptr->dLRU_list_len <= 0 ) - HDfprintf(stdout, - "%s: cache_ptr->dLRU_list_len <= 0.\n", - FUNC); - - if ( cache_ptr->dLRU_list_size <= 0 ) - HDfprintf(stdout, - "%s: cache_ptr->dLRU_list_size <= 0.\n", - FUNC); - - if ( cache_ptr->dLRU_list_size < entry_ptr->size ) - HDfprintf(stdout, - "%s: cache_ptr->dLRU_list_size < entry_ptr->size.\n", - FUNC); - - if ( ( (cache_ptr->dLRU_list_size) == entry_ptr->size ) && - ( ! ( (cache_ptr->dLRU_list_len) == 1 ) ) ) - HDfprintf(stdout, - "%s: dLRU_list_size == size && dLRU_list_len != 1\n", - FUNC); - - if ( ( entry_ptr->aux_prev == NULL ) && - ( cache_ptr->dLRU_head_ptr != entry_ptr ) ) - HDfprintf(stdout, - "%s: entry_ptr->aux_prev == NULL && dLRU_head_ptr != entry_ptr\n", - FUNC); - - if ( ( entry_ptr->aux_next == NULL ) && - ( cache_ptr->dLRU_tail_ptr != entry_ptr ) ) - HDfprintf(stdout, - "%s: entry_ptr->aux_next == NULL && dLRU_tail_ptr != entry_ptr\n", - FUNC); - - if ( ( cache_ptr->dLRU_list_len == 1 ) && - ( ! ( ( cache_ptr->dLRU_head_ptr == entry_ptr ) && - ( cache_ptr->dLRU_tail_ptr == entry_ptr ) && - ( entry_ptr->aux_next == NULL ) && - ( entry_ptr->aux_prev == NULL ) && - ( cache_ptr->dLRU_list_size == entry_ptr->size ) - ) - ) - ) - { - HDfprintf(stdout, - "%s: single entry dlru sanity check fails\n", - FUNC); - } - - } - else - { - if ( cache_ptr->cLRU_head_ptr == NULL ) - HDfprintf(stdout, - "%s: cache_ptr->cLRU_head_ptr == NULL.\n", - FUNC); - - if ( cache_ptr->cLRU_tail_ptr == NULL ) - HDfprintf(stdout, - "%s: cache_ptr->cLRU_tail_ptr == NULL.\n", - FUNC); - - if ( cache_ptr->cLRU_list_len <= 0 ) - HDfprintf(stdout, - "%s: cache_ptr->cLRU_list_len <= 0.\n", - FUNC); - - if ( cache_ptr->cLRU_list_size <= 0 ) - HDfprintf(stdout, - "%s: cache_ptr->cLRU_list_size <= 0.\n", - FUNC); - - if ( cache_ptr->cLRU_list_size < entry_ptr->size ) - HDfprintf(stdout, - "%s: cache_ptr->cLRU_list_size < entry_ptr->size.\n", - FUNC); - - if ( ( (cache_ptr->cLRU_list_size) == entry_ptr->size ) && - ( ! ( (cache_ptr->cLRU_list_len) == 1 ) ) ) - HDfprintf(stdout, - "%s: cLRU_list_size == size && cLRU_list_len != 1\n", - FUNC); - - if ( ( entry_ptr->aux_prev == NULL ) && - ( cache_ptr->cLRU_head_ptr != entry_ptr ) ) - HDfprintf(stdout, "%s: entry_ptr->aux_prev == NULL && cLRU_head_ptr != entry_ptr\n", FUNC); - - if ( ( entry_ptr->aux_next == NULL ) && - ( cache_ptr->cLRU_tail_ptr != entry_ptr ) ) - HDfprintf(stdout, "%s: entry_ptr->aux_next == NULL && cLRU_tail_ptr != entry_ptr\n", FUNC); - - if ( ( cache_ptr->cLRU_list_len == 1 ) && - ( ! ( ( cache_ptr->cLRU_head_ptr == entry_ptr ) && - ( cache_ptr->cLRU_tail_ptr == entry_ptr ) && - ( entry_ptr->aux_next == NULL ) && - ( entry_ptr->aux_prev == NULL ) && - ( cache_ptr->cLRU_list_size == entry_ptr->size ) - ) - ) - ) - { - HDfprintf(stdout, - "%s: single entry clru sanity check fails\n", - FUNC); - } - } - } -#endif /* JRM */ - H5C__UPDATE_RP_FOR_EVICTION(cache_ptr, entry_ptr, FAIL) } else { @@ -7864,10 +8300,10 @@ H5C_flush_single_entry(H5F_t * f, * H5C__UPDATE_INDEX_FOR_ENTRY_CLEAN()). */ H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), \ - (entry_ptr->size),\ + (entry_ptr->size), \ (new_size), \ (entry_ptr), \ - (TRUE)); + (TRUE)) /* The entry can't be protected since we just flushed it. * Thus we must update the replacement policy data @@ -7923,9 +8359,7 @@ H5C_flush_single_entry(H5F_t * f, } done: - FUNC_LEAVE_NOAPI(ret_value) - } /* H5C_flush_single_entry() */ @@ -8023,6 +8457,7 @@ H5C_load_entry(H5F_t * f, entry->flush_marker = FALSE; #ifdef H5_HAVE_PARALLEL entry->clear_on_unprotect = FALSE; + entry->flush_immediately = FALSE; #endif /* H5_HAVE_PARALLEL */ entry->flush_in_progress = FALSE; entry->destroy_in_progress = FALSE; @@ -8862,20 +9297,17 @@ H5C_flush_marked_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); /* Flush all marked entries */ - if (H5C_flush_cache(f, + if(H5C_flush_cache(f, primary_dxpl_id, secondary_dxpl_id, H5C__FLUSH_MARKED_ENTRIES_FLAG | H5C__FLUSH_IGNORE_PROTECTED_FLAG) < 0) { HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush cache") - } /* end if */ done: - - FUNC_LEAVE_NOAPI(ret_value); - + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_flush_marked_entries */ #if H5C_DO_TAGGING_SANITY_CHECKS @@ -8891,8 +9323,6 @@ done: * Programmer: Mike McGreevy * January 14, 2010 * - * Modifications: - * *------------------------------------------------------------------------- */ static herr_t @@ -8959,10 +9389,7 @@ H5C_verify_tag(int id, haddr_t tag) } done: - - /* Function Leave Macro */ - FUNC_LEAVE_NOAPI(ret_value); - + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_verify_tag */ #endif @@ -8980,43 +9407,35 @@ done: * Programmer: Mike McGreevy * March 17, 2010 * - * Modifications: - * *------------------------------------------------------------------------- */ -herr_t +void H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag) { /* Variable Declarations */ - herr_t ret_value = SUCCEED; /* Return Value */ int i = 0; /* Iterator */ - H5C_cache_entry_t *next_entry_ptr = NULL; /* entry pointer */ /* Assertions */ HDassert(cache_ptr); /* Function Enter Macro */ - FUNC_ENTER_NOAPI(H5C_retag_copied_metadata, FAIL) + FUNC_ENTER_NOAPI_NOFUNC(H5C_retag_copied_metadata) /* Iterate through entries, retagging those with the H5AC__COPIED_TAG tag */ - for (i = 0; i < H5C__HASH_TABLE_LEN; i++) { + for(i = 0; i < H5C__HASH_TABLE_LEN; i++) { + H5C_cache_entry_t *next_entry_ptr; /* entry pointer */ next_entry_ptr = cache_ptr->index[i]; - - while ( next_entry_ptr != NULL ) { - if (cache_ptr->index[i] != NULL) { - if ((cache_ptr->index[i])->tag == H5AC__COPIED_TAG) { + while(next_entry_ptr != NULL) { + if(cache_ptr->index[i] != NULL) { + if((cache_ptr->index[i])->tag == H5AC__COPIED_TAG) (cache_ptr->index[i])->tag = metadata_tag; - } /* end if */ } /* end if */ + next_entry_ptr = next_entry_ptr->ht_next; } /* end while */ - } /* end for */ -done: - - /* Function Leave Macro */ - FUNC_LEAVE_NOAPI(ret_value); - + FUNC_LEAVE_NOAPI_VOID } /* H5C_retag_copied_metadata */ + diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 22d3514..71fb405 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -1875,7 +1875,7 @@ if ( ( (cache_ptr) == NULL ) || \ ( ( !( was_clean ) || \ ( (cache_ptr)->clean_index_size < (old_size) ) ) && \ ( ( (was_clean) ) || \ - ( (cache_ptr)->dirty_index_size < (old_size) ) ) ) \ + ( (cache_ptr)->dirty_index_size < (old_size) ) ) ) || \ ( (entry_ptr) == NULL ) ) { \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ "Pre HT entry size change SC failed") \ @@ -1893,7 +1893,7 @@ if ( ( (cache_ptr) == NULL ) || \ ( ( !((entry_ptr)->is_dirty ) || \ ( (cache_ptr)->dirty_index_size < (new_size) ) ) && \ ( ( ((entry_ptr)->is_dirty) ) || \ - ( (cache_ptr)->clean_index_size < (new_size) ) ) ) \ + ( (cache_ptr)->clean_index_size < (new_size) ) ) ) || \ ( ( (cache_ptr)->index_len == 1 ) && \ ( (cache_ptr)->index_size != (new_size) ) ) ) { \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ @@ -2098,24 +2098,25 @@ if ( (cache_ptr)->index_size != \ H5C__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(cache_ptr, entry_ptr); \ } -#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size, \ - entry_ptr, was_clean) \ -{ \ - H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ - entry_ptr, was_clean) \ - (cache_ptr)->index_size -= (old_size); \ - (cache_ptr)->index_size += (new_size); \ - if ( was_clean ) { \ - (cache_ptr)->clean_index_size -= (old_size); \ - } else { \ - (cache_ptr)->dirty_index_size -= (old_size); \ - } \ - if ( (entry_ptr)->is_dirty ) { \ - (cache_ptr)->dirty_index_size += (new_size); \ - } else { \ - (cache_ptr)->clean_index_size += (new_size); \ - } \ - H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, entry_ptr) \ +#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size, \ + entry_ptr, was_clean) \ +{ \ + H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ + entry_ptr, was_clean) \ + (cache_ptr)->index_size -= (old_size); \ + (cache_ptr)->index_size += (new_size); \ + if ( was_clean ) { \ + (cache_ptr)->clean_index_size -= (old_size); \ + } else { \ + (cache_ptr)->dirty_index_size -= (old_size); \ + } \ + if ( (entry_ptr)->is_dirty ) { \ + (cache_ptr)->dirty_index_size += (new_size); \ + } else { \ + (cache_ptr)->clean_index_size += (new_size); \ + } \ + H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ + entry_ptr) \ } diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index 3f38500..7e14872 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -383,6 +383,14 @@ typedef herr_t (*H5C_log_flush_func_t)(H5C_t * cache_ptr, * the unprotect, the entry's is_dirty flag is reset by flushing * it with the H5C__FLUSH_CLEAR_ONLY_FLAG. * + * flush_immediately: Boolean flag used only in Phdf5 -- and then only + * for H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED. + * + * When a destributed metadata write is triggered at a + * sync point, this field is used to mark entries that + * must be flushed before leaving the sync point. At all + * other times, this field should be set to FALSE. + * * flush_in_progress: Boolean flag that is set to true iff the entry * is in the process of being flushed. This allows the cache * to detect when a call is the result of a flush callback. @@ -581,6 +589,7 @@ typedef struct H5C_cache_entry_t hbool_t flush_marker; #ifdef H5_HAVE_PARALLEL hbool_t clear_on_unprotect; + hbool_t flush_immediately; #endif /* H5_HAVE_PARALLEL */ hbool_t flush_in_progress; hbool_t destroy_in_progress; @@ -1034,6 +1043,21 @@ typedef struct H5C_auto_size_ctl_t #define H5C__FREE_FILE_SPACE_FLAG 0x0800 #define H5C__TAKE_OWNERSHIP_FLAG 0x1000 +#ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5C_apply_candidate_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int num_candidates, + haddr_t * candidates_list_ptr, + int mpi_rank, + int mpi_size); + +H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr); + +H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr); +#endif /* H5_HAVE_PARALLEL */ + H5_DLL H5C_t * H5C_create(size_t max_cache_size, size_t min_clean_size, int max_type_id, @@ -1177,7 +1201,7 @@ H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t * config_ptr, H5_DLL herr_t H5C_ignore_tags(H5C_t * cache_ptr); -H5_DLL herr_t H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag); +H5_DLL void H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag); #endif /* !_H5Cprivate_H */ diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 117dfc7..b382fb4 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -1771,6 +1771,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, } /* end if */ } /* end if */ else { +#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */ /* Only one process can do the actual metadata write */ if(file->mpi_rank != H5_PAR_META_WRITE) #ifdef LATER @@ -1778,6 +1779,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, #else /* LATER */ HGOTO_DONE(SUCCEED) /* skip the actual write */ #endif /* LATER */ +#endif /* JRM */ } /* end if */ /* Write the data. */ diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index d5e58e9..5bea8fe 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -1274,9 +1274,11 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) #endif /* JRM */ +#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */ /* Only one process will do the actual write if all procs in comm write same metadata */ if (file->mpi_rank != H5_PAR_META_WRITE) HGOTO_DONE(SUCCEED) /* skip the actual write */ +#endif /* JRM */ } /* end if */ #ifdef REPORT_IO diff --git a/test/cache_api.c b/test/cache_api.c index 8fd2912..c6e27c6 100644 --- a/test/cache_api.c +++ b/test/cache_api.c @@ -65,11 +65,8 @@ static void check_file_mdc_api_errs(void); * Programmer: John Mainzer * 4/12/04 * - * Modifications: - * *------------------------------------------------------------------------- */ - static void check_fapl_mdc_api_calls(void) { @@ -113,7 +110,9 @@ check_fapl_mdc_api_calls(void) /* int epochs_before_eviction = */ 4, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t scratch; H5C_auto_size_ctl_t default_auto_size_ctl; @@ -560,7 +559,9 @@ check_file_mdc_api_calls(void) /* int epochs_before_eviction = */ 4, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t mod_config_2 = { @@ -593,7 +594,9 @@ check_file_mdc_api_calls(void) /* int epochs_before_eviction = */ 4, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t mod_config_3 = { @@ -626,7 +629,9 @@ check_file_mdc_api_calls(void) /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ FALSE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t mod_config_4 = { @@ -660,7 +665,9 @@ check_file_mdc_api_calls(void) /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; TESTING("MDC/FILE related API calls"); @@ -915,7 +922,9 @@ mdc_api_call_smoke_check(int express_test) /* int epochs_before_eviction = */ 2, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t mod_config_2 = { @@ -948,7 +957,9 @@ mdc_api_call_smoke_check(int express_test) /* int epochs_before_eviction = */ 2, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; H5AC_cache_config_t mod_config_3 = { @@ -981,7 +992,9 @@ mdc_api_call_smoke_check(int express_test) /* int epochs_before_eviction = */ 2, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.05, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }; TESTING("MDC API smoke check"); @@ -1514,7 +1527,7 @@ mdc_api_call_smoke_check(int express_test) * used to test error rejection in the MDC related API calls. */ -#define NUM_INVALID_CONFIGS 41 +#define NUM_INVALID_CONFIGS 42 H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = { @@ -1549,7 +1562,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 1 -- bad rpt_fcn_enabled */ @@ -1582,7 +1597,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 2 -- bad open_trace_file */ @@ -1615,7 +1632,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 3 -- bad close_trace_file */ @@ -1648,7 +1667,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 4 -- open_trace_file == TRUE and empty trace_file_name */ @@ -1681,7 +1702,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 5 -- bad set_initial_size */ @@ -1714,7 +1737,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 6 -- max_size too big */ @@ -1747,7 +1772,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 7 -- min_size too small */ @@ -1780,7 +1807,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 8 -- min_size > max_size */ @@ -1813,7 +1842,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 9 -- initial size out of range (too big) */ @@ -1846,7 +1877,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 10 -- initial_size out of range (too small) */ @@ -1879,7 +1912,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 11 -- min_clean_fraction too big */ @@ -1912,7 +1947,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 12 -- min_clean_fraction too small */ @@ -1945,7 +1982,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 13 -- epoch_length too small */ @@ -1978,7 +2017,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 14 -- epoch_length too big */ @@ -2011,7 +2052,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 15 -- invalid incr_mode */ @@ -2044,7 +2087,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 16 -- lower_hr_threshold too small */ @@ -2077,7 +2122,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 17 -- lower_hr_threshold too big */ @@ -2110,7 +2157,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 18 -- increment too small */ @@ -2143,7 +2192,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 19 -- bad apply_max_increment */ @@ -2176,7 +2227,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 20 -- invalid flash_incr_mode */ @@ -2209,7 +2262,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 21 -- flash_multiple too small */ @@ -2242,7 +2297,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 22 -- flash_multiple too big */ @@ -2275,7 +2332,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 23 -- flash_threshold too small */ @@ -2308,7 +2367,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 24 -- flash_threshold too big */ @@ -2341,7 +2402,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 25 -- bad decr_mode */ @@ -2374,7 +2437,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 26 -- upper_hr_threshold too big */ @@ -2407,7 +2472,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 27 -- decrement too small */ @@ -2440,7 +2507,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 28 -- decrement too big */ @@ -2473,7 +2542,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 29 -- epochs_before_eviction too small */ @@ -2506,7 +2577,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 0, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 30 -- epochs_before_eviction too big */ @@ -2539,7 +2612,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ H5C__MAX_EPOCH_MARKERS + 1, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 31 -- invalid apply_empty_reserve */ @@ -2572,7 +2647,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ 2, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 32 -- empty_reserve too small */ @@ -2605,7 +2682,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ -0.0000000001, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 33 -- empty_reserve too big */ @@ -2638,7 +2717,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 1.00000000001, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 34 -- upper_hr_threshold too small */ @@ -2671,7 +2752,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 35 -- upper_hr_threshold too big */ @@ -2704,7 +2787,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 36 -- upper_hr_threshold <= lower_hr_threshold */ @@ -2737,7 +2822,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 37 -- dirty_bytes_threshold too small */ @@ -2770,7 +2857,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (H5C__MIN_MAX_CACHE_SIZE / 2) - 1 + /* int dirty_bytes_threshold = */ (H5C__MIN_MAX_CACHE_SIZE / 2) - 1, + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 38 -- dirty_bytes_threshold too big */ @@ -2803,7 +2892,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (H5C__MAX_MAX_CACHE_SIZE / 4) + 1 + /* int dirty_bytes_threshold = */ (H5C__MAX_MAX_CACHE_SIZE / 4) + 1, + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 39 -- attempt to disable evictions when auto incr enabled */ @@ -2836,7 +2927,9 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY }, { /* 40 -- attempt to disable evictions when auto decr enabled */ @@ -2869,7 +2962,43 @@ H5AC_cache_config_t invalid_configs[NUM_INVALID_CONFIGS] = /* int epochs_before_eviction = */ 3, /* hbool_t apply_empty_reserve = */ TRUE, /* double empty_reserve = */ 0.1, - /* int dirty_bytes_threshold = */ (256 * 1024) + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ + H5AC__DEFAULT_METADATA_WRITE_STRATEGY + }, + { + /* 41 -- unknown metadata write strategy */ + /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, + /* hbool_t rpt_fcn_enabled = */ FALSE, + /* hbool_t open_trace_file = */ FALSE, + /* hbool_t close_trace_file = */ FALSE, + /* char trace_file_name[] = */ "", + /* hbool_t evictions_enabled = */ TRUE, + /* hbool_t set_initial_size = */ TRUE, + /* size_t initial_size = */ (1 * 1024 * 1024), + /* double min_clean_fraction = */ 0.25, + /* size_t max_size = */ (16 * 1024 * 1024), + /* size_t min_size = */ ( 1 * 1024 * 1024), + /* long int epoch_length = */ 50000, + /* enum H5C_cache_incr_mode incr_mode = */ H5C_incr__threshold, + /* double lower_hr_threshold = */ 0.9, + /* double increment = */ 2.0, + /* hbool_t apply_max_increment = */ TRUE, + /* size_t max_increment = */ (4 * 1024 * 1024), + /* enum H5C_cache_flash_incr_mode */ + /* flash_incr_mode = */ H5C_flash_incr__off, + /* double flash_multiple = */ 2.0, + /* double flash_threshold = */ 0.5, + /* enum H5C_cache_decr_mode decr_mode = */ H5C_decr__age_out_with_threshold, + /* double upper_hr_threshold = */ 0.999, + /* double decrement = */ 0.9, + /* hbool_t apply_max_decrement = */ TRUE, + /* size_t max_decrement = */ (1 * 1024 * 1024), + /* int epochs_before_eviction = */ 3, + /* hbool_t apply_empty_reserve = */ TRUE, + /* double empty_reserve = */ 0.1, + /* int dirty_bytes_threshold = */ (256 * 1024), + /* int metadata_write_strategy = */ -1 } }; diff --git a/test/cache_common.h b/test/cache_common.h index f493239..d7e7f1a 100644 --- a/test/cache_common.h +++ b/test/cache_common.h @@ -412,38 +412,40 @@ if ( ( (cache_ptr) == NULL ) || \ /* Macros used in H5AC level tests */ -#define CACHE_CONFIGS_EQUAL(a, b, cmp_set_init, cmp_init_size) \ - ( ( (a).version == (b).version ) && \ - ( (a).rpt_fcn_enabled == (b).rpt_fcn_enabled ) && \ - ( (a).open_trace_file == (b).open_trace_file ) && \ - ( (a).close_trace_file == (b).close_trace_file ) && \ - ( ( (a).open_trace_file == FALSE ) || \ - ( strcmp((a).trace_file_name, (b).trace_file_name) == 0 ) ) && \ - ( (a).evictions_enabled == (b).evictions_enabled ) && \ - ( ( ! cmp_set_init ) || \ - ( (a).set_initial_size == (b).set_initial_size ) ) && \ - ( ( ! cmp_init_size ) || \ - ( (a).initial_size == (b).initial_size ) ) && \ - ( (a).min_clean_fraction == (b).min_clean_fraction ) && \ - ( (a).max_size == (b).max_size ) && \ - ( (a).min_size == (b).min_size ) && \ - ( (a).epoch_length == (b).epoch_length ) && \ - ( (a).incr_mode == (b).incr_mode ) && \ - ( (a).lower_hr_threshold == (b).lower_hr_threshold ) && \ - ( (a).increment == (b).increment ) && \ - ( (a).apply_max_increment == (b).apply_max_increment ) && \ - ( (a).max_increment == (b).max_increment ) && \ - ( (a).flash_incr_mode == (b).flash_incr_mode ) && \ - ( (a).flash_multiple == (b).flash_multiple ) && \ - ( (a).flash_threshold == (b).flash_threshold ) && \ - ( (a).decr_mode == (b).decr_mode ) && \ - ( (a).upper_hr_threshold == (b).upper_hr_threshold ) && \ - ( (a).decrement == (b).decrement ) && \ - ( (a).apply_max_decrement == (b).apply_max_decrement ) && \ - ( (a).max_decrement == (b).max_decrement ) && \ - ( (a).epochs_before_eviction == (b).epochs_before_eviction ) && \ - ( (a).apply_empty_reserve == (b).apply_empty_reserve ) && \ - ( (a).empty_reserve == (b).empty_reserve ) ) +#define CACHE_CONFIGS_EQUAL(a, b, cmp_set_init, cmp_init_size) \ + ( ( (a).version == (b).version ) && \ + ( (a).rpt_fcn_enabled == (b).rpt_fcn_enabled ) && \ + ( (a).open_trace_file == (b).open_trace_file ) && \ + ( (a).close_trace_file == (b).close_trace_file ) && \ + ( ( (a).open_trace_file == FALSE ) || \ + ( strcmp((a).trace_file_name, (b).trace_file_name) == 0 ) ) && \ + ( (a).evictions_enabled == (b).evictions_enabled ) && \ + ( ( ! cmp_set_init ) || \ + ( (a).set_initial_size == (b).set_initial_size ) ) && \ + ( ( ! cmp_init_size ) || \ + ( (a).initial_size == (b).initial_size ) ) && \ + ( (a).min_clean_fraction == (b).min_clean_fraction ) && \ + ( (a).max_size == (b).max_size ) && \ + ( (a).min_size == (b).min_size ) && \ + ( (a).epoch_length == (b).epoch_length ) && \ + ( (a).incr_mode == (b).incr_mode ) && \ + ( (a).lower_hr_threshold == (b).lower_hr_threshold ) && \ + ( (a).increment == (b).increment ) && \ + ( (a).apply_max_increment == (b).apply_max_increment ) && \ + ( (a).max_increment == (b).max_increment ) && \ + ( (a).flash_incr_mode == (b).flash_incr_mode ) && \ + ( (a).flash_multiple == (b).flash_multiple ) && \ + ( (a).flash_threshold == (b).flash_threshold ) && \ + ( (a).decr_mode == (b).decr_mode ) && \ + ( (a).upper_hr_threshold == (b).upper_hr_threshold ) && \ + ( (a).decrement == (b).decrement ) && \ + ( (a).apply_max_decrement == (b).apply_max_decrement ) && \ + ( (a).max_decrement == (b).max_decrement ) && \ + ( (a).epochs_before_eviction == (b).epochs_before_eviction ) && \ + ( (a).apply_empty_reserve == (b).apply_empty_reserve ) && \ + ( (a).empty_reserve == (b).empty_reserve ) && \ + ( (a).dirty_bytes_threshold == (b).dirty_bytes_threshold ) && \ + ( (a).metadata_write_strategy == (b).metadata_write_strategy ) ) #define XLATE_EXT_TO_INT_MDC_CONFIG(i, e) \ { \ diff --git a/testpar/t_cache.c b/testpar/t_cache.c index 554a8cc..0579829 100644 --- a/testpar/t_cache.c +++ b/testpar/t_cache.c @@ -77,6 +77,11 @@ long global_dirty_pins = 0; long local_pins = 0; +/* the following fields are used by the server process only */ +int total_reads = 0; +int total_writes = 0; + + /***************************************************************************** * struct datum * @@ -135,6 +140,14 @@ long local_pins = 0; * flushed: Boolean flag that is set to true whenever the entry is * dirty, and is flushed via a call to flush_datum(). * + * reads: Integer field used to maintain a count of the number of + * times this entry has been read from the server since + * the last time the read and write counts were reset. + * + * writes: Integer field used to maintain a count of the number of + * times this entry has been written to the server since + * the last time the read and write counts were reset. + * * index: Index of this instance of datum in the data_index[] array * discussed below. * @@ -154,6 +167,8 @@ struct datum hbool_t local_pinned; hbool_t cleared; hbool_t flushed; + int reads; + int writes; int index; }; @@ -217,6 +232,38 @@ int data_index[NUM_DATA_ENTRIES]; /***************************************************************************** + * The following two #defines are used to control code that is in turn used + * to force "POSIX" semantics on the server process used to simulate metadata + * reads and writes. Without some such mechanism, the test code contains + * race conditions that will frequently cause spurious failures. + * + * When set to TRUE, DO_WRITE_REQ_ACK forces the server to send an ack after + * each write request, and the client to wait until the ack is received + * before proceeding. This was my first solution to the problem, and at + * first glance, it would seem to have a lot of unnecessary overhead. + * + * In an attempt to reduce the overhead, I implemented a second solution + * in which no acks are sent after writes. Instead, the metadata cache is + * provided with a callback function to call after each sequence of writes. + * This callback simply causes the client to send the server process a + * "sync" message and and await an ack in reply. + * + * Strangely, at least on Phoenix, the first solution runs faster by a + * rather large margin. However, I can imagine this changing with + * different OS's and MPI implementatins. + * + * Thus I have left code supporting the second solution in place. + * + * Note that while one of these two #defines must be set to TRUE, there + * should never be any need to set both of them to TRUE (although the + * tests will still function with this setting). + *****************************************************************************/ + +#define DO_WRITE_REQ_ACK TRUE +#define DO_SYNC_AFTER_WRITE FALSE + + +/***************************************************************************** * struct mssg * * The mssg structure is used as a generic container for messages to @@ -236,22 +283,32 @@ int data_index[NUM_DATA_ENTRIES]; * * ver: Version number of a datum. Not used in all mssgs. * + * count: Reported number of total/entry reads/writes. Not used + * in all mssgs. + * * magic: Magic number for error detection. Must be set to * MSSG_MAGIC. * *****************************************************************************/ -#define DO_WRITE_REQ_ACK FALSE -#define DO_SYNC_AFTER_WRITE TRUE - -#define WRITE_REQ_CODE 0 -#define WRITE_REQ_ACK_CODE 1 -#define READ_REQ_CODE 2 -#define READ_REQ_REPLY_CODE 3 -#define SYNC_REQ_CODE 4 -#define SYNC_ACK_CODE 5 -#define DONE_REQ_CODE 6 -#define MAX_REQ_CODE 6 +#define WRITE_REQ_CODE 0 +#define WRITE_REQ_ACK_CODE 1 +#define READ_REQ_CODE 2 +#define READ_REQ_REPLY_CODE 3 +#define SYNC_REQ_CODE 4 +#define SYNC_ACK_CODE 5 +#define REQ_TTL_WRITES_CODE 6 +#define REQ_TTL_WRITES_RPLY_CODE 7 +#define REQ_TTL_READS_CODE 8 +#define REQ_TTL_READS_RPLY_CODE 9 +#define REQ_ENTRY_WRITES_CODE 10 +#define REQ_ENTRY_WRITES_RPLY_CODE 11 +#define REQ_ENTRY_READS_CODE 12 +#define REQ_ENTRY_READS_RPLY_CODE 13 +#define REQ_RW_COUNT_RESET_CODE 14 +#define REQ_RW_COUNT_RESET_RPLY_CODE 15 +#define DONE_REQ_CODE 16 +#define MAX_REQ_CODE 16 #define MSSG_MAGIC 0x1248 @@ -262,8 +319,9 @@ struct mssg_t int dest; long int mssg_num; haddr_t base_addr; - int len; + unsigned len; int ver; + int count; unsigned magic; }; @@ -306,10 +364,16 @@ static hbool_t takedown_derived_types(void); /* server functions */ +static hbool_t reset_server_counters(void); static hbool_t server_main(void); static hbool_t serve_read_request(struct mssg_t * mssg_ptr); static hbool_t serve_sync_request(struct mssg_t * mssg_ptr); static hbool_t serve_write_request(struct mssg_t * mssg_ptr); +static hbool_t serve_total_writes_request(struct mssg_t * mssg_ptr); +static hbool_t serve_total_reads_request(struct mssg_t * mssg_ptr); +static hbool_t serve_entry_writes_request(struct mssg_t * mssg_ptr); +static hbool_t serve_entry_reads_request(struct mssg_t * mssg_ptr); +static hbool_t serve_rw_count_reset_request(struct mssg_t * mssg_ptr); /* call back functions & related data structures */ @@ -361,11 +425,19 @@ void mark_entry_dirty(int32_t idx); void pin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global, hbool_t dirty); void pin_protected_entry(int32_t idx, hbool_t global); void move_entry(H5F_t * file_ptr, int32_t old_idx, int32_t new_idx); +static hbool_t reset_server_counts(void); void resize_entry(int32_t idx, size_t new_size); -hbool_t setup_cache_for_test(hid_t * fid_ptr, H5F_t ** file_ptr_ptr, - H5C_t ** cache_ptr_ptr); +hbool_t setup_cache_for_test(hid_t * fid_ptr, + H5F_t ** file_ptr_ptr, + H5C_t ** cache_ptr_ptr, + int metadata_write_strategy); void setup_rand(void); hbool_t take_down_cache(hid_t fid); +static hbool_t verify_entry_reads(haddr_t addr, int expected_entry_reads); +static hbool_t verify_entry_writes(haddr_t addr, int expected_entry_writes); +static hbool_t verify_total_reads(int expected_total_reads); +static hbool_t verify_total_writes(int expected_total_writes); +void verify_writes(int num_writes, haddr_t * written_entries_tbl); void unlock_entry(H5F_t * file_ptr, int32_t type, unsigned int flags); void unpin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global, hbool_t dirty, hbool_t via_unprotect); @@ -374,12 +446,12 @@ void unpin_entry(H5F_t * file_ptr, int32_t idx, hbool_t global, /* test functions */ hbool_t server_smoke_check(void); -hbool_t smoke_check_1(void); -hbool_t smoke_check_2(void); -hbool_t smoke_check_3(void); -hbool_t smoke_check_4(void); -hbool_t smoke_check_5(void); -hbool_t trace_file_check(void); +hbool_t smoke_check_1(int metadata_write_strategy); +hbool_t smoke_check_2(int metadata_write_strategy); +hbool_t smoke_check_3(int metadata_write_strategy); +hbool_t smoke_check_4(int metadata_write_strategy); +hbool_t smoke_check_5(int metadata_write_strategy); +hbool_t trace_file_check(int metadata_write_strategy); /*****************************************************************************/ @@ -631,12 +703,7 @@ set_up_file_communicator(void) * * Programmer: JRM -- 12/20/05 * - * Modifications: - * - * None. - * *****************************************************************************/ - static int addr_to_datum_index(haddr_t base_addr) { @@ -684,20 +751,7 @@ addr_to_datum_index(haddr_t base_addr) * * Programmer: JRM -- 12/20/05 * - * Modifications: - * - * JRM -- 7/11/06 - * Added support for the local_len field. - * - * JRM -- 2/4/09 - * Added initialization for the cleared and flushed fields. - * - * Mike McGreevy, July 2, 2009 - * Changed base address from 0 to 512 since the superblock will - * always be at address 0. - * *****************************************************************************/ - static void init_data(void) { @@ -735,6 +789,8 @@ init_data(void) data[i].local_pinned = FALSE; data[i].cleared = FALSE; data[i].flushed = FALSE; + data[i].reads = 0; + data[i].writes = 0; data[i].index = i; data_index[i] = i; @@ -773,12 +829,7 @@ init_data(void) * * Programmer: JRM -- 4/25/06 * - * Modifications: - * - * None. - * *****************************************************************************/ - static int do_express_test(void) { @@ -850,6 +901,7 @@ do_sync(void) mssg.base_addr = 0; mssg.len = 0; mssg.ver = 0; + mssg.count = 0; mssg.magic = MSSG_MAGIC; if ( ! send_mssg(&mssg, FALSE) ) { @@ -1103,7 +1155,7 @@ send_mssg(struct mssg_t *mssg_ptr, } /* send_mssg() */ - + /***************************************************************************** * * Function: setup_derived_types() @@ -1117,12 +1169,7 @@ send_mssg(struct mssg_t *mssg_ptr, * * Programmer: JRM -- 12/22/05 * - * Modifications: - * - * None. - * *****************************************************************************/ - static hbool_t setup_derived_types(void) { @@ -1130,11 +1177,11 @@ setup_derived_types(void) hbool_t success = TRUE; int i; int result; - MPI_Datatype mpi_types[8] = {MPI_INT, MPI_INT, MPI_INT, MPI_LONG, + MPI_Datatype mpi_types[9] = {MPI_INT, MPI_INT, MPI_INT, MPI_LONG, HADDR_AS_MPI_TYPE, MPI_INT, MPI_INT, - MPI_UNSIGNED}; - int block_len[8] = {1, 1, 1, 1, 1, 1, 1, 1}; - MPI_Aint displs[8]; + MPI_INT, MPI_UNSIGNED}; + int block_len[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; + MPI_Aint displs[9]; struct mssg_t sample; /* used to compute displacements */ /* setup the displacements array */ @@ -1145,7 +1192,8 @@ setup_derived_types(void) ( MPI_SUCCESS != MPI_Address(&sample.base_addr, &displs[4]) ) || ( MPI_SUCCESS != MPI_Address(&sample.len, &displs[5]) ) || ( MPI_SUCCESS != MPI_Address(&sample.ver, &displs[6]) ) || - ( MPI_SUCCESS != MPI_Address(&sample.magic, &displs[7]) ) ) { + ( MPI_SUCCESS != MPI_Address(&sample.count, &displs[7]) ) || + ( MPI_SUCCESS != MPI_Address(&sample.magic, &displs[8]) ) ) { nerrors++; success = FALSE; @@ -1157,7 +1205,7 @@ setup_derived_types(void) } else { /* Now calculate the actual displacements */ - for ( i = 7; i >= 0; --i) + for ( i = 8; i >= 0; --i) { displs[i] -= displs[0]; } @@ -1165,7 +1213,7 @@ setup_derived_types(void) if ( success ) { - result = MPI_Type_struct(8, block_len, displs, mpi_types, &mpi_mssg_t); + result = MPI_Type_struct(9, block_len, displs, mpi_types, &mpi_mssg_t); if ( result != MPI_SUCCESS ) { @@ -1247,6 +1295,79 @@ takedown_derived_types(void) /***************************************************************************** * + * Function: reset_server_counters() + * + * Purpose: Reset the counters maintained by the server, doing a + * sanity check in passing. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + * Modifications: + * + * None. + * + *****************************************************************************/ + +static hbool_t +reset_server_counters(void) +{ + const char * fcn_name = "reset_server_counters()"; + hbool_t success = TRUE; + int i; + long actual_total_reads = 0; + long actual_total_writes = 0; + + for ( i = 0; i < NUM_DATA_ENTRIES; i++ ) + { + if ( data[i].reads > 0 ) { + + actual_total_reads += data[i].reads; + data[i].reads = 0; + } + + if ( data[i].writes > 0 ) { + + actual_total_writes += data[i].writes; + data[i].writes = 0; + } + } + + if ( actual_total_reads != total_reads ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: actual/total reads mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, + actual_total_reads, total_reads); + } + } + + if ( actual_total_writes != total_writes ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: actual/total writes mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, + actual_total_writes, total_writes); + } + } + + total_reads = 0; + total_writes = 0; + + return(success); + +} /* reset_server_counters() */ + + +/***************************************************************************** + * * Function: server_main() * * Purpose: Main function for the server process. This process exists @@ -1303,7 +1424,8 @@ server_main(void) case WRITE_REQ_ACK_CODE: success = FALSE; - HDfprintf(stdout, "%s: Received write ack?!?.\n", fcn_name); + if(verbose) + HDfprintf(stdout, "%s: Received write ack?!?.\n", fcn_name); break; case READ_REQ_CODE: @@ -1312,8 +1434,8 @@ server_main(void) case READ_REQ_REPLY_CODE: success = FALSE; - HDfprintf(stdout, "%s: Received read req reply?!?.\n", - fcn_name); + if(verbose) + HDfprintf(stdout, "%s: Received read req reply?!?.\n", fcn_name); break; case SYNC_REQ_CODE: @@ -1322,27 +1444,71 @@ server_main(void) case SYNC_ACK_CODE: success = FALSE; - HDfprintf(stdout, "%s: Received sync ack?!?.\n", - fcn_name); + if(verbose) + HDfprintf(stdout, "%s: Received sync ack?!?.\n", fcn_name); + break; + + case REQ_TTL_WRITES_CODE: + success = serve_total_writes_request(&mssg); + break; + + case REQ_TTL_WRITES_RPLY_CODE: + success = FALSE; + if(verbose) + HDfprintf(stdout, "%s: Received total writes reply?!?.\n", fcn_name); + break; + + case REQ_TTL_READS_CODE: + success = serve_total_reads_request(&mssg); + break; + + case REQ_TTL_READS_RPLY_CODE: + success = FALSE; + if(verbose) + HDfprintf(stdout, "%s: Received total reads reply?!?.\n", fcn_name); + break; + + case REQ_ENTRY_WRITES_CODE: + success = serve_entry_writes_request(&mssg); + break; + + case REQ_ENTRY_WRITES_RPLY_CODE: + success = FALSE; + if(verbose) + HDfprintf(stdout, "%s: Received entry writes reply?!?.\n", fcn_name); + break; + + case REQ_ENTRY_READS_CODE: + success = serve_entry_reads_request(&mssg); + break; + + case REQ_ENTRY_READS_RPLY_CODE: + success = FALSE; + if(verbose) + HDfprintf(stdout, "%s: Received entry reads reply?!?.\n", fcn_name); + break; + + case REQ_RW_COUNT_RESET_CODE: + success = serve_rw_count_reset_request(&mssg); + break; + + case REQ_RW_COUNT_RESET_RPLY_CODE: + success = FALSE; + if(verbose) + HDfprintf(stdout, "%s: Received RW count reset reply?!?.\n", fcn_name); break; case DONE_REQ_CODE: done_count++; - /* HDfprintf(stdout, "%d:%s: done_count = %d.\n", - world_mpi_rank, fcn_name, done_count); */ - if ( done_count >= file_mpi_size ) { - + if(done_count >= file_mpi_size) done = TRUE; - } break; default: nerrors++; success = FALSE; - if ( verbose ) { - HDfprintf(stdout, "%d:%s: Unknown request code.\n", - world_mpi_rank, fcn_name); - } + if(verbose) + HDfprintf(stdout, "%d:%s: Unknown request code.\n", world_mpi_rank, fcn_name); break; } } @@ -1352,7 +1518,7 @@ server_main(void) } /* server_main() */ - + /***************************************************************************** * * Function: serve_read_request() @@ -1370,16 +1536,12 @@ server_main(void) * * Programmer: JRM -- 12/22/05 * - * Modifications: - * - * None. - * *****************************************************************************/ - static hbool_t serve_read_request(struct mssg_t * mssg_ptr) { const char * fcn_name = "serve_read_request()"; + hbool_t report_mssg = FALSE; hbool_t success = TRUE; int target_index; haddr_t target_addr; @@ -1426,11 +1588,11 @@ serve_read_request(struct mssg_t * mssg_ptr) success = FALSE; if ( verbose ) { HDfprintf(stdout, - "%d:%s: proc %d read invalid entry. idx/base_addr = %d/%a.\n", - world_mpi_rank, fcn_name, - mssg_ptr->src, + "%d:%s: proc %d read invalid entry. idx/base_addr = %d/0x%llx.\n", + world_mpi_rank, fcn_name, + mssg_ptr->src, target_index, - data[target_index].base_addr); + (long long)(data[target_index].base_addr)); } } else { @@ -1442,7 +1604,12 @@ serve_read_request(struct mssg_t * mssg_ptr) reply.base_addr = data[target_index].base_addr; reply.len = data[target_index].len; reply.ver = data[target_index].ver; + reply.count = 0; reply.magic = MSSG_MAGIC; + + /* and update the counters */ + total_reads++; + (data[target_index].reads)++; } } @@ -1451,6 +1618,27 @@ serve_read_request(struct mssg_t * mssg_ptr) success = send_mssg(&reply, TRUE); } + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d read 0x%llx. len = %d. ver = %d.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (int)(data[target_index].len), + (int)(data[target_index].ver)); + + } else { + + HDfprintf(stdout, "%d read 0x%llx FAILED. len = %d. ver = %d.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (int)(data[target_index].len), + (int)(data[target_index].ver)); + + } + } + return(success); } /* serve_read_request() */ @@ -1486,6 +1674,7 @@ static hbool_t serve_sync_request(struct mssg_t * mssg_ptr) { const char * fcn_name = "serve_sync_request()"; + hbool_t report_mssg = FALSE; hbool_t success = TRUE; struct mssg_t reply; @@ -1511,6 +1700,7 @@ serve_sync_request(struct mssg_t * mssg_ptr) reply.base_addr = 0; reply.len = 0; reply.ver = 0; + reply.count = 0; reply.magic = MSSG_MAGIC; } @@ -1519,11 +1709,24 @@ serve_sync_request(struct mssg_t * mssg_ptr) success = send_mssg(&reply, TRUE); } + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d sync.\n", (int)(mssg_ptr->src)); + + } else { + + HDfprintf(stdout, "%d sync FAILED.\n", (int)(mssg_ptr->src)); + + } + } + return(success); } /* serve_sync_request() */ - + /***************************************************************************** * * Function: serve_write_request() @@ -1541,19 +1744,12 @@ serve_sync_request(struct mssg_t * mssg_ptr) * * Programmer: JRM -- 12/21/05 * - * Modifications: - * - * JRM -- 5/9/06 - * Added code supporting a write ack message. This is a - * speculative fix to a bug observed on Cobalt. If it - * doesn't work, it will help narrow down the possibilities. - * *****************************************************************************/ - static hbool_t serve_write_request(struct mssg_t * mssg_ptr) { const char * fcn_name = "serve_write_request()"; + hbool_t report_mssg = FALSE; hbool_t success = TRUE; int target_index; int new_ver_num; @@ -1604,6 +1800,7 @@ serve_write_request(struct mssg_t * mssg_ptr) new_ver_num = mssg_ptr->ver; + /* this check should catch duplicate writes */ if ( new_ver_num <= data[target_index].ver ) { nerrors++; @@ -1622,6 +1819,10 @@ serve_write_request(struct mssg_t * mssg_ptr) data[target_index].ver = new_ver_num; data[target_index].valid = TRUE; + /* and update the counters */ + total_writes++; + (data[target_index].writes)++; + #if DO_WRITE_REQ_ACK /* compose the reply message */ @@ -1632,6 +1833,7 @@ serve_write_request(struct mssg_t * mssg_ptr) reply.base_addr = data[target_index].base_addr; reply.len = data[target_index].len; reply.ver = data[target_index].ver; + reply.count = 0; reply.magic = MSSG_MAGIC; /* and send it */ @@ -1641,10 +1843,469 @@ serve_write_request(struct mssg_t * mssg_ptr) } + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d write 0x%llx. len = %d. ver = %d.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (int)(data[target_index].len), + (int)(data[target_index].ver)); + + } else { + + HDfprintf(stdout, "%d write 0x%llx FAILED. len = %d. ver = %d.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (int)(data[target_index].len), + (int)(data[target_index].ver)); + + } + } + return(success); } /* serve_write_request() */ + +/***************************************************************************** + * + * Function: serve_total_writes_request() + * + * Purpose: Serve a request for the total number of writes recorded since + * the last reset. + * + * The function accepts a pointer to an instance of struct + * mssg_t as input. If all sanity checks pass, it sends + * the current value of the total_writes global variable to + * the requesting process. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + *****************************************************************************/ +static hbool_t +serve_total_writes_request(struct mssg_t * mssg_ptr) +{ + const char * fcn_name = "serve_total_writes_request()"; + hbool_t report_mssg = FALSE; + hbool_t success = TRUE; + struct mssg_t reply; + + if ( ( mssg_ptr == NULL ) || + ( mssg_ptr->req != REQ_TTL_WRITES_CODE ) || + ( mssg_ptr->magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n", + world_mpi_rank, fcn_name); + } + } + + if ( success ) { + + /* compose the reply message */ + reply.req = REQ_TTL_WRITES_RPLY_CODE; + reply.src = world_mpi_rank; + reply.dest = mssg_ptr->src; + reply.mssg_num = -1; /* set by send function */ + reply.base_addr = 0; + reply.len = 0; + reply.ver = 0; + reply.count = total_writes; + reply.magic = MSSG_MAGIC; + } + + if ( success ) { + + success = send_mssg(&reply, TRUE); + } + + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d request total writes %ld.\n", + (int)(mssg_ptr->src), + total_writes); + + } else { + + HDfprintf(stdout, "%d request total writes %ld -- FAILED.\n", + (int)(mssg_ptr->src), + total_writes); + + } + } + + return(success); + +} /* serve_total_writes_request() */ + + +/***************************************************************************** + * + * Function: serve_total_reads_request() + * + * Purpose: Serve a request for the total number of reads recorded since + * the last reset. + * + * The function accepts a pointer to an instance of struct + * mssg_t as input. If all sanity checks pass, it sends + * the current value of the total_reads global variable to + * the requesting process. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + *****************************************************************************/ +static hbool_t +serve_total_reads_request(struct mssg_t * mssg_ptr) +{ + const char * fcn_name = "serve_total_reads_request()"; + hbool_t report_mssg = FALSE; + hbool_t success = TRUE; + struct mssg_t reply; + + if ( ( mssg_ptr == NULL ) || + ( mssg_ptr->req != REQ_TTL_READS_CODE ) || + ( mssg_ptr->magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n", + world_mpi_rank, fcn_name); + } + } + + if ( success ) { + + /* compose the reply message */ + reply.req = REQ_TTL_READS_RPLY_CODE; + reply.src = world_mpi_rank; + reply.dest = mssg_ptr->src; + reply.mssg_num = -1; /* set by send function */ + reply.base_addr = 0; + reply.len = 0; + reply.ver = 0; + reply.count = total_reads; + reply.magic = MSSG_MAGIC; + } + + if ( success ) { + + success = send_mssg(&reply, TRUE); + } + + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d request total reads %ld.\n", + (int)(mssg_ptr->src), + total_reads); + + } else { + + HDfprintf(stdout, "%d request total reads %ld -- FAILED.\n", + (int)(mssg_ptr->src), + total_reads); + + } + } + + return(success); + +} /* serve_total_reads_request() */ + + +/***************************************************************************** + * + * Function: serve_entry_writes_request() + * + * Purpose: Serve an entry writes request. + * + * The function accepts a pointer to an instance of struct + * mssg_t as input. If all sanity checks pass, it sends + * the number of times that the indicated datum has been + * written since the last counter reset to the requesting + * process. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + *****************************************************************************/ +static hbool_t +serve_entry_writes_request(struct mssg_t * mssg_ptr) +{ + const char * fcn_name = "serve_entry_writes_request()"; + hbool_t report_mssg = FALSE; + hbool_t success = TRUE; + int target_index; + haddr_t target_addr; + struct mssg_t reply; + + if ( ( mssg_ptr == NULL ) || + ( mssg_ptr->req != REQ_ENTRY_WRITES_CODE ) || + ( mssg_ptr->magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n", + world_mpi_rank, fcn_name); + } + } + + if ( success ) { + + target_addr = mssg_ptr->base_addr; + target_index = addr_to_datum_index(target_addr); + + if ( target_index < 0 ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: addr lookup failed for %a.\n", + world_mpi_rank, fcn_name, target_addr); + } + } else { + + /* compose the reply message */ + reply.req = REQ_ENTRY_WRITES_RPLY_CODE; + reply.src = world_mpi_rank; + reply.dest = mssg_ptr->src; + reply.mssg_num = -1; /* set by send function */ + reply.base_addr = target_addr; + reply.len = 0; + reply.ver = 0; + reply.count = data[target_index].writes; + reply.magic = MSSG_MAGIC; + } + } + + if ( success ) { + + success = send_mssg(&reply, TRUE); + } + + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d request entry 0x%llx writes = %ld.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (long)(data[target_index].writes)); + + } else { + + HDfprintf(stdout, "%d request entry 0x%llx writes = %ld FAILED.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (long)(data[target_index].writes)); + + } + } + + return(success); + +} /* serve_entry_writes_request() */ + + +/***************************************************************************** + * + * Function: serve_entry_reads_request() + * + * Purpose: Serve an entry reads request. + * + * The function accepts a pointer to an instance of struct + * mssg_t as input. If all sanity checks pass, it sends + * the number of times that the indicated datum has been + * read since the last counter reset to the requesting + * process. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + *****************************************************************************/ +static hbool_t +serve_entry_reads_request(struct mssg_t * mssg_ptr) +{ + const char * fcn_name = "serve_entry_reads_request()"; + hbool_t report_mssg = FALSE; + hbool_t success = TRUE; + int target_index; + haddr_t target_addr; + struct mssg_t reply; + + if ( ( mssg_ptr == NULL ) || + ( mssg_ptr->req != REQ_ENTRY_READS_CODE ) || + ( mssg_ptr->magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n", + world_mpi_rank, fcn_name); + } + } + + if ( success ) { + + target_addr = mssg_ptr->base_addr; + target_index = addr_to_datum_index(target_addr); + + if ( target_index < 0 ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: addr lookup failed for %a.\n", + world_mpi_rank, fcn_name, target_addr); + } + } else { + + /* compose the reply message */ + reply.req = REQ_ENTRY_READS_RPLY_CODE; + reply.src = world_mpi_rank; + reply.dest = mssg_ptr->src; + reply.mssg_num = -1; /* set by send function */ + reply.base_addr = target_addr; + reply.len = 0; + reply.ver = 0; + reply.count = (long)(data[target_index].reads); + reply.magic = MSSG_MAGIC; + } + } + + if ( success ) { + + success = send_mssg(&reply, TRUE); + } + + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d request entry 0x%llx reads = %ld.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (long)(data[target_index].reads)); + + } else { + + HDfprintf(stdout, "%d request entry 0x%llx reads = %ld FAILED.\n", + (int)(mssg_ptr->src), + (long long)(data[target_index].base_addr), + (long)(data[target_index].reads)); + + } + } + + return(success); + +} /* serve_entry_reads_request() */ + + +/***************************************************************************** + * + * Function: serve_rw_count_reset_request() + * + * Purpose: Serve read/write count reset request. + * + * The function accepts a pointer to an instance of struct + * mssg_t as input. If all sanity checks pass, it resets the + * read/write counters, and sends a confirmation message to + * the calling process. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/5/10 + * + *****************************************************************************/ +static hbool_t +serve_rw_count_reset_request(struct mssg_t * mssg_ptr) +{ + const char * fcn_name = "serve_rw_count_reset_request()"; + hbool_t report_mssg = FALSE; + hbool_t success = TRUE; + struct mssg_t reply; + + if ( ( mssg_ptr == NULL ) || + ( mssg_ptr->req != REQ_RW_COUNT_RESET_CODE ) || + ( mssg_ptr->magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad mssg on entry.\n", + world_mpi_rank, fcn_name); + } + } + + if ( success ) { + + success = reset_server_counters(); + } + + if ( success ) { + + /* compose the reply message */ + reply.req = REQ_RW_COUNT_RESET_RPLY_CODE; + reply.src = world_mpi_rank; + reply.dest = mssg_ptr->src; + reply.mssg_num = -1; /* set by send function */ + reply.base_addr = 0; + reply.len = 0; + reply.ver = 0; + reply.count = 0; + reply.magic = MSSG_MAGIC; + } + + if ( success ) { + + success = send_mssg(&reply, TRUE); + } + + if ( report_mssg ) { + + if ( success ) { + + HDfprintf(stdout, "%d request R/W counter reset.\n", + (int)(mssg_ptr->src)); + + } else { + + HDfprintf(stdout, "%d request R/w counter reset FAILED.\n", + (int)(mssg_ptr->src)); + + } + } + + return(success); + +} /* serve_rw_count_reset_request() */ + /*****************************************************************************/ /**************************** Call back functions ****************************/ @@ -1662,21 +2323,8 @@ serve_write_request(struct mssg_t * mssg_ptr) * Programmer: John Mainzer * 12/29/05 * - * Modifications: - * - * JRM -- 7/11/06 - * Modified code to support the local_len field of datum. - * This field allow us to track the cache's value for the - * length of the entry, while retaining the original - * value for communications with the server. - * - * JRM -- 2/4/09 - * Added code to set the cleared flag when a dirty entry is - * cleared. - * *------------------------------------------------------------------------- */ - static herr_t clear_datum(H5F_t * f, void * thing, @@ -1728,6 +2376,7 @@ clear_datum(H5F_t * f, } /* clear_datum() */ + /*------------------------------------------------------------------------- * Function: destroy_datum() * @@ -1740,17 +2389,8 @@ clear_datum(H5F_t * f, * Programmer: John Mainzer * 12/29/05 * - * Modifications: - * - * JRM -- 7/11/06 - * Modified code to support the local_len field of datum. - * This field allow us to track the cache's value for the - * length of the entry, while retaining the original - * value for communications with the server. - * *------------------------------------------------------------------------- */ - static herr_t destroy_datum(H5F_t UNUSED * f, void * thing) @@ -1785,6 +2425,7 @@ destroy_datum(H5F_t UNUSED * f, } /* destroy_datum() */ + /*------------------------------------------------------------------------- * Function: flush_datum * @@ -1796,27 +2437,8 @@ destroy_datum(H5F_t UNUSED * f, * Programmer: John Mainzer * 12/29/05 * - * Modifications: - * - * JRM -- 5/9/06 - * Added code to receive the write request ack messages - * from the server. This is part of a speculative fix to - * a bug spotted on Cobalt. If it doesn't fix the problem, - * it will narrow down the possibilities. - * - * JRM -- 7/11/06 - * Modified code to support the local_len field of datum. - * This field allow us to track the cache's value for the - * length of the entry, while retaining the original - * value for communications with the server. - * - * JRM -- 2/4/09 - * Added code to set the flushed flag when a dirty entry - * is flushed. - * *------------------------------------------------------------------------- */ - static herr_t flush_datum(H5F_t *f, hid_t UNUSED dxpl_id, @@ -1825,15 +2447,31 @@ flush_datum(H5F_t *f, void *thing) { const char * fcn_name = "flush_datum()"; + hbool_t was_dirty = FALSE; herr_t ret_value = SUCCEED; int idx; struct datum * entry_ptr; struct mssg_t mssg; + H5C_t * cache_ptr; + struct H5AC_aux_t * aux_ptr; HDassert( thing ); entry_ptr = (struct datum *)thing; + HDassert( f ); + HDassert( f->shared ); + HDassert( f->shared->cache ); + + cache_ptr = f->shared->cache; + + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->aux_ptr ); + + aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr); + + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + idx = addr_to_datum_index(entry_ptr->base_addr); HDassert( idx >= 0 ); @@ -1847,7 +2485,10 @@ flush_datum(H5F_t *f, HDassert( entry_ptr->header.is_dirty == entry_ptr->dirty ); - if ( ( file_mpi_rank != 0 ) && ( entry_ptr->dirty ) ) { + if ( ( file_mpi_rank != 0 ) && + ( entry_ptr->dirty ) && + ( aux_ptr->metadata_write_strategy == + H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY ) ) { ret_value = FAIL; HDfprintf(stdout, @@ -1859,6 +2500,8 @@ flush_datum(H5F_t *f, if ( entry_ptr->header.is_dirty ) { + was_dirty = TRUE; /* so we will receive the ack if requested */ + /* compose the message */ mssg.req = WRITE_REQ_CODE; mssg.src = world_mpi_rank; @@ -1867,6 +2510,7 @@ flush_datum(H5F_t *f, mssg.base_addr = entry_ptr->base_addr; mssg.len = entry_ptr->len; mssg.ver = entry_ptr->ver; + mssg.count = 0; mssg.magic = MSSG_MAGIC; if ( ! send_mssg(&mssg, FALSE) ) { @@ -1889,7 +2533,7 @@ flush_datum(H5F_t *f, #if DO_WRITE_REQ_ACK - if ( ( ret_value == SUCCEED ) && ( entry_ptr->header.is_dirty ) ) { + if ( ( ret_value == SUCCEED ) && ( was_dirty ) ) { if ( ! recv_mssg(&mssg, WRITE_REQ_ACK_CODE) ) { @@ -1986,6 +2630,7 @@ load_datum(H5F_t UNUSED *f, mssg.base_addr = entry_ptr->base_addr; mssg.len = entry_ptr->len; mssg.ver = 0; /* bogus -- should be corrected by server */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( ! send_mssg(&mssg, FALSE) ) { @@ -2420,7 +3065,7 @@ local_pin_and_unpin_random_entries(H5F_t * file_ptr, } /* local_pin_and_unpin_random_entries() */ - + /***************************************************************************** * Function: local_pin_random_entry() * @@ -2436,10 +3081,7 @@ local_pin_and_unpin_random_entries(H5F_t * file_ptr, * Programmer: John Mainzer * 4/12/06 * - * Modifications: - * *****************************************************************************/ - void local_pin_random_entry(H5F_t * file_ptr, int min_idx, @@ -2940,7 +3582,7 @@ pin_protected_entry(int32_t idx, } /* pin_protected_entry() */ - + /***************************************************************************** * Function: move_entry() * @@ -2956,13 +3598,7 @@ pin_protected_entry(int32_t idx, * Programmer: John Mainzer * 1/10/06 * - * Modifications: - * - * 7/11/06 -- JRM - * Added support for the phony_len field in datum. - * *****************************************************************************/ - void move_entry(H5F_t * file_ptr, int32_t old_idx, @@ -2996,7 +3632,35 @@ move_entry(H5F_t * file_ptr, old_addr = old_entry_ptr->base_addr; new_addr = new_entry_ptr->base_addr; - result = H5AC_move_entry(file_ptr, &(types[0]), old_addr, new_addr); + /* Moving will mark the entry dirty if it is not already */ + old_entry_ptr->dirty = TRUE; + + /* touch up versions, base_addrs, and data_index. Do this + * now as it is possible that the rename will trigger a + * sync point. + */ + if(old_entry_ptr->ver < new_entry_ptr->ver) + old_entry_ptr->ver = new_entry_ptr->ver; + else + (old_entry_ptr->ver)++; + + old_entry_ptr->base_addr = new_addr; + new_entry_ptr->base_addr = old_addr; + + data_index[old_entry_ptr->index] = new_idx; + data_index[new_entry_ptr->index] = old_idx; + + tmp = old_entry_ptr->index; + old_entry_ptr->index = new_entry_ptr->index; + new_entry_ptr->index = tmp; + + if(old_entry_ptr->local_len != new_entry_ptr->local_len) { + tmp_len = old_entry_ptr->local_len; + old_entry_ptr->local_len = new_entry_ptr->local_len; + new_entry_ptr->local_len = tmp_len; + } /* end if */ + + result = H5AC_move_entry(file_ptr, &(types[0]), old_addr, new_addr); if ( ( result < 0 ) || ( old_entry_ptr->header.addr != new_addr ) ) { @@ -3009,43 +3673,118 @@ move_entry(H5F_t * file_ptr, } else { HDassert( ((old_entry_ptr->header).type)->id == DATUM_ENTRY_TYPE ); - HDassert( old_entry_ptr->header.is_dirty ); - old_entry_ptr->dirty = TRUE; - /* touch up versions, base_addrs, and data_index */ + if ( ! (old_entry_ptr->header.is_dirty) ) { - if ( old_entry_ptr->ver < new_entry_ptr->ver ) { + /* it is possible that we just exceeded the dirty bytes + * threshold, triggering a write of the newly inserted + * entry. Test for this, and only flag an error if this + * is not the case. + */ - old_entry_ptr->ver = new_entry_ptr->ver; + struct H5AC_aux_t * aux_ptr; - } else { + aux_ptr = ((H5AC_aux_t *)(file_ptr->shared->cache->aux_ptr)); - (old_entry_ptr->ver)++; + if ( ! ( ( aux_ptr != NULL ) && + ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ) && + ( aux_ptr->dirty_bytes == 0 ) ) ) { + nerrors++; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: data[%d].header.is_dirty = %d.\n", + world_mpi_rank, fcn_name, new_idx, + (int)(data[new_idx].header.is_dirty)); + } + } + } else { + + HDassert( old_entry_ptr->header.is_dirty ); } + } + } - old_entry_ptr->base_addr = new_addr; - new_entry_ptr->base_addr = old_addr; +} /* move_entry() */ - data_index[old_entry_ptr->index] = new_idx; - data_index[new_entry_ptr->index] = old_idx; + +/***************************************************************************** + * + * Function: reset_server_counts() + * + * Purpose: Send a message to the server process requesting it to reset + * its counters. Await confirmation message. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/6/10 + * + *****************************************************************************/ +static hbool_t +reset_server_counts(void) +{ + const char * fcn_name = "reset_server_counts()"; + hbool_t success = TRUE; /* will set to FALSE if appropriate. */ + struct mssg_t mssg; + + if ( success ) { - tmp = old_entry_ptr->index; - old_entry_ptr->index = new_entry_ptr->index; - new_entry_ptr->index = tmp; + /* compose the message */ + mssg.req = REQ_RW_COUNT_RESET_CODE; + mssg.src = world_mpi_rank; + mssg.dest = world_server_mpi_rank; + mssg.mssg_num = -1; /* set by send function */ + mssg.base_addr = 0; + mssg.len = 0; + mssg.ver = 0; + mssg.count = 0; + mssg.magic = MSSG_MAGIC; - if ( old_entry_ptr->local_len != new_entry_ptr->local_len ) { + if ( ! send_mssg(&mssg, FALSE) ) { - tmp_len = old_entry_ptr->local_len; - old_entry_ptr->local_len = new_entry_ptr->local_len; - new_entry_ptr->local_len = tmp_len; - } + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: send_mssg() failed.\n", + world_mpi_rank, fcn_name); + } } } - return; + if ( success ) { -} /* move_entry() */ + if ( ! recv_mssg(&mssg, REQ_RW_COUNT_RESET_RPLY_CODE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } else if ( ( mssg.req != REQ_RW_COUNT_RESET_RPLY_CODE ) || + ( mssg.src != world_server_mpi_rank ) || + ( mssg.dest != world_mpi_rank ) || + ( mssg.base_addr != 0 ) || + ( mssg.len != 0 ) || + ( mssg.ver != 0 ) || + ( mssg.count != 0 ) || + ( mssg.magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: Bad data in req r/w counter reset reply.\n", + world_mpi_rank, fcn_name); + } + } + } + + return(success); + +} /* reset_server_counts() */ /***************************************************************************** @@ -3121,7 +3860,7 @@ resize_entry(int32_t idx, } /* resize_entry() */ - + /***************************************************************************** * * Function: setup_cache_for_test() @@ -3140,22 +3879,19 @@ resize_entry(int32_t idx, * * Programmer: JRM -- 1/4/06 * - * Modifications: - * - * None. - * *****************************************************************************/ - hbool_t setup_cache_for_test(hid_t * fid_ptr, H5F_t ** file_ptr_ptr, - H5C_t ** cache_ptr_ptr) + H5C_t ** cache_ptr_ptr, + int metadata_write_strategy) { const char * fcn_name = "setup_cache_for_test()"; hbool_t success = FALSE; /* will set to TRUE if appropriate. */ hbool_t enable_rpt_fcn = FALSE; hid_t fid = -1; H5AC_cache_config_t config; + H5AC_cache_config_t test_config; H5F_t * file_ptr = NULL; H5C_t * cache_ptr = NULL; @@ -3213,7 +3949,7 @@ setup_cache_for_test(hid_t * fid_ptr, success = TRUE; } - if ( ( success ) && ( enable_rpt_fcn ) ) { + if ( success ) { config.version = H5AC__CURR_CACHE_CONFIG_VERSION; @@ -3221,12 +3957,13 @@ setup_cache_for_test(hid_t * fid_ptr, != SUCCEED ) { HDfprintf(stdout, - "%d:%s: H5AC_get_cache_auto_resize_config() failed.\n", + "%d:%s: H5AC_get_cache_auto_resize_config(1) failed.\n", world_mpi_rank, fcn_name); } else { - config.rpt_fcn_enabled = TRUE; + config.rpt_fcn_enabled = enable_rpt_fcn; + config.metadata_write_strategy = metadata_write_strategy; if ( H5AC_set_cache_auto_resize_config(cache_ptr, &config) != SUCCEED ) { @@ -3234,7 +3971,8 @@ setup_cache_for_test(hid_t * fid_ptr, HDfprintf(stdout, "%d:%s: H5AC_set_cache_auto_resize_config() failed.\n", world_mpi_rank, fcn_name); - } else { + + } else if ( enable_rpt_fcn ) { HDfprintf(stdout, "%d:%s: rpt_fcn enabled.\n", world_mpi_rank, fcn_name); @@ -3242,6 +3980,71 @@ setup_cache_for_test(hid_t * fid_ptr, } } + /* verify that the metadata write strategy is set as expected. Must + * do this here, as this field is only set in the parallel case. Hence + * we can't do our usual checks in the serial case. + */ + + if ( success ) /* verify that the metadata write strategy is as expected */ + { + if ( cache_ptr->aux_ptr == NULL ) { + + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: cache_ptr->aux_ptr == NULL.\n", + world_mpi_rank, fcn_name); + } + } else if ( ((H5AC_aux_t *)(cache_ptr->aux_ptr))->magic != + H5AC__H5AC_AUX_T_MAGIC ) { + + nerrors++; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: cache_ptr->aux_ptr->magic != H5AC__H5AC_AUX_T_MAGIC.\n", + world_mpi_rank, fcn_name); + } + } else if( ((H5AC_aux_t *)(cache_ptr->aux_ptr))->metadata_write_strategy + != metadata_write_strategy ) { + + nerrors++; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: bad cache_ptr->aux_ptr->metadata_write_strategy\n", + world_mpi_rank, fcn_name); + } + } + } + + /* also verify that the expected metadata write strategy is reported + * when we get the current configuration. + */ + + if ( success ) { + + test_config.version = H5AC__CURR_CACHE_CONFIG_VERSION; + + if ( H5AC_get_cache_auto_resize_config(cache_ptr, &test_config) + != SUCCEED ) { + + HDfprintf(stdout, + "%d:%s: H5AC_get_cache_auto_resize_config(2) failed.\n", + world_mpi_rank, fcn_name); + + } else if ( test_config.metadata_write_strategy != + metadata_write_strategy ) { + + nerrors++; + + if ( verbose ) { + + HDfprintf(stdout, + "%d:%s: unexpected metadata_write_strategy.\n", + world_mpi_rank, fcn_name); + } + } + } + + #if DO_SYNC_AFTER_WRITE if ( success ) { @@ -3259,10 +4062,153 @@ setup_cache_for_test(hid_t * fid_ptr, #endif /* DO_SYNC_AFTER_WRITE */ + if ( success ) { + + if ( H5AC_set_sync_point_done_callback(cache_ptr, verify_writes) != + SUCCEED ) { + + nerrors++; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: H5AC_set_sync_point_done_callback failed.\n", + world_mpi_rank, fcn_name); + } + } + } + return(success); } /* setup_cache_for_test() */ + +/***************************************************************************** + * + * Function: verify_writes() + * + * Purpose: Verify that the indicated entries have been written exactly + * once each, and that the indicated total number of writes + * has been processed by the server process. Flag an error if + * discrepency is noted. Finally reset the counters maintained + * by the server process. + * + * This function should only be called by the metadata cache + * as the "sync point done" function, as it must do some + * synchronization to avoid false positives. + * + * Note that at present, this function does not allow for the + * case in which one or more of the indicated entries should + * have been written more than once since the last time the + * server process's counters were reset. That is fine for now, + * as with the current metadata write strategies, no entry + * should be written more than once per sync point. If this + * changes this limitation will have to be revisited. + * + * Return: void. + * + * Programmer: JRM -- 5/9/10 + * + *****************************************************************************/ +void +verify_writes(int num_writes, + haddr_t * written_entries_tbl) +{ + const char * fcn_name = "verify_writes()"; + const hbool_t report = FALSE; + hbool_t proceed = TRUE; + int i; + + HDassert( world_mpi_rank != world_server_mpi_rank ); + HDassert( num_writes >= 0 ); + HDassert( ( num_writes == 0 ) || + ( written_entries_tbl != NULL ) ); + + /* barrier to ensure that all other processes are ready to leave + * the sync point as well. + */ + if ( proceed ) { + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + proceed = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: barrier 1 failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( proceed ) { + + proceed = verify_total_writes(num_writes); + } + + while ( ( proceed ) && ( i < num_writes ) ) + { + proceed = verify_entry_writes(written_entries_tbl[i], 1); + i++; + } + + /* barrier to ensure that all other processes have finished verifying + * the number of writes before we reset the counters. + */ + if ( proceed ) { + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + proceed = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: barrier 2 failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( proceed ) { + + proceed = reset_server_counts(); + } + + /* if requested, display status of check to stdout */ + if ( ( report ) && ( file_mpi_rank == 0 ) ) { + + if ( proceed ) { + + HDfprintf(stdout, "%d:%s: verified %d writes.\n", + world_mpi_rank, fcn_name, num_writes); + + } else { + + HDfprintf(stdout, "%d:%s: FAILED to verify %d writes.\n", + world_mpi_rank, fcn_name, num_writes); + + } + } + + /* final barrier to ensure that all processes think that the server + * counters have been reset before we leave the sync point. This + * barrier is probaby not necessary at this point in time (5/9/10), + * but I can think of at least one likely change to the metadata write + * strategies that will require it -- hence its insertion now. + */ + if ( proceed ) { + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + proceed = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: barrier 3 failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + return; + +} /* verify_writes() */ + /***************************************************************************** * @@ -3509,6 +4455,425 @@ take_down_cache(hid_t fid) } /* take_down_cache() */ + +/***************************************************************************** + * Function: verify_entry_reads + * + * Purpose: Query the server to determine the number of times the + * indicated entry has been read since the last time the + * server counters were reset. + * + * Return TRUE if successful, and if the supplied expected + * number of reads matches the number of reads reported by + * the server process. + * + * Return FALSE and flag an error otherwise. + * + * Return: TRUE if successful, FALSE otherwise. + * + * Programmer: John Mainzer + * 5/6/10 + * + *------------------------------------------------------------------------- + */ +static hbool_t +verify_entry_reads(haddr_t addr, + int expected_entry_reads) +{ + const char * fcn_name = "verify_entry_reads()"; + hbool_t success = TRUE; + int reported_entry_reads; + struct mssg_t mssg; + + if ( success ) { + + /* compose the message */ + mssg.req = REQ_ENTRY_READS_CODE; + mssg.src = world_mpi_rank; + mssg.dest = world_server_mpi_rank; + mssg.mssg_num = -1; /* set by send function */ + mssg.base_addr = addr; + mssg.len = 0; /* not used */ + mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ + mssg.magic = MSSG_MAGIC; + + if ( ! send_mssg(&mssg, FALSE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: send_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ! recv_mssg(&mssg, REQ_ENTRY_READS_RPLY_CODE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ( mssg.req != REQ_ENTRY_READS_RPLY_CODE ) || + ( mssg.src != world_server_mpi_rank ) || + ( mssg.dest != world_mpi_rank ) || + ( mssg.base_addr != addr ) || + ( mssg.len != 0 ) || + ( mssg.ver != 0 ) || + ( mssg.magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad data in req entry reads reply.\n", + world_mpi_rank, fcn_name); + } + } else { + + reported_entry_reads = mssg.count; + } + } + + if ( ! success ) { + + if ( reported_entry_reads != expected_entry_reads ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: rep/exp entry 0x%llx reads mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, (long long)addr, + reported_entry_reads, expected_entry_reads); + } + } + } + + return(success); + +} /* verify_entry_reads() */ + + +/***************************************************************************** + * Function: verify_entry_writes + * + * Purpose: Query the server to determine the number of times the + * indicated entry has been written since the last time the + * server counters were reset. + * + * Return TRUE if successful, and if the supplied expected + * number of reads matches the number of reads reported by + * the server process. + * + * Return FALSE and flag an error otherwise. + * + * Return: TRUE if successful, FALSE otherwise. + * + * Programmer: John Mainzer + * 5/6/10 + * + *------------------------------------------------------------------------- + */ +static hbool_t +verify_entry_writes(haddr_t addr, + int expected_entry_writes) +{ + const char * fcn_name = "verify_entry_writes()"; + hbool_t success = TRUE; + int reported_entry_writes; + struct mssg_t mssg; + + if ( success ) { + + /* compose the message */ + mssg.req = REQ_ENTRY_WRITES_CODE; + mssg.src = world_mpi_rank; + mssg.dest = world_server_mpi_rank; + mssg.mssg_num = -1; /* set by send function */ + mssg.base_addr = addr; + mssg.len = 0; /* not used */ + mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ + mssg.magic = MSSG_MAGIC; + + if ( ! send_mssg(&mssg, FALSE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: send_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ! recv_mssg(&mssg, REQ_ENTRY_WRITES_RPLY_CODE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ( mssg.req != REQ_ENTRY_WRITES_RPLY_CODE ) || + ( mssg.src != world_server_mpi_rank ) || + ( mssg.dest != world_mpi_rank ) || + ( mssg.base_addr != addr ) || + ( mssg.len != 0 ) || + ( mssg.ver != 0 ) || + ( mssg.magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad data in req entry writes reply.\n", + world_mpi_rank, fcn_name); + } + } else { + + reported_entry_writes = mssg.count; + } + } + + if ( ! success ) { + + if ( reported_entry_writes != expected_entry_writes ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: rep/exp entry 0x%llx writes mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, (long long)addr, + reported_entry_writes, expected_entry_writes); + } + } + } + + return(success); + +} /* verify_entry_writes() */ + + +/***************************************************************************** + * + * Function: verify_total_reads() + * + * Purpose: Query the server to obtain the total reads since the last + * server counter reset, and compare this value with the supplied + * expected value. + * + * If the values match, return TRUE. + * + * If the values don't match, flag an error and return FALSE. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/6/10 + * + *****************************************************************************/ +static hbool_t +verify_total_reads(int expected_total_reads) +{ + const char * fcn_name = "verify_total_reads()"; + hbool_t success = TRUE; /* will set to FALSE if appropriate. */ + long reported_total_reads; + struct mssg_t mssg; + + if ( success ) { + + /* compose the message */ + mssg.req = REQ_TTL_READS_CODE; + mssg.src = world_mpi_rank; + mssg.dest = world_server_mpi_rank; + mssg.mssg_num = -1; /* set by send function */ + mssg.base_addr = 0; + mssg.len = 0; + mssg.ver = 0; + mssg.count = 0; + mssg.magic = MSSG_MAGIC; + + if ( ! send_mssg(&mssg, FALSE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: send_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ! recv_mssg(&mssg, REQ_TTL_READS_RPLY_CODE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } else if ( ( mssg.req != REQ_TTL_READS_RPLY_CODE ) || + ( mssg.src != world_server_mpi_rank ) || + ( mssg.dest != world_mpi_rank ) || + ( mssg.base_addr != 0 ) || + ( mssg.len != 0 ) || + ( mssg.ver != 0 ) || + ( mssg.magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad data in req total reads reply.\n", + world_mpi_rank, fcn_name); + } + } else { + + reported_total_reads = mssg.count; + } + } + + if ( success ) { + + if ( reported_total_reads != expected_total_reads ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: reported/expected total reads mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, + reported_total_reads, expected_total_reads); + + } + } + } + + return(success); + +} /* verify_total_reads() */ + + +/***************************************************************************** + * + * Function: verify_total_writes() + * + * Purpose: Query the server to obtain the total writes since the last + * server counter reset, and compare this value with the supplied + * expected value. + * + * If the values match, return TRUE. + * + * If the values don't match, flag an error and return FALSE. + * + * Return: Success: TRUE + * + * Failure: FALSE + * + * Programmer: JRM -- 5/6/10 + * + *****************************************************************************/ +static hbool_t +verify_total_writes(int expected_total_writes) +{ + const char * fcn_name = "verify_total_writes()"; + hbool_t success = TRUE; /* will set to FALSE if appropriate. */ + long reported_total_writes; + struct mssg_t mssg; + + if ( success ) { + + /* compose the message */ + mssg.req = REQ_TTL_WRITES_CODE; + mssg.src = world_mpi_rank; + mssg.dest = world_server_mpi_rank; + mssg.mssg_num = -1; /* set by send function */ + mssg.base_addr = 0; + mssg.len = 0; + mssg.ver = 0; + mssg.count = 0; + mssg.magic = MSSG_MAGIC; + + if ( ! send_mssg(&mssg, FALSE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: send_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } + } + + if ( success ) { + + if ( ! recv_mssg(&mssg, REQ_TTL_WRITES_RPLY_CODE) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: recv_mssg() failed.\n", + world_mpi_rank, fcn_name); + } + } else if ( ( mssg.req != REQ_TTL_WRITES_RPLY_CODE ) || + ( mssg.src != world_server_mpi_rank ) || + ( mssg.dest != world_mpi_rank ) || + ( mssg.base_addr != 0 ) || + ( mssg.len != 0 ) || + ( mssg.ver != 0 ) || + ( mssg.magic != MSSG_MAGIC ) ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: Bad data in req total reads reply.\n", + world_mpi_rank, fcn_name); + } + } else { + + reported_total_writes = mssg.count; + } + } + + if ( success ) { + + if ( reported_total_writes != expected_total_writes ) { + + nerrors++; + success = FALSE; + if ( verbose ) { + HDfprintf(stdout, + "%d:%s: reported/expected total writes mismatch (%ld/%ld).\n", + world_mpi_rank, fcn_name, + reported_total_writes, expected_total_writes); + } + } + } + + return(success); + +} /* verify_total_writes() */ + /***************************************************************************** * Function: unlock_entry() @@ -3695,6 +5060,7 @@ unpin_entry(H5F_t * file_ptr, /****************************** test functions *******************************/ /*****************************************************************************/ + /***************************************************************************** * * Function: server_smoke_check() @@ -3707,21 +5073,7 @@ unpin_entry(H5F_t * file_ptr, * * Programmer: JRM -- 12/21/05 * - * Modifications: - * - * JRM -- 5/9/06 - * Added code supporting the write request ack message. This - * message was added to eliminate one possible cause of a - * bug spotted on cobalt. If this doesn't fix the problem, - * it will narrow things down a bit. - * - * JRM -- 5/10/06 - * Added call to do_sync(). This is part of an attempt to - * optimize out the slowdown caused by the addition of the - * write request ack message. - * *****************************************************************************/ - hbool_t server_smoke_check(void) { @@ -3761,6 +5113,7 @@ server_smoke_check(void) mssg.base_addr = data[world_mpi_rank].base_addr; mssg.len = data[world_mpi_rank].len; mssg.ver = ++(data[world_mpi_rank].ver); + mssg.count = 0; mssg.magic = MSSG_MAGIC; if ( ! ( success = send_mssg(&mssg, FALSE) ) ) { @@ -3813,6 +5166,50 @@ server_smoke_check(void) do_sync(); + /* barrier to allow all writes to complete */ + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: barrier 1 failed.\n", + world_mpi_rank, fcn_name); + } + } + + /* verify that the expected entries have been written, the total */ + if ( success ) { + + success = verify_entry_writes(data[world_mpi_rank].base_addr, 1); + } + + if ( success ) { + + success = verify_entry_reads(data[world_mpi_rank].base_addr, 0); + } + + if ( success ) { + + success = verify_total_writes(world_mpi_size - 1); + } + + if ( success ) { + + success = verify_total_reads(0); + } + + /* barrier to allow all writes to complete */ + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + + HDfprintf(stdout, "%d:%s: barrier 2 failed.\n", + world_mpi_rank, fcn_name); + } + } + /* compose the read message */ mssg.req = READ_REQ_CODE; mssg.src = world_mpi_rank; @@ -3821,6 +5218,7 @@ server_smoke_check(void) mssg.base_addr = data[world_mpi_rank].base_addr; mssg.len = data[world_mpi_rank].len; mssg.ver = 0; /* bogus -- should be corrected by server */ + mssg.count = 0; mssg.magic = MSSG_MAGIC; if ( success ) { @@ -3872,6 +5270,98 @@ server_smoke_check(void) } } + /* barrier to allow all writes to complete */ + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + HDfprintf(stdout, "%d:%s: barrier 3 failed.\n", + world_mpi_rank, fcn_name); + } + } + + /* verify that the expected entries have been read, and the total */ + if ( success ) { + + success = verify_entry_writes(data[world_mpi_rank].base_addr, 1); + } + + if ( success ) { + + success = verify_entry_reads(data[world_mpi_rank].base_addr, 1); + } + + if ( success ) { + + success = verify_total_writes(world_mpi_size - 1); + } + + if ( success ) { + + success = verify_total_reads(world_mpi_size - 1); + } + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + + HDfprintf(stdout, "%d:%s: barrier 4 failed.\n", + world_mpi_rank, fcn_name); + } + } + + /* reset the counters */ + if ( success ) { + + success = reset_server_counts(); + } + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + + HDfprintf(stdout, "%d:%s: barrier 5 failed.\n", + world_mpi_rank, fcn_name); + } + } + + /* verify that the counters have been reset */ + if ( success ) { + + success = verify_entry_writes(data[world_mpi_rank].base_addr, 0); + } + + if ( success ) { + + success = verify_entry_reads(data[world_mpi_rank].base_addr, 0); + } + + if ( success ) { + + success = verify_total_writes(0); + } + + if ( success ) { + + success = verify_total_reads(0); + } + + if ( MPI_SUCCESS != MPI_Barrier(file_mpi_comm) ) { + + success = FALSE; + nerrors++; + if ( verbose ) { + + HDfprintf(stdout, "%d:%s: barrier 6 failed.\n", + world_mpi_rank, fcn_name); + } + } + /* compose the done message */ mssg.req = DONE_REQ_CODE; mssg.src = world_mpi_rank; @@ -3880,6 +5370,7 @@ server_smoke_check(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; mssg.magic = MSSG_MAGIC; if ( success ) { @@ -3918,6 +5409,7 @@ server_smoke_check(void) } /* server_smoke_check() */ + /***************************************************************************** * * Function: smoke_check_1() @@ -3930,14 +5422,9 @@ server_smoke_check(void) * * Programmer: JRM -- 1/4/06 * - * Modifications: - * - * None. - * *****************************************************************************/ - hbool_t -smoke_check_1(void) +smoke_check_1(int metadata_write_strategy) { const char * fcn_name = "smoke_check_1()"; hbool_t success = TRUE; @@ -3948,9 +5435,25 @@ smoke_check_1(void) H5C_t * cache_ptr = NULL; struct mssg_t mssg; - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { + + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #1 -- process 0 only md write strategy"); + } + break; - TESTING("smoke check #1"); + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #1 -- distributed md write strategy"); + } + break; + + default: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #1 -- unknown md write strategy"); + } + break; } nerrors = 0; @@ -3971,7 +5474,8 @@ smoke_check_1(void) } else /* run the clients */ { - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -4039,6 +5543,7 @@ smoke_check_1(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -4077,7 +5582,7 @@ smoke_check_1(void) } /* smoke_check_1() */ - + /***************************************************************************** * * Function: smoke_check_2() @@ -4093,18 +5598,9 @@ smoke_check_1(void) * * Programmer: JRM -- 1/12/06 * - * Modifications: - * - * JRM -- 4/13/06 - * Added pinned entry tests. - * - * JRM -- 4/28/06 - * Modified test to move pinned entries. - * *****************************************************************************/ - hbool_t -smoke_check_2(void) +smoke_check_2(int metadata_write_strategy) { const char * fcn_name = "smoke_check_2()"; hbool_t success = TRUE; @@ -4115,9 +5611,25 @@ smoke_check_2(void) H5C_t * cache_ptr = NULL; struct mssg_t mssg; - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { - TESTING("smoke check #2"); + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #2 -- process 0 only md write strategy"); + } + break; + + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #2 -- distributed md write strategy"); + } + break; + + default: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #2 -- unknown md write strategy"); + } + break; } nerrors = 0; @@ -4138,7 +5650,8 @@ smoke_check_2(void) } else /* run the clients */ { - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -4253,6 +5766,7 @@ smoke_check_2(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -4291,7 +5805,7 @@ smoke_check_2(void) } /* smoke_check_2() */ - + /***************************************************************************** * * Function: smoke_check_3() @@ -4310,18 +5824,9 @@ smoke_check_2(void) * * Programmer: JRM -- 1/13/06 * - * Modifications: - * - * Added code intended to ensure correct operation with large - * numbers of processors. - * JRM - 1/31/06 - * - * Added pinned entry tests. JRM - 4/14/06 - * *****************************************************************************/ - hbool_t -smoke_check_3(void) +smoke_check_3(int metadata_write_strategy) { const char * fcn_name = "smoke_check_3()"; hbool_t success = TRUE; @@ -4338,9 +5843,25 @@ smoke_check_3(void) H5C_t * cache_ptr = NULL; struct mssg_t mssg; - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { + + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #3 -- process 0 only md write strategy"); + } + break; + + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #3 -- distributed md write strategy"); + } + break; - TESTING("smoke check #3"); + default: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #3 -- unknown md write strategy"); + } + break; } /* 0 */ @@ -4373,7 +5894,8 @@ smoke_check_3(void) /* 1 */ if ( verbose ) {HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++);} - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -4612,6 +6134,7 @@ smoke_check_3(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -4654,7 +6177,7 @@ smoke_check_3(void) } /* smoke_check_3() */ - + /***************************************************************************** * * Function: smoke_check_4() @@ -4673,20 +6196,9 @@ smoke_check_3(void) * * Programmer: JRM -- 1/13/06 * - * Modifications: - * - * Added code intended to insure correct operation with large - * numbers of processors. - * JRM - 1/31/06 - * - * Added code testing pinned insertion of entries. - * - * JRM - 8/15/06 - * *****************************************************************************/ - hbool_t -smoke_check_4(void) +smoke_check_4(int metadata_write_strategy) { const char * fcn_name = "smoke_check_4()"; hbool_t success = TRUE; @@ -4701,9 +6213,25 @@ smoke_check_4(void) H5C_t * cache_ptr = NULL; struct mssg_t mssg; - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { + + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #4 -- process 0 only md write strategy"); + } + break; + + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #4 -- distributed md write strategy"); + } + break; - TESTING("smoke check #4"); + default: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #4 -- unknown md write strategy"); + } + break; } nerrors = 0; @@ -4724,7 +6252,8 @@ smoke_check_4(void) } else /* run the clients */ { - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -4926,6 +6455,7 @@ smoke_check_4(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -4965,7 +6495,7 @@ smoke_check_4(void) } /* smoke_check_4() */ - + /***************************************************************************** * * Function: smoke_check_5() @@ -4979,16 +6509,9 @@ smoke_check_4(void) * * Programmer: JRM -- 5/18/06 * - * Modifications: - * - * JRM -- 7/12/06 - * Added test code for H5AC_expunge_entry() and - * H5AC_resize_entry(). - * *****************************************************************************/ - hbool_t -smoke_check_5(void) +smoke_check_5(int metadata_write_strategy) { const char * fcn_name = "smoke_check_5()"; hbool_t success = TRUE; @@ -5001,11 +6524,28 @@ smoke_check_5(void) H5C_t * cache_ptr = NULL; struct mssg_t mssg; - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { + + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #5 -- process 0 only md write strategy"); + } + break; - TESTING("smoke check #5"); + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #5 -- distributed md write strategy"); + } + break; + + default: + if ( world_mpi_rank == 0 ) { + TESTING("smoke check #5 -- unknown md write strategy"); + } + break; } + /* 0 */ if ( verbose ) { HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++); } @@ -5043,7 +6583,8 @@ smoke_check_5(void) HDfprintf(stderr, "%d: cp = %d\n", world_mpi_rank, cp++); } - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -5180,6 +6721,7 @@ smoke_check_5(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -5223,7 +6765,7 @@ smoke_check_5(void) } /* smoke_check_5() */ - + /***************************************************************************** * * Function: trace_file_check() @@ -5245,7 +6787,7 @@ smoke_check_5(void) * - H5AC_flush() * - H5AC_set() * - H5AC_mark_entry_dirty() - * H5AC_move_entry() + * - H5AC_move_entry() * - H5AC_pin_protected_entry() * - H5AC_protect() * - H5AC_unpin_entry() @@ -5262,47 +6804,69 @@ smoke_check_5(void) * * Programmer: JRM -- 6/13/06 * - * Modifications: - * - * JRM -- 7/11/06 - * Updated for H5AC_expunge_entry() and - * H5AC_resize_entry(). - * *****************************************************************************/ - hbool_t -trace_file_check(void) +trace_file_check(int metadata_write_strategy) { hbool_t success = TRUE; #ifdef H5_METADATA_TRACE_FILE const char * fcn_name = "trace_file_check()"; - const char * expected_output[] = + const char *((* expected_output)[]) = NULL; + const char * expected_output_0[] = { "### HDF5 metadata cache trace file version 1 ###\n", - "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 1048576 0.500000 16777216 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 0\n", - "H5AC_set 0x0 15 0x0 2 0\n", - "H5AC_set 0x2 15 0x0 2 0\n", - "H5AC_set 0x4 15 0x0 4 0\n", - "H5AC_set 0x8 15 0x0 6 0\n", - "H5AC_protect 0 15 H5AC_WRITE 2 1\n", - "H5AC_mark_entry_dirty 0 0\n", - "H5AC_unprotect 0 15 0 0 0\n", - "H5AC_protect 2 15 H5AC_WRITE 2 1\n", - "H5AC_pin_protected_entry 2 0\n", - "H5AC_unprotect 2 15 0 0 0\n", - "H5AC_unpin_entry 2 0\n", - "H5AC_expunge_entry 2 15 0\n", - "H5AC_protect 4 15 H5AC_WRITE 4 1\n", - "H5AC_pin_protected_entry 4 0\n", - "H5AC_unprotect 4 15 0 0 0\n", - "H5AC_mark_entry_dirty 0x4 0 0 0\n", - "H5AC_resize_entry 0x4 2 0\n", - "H5AC_resize_entry 0x4 4 0\n", - "H5AC_unpin_entry 4 0\n", - "H5AC_move_entry 0 8a65 15 0\n", - "H5AC_move_entry 8a65 0 15 0\n", + "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 2097152 0.300000 33554432 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 0 0\n", + "H5AC_set 0x200 25 0x0 2 0\n", + "H5AC_set 0x202 25 0x0 2 0\n", + "H5AC_set 0x204 25 0x0 4 0\n", + "H5AC_set 0x208 25 0x0 6 0\n", + "H5AC_protect 0x200 25 H5AC_WRITE 2 1\n", + "H5AC_mark_entry_dirty 0x200 0\n", + "H5AC_unprotect 0x200 25 0 0 0\n", + "H5AC_protect 0x202 25 H5AC_WRITE 2 1\n", + "H5AC_pin_protected_entry 0x202 0\n", + "H5AC_unprotect 0x202 25 0 0 0\n", + "H5AC_unpin_entry 0x202 0\n", + "H5AC_expunge_entry 0x202 25 0\n", + "H5AC_protect 0x204 25 H5AC_WRITE 4 1\n", + "H5AC_pin_protected_entry 0x204 0\n", + "H5AC_unprotect 0x204 25 0 0 0\n", + "H5AC_mark_entry_dirty 0x204 0 0 0\n", + "H5AC_resize_entry 0x204 2 0\n", + "H5AC_resize_entry 0x204 4 0\n", + "H5AC_unpin_entry 0x204 0\n", + "H5AC_move_entry 0x200 0x8c65 25 0\n", + "H5AC_move_entry 0x8c65 0x200 25 0\n", + "H5AC_flush 0\n", + NULL + }; + const char * expected_output_1[] = + { + "### HDF5 metadata cache trace file version 1 ###\n", + "H5AC_set_cache_auto_resize_config 1 0 1 0 \"t_cache_trace.txt\" 1 0 2097152 0.300000 33554432 1048576 50000 1 0.900000 2.000000 1 1.000000 0.250000 1 4194304 3 0.999000 0.900000 1 1048576 3 1 0.100000 262144 1 0\n", + "H5AC_set 0x200 25 0x0 2 0\n", + "H5AC_set 0x202 25 0x0 2 0\n", + "H5AC_set 0x204 25 0x0 4 0\n", + "H5AC_set 0x208 25 0x0 6 0\n", + "H5AC_protect 0x200 25 H5AC_WRITE 2 1\n", + "H5AC_mark_entry_dirty 0x200 0\n", + "H5AC_unprotect 0x200 25 0 0 0\n", + "H5AC_protect 0x202 25 H5AC_WRITE 2 1\n", + "H5AC_pin_protected_entry 0x202 0\n", + "H5AC_unprotect 0x202 25 0 0 0\n", + "H5AC_unpin_entry 0x202 0\n", + "H5AC_expunge_entry 0x202 25 0\n", + "H5AC_protect 0x204 25 H5AC_WRITE 4 1\n", + "H5AC_pin_protected_entry 0x204 0\n", + "H5AC_unprotect 0x204 25 0 0 0\n", + "H5AC_mark_entry_dirty 0x204 0 0 0\n", + "H5AC_resize_pinned_entry 0x204 2 0\n", + "H5AC_resize_pinned_entry 0x204 4 0\n", + "H5AC_unpin_entry 0x204 0\n", + "H5AC_move_entry 0x200 0x8c65 25 0\n", + "H5AC_move_entry 0x8c65 0x200 25 0\n", "H5AC_flush 0\n", NULL }; @@ -5322,9 +6886,39 @@ trace_file_check(void) #endif /* H5_METADATA_TRACE_FILE */ - if ( world_mpi_rank == 0 ) { + switch ( metadata_write_strategy ) { + + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: +#ifdef H5_METADATA_TRACE_FILE + expected_output = &expected_output_0; +#endif /* H5_METADATA_TRACE_FILE */ + if ( world_mpi_rank == 0 ) { + TESTING( + "trace file collection -- process 0 only md write strategy"); + } + break; - TESTING("trace file collection"); + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: +#ifdef H5_METADATA_TRACE_FILE + expected_output = &expected_output_1; +#endif /* H5_METADATA_TRACE_FILE */ + if ( world_mpi_rank == 0 ) { + TESTING( + "trace file collection -- distributed md write strategy"); + } + break; + + default: +#ifdef H5_METADATA_TRACE_FILE + /* this will almost certainly cause a failure, but it keeps us + * from de-referenceing a NULL pointer. + */ + expected_output = &expected_output_0; +#endif /* H5_METADATA_TRACE_FILE */ + if ( world_mpi_rank == 0 ) { + TESTING("trace file collection -- unknown md write strategy"); + } + break; } #ifdef H5_METADATA_TRACE_FILE @@ -5348,7 +6942,8 @@ trace_file_check(void) else /* run the clients */ { - if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr) ) { + if ( ! setup_cache_for_test(&fid, &file_ptr, &cache_ptr, + metadata_write_strategy) ) { nerrors++; fid = -1; @@ -5481,6 +7076,7 @@ trace_file_check(void) mssg.base_addr = 0; /* not used */ mssg.len = 0; /* not used */ mssg.ver = 0; /* not used */ + mssg.count = 0; /* not used */ mssg.magic = MSSG_MAGIC; if ( success ) { @@ -5515,13 +7111,13 @@ trace_file_check(void) i = 0; while ( ( nerrors == 0 ) && ( ! done ) ) { - if ( expected_output[i] == NULL ) { + if ( (*expected_output)[i] == NULL ) { expected_line_len = 0; } else { - expected_line_len = HDstrlen(expected_output[i]); + expected_line_len = HDstrlen((*expected_output)[i]); } if ( HDfgets(buffer, 255, trace_file_ptr) != NULL ) { @@ -5538,7 +7134,7 @@ trace_file_check(void) done = TRUE; } else if ( ( actual_line_len != expected_line_len ) || - ( HDstrcmp(buffer, expected_output[i]) != 0 ) ) { + ( HDstrcmp(buffer, (*expected_output)[i]) != 0 ) ) { nerrors++; if ( verbose ) { @@ -5546,7 +7142,7 @@ trace_file_check(void) "%d:%s: Unexpected data in trace file line %d.\n", world_mpi_rank, fcn_name, i); HDfprintf(stdout, "%d:%s: expected = \"%s\" %d\n", - world_mpi_rank, fcn_name, expected_output[i], + world_mpi_rank, fcn_name, (*expected_output)[i], expected_line_len); HDfprintf(stdout, "%d:%s: actual = \"%s\" %d\n", world_mpi_rank, fcn_name, buffer, @@ -5770,22 +7366,28 @@ main(int argc, char **argv) server_smoke_check(); #endif #if 1 - smoke_check_1(); + smoke_check_1(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + smoke_check_1(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif #if 1 - smoke_check_2(); + smoke_check_2(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + smoke_check_2(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif #if 1 - smoke_check_3(); + smoke_check_3(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + smoke_check_3(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif #if 1 - smoke_check_4(); + smoke_check_4(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + smoke_check_4(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif #if 1 - smoke_check_5(); + smoke_check_5(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + smoke_check_5(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif #if 1 - trace_file_check(); + trace_file_check(H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + trace_file_check(H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); #endif finish: |