diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2015-06-27 16:45:21 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2015-06-27 16:45:21 (GMT) |
commit | 8e94745298c2ceacf80b21f0d16e887217527916 (patch) | |
tree | ddd75b1badbe9053c60e214673feffa382f4a4f4 /src | |
parent | 58a95d0d4a593157bb7e77f71347ee50783a11a0 (diff) | |
download | hdf5-8e94745298c2ceacf80b21f0d16e887217527916.zip hdf5-8e94745298c2ceacf80b21f0d16e887217527916.tar.gz hdf5-8e94745298c2ceacf80b21f0d16e887217527916.tar.bz2 |
[svn-r27293] Description:
Split parallel metadata cache code into separate source code modules.
Tested on:
MacOSX/64 10.10.3 (amazon) w/serial & parallel
(too minor for h5committest)
Diffstat (limited to 'src')
-rw-r--r-- | src/H5AC.c | 2256 | ||||
-rw-r--r-- | src/H5ACmpio.c | 2295 | ||||
-rw-r--r-- | src/H5ACpkg.h | 25 | ||||
-rw-r--r-- | src/H5C.c | 1304 | ||||
-rw-r--r-- | src/H5Cmpio.c | 1260 | ||||
-rw-r--r-- | src/H5Cpkg.h | 4 | ||||
-rw-r--r-- | src/H5Cprivate.h | 4 | ||||
-rw-r--r-- | src/Makefile.am | 6 | ||||
-rw-r--r-- | src/Makefile.in | 10 |
9 files changed, 3658 insertions, 3506 deletions
@@ -44,13 +44,10 @@ #include "H5private.h" /* Generic Functions */ #include "H5ACpkg.h" /* Metadata cache */ #include "H5Cpkg.h" /* Cache */ -#include "H5Dprivate.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5FDprivate.h" /* File drivers */ -#include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ -#include "H5MMprivate.h" /* Memory management */ #include "H5Pprivate.h" /* Property lists */ @@ -63,38 +60,6 @@ /* Local Typedefs */ /******************/ -#ifdef H5_HAVE_PARALLEL -/**************************************************************************** - * - * structure H5AC_slist_entry_t - * - * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t - * and the cleaned entry list maintained via the c_slist_ptr field of - * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned - * entries. Unfortunately, the slist code makes us define a dynamically - * allocated structure to store these offsets in. This structure serves - * that purpose. Its fields are as follows: - * - * addr: file offset of a metadata entry. Entries are added to this - * list (if they aren't there already) when they are marked - * dirty in an unprotect, inserted, or moved. They are - * removed when they appear in a clean entries broadcast. - * - ****************************************************************************/ -typedef struct H5AC_slist_entry_t -{ - haddr_t addr; -} H5AC_slist_entry_t; - -/* User data for address list building callbacks */ -typedef struct H5AC_addr_list_ud_t -{ - H5AC_aux_t * aux_ptr; /* 'Auxiliary' parallel cache info */ - haddr_t * addr_buf_ptr; /* Array to store addresses */ - int i; /* Counter for position in array */ -} H5AC_addr_list_ud_t; -#endif /* H5_HAVE_PARALLEL */ - /********************/ /* Local Prototypes */ @@ -105,39 +70,6 @@ static herr_t H5AC__check_if_write_permitted(const H5F_t *f, static herr_t H5AC__ext_config_2_int_config(H5AC_cache_config_t *ext_conf_ptr, H5C_auto_size_ctl_t *int_conf_ptr); -#ifdef H5_HAVE_PARALLEL -static herr_t H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, - int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__broadcast_clean_list(H5AC_t *cache_ptr); -static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr, - H5AC_aux_t *aux_ptr, int sync_point_op); -static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, - int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__flush_entries(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr); -static herr_t H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr); -static herr_t H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, - hbool_t was_dirty, unsigned flags); -static herr_t H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr); -static herr_t H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, - haddr_t new_addr); -static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, - hid_t dxpl_id); -static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr, - haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr, - int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates, - haddr_t *candidates_list_ptr); -static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op); -#endif /* H5_HAVE_PARALLEL */ - /*********************/ /* Package Variables */ @@ -164,14 +96,6 @@ hid_t H5AC_ind_dxpl_id = (-1); /* Local Variables */ /*******************/ -#ifdef H5_HAVE_PARALLEL -/* Declare a free list to manage the H5AC_aux_t struct */ -H5FL_DEFINE_STATIC(H5AC_aux_t); - -/* Declare a free list to manage the H5AC_slist_entry_t struct */ -H5FL_DEFINE_STATIC(H5AC_slist_entry_t); -#endif /* H5_HAVE_PARALLEL */ - static const char *H5AC_entry_type_names[H5AC_NTYPES] = { "B-tree nodes", @@ -1490,78 +1414,6 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5AC_unprotect() */ - -/*------------------------------------------------------------------------- - * Function: HA5C_set_sync_point_done_callback - * - * Purpose: Set the value of the sync_point_done callback. This - * callback is used by the parallel test code to verify - * that the expected writes and only the expected writes - * take place during a sync point. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 5/9/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5AC_set_sync_point_done_callback(H5C_t * cache_ptr, - void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl)) -{ - H5AC_aux_t * aux_ptr; - - FUNC_ENTER_NOAPI_NOINIT_NOERR - - /* Sanity checks */ - HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - - aux_ptr->sync_point_done = sync_point_done; - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC_set_sync_point_done_callback() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: HA5C_set_write_done_callback - * - * Purpose: Set the value of the write_done callback. This callback - * is used to improve performance of the parallel test bed - * for the cache. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 5/11/06 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5AC_set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)) -{ - H5AC_aux_t * aux_ptr; - - FUNC_ENTER_NOAPI_NOINIT_NOERR - - /* Sanity checks */ - HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert( aux_ptr != NULL ); - HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); - - aux_ptr->write_done = write_done; - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC_set_write_done_callback() */ -#endif /* H5_HAVE_PARALLEL */ - #ifndef NDEBUG /* debugging functions */ /*------------------------------------------------------------------------- @@ -2119,64 +1971,6 @@ done: } /* H5AC_open_trace_file() */ -/*------------------------------------------------------------------------- - * Function: H5AC_add_candidate() - * - * Purpose: Add the supplied metadata entry address to the candidate - * list. Verify that each entry added does not appear in - * the list prior to its insertion. - * - * This function is intended for used in constructing list - * of entried to be flushed during sync points. It shouldn't - * be called anywhere else. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr) -{ - H5AC_aux_t * aux_ptr; - H5AC_slist_entry_t * slist_entry_ptr = NULL; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert(aux_ptr->candidate_slist_ptr != NULL); - - /* Construct an entry for the supplied address, and insert - * it into the candidate slist. - */ - if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate candidate slist entry") - slist_entry_ptr->addr = addr; - - if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist") - -done: - /* Clean up on error */ - if(ret_value < 0) - if(slist_entry_ptr) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC_add_candidate() */ -#endif /* H5_HAVE_PARALLEL */ - - /*************************************************************************/ /*************************** Debugging Functions: ************************/ /*************************************************************************/ @@ -2288,236 +2082,6 @@ done: /**************************** Private Functions: *************************/ /*************************************************************************/ -/*------------------------------------------------------------------------- - * - * Function: H5AC__broadcast_candidate_list() - * - * Purpose: Broadcast the contents of the process 0 candidate entry - * slist. In passing, also remove all entries from said - * list. As the application of this will be handled by - * the same functions on all processes, construct and - * return a copy of the list in the same format as that - * received by the other processes. Note that if this - * copy is returned in *haddr_buf_ptr_ptr, the caller - * must free it. - * - * This function must only be called by the process with - * MPI_rank 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 7/1/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, int *num_entries_ptr, - haddr_t **haddr_buf_ptr_ptr) -{ - H5AC_aux_t * aux_ptr = NULL; - haddr_t * haddr_buf_ptr = NULL; - int mpi_result; - int num_entries; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank == 0); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert(aux_ptr->candidate_slist_ptr != NULL); - HDassert(num_entries_ptr != NULL); - HDassert(*num_entries_ptr == 0); - HDassert(haddr_buf_ptr_ptr != NULL); - HDassert(*haddr_buf_ptr_ptr == NULL); - - /* First broadcast the number of entries in the list so that the - * receivers can set up buffers to receive them. If there aren't - * any, we are done. - */ - num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr); - if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - - if(num_entries > 0) { - size_t buf_size = 0; - int chk_num_entries = 0; - - /* convert the candidate list into the format we - * are used to receiving from process 0, and also load it - * into a buffer for transmission. - */ - if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") - HDassert(chk_num_entries == num_entries); - HDassert(haddr_buf_ptr != NULL); - - /* Now broadcast the list of candidate entries */ - buf_size = sizeof(haddr_t) * (size_t)num_entries; - if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - } /* end if */ - - /* Pass the number of entries and the buffer pointer - * back to the caller. Do this so that we can use the same code - * to apply the candidate list to all the processes. - */ - *num_entries_ptr = num_entries; - *haddr_buf_ptr_ptr = haddr_buf_ptr; - -done: - if(ret_value < 0) - if(haddr_buf_ptr) - haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__broadcast_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__broadcast_clean_list_cb() - * - * Purpose: Skip list callback for building array of addresses for - * broadcasting the clean list. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: Quincey Koziol, 6/12/15 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__broadcast_clean_list_cb(void *_item, void H5_ATTR_UNUSED *_key, - void *_udata) -{ - H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */ - H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */ - haddr_t addr; - - FUNC_ENTER_STATIC_NOERR - - /* Sanity checks */ - HDassert(slist_entry_ptr); - HDassert(udata); - - /* Store the entry's address in the buffer */ - addr = slist_entry_ptr->addr; - udata->addr_buf_ptr[udata->i] = addr; - udata->i++; - - /* now release the entry */ - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - /* and also remove the matching entry from the dirtied list - * if it exists. - */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(udata->aux_ptr->d_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC__broadcast_clean_list_cb() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__broadcast_clean_list() - * - * Purpose: Broadcast the contents of the process 0 cleaned entry - * slist. In passing, also remove all entries from said - * list, and also remove any matching entries from the dirtied - * slist. - * - * This function must only be called by the process with - * MPI_rank 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 7/1/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__broadcast_clean_list(H5AC_t * cache_ptr) -{ - haddr_t * addr_buf_ptr = NULL; - H5AC_aux_t * aux_ptr; - int mpi_result; - int num_entries = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr; - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank == 0); - HDassert(aux_ptr->c_slist_ptr != NULL); - - /* First broadcast the number of entries in the list so that the - * receives can set up a buffer to receive them. If there aren't - * any, we are done. - */ - num_entries = (int)H5SL_count(aux_ptr->c_slist_ptr); - if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - - if(num_entries > 0) { - H5AC_addr_list_ud_t udata; - size_t buf_size; - - /* allocate a buffer to store the list of entry base addresses in */ - buf_size = sizeof(haddr_t) * (size_t)num_entries; - if(NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") - - /* Set up user data for callback */ - udata.aux_ptr = aux_ptr; - udata.addr_buf_ptr = addr_buf_ptr; - udata.i = 0; - - /* Free all the clean list entries, building the address list in the callback */ - /* (Callback also removes the matching entries from the dirtied list) */ - if(H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") - - /* Now broadcast the list of cleaned entries */ - if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)addr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - } /* end if */ - - /* if it is defined, call the sync point done callback. Note - * that this callback is defined purely for testing purposes, - * and should be undefined under normal operating circumstances. - */ - if(aux_ptr->sync_point_done) - (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr); - -done: - if(addr_buf_ptr) - addr_buf_ptr = (haddr_t *)H5MM_xfree((void *)addr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__broadcast_clean_list() */ -#endif /* H5_HAVE_PARALLEL */ - /*------------------------------------------------------------------------- * @@ -2575,202 +2139,6 @@ H5_ATTR_UNUSED /*------------------------------------------------------------------------- - * Function: H5AC__construct_candidate_list() - * - * Purpose: In the parallel case when the metadata_write_strategy is - * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses - * this function to construct the list of cache entries to - * be flushed. This list is then propagated to the other - * caches, and then flushed in a distributed fashion. - * - * The sync_point_op parameter is used to determine the extent - * of the flush. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr, - int sync_point_op) -{ - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE) || (aux_ptr->mpi_rank == 0)); - HDassert(aux_ptr->d_slist_ptr != NULL); - HDassert(aux_ptr->c_slist_ptr != NULL); - HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0); - HDassert(aux_ptr->candidate_slist_ptr != NULL); - HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0); - HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || (sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE)); - - switch(sync_point_op) { - case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: - if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.") - break; - - case H5AC_SYNC_POINT_OP__FLUSH_CACHE: - if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.") - break; - - default: - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.") - break; - } /* end switch */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__construct_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__copy_candidate_list_to_buffer_cb - * - * Purpose: Skip list callback for building array of addresses for - * broadcasting the candidate list. - * - * Return: Return SUCCEED on success, and FAIL on failure. - * - * Programmer: Quincey Koziol, 6/12/15 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key, - void *_udata) -{ - H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */ - H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */ - - FUNC_ENTER_STATIC_NOERR - - /* Sanity checks */ - HDassert(slist_entry_ptr); - HDassert(udata); - - /* Store the entry's address in the buffer */ - udata->addr_buf_ptr[udata->i] = slist_entry_ptr->addr; - udata->i++; - - /* now release the entry */ - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC__copy_candidate_list_to_buffer_cb() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__copy_candidate_list_to_buffer - * - * Purpose: Allocate buffer(s) and copy the contents of the candidate - * entry slist into it (them). In passing, remove all - * entries from the candidate slist. Note that the - * candidate slist must not be empty. - * - * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer - * of MPI_Offset, copy the contents of the candidate - * entry list into it with the appropriate conversions, - * and return the base address of the buffer in - * *MPI_Offset_buf_ptr. Note that this is the buffer - * used by process 0 to transmit the list of entries to - * be flushed to all other processes (in this file group). - * - * Similarly, allocate a buffer of haddr_t, load the contents - * of the candidate list into this buffer, and return its - * base address in *haddr_buf_ptr_ptr. Note that this - * latter buffer is constructed unconditionally. - * - * In passing, also remove all entries from the candidate - * entry slist. - * - * Return: Return SUCCEED on success, and FAIL on failure. - * - * Programmer: John Mainzer, 4/19/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, int *num_entries_ptr, - haddr_t **haddr_buf_ptr_ptr) -{ - H5AC_aux_t * aux_ptr = NULL; - H5AC_addr_list_ud_t udata; - haddr_t * haddr_buf_ptr = NULL; - size_t buf_size; - int num_entries = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert(aux_ptr->candidate_slist_ptr != NULL); - HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) > 0); - HDassert(num_entries_ptr != NULL); - HDassert(*num_entries_ptr == 0); - HDassert(haddr_buf_ptr_ptr != NULL); - HDassert(*haddr_buf_ptr_ptr == NULL); - - num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr); - - /* allocate a buffer(s) to store the list of candidate entry - * base addresses in - */ - buf_size = sizeof(haddr_t) * (size_t)num_entries; - if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") - - /* Set up user data for callback */ - udata.aux_ptr = aux_ptr; - udata.addr_buf_ptr = haddr_buf_ptr; - udata.i = 0; - - /* Free all the candidate list entries, building the address list in the callback */ - if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries") - - /* Pass the number of entries and the buffer pointer - * back to the caller. - */ - *num_entries_ptr = num_entries; - *haddr_buf_ptr_ptr = haddr_buf_ptr; - -done: - if(ret_value < 0) - if(haddr_buf_ptr) - haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__copy_candidate_list_to_buffer() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5AC__ext_config_2_int_config() * * Purpose: Utility function to translate an instance of @@ -2836,1630 +2204,6 @@ done: } /* H5AC__ext_config_2_int_config() */ -/*------------------------------------------------------------------------- - * - * Function: H5AC__log_deleted_entry() - * - * Purpose: Log an entry which has been deleted. - * - * Only called for mpi_rank 0. We must make sure that the entry - * doesn't appear in the cleaned or dirty entry lists. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 6/29/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - H5AC_slist_entry_t * slist_entry_ptr = NULL; - haddr_t addr; - - FUNC_ENTER_STATIC_NOERR - - /* Sanity checks */ - HDassert(entry_ptr); - addr = entry_ptr->addr; - cache_ptr = entry_ptr->cache_ptr; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank == 0); - HDassert(aux_ptr->d_slist_ptr != NULL); - HDassert(aux_ptr->c_slist_ptr != NULL); - - /* if the entry appears in the dirtied entry slist, remove it. */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - /* if the entry appears in the cleaned entry slist, remove it. */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC__log_deleted_entry() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__log_dirtied_entry() - * - * Purpose: Update the dirty_bytes count for a newly dirtied entry. - * - * If mpi_rank isn't 0, this simply means adding the size - * of the entries to the dirty_bytes count. - * - * If mpi_rank is 0, we must first check to see if the entry - * appears in the dirty entries slist. If it is, do nothing. - * If it isn't, add the size to th dirty_bytes count, add the - * entry to the dirty entries slist, and remove it from the - * cleaned list (if it is present there). - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 6/29/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(entry_ptr); - HDassert(entry_ptr->is_dirty == FALSE); - cache_ptr = entry_ptr->cache_ptr; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - - if(aux_ptr->mpi_rank == 0) { - H5AC_slist_entry_t *slist_entry_ptr; - haddr_t addr = entry_ptr->addr; - - /* Sanity checks */ - HDassert(aux_ptr->d_slist_ptr != NULL); - HDassert(aux_ptr->c_slist_ptr != NULL); - - if(NULL == H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) { - /* insert the address of the entry in the dirty entry list, and - * add its size to the dirty_bytes count. - */ - if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") - slist_entry_ptr->addr = addr; - - if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") - - aux_ptr->dirty_bytes += entry_ptr->size; -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->unprotect_dirty_bytes += entry_ptr->size; - aux_ptr->unprotect_dirty_bytes_updates += 1; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - } /* end if */ - - /* the entry is dirty. If it exists on the cleaned entries list, - * remove it. - */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - } /* end if */ - else { - aux_ptr->dirty_bytes += entry_ptr->size; -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->unprotect_dirty_bytes += entry_size; - aux_ptr->unprotect_dirty_bytes_updates += 1; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - } /* end else */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__log_dirtied_entry() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__log_flushed_entry() - * - * Purpose: Update the clean entry slist for the flush of an entry -- - * specifically, if the entry has been cleared, remove it - * from both the cleaned and dirtied lists if it is present. - * Otherwise, if the entry was dirty, insert the indicated - * entry address in the clean slist if it isn't there already. - * - * This function is only used in PHDF5, and should only - * be called for the process with mpi rank 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 6/29/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty, - unsigned flags) -{ - hbool_t cleared; - H5AC_aux_t * aux_ptr; - H5AC_slist_entry_t * slist_entry_ptr = NULL; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity check */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank == 0); - HDassert(aux_ptr->c_slist_ptr != NULL); - - /* Set local flags */ - cleared = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0); - - if(cleared) { - /* If the entry has been cleared, must remove it from both the - * cleaned list and the dirtied list. - */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - } /* end if */ - else if(was_dirty) { - if(NULL == H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) { - /* insert the address of the entry in the clean entry list. */ - if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate clean slist entry .") - slist_entry_ptr->addr = addr; - - if(H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into clean entry slist.") - } /* end if */ - } /* end else-if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__log_flushed_entry() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__log_inserted_entry() - * - * Purpose: Update the dirty_bytes count for a newly inserted entry. - * - * If mpi_rank isnt 0, this simply means adding the size - * of the entry to the dirty_bytes count. - * - * If mpi_rank is 0, we must also add the entry to the - * dirty entries slist. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 6/30/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(entry_ptr); - cache_ptr = entry_ptr->cache_ptr; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - - if(aux_ptr->mpi_rank == 0) { - H5AC_slist_entry_t *slist_entry_ptr; - - HDassert(aux_ptr->d_slist_ptr != NULL); - HDassert(aux_ptr->c_slist_ptr != NULL); - - /* Entry to insert should not be in dirty list currently */ - if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (const void *)(&entry_ptr->addr))) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.") - - /* insert the address of the entry in the dirty entry list, and - * add its size to the dirty_bytes count. - */ - if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") - slist_entry_ptr->addr = entry_ptr->addr; - if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") - - /* Entry to insert should not be in clean list either */ - if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (const void *)(&entry_ptr->addr))) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.") - } /* end if */ - - aux_ptr->dirty_bytes += entry_ptr->size; - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->insert_dirty_bytes += size; - aux_ptr->insert_dirty_bytes_updates += 1; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__log_inserted_entry() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__log_moved_entry() - * - * Purpose: Update the dirty_bytes count for a moved entry. - * - * WARNING - * - * At present, the way that the move call is used ensures - * that the moved entry is present in all caches by - * moving in a collective operation and immediately after - * unprotecting the target entry. - * - * This function uses this invariant, and will cause arcane - * failures if it is not met. If maintaining this invariant - * becomes impossible, we will have to rework this function - * extensively, and likely include a bit of IPC for - * synchronization. A better option might be to subsume - * move in the unprotect operation. - * - * Given that the target entry is in all caches, the function - * proceeds as follows: - * - * For processes with mpi rank other 0, it simply checks to - * see if the entry was dirty prior to the move, and adds - * the entries size to the dirty bytes count. - * - * In the process with mpi rank 0, the function first checks - * to see if the entry was dirty prior to the move. If it - * was, and if the entry doesn't appear in the dirtied list - * under its old address, it adds the entry's size to the - * dirty bytes count. - * - * The rank 0 process then removes any references to the - * entry under its old address from the cleands and dirtied - * lists, and inserts an entry in the dirtied list under the - * new address. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 6/30/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, haddr_t new_addr) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - hbool_t entry_in_cache; - hbool_t entry_dirty; - size_t entry_size; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f); - HDassert(f->shared); - cache_ptr = (H5AC_t *)f->shared->cache; - HDassert(cache_ptr); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - - /* get entry status, size, etc here */ - if(H5C_get_entry_status(f, old_addr, &entry_size, &entry_in_cache, - &entry_dirty, NULL, NULL, NULL, NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.") - if(!entry_in_cache) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.") - - if(aux_ptr->mpi_rank == 0) { - H5AC_slist_entry_t * slist_entry_ptr; - - HDassert(aux_ptr->d_slist_ptr != NULL); - HDassert(aux_ptr->c_slist_ptr != NULL); - - /* if the entry appears in the cleaned entry slist, under its old - * address, remove it. - */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr)))) - slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); - - /* if the entry appears in the dirtied entry slist under its old - * address, remove it, but don't free it. Set addr to new_addr. - */ - if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr)))) - slist_entry_ptr->addr = new_addr; - else { - /* otherwise, allocate a new entry that is ready - * for insertion, and increment dirty_bytes. - * - * Note that the fact that the entry wasn't in the dirtied - * list under its old address implies that it must have - * been clean to start with. - */ - HDassert(!entry_dirty); - if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") - slist_entry_ptr->addr = new_addr; - - aux_ptr->dirty_bytes += entry_size; - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->move_dirty_bytes += entry_size; - aux_ptr->move_dirty_bytes_updates += 1; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - } /* end else */ - - /* insert / reinsert the entry in the dirty slist */ - if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") - } /* end if */ - else if(!entry_dirty) { - aux_ptr->dirty_bytes += entry_size; - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->move_dirty_bytes += entry_size; - aux_ptr->move_dirty_bytes_updates += 1; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - } /* end else-if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__log_moved_entry() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__propagate_and_apply_candidate_list - * - * Purpose: Prior to the addition of support for multiple metadata - * write strategies, in PHDF5, only the metadata cache with - * mpi rank 0 was allowed to write to file. All other - * metadata caches on processes with rank greater than 0 - * were required to retain dirty entries until they were - * notified that the entry was clean. - * - * This constraint is relaxed with the distributed - * metadata write strategy, in which a list of candidate - * metadata cache entries is constructed by the process 0 - * cache and then distributed to the caches of all the other - * processes. Once the listed is distributed, many (if not - * all) processes writing writing a unique subset of the - * entries, and marking the remainder clean. The subsets - * are chosen so that each entry in the list of candidates - * is written by exactly one cache, and all entries are - * marked as being clean in all caches. - * - * While the list of candidate cache entries is prepared - * elsewhere, this function is the main routine for distributing - * and applying the list. It must be run simultaniously on - * all processes that have the relevant file open. To ensure - * proper synchronization, there is a barrier at the beginning - * of this function. - * - * At present, this function is called under one of two - * circumstances: - * - * 1) Dirty byte creation exceeds some user specified value. - * - * While metadata reads may occur independently, all - * operations writing metadata must be collective. Thus - * all metadata caches see the same sequence of operations, - * and therefore the same dirty data creation. - * - * This fact is used to synchronize the caches for purposes - * of propagating the list of candidate entries, by simply - * calling this function from all caches whenever some user - * specified threshold on dirty data is exceeded. (the - * process 0 cache creates the candidate list just before - * calling this function). - * - * 2) Under direct user control -- this operation must be - * collective. - * - * The operations to be managed by this function are as - * follows: - * - * All processes: - * - * 1) Participate in an opening barrier. - * - * For the process with mpi rank 0: - * - * 1) Load the contents of the candidate list - * (candidate_slist_ptr) into a buffer, and broadcast that - * buffer to all the other caches. Clear the candidate - * list in passing. - * - * If there is a positive number of candidates, proceed with - * the following: - * - * 2) Apply the candidate entry list. - * - * 3) Particpate in a closing barrier. - * - * 4) Remove from the dirty list (d_slist_ptr) and from the - * flushed and still clean entries list (c_slist_ptr), - * all addresses that appeared in the candidate list, as - * these entries are now clean. - * - * - * For all processes with mpi rank greater than 0: - * - * 1) Receive the candidate entry list broadcast - * - * If there is a positive number of candidates, proceed with - * the following: - * - * 2) Apply the candidate entry list. - * - * 3) Particpate in a closing barrier. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - haddr_t * candidates_list_ptr = NULL; - int mpi_result; - int num_candidates = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - - /* to prevent "messages from the future" we must synchronize all - * processes before we write any entries. - */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - if(aux_ptr->mpi_rank == 0) { - if(H5AC__broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.") - - HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0); - } /* end if */ - else { - if(H5AC__receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.") - } /* end else */ - - if(num_candidates > 0) { - herr_t result; - - /* all processes apply the candidate list. - * H5C_apply_candidate_list() handles the details of - * distributing the writes across the processes. - */ - - /* Enable writes during this operation */ - aux_ptr->write_permitted = TRUE; - - /* Apply the candidate list */ - result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates, - candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); - - /* Disable writes again */ - aux_ptr->write_permitted = FALSE; - - /* Check for error on the write operation */ - if(result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") - - /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a - * file system in our parallel tests. - */ - if(aux_ptr->write_done) - (aux_ptr->write_done)(); - - /* to prevent "messages from the past" we must synchronize all - * processes again before we go on. - */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - /* if this is process zero, tidy up the dirtied, - * and flushed and still clean lists. - */ - if(aux_ptr->mpi_rank == 0) - if(H5AC__tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") - } /* end if */ - - /* if it is defined, call the sync point done callback. Note - * that this callback is defined purely for testing purposes, - * and should be undefined under normal operating circumstances. - */ - if(aux_ptr->sync_point_done) - (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr); - -done: - if(candidates_list_ptr) - candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__propagate_and_apply_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__propagate_flushed_and_still_clean_entries_list - * - * Purpose: In PHDF5, if the process 0 only metadata write strategy - * is selected, only the metadata cache with mpi rank 0 is - * allowed to write to file. All other metadata caches on - * processes with rank greater than 0 must retain dirty - * entries until they are notified that the entry is now - * clean. - * - * This function is the main routine for handling this - * notification proceedure. It must be called - * simultaniously on all processes that have the relevant - * file open. To this end, it is called only during a - * sync point, with a barrier prior to the call. - * - * Note that any metadata entry writes by process 0 will - * occur after the barrier and just before this call. - * - * Typicaly, calls to this function will be triggered in - * one of two ways: - * - * 1) Dirty byte creation exceeds some user specified value. - * - * While metadata reads may occur independently, all - * operations writing metadata must be collective. Thus - * all metadata caches see the same sequence of operations, - * and therefore the same dirty data creation. - * - * This fact is used to synchronize the caches for purposes - * of propagating the list of flushed and still clean - * entries, by simply calling this function from all - * caches whenever some user specified threshold on dirty - * data is exceeded. - * - * 2) Under direct user control -- this operation must be - * collective. - * - * The operations to be managed by this function are as - * follows: - * - * For the process with mpi rank 0: - * - * 1) Load the contents of the flushed and still clean entries - * list (c_slist_ptr) into a buffer, and broadcast that - * buffer to all the other caches. - * - * 2) Clear the flushed and still clean entries list - * (c_slist_ptr). - * - * - * For all processes with mpi rank greater than 0: - * - * 1) Receive the flushed and still clean entries list broadcast - * - * 2) Mark the specified entries as clean. - * - * - * For all processes: - * - * 1) Reset the dirtied bytes count to 0. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * July 5, 2005 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); - - if(aux_ptr->mpi_rank == 0) { - if(H5AC__broadcast_clean_list(cache_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.") - HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0); - } /* end if */ - else { - if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.") - } /* end else */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__propagate_flushed_and_still_clean_entries_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC_receive_haddr_list() - * - * Purpose: Receive the list of entry addresses from process 0, - * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. - * Note that the caller must free this buffer if it is - * returned. - * - * This function must only be called by the process with - * MPI_rank greater than 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: Quincey Koziol, 6/11/2015 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr, - haddr_t **haddr_buf_ptr_ptr) -{ - haddr_t * haddr_buf_ptr = NULL; - int mpi_result; - int num_entries; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(num_entries_ptr != NULL); - HDassert(*num_entries_ptr == 0); - HDassert(haddr_buf_ptr_ptr != NULL); - HDassert(*haddr_buf_ptr_ptr == NULL); - - /* First receive the number of entries in the list so that we - * can set up a buffer to receive them. If there aren't - * any, we are done. - */ - if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - - if(num_entries > 0) { - size_t buf_size; - - /* allocate buffers to store the list of entry base addresses in */ - buf_size = sizeof(haddr_t) * (size_t)num_entries; - if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") - - /* Now receive the list of candidate entries */ - if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) - } /* end if */ - - /* finally, pass the number of entries and the buffer pointer - * back to the caller. - */ - *num_entries_ptr = num_entries; - *haddr_buf_ptr_ptr = haddr_buf_ptr; - -done: - if(ret_value < 0) - if(haddr_buf_ptr) - haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC_receive_haddr_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__receive_and_apply_clean_list() - * - * Purpose: Receive the list of cleaned entries from process 0, - * and mark the specified entries as clean. - * - * This function must only be called by the process with - * MPI_rank greater than 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 7/4/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - haddr_t * haddr_buf_ptr = NULL; - int num_entries = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity check */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank != 0); - - /* Retrieve the clean list from process 0 */ - if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, &num_entries, &haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list") - - if(num_entries > 0) - /* mark the indicated entries as clean */ - if(H5C_mark_entries_as_clean(f, dxpl_id, (int32_t)num_entries, haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.") - - /* if it is defined, call the sync point done callback. Note - * that this callback is defined purely for testing purposes, - * and should be undefined under normal operating circumstances. - */ - if(aux_ptr->sync_point_done) - (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); - -done: - if(haddr_buf_ptr) - haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__receive_and_apply_clean_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * - * Function: H5AC__receive_candidate_list() - * - * Purpose: Receive the list of candidate entries from process 0, - * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. - * Note that the caller must free this buffer if it is - * returned. - * - * This function must only be called by the process with - * MPI_rank greater than 0. - * - * Return SUCCEED on success, and FAIL on failure. - * - * Return: Non-negative on success/Negative on failure. - * - * Programmer: John Mainzer, 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__receive_candidate_list(const H5AC_t *cache_ptr, int *num_entries_ptr, - haddr_t **haddr_buf_ptr_ptr) -{ - H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->mpi_rank != 0); - HDassert(aux_ptr-> metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert(num_entries_ptr != NULL); - HDassert(*num_entries_ptr == 0); - HDassert(haddr_buf_ptr_ptr != NULL); - HDassert(*haddr_buf_ptr_ptr == NULL); - - /* Retrieve the candidate list from process 0 */ - if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, num_entries_ptr, haddr_buf_ptr_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__receive_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__rsp__dist_md_write__flush - * - * Purpose: Routine for handling the details of running a sync point - * that is triggered by a flush -- which in turn must have been - * triggered by either a flush API call or a file close -- - * when the distributed metadata write strategy is selected. - * - * Upon entry, each process generates it own candidate list, - * being a sorted list of all dirty metadata entries currently - * in the metadata cache. Note that this list must be idendical - * across all processes, as all processes see the same stream - * of dirty metadata coming in, and use the same lists of - * candidate entries at each sync point. (At first glance, this - * argument sounds circular, but think of it in the sense of - * a recursive proof). - * - * If this this list is empty, we are done, and the function - * returns - * - * Otherwise, after the sorted list dirty metadata entries is - * constructed, each process uses the same algorithm to assign - * each entry on the candidate list to exactly one process for - * flushing. - * - * At this point, all processes participate in a barrier to - * avoid messages from the past/future bugs. - * - * Each process then flushes the entries assigned to it, and - * marks all other entries on the candidate list as clean. - * - * Finally, all processes participate in a second barrier to - * avoid messages from the past/future bugs. - * - * At the end of this process, process 0 and only process 0 - * must tidy up its lists of dirtied and cleaned entries. - * These lists are not used in the distributed metadata write - * strategy, but they must be maintained should we shift - * to a strategy that uses them. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * April 28, 2010 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - haddr_t * haddr_buf_ptr = NULL; - int mpi_result; - int num_entries = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - - /* first construct the candidate list -- initially, this will be in the - * form of a skip list. We will convert it later. - */ - if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") - - if(H5SL_count(aux_ptr->candidate_slist_ptr) > 0) { - herr_t result; - - /* convert the candidate list into the format we - * are used to receiving from process 0. - */ - if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") - - /* initial sync point barrier */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - /* Enable writes during this operation */ - aux_ptr->write_permitted = TRUE; - - /* Apply the candidate list */ - result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries, - haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); - - /* Disable writes again */ - aux_ptr->write_permitted = FALSE; - - /* Check for error on the write operation */ - if(result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") - - /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a - * file system in our parallel tests. - */ - if(aux_ptr->write_done) - (aux_ptr->write_done)(); - - /* final sync point barrier */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - /* if this is process zero, tidy up the dirtied, - * and flushed and still clean lists. - */ - if(aux_ptr->mpi_rank == 0) - if(H5AC__tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") - } /* end if */ - - /* if it is defined, call the sync point done callback. Note - * that this callback is defined purely for testing purposes, - * and should be undefined under normal operating circumstances. - */ - if(aux_ptr->sync_point_done) - (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); - -done: - if(haddr_buf_ptr) - haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__rsp__dist_md_write__flush() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__rsp__dist_md_write__flush_to_min_clean - * - * Purpose: Routine for handling the details of running a sync point - * triggered by the accumulation of dirty metadata (as - * opposed to a flush call to the API) when the distributed - * metadata write strategy is selected. - * - * After invocation and initial sanity checking this function - * first checks to see if evictions are enabled -- if they - * are not, the function does nothing and returns. - * - * Otherwise, process zero constructs a list of entries to - * be flushed in order to bring the process zero cache back - * within its min clean requirement. Note that this list - * (the candidate list) may be empty. - * - * Then, all processes participate in a barrier. - * - * After the barrier, process 0 broadcasts the number of - * entries in the candidate list prepared above, and all - * other processes receive this number. - * - * If this number is zero, we are done, and the function - * returns without further action. - * - * Otherwise, process 0 broadcasts the sorted list of - * candidate entries, and all other processes receive it. - * - * Then, each process uses the same algorithm to assign - * each entry on the candidate list to exactly one process - * for flushing. - * - * Each process then flushes the entries assigned to it, and - * marks all other entries on the candidate list as clean. - * - * Finally, all processes participate in a second barrier to - * avoid messages from the past/future bugs. - * - * At the end of this process, process 0 and only process 0 - * must tidy up its lists of dirtied and cleaned entries. - * These lists are not used in the distributed metadata write - * strategy, but they must be maintained should we shift - * to a strategy that uses them. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * April 28, 2010 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - hbool_t evictions_enabled; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - - /* Query if evictions are allowed */ - if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") - - if(evictions_enabled) { - /* construct candidate list -- process 0 only */ - if(aux_ptr->mpi_rank == 0) - if(H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") - - /* propagate and apply candidate list -- all processes */ - if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.") - } /* evictions enabled */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__rsp__dist_md_write__flush_to_min_clean() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__rsp__p0_only__flush - * - * Purpose: Routine for handling the details of running a sync point - * that is triggered a flush -- which in turn must have been - * triggered by either a flush API call or a file close -- - * when the process 0 only metadata write strategy is selected. - * - * First, all processes participate in a barrier. - * - * Then process zero flushes all dirty entries, and broadcasts - * they number of clean entries (if any) to all the other - * caches. - * - * If this number is zero, we are done. - * - * Otherwise, process 0 broadcasts the list of cleaned - * entries, and all other processes which are part of this - * file group receive it, and mark the listed entries as - * clean in their caches. - * - * Since all processes have the same set of dirty - * entries at the beginning of the sync point, and all - * entries that will be written are written before - * process zero broadcasts the number of cleaned entries, - * there is no need for a closing barrier. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * April 28, 2010 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - int mpi_result; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); - - /* to prevent "messages from the future" we must - * synchronize all processes before we start the flush. - * Hence the following barrier. - */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - /* Flush data to disk, from rank 0 process */ - if(aux_ptr->mpi_rank == 0) { - herr_t result; - - /* Enable writes during this operation */ - aux_ptr->write_permitted = TRUE; - - /* Flush the cache */ - result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET); - - /* Disable writes again */ - aux_ptr->write_permitted = FALSE; - - /* Check for error on the write operation */ - if(result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") - - /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a - * file system in our parallel tests. - */ - if(aux_ptr->write_done) - (aux_ptr->write_done)(); - } /* end if */ - - /* Propagate cleaned entries to other ranks. */ - if(H5AC__propagate_flushed_and_still_clean_entries_list(f, H5AC_dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__rsp__p0_only__flush() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__rsp__p0_only__flush_to_min_clean - * - * Purpose: Routine for handling the details of running a sync point - * triggered by the accumulation of dirty metadata (as - * opposed to a flush call to the API) when the process 0 - * only metadata write strategy is selected. - * - * After invocation and initial sanity checking this function - * first checks to see if evictions are enabled -- if they - * are not, the function does nothing and returns. - * - * Otherwise, all processes participate in a barrier. - * - * After the barrier, if this is process 0, the function - * causes the cache to flush sufficient entries to get the - * cache back within its minimum clean fraction, and broadcast - * the number of entries which have been flushed since - * the last sync point, and are still clean. - * - * If this number is zero, we are done. - * - * Otherwise, process 0 broadcasts the list of cleaned - * entries, and all other processes which are part of this - * file group receive it, and mark the listed entries as - * clean in their caches. - * - * Since all processes have the same set of dirty - * entries at the beginning of the sync point, and all - * entries that will be written are written before - * process zero broadcasts the number of cleaned entries, - * there is no need for a closing barrier. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * April 28, 2010 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - hbool_t evictions_enabled; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); - - /* Query if evictions are allowed */ - if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") - - /* Flush if evictions are allowed -- following call - * will cause process 0 to flush to min clean size, - * and then propagate the newly clean entries to the - * other processes. - * - * Otherwise, do nothing. - */ - if(evictions_enabled) { - int mpi_result; - - /* to prevent "messages from the future" we must synchronize all - * processes before we start the flush. This synchronization may - * already be done -- hence the do_barrier parameter. - */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) - - if(0 == aux_ptr->mpi_rank) { - herr_t result; - - /* here, process 0 flushes as many entries as necessary to - * comply with the currently specified min clean size. - * Note that it is quite possible that no entries will be - * flushed. - */ - - /* Enable writes during this operation */ - aux_ptr->write_permitted = TRUE; - - /* Flush the cache */ - result = H5C_flush_to_min_clean(f, dxpl_id); - - /* Disable writes again */ - aux_ptr->write_permitted = FALSE; - - /* Check for error on the write operation */ - if(result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") - - /* this call exists primarily for the test code -- it is used - * to enforce POSIX semantics on the process used to simulate - * reads and writes in t_cache.c. - */ - if(aux_ptr->write_done) - (aux_ptr->write_done)(); - } /* end if */ - - if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") - } /* end if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__rsp__p0_only__flush_to_min_clean() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__run_sync_point - * - * Purpose: Top level routine for managing a sync point between all - * meta data caches in the parallel case. Since all caches - * see the same sequence of dirty metadata, we simply count - * bytes of dirty metadata, and run a sync point whenever the - * number of dirty bytes of metadata seen since the last - * sync point exceeds a threshold that is common across all - * processes. We also run sync points in response to - * HDF5 API calls triggering either a flush or a file close. - * - * In earlier versions of PHDF5, only the metadata cache with - * mpi rank 0 was allowed to write to file. All other - * metadata caches on processes with rank greater than 0 were - * required to retain dirty entries until they were notified - * that the entry is was clean. - * - * This function was created to make it easier for us to - * experiment with other options, as it is a single point - * for the execution of sync points. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * March 11, 2010 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op) -{ - H5AC_t * cache_ptr; - H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f != NULL); - cache_ptr = f->shared->cache; - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || - (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED)); - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION -HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n", - aux_ptr->mpi_rank, - aux_ptr->dirty_bytes_propagations, - aux_ptr->unprotect_dirty_bytes, - aux_ptr->unprotect_dirty_bytes_updates, - aux_ptr->insert_dirty_bytes, - aux_ptr->insert_dirty_bytes_updates, - aux_ptr->rename_dirty_bytes, - aux_ptr->rename_dirty_bytes_updates); -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - - switch(aux_ptr->metadata_write_strategy) { - case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: - switch(sync_point_op) { - case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: - if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.") - break; - - case H5AC_SYNC_POINT_OP__FLUSH_CACHE: - if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.") - break; - - default: - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); - break; - } /* end switch */ - break; - - case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: - switch(sync_point_op) { - case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: - if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.") - break; - - case H5AC_SYNC_POINT_OP__FLUSH_CACHE: - if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.") - break; - - default: - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); - break; - } /* end switch */ - break; - - default: - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.") - break; - } /* end switch */ - - /* reset the dirty bytes count */ - aux_ptr->dirty_bytes = 0; - -#if H5AC_DEBUG_DIRTY_BYTES_CREATION - aux_ptr->dirty_bytes_propagations += 1; - aux_ptr->unprotect_dirty_bytes = 0; - aux_ptr->unprotect_dirty_bytes_updates = 0; - aux_ptr->insert_dirty_bytes = 0; - aux_ptr->insert_dirty_bytes_updates = 0; - aux_ptr->rename_dirty_bytes = 0; - aux_ptr->rename_dirty_bytes_updates = 0; -#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__run_sync_point() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__tidy_cache_0_lists() - * - * Purpose: In the distributed metadata write strategy, not all dirty - * entries are written by process 0 -- thus we must tidy - * up the dirtied, and flushed and still clean lists - * maintained by process zero after each sync point. - * - * This procedure exists to tend to this issue. - * - * At this point, all entries that process 0 cleared should - * have been removed from both the dirty and flushed and - * still clean lists, and entries that process 0 has flushed - * should have been removed from the dirtied list and added - * to the flushed and still clean list. - * - * However, since the distributed metadata write strategy - * doesn't make use of these lists, the objective is simply - * to maintain these lists in consistent state that allows - * them to be used should the metadata write strategy change - * to one that uses these lists. - * - * Thus for our purposes, all we need to do is remove from - * the dirtied and flushed and still clean lists all - * references to entries that appear in the candidate list. - * - * Return: Success: non-negative - * - * Failure: negative - * - * Programmer: John Mainzer - * 4/20/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates, - haddr_t *candidates_list_ptr) -{ - H5AC_aux_t * aux_ptr; - int i; - - FUNC_ENTER_STATIC_NOERR - - /* Sanity checks */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); - HDassert(aux_ptr != NULL); - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - HDassert(aux_ptr->mpi_rank == 0); - HDassert(num_candidates > 0); - HDassert(candidates_list_ptr != NULL); - - /* clean up dirtied and flushed and still clean lists by removing - * all entries on the candidate list. Cleared entries should - * have been removed from both the dirty and cleaned lists at - * this point, flushed entries should have been added to the - * cleaned list. However, for this metadata write strategy, - * we just want to remove all references to the candidate entries. - */ - for(i = 0; i < num_candidates; i++) { - H5AC_slist_entry_t * d_slist_entry_ptr; - H5AC_slist_entry_t * c_slist_entry_ptr; - haddr_t addr; - - addr = candidates_list_ptr[i]; - - /* addr may be either on the dirtied list, or on the flushed - * and still clean list. Remove it. - */ - if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr))) - d_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr); - if(NULL != (c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)&addr))) - c_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr); - } /* end for */ - - FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5AC__tidy_cache_0_lists() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5AC__flush_entries - * - * Purpose: Flush the metadata cache associated with the specified file, - * only writing from rank 0, but propagating the cleaned entries - * to all ranks. - * - * Return: Non-negative on success/Negative on failure if there was a - * request to flush all items and something was protected. - * - * Programmer: Quincey Koziol - * koziol@hdfgroup.org - * Aug 22 2009 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -static herr_t -H5AC__flush_entries(H5F_t *f, hid_t dxpl_id) -{ - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_STATIC - - /* Sanity checks */ - HDassert(f); - HDassert(f->shared->cache); - - /* Check if we have >1 ranks */ - if(f->shared->cache->aux_ptr) - if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC__flush_entries() */ -#endif /* H5_HAVE_PARALLEL */ - - /*------------------------------------------------------------------------------ * Function: H5AC_ignore_tags() * diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c new file mode 100644 index 0000000..64c27ab --- /dev/null +++ b/src/H5ACmpio.c @@ -0,0 +1,2295 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/*------------------------------------------------------------------------- + * + * Created: H5ACmpio.c + * Jun 20 2015 + * Quincey Koziol <koziol@hdfgroup.org> + * + * Purpose: Functions in this file implement support for parallel + * I/O cache functionality + * + *------------------------------------------------------------------------- + */ + +/****************/ +/* Module Setup */ +/****************/ + +#define H5AC_PACKAGE /*suppress error about including H5ACpkg */ +#define H5C_PACKAGE /*suppress error about including H5Cpkg */ +#define H5F_PACKAGE /*suppress error about including H5Fpkg */ + +/* Interface initialization */ +#define H5_INTERFACE_INIT_FUNC H5AC__init_mpio_interface + +/***********/ +/* Headers */ +/***********/ +#include "H5private.h" /* Generic Functions */ +#include "H5ACpkg.h" /* Metadata cache */ +#include "H5Cpkg.h" /* Cache */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fpkg.h" /* Files */ +#include "H5MMprivate.h" /* Memory management */ + +#ifdef H5_HAVE_PARALLEL + +/****************/ +/* Local Macros */ +/****************/ + + +/******************/ +/* Local Typedefs */ +/******************/ + +/**************************************************************************** + * + * structure H5AC_slist_entry_t + * + * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t + * and the cleaned entry list maintained via the c_slist_ptr field of + * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned + * entries. Unfortunately, the slist code makes us define a dynamically + * allocated structure to store these offsets in. This structure serves + * that purpose. Its fields are as follows: + * + * addr: file offset of a metadata entry. Entries are added to this + * list (if they aren't there already) when they are marked + * dirty in an unprotect, inserted, or moved. They are + * removed when they appear in a clean entries broadcast. + * + ****************************************************************************/ +typedef struct H5AC_slist_entry_t +{ + haddr_t addr; +} H5AC_slist_entry_t; + +/* User data for address list building callbacks */ +typedef struct H5AC_addr_list_ud_t +{ + H5AC_aux_t * aux_ptr; /* 'Auxiliary' parallel cache info */ + haddr_t * addr_buf_ptr; /* Array to store addresses */ + int i; /* Counter for position in array */ +} H5AC_addr_list_ud_t; + + +/********************/ +/* Local Prototypes */ +/********************/ + +static herr_t H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, + int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); +static herr_t H5AC__broadcast_clean_list(H5AC_t *cache_ptr); +static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr, + H5AC_aux_t *aux_ptr, int sync_point_op); +static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, + int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); +static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, + hid_t dxpl_id); +static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr, + haddr_t **haddr_buf_ptr_ptr); +static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr, + int *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); +static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates, + haddr_t *candidates_list_ptr); +static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); + + +/*********************/ +/* Package Variables */ +/*********************/ + +/* Declare a free list to manage the H5AC_aux_t struct */ +H5FL_DEFINE(H5AC_aux_t); + + +/*****************************/ +/* Library Private Variables */ +/*****************************/ + + +/*******************/ +/* Local Variables */ +/*******************/ + +/* Declare a free list to manage the H5AC_slist_entry_t struct */ +H5FL_DEFINE_STATIC(H5AC_slist_entry_t); + + +/*------------------------------------------------------------------------- + * Function: H5AC__init_mpio_interface + * + * Purpose: Initialize interface-specific information + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * 6/20/15 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__init_mpio_interface(void) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Funnel all work to H5AC_init() */ + if(H5AC_init() < 0) + HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "interface initialization failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__init_mpio_interface() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__set_sync_point_done_callback + * + * Purpose: Set the value of the sync_point_done callback. This + * callback is used by the parallel test code to verify + * that the expected writes and only the expected writes + * take place during a sync point. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 5/9/10 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__set_sync_point_done_callback(H5C_t * cache_ptr, + void (* sync_point_done)(int num_writes, haddr_t * written_entries_tbl)) +{ + H5AC_aux_t * aux_ptr; + + FUNC_ENTER_PACKAGE_NOERR + + /* Sanity checks */ + HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + + aux_ptr->sync_point_done = sync_point_done; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__set_sync_point_done_callback() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__set_write_done_callback + * + * Purpose: Set the value of the write_done callback. This callback + * is used to improve performance of the parallel test bed + * for the cache. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 5/11/06 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)) +{ + H5AC_aux_t * aux_ptr; + + FUNC_ENTER_PACKAGE_NOERR + + /* Sanity checks */ + HDassert(cache_ptr && (cache_ptr->magic == H5C__H5C_T_MAGIC)); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + aux_ptr->write_done = write_done; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__set_write_done_callback() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_add_candidate() + * + * Purpose: Add the supplied metadata entry address to the candidate + * list. Verify that each entry added does not appear in + * the list prior to its insertion. + * + * This function is intended for used in constructing list + * of entried to be flushed during sync points. It shouldn't + * be called anywhere else. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC_add_candidate(H5AC_t * cache_ptr, haddr_t addr) +{ + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert(aux_ptr->candidate_slist_ptr != NULL); + + /* Construct an entry for the supplied address, and insert + * it into the candidate slist. + */ + if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate candidate slist entry") + slist_entry_ptr->addr = addr; + + if(H5SL_insert(aux_ptr->candidate_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist") + +done: + /* Clean up on error */ + if(ret_value < 0) + if(slist_entry_ptr) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_add_candidate() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__broadcast_candidate_list() + * + * Purpose: Broadcast the contents of the process 0 candidate entry + * slist. In passing, also remove all entries from said + * list. As the application of this will be handled by + * the same functions on all processes, construct and + * return a copy of the list in the same format as that + * received by the other processes. Note that if this + * copy is returned in *haddr_buf_ptr_ptr, the caller + * must free it. + * + * This function must only be called by the process with + * MPI_rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/1/05 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, int *num_entries_ptr, + haddr_t **haddr_buf_ptr_ptr) +{ + H5AC_aux_t * aux_ptr = NULL; + haddr_t * haddr_buf_ptr = NULL; + int mpi_result; + int num_entries; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank == 0); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert(aux_ptr->candidate_slist_ptr != NULL); + HDassert(num_entries_ptr != NULL); + HDassert(*num_entries_ptr == 0); + HDassert(haddr_buf_ptr_ptr != NULL); + HDassert(*haddr_buf_ptr_ptr == NULL); + + /* First broadcast the number of entries in the list so that the + * receivers can set up buffers to receive them. If there aren't + * any, we are done. + */ + num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr); + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + + if(num_entries > 0) { + size_t buf_size = 0; + int chk_num_entries = 0; + + /* convert the candidate list into the format we + * are used to receiving from process 0, and also load it + * into a buffer for transmission. + */ + if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + HDassert(chk_num_entries == num_entries); + HDassert(haddr_buf_ptr != NULL); + + /* Now broadcast the list of candidate entries */ + buf_size = sizeof(haddr_t) * (size_t)num_entries; + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + } /* end if */ + + /* Pass the number of entries and the buffer pointer + * back to the caller. Do this so that we can use the same code + * to apply the candidate list to all the processes. + */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + +done: + if(ret_value < 0) + if(haddr_buf_ptr) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__broadcast_candidate_list() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__broadcast_clean_list_cb() + * + * Purpose: Skip list callback for building array of addresses for + * broadcasting the clean list. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: Quincey Koziol, 6/12/15 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__broadcast_clean_list_cb(void *_item, void H5_ATTR_UNUSED *_key, + void *_udata) +{ + H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */ + H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */ + haddr_t addr; + + FUNC_ENTER_STATIC_NOERR + + /* Sanity checks */ + HDassert(slist_entry_ptr); + HDassert(udata); + + /* Store the entry's address in the buffer */ + addr = slist_entry_ptr->addr; + udata->addr_buf_ptr[udata->i] = addr; + udata->i++; + + /* now release the entry */ + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + /* and also remove the matching entry from the dirtied list + * if it exists. + */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(udata->aux_ptr->d_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__broadcast_clean_list_cb() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__broadcast_clean_list() + * + * Purpose: Broadcast the contents of the process 0 cleaned entry + * slist. In passing, also remove all entries from said + * list, and also remove any matching entries from the dirtied + * slist. + * + * This function must only be called by the process with + * MPI_rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/1/05 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__broadcast_clean_list(H5AC_t * cache_ptr) +{ + haddr_t * addr_buf_ptr = NULL; + H5AC_aux_t * aux_ptr; + int mpi_result; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr; + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank == 0); + HDassert(aux_ptr->c_slist_ptr != NULL); + + /* First broadcast the number of entries in the list so that the + * receives can set up a buffer to receive them. If there aren't + * any, we are done. + */ + num_entries = (int)H5SL_count(aux_ptr->c_slist_ptr); + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + + if(num_entries > 0) { + H5AC_addr_list_ud_t udata; + size_t buf_size; + + /* allocate a buffer to store the list of entry base addresses in */ + buf_size = sizeof(haddr_t) * (size_t)num_entries; + if(NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") + + /* Set up user data for callback */ + udata.aux_ptr = aux_ptr; + udata.addr_buf_ptr = addr_buf_ptr; + udata.i = 0; + + /* Free all the clean list entries, building the address list in the callback */ + /* (Callback also removes the matching entries from the dirtied list) */ + if(H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") + + /* Now broadcast the list of cleaned entries */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)addr_buf_ptr, (int)buf_size, MPI_BYTE, 0, aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + } /* end if */ + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done) + (aux_ptr->sync_point_done)(num_entries, addr_buf_ptr); + +done: + if(addr_buf_ptr) + addr_buf_ptr = (haddr_t *)H5MM_xfree((void *)addr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__broadcast_clean_list() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__construct_candidate_list() + * + * Purpose: In the parallel case when the metadata_write_strategy is + * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses + * this function to construct the list of cache entries to + * be flushed. This list is then propagated to the other + * caches, and then flushed in a distributed fashion. + * + * The sync_point_op parameter is used to determine the extent + * of the flush. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr, + int sync_point_op) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE) || (aux_ptr->mpi_rank == 0)); + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); + HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0); + HDassert(aux_ptr->candidate_slist_ptr != NULL); + HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0); + HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || (sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_CACHE)); + + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5C_construct_candidate_list__min_clean((H5C_t *)cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__min_clean() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5C_construct_candidate_list__clean_cache((H5C_t *)cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_construct_candidate_list__clean_cache() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown sync point operation.") + break; + } /* end switch */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__construct_candidate_list() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__copy_candidate_list_to_buffer_cb + * + * Purpose: Skip list callback for building array of addresses for + * broadcasting the candidate list. + * + * Return: Return SUCCEED on success, and FAIL on failure. + * + * Programmer: Quincey Koziol, 6/12/15 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key, + void *_udata) +{ + H5AC_slist_entry_t * slist_entry_ptr = (H5AC_slist_entry_t *)_item; /* Address of item */ + H5AC_addr_list_ud_t * udata = (H5AC_addr_list_ud_t *)_udata; /* Context for callback */ + + FUNC_ENTER_STATIC_NOERR + + /* Sanity checks */ + HDassert(slist_entry_ptr); + HDassert(udata); + + /* Store the entry's address in the buffer */ + udata->addr_buf_ptr[udata->i] = slist_entry_ptr->addr; + udata->i++; + + /* now release the entry */ + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__copy_candidate_list_to_buffer_cb() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__copy_candidate_list_to_buffer + * + * Purpose: Allocate buffer(s) and copy the contents of the candidate + * entry slist into it (them). In passing, remove all + * entries from the candidate slist. Note that the + * candidate slist must not be empty. + * + * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer + * of MPI_Offset, copy the contents of the candidate + * entry list into it with the appropriate conversions, + * and return the base address of the buffer in + * *MPI_Offset_buf_ptr. Note that this is the buffer + * used by process 0 to transmit the list of entries to + * be flushed to all other processes (in this file group). + * + * Similarly, allocate a buffer of haddr_t, load the contents + * of the candidate list into this buffer, and return its + * base address in *haddr_buf_ptr_ptr. Note that this + * latter buffer is constructed unconditionally. + * + * In passing, also remove all entries from the candidate + * entry slist. + * + * Return: Return SUCCEED on success, and FAIL on failure. + * + * Programmer: John Mainzer, 4/19/10 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, int *num_entries_ptr, + haddr_t **haddr_buf_ptr_ptr) +{ + H5AC_aux_t * aux_ptr = NULL; + H5AC_addr_list_ud_t udata; + haddr_t * haddr_buf_ptr = NULL; + size_t buf_size; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert(aux_ptr->candidate_slist_ptr != NULL); + HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) > 0); + HDassert(num_entries_ptr != NULL); + HDassert(*num_entries_ptr == 0); + HDassert(haddr_buf_ptr_ptr != NULL); + HDassert(*haddr_buf_ptr_ptr == NULL); + + num_entries = (int)H5SL_count(aux_ptr->candidate_slist_ptr); + + /* allocate a buffer(s) to store the list of candidate entry + * base addresses in + */ + buf_size = sizeof(haddr_t) * (size_t)num_entries; + if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + + /* Set up user data for callback */ + udata.aux_ptr = aux_ptr; + udata.addr_buf_ptr = haddr_buf_ptr; + udata.i = 0; + + /* Free all the candidate list entries, building the address list in the callback */ + if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries") + + /* Pass the number of entries and the buffer pointer + * back to the caller. + */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + +done: + if(ret_value < 0) + if(haddr_buf_ptr) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__copy_candidate_list_to_buffer() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__log_deleted_entry() + * + * Purpose: Log an entry which has been deleted. + * + * Only called for mpi_rank 0. We must make sure that the entry + * doesn't appear in the cleaned or dirty entry lists. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + haddr_t addr; + + FUNC_ENTER_PACKAGE_NOERR + + /* Sanity checks */ + HDassert(entry_ptr); + addr = entry_ptr->addr; + cache_ptr = entry_ptr->cache_ptr; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank == 0); + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); + + /* if the entry appears in the dirtied entry slist, remove it. */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + /* if the entry appears in the cleaned entry slist, remove it. */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__log_deleted_entry() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__log_dirtied_entry() + * + * Purpose: Update the dirty_bytes count for a newly dirtied entry. + * + * If mpi_rank isn't 0, this simply means adding the size + * of the entries to the dirty_bytes count. + * + * If mpi_rank is 0, we must first check to see if the entry + * appears in the dirty entries slist. If it is, do nothing. + * If it isn't, add the size to th dirty_bytes count, add the + * entry to the dirty entries slist, and remove it from the + * cleaned list (if it is present there). + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity checks */ + HDassert(entry_ptr); + HDassert(entry_ptr->is_dirty == FALSE); + cache_ptr = entry_ptr->cache_ptr; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + + if(aux_ptr->mpi_rank == 0) { + H5AC_slist_entry_t *slist_entry_ptr; + haddr_t addr = entry_ptr->addr; + + /* Sanity checks */ + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); + + if(NULL == H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr))) { + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") + slist_entry_ptr->addr = addr; + + if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") + + aux_ptr->dirty_bytes += entry_ptr->size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_ptr->size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } /* end if */ + + /* the entry is dirty. If it exists on the cleaned entries list, + * remove it. + */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + } /* end if */ + else { + aux_ptr->dirty_bytes += entry_ptr->size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } /* end else */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__log_dirtied_entry() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__log_flushed_entry() + * + * Purpose: Update the clean entry slist for the flush of an entry -- + * specifically, if the entry has been cleared, remove it + * from both the cleaned and dirtied lists if it is present. + * Otherwise, if the entry was dirty, insert the indicated + * entry address in the clean slist if it isn't there already. + * + * This function is only used in PHDF5, and should only + * be called for the process with mpi rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty, + unsigned flags) +{ + hbool_t cleared; + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity check */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank == 0); + HDassert(aux_ptr->c_slist_ptr != NULL); + + /* Set local flags */ + cleared = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0); + + if(cleared) { + /* If the entry has been cleared, must remove it from both the + * cleaned list and the dirtied list. + */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + } /* end if */ + else if(was_dirty) { + if(NULL == H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr))) { + /* insert the address of the entry in the clean entry list. */ + if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate clean slist entry .") + slist_entry_ptr->addr = addr; + + if(H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into clean entry slist.") + } /* end if */ + } /* end else-if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__log_flushed_entry() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__log_inserted_entry() + * + * Purpose: Update the dirty_bytes count for a newly inserted entry. + * + * If mpi_rank isnt 0, this simply means adding the size + * of the entry to the dirty_bytes count. + * + * If mpi_rank is 0, we must also add the entry to the + * dirty entries slist. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity checks */ + HDassert(entry_ptr); + cache_ptr = entry_ptr->cache_ptr; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + + if(aux_ptr->mpi_rank == 0) { + H5AC_slist_entry_t *slist_entry_ptr; + + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); + + /* Entry to insert should not be in dirty list currently */ + if(NULL != H5SL_search(aux_ptr->d_slist_ptr, (const void *)(&entry_ptr->addr))) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry already in dirty slist.") + + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") + slist_entry_ptr->addr = entry_ptr->addr; + if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") + + /* Entry to insert should not be in clean list either */ + if(NULL != H5SL_search(aux_ptr->c_slist_ptr, (const void *)(&entry_ptr->addr))) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Inserted entry in clean slist.") + } /* end if */ + + aux_ptr->dirty_bytes += entry_ptr->size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->insert_dirty_bytes += size; + aux_ptr->insert_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__log_inserted_entry() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__log_moved_entry() + * + * Purpose: Update the dirty_bytes count for a moved entry. + * + * WARNING + * + * At present, the way that the move call is used ensures + * that the moved entry is present in all caches by + * moving in a collective operation and immediately after + * unprotecting the target entry. + * + * This function uses this invariant, and will cause arcane + * failures if it is not met. If maintaining this invariant + * becomes impossible, we will have to rework this function + * extensively, and likely include a bit of IPC for + * synchronization. A better option might be to subsume + * move in the unprotect operation. + * + * Given that the target entry is in all caches, the function + * proceeds as follows: + * + * For processes with mpi rank other 0, it simply checks to + * see if the entry was dirty prior to the move, and adds + * the entries size to the dirty bytes count. + * + * In the process with mpi rank 0, the function first checks + * to see if the entry was dirty prior to the move. If it + * was, and if the entry doesn't appear in the dirtied list + * under its old address, it adds the entry's size to the + * dirty bytes count. + * + * The rank 0 process then removes any references to the + * entry under its old address from the cleands and dirtied + * lists, and inserts an entry in the dirtied list under the + * new address. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, haddr_t new_addr) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + hbool_t entry_in_cache; + hbool_t entry_dirty; + size_t entry_size; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity checks */ + HDassert(f); + HDassert(f->shared); + cache_ptr = (H5AC_t *)f->shared->cache; + HDassert(cache_ptr); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + + /* get entry status, size, etc here */ + if(H5C_get_entry_status(f, old_addr, &entry_size, &entry_in_cache, + &entry_dirty, NULL, NULL, NULL, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.") + if(!entry_in_cache) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.") + + if(aux_ptr->mpi_rank == 0) { + H5AC_slist_entry_t * slist_entry_ptr; + + HDassert(aux_ptr->d_slist_ptr != NULL); + HDassert(aux_ptr->c_slist_ptr != NULL); + + /* if the entry appears in the cleaned entry slist, under its old + * address, remove it. + */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr)))) + slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + + /* if the entry appears in the dirtied entry slist under its old + * address, remove it, but don't free it. Set addr to new_addr. + */ + if(NULL != (slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr)))) + slist_entry_ptr->addr = new_addr; + else { + /* otherwise, allocate a new entry that is ready + * for insertion, and increment dirty_bytes. + * + * Note that the fact that the entry wasn't in the dirtied + * list under its old address implies that it must have + * been clean to start with. + */ + HDassert(!entry_dirty); + if(NULL == (slist_entry_ptr = H5FL_MALLOC(H5AC_slist_entry_t))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "Can't allocate dirty slist entry .") + slist_entry_ptr->addr = new_addr; + + aux_ptr->dirty_bytes += entry_size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->move_dirty_bytes += entry_size; + aux_ptr->move_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } /* end else */ + + /* insert / reinsert the entry in the dirty slist */ + if(H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, &(slist_entry_ptr->addr)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, "can't insert entry into dirty entry slist.") + } /* end if */ + else if(!entry_dirty) { + aux_ptr->dirty_bytes += entry_size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->move_dirty_bytes += entry_size; + aux_ptr->move_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } /* end else-if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__log_moved_entry() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__propagate_and_apply_candidate_list + * + * Purpose: Prior to the addition of support for multiple metadata + * write strategies, in PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other + * metadata caches on processes with rank greater than 0 + * were required to retain dirty entries until they were + * notified that the entry was clean. + * + * This constraint is relaxed with the distributed + * metadata write strategy, in which a list of candidate + * metadata cache entries is constructed by the process 0 + * cache and then distributed to the caches of all the other + * processes. Once the listed is distributed, many (if not + * all) processes writing writing a unique subset of the + * entries, and marking the remainder clean. The subsets + * are chosen so that each entry in the list of candidates + * is written by exactly one cache, and all entries are + * marked as being clean in all caches. + * + * While the list of candidate cache entries is prepared + * elsewhere, this function is the main routine for distributing + * and applying the list. It must be run simultaniously on + * all processes that have the relevant file open. To ensure + * proper synchronization, there is a barrier at the beginning + * of this function. + * + * At present, this function is called under one of two + * circumstances: + * + * 1) Dirty byte creation exceeds some user specified value. + * + * While metadata reads may occur independently, all + * operations writing metadata must be collective. Thus + * all metadata caches see the same sequence of operations, + * and therefore the same dirty data creation. + * + * This fact is used to synchronize the caches for purposes + * of propagating the list of candidate entries, by simply + * calling this function from all caches whenever some user + * specified threshold on dirty data is exceeded. (the + * process 0 cache creates the candidate list just before + * calling this function). + * + * 2) Under direct user control -- this operation must be + * collective. + * + * The operations to be managed by this function are as + * follows: + * + * All processes: + * + * 1) Participate in an opening barrier. + * + * For the process with mpi rank 0: + * + * 1) Load the contents of the candidate list + * (candidate_slist_ptr) into a buffer, and broadcast that + * buffer to all the other caches. Clear the candidate + * list in passing. + * + * If there is a positive number of candidates, proceed with + * the following: + * + * 2) Apply the candidate entry list. + * + * 3) Particpate in a closing barrier. + * + * 4) Remove from the dirty list (d_slist_ptr) and from the + * flushed and still clean entries list (c_slist_ptr), + * all addresses that appeared in the candidate list, as + * these entries are now clean. + * + * + * For all processes with mpi rank greater than 0: + * + * 1) Receive the candidate entry list broadcast + * + * If there is a positive number of candidates, proceed with + * the following: + * + * 2) Apply the candidate entry list. + * + * 3) Particpate in a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + haddr_t * candidates_list_ptr = NULL; + int mpi_result; + int num_candidates = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + + /* to prevent "messages from the future" we must synchronize all + * processes before we write any entries. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + if(aux_ptr->mpi_rank == 0) { + if(H5AC__broadcast_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast candidate slist.") + + HDassert(H5SL_count(aux_ptr->candidate_slist_ptr) == 0); + } /* end if */ + else { + if(H5AC__receive_candidate_list(cache_ptr, &num_candidates, &candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive candidate broadcast.") + } /* end else */ + + if(num_candidates > 0) { + herr_t result; + + /* all processes apply the candidate list. + * H5C_apply_candidate_list() handles the details of + * distributing the writes across the processes. + */ + + /* Enable writes during this operation */ + aux_ptr->write_permitted = TRUE; + + /* Apply the candidate list */ + result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates, + candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); + + /* Disable writes again */ + aux_ptr->write_permitted = FALSE; + + /* Check for error on the write operation */ + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") + + /* this code exists primarily for the test bed -- it allows us to + * enforce posix semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if(aux_ptr->write_done) + (aux_ptr->write_done)(); + + /* to prevent "messages from the past" we must synchronize all + * processes again before we go on. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + /* if this is process zero, tidy up the dirtied, + * and flushed and still clean lists. + */ + if(aux_ptr->mpi_rank == 0) + if(H5AC__tidy_cache_0_lists(cache_ptr, num_candidates, candidates_list_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") + } /* end if */ + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done) + (aux_ptr->sync_point_done)(num_candidates, candidates_list_ptr); + +done: + if(candidates_list_ptr) + candidates_list_ptr = (haddr_t *)H5MM_xfree((void *)candidates_list_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__propagate_and_apply_candidate_list() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__propagate_flushed_and_still_clean_entries_list + * + * Purpose: In PHDF5, if the process 0 only metadata write strategy + * is selected, only the metadata cache with mpi rank 0 is + * allowed to write to file. All other metadata caches on + * processes with rank greater than 0 must retain dirty + * entries until they are notified that the entry is now + * clean. + * + * This function is the main routine for handling this + * notification proceedure. It must be called + * simultaniously on all processes that have the relevant + * file open. To this end, it is called only during a + * sync point, with a barrier prior to the call. + * + * Note that any metadata entry writes by process 0 will + * occur after the barrier and just before this call. + * + * Typicaly, calls to this function will be triggered in + * one of two ways: + * + * 1) Dirty byte creation exceeds some user specified value. + * + * While metadata reads may occur independently, all + * operations writing metadata must be collective. Thus + * all metadata caches see the same sequence of operations, + * and therefore the same dirty data creation. + * + * This fact is used to synchronize the caches for purposes + * of propagating the list of flushed and still clean + * entries, by simply calling this function from all + * caches whenever some user specified threshold on dirty + * data is exceeded. + * + * 2) Under direct user control -- this operation must be + * collective. + * + * The operations to be managed by this function are as + * follows: + * + * For the process with mpi rank 0: + * + * 1) Load the contents of the flushed and still clean entries + * list (c_slist_ptr) into a buffer, and broadcast that + * buffer to all the other caches. + * + * 2) Clear the flushed and still clean entries list + * (c_slist_ptr). + * + * + * For all processes with mpi rank greater than 0: + * + * 1) Receive the flushed and still clean entries list broadcast + * + * 2) Mark the specified entries as clean. + * + * + * For all processes: + * + * 1) Reset the dirtied bytes count to 0. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * July 5, 2005 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + + if(aux_ptr->mpi_rank == 0) { + if(H5AC__broadcast_clean_list(cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.") + HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0); + } /* end if */ + else { + if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.") + } /* end else */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__propagate_flushed_and_still_clean_entries_list() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_receive_haddr_list() + * + * Purpose: Receive the list of entry addresses from process 0, + * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. + * Note that the caller must free this buffer if it is + * returned. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: Quincey Koziol, 6/11/2015 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__receive_haddr_list(MPI_Comm mpi_comm, int *num_entries_ptr, + haddr_t **haddr_buf_ptr_ptr) +{ + haddr_t * haddr_buf_ptr = NULL; + int mpi_result; + int num_entries; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(num_entries_ptr != NULL); + HDassert(*num_entries_ptr == 0); + HDassert(haddr_buf_ptr_ptr != NULL); + HDassert(*haddr_buf_ptr_ptr == NULL); + + /* First receive the number of entries in the list so that we + * can set up a buffer to receive them. If there aren't + * any, we are done. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + + if(num_entries > 0) { + size_t buf_size; + + /* allocate buffers to store the list of entry base addresses in */ + buf_size = sizeof(haddr_t) * (size_t)num_entries; + if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + + /* Now receive the list of candidate entries */ + if(MPI_SUCCESS != (mpi_result = MPI_Bcast((void *)haddr_buf_ptr, (int)buf_size, MPI_BYTE, 0, mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) + } /* end if */ + + /* finally, pass the number of entries and the buffer pointer + * back to the caller. + */ + *num_entries_ptr = num_entries; + *haddr_buf_ptr_ptr = haddr_buf_ptr; + +done: + if(ret_value < 0) + if(haddr_buf_ptr) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC_receive_haddr_list() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__receive_and_apply_clean_list() + * + * Purpose: Receive the list of cleaned entries from process 0, + * and mark the specified entries as clean. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/4/05 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + haddr_t * haddr_buf_ptr = NULL; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity check */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank != 0); + + /* Retrieve the clean list from process 0 */ + if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, &num_entries, &haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list") + + if(num_entries > 0) + /* mark the indicated entries as clean */ + if(H5C_mark_entries_as_clean(f, dxpl_id, (int32_t)num_entries, haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.") + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done) + (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); + +done: + if(haddr_buf_ptr) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__receive_and_apply_clean_list() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC__receive_candidate_list() + * + * Purpose: Receive the list of candidate entries from process 0, + * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. + * Note that the caller must free this buffer if it is + * returned. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 3/17/10 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__receive_candidate_list(const H5AC_t *cache_ptr, int *num_entries_ptr, + haddr_t **haddr_buf_ptr_ptr) +{ + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->mpi_rank != 0); + HDassert(aux_ptr-> metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert(num_entries_ptr != NULL); + HDassert(*num_entries_ptr == 0); + HDassert(haddr_buf_ptr_ptr != NULL); + HDassert(*haddr_buf_ptr_ptr == NULL); + + /* Retrieve the candidate list from process 0 */ + if(H5AC__receive_haddr_list(aux_ptr->mpi_comm, num_entries_ptr, haddr_buf_ptr_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__receive_candidate_list() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__rsp__dist_md_write__flush + * + * Purpose: Routine for handling the details of running a sync point + * that is triggered by a flush -- which in turn must have been + * triggered by either a flush API call or a file close -- + * when the distributed metadata write strategy is selected. + * + * Upon entry, each process generates it own candidate list, + * being a sorted list of all dirty metadata entries currently + * in the metadata cache. Note that this list must be idendical + * across all processes, as all processes see the same stream + * of dirty metadata coming in, and use the same lists of + * candidate entries at each sync point. (At first glance, this + * argument sounds circular, but think of it in the sense of + * a recursive proof). + * + * If this this list is empty, we are done, and the function + * returns + * + * Otherwise, after the sorted list dirty metadata entries is + * constructed, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process for + * flushing. + * + * At this point, all processes participate in a barrier to + * avoid messages from the past/future bugs. + * + * Each process then flushes the entries assigned to it, and + * marks all other entries on the candidate list as clean. + * + * Finally, all processes participate in a second barrier to + * avoid messages from the past/future bugs. + * + * At the end of this process, process 0 and only process 0 + * must tidy up its lists of dirtied and cleaned entries. + * These lists are not used in the distributed metadata write + * strategy, but they must be maintained should we shift + * to a strategy that uses them. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + haddr_t * haddr_buf_ptr = NULL; + int mpi_result; + int num_entries = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + + /* first construct the candidate list -- initially, this will be in the + * form of a skip list. We will convert it later. + */ + if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + + if(H5SL_count(aux_ptr->candidate_slist_ptr) > 0) { + herr_t result; + + /* convert the candidate list into the format we + * are used to receiving from process 0. + */ + if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + + /* initial sync point barrier */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + /* Enable writes during this operation */ + aux_ptr->write_permitted = TRUE; + + /* Apply the candidate list */ + result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries, + haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); + + /* Disable writes again */ + aux_ptr->write_permitted = FALSE; + + /* Check for error on the write operation */ + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") + + /* this code exists primarily for the test bed -- it allows us to + * enforce posix semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if(aux_ptr->write_done) + (aux_ptr->write_done)(); + + /* final sync point barrier */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + /* if this is process zero, tidy up the dirtied, + * and flushed and still clean lists. + */ + if(aux_ptr->mpi_rank == 0) + if(H5AC__tidy_cache_0_lists(cache_ptr, num_entries, haddr_buf_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't tidy up process 0 lists.") + } /* end if */ + + /* if it is defined, call the sync point done callback. Note + * that this callback is defined purely for testing purposes, + * and should be undefined under normal operating circumstances. + */ + if(aux_ptr->sync_point_done) + (aux_ptr->sync_point_done)(num_entries, haddr_buf_ptr); + +done: + if(haddr_buf_ptr) + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__rsp__dist_md_write__flush() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__rsp__dist_md_write__flush_to_min_clean + * + * Purpose: Routine for handling the details of running a sync point + * triggered by the accumulation of dirty metadata (as + * opposed to a flush call to the API) when the distributed + * metadata write strategy is selected. + * + * After invocation and initial sanity checking this function + * first checks to see if evictions are enabled -- if they + * are not, the function does nothing and returns. + * + * Otherwise, process zero constructs a list of entries to + * be flushed in order to bring the process zero cache back + * within its min clean requirement. Note that this list + * (the candidate list) may be empty. + * + * Then, all processes participate in a barrier. + * + * After the barrier, process 0 broadcasts the number of + * entries in the candidate list prepared above, and all + * other processes receive this number. + * + * If this number is zero, we are done, and the function + * returns without further action. + * + * Otherwise, process 0 broadcasts the sorted list of + * candidate entries, and all other processes receive it. + * + * Then, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process + * for flushing. + * + * Each process then flushes the entries assigned to it, and + * marks all other entries on the candidate list as clean. + * + * Finally, all processes participate in a second barrier to + * avoid messages from the past/future bugs. + * + * At the end of this process, process 0 and only process 0 + * must tidy up its lists of dirtied and cleaned entries. + * These lists are not used in the distributed metadata write + * strategy, but they must be maintained should we shift + * to a strategy that uses them. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + hbool_t evictions_enabled; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + + /* Query if evictions are allowed */ + if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") + + if(evictions_enabled) { + /* construct candidate list -- process 0 only */ + if(aux_ptr->mpi_rank == 0) + if(H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + + /* propagate and apply candidate list -- all processes */ + if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.") + } /* evictions enabled */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__rsp__dist_md_write__flush_to_min_clean() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__rsp__p0_only__flush + * + * Purpose: Routine for handling the details of running a sync point + * that is triggered a flush -- which in turn must have been + * triggered by either a flush API call or a file close -- + * when the process 0 only metadata write strategy is selected. + * + * First, all processes participate in a barrier. + * + * Then process zero flushes all dirty entries, and broadcasts + * they number of clean entries (if any) to all the other + * caches. + * + * If this number is zero, we are done. + * + * Otherwise, process 0 broadcasts the list of cleaned + * entries, and all other processes which are part of this + * file group receive it, and mark the listed entries as + * clean in their caches. + * + * Since all processes have the same set of dirty + * entries at the beginning of the sync point, and all + * entries that will be written are written before + * process zero broadcasts the number of cleaned entries, + * there is no need for a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + int mpi_result; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + + /* to prevent "messages from the future" we must + * synchronize all processes before we start the flush. + * Hence the following barrier. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + /* Flush data to disk, from rank 0 process */ + if(aux_ptr->mpi_rank == 0) { + herr_t result; + + /* Enable writes during this operation */ + aux_ptr->write_permitted = TRUE; + + /* Flush the cache */ + result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET); + + /* Disable writes again */ + aux_ptr->write_permitted = FALSE; + + /* Check for error on the write operation */ + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + + /* this code exists primarily for the test bed -- it allows us to + * enforce posix semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if(aux_ptr->write_done) + (aux_ptr->write_done)(); + } /* end if */ + + /* Propagate cleaned entries to other ranks. */ + if(H5AC__propagate_flushed_and_still_clean_entries_list(f, H5AC_dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__rsp__p0_only__flush() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__rsp__p0_only__flush_to_min_clean + * + * Purpose: Routine for handling the details of running a sync point + * triggered by the accumulation of dirty metadata (as + * opposed to a flush call to the API) when the process 0 + * only metadata write strategy is selected. + * + * After invocation and initial sanity checking this function + * first checks to see if evictions are enabled -- if they + * are not, the function does nothing and returns. + * + * Otherwise, all processes participate in a barrier. + * + * After the barrier, if this is process 0, the function + * causes the cache to flush sufficient entries to get the + * cache back within its minimum clean fraction, and broadcast + * the number of entries which have been flushed since + * the last sync point, and are still clean. + * + * If this number is zero, we are done. + * + * Otherwise, process 0 broadcasts the list of cleaned + * entries, and all other processes which are part of this + * file group receive it, and mark the listed entries as + * clean in their caches. + * + * Since all processes have the same set of dirty + * entries at the beginning of the sync point, and all + * entries that will be written are written before + * process zero broadcasts the number of cleaned entries, + * there is no need for a closing barrier. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * April 28, 2010 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + hbool_t evictions_enabled; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); + + /* Query if evictions are allowed */ + if(H5C_get_evictions_enabled((const H5C_t *)cache_ptr, &evictions_enabled) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_get_evictions_enabled() failed.") + + /* Flush if evictions are allowed -- following call + * will cause process 0 to flush to min clean size, + * and then propagate the newly clean entries to the + * other processes. + * + * Otherwise, do nothing. + */ + if(evictions_enabled) { + int mpi_result; + + /* to prevent "messages from the future" we must synchronize all + * processes before we start the flush. This synchronization may + * already be done -- hence the do_barrier parameter. + */ + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + + if(0 == aux_ptr->mpi_rank) { + herr_t result; + + /* here, process 0 flushes as many entries as necessary to + * comply with the currently specified min clean size. + * Note that it is quite possible that no entries will be + * flushed. + */ + + /* Enable writes during this operation */ + aux_ptr->write_permitted = TRUE; + + /* Flush the cache */ + result = H5C_flush_to_min_clean(f, dxpl_id); + + /* Disable writes again */ + aux_ptr->write_permitted = FALSE; + + /* Check for error on the write operation */ + if(result < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") + + /* this call exists primarily for the test code -- it is used + * to enforce POSIX semantics on the process used to simulate + * reads and writes in t_cache.c. + */ + if(aux_ptr->write_done) + (aux_ptr->write_done)(); + } /* end if */ + + if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__rsp__p0_only__flush_to_min_clean() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__run_sync_point + * + * Purpose: Top level routine for managing a sync point between all + * meta data caches in the parallel case. Since all caches + * see the same sequence of dirty metadata, we simply count + * bytes of dirty metadata, and run a sync point whenever the + * number of dirty bytes of metadata seen since the last + * sync point exceeds a threshold that is common across all + * processes. We also run sync points in response to + * HDF5 API calls triggering either a flush or a file close. + * + * In earlier versions of PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other + * metadata caches on processes with rank greater than 0 were + * required to retain dirty entries until they were notified + * that the entry is was clean. + * + * This function was created to make it easier for us to + * experiment with other options, as it is a single point + * for the execution of sync points. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * March 11, 2010 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op) +{ + H5AC_t * cache_ptr; + H5AC_aux_t * aux_ptr; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity checks */ + HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || + (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED)); + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION +HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n", + aux_ptr->mpi_rank, + aux_ptr->dirty_bytes_propagations, + aux_ptr->unprotect_dirty_bytes, + aux_ptr->unprotect_dirty_bytes_updates, + aux_ptr->insert_dirty_bytes, + aux_ptr->insert_dirty_bytes_updates, + aux_ptr->rename_dirty_bytes, + aux_ptr->rename_dirty_bytes_updates); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + switch(aux_ptr->metadata_write_strategy) { + case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); + break; + } /* end switch */ + break; + + case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: + switch(sync_point_op) { + case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: + if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.") + break; + + case H5AC_SYNC_POINT_OP__FLUSH_CACHE: + if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.") + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "unknown flush op"); + break; + } /* end switch */ + break; + + default: + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Unknown metadata write strategy.") + break; + } /* end switch */ + + /* reset the dirty bytes count */ + aux_ptr->dirty_bytes = 0; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations += 1; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__run_sync_point() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__tidy_cache_0_lists() + * + * Purpose: In the distributed metadata write strategy, not all dirty + * entries are written by process 0 -- thus we must tidy + * up the dirtied, and flushed and still clean lists + * maintained by process zero after each sync point. + * + * This procedure exists to tend to this issue. + * + * At this point, all entries that process 0 cleared should + * have been removed from both the dirty and flushed and + * still clean lists, and entries that process 0 has flushed + * should have been removed from the dirtied list and added + * to the flushed and still clean list. + * + * However, since the distributed metadata write strategy + * doesn't make use of these lists, the objective is simply + * to maintain these lists in consistent state that allows + * them to be used should the metadata write strategy change + * to one that uses these lists. + * + * Thus for our purposes, all we need to do is remove from + * the dirtied and flushed and still clean lists all + * references to entries that appear in the candidate list. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * 4/20/10 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, int num_candidates, + haddr_t *candidates_list_ptr) +{ + H5AC_aux_t * aux_ptr; + int i; + + FUNC_ENTER_STATIC_NOERR + + /* Sanity checks */ + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + aux_ptr = (H5AC_aux_t *)(cache_ptr->aux_ptr); + HDassert(aux_ptr != NULL); + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); + HDassert(aux_ptr->mpi_rank == 0); + HDassert(num_candidates > 0); + HDassert(candidates_list_ptr != NULL); + + /* clean up dirtied and flushed and still clean lists by removing + * all entries on the candidate list. Cleared entries should + * have been removed from both the dirty and cleaned lists at + * this point, flushed entries should have been added to the + * cleaned list. However, for this metadata write strategy, + * we just want to remove all references to the candidate entries. + */ + for(i = 0; i < num_candidates; i++) { + H5AC_slist_entry_t * d_slist_entry_ptr; + H5AC_slist_entry_t * c_slist_entry_ptr; + haddr_t addr; + + addr = candidates_list_ptr[i]; + + /* addr may be either on the dirtied list, or on the flushed + * and still clean list. Remove it. + */ + if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr))) + d_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, d_slist_entry_ptr); + if(NULL != (c_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->c_slist_ptr, (void *)&addr))) + c_slist_entry_ptr = H5FL_FREE(H5AC_slist_entry_t, c_slist_entry_ptr); + } /* end for */ + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5AC__tidy_cache_0_lists() */ + + +/*------------------------------------------------------------------------- + * Function: H5AC__flush_entries + * + * Purpose: Flush the metadata cache associated with the specified file, + * only writing from rank 0, but propagating the cleaned entries + * to all ranks. + * + * Return: Non-negative on success/Negative on failure if there was a + * request to flush all items and something was protected. + * + * Programmer: Quincey Koziol + * koziol@hdfgroup.org + * Aug 22 2009 + * + *------------------------------------------------------------------------- + */ +herr_t +H5AC__flush_entries(H5F_t *f, hid_t dxpl_id) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Sanity checks */ + HDassert(f); + HDassert(f->shared->cache); + + /* Check if we have >1 ranks */ + if(f->shared->cache->aux_ptr) + if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5AC__flush_entries() */ +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5ACpkg.h b/src/H5ACpkg.h index 28965d2..74bf079 100644 --- a/src/H5ACpkg.h +++ b/src/H5ACpkg.h @@ -41,8 +41,20 @@ /* Get needed headers */ #include "H5Cprivate.h" /* Cache */ +#include "H5FLprivate.h" /* Free Lists */ #include "H5SLprivate.h" /* Skip lists */ +/*****************************/ +/* Package Private Variables */ +/*****************************/ + +/* Declare extern the free list to manage the H5AC_aux_t struct */ +H5FL_EXTERN(H5AC_aux_t); + + +/**************************/ +/* Package Private Macros */ +/**************************/ #define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 @@ -393,9 +405,18 @@ typedef struct H5AC_aux_t } H5AC_aux_t; /* struct H5AC_aux_t */ /* Package scoped functions */ -H5_DLL herr_t H5AC_set_sync_point_done_callback(H5C_t *cache_ptr, +H5_DLL herr_t H5AC__log_deleted_entry(const H5AC_info_t *entry_ptr); +H5_DLL herr_t H5AC__log_dirtied_entry(const H5AC_info_t *entry_ptr); +H5_DLL herr_t H5AC__log_flushed_entry(H5C_t *cache_ptr, haddr_t addr, + hbool_t was_dirty, unsigned flags); +H5_DLL herr_t H5AC__log_inserted_entry(const H5AC_info_t *entry_ptr); +H5_DLL herr_t H5AC__log_moved_entry(const H5F_t *f, haddr_t old_addr, + haddr_t new_addr); +H5_DLL herr_t H5AC__flush_entries(H5F_t *f, hid_t dxpl_id); +H5_DLL herr_t H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op); +H5_DLL herr_t H5AC__set_sync_point_done_callback(H5C_t *cache_ptr, void (*sync_point_done)(int num_writes, haddr_t *written_entries_tbl)); -H5_DLL herr_t H5AC_set_write_done_callback(H5C_t * cache_ptr, +H5_DLL herr_t H5AC__set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)); #endif /* H5_HAVE_PARALLEL */ @@ -41,7 +41,7 @@ * * Code Changes: * - * - Remove extra functionality in H5C_flush_single_entry()? + * - Remove extra functionality in H5C__flush_single_entry()? * * - Change protect/unprotect to lock/unlock. * @@ -70,16 +70,22 @@ * **************************************************************************/ +/****************/ +/* Module Setup */ +/****************/ + #define H5C_PACKAGE /*suppress error about including H5Cpkg */ #define H5F_PACKAGE /*suppress error about including H5Fpkg */ +/***********/ +/* Headers */ +/***********/ #include "H5private.h" /* Generic Functions */ #ifdef H5_HAVE_PARALLEL #include "H5ACprivate.h" /* Metadata cache */ #endif /* H5_HAVE_PARALLEL */ #include "H5Cpkg.h" /* Cache */ -#include "H5Dprivate.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5FDprivate.h" /* File drivers */ @@ -91,9 +97,9 @@ #include "H5SLprivate.h" /* Skip lists */ -/* - * Private macros. - */ +/****************/ +/* Local Macros */ +/****************/ #if H5C_DO_MEMORY_SANITY_CHECKS #define H5C_IMAGE_EXTRA_SPACE 8 #define H5C_IMAGE_SANITY_VALUE "DeadBeef" @@ -101,17 +107,14 @@ #define H5C_IMAGE_EXTRA_SPACE 0 #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ -/* - * Private file-scope variables. - */ - -/* Declare a free list to manage the H5C_t struct */ -H5FL_DEFINE_STATIC(H5C_t); +/******************/ +/* Local Typedefs */ +/******************/ -/* - * Private file-scope function declarations: - */ +/********************/ +/* Local Prototypes */ +/********************/ static herr_t H5C__auto_adjust_cache_size(H5F_t * f, hid_t dxpl_id, @@ -140,13 +143,6 @@ static herr_t H5C__flash_increase_cache_size(H5C_t * cache_ptr, size_t old_entry_size, size_t new_entry_size); -static herr_t H5C_flush_single_entry(const H5F_t * f, - hid_t dxpl_id, - haddr_t addr, - unsigned flags, - hbool_t del_entry_from_slist_on_destroy, - int64_t *entry_size_change_ptr); - static herr_t H5C_flush_invalidate_cache(const H5F_t * f, hid_t dxpl_id, unsigned flags); @@ -198,6 +194,24 @@ herr_t H5C_dump_cache(H5C_t * cache_ptr, const char * cache_name); herr_t H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn); #endif /* debugging routines */ +/*********************/ +/* Package Variables */ +/*********************/ + + +/*****************************/ +/* Library Private Variables */ +/*****************************/ + + +/*******************/ +/* Local Variables */ +/*******************/ + +/* Declare a free list to manage the H5C_t struct */ +H5FL_DEFINE_STATIC(H5C_t); + + /**************************************************************************** * @@ -386,882 +400,6 @@ H5C__epoch_marker_fsf_size(const void H5_ATTR_UNUSED * thing, size_t H5_ATTR_UNU /*------------------------------------------------------------------------- - * Function: H5C_apply_candidate_list - * - * Purpose: Apply the supplied candidate list. - * - * We used to do this by simply having each process write - * every mpi_size-th entry in the candidate list, starting - * at index mpi_rank, and mark all the others clean. - * - * However, this can cause unnecessary contention in a file - * system by increasing the number of processes writing to - * adjacent locations in the HDF5 file. - * - * To attempt to minimize this, we now arange matters such - * that each process writes n adjacent entries in the - * candidate list, and marks all others clean. We must do - * this in such a fashion as to guarantee that each entry - * on the candidate list is written by exactly one process, - * and marked clean by all others. - * - * To do this, first construct a table mapping mpi_rank - * to the index of the first entry in the candidate list to - * be written by the process of that mpi_rank, and then use - * the table to control which entries are written and which - * are marked as clean as a function of the mpi_rank. - * - * Note that the table must be identical on all processes, as - * all see the same candidate list, mpi_size, and mpi_rank -- - * the inputs used to construct the table. - * - * We construct the table as follows. Let: - * - * n = num_candidates / mpi_size; - * - * m = num_candidates % mpi_size; - * - * Now allocate an array of integers of length mpi_size + 1, - * and call this array candidate_assignment_table. - * - * Conceptually, if the number of candidates is a multiple - * of the mpi_size, we simply pass through the candidate list - * and assign n entries to each process to flush, with the - * index of the first entry to flush in the location in - * the candidate_assignment_table indicated by the mpi_rank - * of the process. - * - * In the more common case in which the candidate list isn't - * isn't a multiple of the mpi_size, we pretend it is, and - * give num_candidates % mpi_size processes one extra entry - * each to make things work out. - * - * Once the table is constructed, we determine the first and - * last entry this process is to flush as follows: - * - * first_entry_to_flush = candidate_assignment_table[mpi_rank] - * - * last_entry_to_flush = - * candidate_assignment_table[mpi_rank + 1] - 1; - * - * With these values determined, we simply scan through the - * candidate list, marking all entries in the range - * [first_entry_to_flush, last_entry_to_flush] for flush, - * and all others to be cleaned. - * - * Finally, we scan the LRU from tail to head, flushing - * or marking clean the candidate entries as indicated. - * If necessary, we scan the pinned list as well. - * - * Note that this function will fail if any protected or - * clean entries appear on the candidate list. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - * Changes: Ported code to detect next entry status changes as the - * the result of a flush from the serial code in the scan of - * the LRU. Also added code to detect and adapt to the - * removal from the cache of the next entry in the scan of - * the LRU. - * - * Note that at present, all of these changes should not - * be required as the operations on entries as they are - * flushed that can cause these condiditions are not premitted - * in the parallel case. However, Quincey indicates that - * this may change, and thus has requested the modification. - * - * Note the assert(FALSE) in the if statement whose body - * restarts the scan of the LRU. As the body of the if - * statement should be unreachable, it should never be - * triggered until the constraints on the parallel case - * are relaxed. Please remove the assertion at that time. - * - * Also added warning on the Pinned Entry List scan, as it - * is potentially subject to the same issue. As there is - * no cognate of this scan in the serial code, I don't have - * a fix to port to it. - * - * JRM -- 4/10/19 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0 -herr_t -H5C_apply_candidate_list(H5F_t * f, - hid_t dxpl_id, - H5C_t * cache_ptr, - int num_candidates, - haddr_t * candidates_list_ptr, - int mpi_rank, - int mpi_size) -{ - hbool_t restart_scan; - hbool_t prev_is_dirty; - int i; - int m; - int n; - int first_entry_to_flush; - int last_entry_to_flush; - int entries_to_clear = 0; - int entries_to_flush = 0; - int entries_to_flush_or_clear_last = 0; - int entries_to_flush_collectively = 0; - int entries_cleared = 0; - int entries_flushed = 0; - int entries_delayed = 0; - int entries_flushed_or_cleared_last = 0; - int entries_flushed_collectively = 0; - int entries_examined = 0; - int initial_list_len; - int * candidate_assignment_table = NULL; - haddr_t addr; - H5C_cache_entry_t * clear_ptr = NULL; - H5C_cache_entry_t * next_ptr = NULL; - H5C_cache_entry_t * entry_ptr = NULL; - H5C_cache_entry_t * flush_ptr = NULL; - H5C_cache_entry_t * delayed_ptr = NULL; -#if H5C_DO_SANITY_CHECKS - haddr_t last_addr; -#endif /* H5C_DO_SANITY_CHECKS */ -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - char tbl_buf[1024]; -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HDassert( num_candidates > 0 ); - HDassert( num_candidates <= cache_ptr->slist_len ); - HDassert( candidates_list_ptr != NULL ); - HDassert( 0 <= mpi_rank ); - HDassert( mpi_rank < mpi_size ); - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n", - FUNC, mpi_rank); - for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0'; - sprintf(&(tbl_buf[0]), "candidate list = "); - for ( i = 0; i < num_candidates; i++ ) - { - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx", - (long long)(*(candidates_list_ptr + i))); - } - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); - HDfprintf(stdout, "%s", tbl_buf); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - n = num_candidates / mpi_size; - m = num_candidates % mpi_size; - HDassert(n >= 0); - - if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table") - - candidate_assignment_table[0] = 0; - candidate_assignment_table[mpi_size] = num_candidates; - - if(m == 0) { /* mpi_size is an even divisor of num_candidates */ - HDassert(n > 0); - for(i = 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; - } /* end if */ - else { - for(i = 1; i <= m; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1; - - if(num_candidates < mpi_size) { - for(i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = num_candidates; - } /* end if */ - else { - for(i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; - } /* end else */ - } /* end else */ - HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates); - -#if H5C_DO_SANITY_CHECKS - /* verify that the candidate assignment table has the expected form */ - for ( i = 1; i < mpi_size - 1; i++ ) - { - int a, b; - - a = candidate_assignment_table[i] - candidate_assignment_table[i - 1]; - b = candidate_assignment_table[i + 1] - candidate_assignment_table[i]; - - HDassert( n + 1 >= a ); - HDassert( a >= b ); - HDassert( b >= n ); - } -#endif /* H5C_DO_SANITY_CHECKS */ - - first_entry_to_flush = candidate_assignment_table[mpi_rank]; - last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1; - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - for ( i = 0; i < 1024; i++ ) - tbl_buf[i] = '\0'; - sprintf(&(tbl_buf[0]), "candidate assignment table = "); - for(i = 0; i <= mpi_size; i++) - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %d", candidate_assignment_table[i]); - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); - HDfprintf(stdout, "%s", tbl_buf); - - HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n", - FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush); - - HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - for(i = 0; i < num_candidates; i++) { - addr = candidates_list_ptr[i]; - HDassert( H5F_addr_defined(addr) ); - -#if H5C_DO_SANITY_CHECKS - if ( i > 0 ) { - if ( last_addr == addr ) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n") - } else if ( last_addr > addr ) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n") - } - } - - last_addr = addr; -#endif /* H5C_DO_SANITY_CHECKS */ - - H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) - if(entry_ptr == NULL) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.") - } else if(!entry_ptr->is_dirty) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.") - } else if ( entry_ptr->is_protected ) { - /* For now at least, we can't deal with protected entries. - * If we encounter one, scream and die. If it becomes an - * issue, we should be able to work around this. - */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.") - } else { - /* determine whether the entry is to be cleared or flushed, - * and mark it accordingly. We will scan the protected and - * pinned list shortly, and clear or flush according to these - * markings. - */ - if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) { - entries_to_flush++; - entry_ptr->flush_immediately = TRUE; - } /* end if */ - else { - entries_to_clear++; - entry_ptr->clear_on_unprotect = TRUE; - } /* end else */ - } /* end else */ - } /* end for */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n", - FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear, - (int)entries_to_flush); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - - /* We have now marked all the entries on the candidate list for - * either flush or clear -- now scan the LRU and the pinned list - * for these entries and do the deed. - * - * Note that we are doing things in this round about manner so as - * to preserve the order of the LRU list to the best of our ability. - * If we don't do this, my experiments indicate that we will have a - * noticably poorer hit ratio as a result. - */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank, - (int)(cache_ptr->LRU_list_len)); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* ===================================================================== * - * Now scan the LRU and PEL lists, flushing or clearing entries as - * needed. - * - * The flush_me_last and flush_me_collectively flags may dictate how or - * when some entries can be flushed, and should be addressed here. - * However, in their initial implementation, these flags only apply to the - * superblock, so there's only a relatively small change to this function - * to account for this one case where they come into play. If these flags - * are ever expanded upon, this function and the following flushing steps - * should be reworked to account for additional cases. - * ===================================================================== */ - - HDassert(entries_to_flush >= 0); - - restart_scan = FALSE; - entries_examined = 0; - initial_list_len = cache_ptr->LRU_list_len; - entry_ptr = cache_ptr->LRU_tail_ptr; - - /* Examine each entry in the LRU list */ - while ( ( entry_ptr != NULL ) - && - ( entries_examined <= (entries_to_flush + 1) * initial_list_len ) - && - ( (entries_cleared + entries_flushed) < num_candidates ) ) { - - if ( entry_ptr->prev != NULL ) - prev_is_dirty = entry_ptr->prev->is_dirty; - - /* If this process needs to clear this entry. */ - if(entry_ptr->clear_on_unprotect) { - - HDassert(entry_ptr->is_dirty); - - next_ptr = entry_ptr->next; - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_cleared++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, - (long long)clear_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* No need to check for the next entry in the scan being - * removed from the cache, as this call to H5C_flush_single_entry() - * will not call either the pre_serialize or serialize callbacks. - */ - - if(H5C_flush_single_entry(f, - dxpl_id, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - TRUE, - NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } /* end if */ - - /* Else, if this process needs to flush this entry. */ - else if (entry_ptr->flush_immediately) { - - HDassert(entry_ptr->is_dirty); - - next_ptr = entry_ptr->next; - entry_ptr->flush_immediately = FALSE; - flush_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_flushed++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, - (long long)flush_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* reset entries_removed_counter and - * last_entry_removed_ptr prior to the call to - * H5C_flush_single_entry() so that we can spot - * unexpected removals of entries from the cache, - * and set the restart_scan flag if proceeding - * would be likely to cause us to scan an entry - * that is no longer in the cache. - * - * Note that as of this writing (April 2015) this - * case cannot occur in the parallel case. However - * Quincey is making noises about changing this, hence - * the insertion of this test. - * - * Note also that there is no test code to verify - * that this code actually works (although similar code - * in the serial version exists and is tested). - * - * Implementing a test will likely require implementing - * flush op like facilities in the parallel tests. At - * a guess this will not be terribly painful, but it - * will take a bit of time. - */ - cache_ptr->entries_removed_counter = 0; - cache_ptr->last_entry_removed_ptr = NULL; - - if(H5C_flush_single_entry(f, - dxpl_id, - flush_ptr->addr, - H5C__NO_FLAGS_SET, - TRUE, - NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.") - - if ( ( cache_ptr->entries_removed_counter > 1 ) || - ( cache_ptr->last_entry_removed_ptr == entry_ptr ) ) - - restart_scan = TRUE; - - } /* end else-if */ - - /* Otherwise, no action to be taken on this entry. Grab the next. */ - else { - entry_ptr = entry_ptr->prev; - - if ( entry_ptr != NULL ) - next_ptr = entry_ptr->next; - - } /* end else */ - - if ( ( entry_ptr != NULL ) - && - ( ( restart_scan ) - || - ( entry_ptr->is_dirty != prev_is_dirty ) - || - ( entry_ptr->next != next_ptr ) - || - ( entry_ptr->is_protected ) - || - ( entry_ptr->is_pinned ) - ) - ) { - - /* something has happened to the LRU -- start over - * from the tail. - * - * Recall that this code should be un-reachable at present, - * as all the operations by entries on flush that could cause - * it to be reachable are disallowed in the parallel case at - * present. Hence the following assertion which should be - * removed if the above changes. - */ - - HDassert( ! restart_scan ); - HDassert( entry_ptr->is_dirty == prev_is_dirty ); - HDassert( entry_ptr->next == next_ptr ); - HDassert( ! entry_ptr->is_protected ); - HDassert( ! entry_ptr->is_pinned ); - - HDassert(FALSE); /* see comment above */ - - restart_scan = FALSE; - entry_ptr = cache_ptr->LRU_tail_ptr; -/* - H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr) -*/ - } - - entries_examined++; - } /* end while */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n", - FUNC, mpi_rank, entries_examined, - entries_cleared, entries_flushed); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* It is also possible that some of the cleared entries are on the - * pinned list. Must scan that also. - * - * WARNING: - * - * As we now allow unpinning, and removal of other entries as a side - * effect of flushing an entry, it is possible that the next entry - * in a PEL scan could either be no longer pinned, or no longer in - * the cache by the time we get to it. - * - * At present, this is not possible in this case, as we disallow such - * operations in the parallel version of the library. However, Quincey - * has been making noises about relaxing this. If and when he does, - * we have a potential problem here. - * - * The same issue exists in the serial cache, and there are tests - * to detect this problem when it occurs, and adjust to it. As seen - * above in the LRU scan, I have ported such tests to the parallel - * code where a close cognate exists in the serial code. - * - * I haven't done so here, as there are no PEL scans where the problem - * can occur in the serial code. Needless to say, this will have to - * be repaired if the constraints on pre_serialize and serialize - * callbacks are relaxed in the parallel version of the metadata cache. - * - * JRM -- 4/1/15 - */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n", - FUNC, mpi_rank, (int)(cache_ptr->pel_len)); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - entry_ptr = cache_ptr->pel_head_ptr; - while((entry_ptr != NULL) && - ((entries_cleared + entries_flushed + entries_delayed) - < num_candidates)) { - - /* If entry is marked for flush or for clear */ - if((entry_ptr->clear_on_unprotect||entry_ptr->flush_immediately)) { - - /* If this entry needs to be flushed last */ - if (entry_ptr->flush_me_last) { - - /* At this time, only the superblock supports being - flushed last. Conveniently, it also happens to be the only - entry that supports being flushed collectively, as well. Also - conveniently, it's always pinned, so we only need to check - for it while scanning the PEL here. Finally, it's never - included in a candidate list that excludes other dirty - entries in a cache, so we can handle this relatively simple - case here. - - For now, this function asserts this and saves the entry - to flush it after scanning the rest of the PEL list. - - If there are ever more entries that either need to be - flushed last and/or flushed collectively, this whole routine - will need to be reworked to handle all additional cases. As - it is the simple case of a single pinned entry needing - flushed last and collectively is just a minor addition to - this routine, but signficantly buffing up the usage of - flush_me_last or flush_me_collectively will require a more - intense rework of this function and potentially the function - of candidate lists as a whole. */ - - HDassert(entry_ptr->flush_me_collectively); - entries_to_flush_or_clear_last++; - entries_to_flush_collectively++; - HDassert(entries_to_flush_or_clear_last == 1); - HDassert(entries_to_flush_collectively == 1); - - /* Delay the entry. It will be flushed later. */ - delayed_ptr = entry_ptr; - entries_delayed++; - HDassert(entries_delayed == 1); - - } /* end if */ - - /* Else, this process needs to clear this entry. */ - else if (entry_ptr->clear_on_unprotect) { - HDassert(!entry_ptr->flush_immediately); - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_cleared++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, - (long long)clear_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - dxpl_id, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - TRUE, - NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } /* end else-if */ - - /* Else, if this process needs to independently flush this entry. */ - else if (entry_ptr->flush_immediately) { - entry_ptr->flush_immediately = FALSE; - flush_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_flushed++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, - (long long)flush_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - dxpl_id, - flush_ptr->addr, - H5C__NO_FLAGS_SET, - TRUE, - NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } /* end else-if */ - } /* end if */ - - /* Otherwise, this entry is not marked for flush or clear. Grab the next. */ - else { - entry_ptr = entry_ptr->next; - } /* end else */ - - } /* end while */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, - "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n", - FUNC, mpi_rank, entries_examined, - entries_cleared, entries_flushed); - HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank); - - HDfsync(stdout); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* ====================================================================== * - * Now, handle all delayed entries. * - * * - * This can *only* be the superblock at this time, so it's relatively * - * easy to deal with. We're collectively flushing the entry saved from * - * above. This will need to be handled differently if there are ever more * - * than one entry needing this special treatment.) * - * ====================================================================== */ - - if (delayed_ptr) { - - if (delayed_ptr->clear_on_unprotect) { - entry_ptr->clear_on_unprotect = FALSE; - entries_cleared++; - } else if (delayed_ptr->flush_immediately) { - entry_ptr->flush_immediately = FALSE; - entries_flushed++; - } /* end if */ - - if(H5C_flush_single_entry(f, - dxpl_id, - delayed_ptr->addr, - H5C__NO_FLAGS_SET, - TRUE, - NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, - "Can't flush entry collectively.") - - entries_flushed_collectively++; - entries_flushed_or_cleared_last++; - } /* end if */ - - /* ====================================================================== * - * Finished flushing everything. * - * ====================================================================== */ - - HDassert((entries_flushed == entries_to_flush)); - HDassert((entries_cleared == entries_to_clear)); - HDassert((entries_flushed_or_cleared_last == entries_to_flush_or_clear_last)); - HDassert((entries_flushed_collectively == entries_to_flush_collectively)); - - if((entries_flushed != entries_to_flush) || - (entries_cleared != entries_to_clear) || - (entries_flushed_or_cleared_last != entries_to_flush_or_clear_last) || - (entries_flushed_collectively != entries_to_flush_collectively)) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.") - -done: - if(candidate_assignment_table != NULL) - candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_apply_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5C_construct_candidate_list__clean_cache - * - * Purpose: Construct the list of entries that should be flushed to - * clean all entries in the cache. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr) -{ - size_t space_needed; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - - /* As a sanity check, set space needed to the size of the skip list. - * This should be the sum total of the sizes of all the dirty entries - * in the metadata cache. - */ - space_needed = cache_ptr->slist_size; - - /* Recall that while we shouldn't have any protected entries at this - * point, it is possible that some dirty entries may reside on the - * pinned list at this point. - */ - HDassert( cache_ptr->slist_size <= - (cache_ptr->dLRU_list_size + cache_ptr->pel_size) ); - HDassert( cache_ptr->slist_len <= - (cache_ptr->dLRU_list_len + cache_ptr->pel_len) ); - - if(space_needed > 0) { /* we have work to do */ - H5C_cache_entry_t *entry_ptr; - int nominated_entries_count = 0; - size_t nominated_entries_size = 0; - haddr_t nominated_addr; - - HDassert( cache_ptr->slist_len > 0 ); - - /* Scan the dirty LRU list from tail forward and nominate sufficient - * entries to free up the necessary space. - */ - entry_ptr = cache_ptr->dLRU_tail_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL)) { - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - entry_ptr = entry_ptr->aux_prev; - } /* end while */ - HDassert( entry_ptr == NULL ); - - /* it is possible that there are some dirty entries on the - * protected entry list as well -- scan it too if necessary - */ - entry_ptr = cache_ptr->pel_head_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL)) { - if(entry_ptr->is_dirty) { - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - } /* end if */ - - entry_ptr = entry_ptr->next; - } /* end while */ - - HDassert( nominated_entries_count == cache_ptr->slist_len ); - HDassert( nominated_entries_size == space_needed ); - } /* end if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_construct_candidate_list__clean_cache() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5C_construct_candidate_list__min_clean - * - * Purpose: Construct the list of entries that should be flushed to - * get the cache back within its min clean constraints. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr) -{ - size_t space_needed = 0; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - - /* compute the number of bytes (if any) that must be flushed to get the - * cache back within its min clean constraints. - */ - if(cache_ptr->max_cache_size > cache_ptr->index_size) { - if(((cache_ptr->max_cache_size - cache_ptr->index_size) + - cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size) - space_needed = 0; - else - space_needed = cache_ptr->min_clean_size - - ((cache_ptr->max_cache_size - cache_ptr->index_size) + - cache_ptr->cLRU_list_size); - } /* end if */ - else { - if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) - space_needed = 0; - else - space_needed = cache_ptr->min_clean_size - - cache_ptr->cLRU_list_size; - } /* end else */ - - if(space_needed > 0) { /* we have work to do */ - H5C_cache_entry_t *entry_ptr; - int nominated_entries_count = 0; - size_t nominated_entries_size = 0; - - HDassert( cache_ptr->slist_len > 0 ); - - /* Scan the dirty LRU list from tail forward and nominate sufficient - * entries to free up the necessary space. - */ - entry_ptr = cache_ptr->dLRU_tail_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL) && - (!entry_ptr->flush_me_last)) { - haddr_t nominated_addr; - - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - entry_ptr = entry_ptr->aux_prev; - } /* end while */ - HDassert( nominated_entries_count <= cache_ptr->slist_len ); - HDassert( nominated_entries_size >= space_needed ); - } /* end if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_construct_candidate_list__min_clean() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5C_create * * Purpose: Allocate, initialize, and return the address of a new @@ -1818,7 +956,7 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, if(entry_ptr->is_pinned) HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is pinned.") - /* If we get this far, call H5C_flush_single_entry() with the + /* If we get this far, call H5C__flush_single_entry() with the * H5C__FLUSH_INVALIDATE_FLAG and the H5C__FLUSH_CLEAR_ONLY_FLAG. * This will clear the entry, and then delete it from the cache. */ @@ -1831,8 +969,8 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, entry_size = entry_ptr->size; #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ - if(H5C_flush_single_entry(f, dxpl_id, entry_ptr->addr, flush_flags, TRUE, NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C_flush_single_entry() failed.") + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, flush_flags, TRUE, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C__flush_single_entry() failed.") #if H5C_DO_SANITY_CHECKS if ( entry_was_dirty ) @@ -1982,7 +1120,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags) /* set the cache_ptr->slist_change_in_pre_serialize and * cache_ptr->slist_change_in_serialize to false. * - * These flags are set to TRUE by H5C_flush_single_entry if the + * These flags are set to TRUE by H5C__flush_single_entry if the * slist is modified by a pre_serialize or serialize call respectively. * H5C_flush_cache uses these flags to detect any modifications * to the slist that might corrupt the scan of the slist -- and @@ -2156,7 +1294,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags) flushed_entries_size += (int64_t)entry_ptr->size; entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, + status = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, flags, @@ -2216,7 +1354,7 @@ H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags) flushed_entries_size += (int64_t)entry_ptr->size; entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, + status = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, flags, @@ -3198,318 +2336,6 @@ done: /*------------------------------------------------------------------------- - * - * Function: H5C_mark_entries_as_clean - * - * Purpose: When the H5C code is used to implement the metadata caches - * in PHDF5, only the cache with MPI_rank 0 is allowed to - * actually write entries to disk -- all other caches must - * retain dirty entries until they are advised that the - * entries are clean. - * - * This function exists to allow the H5C code to receive these - * notifications. - * - * The function receives a list of entry base addresses - * which must refer to dirty entries in the cache. If any - * of the entries are either clean or don't exist, the - * function flags an error. - * - * The function scans the list of entries and flushes all - * those that are currently unprotected with the - * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently - * protected are flagged for clearing when they are - * unprotected. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 7/5/05 - * - * Changes: Tidied up code, removeing some old commented out - * code that had been left in pending success of the - * new version. - * - * Note that unlike H5C_apply_candidate_list(), - * H5C_mark_entries_as_clean() makes all its calls to - * H6C_flush_single_entry() with the - * H5C__FLUSH_CLEAR_ONLY_FLAG set. As a result, - * the pre_serialize() and serialize calls are not made. - * - * This then implies that (assuming such actions were - * permitted in the parallel case) no loads, dirties, - * resizes, or removals of other entries can occur as - * a side effect of the flush. Hence, there is no need - * for the checks for entry removal / status change - * that I ported to H5C_apply_candidate_list(). - * - * However, if (in addition to allowing such operations - * in the parallel case), we allow such operations outside - * of the pre_serialize / serialize routines, this may - * cease to be the case -- requiring a review of this - * function. - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_mark_entries_as_clean(H5F_t * f, - hid_t dxpl_id, - int32_t ce_array_len, - haddr_t * ce_array_ptr) -{ - H5C_t * cache_ptr; - int entries_cleared; - int entries_examined; - int i; - int initial_list_len; - haddr_t addr; -#if H5C_DO_SANITY_CHECKS - int pinned_entries_marked = 0; - int protected_entries_marked = 0; - int other_entries_marked = 0; - haddr_t last_addr; -#endif /* H5C_DO_SANITY_CHECKS */ - H5C_cache_entry_t * clear_ptr = NULL; - H5C_cache_entry_t * entry_ptr = NULL; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( f ); - HDassert( f->shared ); - cache_ptr = f->shared->cache; - HDassert( cache_ptr ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - - HDassert( ce_array_len > 0 ); - HDassert( ce_array_ptr != NULL ); - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed on entry.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ - - for ( i = 0; i < ce_array_len; i++ ) - { - addr = ce_array_ptr[i]; - -#if H5C_DO_SANITY_CHECKS - if ( i == 0 ) { - - last_addr = addr; - - } else { - - if ( last_addr == addr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Duplicate entry in cleaned list.\n"); - - } else if ( last_addr > addr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "cleaned list not sorted.\n"); - } - } - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed in for loop.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ -#endif /* H5C_DO_SANITY_CHECKS */ - - HDassert( H5F_addr_defined(addr) ); - - H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) - - if ( entry_ptr == NULL ) { -#if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, - "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n", - (int)i, - (long)addr); -#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Listed entry not in cache?!?!?.") - - } else if ( ! entry_ptr->is_dirty ) { - -#if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, - "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n", - (long)addr); -#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Listed entry not dirty?!?!?.") - - } else { - - /* Mark the entry to be cleared on unprotect. We will - * scan the LRU list shortly, and clear all those entries - * not currently protected. - */ - entry_ptr->clear_on_unprotect = TRUE; -#if H5C_DO_SANITY_CHECKS - if ( entry_ptr->is_protected ) { - - protected_entries_marked++; - - } else if ( entry_ptr->is_pinned ) { - - pinned_entries_marked++; - - } else { - - other_entries_marked++; - } -#endif /* H5C_DO_SANITY_CHECKS */ - } - } - - /* Scan through the LRU list from back to front, and flush the - * entries whose clear_on_unprotect flags are set. Observe that - * any protected entries will not be on the LRU, and therefore - * will not be flushed at this time. - * - * Note that unlike H5C_apply_candidate_list(), - * H5C_mark_entries_as_clean() makes all its calls to - * H6C_flush_single_entry() with the H5C__FLUSH_CLEAR_ONLY_FLAG - * set. As a result, the pre_serialize() and serialize calls are - * not made. - * - * This then implies that (assuming such actions were - * permitted in the parallel case) no loads, dirties, - * resizes, or removals of other entries can occur as - * a side effect of the flush. Hence, there is no need - * for the checks for entry removal / status change - * that I ported to H5C_apply_candidate_list(). - * - * However, if (in addition to allowing such operations - * in the parallel case), we allow such operations outside - * of the pre_serialize / serialize routines, this may - * cease to be the case -- requiring a review of this - * point. - * JRM -- 4/7/15 - */ - - entries_cleared = 0; - entries_examined = 0; - initial_list_len = cache_ptr->LRU_list_len; - entry_ptr = cache_ptr->LRU_tail_ptr; - - while ( ( entry_ptr != NULL ) && - ( entries_examined <= initial_list_len ) && - ( entries_cleared < ce_array_len ) ) - { - if ( entry_ptr->clear_on_unprotect ) { - - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_cleared++; - - if ( H5C_flush_single_entry(f, - dxpl_id, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - TRUE, - NULL) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } - } else { - - entry_ptr = entry_ptr->prev; - } - entries_examined++; - } - -#if H5C_DO_SANITY_CHECKS - HDassert( entries_cleared == other_entries_marked ); -#endif /* H5C_DO_SANITY_CHECKS */ - - /* It is also possible that some of the cleared entries are on the - * pinned list. Must scan that also. - */ - - entry_ptr = cache_ptr->pel_head_ptr; - - while ( entry_ptr != NULL ) - { - if ( entry_ptr->clear_on_unprotect ) { - - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_cleared++; - - if ( H5C_flush_single_entry(f, - dxpl_id, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - TRUE, - NULL) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } - } else { - - entry_ptr = entry_ptr->next; - } - } - -#if H5C_DO_SANITY_CHECKS - HDassert( entries_cleared == pinned_entries_marked + other_entries_marked ); - HDassert( entries_cleared + protected_entries_marked == ce_array_len ); -#endif /* H5C_DO_SANITY_CHECKS */ - - HDassert( ( entries_cleared == ce_array_len ) || - ( (ce_array_len - entries_cleared) <= cache_ptr->pl_len ) ); - -#if H5C_DO_SANITY_CHECKS - i = 0; - entry_ptr = cache_ptr->pl_head_ptr; - while ( entry_ptr != NULL ) - { - if ( entry_ptr->clear_on_unprotect ) { - - i++; - } - entry_ptr = entry_ptr->next; - } - HDassert( (entries_cleared + i) == ce_array_len ); -#endif /* H5C_DO_SANITY_CHECKS */ - -done: - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed on exit.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ - - FUNC_LEAVE_NOAPI(ret_value) - -} /* H5C_mark_entries_as_clean() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5C_mark_entry_dirty * * Purpose: Mark a pinned or protected entry as dirty. The target entry @@ -5891,7 +4717,7 @@ done: * writes (secondary_dxpl_id). Since an uprotect cannot * occasion a write at present, all this is moot for now. * However, things change, and in any case, - * H5C_flush_single_entry() needs primary_dxpl_id and + * H5C__flush_single_entry() needs primary_dxpl_id and * secondary_dxpl_id in its parameter list. * * The function can't cause a read either, so the dxpl_id @@ -6133,7 +4959,7 @@ H5C_unprotect(H5F_t * f, if(take_ownership) flush_flags |= H5C__TAKE_OWNERSHIP_FLAG; - if ( H5C_flush_single_entry(f, + if ( H5C__flush_single_entry(f, dxpl_id, addr, flush_flags, @@ -6174,7 +5000,7 @@ H5C_unprotect(H5F_t * f, "hash table contains multiple entries for addr?!?.") } - if ( H5C_flush_single_entry(f, + if ( H5C__flush_single_entry(f, dxpl_id, addr, H5C__FLUSH_CLEAR_ONLY_FLAG, @@ -7504,7 +6330,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, /* reset entries_removed_counter and * last_entry_removed_ptr prior to the call to - * H5C_flush_single_entry() so that we can spot + * H5C__flush_single_entry() so that we can spot * unexpected removals of entries from the cache, * and set the restart_scan flag if proceeding * would be likely to cause us to scan an entry @@ -7513,7 +6339,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, cache_ptr->entries_removed_counter = 0; cache_ptr->last_entry_removed_ptr = NULL; - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__NO_FLAGS_SET, @@ -7529,7 +6355,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, bytes_evicted += entry_ptr->size; - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__FLUSH_INVALIDATE_FLAG, @@ -7620,7 +6446,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, if ( ! (entry_ptr->is_dirty) ) { - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__FLUSH_INVALIDATE_FLAG, @@ -8126,7 +6952,7 @@ done: * was largely removed from the cache data structures before * the flush proper. However, re-entrant calls to the cache * in the parallel case required a re-factoring of the - * H5C_flush_single_entry() function to keep entries fully + * H5C__flush_single_entry() function to keep entries fully * in the cache until after the pre-serialize and serialize * calls. * JRM -- 12/25/14 @@ -8239,7 +7065,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, */ #if H5C_DO_SANITY_CHECKS - /* Depending on circumstances, H5C_flush_single_entry() will + /* Depending on circumstances, H5C__flush_single_entry() will * remove dirty entries from the slist as it flushes them. * Thus for sanity checks we must make note of the initial * slist length and size before we do any flushes. @@ -8272,7 +7098,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, /* set the cache_ptr->slist_change_in_pre_serialize and * cache_ptr->slist_change_in_serialize to false. * - * These flags are set to TRUE by H5C_flush_single_entry if the + * These flags are set to TRUE by H5C__flush_single_entry if the * slist is modified by a pre_serialize or serialize call * respectively. * @@ -8381,7 +7207,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, /* Test to see if we are can flush the entry now. * If we can, go ahead and flush, but don't tell - * H5C_flush_single_entry() to destroy the entry + * H5C__flush_single_entry() to destroy the entry * as pinned entries can't be evicted. */ if(entry_ptr->flush_dep_height == curr_flush_dep_height ) { @@ -8399,7 +7225,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, + status = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__NO_FLAGS_SET, @@ -8461,7 +7287,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, + status = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG), @@ -8603,7 +7429,7 @@ H5C_flush_invalidate_cache(const H5F_t * f, entry_was_dirty = entry_ptr->is_dirty; - status = H5C_flush_single_entry(f, + status = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG), @@ -8755,7 +7581,7 @@ done: /*------------------------------------------------------------------------- * - * Function: H5C_flush_single_entry + * Function: H5C__flush_single_entry * * Purpose: Flush or clear (and evict if requested) the cache entry * with the specified address and type. If the type is NULL, @@ -8814,8 +7640,8 @@ done: * *------------------------------------------------------------------------- */ -static herr_t -H5C_flush_single_entry(const H5F_t * f, +herr_t +H5C__flush_single_entry(const H5F_t * f, hid_t dxpl_id, haddr_t addr, unsigned flags, @@ -8838,7 +7664,7 @@ H5C_flush_single_entry(const H5F_t * f, H5C_cache_entry_t * entry_ptr = NULL; herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_PACKAGE HDassert( f ); HDassert( cache_ptr ); @@ -9620,7 +8446,7 @@ done: ( ( entry_ptr ) && ( ! entry_ptr->is_dirty ) ) ); FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_flush_single_entry() */ +} /* H5C__flush_single_entry() */ /*------------------------------------------------------------------------- @@ -10099,7 +8925,7 @@ done: * attempts to load another entry. * * This error was exposed by a re-factor of the - * H5C_flush_single_entry() routine. However, it was + * H5C__flush_single_entry() routine. However, it was * a potential bug from the moment that entries were * allowed to load other entries on flush. * @@ -10221,7 +9047,7 @@ H5C_make_space_in_cache(H5F_t * f, /* reset entries_removed_counter and * last_entry_removed_ptr prior to the call to - * H5C_flush_single_entry() so that we can spot + * H5C__flush_single_entry() so that we can spot * unexpected removals of entries from the cache, * and set the restart_scan flag if proceeding * would be likely to cause us to scan an entry @@ -10230,7 +9056,7 @@ H5C_make_space_in_cache(H5F_t * f, cache_ptr->entries_removed_counter = 0; cache_ptr->last_entry_removed_ptr = NULL; - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__NO_FLAGS_SET, @@ -10249,7 +9075,7 @@ H5C_make_space_in_cache(H5F_t * f, cache_ptr->entries_scanned_to_make_space++; #endif /* H5C_COLLECT_CACHE_STATS */ - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__FLUSH_INVALIDATE_FLAG, @@ -10404,7 +9230,7 @@ H5C_make_space_in_cache(H5F_t * f, prev_ptr = entry_ptr->aux_prev; - result = H5C_flush_single_entry(f, + result = H5C__flush_single_entry(f, dxpl_id, entry_ptr->addr, H5C__FLUSH_INVALIDATE_FLAG, diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c new file mode 100644 index 0000000..3abaf8b --- /dev/null +++ b/src/H5Cmpio.c @@ -0,0 +1,1260 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/*------------------------------------------------------------------------- + * + * Created: H5Cmpio.c + * June 20 2015 + * Quincey Koziol + * + * Purpose: Functions in this file implement support for parallel I/O for + * generic cache code. + * + *------------------------------------------------------------------------- + */ + +/****************/ +/* Module Setup */ +/****************/ + +#define H5C_PACKAGE /*suppress error about including H5Cpkg */ +#define H5F_PACKAGE /*suppress error about including H5Fpkg */ + + +/***********/ +/* Headers */ +/***********/ +#include "H5private.h" /* Generic Functions */ +#include "H5ACprivate.h" /* Metadata cache */ +#include "H5Cpkg.h" /* Cache */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fpkg.h" /* Files */ +#include "H5Iprivate.h" /* IDs */ +#include "H5MMprivate.h" /* Memory management */ + + +#ifdef H5_HAVE_PARALLEL + +/****************/ +/* Local Macros */ +/****************/ +#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0 + + +/******************/ +/* Local Typedefs */ +/******************/ + + +/********************/ +/* Local Prototypes */ +/********************/ + + +/*********************/ +/* Package Variables */ +/*********************/ + + +/*****************************/ +/* Library Private Variables */ +/*****************************/ + + +/*******************/ +/* Local Variables */ +/*******************/ + + + +/*------------------------------------------------------------------------- + * Function: H5C_apply_candidate_list + * + * Purpose: Apply the supplied candidate list. + * + * We used to do this by simply having each process write + * every mpi_size-th entry in the candidate list, starting + * at index mpi_rank, and mark all the others clean. + * + * However, this can cause unnecessary contention in a file + * system by increasing the number of processes writing to + * adjacent locations in the HDF5 file. + * + * To attempt to minimize this, we now arange matters such + * that each process writes n adjacent entries in the + * candidate list, and marks all others clean. We must do + * this in such a fashion as to guarantee that each entry + * on the candidate list is written by exactly one process, + * and marked clean by all others. + * + * To do this, first construct a table mapping mpi_rank + * to the index of the first entry in the candidate list to + * be written by the process of that mpi_rank, and then use + * the table to control which entries are written and which + * are marked as clean as a function of the mpi_rank. + * + * Note that the table must be identical on all processes, as + * all see the same candidate list, mpi_size, and mpi_rank -- + * the inputs used to construct the table. + * + * We construct the table as follows. Let: + * + * n = num_candidates / mpi_size; + * + * m = num_candidates % mpi_size; + * + * Now allocate an array of integers of length mpi_size + 1, + * and call this array candidate_assignment_table. + * + * Conceptually, if the number of candidates is a multiple + * of the mpi_size, we simply pass through the candidate list + * and assign n entries to each process to flush, with the + * index of the first entry to flush in the location in + * the candidate_assignment_table indicated by the mpi_rank + * of the process. + * + * In the more common case in which the candidate list isn't + * isn't a multiple of the mpi_size, we pretend it is, and + * give num_candidates % mpi_size processes one extra entry + * each to make things work out. + * + * Once the table is constructed, we determine the first and + * last entry this process is to flush as follows: + * + * first_entry_to_flush = candidate_assignment_table[mpi_rank] + * + * last_entry_to_flush = + * candidate_assignment_table[mpi_rank + 1] - 1; + * + * With these values determined, we simply scan through the + * candidate list, marking all entries in the range + * [first_entry_to_flush, last_entry_to_flush] for flush, + * and all others to be cleaned. + * + * Finally, we scan the LRU from tail to head, flushing + * or marking clean the candidate entries as indicated. + * If necessary, we scan the pinned list as well. + * + * Note that this function will fail if any protected or + * clean entries appear on the candidate list. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + * Changes: Ported code to detect next entry status changes as the + * the result of a flush from the serial code in the scan of + * the LRU. Also added code to detect and adapt to the + * removal from the cache of the next entry in the scan of + * the LRU. + * + * Note that at present, all of these changes should not + * be required as the operations on entries as they are + * flushed that can cause these condiditions are not premitted + * in the parallel case. However, Quincey indicates that + * this may change, and thus has requested the modification. + * + * Note the assert(FALSE) in the if statement whose body + * restarts the scan of the LRU. As the body of the if + * statement should be unreachable, it should never be + * triggered until the constraints on the parallel case + * are relaxed. Please remove the assertion at that time. + * + * Also added warning on the Pinned Entry List scan, as it + * is potentially subject to the same issue. As there is + * no cognate of this scan in the serial code, I don't have + * a fix to port to it. + * + * JRM -- 4/10/19 + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_apply_candidate_list(H5F_t * f, + hid_t dxpl_id, + H5C_t * cache_ptr, + int num_candidates, + haddr_t * candidates_list_ptr, + int mpi_rank, + int mpi_size) +{ + hbool_t restart_scan; + hbool_t prev_is_dirty; + int i; + int m; + int n; + int first_entry_to_flush; + int last_entry_to_flush; + int entries_to_clear = 0; + int entries_to_flush = 0; + int entries_to_flush_or_clear_last = 0; + int entries_to_flush_collectively = 0; + int entries_cleared = 0; + int entries_flushed = 0; + int entries_delayed = 0; + int entries_flushed_or_cleared_last = 0; + int entries_flushed_collectively = 0; + int entries_examined = 0; + int initial_list_len; + int * candidate_assignment_table = NULL; + haddr_t addr; + H5C_cache_entry_t * clear_ptr = NULL; + H5C_cache_entry_t * next_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; + H5C_cache_entry_t * flush_ptr = NULL; + H5C_cache_entry_t * delayed_ptr = NULL; +#if H5C_DO_SANITY_CHECKS + haddr_t last_addr; +#endif /* H5C_DO_SANITY_CHECKS */ +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + char tbl_buf[1024]; +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( num_candidates > 0 ); + HDassert( num_candidates <= cache_ptr->slist_len ); + HDassert( candidates_list_ptr != NULL ); + HDassert( 0 <= mpi_rank ); + HDassert( mpi_rank < mpi_size ); + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n", + FUNC, mpi_rank); + for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0'; + sprintf(&(tbl_buf[0]), "candidate list = "); + for ( i = 0; i < num_candidates; i++ ) + { + sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx", + (long long)(*(candidates_list_ptr + i))); + } + sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); + HDfprintf(stdout, "%s", tbl_buf); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + n = num_candidates / mpi_size; + m = num_candidates % mpi_size; + HDassert(n >= 0); + + if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table") + + candidate_assignment_table[0] = 0; + candidate_assignment_table[mpi_size] = num_candidates; + + if(m == 0) { /* mpi_size is an even divisor of num_candidates */ + HDassert(n > 0); + for(i = 1; i < mpi_size; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + } /* end if */ + else { + for(i = 1; i <= m; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1; + + if(num_candidates < mpi_size) { + for(i = m + 1; i < mpi_size; i++) + candidate_assignment_table[i] = num_candidates; + } /* end if */ + else { + for(i = m + 1; i < mpi_size; i++) + candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + } /* end else */ + } /* end else */ + HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates); + +#if H5C_DO_SANITY_CHECKS + /* verify that the candidate assignment table has the expected form */ + for ( i = 1; i < mpi_size - 1; i++ ) + { + int a, b; + + a = candidate_assignment_table[i] - candidate_assignment_table[i - 1]; + b = candidate_assignment_table[i + 1] - candidate_assignment_table[i]; + + HDassert( n + 1 >= a ); + HDassert( a >= b ); + HDassert( b >= n ); + } +#endif /* H5C_DO_SANITY_CHECKS */ + + first_entry_to_flush = candidate_assignment_table[mpi_rank]; + last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1; + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + for ( i = 0; i < 1024; i++ ) + tbl_buf[i] = '\0'; + sprintf(&(tbl_buf[0]), "candidate assignment table = "); + for(i = 0; i <= mpi_size; i++) + sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %d", candidate_assignment_table[i]); + sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); + HDfprintf(stdout, "%s", tbl_buf); + + HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n", + FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush); + + HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + for(i = 0; i < num_candidates; i++) { + addr = candidates_list_ptr[i]; + HDassert( H5F_addr_defined(addr) ); + +#if H5C_DO_SANITY_CHECKS + if ( i > 0 ) { + if ( last_addr == addr ) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n") + } else if ( last_addr > addr ) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n") + } + } + + last_addr = addr; +#endif /* H5C_DO_SANITY_CHECKS */ + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + if(entry_ptr == NULL) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.") + } else if(!entry_ptr->is_dirty) { + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.") + } else if ( entry_ptr->is_protected ) { + /* For now at least, we can't deal with protected entries. + * If we encounter one, scream and die. If it becomes an + * issue, we should be able to work around this. + */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.") + } else { + /* determine whether the entry is to be cleared or flushed, + * and mark it accordingly. We will scan the protected and + * pinned list shortly, and clear or flush according to these + * markings. + */ + if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) { + entries_to_flush++; + entry_ptr->flush_immediately = TRUE; + } /* end if */ + else { + entries_to_clear++; + entry_ptr->clear_on_unprotect = TRUE; + } /* end else */ + } /* end else */ + } /* end for */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n", + FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear, + (int)entries_to_flush); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + + /* We have now marked all the entries on the candidate list for + * either flush or clear -- now scan the LRU and the pinned list + * for these entries and do the deed. + * + * Note that we are doing things in this round about manner so as + * to preserve the order of the LRU list to the best of our ability. + * If we don't do this, my experiments indicate that we will have a + * noticably poorer hit ratio as a result. + */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank, + (int)(cache_ptr->LRU_list_len)); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* ===================================================================== * + * Now scan the LRU and PEL lists, flushing or clearing entries as + * needed. + * + * The flush_me_last and flush_me_collectively flags may dictate how or + * when some entries can be flushed, and should be addressed here. + * However, in their initial implementation, these flags only apply to the + * superblock, so there's only a relatively small change to this function + * to account for this one case where they come into play. If these flags + * are ever expanded upon, this function and the following flushing steps + * should be reworked to account for additional cases. + * ===================================================================== */ + + HDassert(entries_to_flush >= 0); + + restart_scan = FALSE; + entries_examined = 0; + initial_list_len = cache_ptr->LRU_list_len; + entry_ptr = cache_ptr->LRU_tail_ptr; + + /* Examine each entry in the LRU list */ + while ( ( entry_ptr != NULL ) + && + ( entries_examined <= (entries_to_flush + 1) * initial_list_len ) + && + ( (entries_cleared + entries_flushed) < num_candidates ) ) { + + if ( entry_ptr->prev != NULL ) + prev_is_dirty = entry_ptr->prev->is_dirty; + + /* If this process needs to clear this entry. */ + if(entry_ptr->clear_on_unprotect) { + + HDassert(entry_ptr->is_dirty); + + next_ptr = entry_ptr->next; + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->prev; + entries_cleared++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, + (long long)clear_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* No need to check for the next entry in the scan being + * removed from the cache, as this call to H5C__flush_single_entry() + * will not call either the pre_serialize or serialize callbacks. + */ + + if(H5C__flush_single_entry(f, + dxpl_id, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + TRUE, + NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } /* end if */ + + /* Else, if this process needs to flush this entry. */ + else if (entry_ptr->flush_immediately) { + + HDassert(entry_ptr->is_dirty); + + next_ptr = entry_ptr->next; + entry_ptr->flush_immediately = FALSE; + flush_ptr = entry_ptr; + entry_ptr = entry_ptr->prev; + entries_flushed++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, + (long long)flush_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* reset entries_removed_counter and + * last_entry_removed_ptr prior to the call to + * H5C__flush_single_entry() so that we can spot + * unexpected removals of entries from the cache, + * and set the restart_scan flag if proceeding + * would be likely to cause us to scan an entry + * that is no longer in the cache. + * + * Note that as of this writing (April 2015) this + * case cannot occur in the parallel case. However + * Quincey is making noises about changing this, hence + * the insertion of this test. + * + * Note also that there is no test code to verify + * that this code actually works (although similar code + * in the serial version exists and is tested). + * + * Implementing a test will likely require implementing + * flush op like facilities in the parallel tests. At + * a guess this will not be terribly painful, but it + * will take a bit of time. + */ + cache_ptr->entries_removed_counter = 0; + cache_ptr->last_entry_removed_ptr = NULL; + + if(H5C__flush_single_entry(f, + dxpl_id, + flush_ptr->addr, + H5C__NO_FLAGS_SET, + TRUE, + NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.") + + if ( ( cache_ptr->entries_removed_counter > 1 ) || + ( cache_ptr->last_entry_removed_ptr == entry_ptr ) ) + + restart_scan = TRUE; + + } /* end else-if */ + + /* Otherwise, no action to be taken on this entry. Grab the next. */ + else { + entry_ptr = entry_ptr->prev; + + if ( entry_ptr != NULL ) + next_ptr = entry_ptr->next; + + } /* end else */ + + if ( ( entry_ptr != NULL ) + && + ( ( restart_scan ) + || + ( entry_ptr->is_dirty != prev_is_dirty ) + || + ( entry_ptr->next != next_ptr ) + || + ( entry_ptr->is_protected ) + || + ( entry_ptr->is_pinned ) + ) + ) { + + /* something has happened to the LRU -- start over + * from the tail. + * + * Recall that this code should be un-reachable at present, + * as all the operations by entries on flush that could cause + * it to be reachable are disallowed in the parallel case at + * present. Hence the following assertion which should be + * removed if the above changes. + */ + + HDassert( ! restart_scan ); + HDassert( entry_ptr->is_dirty == prev_is_dirty ); + HDassert( entry_ptr->next == next_ptr ); + HDassert( ! entry_ptr->is_protected ); + HDassert( ! entry_ptr->is_pinned ); + + HDassert(FALSE); /* see comment above */ + + restart_scan = FALSE; + entry_ptr = cache_ptr->LRU_tail_ptr; +/* + H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr) +*/ + } + + entries_examined++; + } /* end while */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n", + FUNC, mpi_rank, entries_examined, + entries_cleared, entries_flushed); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* It is also possible that some of the cleared entries are on the + * pinned list. Must scan that also. + * + * WARNING: + * + * As we now allow unpinning, and removal of other entries as a side + * effect of flushing an entry, it is possible that the next entry + * in a PEL scan could either be no longer pinned, or no longer in + * the cache by the time we get to it. + * + * At present, this is not possible in this case, as we disallow such + * operations in the parallel version of the library. However, Quincey + * has been making noises about relaxing this. If and when he does, + * we have a potential problem here. + * + * The same issue exists in the serial cache, and there are tests + * to detect this problem when it occurs, and adjust to it. As seen + * above in the LRU scan, I have ported such tests to the parallel + * code where a close cognate exists in the serial code. + * + * I haven't done so here, as there are no PEL scans where the problem + * can occur in the serial code. Needless to say, this will have to + * be repaired if the constraints on pre_serialize and serialize + * callbacks are relaxed in the parallel version of the metadata cache. + * + * JRM -- 4/1/15 + */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n", + FUNC, mpi_rank, (int)(cache_ptr->pel_len)); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + entry_ptr = cache_ptr->pel_head_ptr; + while((entry_ptr != NULL) && + ((entries_cleared + entries_flushed + entries_delayed) + < num_candidates)) { + + /* If entry is marked for flush or for clear */ + if((entry_ptr->clear_on_unprotect||entry_ptr->flush_immediately)) { + + /* If this entry needs to be flushed last */ + if (entry_ptr->flush_me_last) { + + /* At this time, only the superblock supports being + flushed last. Conveniently, it also happens to be the only + entry that supports being flushed collectively, as well. Also + conveniently, it's always pinned, so we only need to check + for it while scanning the PEL here. Finally, it's never + included in a candidate list that excludes other dirty + entries in a cache, so we can handle this relatively simple + case here. + + For now, this function asserts this and saves the entry + to flush it after scanning the rest of the PEL list. + + If there are ever more entries that either need to be + flushed last and/or flushed collectively, this whole routine + will need to be reworked to handle all additional cases. As + it is the simple case of a single pinned entry needing + flushed last and collectively is just a minor addition to + this routine, but signficantly buffing up the usage of + flush_me_last or flush_me_collectively will require a more + intense rework of this function and potentially the function + of candidate lists as a whole. */ + + HDassert(entry_ptr->flush_me_collectively); + entries_to_flush_or_clear_last++; + entries_to_flush_collectively++; + HDassert(entries_to_flush_or_clear_last == 1); + HDassert(entries_to_flush_collectively == 1); + + /* Delay the entry. It will be flushed later. */ + delayed_ptr = entry_ptr; + entries_delayed++; + HDassert(entries_delayed == 1); + + } /* end if */ + + /* Else, this process needs to clear this entry. */ + else if (entry_ptr->clear_on_unprotect) { + HDassert(!entry_ptr->flush_immediately); + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->next; + entries_cleared++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, + (long long)clear_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C__flush_single_entry(f, + dxpl_id, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + TRUE, + NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } /* end else-if */ + + /* Else, if this process needs to independently flush this entry. */ + else if (entry_ptr->flush_immediately) { + entry_ptr->flush_immediately = FALSE; + flush_ptr = entry_ptr; + entry_ptr = entry_ptr->next; + entries_flushed++; + +#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) + HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, + (long long)flush_ptr->addr); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + if(H5C__flush_single_entry(f, + dxpl_id, + flush_ptr->addr, + H5C__NO_FLAGS_SET, + TRUE, + NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } /* end else-if */ + } /* end if */ + + /* Otherwise, this entry is not marked for flush or clear. Grab the next. */ + else { + entry_ptr = entry_ptr->next; + } /* end else */ + + } /* end while */ + +#if H5C_APPLY_CANDIDATE_LIST__DEBUG + HDfprintf(stdout, + "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n", + FUNC, mpi_rank, entries_examined, + entries_cleared, entries_flushed); + HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank); + + HDfsync(stdout); +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + + /* ====================================================================== * + * Now, handle all delayed entries. * + * * + * This can *only* be the superblock at this time, so it's relatively * + * easy to deal with. We're collectively flushing the entry saved from * + * above. This will need to be handled differently if there are ever more * + * than one entry needing this special treatment.) * + * ====================================================================== */ + + if (delayed_ptr) { + + if (delayed_ptr->clear_on_unprotect) { + entry_ptr->clear_on_unprotect = FALSE; + entries_cleared++; + } else if (delayed_ptr->flush_immediately) { + entry_ptr->flush_immediately = FALSE; + entries_flushed++; + } /* end if */ + + if(H5C__flush_single_entry(f, + dxpl_id, + delayed_ptr->addr, + H5C__NO_FLAGS_SET, + TRUE, + NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, + "Can't flush entry collectively.") + + entries_flushed_collectively++; + entries_flushed_or_cleared_last++; + } /* end if */ + + /* ====================================================================== * + * Finished flushing everything. * + * ====================================================================== */ + + HDassert((entries_flushed == entries_to_flush)); + HDassert((entries_cleared == entries_to_clear)); + HDassert((entries_flushed_or_cleared_last == entries_to_flush_or_clear_last)); + HDassert((entries_flushed_collectively == entries_to_flush_collectively)); + + if((entries_flushed != entries_to_flush) || + (entries_cleared != entries_to_clear) || + (entries_flushed_or_cleared_last != entries_to_flush_or_clear_last) || + (entries_flushed_collectively != entries_to_flush_collectively)) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.") + +done: + if(candidate_assignment_table != NULL) + candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table); + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_apply_candidate_list() */ + + +/*------------------------------------------------------------------------- + * Function: H5C_construct_candidate_list__clean_cache + * + * Purpose: Construct the list of entries that should be flushed to + * clean all entries in the cache. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr) +{ + size_t space_needed; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + /* As a sanity check, set space needed to the size of the skip list. + * This should be the sum total of the sizes of all the dirty entries + * in the metadata cache. + */ + space_needed = cache_ptr->slist_size; + + /* Recall that while we shouldn't have any protected entries at this + * point, it is possible that some dirty entries may reside on the + * pinned list at this point. + */ + HDassert( cache_ptr->slist_size <= + (cache_ptr->dLRU_list_size + cache_ptr->pel_size) ); + HDassert( cache_ptr->slist_len <= + (cache_ptr->dLRU_list_len + cache_ptr->pel_len) ); + + if(space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; + int nominated_entries_count = 0; + size_t nominated_entries_size = 0; + haddr_t nominated_addr; + + HDassert( cache_ptr->slist_len > 0 ); + + /* Scan the dirty LRU list from tail forward and nominate sufficient + * entries to free up the necessary space. + */ + entry_ptr = cache_ptr->dLRU_tail_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL)) { + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + entry_ptr = entry_ptr->aux_prev; + } /* end while */ + HDassert( entry_ptr == NULL ); + + /* it is possible that there are some dirty entries on the + * protected entry list as well -- scan it too if necessary + */ + entry_ptr = cache_ptr->pel_head_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL)) { + if(entry_ptr->is_dirty) { + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + } /* end if */ + + entry_ptr = entry_ptr->next; + } /* end while */ + + HDassert( nominated_entries_count == cache_ptr->slist_len ); + HDassert( nominated_entries_size == space_needed ); + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_construct_candidate_list__clean_cache() */ + + +/*------------------------------------------------------------------------- + * Function: H5C_construct_candidate_list__min_clean + * + * Purpose: Construct the list of entries that should be flushed to + * get the cache back within its min clean constraints. + * + * This function is used in managing sync points, and + * shouldn't be used elsewhere. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: John Mainzer + * 3/17/10 + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr) +{ + size_t space_needed = 0; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + /* compute the number of bytes (if any) that must be flushed to get the + * cache back within its min clean constraints. + */ + if(cache_ptr->max_cache_size > cache_ptr->index_size) { + if(((cache_ptr->max_cache_size - cache_ptr->index_size) + + cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size) + space_needed = 0; + else + space_needed = cache_ptr->min_clean_size - + ((cache_ptr->max_cache_size - cache_ptr->index_size) + + cache_ptr->cLRU_list_size); + } /* end if */ + else { + if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) + space_needed = 0; + else + space_needed = cache_ptr->min_clean_size - + cache_ptr->cLRU_list_size; + } /* end else */ + + if(space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; + int nominated_entries_count = 0; + size_t nominated_entries_size = 0; + + HDassert( cache_ptr->slist_len > 0 ); + + /* Scan the dirty LRU list from tail forward and nominate sufficient + * entries to free up the necessary space. + */ + entry_ptr = cache_ptr->dLRU_tail_ptr; + while((nominated_entries_size < space_needed) && + (nominated_entries_count < cache_ptr->slist_len) && + (entry_ptr != NULL) && + (!entry_ptr->flush_me_last)) { + haddr_t nominated_addr; + + HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( entry_ptr->ro_ref_count == 0 ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + nominated_addr = entry_ptr->addr; + if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.") + + nominated_entries_size += entry_ptr->size; + nominated_entries_count++; + entry_ptr = entry_ptr->aux_prev; + } /* end while */ + HDassert( nominated_entries_count <= cache_ptr->slist_len ); + HDassert( nominated_entries_size >= space_needed ); + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_construct_candidate_list__min_clean() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_mark_entries_as_clean + * + * Purpose: When the H5C code is used to implement the metadata caches + * in PHDF5, only the cache with MPI_rank 0 is allowed to + * actually write entries to disk -- all other caches must + * retain dirty entries until they are advised that the + * entries are clean. + * + * This function exists to allow the H5C code to receive these + * notifications. + * + * The function receives a list of entry base addresses + * which must refer to dirty entries in the cache. If any + * of the entries are either clean or don't exist, the + * function flags an error. + * + * The function scans the list of entries and flushes all + * those that are currently unprotected with the + * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently + * protected are flagged for clearing when they are + * unprotected. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 7/5/05 + * + * Changes: Tidied up code, removeing some old commented out + * code that had been left in pending success of the + * new version. + * + * Note that unlike H5C_apply_candidate_list(), + * H5C_mark_entries_as_clean() makes all its calls to + * H6C_flush_single_entry() with the + * H5C__FLUSH_CLEAR_ONLY_FLAG set. As a result, + * the pre_serialize() and serialize calls are not made. + * + * This then implies that (assuming such actions were + * permitted in the parallel case) no loads, dirties, + * resizes, or removals of other entries can occur as + * a side effect of the flush. Hence, there is no need + * for the checks for entry removal / status change + * that I ported to H5C_apply_candidate_list(). + * + * However, if (in addition to allowing such operations + * in the parallel case), we allow such operations outside + * of the pre_serialize / serialize routines, this may + * cease to be the case -- requiring a review of this + * function. + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_mark_entries_as_clean(H5F_t * f, + hid_t dxpl_id, + int32_t ce_array_len, + haddr_t * ce_array_ptr) +{ + H5C_t * cache_ptr; + int entries_cleared; + int entries_examined; + int i; + int initial_list_len; + haddr_t addr; +#if H5C_DO_SANITY_CHECKS + int pinned_entries_marked = 0; + int protected_entries_marked = 0; + int other_entries_marked = 0; + haddr_t last_addr; +#endif /* H5C_DO_SANITY_CHECKS */ + H5C_cache_entry_t * clear_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + HDassert( f ); + HDassert( f->shared ); + cache_ptr = f->shared->cache; + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + HDassert( ce_array_len > 0 ); + HDassert( ce_array_ptr != NULL ); + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed on entry.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + for ( i = 0; i < ce_array_len; i++ ) + { + addr = ce_array_ptr[i]; + +#if H5C_DO_SANITY_CHECKS + if ( i == 0 ) { + + last_addr = addr; + + } else { + + if ( last_addr == addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Duplicate entry in cleaned list.\n"); + + } else if ( last_addr > addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "cleaned list not sorted.\n"); + } + } + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed in for loop.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ +#endif /* H5C_DO_SANITY_CHECKS */ + + HDassert( H5F_addr_defined(addr) ); + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + + if ( entry_ptr == NULL ) { +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n", + (int)i, + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not in cache?!?!?.") + + } else if ( ! entry_ptr->is_dirty ) { + +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n", + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not dirty?!?!?.") + + } else { + + /* Mark the entry to be cleared on unprotect. We will + * scan the LRU list shortly, and clear all those entries + * not currently protected. + */ + entry_ptr->clear_on_unprotect = TRUE; +#if H5C_DO_SANITY_CHECKS + if ( entry_ptr->is_protected ) { + + protected_entries_marked++; + + } else if ( entry_ptr->is_pinned ) { + + pinned_entries_marked++; + + } else { + + other_entries_marked++; + } +#endif /* H5C_DO_SANITY_CHECKS */ + } + } + + /* Scan through the LRU list from back to front, and flush the + * entries whose clear_on_unprotect flags are set. Observe that + * any protected entries will not be on the LRU, and therefore + * will not be flushed at this time. + * + * Note that unlike H5C_apply_candidate_list(), + * H5C_mark_entries_as_clean() makes all its calls to + * H6C_flush_single_entry() with the H5C__FLUSH_CLEAR_ONLY_FLAG + * set. As a result, the pre_serialize() and serialize calls are + * not made. + * + * This then implies that (assuming such actions were + * permitted in the parallel case) no loads, dirties, + * resizes, or removals of other entries can occur as + * a side effect of the flush. Hence, there is no need + * for the checks for entry removal / status change + * that I ported to H5C_apply_candidate_list(). + * + * However, if (in addition to allowing such operations + * in the parallel case), we allow such operations outside + * of the pre_serialize / serialize routines, this may + * cease to be the case -- requiring a review of this + * point. + * JRM -- 4/7/15 + */ + + entries_cleared = 0; + entries_examined = 0; + initial_list_len = cache_ptr->LRU_list_len; + entry_ptr = cache_ptr->LRU_tail_ptr; + + while ( ( entry_ptr != NULL ) && + ( entries_examined <= initial_list_len ) && + ( entries_cleared < ce_array_len ) ) + { + if ( entry_ptr->clear_on_unprotect ) { + + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->prev; + entries_cleared++; + + if ( H5C__flush_single_entry(f, + dxpl_id, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + TRUE, + NULL) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } + } else { + + entry_ptr = entry_ptr->prev; + } + entries_examined++; + } + +#if H5C_DO_SANITY_CHECKS + HDassert( entries_cleared == other_entries_marked ); +#endif /* H5C_DO_SANITY_CHECKS */ + + /* It is also possible that some of the cleared entries are on the + * pinned list. Must scan that also. + */ + + entry_ptr = cache_ptr->pel_head_ptr; + + while ( entry_ptr != NULL ) + { + if ( entry_ptr->clear_on_unprotect ) { + + entry_ptr->clear_on_unprotect = FALSE; + clear_ptr = entry_ptr; + entry_ptr = entry_ptr->next; + entries_cleared++; + + if ( H5C__flush_single_entry(f, + dxpl_id, + clear_ptr->addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + TRUE, + NULL) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } + } else { + + entry_ptr = entry_ptr->next; + } + } + +#if H5C_DO_SANITY_CHECKS + HDassert( entries_cleared == pinned_entries_marked + other_entries_marked ); + HDassert( entries_cleared + protected_entries_marked == ce_array_len ); +#endif /* H5C_DO_SANITY_CHECKS */ + + HDassert( ( entries_cleared == ce_array_len ) || + ( (ce_array_len - entries_cleared) <= cache_ptr->pl_len ) ); + +#if H5C_DO_SANITY_CHECKS + i = 0; + entry_ptr = cache_ptr->pl_head_ptr; + while ( entry_ptr != NULL ) + { + if ( entry_ptr->clear_on_unprotect ) { + + i++; + } + entry_ptr = entry_ptr->next; + } + HDassert( (entries_cleared + i) == ce_array_len ); +#endif /* H5C_DO_SANITY_CHECKS */ + +done: + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed on exit.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_mark_entries_as_clean() */ +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 988dfff..6d75222 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -3769,7 +3769,9 @@ struct H5C_t { /******************************/ /* Package Private Prototypes */ /******************************/ - +H5_DLL herr_t H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, + haddr_t addr, unsigned flags, hbool_t del_entry_from_slist_on_destroy, + int64_t *entry_size_change_ptr); #endif /* _H5Cpkg_H */ diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index a9ffb70..2dc611d 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -1883,8 +1883,6 @@ H5_DLL FILE *H5C_get_trace_file_ptr(const H5C_t *cache_ptr); H5_DLL FILE *H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr); H5_DLL herr_t H5C_insert_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, haddr_t addr, void *thing, unsigned int flags); -H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len, - haddr_t *ce_array_ptr); H5_DLL herr_t H5C_mark_entry_dirty(void *thing); H5_DLL herr_t H5C_move_entry(H5C_t *cache_ptr, const H5C_class_t *type, haddr_t old_addr, haddr_t new_addr); @@ -1918,6 +1916,8 @@ H5_DLL herr_t H5C_apply_candidate_list(H5F_t *f, hid_t dxpl_id, int mpi_rank, int mpi_size); H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr); H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr); +H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len, + haddr_t *ce_array_ptr); #endif /* H5_HAVE_PARALLEL */ #ifndef NDEBUG /* debugging functions */ diff --git a/src/Makefile.am b/src/Makefile.am index 2df095e..b6c4dad 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -42,9 +42,11 @@ DISTCLEANFILES=H5pubconf.h # library sources libhdf5_la_SOURCES= H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \ H5A.c H5Abtree2.c H5Adense.c H5Adeprec.c H5Aint.c H5Atest.c \ - H5AC.c H5B.c H5Bcache.c H5Bdbg.c \ + H5AC.c H5ACmpio.c \ + H5B.c H5Bcache.c H5Bdbg.c \ H5B2.c H5B2cache.c H5B2dbg.c H5B2hdr.c H5B2int.c H5B2stat.c H5B2test.c \ - H5C.c H5CS.c \ + H5C.c H5Cmpio.c \ + H5CS.c \ H5D.c H5Dbtree.c H5Dchunk.c H5Dcompact.c H5Dcontig.c H5Ddbg.c \ H5Ddeprec.c H5Defl.c H5Dfill.c H5Dint.c \ H5Dio.c H5Dlayout.c \ diff --git a/src/Makefile.in b/src/Makefile.in index d795677..5ea0a3e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -150,9 +150,9 @@ LTLIBRARIES = $(lib_LTLIBRARIES) libhdf5_la_LIBADD = am_libhdf5_la_OBJECTS = H5.lo H5checksum.lo H5dbg.lo H5system.lo \ H5timer.lo H5trace.lo H5A.lo H5Abtree2.lo H5Adense.lo \ - H5Adeprec.lo H5Aint.lo H5Atest.lo H5AC.lo H5B.lo H5Bcache.lo \ + H5Adeprec.lo H5Aint.lo H5Atest.lo H5AC.lo H5ACmpio.lo H5B.lo H5Bcache.lo \ H5Bdbg.lo H5B2.lo H5B2cache.lo H5B2dbg.lo H5B2hdr.lo \ - H5B2int.lo H5B2stat.lo H5B2test.lo H5C.lo H5CS.lo H5D.lo \ + H5B2int.lo H5B2stat.lo H5B2test.lo H5C.lo H5Cmpio.lo H5CS.lo H5D.lo \ H5Dbtree.lo H5Dchunk.lo H5Dcompact.lo H5Dcontig.lo H5Ddbg.lo \ H5Ddeprec.lo H5Defl.lo H5Dfill.lo H5Dint.lo H5Dio.lo \ H5Dlayout.lo H5Dmpio.lo H5Doh.lo H5Dscatgath.lo H5Dselect.lo \ @@ -740,9 +740,9 @@ DISTCLEANFILES = H5pubconf.h # library sources libhdf5_la_SOURCES = H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \ H5A.c H5Abtree2.c H5Adense.c H5Adeprec.c H5Aint.c H5Atest.c \ - H5AC.c H5B.c H5Bcache.c H5Bdbg.c \ + H5AC.c H5ACmpio.c H5B.c H5Bcache.c H5Bdbg.c \ H5B2.c H5B2cache.c H5B2dbg.c H5B2hdr.c H5B2int.c H5B2stat.c H5B2test.c \ - H5C.c H5CS.c \ + H5C.c H5Cmpio.c H5CS.c \ H5D.c H5Dbtree.c H5Dchunk.c H5Dcompact.c H5Dcontig.c H5Ddbg.c \ H5Ddeprec.c H5Defl.c H5Dfill.c H5Dint.c \ H5Dio.c H5Dlayout.c \ @@ -963,6 +963,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5A.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5AC.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5ACmpio.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Abtree2.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Adense.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Adeprec.Plo@am__quote@ @@ -979,6 +980,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Bcache.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Bdbg.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5C.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Cmpio.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5CS.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5D.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/H5Dbtree.Plo@am__quote@ |