diff options
Diffstat (limited to 'src/H5ACmpio.c')
-rw-r--r-- | src/H5ACmpio.c | 356 |
1 files changed, 181 insertions, 175 deletions
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c index d03c17b..f097e83 100644 --- a/src/H5ACmpio.c +++ b/src/H5ACmpio.c @@ -37,6 +37,7 @@ #include "H5private.h" /* Generic Functions */ #include "H5ACpkg.h" /* Metadata cache */ #include "H5Cprivate.h" /* Cache */ +#include "H5CXprivate.h" /* API Contexts */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5MMprivate.h" /* Memory management */ @@ -94,20 +95,19 @@ static herr_t H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr, int sync_point_op); static herr_t H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, - hid_t dxpl_id); +static herr_t H5AC__propagate_and_apply_candidate_list(H5F_t *f); +static herr_t H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f); static herr_t H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); static herr_t H5AC__receive_candidate_list(const H5AC_t *cache_ptr, unsigned *num_entries_ptr, haddr_t **haddr_buf_ptr_ptr); -static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__receive_and_apply_clean_list(H5F_t *f); static herr_t H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates, haddr_t *candidates_list_ptr); -static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id); -static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id); +static herr_t H5AC__rsp__dist_md_write__flush(H5F_t *f); +static herr_t H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f); +static herr_t H5AC__rsp__p0_only__flush(H5F_t *f); +static herr_t H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f); /*********************/ @@ -135,7 +135,7 @@ H5FL_DEFINE_STATIC(H5AC_slist_entry_t); /*------------------------------------------------------------------------- * Function: H5AC__set_sync_point_done_callback * - * Purpose: Set the value of the sync_point_done callback. This + * Purpose: Set the value of the sync_point_done callback. This * callback is used by the parallel test code to verify * that the expected writes and only the expected writes * take place during a sync point. @@ -204,7 +204,7 @@ H5AC__set_write_done_callback(H5C_t * cache_ptr, void (* write_done)(void)) * Function: H5AC_add_candidate() * * Purpose: Add the supplied metadata entry address to the candidate - * list. Verify that each entry added does not appear in + * list. Verify that each entry added does not appear in * the list prior to its insertion. * * This function is intended for used in constructing list @@ -261,11 +261,11 @@ done: * * Purpose: Broadcast the contents of the process 0 candidate entry * slist. In passing, also remove all entries from said - * list. As the application of this will be handled by - * the same functions on all processes, construct and + * list. As the application of this will be handled by + * the same functions on all processes, construct and * return a copy of the list in the same format as that * received by the other processes. Note that if this - * copy is returned in *haddr_buf_ptr_ptr, the caller + * copy is returned in *haddr_buf_ptr_ptr, the caller * must free it. * * This function must only be called by the process with @@ -317,7 +317,7 @@ H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, unsigned *num_entries_ptr, unsigned chk_num_entries = 0; /* convert the candidate list into the format we - * are used to receiving from process 0, and also load it + * are used to receiving from process 0, and also load it * into a buffer for transmission. */ if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) @@ -331,7 +331,7 @@ H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, unsigned *num_entries_ptr, HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) } /* end if */ - /* Pass the number of entries and the buffer pointer + /* Pass the number of entries and the buffer pointer * back to the caller. Do this so that we can use the same code * to apply the candidate list to all the processes. */ @@ -481,10 +481,10 @@ done: /*------------------------------------------------------------------------- * Function: H5AC__construct_candidate_list() * - * Purpose: In the parallel case when the metadata_write_strategy is + * Purpose: In the parallel case when the metadata_write_strategy is * H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, process 0 uses - * this function to construct the list of cache entries to - * be flushed. This list is then propagated to the other + * this function to construct the list of cache entries to + * be flushed. This list is then propagated to the other * caches, and then flushed in a distributed fashion. * * The sync_point_op parameter is used to determine the extent @@ -498,7 +498,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t *aux_ptr, +H5AC__construct_candidate_list(H5AC_t *cache_ptr, H5AC_aux_t H5_ATTR_NDEBUG_UNUSED *aux_ptr, int sync_point_op) { herr_t ret_value = SUCCEED; /* Return value */ @@ -581,22 +581,22 @@ H5AC__copy_candidate_list_to_buffer_cb(void *_item, void H5_ATTR_UNUSED *_key, * Function: H5AC__copy_candidate_list_to_buffer * * Purpose: Allocate buffer(s) and copy the contents of the candidate - * entry slist into it (them). In passing, remove all - * entries from the candidate slist. Note that the + * entry slist into it (them). In passing, remove all + * entries from the candidate slist. Note that the * candidate slist must not be empty. * * If MPI_Offset_buf_ptr_ptr is not NULL, allocate a buffer * of MPI_Offset, copy the contents of the candidate - * entry list into it with the appropriate conversions, - * and return the base address of the buffer in + * entry list into it with the appropriate conversions, + * and return the base address of the buffer in * *MPI_Offset_buf_ptr. Note that this is the buffer - * used by process 0 to transmit the list of entries to + * used by process 0 to transmit the list of entries to * be flushed to all other processes (in this file group). * * Similarly, allocate a buffer of haddr_t, load the contents - * of the candidate list into this buffer, and return its - * base address in *haddr_buf_ptr_ptr. Note that this - * latter buffer is constructed unconditionally. + * of the candidate list into this buffer, and return its + * base address in *haddr_buf_ptr_ptr. Note that this + * latter buffer is constructed unconditionally. * * In passing, also remove all entries from the candidate * entry slist. @@ -635,8 +635,8 @@ H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, unsigned *num_entri num_entries = (unsigned)H5SL_count(aux_ptr->candidate_slist_ptr); - /* allocate a buffer(s) to store the list of candidate entry - * base addresses in + /* allocate a buffer(s) to store the list of candidate entry + * base addresses in */ buf_size = sizeof(haddr_t) * num_entries; if(NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) @@ -651,7 +651,7 @@ H5AC__copy_candidate_list_to_buffer(const H5AC_t *cache_ptr, unsigned *num_entri if(H5SL_free(aux_ptr->candidate_slist_ptr, H5AC__copy_candidate_list_to_buffer_cb, &udata) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for candidate entries") - /* Pass the number of entries and the buffer pointer + /* Pass the number of entries and the buffer pointer * back to the caller. */ *num_entries_ptr = num_entries; @@ -821,9 +821,8 @@ H5AC__log_cleaned_entry(const H5AC_info_t *entry_ptr) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; - herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_PACKAGE + FUNC_ENTER_PACKAGE_NOERR /* Sanity check */ HDassert(entry_ptr); @@ -853,8 +852,7 @@ H5AC__log_cleaned_entry(const H5AC_info_t *entry_ptr) /* Decrement the dirty byte count */ aux_ptr->dirty_bytes -= entry_ptr->size; -done: - FUNC_LEAVE_NOAPI(ret_value) + FUNC_LEAVE_NOAPI(SUCCEED) } /* H5AC__log_cleaned_entry() */ @@ -933,7 +931,7 @@ done: * * Purpose: Update the dirty_bytes count for a newly inserted entry. * - * If mpi_rank isnt 0, this simply means adding the size + * If mpi_rank isn't 0, this simply means adding the size * of the entry to the dirty_bytes count. * * If mpi_rank is 0, we must also add the entry to the @@ -1133,32 +1131,32 @@ done: /*------------------------------------------------------------------------- * Function: H5AC__propagate_and_apply_candidate_list * - * Purpose: Prior to the addition of support for multiple metadata - * write strategies, in PHDF5, only the metadata cache with - * mpi rank 0 was allowed to write to file. All other - * metadata caches on processes with rank greater than 0 - * were required to retain dirty entries until they were + * Purpose: Prior to the addition of support for multiple metadata + * write strategies, in PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other + * metadata caches on processes with rank greater than 0 + * were required to retain dirty entries until they were * notified that the entry was clean. * - * This constraint is relaxed with the distributed + * This constraint is relaxed with the distributed * metadata write strategy, in which a list of candidate * metadata cache entries is constructed by the process 0 * cache and then distributed to the caches of all the other - * processes. Once the listed is distributed, many (if not - * all) processes writing writing a unique subset of the - * entries, and marking the remainder clean. The subsets - * are chosen so that each entry in the list of candidates - * is written by exactly one cache, and all entries are + * processes. Once the listed is distributed, many (if not + * all) processes writing writing a unique subset of the + * entries, and marking the remainder clean. The subsets + * are chosen so that each entry in the list of candidates + * is written by exactly one cache, and all entries are * marked as being clean in all caches. * - * While the list of candidate cache entries is prepared + * While the list of candidate cache entries is prepared * elsewhere, this function is the main routine for distributing - * and applying the list. It must be run simultaniously on + * and applying the list. It must be run simultaniously on * all processes that have the relevant file open. To ensure - * proper synchronization, there is a barrier at the beginning + * proper synchronization, there is a barrier at the beginning * of this function. * - * At present, this function is called under one of two + * At present, this function is called under one of two * circumstances: * * 1) Dirty byte creation exceeds some user specified value. @@ -1169,10 +1167,10 @@ done: * and therefore the same dirty data creation. * * This fact is used to synchronize the caches for purposes - * of propagating the list of candidate entries, by simply - * calling this function from all caches whenever some user - * specified threshold on dirty data is exceeded. (the - * process 0 cache creates the candidate list just before + * of propagating the list of candidate entries, by simply + * calling this function from all caches whenever some user + * specified threshold on dirty data is exceeded. (the + * process 0 cache creates the candidate list just before * calling this function). * * 2) Under direct user control -- this operation must be @@ -1187,20 +1185,20 @@ done: * * For the process with mpi rank 0: * - * 1) Load the contents of the candidate list + * 1) Load the contents of the candidate list * (candidate_slist_ptr) into a buffer, and broadcast that * buffer to all the other caches. Clear the candidate * list in passing. * - * If there is a positive number of candidates, proceed with + * If there is a positive number of candidates, proceed with * the following: * * 2) Apply the candidate entry list. * * 3) Particpate in a closing barrier. * - * 4) Remove from the dirty list (d_slist_ptr) and from the - * flushed and still clean entries list (c_slist_ptr), + * 4) Remove from the dirty list (d_slist_ptr) and from the + * flushed and still clean entries list (c_slist_ptr), * all addresses that appeared in the candidate list, as * these entries are now clean. * @@ -1209,7 +1207,7 @@ done: * * 1) Receive the candidate entry list broadcast * - * If there is a positive number of candidates, proceed with + * If there is a positive number of candidates, proceed with * the following: * * 2) Apply the candidate entry list. @@ -1226,7 +1224,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) +H5AC__propagate_and_apply_candidate_list(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1266,8 +1264,8 @@ H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) if(num_candidates > 0) { herr_t result; - /* all processes apply the candidate list. - * H5C_apply_candidate_list() handles the details of + /* all processes apply the candidate list. + * H5C_apply_candidate_list() handles the details of * distributing the writes across the processes. */ @@ -1275,7 +1273,7 @@ H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) aux_ptr->write_permitted = TRUE; /* Apply the candidate list */ - result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_candidates, + result = H5C_apply_candidate_list(f, cache_ptr, num_candidates, candidates_list_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); /* Disable writes again */ @@ -1286,7 +1284,7 @@ H5AC__propagate_and_apply_candidate_list(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a + * enforce posix semantics on the server that pretends to be a * file system in our parallel tests. */ if(aux_ptr->write_done) @@ -1325,22 +1323,22 @@ done: * Function: H5AC__propagate_flushed_and_still_clean_entries_list * * Purpose: In PHDF5, if the process 0 only metadata write strategy - * is selected, only the metadata cache with mpi rank 0 is - * allowed to write to file. All other metadata caches on - * processes with rank greater than 0 must retain dirty - * entries until they are notified that the entry is now + * is selected, only the metadata cache with mpi rank 0 is + * allowed to write to file. All other metadata caches on + * processes with rank greater than 0 must retain dirty + * entries until they are notified that the entry is now * clean. * - * This function is the main routine for handling this - * notification proceedure. It must be called - * simultaniously on all processes that have the relevant - * file open. To this end, it is called only during a + * This function is the main routine for handling this + * notification procedure. It must be called + * simultaniously on all processes that have the relevant + * file open. To this end, it is called only during a * sync point, with a barrier prior to the call. * * Note that any metadata entry writes by process 0 will * occur after the barrier and just before this call. * - * Typicaly, calls to this function will be triggered in + * Typically, calls to this function will be triggered in * one of two ways: * * 1) Dirty byte creation exceeds some user specified value. @@ -1393,7 +1391,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id) +H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1415,10 +1413,9 @@ H5AC__propagate_flushed_and_still_clean_entries_list(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't broadcast clean slist.") HDassert(H5SL_count(aux_ptr->c_slist_ptr) == 0); } /* end if */ - else { - if(H5AC__receive_and_apply_clean_list(f, dxpl_id) < 0) + else + if(H5AC__receive_and_apply_clean_list(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't receive and/or process clean slist broadcast.") - } /* end else */ done: FUNC_LEAVE_NOAPI(ret_value) @@ -1427,11 +1424,11 @@ done: /*------------------------------------------------------------------------- * - * Function: H5AC_receive_haddr_list() + * Function: H5AC__receive_haddr_list() * * Purpose: Receive the list of entry addresses from process 0, * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. - * Note that the caller must free this buffer if it is + * Note that the caller must free this buffer if it is * returned. * * This function must only be called by the process with @@ -1482,7 +1479,7 @@ H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr, HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_result) } /* end if */ - /* finally, pass the number of entries and the buffer pointer + /* finally, pass the number of entries and the buffer pointer * back to the caller. */ *num_entries_ptr = num_entries; @@ -1494,7 +1491,7 @@ done: haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC_receive_haddr_list() */ +} /* H5AC__receive_haddr_list() */ /*------------------------------------------------------------------------- @@ -1516,7 +1513,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id) +H5AC__receive_and_apply_clean_list(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1541,7 +1538,7 @@ H5AC__receive_and_apply_clean_list(H5F_t *f, hid_t dxpl_id) if(num_entries > 0) /* mark the indicated entries as clean */ - if(H5C_mark_entries_as_clean(f, dxpl_id, num_entries, haddr_buf_ptr) < 0) + if(H5C_mark_entries_as_clean(f, num_entries, haddr_buf_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.") /* if it is defined, call the sync point done callback. Note @@ -1565,7 +1562,7 @@ done: * * Purpose: Receive the list of candidate entries from process 0, * and return it in a buffer pointed to by *haddr_buf_ptr_ptr. - * Note that the caller must free this buffer if it is + * Note that the caller must free this buffer if it is * returned. * * This function must only be called by the process with @@ -1615,38 +1612,38 @@ done: * Purpose: Routine for handling the details of running a sync point * that is triggered by a flush -- which in turn must have been * triggered by either a flush API call or a file close -- - * when the distributed metadata write strategy is selected. - * - * Upon entry, each process generates it own candidate list, - * being a sorted list of all dirty metadata entries currently - * in the metadata cache. Note that this list must be idendical - * across all processes, as all processes see the same stream - * of dirty metadata coming in, and use the same lists of - * candidate entries at each sync point. (At first glance, this + * when the distributed metadata write strategy is selected. + * + * Upon entry, each process generates it own candidate list, + * being a sorted list of all dirty metadata entries currently + * in the metadata cache. Note that this list must be idendical + * across all processes, as all processes see the same stream + * of dirty metadata coming in, and use the same lists of + * candidate entries at each sync point. (At first glance, this * argument sounds circular, but think of it in the sense of * a recursive proof). * - * If this this list is empty, we are done, and the function + * If this this list is empty, we are done, and the function * returns * - * Otherwise, after the sorted list dirty metadata entries is - * constructed, each process uses the same algorithm to assign - * each entry on the candidate list to exactly one process for + * Otherwise, after the sorted list dirty metadata entries is + * constructed, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process for * flushing. * * At this point, all processes participate in a barrier to * avoid messages from the past/future bugs. * - * Each process then flushes the entries assigned to it, and + * Each process then flushes the entries assigned to it, and * marks all other entries on the candidate list as clean. * - * Finally, all processes participate in a second barrier to + * Finally, all processes participate in a second barrier to * avoid messages from the past/future bugs. * * At the end of this process, process 0 and only process 0 - * must tidy up its lists of dirtied and cleaned entries. + * must tidy up its lists of dirtied and cleaned entries. * These lists are not used in the distributed metadata write - * strategy, but they must be maintained should we shift + * strategy, but they must be maintained should we shift * to a strategy that uses them. * * Return: Success: non-negative @@ -1659,7 +1656,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) +H5AC__rsp__dist_md_write__flush(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1679,7 +1676,7 @@ H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED); - /* first construct the candidate list -- initially, this will be in the + /* first construct the candidate list -- initially, this will be in the * form of a skip list. We will convert it later. */ if(H5C_construct_candidate_list__clean_cache(cache_ptr) < 0) @@ -1694,15 +1691,20 @@ H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) if(H5AC__copy_candidate_list_to_buffer(cache_ptr, &num_entries, &haddr_buf_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") - /* initial sync point barrier */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + /* Initial sync point barrier + * + * When flushing from within the close operation from a file, + * it's possible to skip this barrier (on the second flush of the cache). + */ + if(!H5CX_get_mpi_file_flushing()) + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) /* Enable writes during this operation */ aux_ptr->write_permitted = TRUE; /* Apply the candidate list */ - result = H5C_apply_candidate_list(f, dxpl_id, cache_ptr, num_entries, + result = H5C_apply_candidate_list(f, cache_ptr, num_entries, haddr_buf_ptr, aux_ptr->mpi_rank, aux_ptr->mpi_size); /* Disable writes again */ @@ -1713,7 +1715,7 @@ H5AC__rsp__dist_md_write__flush(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't apply candidate list.") /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a + * enforce posix semantics on the server that pretends to be a * file system in our parallel tests. */ if(aux_ptr->write_done) @@ -1750,45 +1752,45 @@ done: * Function: H5AC__rsp__dist_md_write__flush_to_min_clean * * Purpose: Routine for handling the details of running a sync point - * triggered by the accumulation of dirty metadata (as + * triggered by the accumulation of dirty metadata (as * opposed to a flush call to the API) when the distributed * metadata write strategy is selected. * * After invocation and initial sanity checking this function - * first checks to see if evictions are enabled -- if they + * first checks to see if evictions are enabled -- if they * are not, the function does nothing and returns. * - * Otherwise, process zero constructs a list of entries to + * Otherwise, process zero constructs a list of entries to * be flushed in order to bring the process zero cache back - * within its min clean requirement. Note that this list + * within its min clean requirement. Note that this list * (the candidate list) may be empty. * * Then, all processes participate in a barrier. * - * After the barrier, process 0 broadcasts the number of - * entries in the candidate list prepared above, and all + * After the barrier, process 0 broadcasts the number of + * entries in the candidate list prepared above, and all * other processes receive this number. * * If this number is zero, we are done, and the function * returns without further action. * - * Otherwise, process 0 broadcasts the sorted list of + * Otherwise, process 0 broadcasts the sorted list of * candidate entries, and all other processes receive it. * - * Then, each process uses the same algorithm to assign - * each entry on the candidate list to exactly one process + * Then, each process uses the same algorithm to assign + * each entry on the candidate list to exactly one process * for flushing. * - * Each process then flushes the entries assigned to it, and + * Each process then flushes the entries assigned to it, and * marks all other entries on the candidate list as clean. * - * Finally, all processes participate in a second barrier to + * Finally, all processes participate in a second barrier to * avoid messages from the past/future bugs. * * At the end of this process, process 0 and only process 0 - * must tidy up its lists of dirtied and cleaned entries. + * must tidy up its lists of dirtied and cleaned entries. * These lists are not used in the distributed metadata write - * strategy, but they must be maintained should we shift + * strategy, but they must be maintained should we shift * to a strategy that uses them. * * Return: Success: non-negative @@ -1801,7 +1803,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) +H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1830,7 +1832,7 @@ H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") /* propagate and apply candidate list -- all processes */ - if(H5AC__propagate_and_apply_candidate_list(f, dxpl_id) < 0) + if(H5AC__propagate_and_apply_candidate_list(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate and apply candidate list.") } /* evictions enabled */ @@ -1844,25 +1846,25 @@ done: * * Purpose: Routine for handling the details of running a sync point * that is triggered a flush -- which in turn must have been - * triggered by either a flush API call or a file close -- - * when the process 0 only metadata write strategy is selected. + * triggered by either a flush API call or a file close -- + * when the process 0 only metadata write strategy is selected. * * First, all processes participate in a barrier. * * Then process zero flushes all dirty entries, and broadcasts - * they number of clean entries (if any) to all the other + * they number of clean entries (if any) to all the other * caches. * * If this number is zero, we are done. * - * Otherwise, process 0 broadcasts the list of cleaned + * Otherwise, process 0 broadcasts the list of cleaned * entries, and all other processes which are part of this * file group receive it, and mark the listed entries as * clean in their caches. * - * Since all processes have the same set of dirty + * Since all processes have the same set of dirty * entries at the beginning of the sync point, and all - * entries that will be written are written before + * entries that will be written are written before * process zero broadcasts the number of cleaned entries, * there is no need for a closing barrier. * @@ -1876,7 +1878,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) +H5AC__rsp__p0_only__flush(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -1894,12 +1896,16 @@ H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); HDassert(aux_ptr->metadata_write_strategy == H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY); - /* to prevent "messages from the future" we must - * synchronize all processes before we start the flush. + /* To prevent "messages from the future" we must + * synchronize all processes before we start the flush. * Hence the following barrier. + * + * However, when flushing from within the close operation from a file, + * it's possible to skip this barrier (on the second flush of the cache). */ - if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + if(!H5CX_get_mpi_file_flushing()) + if(MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) /* Flush data to disk, from rank 0 process */ if(aux_ptr->mpi_rank == 0) { @@ -1909,7 +1915,7 @@ H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) aux_ptr->write_permitted = TRUE; /* Flush the cache */ - result = H5C_flush_cache(f, dxpl_id, H5AC__NO_FLAGS_SET); + result = H5C_flush_cache(f, H5AC__NO_FLAGS_SET); /* Disable writes again */ aux_ptr->write_permitted = FALSE; @@ -1919,7 +1925,7 @@ H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") /* this code exists primarily for the test bed -- it allows us to - * enforce posix semantics on the server that pretends to be a + * enforce POSIX semantics on the server that pretends to be a * file system in our parallel tests. */ if(aux_ptr->write_done) @@ -1927,7 +1933,7 @@ H5AC__rsp__p0_only__flush(H5F_t *f, hid_t dxpl_id) } /* end if */ /* Propagate cleaned entries to other ranks. */ - if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0) + if(H5AC__propagate_flushed_and_still_clean_entries_list(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") done: @@ -1939,32 +1945,32 @@ done: * Function: H5AC__rsp__p0_only__flush_to_min_clean * * Purpose: Routine for handling the details of running a sync point - * triggered by the accumulation of dirty metadata (as + * triggered by the accumulation of dirty metadata (as * opposed to a flush call to the API) when the process 0 * only metadata write strategy is selected. * * After invocation and initial sanity checking this function - * first checks to see if evictions are enabled -- if they + * first checks to see if evictions are enabled -- if they * are not, the function does nothing and returns. * * Otherwise, all processes participate in a barrier. * - * After the barrier, if this is process 0, the function - * causes the cache to flush sufficient entries to get the - * cache back within its minimum clean fraction, and broadcast - * the number of entries which have been flushed since + * After the barrier, if this is process 0, the function + * causes the cache to flush sufficient entries to get the + * cache back within its minimum clean fraction, and broadcast + * the number of entries which have been flushed since * the last sync point, and are still clean. * * If this number is zero, we are done. * - * Otherwise, process 0 broadcasts the list of cleaned + * Otherwise, process 0 broadcasts the list of cleaned * entries, and all other processes which are part of this * file group receive it, and mark the listed entries as * clean in their caches. * - * Since all processes have the same set of dirty + * Since all processes have the same set of dirty * entries at the beginning of the sync point, and all - * entries that will be written are written before + * entries that will be written are written before * process zero broadcasts the number of cleaned entries, * there is no need for a closing barrier. * @@ -1978,7 +1984,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) +H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -2020,9 +2026,9 @@ H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) if(0 == aux_ptr->mpi_rank) { herr_t result; - /* here, process 0 flushes as many entries as necessary to + /* here, process 0 flushes as many entries as necessary to * comply with the currently specified min clean size. - * Note that it is quite possible that no entries will be + * Note that it is quite possible that no entries will be * flushed. */ @@ -2030,7 +2036,7 @@ H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) aux_ptr->write_permitted = TRUE; /* Flush the cache */ - result = H5C_flush_to_min_clean(f, dxpl_id); + result = H5C_flush_to_min_clean(f); /* Disable writes again */ aux_ptr->write_permitted = FALSE; @@ -2047,7 +2053,7 @@ H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f, hid_t dxpl_id) (aux_ptr->write_done)(); } /* end if */ - if(H5AC__propagate_flushed_and_still_clean_entries_list(f, dxpl_id) < 0) + if(H5AC__propagate_flushed_and_still_clean_entries_list(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't propagate clean entries list.") } /* end if */ @@ -2060,23 +2066,23 @@ done: * Function: H5AC__run_sync_point * * Purpose: Top level routine for managing a sync point between all - * meta data caches in the parallel case. Since all caches + * meta data caches in the parallel case. Since all caches * see the same sequence of dirty metadata, we simply count * bytes of dirty metadata, and run a sync point whenever the * number of dirty bytes of metadata seen since the last * sync point exceeds a threshold that is common across all - * processes. We also run sync points in response to + * processes. We also run sync points in response to * HDF5 API calls triggering either a flush or a file close. * - * In earlier versions of PHDF5, only the metadata cache with - * mpi rank 0 was allowed to write to file. All other + * In earlier versions of PHDF5, only the metadata cache with + * mpi rank 0 was allowed to write to file. All other * metadata caches on processes with rank greater than 0 were - * required to retain dirty entries until they were notified + * required to retain dirty entries until they were notified * that the entry is was clean. * - * This function was created to make it easier for us to - * experiment with other options, as it is a single point - * for the execution of sync points. + * This function was created to make it easier for us to + * experiment with other options, as it is a single point + * for the execution of sync points. * * Return: Success: non-negative * @@ -2088,7 +2094,7 @@ done: *------------------------------------------------------------------------- */ herr_t -H5AC__run_sync_point(H5F_t *f, hid_t dxpl_id, int sync_point_op) +H5AC__run_sync_point(H5F_t *f, int sync_point_op) { H5AC_t * cache_ptr; H5AC_aux_t * aux_ptr; @@ -2120,7 +2126,7 @@ HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu /* clear collective access flag on half of the entries in the cache and mark them as independent in case they need to be - evicted later. All ranks are guranteed to mark the same entries + evicted later. All ranks are guaranteed to mark the same entries since we don't modify the order of the collectively accessed entries except through collective access. */ if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0) @@ -2130,12 +2136,12 @@ HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: switch(sync_point_op) { case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: - if(H5AC__rsp__p0_only__flush_to_min_clean(f, dxpl_id) < 0) + if(H5AC__rsp__p0_only__flush_to_min_clean(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush_to_min_clean() failed.") break; case H5AC_SYNC_POINT_OP__FLUSH_CACHE: - if(H5AC__rsp__p0_only__flush(f, dxpl_id) < 0) + if(H5AC__rsp__p0_only__flush(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__p0_only__flush() failed.") break; @@ -2148,12 +2154,12 @@ HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu case H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED: switch(sync_point_op) { case H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN: - if(H5AC__rsp__dist_md_write__flush_to_min_clean(f, dxpl_id) < 0) + if(H5AC__rsp__dist_md_write__flush_to_min_clean(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush_to_min_clean() failed.") break; case H5AC_SYNC_POINT_OP__FLUSH_CACHE: - if(H5AC__rsp__dist_md_write__flush(f, dxpl_id) < 0) + if(H5AC__rsp__dist_md_write__flush(f) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5AC__rsp__dist_md_write__flush() failed.") break; @@ -2191,13 +2197,13 @@ done: * * Purpose: In the distributed metadata write strategy, not all dirty * entries are written by process 0 -- thus we must tidy - * up the dirtied, and flushed and still clean lists + * up the dirtied, and flushed and still clean lists * maintained by process zero after each sync point. * * This procedure exists to tend to this issue. * * At this point, all entries that process 0 cleared should - * have been removed from both the dirty and flushed and + * have been removed from both the dirty and flushed and * still clean lists, and entries that process 0 has flushed * should have been removed from the dirtied list and added * to the flushed and still clean list. @@ -2208,7 +2214,7 @@ done: * them to be used should the metadata write strategy change * to one that uses these lists. * - * Thus for our purposes, all we need to do is remove from + * Thus for our purposes, all we need to do is remove from * the dirtied and flushed and still clean lists all * references to entries that appear in the candidate list. * @@ -2240,11 +2246,11 @@ H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates, HDassert(num_candidates > 0); HDassert(candidates_list_ptr != NULL); - /* clean up dirtied and flushed and still clean lists by removing - * all entries on the candidate list. Cleared entries should - * have been removed from both the dirty and cleaned lists at - * this point, flushed entries should have been added to the - * cleaned list. However, for this metadata write strategy, + /* clean up dirtied and flushed and still clean lists by removing + * all entries on the candidate list. Cleared entries should + * have been removed from both the dirty and cleaned lists at + * this point, flushed entries should have been added to the + * cleaned list. However, for this metadata write strategy, * we just want to remove all references to the candidate entries. */ for(u = 0; u < num_candidates; u++) { @@ -2254,7 +2260,7 @@ H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates, addr = candidates_list_ptr[u]; - /* addr may be either on the dirtied list, or on the flushed + /* addr may be either on the dirtied list, or on the flushed * and still clean list. Remove it. */ if(NULL != (d_slist_entry_ptr = (H5AC_slist_entry_t *)H5SL_remove(aux_ptr->d_slist_ptr, (void *)&addr))) @@ -2284,7 +2290,7 @@ H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates, *------------------------------------------------------------------------- */ herr_t -H5AC__flush_entries(H5F_t *f, hid_t dxpl_id) +H5AC__flush_entries(H5F_t *f) { herr_t ret_value = SUCCEED; /* Return value */ @@ -2296,7 +2302,7 @@ H5AC__flush_entries(H5F_t *f, hid_t dxpl_id) /* Check if we have >1 ranks */ if(H5C_get_aux_ptr(f->shared->cache)) - if(H5AC__run_sync_point(f, dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0) + if(H5AC__run_sync_point(f, H5AC_SYNC_POINT_OP__FLUSH_CACHE) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.") done: |