Diffstat (limited to 'src/H5Cmpio.c')
-rw-r--r-- | src/H5Cmpio.c | 412
1 file changed, 214 insertions, 198 deletions
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c index abcd3c0..66c6601 100644 --- a/src/H5Cmpio.c +++ b/src/H5Cmpio.c @@ -6,7 +6,7 @@ * This file is part of HDF5. The full HDF5 copyright notice, including * * terms governing use, modification, and redistribution, is contained in * * the COPYING file, which can be found at the root of the source code * - * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. * + * distribution tree, or in https://www.hdfgroup.org/licenses. * * If you do not have access to either file, you may request a copy from * * help@hdfgroup.org. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ @@ -18,7 +18,7 @@ * Quincey Koziol * * Purpose: Functions in this file implement support for parallel I/O for - * generic cache code. + * generic cache code. * *------------------------------------------------------------------------- */ @@ -28,19 +28,19 @@ /****************/ #include "H5Cmodule.h" /* This source code file is part of the H5C module */ -#define H5F_FRIEND /*suppress error about including H5Fpkg */ +#define H5F_FRIEND /*suppress error about including H5Fpkg */ /***********/ /* Headers */ /***********/ -#include "H5private.h" /* Generic Functions */ +#include "H5private.h" /* Generic Functions */ #include "H5ACprivate.h" /* Metadata cache */ -#include "H5Cpkg.h" /* Cache */ +#include "H5Cpkg.h" /* Cache */ #include "H5CXprivate.h" /* API Contexts */ -#include "H5Eprivate.h" /* Error handling */ -#include "H5Fpkg.h" /* Files */ -#include "H5FDprivate.h" /* File drivers */ -#include "H5MMprivate.h" /* Memory management */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fpkg.h" /* Files */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5MMprivate.h" /* Memory management */ #ifdef H5_HAVE_PARALLEL /****************/ @@ -78,74 +78,74 @@ static herr_t H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring, unsigned * * Purpose: Apply the supplied candidate list. * - * We used to do this by simply having each process write - * every mpi_size-th entry in the candidate list, starting - * at index mpi_rank, and mark all the others clean. + * We used to do this by simply having each process write + * every mpi_size-th entry in the candidate list, starting + * at index mpi_rank, and mark all the others clean. * - * However, this can cause unnecessary contention in a file - * system by increasing the number of processes writing to - * adjacent locations in the HDF5 file. + * However, this can cause unnecessary contention in a file + * system by increasing the number of processes writing to + * adjacent locations in the HDF5 file. * - * To attempt to minimize this, we now arange matters such - * that each process writes n adjacent entries in the - * candidate list, and marks all others clean. We must do - * this in such a fashion as to guarantee that each entry - * on the candidate list is written by exactly one process, - * and marked clean by all others. + * To attempt to minimize this, we now arange matters such + * that each process writes n adjacent entries in the + * candidate list, and marks all others clean. We must do + * this in such a fashion as to guarantee that each entry + * on the candidate list is written by exactly one process, + * and marked clean by all others. 
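For illustration, a minimal standalone C sketch (not HDF5 source; the candidate count and rank count are made up) of the old round-robin assignment described above, in which rank r writes every mpi_size-th candidate starting at index r. The blocked assignment that replaces it is sketched after the table description below.

    #include <stdio.h>

    int main(void)
    {
        unsigned num_candidates = 10;
        int      mpi_size       = 3;

        /* Old scheme: rank r writes candidates r, r + mpi_size, r + 2*mpi_size, ...
         * Adjacent candidates therefore go to different writers, which is the
         * file-system contention the blocked scheme avoids. */
        for (int rank = 0; rank < mpi_size; rank++) {
            printf("rank %d writes candidates:", rank);
            for (unsigned u = (unsigned)rank; u < num_candidates; u += (unsigned)mpi_size)
                printf(" %u", u);
            printf("\n");
        }
        return 0;
    }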
* - * To do this, first construct a table mapping mpi_rank - * to the index of the first entry in the candidate list to - * be written by the process of that mpi_rank, and then use - * the table to control which entries are written and which - * are marked as clean as a function of the mpi_rank. + * To do this, first construct a table mapping mpi_rank + * to the index of the first entry in the candidate list to + * be written by the process of that mpi_rank, and then use + * the table to control which entries are written and which + * are marked as clean as a function of the mpi_rank. * - * Note that the table must be identical on all processes, as - * all see the same candidate list, mpi_size, and mpi_rank -- - * the inputs used to construct the table. + * Note that the table must be identical on all processes, as + * all see the same candidate list, mpi_size, and mpi_rank -- + * the inputs used to construct the table. * - * We construct the table as follows. Let: + * We construct the table as follows. Let: * - * n = num_candidates / mpi_size; + * n = num_candidates / mpi_size; * - * m = num_candidates % mpi_size; + * m = num_candidates % mpi_size; * - * Now allocate an array of integers of length mpi_size + 1, - * and call this array candidate_assignment_table. + * Now allocate an array of integers of length mpi_size + 1, + * and call this array candidate_assignment_table. * - * Conceptually, if the number of candidates is a multiple - * of the mpi_size, we simply pass through the candidate list - * and assign n entries to each process to flush, with the - * index of the first entry to flush in the location in - * the candidate_assignment_table indicated by the mpi_rank - * of the process. + * Conceptually, if the number of candidates is a multiple + * of the mpi_size, we simply pass through the candidate list + * and assign n entries to each process to flush, with the + * index of the first entry to flush in the location in + * the candidate_assignment_table indicated by the mpi_rank + * of the process. * - * In the more common case in which the candidate list isn't - * isn't a multiple of the mpi_size, we pretend it is, and - * give num_candidates % mpi_size processes one extra entry - * each to make things work out. + * In the more common case in which the candidate list isn't + * isn't a multiple of the mpi_size, we pretend it is, and + * give num_candidates % mpi_size processes one extra entry + * each to make things work out. * - * Once the table is constructed, we determine the first and - * last entry this process is to flush as follows: + * Once the table is constructed, we determine the first and + * last entry this process is to flush as follows: * - * first_entry_to_flush = candidate_assignment_table[mpi_rank] + * first_entry_to_flush = candidate_assignment_table[mpi_rank] * - * last_entry_to_flush = - * candidate_assignment_table[mpi_rank + 1] - 1; + * last_entry_to_flush = + * candidate_assignment_table[mpi_rank + 1] - 1; * - * With these values determined, we simply scan through the - * candidate list, marking all entries in the range - * [first_entry_to_flush, last_entry_to_flush] for flush, - * and all others to be cleaned. + * With these values determined, we simply scan through the + * candidate list, marking all entries in the range + * [first_entry_to_flush, last_entry_to_flush] for flush, + * and all others to be cleaned. * - * Finally, we scan the LRU from tail to head, flushing - * or marking clean the candidate entries as indicated. 
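A minimal standalone sketch of the candidate_assignment_table construction just described (illustrative only: it mirrors the arithmetic in H5C_apply_candidate_list, but uses plain malloc/printf rather than the H5MM/HD wrappers, and the example num_candidates and mpi_size values are invented).

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        unsigned  num_candidates = 11;
        int       mpi_size       = 4;
        unsigned  n = num_candidates / (unsigned)mpi_size;
        unsigned  m = num_candidates % (unsigned)mpi_size;
        unsigned *table = malloc(sizeof(unsigned) * ((size_t)mpi_size + 1));

        if (!table)
            return 1;

        table[0]        = 0;
        table[mpi_size] = num_candidates;

        for (unsigned u = 1; u < (unsigned)mpi_size; u++) {
            if (u <= m)
                table[u] = table[u - 1] + n + 1;   /* first m ranks take one extra entry */
            else if (num_candidates < (unsigned)mpi_size)
                table[u] = num_candidates;         /* ranks left with nothing to flush   */
            else
                table[u] = table[u - 1] + n;
        }

        for (int rank = 0; rank < mpi_size; rank++) {
            unsigned first = table[rank];              /* first_entry_to_flush          */
            unsigned last  = table[rank + 1] - 1;      /* last_entry_to_flush           */
            printf("rank %d: entries [%u, %u]\n", rank, first, last);
        }

        free(table);
        return 0;
    }

With num_candidates = 11 and mpi_size = 4, n = 2 and m = 3, so the table is {0, 3, 6, 9, 11}: ranks 0 through 2 flush three adjacent entries each and rank 3 flushes two.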
- * If necessary, we scan the pinned list as well. + * Finally, we scan the LRU from tail to head, flushing + * or marking clean the candidate entries as indicated. + * If necessary, we scan the pinned list as well. * - * Note that this function will fail if any protected or - * clean entries appear on the candidate list. + * Note that this function will fail if any protected or + * clean entries appear on the candidate list. * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. + * This function is used in managing sync points, and + * shouldn't be used elsewhere. * * Return: Success: SUCCEED * @@ -160,9 +160,6 @@ herr_t H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, haddr_t *candidates_list_ptr, int mpi_rank, int mpi_size) { - int i; - int m; - unsigned n; unsigned first_entry_to_flush; unsigned last_entry_to_flush; unsigned total_entries_to_clear = 0; @@ -177,7 +174,8 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha #endif /* H5C_DO_SANITY_CHECKS */ #if H5C_APPLY_CANDIDATE_LIST__DEBUG char tbl_buf[1024]; -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ +#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + unsigned m, n; unsigned u; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ @@ -187,7 +185,7 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha HDassert(cache_ptr != NULL); HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); HDassert(num_candidates > 0); - HDassert(num_candidates <= cache_ptr->slist_len); + HDassert((!cache_ptr->slist_enabled) || (num_candidates <= cache_ptr->slist_len)); HDassert(candidates_list_ptr != NULL); HDassert(0 <= mpi_rank); HDassert(mpi_rank < mpi_size); @@ -219,9 +217,7 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha } /* end if */ n = num_candidates / (unsigned)mpi_size; - if (num_candidates % (unsigned)mpi_size > INT_MAX) - HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "m overflow") - m = (int)(num_candidates % (unsigned)mpi_size); + m = num_candidates % (unsigned)mpi_size; if (NULL == (candidate_assignment_table = (unsigned *)H5MM_malloc(sizeof(unsigned) * (size_t)(mpi_size + 1)))) @@ -232,31 +228,31 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha candidate_assignment_table[mpi_size] = num_candidates; if (m == 0) { /* mpi_size is an even divisor of num_candidates */ - for (i = 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + for (u = 1; u < (unsigned)mpi_size; u++) + candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n; } /* end if */ else { - for (i = 1; i <= m; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1; + for (u = 1; u <= m; u++) + candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n + 1; if (num_candidates < (unsigned)mpi_size) { - for (i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = num_candidates; + for (u = m + 1; u < (unsigned)mpi_size; u++) + candidate_assignment_table[u] = num_candidates; } /* end if */ else { - for (i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; + for (u = m + 1; u < (unsigned)mpi_size; u++) + candidate_assignment_table[u] = candidate_assignment_table[u - 1] + n; } /* end else */ } /* end else */ HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates); #if H5C_DO_SANITY_CHECKS /* Verify that the candidate 
assignment table has the expected form */ - for (i = 1; i < mpi_size - 1; i++) { + for (u = 1; u < (unsigned)(mpi_size - 1); u++) { unsigned a, b; - a = candidate_assignment_table[i] - candidate_assignment_table[i - 1]; - b = candidate_assignment_table[i + 1] - candidate_assignment_table[i]; + a = candidate_assignment_table[u] - candidate_assignment_table[u - 1]; + b = candidate_assignment_table[u + 1] - candidate_assignment_table[u]; HDassert(n + 1 >= a); HDassert(a >= b); @@ -268,11 +264,11 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1; #if H5C_APPLY_CANDIDATE_LIST__DEBUG - for (i = 0; i < 1024; i++) - tbl_buf[i] = '\0'; + for (u = 0; u < 1024; u++) + tbl_buf[u] = '\0'; HDsprintf(&(tbl_buf[0]), "candidate assignment table = "); - for (i = 0; i <= mpi_size; i++) - HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %u", candidate_assignment_table[i]); + for (u = 0; u <= (unsigned)mpi_size; u++) + HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %u", candidate_assignment_table[u]); HDsprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); HDfprintf(stdout, "%s", tbl_buf); @@ -347,9 +343,9 @@ H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, unsigned num_candidates, ha #if H5C_DO_SANITY_CHECKS m = 0; n = 0; - for (i = 0; i < H5C_RING_NTYPES; i++) { - m += (int)entries_to_flush[i]; - n += entries_to_clear[i]; + for (u = 0; u < H5C_RING_NTYPES; u++) { + m += entries_to_flush[u]; + n += entries_to_clear[u]; } /* end if */ HDassert((unsigned)m == total_entries_to_flush); @@ -397,13 +393,14 @@ done: } /* H5C_apply_candidate_list() */ /*------------------------------------------------------------------------- + * * Function: H5C_construct_candidate_list__clean_cache * * Purpose: Construct the list of entries that should be flushed to - * clean all entries in the cache. + * clean all entries in the cache. * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. + * This function is used in managing sync points, and + * shouldn't be used elsewhere. * * Return: Success: SUCCEED * @@ -412,6 +409,16 @@ done: * Programmer: John Mainzer * 3/17/10 * + * Changes: With the slist optimization, the slist is not maintained + * unless a flush is in progress. Thus we can not longer use + * cache_ptr->slist_size to determine the total size of + * the entries we must insert in the candidate list. + * + * To address this, we now use cache_ptr->dirty_index_size + * instead. + * + * JRM -- 7/27/20 + * *------------------------------------------------------------------------- */ herr_t @@ -425,56 +432,72 @@ H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr) HDassert(cache_ptr != NULL); HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - /* As a sanity check, set space needed to the size of the skip list. - * This should be the sum total of the sizes of all the dirty entries - * in the metadata cache. + /* As a sanity check, set space needed to the dirty_index_size. This + * should be the sum total of the sizes of all the dirty entries + * in the metadata cache. Note that if the slist is enabled, + * cache_ptr->slist_size should equal cache_ptr->dirty_index_size. */ - space_needed = cache_ptr->slist_size; + space_needed = cache_ptr->dirty_index_size; + + HDassert((!cache_ptr->slist_enabled) || (space_needed == cache_ptr->slist_size)); /* Recall that while we shouldn't have any protected entries at this * point, it is possible that some dirty entries may reside on the * pinned list at this point. 
*/ - HDassert(cache_ptr->slist_size <= (cache_ptr->dLRU_list_size + cache_ptr->pel_size)); - HDassert(cache_ptr->slist_len <= (cache_ptr->dLRU_list_len + cache_ptr->pel_len)); + HDassert(cache_ptr->dirty_index_size <= (cache_ptr->dLRU_list_size + cache_ptr->pel_size)); + HDassert((!cache_ptr->slist_enabled) || + (cache_ptr->slist_len <= (cache_ptr->dLRU_list_len + cache_ptr->pel_len))); if (space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; unsigned nominated_entries_count = 0; size_t nominated_entries_size = 0; haddr_t nominated_addr; - HDassert(cache_ptr->slist_len > 0); + HDassert((!cache_ptr->slist_enabled) || (cache_ptr->slist_len > 0)); /* Scan the dirty LRU list from tail forward and nominate sufficient * entries to free up the necessary space. */ entry_ptr = cache_ptr->dLRU_tail_ptr; - while ((nominated_entries_size < space_needed) && (nominated_entries_count < cache_ptr->slist_len) && + + while ((nominated_entries_size < space_needed) && + ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) && (entry_ptr != NULL)) { + HDassert(!(entry_ptr->is_protected)); HDassert(!(entry_ptr->is_read_only)); HDassert(entry_ptr->ro_ref_count == 0); HDassert(entry_ptr->is_dirty); - HDassert(entry_ptr->in_slist); + HDassert((!cache_ptr->slist_enabled) || (entry_ptr->in_slist)); nominated_addr = entry_ptr->addr; + if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed") nominated_entries_size += entry_ptr->size; nominated_entries_count++; entry_ptr = entry_ptr->aux_prev; + } /* end while */ + HDassert(entry_ptr == NULL); /* it is possible that there are some dirty entries on the * protected entry list as well -- scan it too if necessary */ entry_ptr = cache_ptr->pel_head_ptr; - while ((nominated_entries_size < space_needed) && (nominated_entries_count < cache_ptr->slist_len) && + + while ((nominated_entries_size < space_needed) && + ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) && (entry_ptr != NULL)) { + if (entry_ptr->is_dirty) { + HDassert(!(entry_ptr->is_protected)); HDassert(!(entry_ptr->is_read_only)); HDassert(entry_ptr->ro_ref_count == 0); @@ -482,32 +505,39 @@ H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr) HDassert(entry_ptr->in_slist); nominated_addr = entry_ptr->addr; + if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed") nominated_entries_size += entry_ptr->size; nominated_entries_count++; + } /* end if */ entry_ptr = entry_ptr->next; + } /* end while */ - HDassert(nominated_entries_count == cache_ptr->slist_len); + HDassert((!cache_ptr->slist_enabled) || (nominated_entries_count == cache_ptr->slist_len)); HDassert(nominated_entries_size == space_needed); + } /* end if */ done: + FUNC_LEAVE_NOAPI(ret_value) + } /* H5C_construct_candidate_list__clean_cache() */ /*------------------------------------------------------------------------- * Function: H5C_construct_candidate_list__min_clean * * Purpose: Construct the list of entries that should be flushed to - * get the cache back within its min clean constraints. + * get the cache back within its min clean constraints. * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. + * This function is used in managing sync points, and + * shouldn't be used elsewhere. 
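As an aside, a simplified self-contained sketch of the nomination scan that both construct-candidate-list routines perform: walk the dirty LRU from the tail and nominate entries until the needed space is covered. The entry_t type, the list itself, and add_candidate() are hypothetical stand-ins for the H5C entry structure and H5AC_add_candidate(); only the control flow follows the real code.

    #include <stddef.h>
    #include <stdio.h>

    typedef struct entry_t {
        unsigned long   addr;      /* stand-in for haddr_t         */
        size_t          size;
        int             is_dirty;
        struct entry_t *aux_prev;  /* towards the head of the list */
    } entry_t;

    static void add_candidate(unsigned long addr)
    {
        printf("nominated entry at address %lu\n", addr);
    }

    static void nominate_entries(entry_t *dlru_tail, size_t space_needed)
    {
        size_t   nominated_size  = 0;
        unsigned nominated_count = 0;
        entry_t *entry           = dlru_tail;

        while (nominated_size < space_needed && entry != NULL) {
            if (entry->is_dirty) {
                add_candidate(entry->addr);
                nominated_size += entry->size;
                nominated_count++;
            }
            entry = entry->aux_prev;
        }
        printf("nominated %u entries, %zu bytes\n", nominated_count, nominated_size);
    }

    int main(void)
    {
        entry_t a = {0x100,  512, 1, NULL};
        entry_t b = {0x400, 1024, 1, &a};
        entry_t c = {0x900,  256, 1, &b};  /* tail of the dirty LRU */

        nominate_entries(&c, 1200);
        return 0;
    }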
* * Return: Success: SUCCEED * @@ -516,6 +546,12 @@ done: * Programmer: John Mainzer * 3/17/10 * + * Changes: With the slist optimization, the slist is not maintained + * unless a flush is in progress. Updated sanity checks to + * reflect this. + * + * JRM -- 7/27/20 + * *------------------------------------------------------------------------- */ herr_t @@ -533,50 +569,69 @@ H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr) * cache back within its min clean constraints. */ if (cache_ptr->max_cache_size > cache_ptr->index_size) { + if (((cache_ptr->max_cache_size - cache_ptr->index_size) + cache_ptr->cLRU_list_size) >= - cache_ptr->min_clean_size) + cache_ptr->min_clean_size) { + space_needed = 0; - else + } + else { + space_needed = cache_ptr->min_clean_size - ((cache_ptr->max_cache_size - cache_ptr->index_size) + cache_ptr->cLRU_list_size); + } } /* end if */ else { - if (cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) + + if (cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) { + space_needed = 0; - else + } + else { + space_needed = cache_ptr->min_clean_size - cache_ptr->cLRU_list_size; + } } /* end else */ if (space_needed > 0) { /* we have work to do */ + H5C_cache_entry_t *entry_ptr; unsigned nominated_entries_count = 0; size_t nominated_entries_size = 0; - HDassert(cache_ptr->slist_len > 0); + HDassert((!cache_ptr->slist_enabled) || (cache_ptr->slist_len > 0)); /* Scan the dirty LRU list from tail forward and nominate sufficient * entries to free up the necessary space. */ entry_ptr = cache_ptr->dLRU_tail_ptr; - while ((nominated_entries_size < space_needed) && (nominated_entries_count < cache_ptr->slist_len) && + + while ((nominated_entries_size < space_needed) && + ((!cache_ptr->slist_enabled) || (nominated_entries_count < cache_ptr->slist_len)) && (entry_ptr != NULL) && (!entry_ptr->flush_me_last)) { + haddr_t nominated_addr; HDassert(!(entry_ptr->is_protected)); HDassert(!(entry_ptr->is_read_only)); HDassert(entry_ptr->ro_ref_count == 0); HDassert(entry_ptr->is_dirty); - HDassert(entry_ptr->in_slist); + HDassert((!cache_ptr->slist_enabled) || (entry_ptr->in_slist)); nominated_addr = entry_ptr->addr; + if (H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed") nominated_entries_size += entry_ptr->size; nominated_entries_count++; entry_ptr = entry_ptr->aux_prev; + } /* end while */ - HDassert(nominated_entries_count <= cache_ptr->slist_len); + + HDassert((!cache_ptr->slist_enabled) || (nominated_entries_count <= cache_ptr->slist_len)); + HDassert(nominated_entries_size <= cache_ptr->dirty_index_size); HDassert(nominated_entries_size >= space_needed); } /* end if */ @@ -589,24 +644,24 @@ done: * Function: H5C_mark_entries_as_clean * * Purpose: When the H5C code is used to implement the metadata caches - * in PHDF5, only the cache with MPI_rank 0 is allowed to - * actually write entries to disk -- all other caches must - * retain dirty entries until they are advised that the - * entries are clean. + * in PHDF5, only the cache with MPI_rank 0 is allowed to + * actually write entries to disk -- all other caches must + * retain dirty entries until they are advised that the + * entries are clean. * - * This function exists to allow the H5C code to receive these - * notifications. + * This function exists to allow the H5C code to receive these + * notifications. * - * The function receives a list of entry base addresses - * which must refer to dirty entries in the cache. 
If any - * of the entries are either clean or don't exist, the - * function flags an error. + * The function receives a list of entry base addresses + * which must refer to dirty entries in the cache. If any + * of the entries are either clean or don't exist, the + * function flags an error. * - * The function scans the list of entries and flushes all - * those that are currently unprotected with the - * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently - * protected are flagged for clearing when they are - * unprotected. + * The function scans the list of entries and flushes all + * those that are currently unprotected with the + * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently + * protected are flagged for clearing when they are + * unprotected. * * Return: Non-negative on success/Negative on failure * @@ -679,13 +734,14 @@ H5C_mark_entries_as_clean(H5F_t *f, unsigned ce_array_len, haddr_t *ce_array_ptr if (entry_ptr == NULL) { #if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, "H5C_mark_entries_as_clean: entry[%u] = %a not in cache.\n", u, addr); + HDfprintf(stdout, "H5C_mark_entries_as_clean: entry[%u] = %" PRIuHADDR " not in cache.\n", u, + addr); #endif /* H5C_DO_SANITY_CHECKS */ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not in cache?!?!?") } /* end if */ else if (!entry_ptr->is_dirty) { #if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, "H5C_mark_entries_as_clean: entry %a is not dirty!?!\n", addr); + HDfprintf(stdout, "H5C_mark_entries_as_clean: entry %" PRIuHADDR " is not dirty!?!\n", addr); #endif /* H5C_DO_SANITY_CHECKS */ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?") } /* end else-if */ @@ -737,7 +793,7 @@ H5C_mark_entries_as_clean(H5F_t *f, unsigned ce_array_len, haddr_t *ce_array_ptr * of the pre_serialize / serialize routines, this may * cease to be the case -- requiring a review of this * point. 
- * JRM -- 4/7/15 + * JRM -- 4/7/15 */ entries_cleared = 0; entries_examined = 0; @@ -885,15 +941,16 @@ H5C__collective_write(H5F_t *f) { H5AC_t * cache_ptr; H5FD_mpio_xfer_t orig_xfer_mode = H5FD_MPIO_COLLECTIVE; + void * base_buf; int count; int * length_array = NULL; MPI_Aint * buf_array = NULL; MPI_Aint * offset_array = NULL; - MPI_Datatype btype; - hbool_t btype_created = FALSE; - MPI_Datatype ftype; - hbool_t ftype_created = FALSE; + MPI_Datatype btype = MPI_BYTE; + MPI_Datatype ftype = MPI_BYTE; int mpi_code; + char unused = 0; /* Unused, except for non-NULL pointer value */ + size_t buf_count; herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -908,20 +965,17 @@ H5C__collective_write(H5F_t *f) if (H5CX_get_io_xfer_mode(&orig_xfer_mode) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + /* Set transfer mode */ + if (H5CX_set_io_xfer_mode(H5FD_MPIO_COLLECTIVE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") + /* Get number of entries in collective write list */ count = (int)H5SL_count(cache_ptr->coll_write_list); - if (count > 0) { - H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; H5SL_node_t * node; H5C_cache_entry_t *entry_ptr; - void * base_buf; int i; - /* Set new transfer mode */ - if (H5CX_set_io_xfer_mode(xfer_mode) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") - /* Allocate arrays */ if (NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int)))) HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, @@ -948,7 +1002,6 @@ H5C__collective_write(H5F_t *f) node = H5SL_next(node); i = 1; while (node) { - if (NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node))) HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") @@ -966,9 +1019,6 @@ H5C__collective_write(H5F_t *f) if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - - btype_created = TRUE; - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) @@ -976,61 +1026,27 @@ H5C__collective_write(H5F_t *f) if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - - ftype_created = TRUE; - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - /* Pass buf type, file type to the file driver */ - if (H5CX_set_mpi_coll_datatypes(btype, ftype) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") - - /* Write data */ - if (H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, base_buf) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively") - + /* MPI count to write */ + buf_count = 1; } /* end if */ else { - MPI_Status mpi_stat; - MPI_File * mpi_fh_p; - MPI_File mpi_fh; - MPI_Info * info_p; - MPI_Info info; - - /* This should be rewritten to call H5F_block_write, with the correct - * buffer and file datatypes (null ones). 
-QAK, 2018/02/21 - */ - if (H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get mpi file handle") - - mpi_fh = *(MPI_File *)mpi_fh_p; - - if (H5F_get_mpi_info(f, &info_p) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file info") + /* Set non-NULL pointer for I/O operation */ + base_buf = &unused; - info = *info_p; - - /* just to match up with the 1st MPI_File_set_view from - * H5FD_mpio_write() - */ - if (MPI_SUCCESS != - (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + /* MPI count to write */ + buf_count = 0; + } /* end else */ - /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */ - HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); - if (MPI_SUCCESS != - (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0, MPI_BYTE, &mpi_stat))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) + /* Pass buf type, file type to the file driver */ + if (H5CX_set_mpi_coll_datatypes(btype, ftype) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") - /* just to match up with the 2nd MPI_File_set_view (reset) in - * H5FD_mpio_write() - */ - if (MPI_SUCCESS != - (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - } /* end else */ + /* Write data */ + if (H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, buf_count, base_buf) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_WRITEERROR, FAIL, "unable to write entries collectively") done: /* Free arrays */ @@ -1039,9 +1055,9 @@ done: offset_array = (MPI_Aint *)H5MM_xfree(offset_array); /* Free MPI Types */ - if (btype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&btype))) + if (MPI_BYTE != btype && MPI_SUCCESS != (mpi_code = MPI_Type_free(&btype))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - if (ftype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&ftype))) + if (MPI_BYTE != ftype && MPI_SUCCESS != (mpi_code = MPI_Type_free(&ftype))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) /* Reset transfer mode in API context, if changed */ @@ -1055,7 +1071,7 @@ done: /*------------------------------------------------------------------------- * Function: H5C__flush_candidate_entries * - * Purpose: Flush or clear (as indicated) the candidate entries that + * Purpose: Flush or clear (as indicated) the candidate entries that * have been marked in the metadata cache. In so doing, * observe rings and flush dependencies. * @@ -1084,9 +1100,9 @@ done: * Return: Non-negative on success/Negative on failure. * * Programmer: John Mainzer - * 2/10/17 + * 2/10/17 * - * Changes: None. + * Changes: None. * *------------------------------------------------------------------------- */ @@ -1175,7 +1191,7 @@ done: /*------------------------------------------------------------------------- * Function: H5C__flush_candidates_in_ring * - * Purpose: Flush or clear (as indicated) the candidate entries + * Purpose: Flush or clear (as indicated) the candidate entries * contained in the specified cache and ring. All candidate * entries in rings outside the specified ring must have been * flushed (or cleared) on entry. @@ -1204,7 +1220,7 @@ done: * Return: Non-negative on success/Negative on failure. * * Programmer: John Mainzer - * 2/10/17 + * 2/10/17 * *------------------------------------------------------------------------- */ |
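Finally, a standalone MPI-IO sketch of the strategy the rewritten H5C__collective_write() uses: describe the entry images and their file offsets with MPI_Type_create_hindexed, commit a memory type and a file type, and issue a single collective write in which a rank with nothing to flush simply contributes a zero count. This sketch calls MPI directly rather than going through H5F_block_write()/H5CX as the real code does; the file name, offsets, and buffer contents are invented for the example.

    #include <mpi.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
        MPI_File     fh;
        MPI_Datatype btype, ftype;
        char         buf[64];
        int          lengths[2] = {16, 32};  /* sizes of two "entry images"  */
        MPI_Aint     mem_disps[2];           /* absolute buffer addresses    */
        MPI_Aint     file_disps[2];          /* file offsets of the entries  */
        int          rank;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        memset(buf, 'a' + (rank % 26), sizeof(buf));

        MPI_Get_address(&buf[0],  &mem_disps[0]);
        MPI_Get_address(&buf[32], &mem_disps[1]);
        file_disps[0] = (MPI_Aint)rank * 256;        /* keep ranks in disjoint regions */
        file_disps[1] = (MPI_Aint)rank * 256 + 128;

        /* One hindexed type describing memory, one describing the file layout */
        MPI_Type_create_hindexed(2, lengths, mem_disps,  MPI_BYTE, &btype);
        MPI_Type_commit(&btype);
        MPI_Type_create_hindexed(2, lengths, file_disps, MPI_BYTE, &ftype);
        MPI_Type_commit(&ftype);

        MPI_File_open(MPI_COMM_WORLD, "coll_write_demo.bin",
                      MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
        MPI_File_set_view(fh, 0, MPI_BYTE, ftype, "native", MPI_INFO_NULL);

        /* Collective write: absolute memory displacements pair with MPI_BOTTOM.
         * A rank with no entries would pass a count of 0 here. */
        MPI_File_write_at_all(fh, 0, MPI_BOTTOM, 1, btype, MPI_STATUS_IGNORE);

        MPI_File_close(&fh);
        MPI_Type_free(&btype);
        MPI_Type_free(&ftype);
        MPI_Finalize();
        return 0;
    }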