diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/H5C.c | 39
-rw-r--r-- | src/H5CX.c | 2
-rw-r--r-- | src/H5Dchunk.c | 17
-rw-r--r-- | src/H5Dmpio.c | 119
-rw-r--r-- | src/H5Fmpi.c | 132
-rw-r--r-- | src/H5Fprivate.h | 2
-rw-r--r-- | src/H5Pfapl.c | 24
-rw-r--r-- | src/H5Z.c | 14
8 files changed, 268 insertions, 81 deletions
@@ -1536,17 +1536,26 @@ H5C_insert_entry(H5F_t *f, const H5C_class_t *type, haddr_t addr, void *thing, u #ifdef H5_HAVE_PARALLEL if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) - coll_access = H5CX_get_coll_metadata_read(); + coll_access = H5F_get_coll_metadata_reads(f); entry_ptr->coll_access = coll_access; if (coll_access) { H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL) /* Make sure the size of the collective entries in the cache remain in check */ - if (cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) - if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries") - } /* end if */ + if (H5P_USER_TRUE == H5F_COLL_MD_READ(f)) { + if (cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) { + if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries") + } /* end if */ + } /* end if */ + else { + if (cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) { + if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries") + } /* end if */ + } /* end else */ + } /* end if */ #endif done: @@ -2264,7 +2273,7 @@ H5C_protect(H5F_t *f, const H5C_class_t *type, haddr_t addr, void *udata, unsign #ifdef H5_HAVE_PARALLEL if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) - coll_access = H5CX_get_coll_metadata_read(); + coll_access = H5F_get_coll_metadata_reads(f); #endif /* H5_HAVE_PARALLEL */ /* first check to see if the target is in cache */ @@ -2611,11 +2620,19 @@ H5C_protect(H5F_t *f, const H5C_class_t *type, haddr_t addr, void *udata, unsign #ifdef H5_HAVE_PARALLEL /* Make sure the size of the collective entries in the cache remain in check */ - if (coll_access) - if (cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) - if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, 
H5E_CANTFLUSH, NULL, "can't clear collective metadata entries") -#endif /* H5_HAVE_PARALLEL */ + if (coll_access) { + if (H5P_USER_TRUE == H5F_COLL_MD_READ(f)) { + if (cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) + if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries") + } /* end if */ + else { + if (cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) + if (H5C_clear_coll_entries(cache_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries") + } /* end else */ + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ done: #if H5C_DO_EXTREME_SANITY_CHECKS @@ -1150,7 +1150,7 @@ done: * Purpose: Sanity checks and sets up collective operations. * * Note: Should be called for all API routines that modify file - * file metadata but don't pass in an access property list. + * metadata but don't pass in an access property list. * * Return: Non-negative on success / Negative on failure * diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 1d74610..6c2871e 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -3148,7 +3148,9 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled, H5D_chunk_ud_t *udat unsigned idx = 0; /* Index of chunk in cache, if present */ hbool_t found = FALSE; /* In cache? */ #ifdef H5_HAVE_PARALLEL - hbool_t reenable_coll_md_reads = FALSE; + H5P_coll_md_read_flag_t md_reads_file_flag; + hbool_t md_reads_context_flag; + hbool_t restore_md_reads_state = FALSE; #endif herr_t ret_value = SUCCEED; /* Return value */ @@ -3222,11 +3224,10 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled, H5D_chunk_ud_t *udat * processes. 
*/ if (H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) { - hbool_t do_coll_md_reads = H5CX_get_coll_metadata_read(); - if (do_coll_md_reads) { - H5CX_set_coll_metadata_read(FALSE); - reenable_coll_md_reads = TRUE; - } + md_reads_file_flag = H5P_FORCE_FALSE; + md_reads_context_flag = FALSE; + H5F_set_coll_metadata_reads(idx_info.f, &md_reads_file_flag, &md_reads_context_flag); + restore_md_reads_state = TRUE; } #endif /* H5_HAVE_PARALLEL */ @@ -3272,8 +3273,8 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled, H5D_chunk_ud_t *udat done: #ifdef H5_HAVE_PARALLEL /* Re-enable collective metadata reads if we disabled them */ - if (reenable_coll_md_reads) - H5CX_set_coll_metadata_read(TRUE); + if (restore_md_reads_state) + H5F_set_coll_metadata_reads(dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag); #endif /* H5_HAVE_PARALLEL */ FUNC_LEAVE_NOAPI(ret_value) diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 41d9bc0..a823729 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -805,10 +805,6 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf HDassert(type_info); HDassert(fm); - /* Disable collective metadata reads for chunked dataset I/O operations - * in order to prevent potential hangs */ - H5CX_set_coll_metadata_read(FALSE); - /* Check the optional property list for the collective chunk IO optimization option */ if (H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option") @@ -2303,17 +2299,20 @@ static herr_t H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk) { - H5SL_node_t * chunk_node; /* Current node in chunk skip list */ - H5D_chunk_info_t *chunk_info; /* Current chunking info. of this node. 
*/ - haddr_t chunk_addr; /* Current chunking address of this node */ - haddr_t *total_chunk_addr_array = NULL; /* The array of chunk address for the total number of chunk */ - hbool_t do_sort = FALSE; /* Whether the addresses need to be sorted */ - int bsearch_coll_chunk_threshold; - int many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND; - int mpi_size; /* Number of MPI processes */ - int mpi_code; /* MPI return code */ - int i; /* Local index variable */ - herr_t ret_value = SUCCEED; /* Return value */ + H5SL_node_t * chunk_node; /* Current node in chunk skip list */ + H5D_chunk_info_t *chunk_info; /* Current chunking info. of this node. */ + haddr_t chunk_addr; /* Current chunking address of this node */ + haddr_t *total_chunk_addr_array = NULL; /* The array of chunk address for the total number of chunk */ + H5P_coll_md_read_flag_t md_reads_file_flag; + hbool_t md_reads_context_flag; + hbool_t restore_md_reads_state = FALSE; + hbool_t do_sort = FALSE; /* Whether the addresses need to be sorted */ + int bsearch_coll_chunk_threshold; + int many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND; + int mpi_size; /* Number of MPI processes */ + int mpi_code; /* MPI return code */ + int i; /* Local index variable */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC @@ -2357,8 +2356,41 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") if (mpi_rank == 0) { - if (H5D__chunk_addrmap(io_info, total_chunk_addr_array) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address") + herr_t result; + + /* + * If enabled, disable collective metadata reads here. + * Since the chunk address mapping is done on rank 0 + * only here, it will cause problems if collective + * metadata reads are enabled. 
+ */ + if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) { + md_reads_file_flag = H5P_FORCE_FALSE; + md_reads_context_flag = FALSE; + H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, + &md_reads_context_flag); + restore_md_reads_state = TRUE; + } + + result = H5D__chunk_addrmap(io_info, total_chunk_addr_array); + + /* Ensure that we restore the old collective metadata reads state */ + if (restore_md_reads_state) { + H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, + &md_reads_context_flag); + restore_md_reads_state = FALSE; + } + + if (result < 0) { + size_t u; + + /* Clear total chunk address array */ + for (u = 0; u < (size_t)fm->layout->u.chunk.nchunks; u++) + total_chunk_addr_array[u] = HADDR_UNDEF; + + /* Push error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address") + } } /* end if */ /* Broadcasting the MPI_IO option info. and chunk address info. */ @@ -2413,6 +2445,10 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, } /* end if */ done: + /* Re-enable collective metadata reads if we disabled them */ + if (restore_md_reads_state) + H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag); + if (total_chunk_addr_array) H5MM_xfree(total_chunk_addr_array); @@ -2460,20 +2496,23 @@ static herr_t H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[], haddr_t chunk_addr[]) { - size_t total_chunks; - unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk; - uint8_t * io_mode_info = NULL; - uint8_t * recv_io_mode_info = NULL; - uint8_t * mergebuf = NULL; - uint8_t * tempbuf; - H5SL_node_t * chunk_node; - H5D_chunk_info_t *chunk_info; - int mpi_size, mpi_rank; - MPI_Comm comm; - int root; - size_t ic; - int mpi_code; - herr_t ret_value = SUCCEED; + size_t total_chunks; + unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk; + 
uint8_t * io_mode_info = NULL; + uint8_t * recv_io_mode_info = NULL; + uint8_t * mergebuf = NULL; + uint8_t * tempbuf; + H5SL_node_t * chunk_node; + H5D_chunk_info_t * chunk_info; + H5P_coll_md_read_flag_t md_reads_file_flag; + hbool_t md_reads_context_flag; + hbool_t restore_md_reads_state = FALSE; + int mpi_size, mpi_rank; + MPI_Comm comm; + int root; + size_t ic; + int mpi_code; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -2533,6 +2572,20 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig size_t nproc; unsigned *nproc_per_chunk; + /* + * If enabled, disable collective metadata reads here. + * Since the chunk address mapping is done on rank 0 + * only here, it will cause problems if collective + * metadata reads are enabled. + */ + if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) { + md_reads_file_flag = H5P_FORCE_FALSE; + md_reads_context_flag = FALSE; + H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, + &md_reads_context_flag); + restore_md_reads_state = TRUE; + } + /* pre-computing: calculate number of processes and regularity of the selection occupied in each chunk */ if (NULL == (nproc_per_chunk = (unsigned *)H5MM_calloc(total_chunks * sizeof(unsigned)))) @@ -2599,6 +2652,10 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig #endif done: + /* Re-enable collective metadata reads if we disabled them */ + if (restore_md_reads_state) + H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag); + if (io_mode_info) H5MM_free(io_mode_info); if (mergebuf) diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index 57a5be1..8942a6c 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -31,11 +31,12 @@ /***********/ /* Headers */ /***********/ -#include "H5private.h" /* Generic Functions */ -#include "H5Eprivate.h" /* Error handling */ -#include "H5Fpkg.h" /* File access */ -#include "H5FDprivate.h" /* File drivers */ -#include 
"H5Iprivate.h" /* IDs */ +#include "H5private.h" /* Generic Functions */ +#include "H5CXprivate.h" /* API Contexts */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fpkg.h" /* File access */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5Iprivate.h" /* IDs */ /****************/ /* Local Macros */ @@ -328,4 +329,125 @@ H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_mpi_retrieve_comm */ + +/*------------------------------------------------------------------------- + * Function: H5F_get_coll_metadata_reads + * + * Purpose: Determines whether collective metadata reads should be + * performed. This routine is meant to be the single source of + * truth for the collective metadata reads status, as it + * coordinates between the file-global flag and the flag set + * for the current operation in the current API context. + * + * Return: TRUE/FALSE (can't fail) + * + *------------------------------------------------------------------------- + */ +hbool_t +H5F_get_coll_metadata_reads(const H5F_t *file) +{ + H5P_coll_md_read_flag_t file_flag = H5P_USER_FALSE; + hbool_t ret_value = FALSE; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + HDassert(file && file->shared); + + /* Retrieve the file-global flag */ + file_flag = H5F_COLL_MD_READ(file); + + /* If file flag is set to H5P_FORCE_FALSE, exit early + * with FALSE, since collective metadata reads have + * been explicitly disabled somewhere in the library. + */ + if (H5P_FORCE_FALSE == file_flag) + ret_value = FALSE; + else { + /* If file flag is set to H5P_USER_TRUE, ignore + * any settings in the API context. A file-global + * setting of H5P_USER_TRUE for collective metadata + * reads should ignore any settings on an Access + * Property List for an individual operation. + */ + if (H5P_USER_TRUE == file_flag) + ret_value = TRUE; + else { + /* Get the collective metadata reads flag from + * the current API context. 
+ */ + ret_value = H5CX_get_coll_metadata_read(); + } + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_get_coll_metadata_reads() */ + +/*------------------------------------------------------------------------- + * Function: H5F_set_coll_metadata_reads + * + * Purpose: Used to temporarily modify the collective metadata reads + * status. This is useful for cases where either: + * + * * Collective metadata reads are enabled, but need to be + * disabled for an operation about to occur that may trigger + * an independent metadata read (such as only rank 0 doing + * something) + * + * * Metadata reads are currently independent, but it is + * guaranteed that the application has maintained + * collectivity at the interface level (e.g., an operation + * that modifies metadata is being performed). In this case, + * it should be safe to enable collective metadata reads, + * barring any internal library issues that may occur + * + * After completion, the `file_flag` parameter will be set to + * the previous value of the file-global collective metadata + * reads flag. The `context_flag` parameter will be set to the + * previous value of the API context's collective metadata + * reads flag. Another call to this routine should be made to + * restore these values (see below warning). + * + * !! WARNING !! + * It is dangerous to modify the collective metadata reads + * status, as this can cause crashes, hangs and corruption in + * the HDF5 file when improperly done. Therefore, the + * `file_flag` and `context_flag` parameters are both + * mandatory, and it is assumed that the caller will guarantee + * these settings are restored with another call to this + * routine once the bracketed operation is complete. + * !! WARNING !! 
+ * + * Return: Nothing + * + *------------------------------------------------------------------------- + */ +void +H5F_set_coll_metadata_reads(H5F_t *file, H5P_coll_md_read_flag_t *file_flag, hbool_t *context_flag) +{ + H5P_coll_md_read_flag_t prev_file_flag = H5P_USER_FALSE; + hbool_t prev_context_flag = FALSE; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + HDassert(file && file->shared); + HDassert(file_flag); + HDassert(context_flag); + + /* Save old state */ + prev_file_flag = H5F_COLL_MD_READ(file); + prev_context_flag = H5CX_get_coll_metadata_read(); + + /* Set new desired state */ + if (prev_file_flag != *file_flag) { + H5F_COLL_MD_READ(file) = *file_flag; + *file_flag = prev_file_flag; + } + if (prev_context_flag != *context_flag) { + H5CX_set_coll_metadata_read(*context_flag); + *context_flag = prev_context_flag; + } + + FUNC_LEAVE_NOAPI_VOID +} /* end H5F_set_coll_metadata_reads() */ + #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index 6498cc3..1731f05 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -951,6 +951,8 @@ H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); H5_DLL int H5F_mpi_get_size(const H5F_t *f); H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm); +H5_DLL hbool_t H5F_get_coll_metadata_reads(const H5F_t *f); +H5_DLL void H5F_set_coll_metadata_reads(H5F_t *f, H5P_coll_md_read_flag_t *file_flag, hbool_t *context_flag); #endif /* H5_HAVE_PARALLEL */ /* External file cache routines */ diff --git a/src/H5Pfapl.c b/src/H5Pfapl.c index d7408e3..04df2ff 100644 --- a/src/H5Pfapl.c +++ b/src/H5Pfapl.c @@ -4796,15 +4796,14 @@ H5P__decode_coll_md_read_flag_t(const void **_pp, void *_value) * Function: H5Pset_all_coll_metadata_ops * * Purpose: Tell the library whether the metadata read operations will - * be done collectively (1) or not (0). Default is independent. 
- * With collective mode, the library will optimize access to - * metadata operations on the file. + * be done collectively (1) or not (0). Default is independent. + * With collective mode, the library will optimize access to + * metadata operations on the file. * * Note: This routine accepts file access property lists, link - * access property lists, attribute access property lists, - * dataset access property lists, group access property lists, - * named datatype access property lists, - * and dataset transfer property lists. + * access property lists, attribute access property lists, + * dataset access property lists, group access property lists + * and named datatype access property lists. * * Return: Non-negative on success/Negative on failure * @@ -4824,7 +4823,7 @@ H5Pset_all_coll_metadata_ops(hid_t plist_id, hbool_t is_collective) H5TRACE2("e", "ib", plist_id, is_collective); /* Compare the property list's class against the other class */ - /* (Dataset, group, attribute, and named datype access property lists + /* (Dataset, group, attribute, and named datatype access property lists * are sub-classes of link access property lists -QAK) */ if (TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) && @@ -4855,10 +4854,9 @@ done: * Purpose: Gets information about collective metadata read mode. * * Note: This routine accepts file access property lists, link - * access property lists, attribute access property lists, - * dataset access property lists, group access property lists, - * named datatype access property lists, - * and dataset transfer property lists. + * access property lists, attribute access property lists, + * dataset access property lists, group access property lists, + * and named datatype access property lists. 
* * Return: Non-negative on success/Negative on failure * @@ -4876,7 +4874,7 @@ H5Pget_all_coll_metadata_ops(hid_t plist_id, hbool_t *is_collective) H5TRACE2("e", "i*b", plist_id, is_collective); /* Compare the property list's class against the other class */ - /* (Dataset, group, attribute, and named datype access property lists + /* (Dataset, group, attribute, and named datatype access property lists * are sub-classes of link access property lists -QAK) */ if (TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) && @@ -599,14 +599,9 @@ H5Z__flush_file_cb(void *obj_ptr, hid_t H5_ATTR_UNUSED obj_id, void *key H5_ATTR /* Do a global flush if the file is opened for write */ if (H5F_ACC_RDWR & H5F_INTENT(f)) { -/* When parallel HDF5 is defined, check for collective metadata reads on this - * file and set the flag for metadata I/O in the API context. -QAK, 2018/02/14 - */ #ifdef H5_HAVE_PARALLEL /* Check if MPIO driver is used */ if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { - H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */ - /* Sanity check for collectively calling H5Zunregister, if requested */ /* (Sanity check assumes that a barrier on one file's comm * is sufficient (i.e. that there aren't different comms for @@ -626,13 +621,8 @@ H5Z__flush_file_cb(void *obj_ptr, hid_t H5_ATTR_UNUSED obj_id, void *key H5_ATTR /* Set the "sanity checked" flag */ object->sanity_checked = TRUE; } /* end if */ - - /* Check whether to use the collective metadata read DXPL */ - coll_md_read = H5F_COLL_MD_READ(f); - if (H5P_USER_TRUE == coll_md_read) - H5CX_set_coll_metadata_read(TRUE); - } /* end if */ -#endif /* H5_HAVE_PARALLEL */ + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ /* Call the flush routine for mounted file hierarchies */ if (H5F_flush_mounts((H5F_t *)obj_ptr) < 0) |