summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJordan Henderson <jhenderson@hdfgroup.org>2019-01-25 21:32:51 (GMT)
committerJordan Henderson <jhenderson@hdfgroup.org>2019-02-12 20:38:02 (GMT)
commita4cfd0bfd7be9cba5d9af02825358f898885c5c4 (patch)
tree7c9933648971055ebc24f7fa1e04285a5a2e13e8 /src
parent9f9485a17a87b075f122b654c1f8426d9114a6df (diff)
downloadhdf5-a4cfd0bfd7be9cba5d9af02825358f898885c5c4.zip
hdf5-a4cfd0bfd7be9cba5d9af02825358f898885c5c4.tar.gz
hdf5-a4cfd0bfd7be9cba5d9af02825358f898885c5c4.tar.bz2
Fix some collective metadata read issues
Diffstat (limited to 'src')
-rw-r--r--src/H5C.c55
-rw-r--r--src/H5CX.c4
-rw-r--r--src/H5Cimage.c4
-rw-r--r--src/H5Dchunk.c22
-rw-r--r--src/H5Dmpio.c6
-rw-r--r--src/H5Z.c2
6 files changed, 29 insertions, 64 deletions
diff --git a/src/H5C.c b/src/H5C.c
index 3e4f1ad..288f3db 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -1483,31 +1483,17 @@ H5C_insert_entry(H5F_t * f,
H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr)
#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
- coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
-
- /* If not explicitly disabled, get the cmdr setting from the API context */
- if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read)
- coll_access = H5CX_get_coll_metadata_read();
- } /* end if */
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
+ coll_access = H5CX_get_coll_metadata_read();
entry_ptr->coll_access = coll_access;
if(coll_access) {
H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL)
/* Make sure the size of the collective entries in the cache remain in check */
- if(H5P_USER_TRUE == f->coll_md_read) {
- if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) {
- if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
- } /* end if */
- } /* end if */
- else {
- if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) {
- if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
- } /* end if */
- } /* end else */
+ if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
+ if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
} /* end if */
#endif
@@ -2243,13 +2229,8 @@ H5C_protect(H5F_t * f,
ring = H5CX_get_ring();
#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
- coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
-
- /* If not explicitly disabled, get the cmdr setting from the API context */
- if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read)
- coll_access = H5CX_get_coll_metadata_read();
- } /* end if */
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
+ coll_access = H5CX_get_coll_metadata_read();
#endif /* H5_HAVE_PARALLEL */
/* first check to see if the target is in cache */
@@ -2286,7 +2267,7 @@ H5C_protect(H5F_t * f,
the entry in their cache still have to participate in the
bcast. */
#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access) {
+ if(coll_access) {
if(!(entry_ptr->is_dirty) && !(entry_ptr->coll_access)) {
MPI_Comm comm; /* File MPI Communicator */
int mpi_code; /* MPI error code */
@@ -2601,21 +2582,11 @@ H5C_protect(H5F_t * f,
} /* end if */
#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
- /* Make sure the size of the collective entries in the cache remain in check */
- if(coll_access) {
- if(H5P_USER_TRUE == f->coll_md_read) {
- if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
- if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
- } /* end if */
- else {
- if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100)
- if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
- } /* end else */
- } /* end if */
- } /* end if */
+ /* Make sure the size of the collective entries in the cache remain in check */
+ if(coll_access)
+ if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
+ if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
#endif /* H5_HAVE_PARALLEL */
done:
diff --git a/src/H5CX.c b/src/H5CX.c
index 9ef2886..6b5e510 100644
--- a/src/H5CX.c
+++ b/src/H5CX.c
@@ -807,8 +807,8 @@ H5CX_set_apl(hid_t *acspl_id, const H5P_libclass_t *libclass,
#ifdef H5_HAVE_PARALLEL
/* If this routine is not guaranteed to be collective (i.e. it doesn't
- * modify the structural metadata in a file), check if we should use
- * a collective metadata read.
+ * modify the structural metadata in a file), check if the application
+ * specified a collective metadata read for just this operation.
*/
if(!is_collective) {
H5P_genplist_t *plist; /* Property list pointer */
diff --git a/src/H5Cimage.c b/src/H5Cimage.c
index 26b6506..298a2dd 100644
--- a/src/H5Cimage.c
+++ b/src/H5Cimage.c
@@ -1065,9 +1065,9 @@ H5C__read_cache_image(H5F_t *f, H5C_t *cache_ptr)
H5C__UPDATE_STATS_FOR_CACHE_IMAGE_READ(cache_ptr)
#ifdef H5_HAVE_PARALLEL
- if ( aux_ptr ) {
+ if ( aux_ptr ) {
- /* Broadcast cache image */
+ /* Broadcast cache image */
if ( MPI_SUCCESS !=
(mpi_result = MPI_Bcast(cache_ptr->image_buffer,
(int)cache_ptr->image_len, MPI_BYTE,
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 3fa4a23..13aced6 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -2929,9 +2929,6 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled,
/* Check for cached information */
if(!H5D__chunk_cinfo_cache_found(&dset->shared->cache.chunk.last, udata)) {
H5D_chk_idx_info_t idx_info; /* Chunked index info */
-#ifdef H5_HAVE_PARALLEL
- H5P_coll_md_read_flag_t temp_cmr; /* Temp value to hold the coll metadata read setting */
-#endif /* H5_HAVE_PARALLEL */
/* Compose chunked index info struct */
idx_info.f = dset->oloc.file;
@@ -2940,25 +2937,18 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled,
idx_info.storage = &dset->shared->layout.storage.u.chunk;
#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) {
- /* disable collective metadata read for chunk indexes
- as it is highly unlikely that users would read the
- same chunks from all processes. MSC - might turn on
- for root node? */
- temp_cmr = H5F_COLL_MD_READ(idx_info.f);
- H5F_set_coll_md_read(idx_info.f, H5P_FORCE_FALSE);
- } /* end if */
+ /* Disable collective metadata read for chunk indexes as it is
+ * highly unlikely that users would read the same chunks from all
+ * processes.
+ */
+ if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI))
+ H5CX_set_coll_metadata_read(FALSE);
#endif /* H5_HAVE_PARALLEL */
/* Go get the chunk information */
if((dset->shared->layout.storage.u.chunk.ops->get_addr)(&idx_info, udata) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't query chunk address")
-#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI))
- H5F_set_coll_md_read(idx_info.f, temp_cmr);
-#endif /* H5_HAVE_PARALLEL */
-
/*
* Cache the information retrieved.
*
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 7352375..6803b60 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -800,6 +800,10 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
HDassert(type_info);
HDassert(fm);
+ /* Disable collective metadata reads for chunked dataset I/O operations
+ * in order to prevent potential hangs */
+ H5CX_set_coll_metadata_read(FALSE);
+
/* Check the optional property list for the collective chunk IO optimization option */
if(H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option")
@@ -2313,7 +2317,7 @@ if(H5DEBUG(D))
/* Broadcasting the MPI_IO option info. and chunk address info. */
if(MPI_SUCCESS != (mpi_code = MPI_Bcast(total_chunk_addr_array, (int)(sizeof(haddr_t) * fm->layout->u.chunk.nchunks), MPI_BYTE, (int)0, io_info->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code)
} /* end if */
/* Start at first node in chunk skip list */
diff --git a/src/H5Z.c b/src/H5Z.c
index 14b0400..baea0d1 100644
--- a/src/H5Z.c
+++ b/src/H5Z.c
@@ -603,7 +603,7 @@ H5Z__flush_file_cb(void *obj_ptr, hid_t H5_ATTR_UNUSED obj_id, void *key)
/* Sanity check for collectively calling H5Zunregister, if requested */
/* (Sanity check assumes that a barrier on one file's comm
* is sufficient (i.e. that there aren't different comms for
- * different files). -QAK, 2018/02/14
+ * different files). -QAK, 2018/02/14)
*/
if(H5_coll_api_sanity_check_g && !object->sanity_checked) {
MPI_Comm mpi_comm; /* File's communicator */