summary refs log tree commit diff stats
path: root/src/H5Dmpio.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5Dmpio.c')
-rw-r--r-- src/H5Dmpio.c 119
1 files changed, 88 insertions, 31 deletions
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 41d9bc0..a823729 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -805,10 +805,6 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
HDassert(type_info);
HDassert(fm);
- /* Disable collective metadata reads for chunked dataset I/O operations
- * in order to prevent potential hangs */
- H5CX_set_coll_metadata_read(FALSE);
-
/* Check the optional property list for the collective chunk IO optimization option */
if (H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option")
@@ -2303,17 +2299,20 @@ static herr_t
H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk)
{
- H5SL_node_t * chunk_node; /* Current node in chunk skip list */
- H5D_chunk_info_t *chunk_info; /* Current chunking info. of this node. */
- haddr_t chunk_addr; /* Current chunking address of this node */
- haddr_t *total_chunk_addr_array = NULL; /* The array of chunk address for the total number of chunk */
- hbool_t do_sort = FALSE; /* Whether the addresses need to be sorted */
- int bsearch_coll_chunk_threshold;
- int many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND;
- int mpi_size; /* Number of MPI processes */
- int mpi_code; /* MPI return code */
- int i; /* Local index variable */
- herr_t ret_value = SUCCEED; /* Return value */
+ H5SL_node_t * chunk_node; /* Current node in chunk skip list */
+ H5D_chunk_info_t *chunk_info; /* Current chunking info. of this node. */
+ haddr_t chunk_addr; /* Current chunking address of this node */
+ haddr_t *total_chunk_addr_array = NULL; /* The array of chunk address for the total number of chunk */
+ H5P_coll_md_read_flag_t md_reads_file_flag;
+ hbool_t md_reads_context_flag;
+ hbool_t restore_md_reads_state = FALSE;
+ hbool_t do_sort = FALSE; /* Whether the addresses need to be sorted */
+ int bsearch_coll_chunk_threshold;
+ int many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND;
+ int mpi_size; /* Number of MPI processes */
+ int mpi_code; /* MPI return code */
+ int i; /* Local index variable */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_STATIC
@@ -2357,8 +2356,41 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
if (mpi_rank == 0) {
- if (H5D__chunk_addrmap(io_info, total_chunk_addr_array) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address")
+ herr_t result;
+
+ /*
+ * If enabled, disable collective metadata reads here.
+ * Since the chunk address mapping is done on rank 0
+ * only here, it will cause problems if collective
+ * metadata reads are enabled.
+ */
+ if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) {
+ md_reads_file_flag = H5P_FORCE_FALSE;
+ md_reads_context_flag = FALSE;
+ H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag,
+ &md_reads_context_flag);
+ restore_md_reads_state = TRUE;
+ }
+
+ result = H5D__chunk_addrmap(io_info, total_chunk_addr_array);
+
+ /* Ensure that we restore the old collective metadata reads state */
+ if (restore_md_reads_state) {
+ H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag,
+ &md_reads_context_flag);
+ restore_md_reads_state = FALSE;
+ }
+
+ if (result < 0) {
+ size_t u;
+
+ /* Clear total chunk address array */
+ for (u = 0; u < (size_t)fm->layout->u.chunk.nchunks; u++)
+ total_chunk_addr_array[u] = HADDR_UNDEF;
+
+ /* Push error, but still participate in following MPI_Bcast */
+ HDONE_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address")
+ }
} /* end if */
/* Broadcasting the MPI_IO option info. and chunk address info. */
@@ -2413,6 +2445,10 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
} /* end if */
done:
+ /* Re-enable collective metadata reads if we disabled them */
+ if (restore_md_reads_state)
+ H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag);
+
if (total_chunk_addr_array)
H5MM_xfree(total_chunk_addr_array);
@@ -2460,20 +2496,23 @@ static herr_t
H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[],
haddr_t chunk_addr[])
{
- size_t total_chunks;
- unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk;
- uint8_t * io_mode_info = NULL;
- uint8_t * recv_io_mode_info = NULL;
- uint8_t * mergebuf = NULL;
- uint8_t * tempbuf;
- H5SL_node_t * chunk_node;
- H5D_chunk_info_t *chunk_info;
- int mpi_size, mpi_rank;
- MPI_Comm comm;
- int root;
- size_t ic;
- int mpi_code;
- herr_t ret_value = SUCCEED;
+ size_t total_chunks;
+ unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk;
+ uint8_t * io_mode_info = NULL;
+ uint8_t * recv_io_mode_info = NULL;
+ uint8_t * mergebuf = NULL;
+ uint8_t * tempbuf;
+ H5SL_node_t * chunk_node;
+ H5D_chunk_info_t * chunk_info;
+ H5P_coll_md_read_flag_t md_reads_file_flag;
+ hbool_t md_reads_context_flag;
+ hbool_t restore_md_reads_state = FALSE;
+ int mpi_size, mpi_rank;
+ MPI_Comm comm;
+ int root;
+ size_t ic;
+ int mpi_code;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
@@ -2533,6 +2572,20 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig
size_t nproc;
unsigned *nproc_per_chunk;
+ /*
+ * If enabled, disable collective metadata reads here.
+ * Since the chunk address mapping is done on rank 0
+ * only here, it will cause problems if collective
+ * metadata reads are enabled.
+ */
+ if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) {
+ md_reads_file_flag = H5P_FORCE_FALSE;
+ md_reads_context_flag = FALSE;
+ H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag,
+ &md_reads_context_flag);
+ restore_md_reads_state = TRUE;
+ }
+
/* pre-computing: calculate number of processes and
regularity of the selection occupied in each chunk */
if (NULL == (nproc_per_chunk = (unsigned *)H5MM_calloc(total_chunks * sizeof(unsigned))))
@@ -2599,6 +2652,10 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig
#endif
done:
+ /* Re-enable collective metadata reads if we disabled them */
+ if (restore_md_reads_state)
+ H5F_set_coll_metadata_reads(io_info->dset->oloc.file, &md_reads_file_flag, &md_reads_context_flag);
+
if (io_mode_info)
H5MM_free(io_mode_info);
if (mergebuf)