author    Scot Breitenfeld <brtnfld@hdfgroup.org>  2019-01-07 23:08:33 (GMT)
committer Scot Breitenfeld <brtnfld@hdfgroup.org>  2019-01-07 23:08:33 (GMT)
commit    d9b1ec3ce8672cd9c308f72baedc8a6f7bb9474c (patch)
tree      32ff8b32b33a2947c27cbff5a05baee35c23d58b
parent    5dfe00629588a54dbfb6f2d09dfbd88177e37cc2 (diff)
parent    ab5fe769ab711f736238abc89ef215a6ecff5f7e (diff)
download  hdf5-d9b1ec3ce8672cd9c308f72baedc8a6f7bb9474c.zip
          hdf5-d9b1ec3ce8672cd9c308f72baedc8a6f7bb9474c.tar.gz
          hdf5-d9b1ec3ce8672cd9c308f72baedc8a6f7bb9474c.tar.bz2
Merge pull request #1439 in HDFFV/hdf5 from rank0_bcast to develop
* commit 'ab5fe769ab711f736238abc89ef215a6ecff5f7e':
  HDFFV-10625 -- Implemented a process-0 read and then broadcast for collective read of full (H5S_ALL), contiguous, atomic datasets by all the processes in the file communicator.
  indent change
  changed logic statement in if
  Added chunking test, fixed issue with CX set
  Correct another git merge failure.
  Correct misplaced line from git merge.
  Updated and refined version of Scot's "rank 0 bcast" changes.
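For orientation, below is a minimal sketch of the kind of application-level read this change targets: every rank opens the file through the MPI-IO driver and issues a collective H5Dread of the entire (H5S_ALL), contiguous, fixed-size dataset. It is an illustration only, assuming a parallel HDF5 build; the file name, dataset name, and buffer are hypothetical and not part of this change set.

    #include "hdf5.h"
    #include <mpi.h>

    /* Illustrative sketch (not part of this change set): a collective read of a
     * full, contiguous dataset that is eligible for the rank-0-read-and-broadcast
     * optimization described above. */
    static herr_t
    read_full_dataset_collectively(MPI_Comm comm, float *buf)
    {
        hid_t  fapl_id = H5Pcreate(H5P_FILE_ACCESS);
        hid_t  dxpl_id = H5Pcreate(H5P_DATASET_XFER);
        hid_t  file_id, dset_id;
        herr_t status;

        /* All ranks open the file through the MPI-IO VFD */
        H5Pset_fapl_mpio(fapl_id, comm, MPI_INFO_NULL);
        file_id = H5Fopen("example.h5", H5F_ACC_RDONLY, fapl_id);
        dset_id = H5Dopen2(file_id, "/dataset0", H5P_DEFAULT);

        /* Request collective I/O; with an H5S_ALL selection on a small,
         * contiguous, fixed-size dataset, the library may now read on rank 0
         * and MPI_Bcast the buffer to the other ranks. */
        H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
        status = H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, dxpl_id, buf);

        H5Dclose(dset_id);
        H5Fclose(file_id);
        H5Pclose(dxpl_id);
        H5Pclose(fapl_id);
        return status;
    }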
-rw-r--r--  src/H5CX.c          92
-rw-r--r--  src/H5CXprivate.h    3
-rw-r--r--  src/H5Dio.c         18
-rw-r--r--  src/H5Dmpio.c      115
-rw-r--r--  src/H5Dprivate.h     8
-rw-r--r--  src/H5FDmpio.c      43
-rw-r--r--  src/H5Pdxpl.c        3
-rw-r--r--  src/H5Ppublic.h      3
-rw-r--r--  src/H5T.c          106
-rw-r--r--  src/H5Tprivate.h     1
-rw-r--r--  testpar/t_bigio.c    4
-rw-r--r--  testpar/t_dset.c     2
-rw-r--r--  testpar/t_mdset.c  173
-rw-r--r--  testpar/t_pread.c  422
14 files changed, 869 insertions, 124 deletions
diff --git a/src/H5CX.c b/src/H5CX.c
index 1f91ee2..0d20132 100644
--- a/src/H5CX.c
+++ b/src/H5CX.c
@@ -198,6 +198,7 @@ typedef struct H5CX_t {
MPI_Datatype btype; /* MPI datatype for buffer, when using collective I/O */
MPI_Datatype ftype; /* MPI datatype for file, when using collective I/O */
hbool_t mpi_file_flushing; /* Whether an MPI-opened file is being flushed */
+ hbool_t rank0_bcast; /* Whether a dataset meets read-with-rank0-and-bcast requirements */
#endif /* H5_HAVE_PARALLEL */
/* Cached DXPL properties */
@@ -261,6 +262,8 @@ typedef struct H5CX_t {
hbool_t mpio_coll_chunk_multi_ratio_coll_set; /* Whether instrumented "collective chunk multi ratio coll" value is set */
int mpio_coll_chunk_multi_ratio_ind; /* Instrumented "collective chunk multi ratio ind" value (H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME) */
hbool_t mpio_coll_chunk_multi_ratio_ind_set; /* Whether instrumented "collective chunk multi ratio ind" value is set */
+ hbool_t mpio_coll_rank0_bcast; /* Instrumented "read-with-rank0-and-bcast" flag (H5D_XFER_COLL_RANK0_BCAST_NAME) */
+ hbool_t mpio_coll_rank0_bcast_set; /* Whether instrumented "read-with-rank0-and-bcast" flag is set */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /* H5_HAVE_PARALLEL */
@@ -1254,6 +1257,32 @@ H5CX_get_mpi_file_flushing(void)
FUNC_LEAVE_NOAPI((*head)->ctx.mpi_file_flushing)
} /* end H5CX_get_mpi_file_flushing() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5CX_get_mpio_rank0_bcast
+ *
+ * Purpose: Retrieves the "dataset meets read-with-rank0-and-bcast requirements" flag for the current API call context.
+ *
+ * Return: Non-negative on success / Negative on failure
+ *
+ * Programmer: M. Breitenfeld
+ * December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+hbool_t
+H5CX_get_mpio_rank0_bcast(void)
+{
+ H5CX_node_t **head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ /* Sanity check */
+ HDassert(head && *head);
+
+ FUNC_LEAVE_NOAPI((*head)->ctx.rank0_bcast)
+} /* end H5CX_get_mpio_rank0_bcast() */
#endif /* H5_HAVE_PARALLEL */
@@ -2185,6 +2214,34 @@ H5CX_set_mpi_file_flushing(hbool_t flushing)
FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_set_mpi_file_flushing() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5CX_set_mpio_rank0_bcast
+ *
+ * Purpose: Sets the "dataset meets read-with-rank0-and-bcast requirements" flag for the current API call context.
+ *
+ * Return: <none>
+ *
+ * Programmer: M. Breitenfeld
+ * December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+void
+H5CX_set_mpio_rank0_bcast(hbool_t rank0_bcast)
+{
+ H5CX_node_t **head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ /* Sanity checks */
+ HDassert(head && *head);
+
+ (*head)->ctx.rank0_bcast = rank0_bcast;
+
+ FUNC_LEAVE_NOAPI_VOID
+} /* end H5CX_set_mpio_rank0_bcast() */
#endif /* H5_HAVE_PARALLEL */
@@ -2596,6 +2653,40 @@ H5CX_test_set_mpio_coll_chunk_multi_ratio_ind(int mpio_coll_chunk_multi_ratio_in
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5CX_test_set_mpio_coll_chunk_multi_ratio_ind() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5CX_test_set_mpio_coll_rank0_bcast
+ *
+ * Purpose: Sets the instrumented "read-with-rank0-bcast" flag for the current API call context.
+ *
+ * Note: Only sets value if property set in DXPL
+ *
+ * Return: Non-negative on success / Negative on failure
+ *
+ * Programmer: Quincey Koziol
+ * January 2, 2019
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5CX_test_set_mpio_coll_rank0_bcast(hbool_t mpio_coll_rank0_bcast)
+{
+ H5CX_node_t **head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+ /* Sanity checks */
+ HDassert(head && *head);
+ HDassert(!((*head)->ctx.dxpl_id == H5P_DEFAULT ||
+ (*head)->ctx.dxpl_id == H5P_DATASET_XFER_DEFAULT));
+
+ H5CX_TEST_SET_PROP(H5D_XFER_COLL_RANK0_BCAST_NAME, mpio_coll_rank0_bcast)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5CX_test_set_mpio_coll_rank0_bcast() */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /* H5_HAVE_PARALLEL */
@@ -2640,6 +2731,7 @@ H5CX__pop_common(void)
H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME, mpio_coll_chunk_link_num_false)
H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME, mpio_coll_chunk_multi_ratio_coll)
H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME, mpio_coll_chunk_multi_ratio_ind)
+ H5CX_SET_PROP(H5D_XFER_COLL_RANK0_BCAST_NAME, mpio_coll_rank0_bcast)
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5CXprivate.h b/src/H5CXprivate.h
index 46289c4..46d25d0 100644
--- a/src/H5CXprivate.h
+++ b/src/H5CXprivate.h
@@ -77,6 +77,7 @@ H5_DLL H5AC_ring_t H5CX_get_ring(void);
H5_DLL hbool_t H5CX_get_coll_metadata_read(void);
H5_DLL herr_t H5CX_get_mpi_coll_datatypes(MPI_Datatype *btype, MPI_Datatype *ftype);
H5_DLL hbool_t H5CX_get_mpi_file_flushing(void);
+H5_DLL hbool_t H5CX_get_mpio_rank0_bcast(void);
#endif /* H5_HAVE_PARALLEL */
/* "Getter" routines for DXPL properties cached in API context */
@@ -112,6 +113,7 @@ H5_DLL void H5CX_set_coll_metadata_read(hbool_t cmdr);
H5_DLL herr_t H5CX_set_mpi_coll_datatypes(MPI_Datatype btype, MPI_Datatype ftype);
H5_DLL herr_t H5CX_set_mpio_coll_opt(H5FD_mpio_collective_opt_t mpio_coll_opt);
H5_DLL void H5CX_set_mpi_file_flushing(hbool_t flushing);
+H5_DLL void H5CX_set_mpio_rank0_bcast(hbool_t rank0_bcast);
#endif /* H5_HAVE_PARALLEL */
/* "Setter" routines for DXPL properties cached in API context */
@@ -137,6 +139,7 @@ H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_link_num_true(int mpio_coll_chunk_li
H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_link_num_false(int mpio_coll_chunk_link_num_false);
H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_multi_ratio_coll(int mpio_coll_chunk_multi_ratio_coll);
H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_multi_ratio_ind(int mpio_coll_chunk_multi_ratio_ind);
+H5_DLL herr_t H5CX_test_set_mpio_coll_rank0_bcast(hbool_t rank0_bcast);
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Dio.c b/src/H5Dio.c
index fe85d23..6062dff 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -657,22 +657,12 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
/* Various MPI based checks */
#ifdef H5_HAVE_PARALLEL
- if H5F_HAS_FEATURE(dataset->oloc.file, H5FD_FEAT_HAS_MPI) {
- /* If MPI based VFD is used, no VL datatype support yet. */
+ if(H5F_HAS_FEATURE(dataset->oloc.file, H5FD_FEAT_HAS_MPI)) {
+ /* If MPI based VFD is used, no VL or region reference datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
- if(H5T_detect_class(type_info.mem_type, H5T_VLEN, FALSE) > 0)
- HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet")
-
- /* If MPI based VFD is used, no VL datatype support yet. */
- /* This is because they use the global heap in the file and we don't */
- /* support parallel access of that yet */
- /* We should really use H5T_detect_class() here, but it will be difficult
- * to detect the type of the reference if it is nested... -QAK
- */
- if(H5T_get_class(type_info.mem_type, TRUE) == H5T_REFERENCE &&
- H5T_get_ref_type(type_info.mem_type) == H5R_DATASET_REGION)
- HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet")
+ if(H5T_is_vl_storage(type_info.mem_type) > 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL or region reference datatypes yet")
} /* end if */
else {
H5FD_mpio_xfer_t io_xfer_mode; /* MPI I/O transfer mode */
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 2c06800..f5da33d 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -37,7 +37,6 @@
#include "H5Eprivate.h" /* Error handling */
#include "H5Fprivate.h" /* File access */
#include "H5FDprivate.h" /* File drivers */
-#include "H5FDmpi.h" /* MPI-based file drivers */
#include "H5Iprivate.h" /* IDs */
#include "H5MMprivate.h" /* Memory management */
#include "H5Oprivate.h" /* Object headers */
@@ -89,10 +88,20 @@
/******************/
/* Combine chunk address and chunk info into a struct for better performance. */
typedef struct H5D_chunk_addr_info_t {
- haddr_t chunk_addr;
- H5D_chunk_info_t chunk_info;
+ haddr_t chunk_addr;
+ H5D_chunk_info_t chunk_info;
} H5D_chunk_addr_info_t;
+/* Rank 0 Bcast values */
+typedef enum H5D_mpio_no_rank0_bcast_cause_t {
+ H5D_MPIO_RANK0_BCAST = 0x00,
+ H5D_MPIO_RANK0_NOT_H5S_ALL = 0x01,
+ H5D_MPIO_RANK0_NOT_CONTIGUOUS = 0x02,
+ H5D_MPIO_RANK0_NOT_FIXED_SIZE = 0x04,
+ H5D_MPIO_RANK0_GREATER_THAN_2GB = 0x08
+} H5D_mpio_no_rank0_bcast_cause_t;
+
+
/*
* Information about a single chunk when performing collective filtered I/O. All
* of the fields of one of these structs are initialized at the start of collective
@@ -281,8 +290,10 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
const H5S_t *mem_space, const H5D_type_info_t *type_info)
{
H5FD_mpio_xfer_t io_xfer_mode; /* MPI I/O transfer mode */
- unsigned local_cause = 0; /* Local reason(s) for breaking collective mode */
- unsigned global_cause = 0; /* Global reason(s) for breaking collective mode */
+ unsigned local_cause[2] = {0,0}; /* [0] Local reason(s) for breaking collective mode */
+ /* [1] Local reason(s) the read-with-rank0-and-bcast optimization can't be used */
+ unsigned global_cause[2] = {0,0}; /* Global reason(s) for breaking collective mode */
+ htri_t is_vl_storage; /* Whether the dataset's datatype is stored in a variable-length form */
htri_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_PACKAGE
@@ -296,36 +307,37 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
/* For independent I/O, get out quickly and don't try to form consensus */
if(H5CX_get_io_xfer_mode(&io_xfer_mode) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
+ /* Set error flag, but keep going */
+ local_cause[0] |= H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE;
if(io_xfer_mode == H5FD_MPIO_INDEPENDENT)
- local_cause |= H5D_MPIO_SET_INDEPENDENT;
+ local_cause[0] |= H5D_MPIO_SET_INDEPENDENT;
/* Optimized MPI types flag must be set */
/* (based on 'HDF5_MPI_OPT_TYPES' environment variable) */
if(!H5FD_mpi_opt_types_g)
- local_cause |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
+ local_cause[0] |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
/* Don't allow collective operations if datatype conversions need to happen */
if(!type_info->is_conv_noop)
- local_cause |= H5D_MPIO_DATATYPE_CONVERSION;
+ local_cause[0] |= H5D_MPIO_DATATYPE_CONVERSION;
/* Don't allow collective operations if data transform operations should occur */
if(!type_info->is_xform_noop)
- local_cause |= H5D_MPIO_DATA_TRANSFORMS;
+ local_cause[0] |= H5D_MPIO_DATA_TRANSFORMS;
/* Check whether these are both simple or scalar dataspaces */
if(!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space))
&& (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space))))
- local_cause |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
+ local_cause[0] |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
/* Dataset storage must be contiguous or chunked */
if(!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
io_info->dset->shared->layout.type == H5D_CHUNKED))
- local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+ local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
/* check if external-file storage is used */
if(io_info->dset->shared->dcpl_cache.efl.nused > 0)
- local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+ local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
/* The handling of memory space is different for chunking and contiguous
* storage. For contiguous storage, mem_space and file_space won't change
@@ -340,31 +352,84 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
* is less than 3. The functions needed (MPI_Mprobe and MPI_Imrecv) will
* not be available.
*/
- if (io_info->op_type == H5D_IO_OP_WRITE &&
- io_info->dset->shared->layout.type == H5D_CHUNKED &&
- io_info->dset->shared->dcpl_cache.pline.nused > 0)
- local_cause |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
+ if(io_info->op_type == H5D_IO_OP_WRITE &&
+ io_info->dset->shared->layout.type == H5D_CHUNKED &&
+ io_info->dset->shared->dcpl_cache.pline.nused > 0)
+ local_cause[0] |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
#endif
+ /* Check if we are able to do a MPI_Bcast of the data from one rank
+ * instead of having all the processes involved in the collective I/O call.
+ */
+
+ /* Check to see if the process is reading the entire dataset */
+ if(H5S_GET_SELECT_TYPE(file_space) != H5S_SEL_ALL)
+ local_cause[1] |= H5D_MPIO_RANK0_NOT_H5S_ALL;
+ /* Only perform this optimization for contiguous datasets, currently */
+ else if(H5D_CONTIGUOUS != io_info->dset->shared->layout.type)
+ /* Flag to do a MPI_Bcast of the data from one proc instead of
+ * having all the processes involved in the collective I/O.
+ */
+ local_cause[1] |= H5D_MPIO_RANK0_NOT_CONTIGUOUS;
+ else if((is_vl_storage = H5T_is_vl_storage(type_info->dset_type)) < 0)
+ local_cause[0] |= H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE;
+ else if(is_vl_storage)
+ local_cause[1] |= H5D_MPIO_RANK0_NOT_FIXED_SIZE;
+ else {
+ size_t type_size; /* Size of dataset's datatype */
+
+ /* Retrieve the size of the dataset's datatype */
+ if(0 == (type_size = H5T_GET_SIZE(type_info->dset_type)))
+ local_cause[0] |= H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE;
+ else {
+ hssize_t snelmts; /* [Signed] # of elements in dataset's dataspace */
+
+ /* Retrieve the number of elements in the dataset's dataspace */
+ if((snelmts = H5S_GET_EXTENT_NPOINTS(file_space)) < 0)
+ local_cause[0] |= H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE;
+ else {
+ hsize_t dset_size;
+
+ /* Determine dataset size */
+ dset_size = ((hsize_t)snelmts) * type_size;
+
+ /* If the size of the dataset is less than 2GB then do an MPI_Bcast
+ * of the data from one process instead of having all the processes
+ * involved in the collective I/O.
+ */
+ if(dset_size > ((hsize_t)(2.0F * H5_GB) - 1))
+ local_cause[1] |= H5D_MPIO_RANK0_GREATER_THAN_2GB;
+ } /* end else */
+ } /* end else */
+ } /* end else */
+
/* Check for independent I/O */
- if(local_cause & H5D_MPIO_SET_INDEPENDENT)
- global_cause = local_cause;
+ if(local_cause[0] & H5D_MPIO_SET_INDEPENDENT)
+ global_cause[0] = local_cause[0];
else {
int mpi_code; /* MPI error code */
/* Form consensus opinion among all processes about whether to perform
* collective I/O
*/
- if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
} /* end else */
/* Set the local & global values of no-collective-cause in the API context */
- H5CX_set_mpio_local_no_coll_cause(local_cause);
- H5CX_set_mpio_global_no_coll_cause(global_cause);
+ H5CX_set_mpio_local_no_coll_cause(local_cause[0]);
+ H5CX_set_mpio_global_no_coll_cause(global_cause[0]);
+
+ /* Set read-with-rank0-and-bcast flag if possible */
+ if(global_cause[0] == 0 && global_cause[1] == 0) {
+ H5CX_set_mpio_rank0_bcast(TRUE);
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ H5CX_test_set_mpio_coll_rank0_bcast(TRUE);
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+ } /* end if */
/* Set the return value, based on the global cause */
- ret_value = global_cause > 0 ? FALSE : TRUE;
+ ret_value = global_cause[0] > 0 ? FALSE : TRUE;
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -3069,8 +3134,8 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length;
/* Currently, these chunk reads are done independently and will likely
- * cause issues with collective metadata reads enabled. In the future,
- * this should be refactored to use collective chunk reads - JTH */
+ * cause issues with collective metadata reads enabled. In the future,
+ * this should be refactored to use collective chunk reads - JTH */
/* Get the original state of parallel I/O transfer mode */
if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
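To restate the eligibility logic added in H5D__mpio_opt_possible() outside of the surrounding plumbing, here is a simplified sketch of the four conditions that must all hold before the rank-0-read-and-broadcast path is taken; the helper name is hypothetical and this is an illustration, not library code.

    /* Hypothetical helper, for illustration only: returns the local reasons the
     * rank-0-read-and-broadcast path can't be used (0 means it is allowed). */
    static unsigned
    rank0_bcast_ineligibility(H5S_sel_type sel_type, H5D_layout_t layout,
                              hbool_t is_vl_stored, hsize_t nelmts, size_t type_size)
    {
        unsigned cause = 0;

        if(sel_type != H5S_SEL_ALL)                   /* must read the whole dataset */
            cause |= H5D_MPIO_RANK0_NOT_H5S_ALL;
        else if(layout != H5D_CONTIGUOUS)             /* contiguous storage only, currently */
            cause |= H5D_MPIO_RANK0_NOT_CONTIGUOUS;
        else if(is_vl_stored)                         /* no VL or region reference storage */
            cause |= H5D_MPIO_RANK0_NOT_FIXED_SIZE;
        else if((nelmts * type_size) > ((hsize_t)(2.0F * H5_GB) - 1))
            cause |= H5D_MPIO_RANK0_GREATER_THAN_2GB; /* broadcast payload must stay below 2 GB */

        return cause;
    }

As in the patch, each rank's causes are then combined with MPI_Allreduce(..., MPI_BOR, ...) so that all ranks agree before rank 0 performs the read on their behalf.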
diff --git a/src/H5Dprivate.h b/src/H5Dprivate.h
index aaa3db2..6fb7889 100644
--- a/src/H5Dprivate.h
+++ b/src/H5Dprivate.h
@@ -95,7 +95,13 @@
/* Definitions for all collective chunk instrumentation properties */
#define H5D_XFER_COLL_CHUNK_SIZE sizeof(unsigned)
#define H5D_XFER_COLL_CHUNK_DEF 1
-#define H5D_XFER_COLL_CHUNK_FIX 0
+
+/* General collective I/O instrumentation properties */
+#define H5D_XFER_COLL_RANK0_BCAST_NAME "coll_rank0_bcast"
+
+/* Definitions for general collective I/O instrumentation properties */
+#define H5D_XFER_COLL_RANK0_BCAST_SIZE sizeof(hbool_t)
+#define H5D_XFER_COLL_RANK0_BCAST_DEF FALSE
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
/* Default temporary buffer size */
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index d160858..3ab90aa 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1354,6 +1354,7 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
int n;
#endif
hbool_t use_view_this_time = FALSE;
+ hbool_t rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */
herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
@@ -1437,8 +1438,25 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
if(H5FD_mpio_Debug[(int)'r'])
HDfprintf(stdout, "%s: doing MPI collective IO\n", FUNC);
#endif
- if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
+ /* Check whether we should read from rank 0 and broadcast to other ranks */
+ if(H5CX_get_mpio_rank0_bcast()) {
+#ifdef H5FDmpio_DEBUG
+ if(H5FD_mpio_Debug[(int)'r'])
+ HDfprintf(stdout, "%s: doing read-rank0-and-MPI_Bcast\n", FUNC);
+#endif
+ /* Indicate path we've taken */
+ rank0_bcast = TRUE;
+
+ /* Read on rank 0 Bcast to other ranks */
+ if(file->mpi_rank == 0)
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
} /* end if */
else {
#ifdef H5FDmpio_DEBUG
@@ -1460,13 +1478,26 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
- /* How many bytes were actually read? */
+ /* Only retrieve bytes read if this rank _actually_ participated in I/O */
+ if(!rank0_bcast || (rank0_bcast && file->mpi_rank == 0) ) {
+ /* How many bytes were actually read? */
#if MPI_VERSION >= 3
- if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
#else
- if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
#endif
- HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ } /* end if */
+
+ /* If the rank0-bcast feature was used, broadcast the # of bytes read to
+ * other ranks, which didn't perform any I/O.
+ */
+ /* NOTE: This could be optimized further to be combined with the broadcast
+ * of the data. (QAK - 2019/1/2)
+ */
+ if(rank0_bcast)
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
/* Get the type's size */
#if MPI_VERSION >= 3
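Stripped of the VFD bookkeeping, the read path added to H5FD__mpio_read() above follows a standard MPI pattern: rank 0 performs the file read, then broadcasts both the data and the count of what was actually read. The sketch below shows that pattern in plain MPI as an illustration; it assumes an MPI-3 implementation and an already-opened file handle, and is not the driver code itself.

    #include <mpi.h>

    /* Illustrative MPI-only sketch of the rank-0-read-and-broadcast pattern
     * (assumes fh, comm, buf, count, and buf_type are already set up). */
    static int
    read_rank0_and_bcast(MPI_File fh, MPI_Offset offset, void *buf, int count,
                         MPI_Datatype buf_type, MPI_Comm comm)
    {
        MPI_Status status;
        MPI_Count  elements_read = 0;
        int        rank, mpi_code;

        MPI_Comm_rank(comm, &rank);

        /* Only rank 0 touches the file */
        if(rank == 0) {
            if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(fh, offset, buf, count, buf_type, &status)))
                return mpi_code;
            MPI_Get_elements_x(&status, buf_type, &elements_read);  /* MPI-3; older MPIs use MPI_Get_elements */
        }

        /* The other ranks get the data and the count from rank 0 */
        if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, count, buf_type, 0, comm)))
            return mpi_code;
        if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&elements_read, 1, MPI_COUNT, 0, comm)))
            return mpi_code;

        return MPI_SUCCESS;
    }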
diff --git a/src/H5Pdxpl.c b/src/H5Pdxpl.c
index bfc1d93..8338d84 100644
--- a/src/H5Pdxpl.c
+++ b/src/H5Pdxpl.c
@@ -2017,6 +2017,7 @@ done:
FUNC_LEAVE_API(ret_value)
} /* end H5Pget_mpio_actual_io_mode() */
+
/*-------------------------------------------------------------------------
* Function: H5Pget_mpio_no_collective_cause
*
@@ -2053,8 +2054,6 @@ H5Pget_mpio_no_collective_cause(hid_t plist_id, uint32_t *local_no_collective_ca
done:
FUNC_LEAVE_API(ret_value)
} /* end H5Pget_mpio_no_collective_cause() */
-
-
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 2f094ea..078fe74 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -167,7 +167,8 @@ typedef enum H5D_mpio_no_collective_cause_t {
H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES = 0x10,
H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET = 0x20,
H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED = 0x40,
- H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE = 0x80
+ H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE = 0x80,
+ H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE = 0x100
} H5D_mpio_no_collective_cause_t;
/********************/
diff --git a/src/H5T.c b/src/H5T.c
index 01ace87..9544488 100644
--- a/src/H5T.c
+++ b/src/H5T.c
@@ -295,6 +295,7 @@ static htri_t H5T__compiler_conv(H5T_t *src, H5T_t *dst);
static herr_t H5T__set_size(H5T_t *dt, size_t size);
static herr_t H5T__close_cb(H5T_t *dt);
static H5T_path_t *H5T__path_find_real(const H5T_t *src, const H5T_t *dst, const char *name, H5T_conv_func_t *conv);
+static hbool_t H5T__detect_reg_ref(const H5T_t *dt);
/*****************************/
@@ -5506,6 +5507,111 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5T__detect_reg_ref
+ *
+ * Purpose: Check whether a datatype contains (or is) a region reference
+ * datatype.
+ *
+ * Return: TRUE (1) or FALSE (0) on success
+ * (Can't fail)
+ *
+ * Programmer: Quincey Koziol
+ * Saturday, January 5, 2019
+ *
+ *-------------------------------------------------------------------------
+ */
+static hbool_t
+H5T__detect_reg_ref(const H5T_t *dt)
+{
+ unsigned u; /* Local index variable */
+ hbool_t ret_value = FALSE; /* Return value */
+
+ FUNC_ENTER_STATIC_NOERR
+
+ /* Sanity checks */
+ HDassert(dt);
+
+ /* Check if this datatype is a region reference */
+ if(H5T_REFERENCE == dt->shared->type && H5R_DATASET_REGION == dt->shared->u.atomic.u.r.rtype)
+ HGOTO_DONE(TRUE);
+
+ /* Check for types that might have the correct type as a component */
+ switch(dt->shared->type) {
+ case H5T_COMPOUND:
+ /* Iterate over all the compound datatype's fields */
+ for(u = 0; u < dt->shared->u.compnd.nmembs; u++)
+ /* Recurse on field's datatype */
+ if(H5T__detect_reg_ref(dt->shared->u.compnd.memb[u].type))
+ HGOTO_DONE(TRUE);
+ break;
+
+ case H5T_ARRAY:
+ case H5T_VLEN:
+ case H5T_ENUM:
+ HGOTO_DONE(H5T__detect_reg_ref(dt->shared->parent));
+ break;
+
+ case H5T_NO_CLASS:
+ case H5T_INTEGER:
+ case H5T_FLOAT:
+ case H5T_TIME:
+ case H5T_STRING:
+ case H5T_BITFIELD:
+ case H5T_OPAQUE:
+ case H5T_REFERENCE:
+ case H5T_NCLASSES:
+ default:
+ break;
+ } /* end switch */
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5T__detect_reg_ref() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5T_is_vl_storage
+ *
+ * Purpose: Check if a datatype will be stored in a variable-length form.
+ *
+ * Notes: Currently, only variable-length string & sequences and region
+ * references are stored in a variable-length form.
+ *
+ * Return:
+ * One of two values on success:
+ * TRUE - If the datatype will be stored in a variable-length form
+ * FALSE - If the datatype will NOT be stored in a variable-length form
+ * <0 is returned on failure
+ *
+ * Programmer: Quincey Koziol
+ * Saturday, January 5, 2019
+ *
+ *-------------------------------------------------------------------------
+ */
+htri_t
+H5T_is_vl_storage(const H5T_t *dt)
+{
+ htri_t ret_value = FALSE;
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Sanity check */
+ HDassert(dt);
+
+ /* VL and region reference datatypes are stored in variable-length form */
+ if(H5T_detect_class(dt, H5T_VLEN, FALSE))
+ ret_value = TRUE;
+ else if(H5T_detect_class(dt, H5T_REFERENCE, FALSE))
+ ret_value = H5T__detect_reg_ref(dt);
+ else
+ ret_value = FALSE;
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5T_is_vl_storage() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5T_upgrade_version_cb
*
* Purpose: H5T__visit callback to Upgrade the version of a datatype
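For context on what the new H5T_is_vl_storage() check above excludes, the short sketch below builds, with the public API, the kinds of datatypes it reports as stored in variable-length form (and which therefore disable the rank-0-bcast read); it is illustrative only and not part of this change set.

    #include "hdf5.h"

    /* Illustrative only: datatypes that are stored in variable-length form --
     * variable-length strings, variable-length sequences, and region references
     * (here nested in a compound). */
    static void
    make_vl_stored_types(void)
    {
        hid_t vl_str = H5Tcopy(H5T_C_S1);
        H5Tset_size(vl_str, H5T_VARIABLE);                 /* variable-length string */

        hid_t vl_seq = H5Tvlen_create(H5T_NATIVE_INT);     /* variable-length sequence */

        hid_t cmpd = H5Tcreate(H5T_COMPOUND, sizeof(hdset_reg_ref_t));
        H5Tinsert(cmpd, "region", 0, H5T_STD_REF_DSETREG); /* region reference in a compound */

        H5Tclose(cmpd);
        H5Tclose(vl_seq);
        H5Tclose(vl_str);
    }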
diff --git a/src/H5Tprivate.h b/src/H5Tprivate.h
index 6b6446f..3dcbb2c 100644
--- a/src/H5Tprivate.h
+++ b/src/H5Tprivate.h
@@ -145,6 +145,7 @@ H5_DLL H5T_t *H5T_get_actual_type(H5T_t *dt);
H5_DLL herr_t H5T_save_refresh_state(hid_t tid, struct H5O_shared_t *cached_H5O_shared);
H5_DLL herr_t H5T_restore_refresh_state(hid_t tid, struct H5O_shared_t *cached_H5O_shared);
H5_DLL hbool_t H5T_already_vol_managed(const H5T_t *dt);
+H5_DLL htri_t H5T_is_vl_storage(const H5T_t *dt);
/* Reference specific functions */
H5_DLL H5R_type_t H5T_get_ref_type(const H5T_t *dt);
diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c
index fdd3488..1d882b8 100644
--- a/testpar/t_bigio.c
+++ b/testpar/t_bigio.c
@@ -671,7 +671,7 @@ dataset_big_write(void)
/* create a memory dataspace independently */
mem_dataspace = H5Screate_simple (RANK, dims, NULL);
VRFY((mem_dataspace >= 0), "");
- if(!mpi_rank == 0) {
+ if(mpi_rank != 0) {
ret = H5Sselect_none(mem_dataspace);
VRFY((ret >= 0), "H5Sset_none succeeded");
}
@@ -980,7 +980,7 @@ dataset_big_read(void)
/* create a memory dataspace independently */
mem_dataspace = H5Screate_simple (RANK, dims, NULL);
VRFY((mem_dataspace >= 0), "");
- if(!mpi_rank == 0) {
+ if(mpi_rank != 0) {
ret = H5Sselect_none(mem_dataspace);
VRFY((ret >= 0), "H5Sset_none succeeded");
}
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 281d027..35501d8 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2649,7 +2649,7 @@ compress_readAll(void)
/* Try reading the data */
ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
- VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ VRFY((ret >= 0), "H5Dread succeeded");
/* Verify data read */
for(u=0; u<dim; u++)
diff --git a/testpar/t_mdset.c b/testpar/t_mdset.c
index 5d989bb..16eb13c 100644
--- a/testpar/t_mdset.c
+++ b/testpar/t_mdset.c
@@ -12,6 +12,7 @@
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include "testphdf5.h"
+#include "H5Dprivate.h"
#define DIM 2
#define SIZE 32
@@ -311,13 +312,27 @@ void compact_dataset(void)
VRFY((ret>= 0),"set independent IO collectively succeeded");
}
-
dataset = H5Dopen2(iof, dname, H5P_DEFAULT);
VRFY((dataset >= 0), "H5Dopen2 succeeded");
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ hbool_t prop_value;
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ ret = H5Pinsert2(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, H5D_XFER_COLL_RANK0_BCAST_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((ret >= 0), "H5Pinsert2() succeeded");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
ret = H5Dread(dataset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, dxpl, inme);
VRFY((ret >= 0), "H5Dread succeeded");
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ prop_value = FALSE;
+ ret = H5Pget(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value);
+ VRFY((ret >= 0), "H5Pget succeeded");
+ VRFY((prop_value == FALSE && dxfer_coll_type == DXFER_COLLECTIVE_IO),"rank 0 Bcast optimization was performed for a compact dataset");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
/* Verify data value */
for(i = 0; i < size; i++)
for(j = 0; j < size; j++)
@@ -603,8 +618,8 @@ void dataset_fillvalue(void)
hsize_t req_count[4] = {1, 6, 7, 8};
hsize_t dset_size; /* Dataset size */
int *rdata, *wdata; /* Buffers for data to read and write */
- int *twdata, *trdata; /* Temporary pointer into buffer */
- int acc, i, j, k, l; /* Local index variables */
+ int *twdata, *trdata; /* Temporary pointer into buffer */
+ int acc, i, j, k, l, ii; /* Local index variables */
herr_t ret; /* Generic return value */
const char *filename;
@@ -645,27 +660,60 @@ void dataset_fillvalue(void)
/*
* Read dataset before any data is written.
*/
- /* set entire read buffer with the constant 2 */
- HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
- /* Independently read the entire dataset back */
- ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
- VRFY((ret >= 0), "H5Dread succeeded");
- /* Verify all data read are the fill value 0 */
- trdata = rdata;
- err_num = 0;
- for(i = 0; i < (int)dset_dims[0]; i++)
+ /* Create DXPL for I/O */
+ dxpl = H5Pcreate(H5P_DATASET_XFER);
+ VRFY((dxpl >= 0), "H5Pcreate succeeded");
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ hbool_t prop_value;
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ ret = H5Pinsert2(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, H5D_XFER_COLL_RANK0_BCAST_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((ret >= 0),"testing property list inserted succeeded");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ for(ii = 0; ii < 2; ii++) {
+
+ if(ii == 0)
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+ else
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+ /* set entire read buffer with the constant 2 */
+ HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+
+ /* Read the entire dataset back */
+ ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+ VRFY((ret >= 0), "H5Dread succeeded");
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ prop_value = FALSE;
+ ret = H5Pget(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value);
+ VRFY((ret >= 0), "testing property list get succeeded");
+ if(ii == 0)
+ VRFY((prop_value == FALSE), "correctly handled rank 0 Bcast");
+ else
+ VRFY((prop_value == TRUE), "correctly handled rank 0 Bcast");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* Verify all data read are the fill value 0 */
+ trdata = rdata;
+ err_num = 0;
+ for(i = 0; i < (int)dset_dims[0]; i++)
for(j = 0; j < (int)dset_dims[1]; j++)
- for(k = 0; k < (int)dset_dims[2]; k++)
- for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
- if(*trdata != 0)
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
- if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+ for(k = 0; k < (int)dset_dims[2]; k++)
+ for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
+ if(*trdata != 0)
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
+ if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
printf("[more errors ...]\n");
- if(err_num){
+ if(err_num) {
printf("%d errors found in check_value\n", err_num);
- nerrors++;
+ nerrors++;
+ }
}
/* Barrier to ensure all processes have completed the above test. */
@@ -681,10 +729,6 @@ void dataset_fillvalue(void)
ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, req_start, NULL, req_count, NULL);
VRFY((ret >= 0), "H5Sselect_hyperslab succeeded on memory dataspace");
- /* Create DXPL for collective I/O */
- dxpl = H5Pcreate(H5P_DATASET_XFER);
- VRFY((dxpl >= 0), "H5Pcreate succeeded");
-
ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
@@ -711,37 +755,64 @@ void dataset_fillvalue(void)
/*
* Read dataset after partial write.
*/
- /* set entire read buffer with the constant 2 */
- HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
- /* Independently read the entire dataset back */
- ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
- VRFY((ret >= 0), "H5Dread succeeded");
- /* Verify correct data read */
- twdata=wdata;
- trdata=rdata;
- err_num=0;
- for(i=0; i<(int)dset_dims[0]; i++)
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ ret = H5Pset(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value);
+ VRFY((ret >= 0), " H5Pset succeeded");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ for(ii = 0; ii < 2; ii++) {
+
+ if(ii == 0)
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+ else
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+ /* set entire read buffer with the constant 2 */
+ HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+
+ /* Read the entire dataset back */
+ ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+ VRFY((ret >= 0), "H5Dread succeeded");
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ prop_value = FALSE;
+ ret = H5Pget(dxpl, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value);
+ VRFY((ret >= 0), "testing property list get succeeded");
+ if(ii == 0)
+ VRFY((prop_value == FALSE), "correctly handled rank 0 Bcast");
+ else
+ VRFY((prop_value == TRUE), "correctly handled rank 0 Bcast");
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* Verify correct data read */
+ twdata=wdata;
+ trdata=rdata;
+ err_num=0;
+ for(i=0; i<(int)dset_dims[0]; i++)
for(j=0; j<(int)dset_dims[1]; j++)
- for(k=0; k<(int)dset_dims[2]; k++)
- for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
- if(i<mpi_size) {
- if(*twdata != *trdata )
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
- } /* end if */
- else {
- if(*trdata != 0)
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
- } /* end else */
- if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+ for(k=0; k<(int)dset_dims[2]; k++)
+ for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
+ if(i<mpi_size) {
+ if(*twdata != *trdata )
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
+ } /* end if */
+ else {
+ if(*trdata != 0)
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
+ } /* end else */
+ if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
printf("[more errors ...]\n");
- if(err_num){
+ if(err_num){
printf("%d errors found in check_value\n", err_num);
- nerrors++;
+ nerrors++;
+ }
}
-
+
/* Close all file objects */
ret = H5Dclose(dataset);
VRFY((ret >= 0), "H5Dclose succeeded");
@@ -856,7 +927,7 @@ void collective_group_write(void)
if(!((m+1) % 10)) {
printf("created %d groups\n", m+1);
MPI_Barrier(MPI_COMM_WORLD);
- }
+ }
#endif /* BARRIER_CHECKS */
}
diff --git a/testpar/t_pread.c b/testpar/t_pread.c
index 0905d44..74feeb6 100644
--- a/testpar/t_pread.c
+++ b/testpar/t_pread.c
@@ -17,6 +17,7 @@
*/
#include "testpar.h"
+#include "H5Dprivate.h"
/* The collection of files is included below to aid
* an external "cleanup" process if required.
@@ -34,6 +35,8 @@ const char *FILENAMES[NFILENAME + 1]={"reloc_t_pread_data_file",
#define COUNT 1000
+#define LIMIT_NPROC 6
+
hbool_t pass = true;
static const char *random_hdf5_text =
"Now is the time for all first-time-users of HDF5 to read their \
@@ -46,7 +49,7 @@ completely foolproof is to underestimate the ingenuity of complete\n\
fools.\n";
static int generate_test_file(MPI_Comm comm, int mpi_rank, int group);
-static int test_parallel_read(MPI_Comm comm, int mpi_rank, int group);
+static int test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group);
static char *test_argv0 = NULL;
@@ -108,6 +111,9 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
hid_t fapl_id = -1;
hid_t dxpl_id = -1;
hid_t dset_id = -1;
+ hid_t dset_id_ch = -1;
+ hid_t dcpl_id = H5P_DEFAULT;
+ hsize_t chunk[1];
float nextValue;
float *data_slice = NULL;
@@ -272,6 +278,55 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
}
}
+
+ /* create a chunked dataset */
+ chunk[0] = COUNT/8;
+
+ if ( pass ) {
+ if ( (dcpl_id = H5Pcreate (H5P_DATASET_CREATE)) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Pcreate() failed.\n";
+ }
+ }
+
+ if ( pass ) {
+ if ( (H5Pset_chunk (dcpl_id, 1, chunk) ) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Pset_chunk() failed.\n";
+ }
+ }
+
+ if ( pass ) {
+
+ if ( (dset_id_ch = H5Dcreate2(file_id, "dataset0_chunked", H5T_NATIVE_FLOAT,
+ filespace, H5P_DEFAULT, dcpl_id,
+ H5P_DEFAULT)) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dcreate2() failed.\n";
+ }
+ }
+
+ if ( pass ) {
+ if ( (H5Dwrite(dset_id_ch, H5T_NATIVE_FLOAT, memspace,
+ filespace, dxpl_id, data_slice)) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dwrite() failed.\n";
+ }
+ }
+ if ( pass || (dcpl_id != -1)) {
+ if ( H5Pclose(dcpl_id) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Pclose(dcpl_id) failed.\n";
+ }
+ }
+
+ if ( pass || (dset_id_ch != -1)) {
+ if ( H5Dclose(dset_id_ch) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
+ }
+ }
+
/* close file, etc. */
if ( pass || (dset_id != -1)) {
if ( H5Dclose(dset_id) < 0 ) {
@@ -413,7 +468,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
* Function: test_parallel_read
*
* Purpose: This actually tests the superblock optimization
- * and covers the two primary cases we're interested in.
+ * and covers the three primary cases we're interested in.
* 1). That HDF5 files can be opened in parallel by
* the rank 0 process and that the superblock
* offset is correctly broadcast to the other
@@ -423,6 +478,10 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
* subgroups of MPI_COMM_WORLD and that each
* subgroup operates as described in (1) to
* collectively read the data.
+ * 3). Testing proc0-read-and-MPI_Bcast using
+ * sub-communicators, and reading into
+ * a memory space that is different from the
+ * file space, and chunked datasets.
*
* The global MPI rank is used for reading and
* writing data for process specific data in the
@@ -444,7 +503,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
*-------------------------------------------------------------------------
*/
static int
-test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
+test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id)
{
const char *failure_mssg;
const char *fcn_name = "test_parallel_read()";
@@ -457,8 +516,13 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
hid_t fapl_id = -1;
hid_t file_id = -1;
hid_t dset_id = -1;
+ hid_t dset_id_ch = -1;
+ hid_t dxpl_id = H5P_DEFAULT;
hid_t memspace = -1;
hid_t filespace = -1;
+ hid_t filetype = -1;
+ size_t filetype_size;
+ hssize_t dset_size;
hsize_t i;
hsize_t offset;
hsize_t count = COUNT;
@@ -552,6 +616,14 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
}
}
+ /* open the chunked data set */
+ if ( pass ) {
+ if ( (dset_id_ch = H5Dopen2(file_id, "dataset0_chunked", H5P_DEFAULT)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dopen2() failed\n";
+ }
+ }
+
/* setup memspace */
if ( pass ) {
dims[0] = count;
@@ -606,14 +678,6 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
}
}
- /* close file, etc. */
- if ( pass || (dset_id != -1) ) {
- if ( H5Dclose(dset_id) < 0 ) {
- pass = false;
- failure_mssg = "H5Dclose(dset_id) failed.\n";
- }
- }
-
if ( pass || (memspace != -1) ) {
if ( H5Sclose(memspace) < 0 ) {
pass = false;
@@ -628,6 +692,330 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
}
}
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ /*
+ * Test reading proc0-read-and-bcast with sub-communicators
+ */
+
+ /* Don't test with more than LIMIT_NPROC processes to avoid memory issues */
+
+ if( group_size <= LIMIT_NPROC ) {
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ hbool_t prop_value;
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ if ( (filespace = H5Dget_space(dset_id )) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dget_space failed.\n";
+ }
+
+ if ( (dset_size = H5Sget_simple_extent_npoints(filespace)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Sget_simple_extent_npoints failed.\n";
+ }
+
+ if ( (filetype = H5Dget_type(dset_id)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dget_type failed.\n";
+ }
+
+ if ( (filetype_size = H5Tget_size(filetype)) == 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Tget_size failed.\n";
+ }
+
+ if ( H5Tclose(filetype) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Tclose failed.\n";
+ };
+
+ if ( (data_slice = (float *)HDmalloc((size_t)dset_size*filetype_size)) == NULL ) {
+ pass = FALSE;
+ failure_mssg = "malloc of data_slice failed.\n";
+ }
+
+ if ( pass ) {
+ if ( (dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
+ }
+ }
+
+ if ( pass ) {
+ if ( (H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
+ }
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ if(H5Pinsert2(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, H5D_XFER_COLL_RANK0_BCAST_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pinsert2() failed\n";
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* read H5S_ALL section */
+ if ( pass ) {
+ if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL,
+ H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dread() failed\n";
+ }
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = FALSE;
+ if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pget() failed\n";
+ }
+ if (pass) {
+ if(prop_value != TRUE) {
+ pass = FALSE;
+ failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
+ }
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* verify the data */
+ if ( pass ) {
+
+ if ( comm == MPI_COMM_WORLD ) /* test 1 */
+ nextValue = 0;
+ else if ( group_id == 0 ) /* test 2 group 0 */
+ nextValue = 0;
+ else /* test 2 group 1 */
+ nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
+
+ i = 0;
+ while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
+ /* what we really want is data_slice[i] != nextValue --
+ * the following is a circumlocution to shut up the
+ * compiler.
+ */
+ if ( ( data_slice[i] > nextValue ) ||
+ ( data_slice[i] < nextValue ) ) {
+ pass = FALSE;
+ failure_mssg = "Unexpected dset contents.\n";
+ }
+ nextValue += 1;
+ i++;
+ }
+ }
+
+ /* read H5S_ALL section for the chunked dataset */
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ if(H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pset() failed\n";
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ for ( i = 0; i < (hsize_t)dset_size; i++) {
+ data_slice[i] = 0;
+ }
+ if ( pass ) {
+ if ( (H5Dread(dset_id_ch, H5T_NATIVE_FLOAT, H5S_ALL,
+ H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dread() failed\n";
+ }
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = FALSE;
+ if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pget() failed\n";
+ }
+ if (pass) {
+ if(prop_value == TRUE) {
+ pass = FALSE;
+ failure_mssg = "rank 0 Bcast optimization was mistakenly performed for chunked dataset\n";
+ }
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* verify the data */
+ if ( pass ) {
+
+ if ( comm == MPI_COMM_WORLD ) /* test 1 */
+ nextValue = 0;
+ else if ( group_id == 0 ) /* test 2 group 0 */
+ nextValue = 0;
+ else /* test 2 group 1 */
+ nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
+
+ i = 0;
+ while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
+ /* what we really want is data_slice[i] != nextValue --
+ * the following is a circumlocution to shut up the
+ * compiler.
+ */
+ if ( ( data_slice[i] > nextValue ) ||
+ ( data_slice[i] < nextValue ) ) {
+ pass = FALSE;
+ failure_mssg = "Unexpected chunked dset contents.\n";
+ }
+ nextValue += 1;
+ i++;
+ }
+ }
+
+ if ( pass || (filespace != -1) ) {
+ if ( H5Sclose(filespace) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Sclose(filespace) failed.\n";
+ }
+ }
+
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ /*
+ * Read an H5S_ALL filespace into a hyperslab defined memory space
+ */
+
+ if ( (data_slice = (float *)HDmalloc((size_t)(dset_size*2)*filetype_size)) == NULL ) {
+ pass = FALSE;
+ failure_mssg = "malloc of data_slice failed.\n";
+ }
+
+ /* setup memspace */
+ if ( pass ) {
+ dims[0] = (hsize_t)dset_size*2;
+ if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
+ }
+ }
+ if ( pass ) {
+ offset = (hsize_t)dset_size;
+ if ( (H5Sselect_hyperslab(memspace, H5S_SELECT_SET,
+ &offset, NULL, &offset, NULL)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Sselect_hyperslab() failed\n";
+ }
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = H5D_XFER_COLL_RANK0_BCAST_DEF;
+ if(H5Pset(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pset() failed\n";
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* read this processes section of the data */
+ if ( pass ) {
+ if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace,
+ H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dread() failed\n";
+ }
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if ( pass ) {
+ prop_value = FALSE;
+ if(H5Pget(dxpl_id, H5D_XFER_COLL_RANK0_BCAST_NAME, &prop_value) < 0) {
+ pass = FALSE;
+ failure_mssg = "H5Pget() failed\n";
+ }
+ if (pass) {
+ if(prop_value != TRUE) {
+ pass = FALSE;
+ failure_mssg = "rank 0 Bcast optimization was mistakenly not performed\n";
+ }
+ }
+ }
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ /* verify the data */
+ if ( pass ) {
+
+ if ( comm == MPI_COMM_WORLD ) /* test 1 */
+ nextValue = 0;
+ else if ( group_id == 0 ) /* test 2 group 0 */
+ nextValue = 0;
+ else /* test 2 group 1 */
+ nextValue = (float)((hsize_t)(mpi_size / 2)*count);
+
+ i = (hsize_t)dset_size;
+ while ( ( pass ) && ( i < (hsize_t)dset_size*2 ) ) {
+ /* what we really want is data_slice[i] != nextValue --
+ * the following is a circumlocution to shut up the
+ * compiler.
+ */
+ if ( ( data_slice[i] > nextValue ) ||
+ ( data_slice[i] < nextValue ) ) {
+ pass = FALSE;
+ failure_mssg = "Unexpected dset contents.\n";
+ }
+ nextValue += 1;
+ i++;
+ }
+ }
+
+ if ( pass || (memspace != -1) ) {
+ if ( H5Sclose(memspace) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Sclose(memspace) failed.\n";
+ }
+ }
+
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ if ( pass || (dxpl_id != -1) ) {
+ if ( H5Pclose(dxpl_id) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Pclose(dxpl_id) failed.\n";
+ }
+ }
+ }
+
+ /* close file, etc. */
+ if ( pass || (dset_id != -1) ) {
+ if ( H5Dclose(dset_id) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dclose(dset_id) failed.\n";
+ }
+ }
+
+ if ( pass || (dset_id_ch != -1) ) {
+ if ( H5Dclose(dset_id_ch) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dclose(dset_id_ch) failed.\n";
+ }
+ }
+
if ( pass || (file_id != -1) ) {
if ( H5Fclose(file_id) < 0 ) {
pass = false;
@@ -668,17 +1056,9 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
HDfprintf(stdout, "%s: failure_mssg = \"%s\"\n",
fcn_name, failure_mssg);
}
-
HDremove(reloc_data_filename);
}
- /* free data_slice if it has been allocated */
- if ( data_slice != NULL ) {
- HDfree(data_slice);
- data_slice = NULL;
- }
-
-
return( ! pass );
} /* test_parallel_read() */
@@ -803,7 +1183,7 @@ main( int argc, char **argv)
}
/* Now read the generated test file (stil using MPI_COMM_WORLD) */
- nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, which_group);
+ nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, mpi_size, which_group);
if ( nerrs > 0 ) {
if ( mpi_rank == 0 ) {
@@ -819,7 +1199,7 @@ main( int argc, char **argv)
}
/* run the 2nd set of tests */
- nerrs += test_parallel_read(group_comm, mpi_rank, which_group);
+ nerrs += test_parallel_read(group_comm, mpi_rank, mpi_size, which_group);
if ( nerrs > 0 ) {
if ( mpi_rank == 0 ) {