author     M. Scot Breitenfeld <brtnfld@hdfgroup.org>  2018-12-12 21:39:18 (GMT)
committer  M. Scot Breitenfeld <brtnfld@hdfgroup.org>  2019-01-02 15:51:44 (GMT)
commit     58decdbd88f6f6c273c65a37dc54ac1bff6f1b2b (patch)
tree       b507c9e9d207e1df5830192ace60cfe7a8c1ccc2
parent     afdf3094cc6577bacd004f2cb4b553b63bf503f7 (diff)
HDFFV-10652
Implemented a process-0 read followed by a broadcast (read-proc0-and-bcast) for collective reads of full datasets (H5S_ALL) by all the processes in the file communicator.
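The pattern in isolation: when every rank wants the identical bytes, only rank 0 needs to touch the file system and the result can be broadcast. A minimal self-contained MPI sketch of the idea (function name and error handling are illustrative only; the actual driver logic is in H5FD_mpio_read() below):

#include <mpi.h>

/* Sketch: rank 0 reads the bytes every rank wants, then broadcasts them.
 * The read's status is broadcast first so a failed read on rank 0 cannot
 * leave the other ranks blocked in the data broadcast. */
static int read_proc0_and_bcast(MPI_Comm comm, MPI_File fh, MPI_Offset off,
                                int size, void *buf)
{
    int rank;
    int mpi_code = MPI_SUCCESS;
    MPI_Status stat;

    MPI_Comm_rank(comm, &rank);

    /* Only rank 0 touches the file system */
    if(0 == rank)
        mpi_code = MPI_File_read_at(fh, off, buf, size, MPI_BYTE, &stat);

    /* Every rank learns whether rank 0's read succeeded */
    MPI_Bcast(&mpi_code, 1, MPI_INT, 0, comm);
    if(MPI_SUCCESS != mpi_code)
        return mpi_code;

    /* The data itself: one broadcast instead of N file reads */
    return MPI_Bcast(buf, size, MPI_BYTE, 0, comm);
}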
-rw-r--r--  src/H5CX.c          56
-rw-r--r--  src/H5CXprivate.h    2
-rw-r--r--  src/H5Dmpio.c       71
-rw-r--r--  src/H5FDmpio.c      77
-rw-r--r--  src/H5Pprivate.h     1
-rw-r--r--  src/H5private.h      3
-rw-r--r--  testpar/t_dset.c     2
-rw-r--r--  testpar/t_mdset.c  118
-rw-r--r--  testpar/t_pread.c  230
9 files changed, 441 insertions, 119 deletions
diff --git a/src/H5CX.c b/src/H5CX.c
index 1d9cf3d..756dc03 100644
--- a/src/H5CX.c
+++ b/src/H5CX.c
@@ -198,6 +198,7 @@ typedef struct H5CX_t {
MPI_Datatype btype; /* MPI datatype for buffer, when using collective I/O */
MPI_Datatype ftype; /* MPI datatype for file, when using collective I/O */
hbool_t mpi_file_flushing; /* Whether an MPI-opened file is being flushed */
+ hbool_t mpio_Proc0_BCast; /* Whether a dataset meets read-proc0-and-bcast requirements */
#endif /* H5_HAVE_PARALLEL */
/* Cached DXPL properties */
@@ -1537,6 +1538,33 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5CX_get_mpio_Proc0_BCast
+ *
+ * Purpose: Retrieves whether the dataset meets the read-proc0-and-bcast requirements for the current API call context.
+ *
+ * Return: TRUE / FALSE (cannot fail)
+ *
+ * Programmer: M. Breitenfeld
+ * December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+hbool_t
+H5CX_get_mpio_Proc0_BCast(void)
+{
+ H5CX_node_t **head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ /* Sanity check */
+ HDassert(head && *head);
+
+ FUNC_LEAVE_NOAPI((*head)->ctx.mpio_Proc0_BCast)
+
+} /* end H5CX_get_mpio_Proc0_BCast() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5CX_get_mpio_chunk_opt_mode
*
* Purpose: Retrieves the collective chunk optimization mode for the current API call context.
@@ -2320,6 +2348,34 @@ H5CX_set_mpio_global_no_coll_cause(uint32_t mpio_global_no_coll_cause)
FUNC_LEAVE_NOAPI_VOID
} /* end H5CX_set_mpio_global_no_coll_cause() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5CX_set_mpio_Proc0_BCast
+ *
+ * Purpose: Sets whether the dataset meets the read-proc0-and-bcast requirements for the current API call context.
+ *
+ * Return: <none>
+ *
+ * Programmer: M. Breitenfeld
+ * December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+void
+H5CX_set_mpio_Proc0_BCast(hbool_t mpio_Proc0_BCast)
+{
+ H5CX_node_t **head = H5CX_get_my_context(); /* Get the pointer to the head of the API context, for this thread */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ /* Sanity checks */
+ HDassert(head && *head);
+
+ (*head)->ctx.mpio_Proc0_BCast = mpio_Proc0_BCast;
+
+ FUNC_LEAVE_NOAPI_VOID
+} /* end H5CX_set_mpio_Proc0_BCast */
+
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
/*-------------------------------------------------------------------------
diff --git a/src/H5CXprivate.h b/src/H5CXprivate.h
index 57ca5cd..f93121c 100644
--- a/src/H5CXprivate.h
+++ b/src/H5CXprivate.h
@@ -75,6 +75,7 @@ H5_DLL H5AC_ring_t H5CX_get_ring(void);
H5_DLL hbool_t H5CX_get_coll_metadata_read(void);
H5_DLL herr_t H5CX_get_mpi_coll_datatypes(MPI_Datatype *btype, MPI_Datatype *ftype);
H5_DLL hbool_t H5CX_get_mpi_file_flushing(void);
+H5_DLL hbool_t H5CX_get_mpio_Proc0_BCast(void);
#endif /* H5_HAVE_PARALLEL */
/* "Getter" routines for DXPL properties cached in API context */
@@ -128,6 +129,7 @@ H5_DLL void H5CX_set_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t chun
H5_DLL void H5CX_set_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t actual_io_mode);
H5_DLL void H5CX_set_mpio_local_no_coll_cause(uint32_t mpio_local_no_coll_cause);
H5_DLL void H5CX_set_mpio_global_no_coll_cause(uint32_t mpio_global_no_coll_cause);
+H5_DLL void H5CX_set_mpio_Proc0_BCast(hbool_t mpio_Proc0_BCast);
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_link_hard(int mpio_coll_chunk_link_hard);
H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_multi_hard(int mpio_coll_chunk_multi_hard);
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 2c06800..e1c0edb 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -37,7 +37,7 @@
#include "H5Eprivate.h" /* Error handling */
#include "H5Fprivate.h" /* File access */
#include "H5FDprivate.h" /* File drivers */
-#include "H5FDmpi.h" /* MPI-based file drivers */
+#include "H5FDmpi.h" /* MPI-based file drivers */
#include "H5Iprivate.h" /* IDs */
#include "H5MMprivate.h" /* Memory management */
#include "H5Oprivate.h" /* Object headers */
@@ -281,9 +281,11 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
const H5S_t *mem_space, const H5D_type_info_t *type_info)
{
H5FD_mpio_xfer_t io_xfer_mode; /* MPI I/O transfer mode */
- unsigned local_cause = 0; /* Local reason(s) for breaking collective mode */
- unsigned global_cause = 0; /* Global reason(s) for breaking collective mode */
+ unsigned local_cause[2] = {0,0}; /* [0] Local reason(s) for breaking collective mode */
+ /* [1] Local reason(s) for skipping the read-proc0-and-bcast optimization */
+ unsigned global_cause[2] = {0,0}; /* Global reason(s), as above */
htri_t ret_value = SUCCEED; /* Return value */
+ hbool_t H5FD_MPIO_Proc0_BCast; /* Whether read-proc0-and-bcast will be used (H5S_ALL selection, dataset < 2GB) */
FUNC_ENTER_PACKAGE
@@ -298,34 +300,34 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
if(H5CX_get_io_xfer_mode(&io_xfer_mode) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
if(io_xfer_mode == H5FD_MPIO_INDEPENDENT)
- local_cause |= H5D_MPIO_SET_INDEPENDENT;
+ local_cause[0] |= H5D_MPIO_SET_INDEPENDENT;
/* Optimized MPI types flag must be set */
/* (based on 'HDF5_MPI_OPT_TYPES' environment variable) */
if(!H5FD_mpi_opt_types_g)
- local_cause |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
+ local_cause[0] |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
/* Don't allow collective operations if datatype conversions need to happen */
if(!type_info->is_conv_noop)
- local_cause |= H5D_MPIO_DATATYPE_CONVERSION;
+ local_cause[0] |= H5D_MPIO_DATATYPE_CONVERSION;
/* Don't allow collective operations if data transform operations should occur */
if(!type_info->is_xform_noop)
- local_cause |= H5D_MPIO_DATA_TRANSFORMS;
+ local_cause[0] |= H5D_MPIO_DATA_TRANSFORMS;
/* Check whether these are both simple or scalar dataspaces */
if(!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space))
&& (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space))))
- local_cause |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
+ local_cause[0] |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
/* Dataset storage must be contiguous or chunked */
if(!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
io_info->dset->shared->layout.type == H5D_CHUNKED))
- local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+ local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
/* check if external-file storage is used */
if(io_info->dset->shared->dcpl_cache.efl.nused > 0)
- local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+ local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
/* The handling of memory space is different for chunking and contiguous
* storage. For contiguous storage, mem_space and file_space won't change
@@ -343,28 +345,59 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
if (io_info->op_type == H5D_IO_OP_WRITE &&
io_info->dset->shared->layout.type == H5D_CHUNKED &&
io_info->dset->shared->dcpl_cache.pline.nused > 0)
- local_cause |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
+ local_cause[0] |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
#endif
+ H5FD_MPIO_Proc0_BCast = FALSE;
+ /* Check whether all the processes are reading the same data */
+ if(H5S_GET_SELECT_TYPE(file_space) != H5S_SEL_ALL) {
+ /* The selection is not H5S_ALL, so the processes are not all reading
+ * the same data and the read-proc0-and-bcast optimization (one process
+ * reads, then MPI_Bcasts the data to the others) cannot be used.
+ */
+ local_cause[1] |= 0x01;
+ }
+ else {
+ /* Do an MPI_Bcast of the data from one process, instead of having
+ * all the processes involved in the collective I/O, only if the
+ * dataset is smaller than 2GB.
+ */
+ hsize_t dset_storage_size;
+
+ H5D__get_storage_size(io_info->dset, &dset_storage_size);
+
+ if(dset_storage_size > ((hsize_t)(H5_2GB) - 1))
+ local_cause[1] |= 0x02;
+ }
+
/* Check for independent I/O */
- if(local_cause & H5D_MPIO_SET_INDEPENDENT)
- global_cause = local_cause;
+ if(local_cause[0] & H5D_MPIO_SET_INDEPENDENT)
+ global_cause[0] = local_cause[0];
else {
int mpi_code; /* MPI error code */
/* Form consensus opinion among all processes about whether to perform
* collective I/O
*/
- if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
+
} /* end else */
/* Set the local & global values of no-collective-cause in the API context */
- H5CX_set_mpio_local_no_coll_cause(local_cause);
- H5CX_set_mpio_global_no_coll_cause(global_cause);
+ H5CX_set_mpio_local_no_coll_cause(local_cause[0]);
+ H5CX_set_mpio_global_no_coll_cause(global_cause[0]);
/* Set the return value, based on the global cause */
- ret_value = global_cause > 0 ? FALSE : TRUE;
+ ret_value = global_cause[0] > 0 ? FALSE : TRUE;
+
+ /* Use read-proc0-and-bcast if the I/O is collective and nothing disqualified the optimization */
+ if(global_cause[0] == 0 && global_cause[1] == 0)
+ H5FD_MPIO_Proc0_BCast = TRUE;
+
+ /* Record the read-proc0-and-bcast decision in the API context */
+ H5CX_set_mpio_Proc0_BCast(H5FD_MPIO_Proc0_BCast);
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -3069,8 +3102,8 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length;
/* Currently, these chunk reads are done independently and will likely
- * cause issues with collective metadata reads enabled. In the future,
- * this should be refactored to use collective chunk reads - JTH */
+ * cause issues with collective metadata reads enabled. In the future,
+ * this should be refactored to use collective chunk reads - JTH */
/* Get the original state of parallel I/O transfer mode */
if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
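For reference, a self-contained sketch of the consensus step added above: each rank contributes its local disqualification flags, and MPI_Allreduce with MPI_BOR leaves every rank holding the union, so all ranks reach the same decision. Flag values and the triggering condition are illustrative:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    unsigned local_cause[2]  = {0, 0}; /* [0] no-collective causes, [1] no-bcast causes */
    unsigned global_cause[2] = {0, 0};
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Pretend rank 1 hit a condition that disqualifies the bcast path */
    if(1 == rank)
        local_cause[1] |= 0x01;

    /* MPI_BOR makes the result the union of every rank's flags, so the
     * decision below comes out identical on all ranks */
    MPI_Allreduce(local_cause, global_cause, 2, MPI_UNSIGNED, MPI_BOR,
                  MPI_COMM_WORLD);

    if(0 == global_cause[0] && 0 == global_cause[1])
        printf("rank %d: read-proc0-and-bcast enabled\n", rank);
    else
        printf("rank %d: fall back (causes 0x%x / 0x%x)\n", rank,
               global_cause[0], global_cause[1]);

    MPI_Finalize();
    return 0;
}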
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 87f8b6a..a4b9ef3 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1414,31 +1414,32 @@ static herr_t
H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
hid_t H5_ATTR_UNUSED dxpl_id, haddr_t addr, size_t size, void *buf/*out*/)
{
- H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
- MPI_Offset mpi_off;
- MPI_Status mpi_stat; /* Status from I/O operation */
- int mpi_code; /* mpi return code */
- MPI_Datatype buf_type = MPI_BYTE; /* MPI description of the selection in memory */
- int size_i; /* Integer copy of 'size' to read */
+ H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
+ MPI_Offset mpi_off;
+ MPI_Status mpi_stat; /* Status from I/O operation */
+ int mpi_code; /* mpi return code */
+ MPI_Datatype buf_type = MPI_BYTE; /* MPI description of the selection in memory */
+ int size_i; /* Integer copy of 'size' to read */
#if MPI_VERSION >= 3
- MPI_Count bytes_read; /* Number of bytes read in */
+ MPI_Count bytes_read = 0; /* Number of bytes read in */
MPI_Count type_size; /* MPI datatype used for I/O's size */
MPI_Count io_size; /* Actual number of bytes requested */
MPI_Count n;
#else
- int bytes_read; /* Number of bytes read in */
+ int bytes_read = 0; /* Number of bytes read in */
int type_size; /* MPI datatype used for I/O's size */
int io_size; /* Actual number of bytes requested */
- int n;
+ int n;
#endif
- hbool_t use_view_this_time = FALSE;
- herr_t ret_value = SUCCEED;
+ hbool_t use_view_this_time = FALSE;
+ hbool_t Proc0_BCast_enabled = FALSE; /* Whether the read-proc0-and-bcast option was used */
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "Entering H5FD_mpio_read\n" );
+ fprintf(stdout, "Entering H5FD_mpio_read\n" );
#endif
HDassert(file);
HDassert(H5FD_MPIO==file->pub.driver_id);
@@ -1457,12 +1458,12 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'r'])
fprintf(stdout, "in H5FD_mpio_read mpi_off=%ld size_i=%d\n",
- (long)mpi_off, size_i );
+ (long)mpi_off, size_i );
#endif
/* Only look for MPI views for raw data transfers */
if(type == H5FD_MEM_DRAW) {
- H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */
+ H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */
/* Get the transfer mode from the API context */
if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
@@ -1475,7 +1476,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
* could mean "use MPI_BYTE" by convention).
*/
if(xfer_mode==H5FD_MPIO_COLLECTIVE) {
- MPI_Datatype file_type;
+ MPI_Datatype file_type;
/* Remember that views are used */
use_view_this_time = TRUE;
@@ -1502,8 +1503,8 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
H5FD_mpio_collective_opt_t coll_opt_mode;
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n");
+ if (H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n");
#endif
/* Get the collective_opt property to check whether the application wants to do IO individually. */
if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
@@ -1514,8 +1515,23 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
if(H5FD_mpio_Debug[(int)'t'])
fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n");
#endif
- if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ if(H5CX_get_mpio_Proc0_BCast()) {
+#ifdef H5FDmpio_DEBUG
+ if(H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_read: doing Proc0-read-and-MPI_Bcast\n");
+#endif
+ if(file->mpi_rank == 0) {
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ }
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ Proc0_BCast_enabled = TRUE;
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
+ }
} /* end if */
else {
#ifdef H5FDmpio_DEBUG
@@ -1534,24 +1550,31 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
} else {
if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
}
- /* How many bytes were actually read? */
+ if(!Proc0_BCast_enabled || file->mpi_rank == 0) {
+ /* How many bytes were actually read? */
#if MPI_VERSION >= 3
- if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
#else
- if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ }
+
+ if(Proc0_BCast_enabled) {
+ /* bytes_read is an MPI_Count when MPI_VERSION >= 3 and an int
+ * otherwise, so the broadcast datatype must match */
+#if MPI_VERSION >= 3
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&bytes_read, 1, MPI_COUNT, 0, file->comm)))
+#else
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&bytes_read, 1, MPI_INT, 0, file->comm)))
+#endif
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ }
/* Get the type's size */
#if MPI_VERSION >= 3
- if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
#else
- if (MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
#endif
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
/* Compute the actual number of bytes requested */
io_size=type_size*size_i;
@@ -1564,12 +1587,12 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
* This gives us zeroes beyond end of physical MPI file.
*/
if ((n=(io_size-bytes_read)) > 0)
- HDmemset((char*)buf+bytes_read, 0, (size_t)n);
+ HDmemset((char*)buf+bytes_read, 0, (size_t)n);
done:
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "Leaving H5FD_mpio_read\n" );
+ fprintf(stdout, "Leaving H5FD_mpio_read\n" );
#endif
FUNC_LEAVE_NOAPI(ret_value)
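One subtlety in the driver change above: in bcast mode only rank 0 holds a meaningful MPI_Status, so the byte count itself must be broadcast before the common zero-fill of any bytes the read could not supply. A standalone sketch of that bookkeeping (names and types assumed, not the driver code):

#include <mpi.h>
#include <string.h>

/* Only rank 0 owns a valid MPI_Status in bcast mode, so the byte count
 * is broadcast before every rank zero-fills whatever the read did not
 * supply (e.g. a request extending past EOF). */
static int finish_proc0_bcast_read(MPI_Comm comm, int rank, MPI_Status *stat,
                                   void *buf, long long io_size)
{
    long long bytes_read = 0;
    int count = 0;

    if(0 == rank) {
        /* Number of MPI_BYTE elements rank 0 actually received */
        MPI_Get_count(stat, MPI_BYTE, &count);
        bytes_read = count;
    }

    /* Every rank must agree on the count before zero-filling */
    MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, comm);

    if(io_size - bytes_read > 0)
        memset((char *)buf + bytes_read, 0, (size_t)(io_size - bytes_read));

    return MPI_SUCCESS;
}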
diff --git a/src/H5Pprivate.h b/src/H5Pprivate.h
index 866f088..7007635 100644
--- a/src/H5Pprivate.h
+++ b/src/H5Pprivate.h
@@ -44,7 +44,6 @@ typedef struct H5P_genplist_t H5P_genplist_t;
#define H5_COLL_MD_READ_FLAG_NAME "collective_metadata_read"
-
/****************************/
/* Library Private Typedefs */
/****************************/
diff --git a/src/H5private.h b/src/H5private.h
index b654bae..05bfc63 100644
--- a/src/H5private.h
+++ b/src/H5private.h
@@ -568,6 +568,9 @@
#define H5_PB (1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F)
#define H5_EB (1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F)
+/* Define 2GB -- Used for 2GB limitation checks */
+#define H5_2GB (2.0F * 1024.0F * 1024.0F * 1024.0F)
+
#ifndef H5_HAVE_FLOCK
/* flock() operations. Used in the source so we have to define them when
* the call is not available (e.g.: Windows). These should NOT be used
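A small worked check of the new macro: H5_2GB follows the file's existing float-valued size macros (H5_PB, H5_EB), and since 2^31 is exactly representable as a float, the cast to an integer type is lossless and the comparison in H5Dmpio.c means "at least 2 GiB". A tiny standalone illustration (the hsize_t stand-in is an assumption):

#include <stdio.h>

typedef unsigned long long hsize_t;             /* stand-in for HDF5's hsize_t */
#define H5_2GB (2.0F * 1024.0F * 1024.0F * 1024.0F)

int main(void)
{
    hsize_t dset_storage_size = 3ULL * 1024 * 1024 * 1024; /* 3 GiB */

    /* (hsize_t)H5_2GB is exactly 2147483648, so subtracting 1 turns the
     * strict '>' into a "size >= 2 GiB" test */
    if(dset_storage_size > ((hsize_t)(H5_2GB) - 1))
        printf("too large for read-proc0-and-bcast\n");

    return 0;
}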
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 281d027..35501d8 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2649,7 +2649,7 @@ compress_readAll(void)
/* Try reading the data */
ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
- VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ VRFY((ret >= 0), "H5Dread succeeded");
/* Verify data read */
for(u=0; u<dim; u++)
diff --git a/testpar/t_mdset.c b/testpar/t_mdset.c
index 5d989bb..119ce4f 100644
--- a/testpar/t_mdset.c
+++ b/testpar/t_mdset.c
@@ -603,8 +603,8 @@ void dataset_fillvalue(void)
hsize_t req_count[4] = {1, 6, 7, 8};
hsize_t dset_size; /* Dataset size */
int *rdata, *wdata; /* Buffers for data to read and write */
- int *twdata, *trdata; /* Temporary pointer into buffer */
- int acc, i, j, k, l; /* Local index variables */
+ int *twdata, *trdata; /* Temporary pointer into buffer */
+ int acc, i, j, k, l, ii; /* Local index variables */
herr_t ret; /* Generic return value */
const char *filename;
@@ -645,27 +645,40 @@ void dataset_fillvalue(void)
/*
* Read dataset before any data is written.
*/
- /* set entire read buffer with the constant 2 */
- HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
- /* Independently read the entire dataset back */
- ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
- VRFY((ret >= 0), "H5Dread succeeded");
- /* Verify all data read are the fill value 0 */
- trdata = rdata;
- err_num = 0;
- for(i = 0; i < (int)dset_dims[0]; i++)
+ /* Create DXPL for I/O */
+ dxpl = H5Pcreate(H5P_DATASET_XFER);
+ VRFY((dxpl >= 0), "H5Pcreate succeeded");
+
+ for(ii = 0; ii < 2; ii++) {
+ if(ii == 0)
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+ else
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+ /* set entire read buffer with the constant 2 */
+ HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+ /* Read the entire dataset back */
+ ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+ VRFY((ret >= 0), "H5Dread succeeded");
+
+ /* Verify all data read are the fill value 0 */
+ trdata = rdata;
+ err_num = 0;
+ for(i = 0; i < (int)dset_dims[0]; i++)
for(j = 0; j < (int)dset_dims[1]; j++)
- for(k = 0; k < (int)dset_dims[2]; k++)
- for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
- if(*trdata != 0)
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
- if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+ for(k = 0; k < (int)dset_dims[2]; k++)
+ for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
+ if(*trdata != 0)
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
+ if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
printf("[more errors ...]\n");
- if(err_num){
+ if(err_num){
printf("%d errors found in check_value\n", err_num);
- nerrors++;
+ nerrors++;
+ }
}
/* Barrier to ensure all processes have completed the above test. */
@@ -681,10 +694,6 @@ void dataset_fillvalue(void)
ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, req_start, NULL, req_count, NULL);
VRFY((ret >= 0), "H5Sselect_hyperslab succeeded on memory dataspace");
- /* Create DXPL for collective I/O */
- dxpl = H5Pcreate(H5P_DATASET_XFER);
- VRFY((dxpl >= 0), "H5Pcreate succeeded");
-
ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
@@ -711,37 +720,46 @@ void dataset_fillvalue(void)
/*
* Read dataset after partial write.
*/
- /* set entire read buffer with the constant 2 */
- HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
- /* Independently read the entire dataset back */
- ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
- VRFY((ret >= 0), "H5Dread succeeded");
- /* Verify correct data read */
- twdata=wdata;
- trdata=rdata;
- err_num=0;
- for(i=0; i<(int)dset_dims[0]; i++)
+ for(ii = 0; ii < 2; ii++) {
+ if(ii == 0)
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+ else
+ ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+ /* set entire read buffer with the constant 2 */
+ HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+ /* Read the entire dataset back */
+ ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+ VRFY((ret >= 0), "H5Dread succeeded");
+
+ /* Verify correct data read */
+ twdata=wdata;
+ trdata=rdata;
+ err_num=0;
+ for(i=0; i<(int)dset_dims[0]; i++)
for(j=0; j<(int)dset_dims[1]; j++)
- for(k=0; k<(int)dset_dims[2]; k++)
- for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
- if(i<mpi_size) {
- if(*twdata != *trdata )
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
- } /* end if */
- else {
- if(*trdata != 0)
- if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
- printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
- } /* end else */
- if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+ for(k=0; k<(int)dset_dims[2]; k++)
+ for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
+ if(i<mpi_size) {
+ if(*twdata != *trdata )
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
+ } /* end if */
+ else {
+ if(*trdata != 0)
+ if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+ printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
+ } /* end else */
+ if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
printf("[more errors ...]\n");
- if(err_num){
+ if(err_num){
printf("%d errors found in check_value\n", err_num);
- nerrors++;
+ nerrors++;
+ }
}
-
+
/* Close all file objects */
ret = H5Dclose(dataset);
VRFY((ret >= 0), "H5Dclose succeeded");
@@ -856,7 +874,7 @@ void collective_group_write(void)
if(!((m+1) % 10)) {
printf("created %d groups\n", m+1);
MPI_Barrier(MPI_COMM_WORLD);
- }
+ }
#endif /* BARRIER_CHECKS */
}
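The loop added to dataset_fillvalue() above exercises exactly the application-level recipe that makes a read eligible for the optimization: a collective transfer property list plus H5S_ALL selections. A hedged usage sketch of that recipe on its own (handles, names, and buffer sizing are placeholders, the caller's responsibility):

#include <hdf5.h>

/* Read an entire dataset collectively; when the selection is H5S_ALL and
 * the dataset is < 2GB, the library internally does proc0-read-and-bcast. */
static herr_t read_full_dataset(hid_t file_id, const char *name, float *buf)
{
    hid_t dset = H5Dopen2(file_id, name, H5P_DEFAULT);
    hid_t dxpl = H5Pcreate(H5P_DATASET_XFER);
    herr_t ret;

    /* Collective mode is a precondition for the optimization */
    H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);

    ret = H5Dread(dset, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, dxpl, buf);

    H5Pclose(dxpl);
    H5Dclose(dset);
    return ret;
}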
diff --git a/testpar/t_pread.c b/testpar/t_pread.c
index 0905d44..a6a6a7d 100644
--- a/testpar/t_pread.c
+++ b/testpar/t_pread.c
@@ -46,7 +46,7 @@ completely foolproof is to underestimate the ingenuity of complete\n\
fools.\n";
static int generate_test_file(MPI_Comm comm, int mpi_rank, int group);
-static int test_parallel_read(MPI_Comm comm, int mpi_rank, int group);
+static int test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group);
static char *test_argv0 = NULL;
@@ -413,7 +413,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
* Function: test_parallel_read
*
* Purpose: This actually tests the superblock optimization
- * and covers the two primary cases we're interested in.
+ * and covers the three primary cases we're interested in.
* 1). That HDF5 files can be opened in parallel by
* the rank 0 process and that the superblock
* offset is correctly broadcast to the other
@@ -423,6 +423,10 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
* subgroups of MPI_COMM_WORLD and that each
* subgroup operates as described in (1) to
* collectively read the data.
+ * 3). Testing proc0-read-and-MPI_Bcast using
+ * sub-communicators, and reading into
+ * a memory space that is different from the
+ * file space.
*
* The global MPI rank is used for reading and
* writing data for process specific data in the
@@ -444,7 +448,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
*-------------------------------------------------------------------------
*/
static int
-test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
+test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id)
{
const char *failure_mssg;
const char *fcn_name = "test_parallel_read()";
@@ -457,8 +461,12 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
hid_t fapl_id = -1;
hid_t file_id = -1;
hid_t dset_id = -1;
+ hid_t dxpl_id = H5P_DEFAULT;
hid_t memspace = -1;
hid_t filespace = -1;
+ hid_t filetype = -1;
+ size_t filetype_size;
+ hssize_t dset_size;
hsize_t i;
hsize_t offset;
hsize_t count = COUNT;
@@ -606,14 +614,6 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
}
}
- /* close file, etc. */
- if ( pass || (dset_id != -1) ) {
- if ( H5Dclose(dset_id) < 0 ) {
- pass = false;
- failure_mssg = "H5Dclose(dset_id) failed.\n";
- }
- }
-
if ( pass || (memspace != -1) ) {
if ( H5Sclose(memspace) < 0 ) {
pass = false;
@@ -628,6 +628,202 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
}
}
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ /*
+ * Test the proc0-read-and-bcast optimization with sub-communicators
+ */
+
+ /* Don't test with more than 6 processes to avoid memory issues */
+
+ if( group_size <= 6 ) {
+
+ if ( (filespace = H5Dget_space(dset_id )) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dget_space failed.\n";
+ }
+
+ if ( (dset_size = H5Sget_simple_extent_npoints(filespace)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Sget_simple_extent_npoints failed.\n";
+ }
+
+ if ( (filetype = H5Dget_type(dset_id)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dget_type failed.\n";
+ }
+
+ if ( (filetype_size = H5Tget_size(filetype)) == 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Tget_size failed.\n";
+ }
+
+ if ( H5Tclose(filetype) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Tclose failed.\n";
+ }
+
+ if ( (data_slice = (float *)HDmalloc((size_t)dset_size*filetype_size)) == NULL ) {
+ pass = FALSE;
+ failure_mssg = "malloc of data_slice failed.\n";
+ }
+
+ if ( pass ) {
+ if ( (dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
+ }
+ }
+
+ if ( pass ) {
+ if ( (H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
+ }
+ }
+
+ /* read H5S_ALL section */
+ if ( pass ) {
+ if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL,
+ H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dread() failed\n";
+ }
+ }
+
+ /* verify the data */
+ if ( pass ) {
+
+ if ( comm == MPI_COMM_WORLD ) /* test 1 */
+ nextValue = 0;
+ else if ( group_id == 0 ) /* test 2 group 0 */
+ nextValue = 0;
+ else /* test 2 group 1 */
+ nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
+
+ i = 0;
+ while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
+ /* what we really want is data_slice[i] != nextValue --
+ * the following is a circumlocution to shut up
+ * the compiler.
+ */
+ if ( ( data_slice[i] > nextValue ) ||
+ ( data_slice[i] < nextValue ) ) {
+ pass = FALSE;
+ failure_mssg = "Unexpected dset contents.\n";
+ }
+ nextValue += 1;
+ i++;
+ }
+ }
+
+ if ( pass || (filespace != -1) ) {
+ if ( H5Sclose(filespace) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Sclose(filespace) failed.\n";
+ }
+ }
+
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ /*
+ * Read an H5S_ALL filespace into a hyperslab-defined memory space
+ */
+
+ if ( (data_slice = (float *)HDmalloc((size_t)(dset_size*2)*filetype_size)) == NULL ) {
+ pass = FALSE;
+ failure_mssg = "malloc of data_slice failed.\n";
+ }
+
+ /* setup memspace */
+ if ( pass ) {
+ dims[0] = (hsize_t)dset_size*2;
+ if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
+ }
+ }
+ if ( pass ) {
+ offset = (hsize_t)dset_size;
+ if ( (H5Sselect_hyperslab(memspace, H5S_SELECT_SET,
+ &offset, NULL, &offset, NULL)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Sselect_hyperslab() failed\n";
+ }
+ }
+
+ /* read the entire dataset into the second half of the memory buffer */
+ if ( pass ) {
+ if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace,
+ H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+ pass = FALSE;
+ failure_mssg = "H5Dread() failed\n";
+ }
+ }
+
+ /* verify the data */
+ if ( pass ) {
+
+ if ( comm == MPI_COMM_WORLD ) /* test 1 */
+ nextValue = 0;
+ else if ( group_id == 0 ) /* test 2 group 0 */
+ nextValue = 0;
+ else /* test 2 group 1 */
+ nextValue = (float)((hsize_t)(mpi_size / 2)*count);
+
+ i = (hsize_t)dset_size;
+ while ( ( pass ) && ( i < (hsize_t)dset_size*2 ) ) {
+ /* what we really want is data_slice[i] != nextValue --
+ * the following is a circumlocution to shut up
+ * the compiler.
+ */
+ if ( ( data_slice[i] > nextValue ) ||
+ ( data_slice[i] < nextValue ) ) {
+ pass = FALSE;
+ failure_mssg = "Unexpected dset contents.\n";
+ }
+ nextValue += 1;
+ i++;
+ }
+ }
+
+ if ( pass || (memspace != -1) ) {
+ if ( H5Sclose(memspace) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Sclose(memspace) failed.\n";
+ }
+ }
+
+ /* free data_slice if it has been allocated */
+ if ( data_slice != NULL ) {
+ HDfree(data_slice);
+ data_slice = NULL;
+ }
+
+ if ( pass || (dxpl_id != -1) ) {
+ if ( H5Pclose(dxpl_id) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Pclose(dxpl_id) failed.\n";
+ }
+ }
+ }
+
+ /* close file, etc. */
+ if ( pass || (dset_id != -1) ) {
+ if ( H5Dclose(dset_id) < 0 ) {
+ pass = false;
+ failure_mssg = "H5Dclose(dset_id) failed.\n";
+ }
+ }
+
if ( pass || (file_id != -1) ) {
if ( H5Fclose(file_id) < 0 ) {
pass = false;
@@ -668,17 +864,9 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
HDfprintf(stdout, "%s: failure_mssg = \"%s\"\n",
fcn_name, failure_mssg);
}
-
HDremove(reloc_data_filename);
}
- /* free data_slice if it has been allocated */
- if ( data_slice != NULL ) {
- HDfree(data_slice);
- data_slice = NULL;
- }
-
-
return( ! pass );
} /* test_parallel_read() */
@@ -803,7 +991,7 @@ main( int argc, char **argv)
}
/* Now read the generated test file (still using MPI_COMM_WORLD) */
- nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, which_group);
+ nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, mpi_size, which_group);
if ( nerrs > 0 ) {
if ( mpi_rank == 0 ) {
@@ -819,7 +1007,7 @@ main( int argc, char **argv)
}
/* run the 2nd set of tests */
- nerrs += test_parallel_read(group_comm, mpi_rank, which_group);
+ nerrs += test_parallel_read(group_comm, mpi_rank, mpi_size, which_group);
if ( nerrs > 0 ) {
if ( mpi_rank == 0 ) {