author    | M. Scot Breitenfeld <brtnfld@hdfgroup.org> | 2018-12-12 21:39:18 (GMT)
committer | M. Scot Breitenfeld <brtnfld@hdfgroup.org> | 2019-01-02 15:51:44 (GMT)
commit    | 58decdbd88f6f6c273c65a37dc54ac1bff6f1b2b (patch)
tree      | b507c9e9d207e1df5830192ace60cfe7a8c1ccc2
parent    | afdf3094cc6577bacd004f2cb4b553b63bf503f7 (diff)
download  | hdf5-58decdbd88f6f6c273c65a37dc54ac1bff6f1b2b.zip
          | hdf5-58decdbd88f6f6c273c65a37dc54ac1bff6f1b2b.tar.gz
          | hdf5-58decdbd88f6f6c273c65a37dc54ac1bff6f1b2b.tar.bz2
HDFFV-10652
Implemented a process-0 read followed by a broadcast
for collective reads of full datasets (H5S_ALL) by
all processes in the file communicator.
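The core idea of the change, sketched below as a minimal standalone MPI program (this is not HDF5 library code; the file name `data.bin`, the byte count, and the omission of error checking are illustrative assumptions — the real implementation lives in `H5FD_mpio_read()` in the diff that follows): when every rank wants the same bytes and the dataset is small enough, rank 0 reads once and broadcasts, instead of all ranks issuing a collective read against the filesystem.

```c
/* Minimal sketch of the read-proc0-and-bcast pattern, assuming a
 * hypothetical file "data.bin" and a fixed byte count; error-code
 * checking is omitted for brevity. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_File    fh;
    MPI_Status  status;
    int         rank;
    const MPI_Offset offset = 0;    /* start of the (hypothetical) dataset */
    const int        count  = 1024; /* bytes every rank needs */
    char *buf = malloc(count);

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_File_open(MPI_COMM_WORLD, "data.bin", MPI_MODE_RDONLY,
                  MPI_INFO_NULL, &fh);

    /* Only rank 0 touches the file... */
    if (rank == 0)
        MPI_File_read_at(fh, offset, buf, count, MPI_BYTE, &status);

    /* ...then the data is replicated to every rank in the communicator. */
    MPI_Bcast(buf, count, MPI_BYTE, 0, MPI_COMM_WORLD);

    MPI_File_close(&fh);
    free(buf);
    MPI_Finalize();
    return 0;
}
```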
-rw-r--r-- | src/H5CX.c        |  56
-rw-r--r-- | src/H5CXprivate.h |   2
-rw-r--r-- | src/H5Dmpio.c     |  71
-rw-r--r-- | src/H5FDmpio.c    |  77
-rw-r--r-- | src/H5Pprivate.h  |   1
-rw-r--r-- | src/H5private.h   |   3
-rw-r--r-- | testpar/t_dset.c  |   2
-rw-r--r-- | testpar/t_mdset.c | 118
-rw-r--r-- | testpar/t_pread.c | 230
9 files changed, 441 insertions, 119 deletions
diff --git a/src/H5CX.c b/src/H5CX.c
--- a/src/H5CX.c
+++ b/src/H5CX.c
@@ -198,6 +198,7 @@ typedef struct H5CX_t {
     MPI_Datatype btype;             /* MPI datatype for buffer, when using collective I/O */
     MPI_Datatype ftype;             /* MPI datatype for file, when using collective I/O */
     hbool_t mpi_file_flushing;      /* Whether an MPI-opened file is being flushed */
+    hbool_t mpio_Proc0_BCast;       /* Whether a dataset meets read-proc0-and-bcast requirements */
 #endif /* H5_HAVE_PARALLEL */
 
     /* Cached DXPL properties */
@@ -1537,6 +1538,33 @@ done:
 
 /*-------------------------------------------------------------------------
+ * Function:    H5CX_get_mpio_Proc0_BCast
+ *
+ * Purpose:     Retrieves if the dataset meets read-proc0-and-bcast requirements for the current API call context.
+ *
+ * Return:      Non-negative on success / Negative on failure
+ *
+ * Programmer:  M. Breitenfeld
+ *              December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+hbool_t
+H5CX_get_mpio_Proc0_BCast(void)
+{
+    H5CX_node_t **head = H5CX_get_my_context();  /* Get the pointer to the head of the API context, for this thread */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    /* Sanity check */
+    HDassert(head && *head);
+
+    FUNC_LEAVE_NOAPI((*head)->ctx.mpio_Proc0_BCast)
+
+} /* end H5CX_get_mpio_Proc0_BCast() */
+
+
+/*-------------------------------------------------------------------------
  * Function:    H5CX_get_mpio_chunk_opt_mode
  *
  * Purpose:     Retrieves the collective chunk optimization mode for the current API call context.
@@ -2320,6 +2348,34 @@ H5CX_set_mpio_global_no_coll_cause(uint32_t mpio_global_no_coll_cause)
     FUNC_LEAVE_NOAPI_VOID
 } /* end H5CX_set_mpio_global_no_coll_cause() */
 
+
+/*-------------------------------------------------------------------------
+ * Function:    H5CX_set_mpio_Proc0_BCast
+ *
+ * Purpose:     Sets if the dataset meets read-proc0-and-bcast requirements for the current API call context.
+ *
+ * Return:      <none>
+ *
+ * Programmer:  M. Breitenfeld
+ *              December 31, 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+void
+H5CX_set_mpio_Proc0_BCast(hbool_t mpio_Proc0_BCast)
+{
+    H5CX_node_t **head = H5CX_get_my_context();  /* Get the pointer to the head of the API context, for this thread */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    /* Sanity checks */
+    HDassert(head && *head);
+
+    (*head)->ctx.mpio_Proc0_BCast = mpio_Proc0_BCast;
+
+    FUNC_LEAVE_NOAPI_VOID
+} /* end H5CX_set_mpio_Proc0_BCast */
+
 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
 
 /*-------------------------------------------------------------------------
diff --git a/src/H5CXprivate.h b/src/H5CXprivate.h
index 57ca5cd..f93121c 100644
--- a/src/H5CXprivate.h
+++ b/src/H5CXprivate.h
@@ -75,6 +75,7 @@ H5_DLL H5AC_ring_t H5CX_get_ring(void);
 H5_DLL hbool_t H5CX_get_coll_metadata_read(void);
 H5_DLL herr_t H5CX_get_mpi_coll_datatypes(MPI_Datatype *btype, MPI_Datatype *ftype);
 H5_DLL hbool_t H5CX_get_mpi_file_flushing(void);
+H5_DLL hbool_t H5CX_get_mpio_Proc0_BCast(void);
 #endif /* H5_HAVE_PARALLEL */
 
 /* "Getter" routines for DXPL properties cached in API context */
@@ -128,6 +129,7 @@ H5_DLL void H5CX_set_mpio_actual_chunk_opt(H5D_mpio_actual_chunk_opt_mode_t chun
 H5_DLL void H5CX_set_mpio_actual_io_mode(H5D_mpio_actual_io_mode_t actual_io_mode);
 H5_DLL void H5CX_set_mpio_local_no_coll_cause(uint32_t mpio_local_no_coll_cause);
 H5_DLL void H5CX_set_mpio_global_no_coll_cause(uint32_t mpio_global_no_coll_cause);
+H5_DLL void H5CX_set_mpio_Proc0_BCast(hbool_t mpio_Proc0_BCast);
 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
 H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_link_hard(int mpio_coll_chunk_link_hard);
 H5_DLL herr_t H5CX_test_set_mpio_coll_chunk_multi_hard(int mpio_coll_chunk_multi_hard);
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 2c06800..e1c0edb 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -37,7 +37,7 @@
 #include "H5Eprivate.h"   /* Error handling */
 #include "H5Fprivate.h"   /* File access */
 #include "H5FDprivate.h"  /* File drivers */
-#include "H5FDmpi.h"      /* MPI-based file drivers */
+#include "H5FDmpi.h"      /* MPI-based file drivers */
 #include "H5Iprivate.h"   /* IDs */
 #include "H5MMprivate.h"  /* Memory management */
 #include "H5Oprivate.h"   /* Object headers */
@@ -281,9 +281,11 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
     const H5S_t *mem_space, const H5D_type_info_t *type_info)
 {
     H5FD_mpio_xfer_t io_xfer_mode;      /* MPI I/O transfer mode */
-    unsigned local_cause = 0;           /* Local reason(s) for breaking collective mode */
-    unsigned global_cause = 0;          /* Global reason(s) for breaking collective mode */
+    unsigned local_cause[2] = {0,0};    /* [0] Local reason(s) for breaking collective mode */
+                                        /* [1] Flag if dataset is both: H5S_ALL and small */
+    unsigned global_cause[2] = {0,0};   /* Global reason(s) for breaking collective mode */
     htri_t ret_value = SUCCEED;         /* Return value */
+    hbool_t H5FD_MPIO_Proc0_BCast;      /* Flag if dataset is both: H5S_ALL and < 2GB */
 
     FUNC_ENTER_PACKAGE
 
@@ -298,34 +300,34 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
     if(H5CX_get_io_xfer_mode(&io_xfer_mode) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
     if(io_xfer_mode == H5FD_MPIO_INDEPENDENT)
-        local_cause |= H5D_MPIO_SET_INDEPENDENT;
+        local_cause[0] |= H5D_MPIO_SET_INDEPENDENT;
 
     /* Optimized MPI types flag must be set */
     /* (based on 'HDF5_MPI_OPT_TYPES' environment variable) */
     if(!H5FD_mpi_opt_types_g)
-        local_cause |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
+        local_cause[0] |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED;
 
     /* Don't allow collective operations if datatype conversions need to happen */
     if(!type_info->is_conv_noop)
-        local_cause |= H5D_MPIO_DATATYPE_CONVERSION;
+        local_cause[0] |= H5D_MPIO_DATATYPE_CONVERSION;
 
     /* Don't allow collective operations if data transform operations should occur */
     if(!type_info->is_xform_noop)
-        local_cause |= H5D_MPIO_DATA_TRANSFORMS;
+        local_cause[0] |= H5D_MPIO_DATA_TRANSFORMS;
 
     /* Check whether these are both simple or scalar dataspaces */
     if(!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space))
             && (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space))))
-        local_cause |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
+        local_cause[0] |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
 
     /* Dataset storage must be contiguous or chunked */
     if(!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
             io_info->dset->shared->layout.type == H5D_CHUNKED))
-        local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+        local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
 
     /* check if external-file storage is used */
     if(io_info->dset->shared->dcpl_cache.efl.nused > 0)
-        local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+        local_cause[0] |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
 
     /* The handling of memory space is different for chunking and contiguous
      * storage.  For contiguous storage, mem_space and file_space won't change
@@ -343,28 +345,59 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
     if (io_info->op_type == H5D_IO_OP_WRITE &&
             io_info->dset->shared->layout.type == H5D_CHUNKED &&
             io_info->dset->shared->dcpl_cache.pline.nused > 0)
-        local_cause |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
+        local_cause[0] |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
 #endif
 
+    H5FD_MPIO_Proc0_BCast = FALSE;
+    /* Check to see if all the processes are reading the same data */
+    if((H5S_GET_SELECT_TYPE(file_space) != H5S_SEL_ALL)) {
+        /* Flag to do a MPI_Bcast of the data from one proc instead of
+         * having all the processes involved in the persistent I/O.
+         */
+        local_cause[1] |= 0x01;
+    }
+    else {
+
+        /* If the size of the dataset is less than 2GB then do an MPI_Bcast
+         * of the data from one process instead of having all the processes
+         * involved in the persistent I/O.
+         */
+
+        hsize_t dset_storage_size;
+        H5D__get_storage_size(io_info->dset, &dset_storage_size);
+
+        if(dset_storage_size > ((hsize_t)(H5_2GB) - 1) ) {
+            local_cause[1] |= 0x02;
+        }
+    }
+
     /* Check for independent I/O */
-    if(local_cause & H5D_MPIO_SET_INDEPENDENT)
-        global_cause = local_cause;
+    if(local_cause[0] & H5D_MPIO_SET_INDEPENDENT)
+        global_cause[0] = local_cause[0];
     else {
         int mpi_code;               /* MPI error code */
 
         /* Form consensus opinion among all processes about whether to perform
         * collective I/O
         */
-        if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
+        if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm)))
             HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
+
     } /* end else */
 
     /* Set the local & global values of no-collective-cause in the API context */
-    H5CX_set_mpio_local_no_coll_cause(local_cause);
-    H5CX_set_mpio_global_no_coll_cause(global_cause);
+    H5CX_set_mpio_local_no_coll_cause(local_cause[0]);
+    H5CX_set_mpio_global_no_coll_cause(global_cause[0]);
 
     /* Set the return value, based on the global cause */
-    ret_value = global_cause > 0 ? FALSE : TRUE;
+    ret_value = global_cause[0] > 0 ? FALSE : TRUE;
+
+    /* read-proc0-and-bcast if collective and H5S_ALL */
+    if(global_cause[0] == 0 && global_cause[1] == 0)
+        H5FD_MPIO_Proc0_BCast = TRUE;
+
+    /* Set Flag if dataset is both: H5S_ALL and < 2GB in the API context */
+    H5CX_set_mpio_Proc0_BCast(H5FD_MPIO_Proc0_BCast);
 
 done:
     FUNC_LEAVE_NOAPI(ret_value)
@@ -3069,8 +3102,8 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
     chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length;
 
     /* Currently, these chunk reads are done independently and will likely
-     * cause issues with collective metadata reads enabled. In the future,
-     * this should be refactored to use collective chunk reads - JTH */
+     * cause issues with collective metadata reads enabled. In the future,
+     * this should be refactored to use collective chunk reads - JTH */
 
     /* Get the original state of parallel I/O transfer mode */
     if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 87f8b6a..a4b9ef3 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1414,31 +1414,32 @@ static herr_t
 H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
     hid_t H5_ATTR_UNUSED dxpl_id, haddr_t addr, size_t size, void *buf/*out*/)
 {
-    H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
-    MPI_Offset mpi_off;
-    MPI_Status mpi_stat;                /* Status from I/O operation */
-    int mpi_code;                       /* mpi return code */
-    MPI_Datatype buf_type = MPI_BYTE;   /* MPI description of the selection in memory */
-    int size_i;                         /* Integer copy of 'size' to read */
+    H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
+    MPI_Offset mpi_off;
+    MPI_Status mpi_stat;                /* Status from I/O operation */
+    int mpi_code;                       /* mpi return code */
+    MPI_Datatype buf_type = MPI_BYTE;   /* MPI description of the selection in memory */
+    int size_i;                         /* Integer copy of 'size' to read */
 #if MPI_VERSION >= 3
-    MPI_Count bytes_read;               /* Number of bytes read in */
+    MPI_Count bytes_read = 0;           /* Number of bytes read in */
     MPI_Count type_size;                /* MPI datatype used for I/O's size */
     MPI_Count io_size;                  /* Actual number of bytes requested */
     MPI_Count n;
 #else
-    int bytes_read;                     /* Number of bytes read in */
+    int bytes_read = 0;                 /* Number of bytes read in */
     int type_size;                      /* MPI datatype used for I/O's size */
     int io_size;                        /* Actual number of bytes requested */
-    int n;
+    int n;
 #endif
-    hbool_t use_view_this_time = FALSE;
-    herr_t ret_value = SUCCEED;
+    hbool_t use_view_this_time = FALSE;
+    hbool_t Proc0_BCast_enabled = FALSE;  /* If read-proc0-and-bcast option was used */
+    herr_t ret_value = SUCCEED;
 
     FUNC_ENTER_NOAPI_NOINIT
 
 #ifdef H5FDmpio_DEBUG
     if (H5FD_mpio_Debug[(int)'t'])
-        fprintf(stdout, "Entering H5FD_mpio_read\n" );
+        fprintf(stdout, "Entering H5FD_mpio_read\n" );
 #endif
     HDassert(file);
     HDassert(H5FD_MPIO==file->pub.driver_id);
@@ -1457,12 +1458,12 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
 #ifdef H5FDmpio_DEBUG
     if (H5FD_mpio_Debug[(int)'r'])
         fprintf(stdout, "in H5FD_mpio_read mpi_off=%ld size_i=%d\n",
-                (long)mpi_off, size_i );
+                (long)mpi_off, size_i );
 #endif
 
     /* Only look for MPI views for raw data transfers */
     if(type == H5FD_MEM_DRAW) {
-        H5FD_mpio_xfer_t xfer_mode;     /* I/O transfer mode */
+        H5FD_mpio_xfer_t xfer_mode;     /* I/O transfer mode */
 
         /* Get the transfer mode from the API context */
         if(H5CX_get_io_xfer_mode(&xfer_mode) < 0)
@@ -1475,7 +1476,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
          * could mean "use MPI_BYTE" by convention).
          */
         if(xfer_mode==H5FD_MPIO_COLLECTIVE) {
-            MPI_Datatype file_type;
+            MPI_Datatype file_type;
 
             /* Remember that views are used */
             use_view_this_time = TRUE;
@@ -1502,8 +1503,8 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
         H5FD_mpio_collective_opt_t coll_opt_mode;
 
 #ifdef H5FDmpio_DEBUG
-        if (H5FD_mpio_Debug[(int)'t'])
-            fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n");
+        if (H5FD_mpio_Debug[(int)'t'])
+            fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n");
 #endif
         /* Get the collective_opt property to check whether the application wants to do IO individually. */
         if(H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0)
@@ -1514,8 +1515,23 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
             if(H5FD_mpio_Debug[(int)'t'])
                 fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n");
 #endif
-            if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+            if(H5CX_get_mpio_Proc0_BCast()) {
+#ifdef H5FDmpio_DEBUG
+                if(H5FD_mpio_Debug[(int)'t'])
+                    fprintf(stdout, "H5FD_mpio_read: doing Proc0-read-and-MPI_Bcast\n");
+#endif
+                if(file->mpi_rank == 0) {
+                    if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+                        HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+                }
+                if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+                Proc0_BCast_enabled = TRUE;
+            }
+            else {
+                if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
                     HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
+            }
         } /* end if */
         else {
 #ifdef H5FDmpio_DEBUG
@@ -1534,24 +1550,31 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
                 HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
     } else {
         if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+            HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
     }
 
-    /* How many bytes were actually read? */
+    if( !Proc0_BCast_enabled || (Proc0_BCast_enabled && file->mpi_rank == 0) ) {
+        /* How many bytes were actually read? */
 #if MPI_VERSION >= 3
-    if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
+        if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
 #else
-    if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+        if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
 #endif
         HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+    }
+
+    if(Proc0_BCast_enabled ) {
+        if(MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, file->comm))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0)
+    }
 
     /* Get the type's size */
 #if MPI_VERSION >= 3
-    if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
+    if(MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
 #else
-    if (MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
+    if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
 #endif
-        HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
 
     /* Compute the actual number of bytes requested */
     io_size=type_size*size_i;
@@ -1564,12 +1587,12 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type,
      * This gives us zeroes beyond end of physical MPI file.
      */
     if ((n=(io_size-bytes_read)) > 0)
-        HDmemset((char*)buf+bytes_read, 0, (size_t)n);
+        HDmemset((char*)buf+bytes_read, 0, (size_t)n);
 
 done:
 #ifdef H5FDmpio_DEBUG
     if (H5FD_mpio_Debug[(int)'t'])
-        fprintf(stdout, "Leaving H5FD_mpio_read\n" );
+        fprintf(stdout, "Leaving H5FD_mpio_read\n" );
 #endif
 
     FUNC_LEAVE_NOAPI(ret_value)
diff --git a/src/H5Pprivate.h b/src/H5Pprivate.h
index 866f088..7007635 100644
--- a/src/H5Pprivate.h
+++ b/src/H5Pprivate.h
@@ -44,7 +44,6 @@ typedef struct H5P_genplist_t H5P_genplist_t;
 #define H5_COLL_MD_READ_FLAG_NAME "collective_metadata_read"
 
-
 /****************************/
 /* Library Private Typedefs */
 /****************************/
diff --git a/src/H5private.h b/src/H5private.h
index b654bae..05bfc63 100644
--- a/src/H5private.h
+++ b/src/H5private.h
@@ -568,6 +568,9 @@
 #define H5_PB (1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F)
 #define H5_EB (1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F * 1024.0F)
 
+/* Define 2GB -- Used for 2GB limitation checks */
+#define H5_2GB (2.0F * 1024.0F * 1024.0F * 1024.0F)
+
 #ifndef H5_HAVE_FLOCK
 /* flock() operations. Used in the source so we have to define them when
  * the call is not available (e.g.: Windows). These should NOT be used
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 281d027..35501d8 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2649,7 +2649,7 @@ compress_readAll(void)
 
     /* Try reading the data */
     ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
-    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+    VRFY((ret >= 0), "H5Dread succeeded");
 
     /* Verify data read */
     for(u=0; u<dim; u++)
diff --git a/testpar/t_mdset.c b/testpar/t_mdset.c
index 5d989bb..119ce4f 100644
--- a/testpar/t_mdset.c
+++ b/testpar/t_mdset.c
@@ -603,8 +603,8 @@ void dataset_fillvalue(void)
     hsize_t req_count[4] = {1, 6, 7, 8};
     hsize_t dset_size;          /* Dataset size */
     int *rdata, *wdata;         /* Buffers for data to read and write */
-    int *twdata, *trdata;       /* Temporary pointer into buffer */
-    int acc, i, j, k, l;        /* Local index variables */
+    int *twdata, *trdata;       /* Temporary pointer into buffer */
+    int acc, i, j, k, l, ii;    /* Local index variables */
     herr_t ret;                 /* Generic return value */
     const char *filename;
@@ -645,27 +645,40 @@ void dataset_fillvalue(void)
     /*
      * Read dataset before any data is written.
      */
-    /* set entire read buffer with the constant 2 */
-    HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
-    /* Independently read the entire dataset back */
-    ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
-    VRFY((ret >= 0), "H5Dread succeeded");
-    /* Verify all data read are the fill value 0 */
-    trdata = rdata;
-    err_num = 0;
-    for(i = 0; i < (int)dset_dims[0]; i++)
+    /* Create DXPL for I/O */
+    dxpl = H5Pcreate(H5P_DATASET_XFER);
+    VRFY((dxpl >= 0), "H5Pcreate succeeded");
+
+    for(ii = 0; ii < 2; ii++) {
+        if(ii == 0)
+            ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+        else
+            ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+        /* set entire read buffer with the constant 2 */
+        HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+        /* Read the entire dataset back */
+        ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+        VRFY((ret >= 0), "H5Dread succeeded");
+
+        /* Verify all data read are the fill value 0 */
+        trdata = rdata;
+        err_num = 0;
+        for(i = 0; i < (int)dset_dims[0]; i++)
         for(j = 0; j < (int)dset_dims[1]; j++)
-            for(k = 0; k < (int)dset_dims[2]; k++)
-                for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
-                    if(*trdata != 0)
-                        if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
-                            printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
-    if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+            for(k = 0; k < (int)dset_dims[2]; k++)
+                for(l = 0; l < (int)dset_dims[3]; l++, twdata++, trdata++)
+                    if(*trdata != 0)
+                        if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+                            printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i, j, k, l, *trdata);
+        if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
         printf("[more errors ...]\n");
-    if(err_num){
+        if(err_num){
         printf("%d errors found in check_value\n", err_num);
-        nerrors++;
+            nerrors++;
+        }
     }
 
     /* Barrier to ensure all processes have completed the above test. */
@@ -681,10 +694,6 @@ void dataset_fillvalue(void)
     ret = H5Sselect_hyperslab(memspace, H5S_SELECT_SET, req_start, NULL, req_count, NULL);
     VRFY((ret >= 0), "H5Sselect_hyperslab succeeded on memory dataspace");
 
-    /* Create DXPL for collective I/O */
-    dxpl = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((dxpl >= 0), "H5Pcreate succeeded");
-
     ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
     VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
     if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
@@ -711,37 +720,46 @@ void dataset_fillvalue(void)
     /*
      * Read dataset after partial write.
      */
-    /* set entire read buffer with the constant 2 */
-    HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
-    /* Independently read the entire dataset back */
-    ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata);
-    VRFY((ret >= 0), "H5Dread succeeded");
-    /* Verify correct data read */
-    twdata=wdata;
-    trdata=rdata;
-    err_num=0;
-    for(i=0; i<(int)dset_dims[0]; i++)
+    for(ii = 0; ii < 2; ii++) {
+        if(ii == 0)
+            ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
+        else
+            ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
+        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+
+        /* set entire read buffer with the constant 2 */
+        HDmemset(rdata,2,(size_t)(dset_size*sizeof(int)));
+        /* Read the entire dataset back */
+        ret = H5Dread(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, rdata);
+        VRFY((ret >= 0), "H5Dread succeeded");
+
+        /* Verify correct data read */
+        twdata=wdata;
+        trdata=rdata;
+        err_num=0;
+        for(i=0; i<(int)dset_dims[0]; i++)
         for(j=0; j<(int)dset_dims[1]; j++)
-            for(k=0; k<(int)dset_dims[2]; k++)
-                for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
-                    if(i<mpi_size) {
-                        if(*twdata != *trdata )
-                            if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
-                                printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
-                    } /* end if */
-                    else {
-                        if(*trdata != 0)
-                            if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
-                                printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
-                    } /* end else */
-    if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
+            for(k=0; k<(int)dset_dims[2]; k++)
+                for(l=0; l<(int)dset_dims[3]; l++, twdata++, trdata++)
+                    if(i<mpi_size) {
+                        if(*twdata != *trdata )
+                            if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+                                printf("Dataset Verify failed at [%d][%d][%d][%d]: expect %d, got %d\n", i,j,k,l, *twdata, *trdata);
+                    } /* end if */
+                    else {
+                        if(*trdata != 0)
+                            if(err_num++ < MAX_ERR_REPORT || VERBOSE_MED)
+                                printf("Dataset Verify failed at [%d][%d][%d][%d]: expect 0, got %d\n", i,j,k,l, *trdata);
+                    } /* end else */
+        if(err_num > MAX_ERR_REPORT && !VERBOSE_MED)
         printf("[more errors ...]\n");
-    if(err_num){
+        if(err_num){
         printf("%d errors found in check_value\n", err_num);
-        nerrors++;
+            nerrors++;
+        }
     }
-
+
     /* Close all file objects */
     ret = H5Dclose(dataset);
     VRFY((ret >= 0), "H5Dclose succeeded");
@@ -856,7 +874,7 @@ void collective_group_write(void)
         if(!((m+1) % 10)) {
             printf("created %d groups\n", m+1);
             MPI_Barrier(MPI_COMM_WORLD);
-        }
+        }
 #endif /* BARRIER_CHECKS */
     }
diff --git a/testpar/t_pread.c b/testpar/t_pread.c
index 0905d44..a6a6a7d 100644
--- a/testpar/t_pread.c
+++ b/testpar/t_pread.c
@@ -46,7 +46,7 @@ completely foolproof is to underestimate the ingenuity of complete\n\
 fools.\n";
 
 static int generate_test_file(MPI_Comm comm, int mpi_rank, int group);
-static int test_parallel_read(MPI_Comm comm, int mpi_rank, int group);
+static int test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group);
 
 static char *test_argv0 = NULL;
@@ -413,7 +413,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
  * Function:    test_parallel_read
  *
  * Purpose:     This actually tests the superblock optimization
- *              and covers the two primary cases we're interested in.
+ *              and covers the three primary cases we're interested in.
  *              1). That HDF5 files can be opened in parallel by
  *                  the rank 0 process and that the superblock
  *                  offset is correctly broadcast to the other
@@ -423,6 +423,10 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
  *                  subgroups of MPI_COMM_WORLD and that each
  *                  subgroup operates as described in (1) to
  *                  collectively read the data.
+ *              3). Testing proc0-read-and-MPI_Bcast using
+ *                  sub-communicators, and reading into
+ *                  a memory space that is different from the
+ *                  file space.
 *
 *               The global MPI rank is used for reading and
 *               writing data for process specific data in the
@@ -444,7 +448,7 @@ generate_test_file( MPI_Comm comm, int mpi_rank, int group_id )
  *-------------------------------------------------------------------------
  */
 static int
-test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
+test_parallel_read(MPI_Comm comm, int mpi_rank, int mpi_size, int group_id)
 {
     const char *failure_mssg;
     const char *fcn_name = "test_parallel_read()";
@@ -457,8 +461,12 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
     hid_t fapl_id   = -1;
     hid_t file_id   = -1;
     hid_t dset_id   = -1;
+    hid_t dxpl_id   = H5P_DEFAULT;
     hid_t memspace  = -1;
     hid_t filespace = -1;
+    hid_t filetype  = -1;
+    size_t filetype_size;
+    hssize_t dset_size;
     hsize_t i;
     hsize_t offset;
     hsize_t count = COUNT;
@@ -606,14 +614,6 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
         }
     }
 
-    /* close file, etc. */
-    if ( pass || (dset_id != -1) ) {
-        if ( H5Dclose(dset_id) < 0 ) {
-            pass = false;
-            failure_mssg = "H5Dclose(dset_id) failed.\n";
-        }
-    }
-
     if ( pass || (memspace != -1) ) {
         if ( H5Sclose(memspace) < 0 ) {
             pass = false;
@@ -628,6 +628,202 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
         }
     }
 
+    /* free data_slice if it has been allocated */
+    if ( data_slice != NULL ) {
+        HDfree(data_slice);
+        data_slice = NULL;
+    }
+
+    /*
+     * Test reading proc0-read-and-bcast with sub-communicators
+     */
+
+    /* Don't test with more than 6 processes to avoid memory issues */
+
+    if( group_size <= 6 ) {
+
+        if ( (filespace = H5Dget_space(dset_id )) < 0 ) {
+            pass = FALSE;
+            failure_mssg = "H5Dget_space failed.\n";
+        }
+
+        if ( (dset_size = H5Sget_simple_extent_npoints(filespace)) < 0 ) {
+            pass = FALSE;
+            failure_mssg = "H5Sget_simple_extent_npoints failed.\n";
+        }
+
+        if ( (filetype = H5Dget_type(dset_id)) < 0 ) {
+            pass = FALSE;
+            failure_mssg = "H5Dget_type failed.\n";
+        }
+
+        if ( (filetype_size = H5Tget_size(filetype)) == 0 ) {
+            pass = FALSE;
+            failure_mssg = "H5Tget_size failed.\n";
+        }
+
+        if ( H5Tclose(filetype) < 0 ) {
+            pass = FALSE;
+            failure_mssg = "H5Tclose failed.\n";
+        };
+
+        if ( (data_slice = (float *)HDmalloc((size_t)dset_size*filetype_size)) == NULL ) {
+            pass = FALSE;
+            failure_mssg = "malloc of data_slice failed.\n";
+        }
+
+        if ( pass ) {
+            if ( (dxpl_id = H5Pcreate(H5P_DATASET_XFER)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Pcreate(H5P_DATASET_XFER) failed.\n";
+            }
+        }
+
+        if ( pass ) {
+            if ( (H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Pset_dxpl_mpio() failed.\n";
+            }
+        }
+
+        /* read H5S_ALL section */
+        if ( pass ) {
+            if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, H5S_ALL,
+                          H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Dread() failed\n";
+            }
+        }
+
+        /* verify the data */
+        if ( pass ) {
+
+            if ( comm == MPI_COMM_WORLD )       /* test 1 */
+                nextValue = 0;
+            else if ( group_id == 0 )           /* test 2 group 0 */
+                nextValue = 0;
+            else                                /* test 2 group 1 */
+                nextValue = (float)((hsize_t)( mpi_size / 2 )*count);
+
+            i = 0;
+            while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
+                /* what we really want is data_slice[i] != nextValue --
+                 * the following is a circumlocution to shut up the
+                 * the compiler.
+                 */
+                if ( ( data_slice[i] > nextValue ) ||
+                     ( data_slice[i] < nextValue ) ) {
+                    pass = FALSE;
+                    failure_mssg = "Unexpected dset contents.\n";
+                }
+                nextValue += 1;
+                i++;
+            }
+        }
+
+        if ( pass || (filespace != -1) ) {
+            if ( H5Sclose(filespace) < 0 ) {
+                pass = false;
+                failure_mssg = "H5Sclose(filespace) failed.\n";
+            }
+        }
+
+        /* free data_slice if it has been allocated */
+        if ( data_slice != NULL ) {
+            HDfree(data_slice);
+            data_slice = NULL;
+        }
+
+        /*
+         * Read an H5S_ALL filespace into a hyperslab defined memory space
+         */
+
+        if ( (data_slice = (float *)HDmalloc((size_t)(dset_size*2)*filetype_size)) == NULL ) {
+            pass = FALSE;
+            failure_mssg = "malloc of data_slice failed.\n";
+        }
+
+        /* setup memspace */
+        if ( pass ) {
+            dims[0] = (hsize_t)dset_size*2;
+            if ( (memspace = H5Screate_simple(1, dims, NULL)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Screate_simple(1, dims, NULL) failed\n";
+            }
+        }
+        if ( pass ) {
+            offset = (hsize_t)dset_size;
+            if ( (H5Sselect_hyperslab(memspace, H5S_SELECT_SET,
+                                      &offset, NULL, &offset, NULL)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Sselect_hyperslab() failed\n";
+            }
+        }
+
+        /* read this processes section of the data */
+        if ( pass ) {
+            if ( (H5Dread(dset_id, H5T_NATIVE_FLOAT, memspace,
+                          H5S_ALL, dxpl_id, data_slice)) < 0 ) {
+                pass = FALSE;
+                failure_mssg = "H5Dread() failed\n";
+            }
+        }
+
+        /* verify the data */
+        if ( pass ) {
+
+            if ( comm == MPI_COMM_WORLD )       /* test 1 */
+                nextValue = 0;
+            else if ( group_id == 0 )           /* test 2 group 0 */
+                nextValue = 0;
+            else                                /* test 2 group 1 */
+                nextValue = (float)((hsize_t)(mpi_size / 2)*count);
+
+            i = (hsize_t)dset_size;
+            while ( ( pass ) && ( i < (hsize_t)dset_size ) ) {
+                /* what we really want is data_slice[i] != nextValue --
+                 * the following is a circumlocution to shut up the
+                 * the compiler.
+                 */
+                if ( ( data_slice[i] > nextValue ) ||
+                     ( data_slice[i] < nextValue ) ) {
+                    pass = FALSE;
+                    failure_mssg = "Unexpected dset contents.\n";
+                }
+                nextValue += 1;
+                i++;
+            }
+        }
+
+        if ( pass || (memspace != -1) ) {
+            if ( H5Sclose(memspace) < 0 ) {
+                pass = false;
+                failure_mssg = "H5Sclose(memspace) failed.\n";
+            }
+        }
+
+        /* free data_slice if it has been allocated */
+        if ( data_slice != NULL ) {
+            HDfree(data_slice);
+            data_slice = NULL;
+        }
+
+        if ( pass || (dxpl_id != -1) ) {
+            if ( H5Pclose(dxpl_id) < 0 ) {
+                pass = false;
+                failure_mssg = "H5Pclose(dxpl_id) failed.\n";
+            }
+        }
+    }
+
+    /* close file, etc. */
+    if ( pass || (dset_id != -1) ) {
+        if ( H5Dclose(dset_id) < 0 ) {
+            pass = false;
+            failure_mssg = "H5Dclose(dset_id) failed.\n";
+        }
+    }
+
     if ( pass || (file_id != -1) ) {
         if ( H5Fclose(file_id) < 0 ) {
             pass = false;
@@ -668,17 +864,9 @@ test_parallel_read(MPI_Comm comm, int mpi_rank, int group_id)
             HDfprintf(stdout, "%s: failure_mssg = \"%s\"\n",
                       fcn_name, failure_mssg);
         }
-
         HDremove(reloc_data_filename);
     }
 
-    /* free data_slice if it has been allocated */
-    if ( data_slice != NULL ) {
-        HDfree(data_slice);
-        data_slice = NULL;
-    }
-
-
     return( ! pass );
 
 } /* test_parallel_read() */
@@ -803,7 +991,7 @@ main( int argc, char **argv)
     }
 
     /* Now read the generated test file (stil using MPI_COMM_WORLD) */
-    nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, which_group);
+    nerrs += test_parallel_read( MPI_COMM_WORLD, mpi_rank, mpi_size, which_group);
 
     if ( nerrs > 0 ) {
         if ( mpi_rank == 0 ) {
@@ -819,7 +1007,7 @@ main( int argc, char **argv)
     }
 
     /* run the 2nd set of tests */
-    nerrs += test_parallel_read(group_comm, mpi_rank, which_group);
+    nerrs += test_parallel_read(group_comm, mpi_rank, mpi_size, which_group);
 
     if ( nerrs > 0 ) {
         if ( mpi_rank == 0 ) {
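The eligibility decision in `H5D__mpio_opt_possible()` above reaches consensus with a single `MPI_Allreduce` over a two-element cause array: element 0 carries reasons to break collective I/O, element 1 carries reasons to skip read-proc0-and-bcast (non-H5S_ALL selection, or storage of 2 GB or more). Below is a minimal standalone sketch of just that consensus step; the flag values and the "odd ranks object" condition are illustrative, not taken from the commit.

```c
/* Sketch of the MPI_BOR consensus used by the commit: each rank ORs its
 * local objections into a 2-element array; after the Allreduce every
 * rank sees the union, so the optimization runs only when no rank
 * objects on either count. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    unsigned local_cause[2]  = {0, 0};
    unsigned global_cause[2] = {0, 0};
    int rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Illustrative assumption: pretend odd ranks' selections are not
     * H5S_SEL_ALL, so they veto the proc0-read-and-bcast path. */
    if (rank % 2)
        local_cause[1] |= 0x01;

    /* Every rank receives the bitwise OR of all ranks' flags. */
    MPI_Allreduce(local_cause, global_cause, 2, MPI_UNSIGNED, MPI_BOR,
                  MPI_COMM_WORLD);

    if (global_cause[0] == 0 && global_cause[1] == 0)
        printf("rank %d: read-proc0-and-bcast is safe\n", rank);
    else
        printf("rank %d: fall back to regular collective read\n", rank);

    MPI_Finalize();
    return 0;
}
```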