-rw-r--r--  src/H5Dio.c   | 530
-rw-r--r--  src/H5Dmpio.c | 399
-rw-r--r--  src/H5Dpkg.h  |  19
-rw-r--r--  src/H5Smpio.c | 286
4 files changed, 652 insertions(+), 582 deletions(-)
diff --git a/src/H5Dio.c b/src/H5Dio.c
index f1d12f8..fa3f49e 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -108,13 +108,24 @@ static htri_t
H5D_get_collective_io_consensus(const H5F_t *file,
const htri_t local_opinion,
const unsigned flags);
+
+static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset,
+ const H5S_t *mem_space,
+ const H5S_t *file_space,
+ int *min_chunkf);
+static herr_t
+H5D_obtain_duplicate_pid(hid_t dxpl_id,
+ hid_t* dp_id,
+ H5D_dxpl_cache_t **cache);
+
#endif /* H5_HAVE_PARALLEL */
/* I/O info operations */
static herr_t
H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
- const H5S_t *mem_space, const H5S_t *file_space,
- unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info);
+ hid_t dp_dxpl_id, H5D_dxpl_cache_t *dp_dxpl_cache,
+ const H5S_t *mem_space, const H5S_t *file_space,
+ unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info);
/* Chunk operations */
static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type,
@@ -648,8 +659,13 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines will be parallel */
+ H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */
+ H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */
+ hid_t dp_id; /* Duplicated DXPL ID with independent transfer mode */
#ifdef H5_HAVE_PARALLEL
- hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
+ hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
+ H5FD_mpio_xfer_t xfer_mode;
+
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
@@ -660,6 +676,8 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
unsigned sconv_flags=0; /* Flags for the space conversion */
herr_t ret_value = SUCCEED; /* Return value */
+
+
FUNC_ENTER_NOAPI_NOINIT(H5D_read)
/* check args */
@@ -682,6 +700,7 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
#ifdef H5_HAVE_PARALLEL
+
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only")
@@ -765,8 +784,15 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
assert(0 && "Unhandled layout type!");
} /* end switch */
+#ifdef H5_HAVE_PARALLEL
+ /* Obtain a duplicate property list ID. This is used to handle
+    collective chunk I/O. */
+
+ if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't obtain duplicated property id")
+#endif
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
@@ -877,8 +903,13 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines will be parallel */
+ hid_t dp_id; /* Duplicated DXPL ID with independent transfer mode */
+ H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */
+ H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */
#ifdef H5_HAVE_PARALLEL
- hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
+ hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
+ H5FD_mpio_xfer_t xfer_mode;
+ int mpi_rank;
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
@@ -1013,9 +1044,12 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
default:
assert(0 && "Unhandled layout type!");
} /* end switch */
-
+#ifdef H5_HAVE_PARALLEL
+ if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't obtain duplicated property id")
+#endif
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
@@ -1654,6 +1688,12 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
H5D_storage_t store; /*union of EFL and chunk pointer in file space */
herr_t ret_value = SUCCEED; /*return value */
+#ifdef H5_HAVE_PARALLEL
+ int count_chunk,mpi_rank, mpi_code,min_num_chunk,is_regular,all_regular;
+ hid_t temp_id;
+ MPI_Comm comm;
+#endif
+
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read)
/* Map elements between file and memory for each chunk*/
@@ -1679,7 +1719,13 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
/* Get first node in chunk skip list */
chunk_node=H5SL_first(fm.fsel);
-
+#ifdef H5_HAVE_PARALLEL
+ if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk")
+ }
+ count_chunk = 0;
+#endif
/* Iterate through chunks to be operated on */
while(chunk_node) {
H5D_chunk_info_t *chunk_info; /* chunk information */
@@ -1691,15 +1737,148 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
store.chunk.offset = chunk_info->coords;
store.chunk.index = chunk_info->index;
- /* Perform the actual read operation */
- status = (io_info->ops.read)(io_info,
+#ifdef H5_HAVE_PARALLEL
+
+ count_chunk++;
+ if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ /* If this chunk's count exceeds the minimum number of chunks
+    over all processes, do an independent read */
+
+ if(count_chunk <= min_num_chunk) {
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
+ H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
+ is_regular = 1;
+ else is_regular = 0;
+
+ /* Getting MPI communicator and rank */
+ if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
+ if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
+
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+#endif
+ }
+
+ if(count_chunk > min_num_chunk) {
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ status = (io_info->ops_sca.read)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ io_info->dxpl_id = temp_id;
+ }
+
+
+ else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)||
+ (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){
+
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ /* Perform the independent read operation */
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ status = (io_info->ops_sca.read)(io_info,
chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
chunk_info->fspace, chunk_info->mspace,
buf);
-
- /* Check return value from optimized read */
- if (status<0)
- HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ io_info->dxpl_id = temp_id;
+#else
+
+ /* Perform the actual collective read operation */
+ status = (io_info->ops.read)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+#endif
+ }
+
+ else {
+ /* For a regular selection: when H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+    is not defined, an independent read must be performed unless the
+    selections on all processes are regular. */
+
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ if(!all_regular) {
+
+ /* Perform the independent read operation */
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ status = (io_info->ops_sca.read)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ io_info->dxpl_id = temp_id;
+ }
+
+ else {
+ /* For regular collective read in parallel*/
+ /* Perform the read operation */
+ status = (io_info->ops.read)(io_info,
+ chunk_info->chunk_points,
+ H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ }
+#else
+
+ /* For regular collective read in parallel*/
+ /* Perform the read operation */
+ status = (io_info->ops.read)(io_info,
+ chunk_info->chunk_points,
+ H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+#endif
+ }
+
+ }
+ else {
+ /* For regular independent read in parallel*/
+ /* Perform the read operation */
+ status = (io_info->ops.read)(io_info,
+ chunk_info->chunk_points,
+ H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ }
+
+#else
+ /* Perform the actual read operation for the serial build */
+ status = (io_info->ops.read)(io_info,
+ chunk_info->chunk_points,
+ H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized read */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+#endif
/* Get the next chunk node in the skip list */
chunk_node=H5SL_next(chunk_node);
@@ -1937,6 +2116,8 @@ done:
* Hacked on it a lot. :-)
* Leon Arber: 4/20/04
* Added support for data transforms.
+ * Kent Yang: 8/10/04
+ * Added support for collective chunk IO.
*
*-------------------------------------------------------------------------
*/
@@ -1973,6 +2154,13 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
H5D_storage_t store; /*union of EFL and chunk pointer in file space */
herr_t ret_value = SUCCEED; /*return value */
+#ifdef H5_HAVE_PARALLEL
+ hid_t temp_id;
+ int count_chunk,mpi_rank,mpi_code,min_num_chunk,is_regular,all_regular = 0;
+ MPI_Comm comm;
+
+#endif
+
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write)
/* Map elements between file and memory for each chunk*/
@@ -1990,6 +2178,14 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
#ifdef H5S_DEBUG
H5_timer_begin(&timer);
#endif
+
+#ifdef H5_HAVE_PARALLEL
+ if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk")
+ }
+ count_chunk = 0;
+#endif
/* Get first node in chunk skip list */
chunk_node=H5SL_first(fm.fsel);
@@ -2004,15 +2200,136 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
store.chunk.offset = chunk_info->coords;
store.chunk.index = chunk_info->index;
- /* Perform the actual write operation */
- status = (io_info->ops.write)(io_info,
+#ifdef H5_HAVE_PARALLEL
+
+ count_chunk++;
+ if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ /* If this chunk's count exceeds the minimum number of chunks
+    over all processes, do an independent write */
+
+ if(count_chunk <= min_num_chunk) {
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
+ H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
+ is_regular = 1;
+ else is_regular = 0;
+ /* Getting MPI communicator and rank */
+ if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
+ if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+#endif
+ }
+ if(count_chunk > min_num_chunk) {
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ fflush(stdout);
+ status = (io_info->ops_sca.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ io_info->dxpl_id = temp_id;
+ }
+
+
+ else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)||
+ (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){
+
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ /* Perform the independent write operation */
+
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ status = (io_info->ops_sca.write)(io_info,
chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
chunk_info->fspace, chunk_info->mspace,
buf);
-
- /* Check return value from optimized write */
- if (status<0)
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ io_info->dxpl_id = temp_id;
+#else
+
+ /* Perform the actual collective write operation */
+ status = (io_info->ops.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+#endif
+ }
+
+ else {
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ if(!all_regular) {
+
+ /* Perform the independent write operation */
+ temp_id = io_info->dxpl_id;
+ io_info->dxpl_id = io_info->dp_dxpl_id;
+ status = (io_info->ops_sca.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ io_info->dxpl_id = temp_id;
+ }
+ else {
+ /* For regular selection, perform the collective write operation */
+ status = (io_info->ops.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ }
+#else
+
+ status = (io_info->ops.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+
+#endif
+ }
+ }
+ else {
+ /* For independent parallel write*/
+ /* Perform the write operation */
+ status = (io_info->ops.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ }
+
+#else
+ /* Perform the actual write operation for the serial build */
+ status = (io_info->ops.write)(io_info,
+ chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
+ chunk_info->fspace, chunk_info->mspace,
+ buf);
+ /* Check return value from optimized write */
+ if (status<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+#endif
/* Get the next chunk node in the skip list */
chunk_node=H5SL_next(chunk_node);
@@ -3270,7 +3587,16 @@ done:
*/
static herr_t
H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
- const H5S_t
+ hid_t
+#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
+ UNUSED
+#endif /* H5_HAVE_PARALLEL */
+ dp_dxpl_id, H5D_dxpl_cache_t
+
+#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
+ UNUSED
+#endif /* H5_HAVE_PARALLEL */
+*dp_dxpl_cache,const H5S_t
#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
UNUSED
#endif /* H5_HAVE_PARALLEL */
@@ -3320,6 +3646,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
/*
* Check if we can set direct MPI-IO read/write functions
*/
+ io_info->dp_dxpl_id = dp_dxpl_id;
+ io_info->dp_dxpl_cache = dp_dxpl_cache;
+
opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags);
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace ");
@@ -3333,28 +3662,20 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */
- if((H5S_SELECT_IS_REGULAR(file_space) == TRUE) &&
- (H5S_SELECT_IS_REGULAR(mem_space) == TRUE)){
- io_info->ops.read = H5D_mpio_spaces_read;
- io_info->ops.write = H5D_mpio_spaces_write;
- }
-
- #ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- else {
- io_info->ops.read = H5D_mpio_spaces_span_read;
- io_info->ops.write = H5D_mpio_spaces_span_write;
- }
- #endif
- /* Indicate that the I/O will be parallel */
- *use_par_opt_io=TRUE;
- } /* end if */
+ io_info->ops.read = H5D_mpio_select_read;
+ io_info->ops.write = H5D_mpio_select_write;
+ io_info->ops_sca.read = H5D_select_read;
+ io_info->ops_sca.write = H5D_select_write;
+ *use_par_opt_io=TRUE;
+ /* Indicate that the I/O will be collective */
+ }
+ /* end if */
else {
- /* Indicate that the I/O will _NOT_ be parallel */
+ /* Indicate that the I/O will _NOT_ be parallel, use independent IO */
*use_par_opt_io=FALSE;
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
-
} /* end else */
#else
io_info->ops.read = H5D_select_read;
@@ -3372,3 +3693,136 @@ done:
#endif /* H5_HAVE_PARALLEL || H5S_DEBUG */
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D_ioinfo_init() */
+
+
+#ifdef H5_HAVE_PARALLEL
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5D_mpio_get_mini_chunk
+ *
+ * Purpose: Routine for obtaining the minimum, over all processes, of the
+            number of chunks covering each process's hyperslab selection.
+ *
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer:
+ *
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset,
+ const H5S_t *mem_space,
+ const H5S_t *file_space,
+ int *min_chunkf) {
+
+
+ hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
+ hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */
+ endf[H5S_MAX_RANK]; /* Selection end bounds */
+ unsigned dim_rankf; /* Number of dimensions of file dataspace */
+ int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */
+ tnum_chunkf, /* Number of chunks selection overlaps */
+ max_chunkf, /* Maximum number of chunks selection overlaps */
+ num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */
+ unsigned dim_chunks; /* Temporary number of chunks in a dimension */
+ MPI_Comm comm; /* MPI communicator for file */
+ int mpi_rank; /* Rank in MPI communicator */
+ int mpi_code; /* MPI return code */
+ unsigned u; /* Local index variable */
+ herr_t ret_value;
+
+ ret_value = SUCCEED;
+ FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_get_mini_chunk);
+ /* Getting MPI communicator and rank */
+ if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
+ if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
+
+
+ dim_rankf = H5S_GET_EXTENT_NDIMS(file_space);
+
+ if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks");
+
+ for(u=0; u < dset->shared->layout.u.chunk.ndims; u++)
+ chunk_dim[u] = dset->shared->layout.u.chunk.dim[u];
+
+
+ /* Compute the number of chunks covered by the selection on this process */
+ tnum_chunkf = 1;
+ for (u=0; u<dim_rankf; u++) {
+ dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1;
+ tnum_chunkf = dim_chunks*tnum_chunkf;
+ }
+
+ /* Determine the minimum # of chunks over all processes */
+
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+
+
+ /* Broadcast the minimum chunk count to all processes */
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+
+ done:
+ FUNC_LEAVE_NOAPI(ret_value);
+
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5D_obtain_duplicate_pid
+ *
+ * Purpose: Routine for obtaining a duplicate of the data transfer
+            property list ID, with the transfer mode set to independent.
+
+ *
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer:
+ *
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static herr_t H5D_obtain_duplicate_pid(hid_t dxpl_id,
+ hid_t* dp_id,
+ H5D_dxpl_cache_t **cache)
+{
+
+ H5FD_mpio_xfer_t xfer_mode;
+ H5P_genplist_t *dp_dx_plist; /* Data transfer property list */
+ herr_t ret_value=SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5D_obtain_duplicate_pid)
+
+ *dp_id = H5Pcopy(dxpl_id);
+
+ /* printf("inside function dp id %d\n",*dp_id);*/
+ /* Get the dataset transfer property list */
+ if (NULL == (dp_dx_plist = H5I_object(*dp_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
+
+ xfer_mode = H5FD_MPIO_INDEPENDENT;
+ if(H5P_set (dp_dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
+
+ /* Fill the DXPL cache values for later use */
+ if (H5D_get_dxpl_cache(*dp_id,cache)<0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
+
+ done:
+ FUNC_LEAVE_NOAPI(ret_value)
+
+}
+#endif /*H5_HAVE_PARALLEL*/
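
The new per-chunk logic in H5D_chunk_read()/H5D_chunk_write() above boils down to a three-way test: fall back to independent I/O once a process runs past the minimum chunk count shared by all processes, fall back when the MPI library cannot build complex derived datatypes and any of the selections involved is irregular, and otherwise stay collective. Below is a minimal standalone sketch of that decision, not HDF5 code; decide_chunk_io_mode and its parameter names are hypothetical.

#include <stdbool.h>
#include <stdio.h>

enum chunk_io_mode { CHUNK_IO_COLLECTIVE, CHUNK_IO_INDEPENDENT };

/* collective_requested : the DXPL asked for H5FD_MPIO_COLLECTIVE
 * chunk_index          : 1-based count of the chunk being processed
 * min_num_chunk        : minimum chunk count over all processes
 *                        (what H5D_mpio_get_mini_chunk() computes)
 * selection_regular    : this process's file/memory selections are regular
 * all_regular          : every process reported a regular selection
 * complex_dtype_works  : H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS is defined */
static enum chunk_io_mode
decide_chunk_io_mode(bool collective_requested, int chunk_index,
                     int min_num_chunk, bool selection_regular,
                     bool all_regular, bool complex_dtype_works)
{
    if (!collective_requested)
        return CHUNK_IO_INDEPENDENT;
    /* Past the minimum chunk count some ranks have nothing left to
     * contribute, so a matching collective call cannot be guaranteed. */
    if (chunk_index > min_num_chunk)
        return CHUNK_IO_INDEPENDENT;
    /* Without complex derived datatype support, irregular selections
     * (on this or any other process) force the independent path. */
    if (!complex_dtype_works && (!selection_regular || !all_regular))
        return CHUNK_IO_INDEPENDENT;
    return CHUNK_IO_COLLECTIVE;
}

int main(void)
{
    printf("%d\n", decide_chunk_io_mode(true, 3, 5, true, true, false)); /* 0: collective  */
    printf("%d\n", decide_chunk_io_mode(true, 6, 5, true, true, false)); /* 1: independent */
    return 0;
}
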
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index e216d85..f85c551 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -43,12 +43,6 @@ H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size,
void *buf/*out*/,
hbool_t do_write);
-/* For irregular hyperslab selection. */
-static herr_t
-H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- void *buf/*out*/,
- hbool_t do_write);
/*-------------------------------------------------------------------------
* Function: H5D_mpio_opt_possible
@@ -66,10 +60,10 @@ H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size,
*
*-------------------------------------------------------------------------
*/
+
htri_t
H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags)
{
- htri_t c1,c2; /* Flags whether a selection is optimizable */
htri_t ret_value=TRUE;
FUNC_ENTER_NOAPI(H5D_mpio_opt_possible, FAIL);
@@ -88,16 +82,7 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f
&& (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space))))
HGOTO_DONE(FALSE);
- /* Check whether both selections are "regular" */
- /*#ifndef KYANG*/
-#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- c1=H5S_SELECT_IS_REGULAR(file_space);
- c2=H5S_SELECT_IS_REGULAR(mem_space);
- if(c1==FAIL || c2==FAIL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for single selection blocks");
- if(c1==FALSE || c2==FALSE)
- HGOTO_DONE(FALSE);
-#endif
+
/* Can't currently handle point selections */
if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space))
HGOTO_DONE(FALSE);
@@ -107,126 +92,35 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f
(flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED)
HGOTO_DONE(FALSE);
- if ((flags&H5S_CONV_STORAGE_MASK)==H5S_CONV_STORAGE_CHUNKED) {
- hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
- hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */
- endf[H5S_MAX_RANK]; /* Selection end bounds */
- unsigned dim_rankf; /* Number of dimensions of file dataspace */
- int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */
- tnum_chunkf, /* Number of chunks selection overlaps */
- max_chunkf, /* Maximum number of chunks selection overlaps */
- min_chunkf, /* Minimum number of chunks selection overlaps */
- num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */
- unsigned dim_chunks; /* Temporary number of chunks in a dimension */
- MPI_Comm comm; /* MPI communicator for file */
- int mpi_rank; /* Rank in MPI communicator */
- int mpi_code; /* MPI return code */
- unsigned u; /* Local index variable */
-
- /* Disallow collective I/O if there are any I/O filters on chunks */
- if(dset->shared->dcpl_cache.pline.nused>0)
- HGOTO_DONE(FALSE)
-
- /* Getting MPI communicator and rank */
- if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
- if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
-
- /* Currently collective chunking storage
- inside HDF5 is supported for either one of the following two cases:
- 1. All the hyperslabs for one process is inside one chunk.
- 2. For single hyperslab selection, the number of chunks that covered
- the single selection for all processes should be equal.
- KY, 2004/7/14
- */
-
- /* Quincey, please read.
- This is maybe redundant, I think only when both memory and file space be SCALAR
- space, the collective IO can work. Otherwise, SELECT_POINT will be reached,collective
- IO shouldn't work.
- Please clarify and correct the code on the following,
- Quincey said that it was probably okay if only one data space is SCALAR,
- Still keep the code here until we added more tests later.
- Kent */
- if(H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR ==H5S_GET_EXTENT_TYPE(file_space)) {
- if(!(H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space) && H5S_SCALAR ==H5S_GET_EXTENT_TYPE(file_space)))
- HGOTO_DONE(FALSE)
- else
- HGOTO_DONE(TRUE)
- } /* end if */
-
- dim_rankf = H5S_GET_EXTENT_NDIMS(file_space);
-
- if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks");
-
- for(u=0; u < dset->shared->layout.u.chunk.ndims; u++)
- chunk_dim[u] = dset->shared->layout.u.chunk.dim[u];
-
- /* Case 1: check whether all hyperslab in this process is inside one chunk.
- Note: we don't handle when starting point is less than zero since that may cover
- two chunks. */
-
- /*for file space checking*/
- pcheck_hyper = 1;
- for (u=0; u<dim_rankf; u++)
- if(endf[u]/chunk_dim[u]!=startf[u]/chunk_dim[u]) {
- pcheck_hyper = 0;
- break;
- }
-
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&pcheck_hyper,&check_hyper,1,MPI_INT,MPI_LAND,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&check_hyper,1,MPI_INT,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
-
- /*if check_hyper is true, condition for collective IO case is fulfilled, no
- need to do further test. */
- if(check_hyper)
- HGOTO_DONE(TRUE);
+    /* The handling of the memory space is different for chunked
+       and contiguous storage.
+       For contiguous storage, mem_space and file_space do not
+       change while the disk I/O is performed.
+       For chunked storage, mem_space changes for each chunk, so the
+       decision on whether collective I/O can be used is deferred
+       until each chunk's I/O is reached.
+       For contiguous storage, if MPI-IO cannot support complicated
+       MPI derived datatypes, use_par_opt_io will be
+       set to FALSE.
+    */
+ if(dset->shared->layout.type == H5D_CONTIGUOUS) {
+
+#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) ||
+ (H5S_SELECT_IS_REGULAR(mem_space) != TRUE))
+ HGOTO_DONE(FALSE);
+#endif
+ }
- /* Case 2:Check whether the number of chunks that covered the single hyperslab is the same.
- If not,no collective chunk IO.
- KY, 2004/7/14
- */
-
- c1 = H5S_SELECT_IS_SINGLE(file_space);
- c2 = H5S_SELECT_IS_SINGLE(mem_space);
-
- if(c1==FAIL || c2 ==FAIL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for single selection blocks");
- if(c1==FALSE || c2 ==FALSE)
- HGOTO_DONE(FALSE);
-
- /* Compute the number of chunks covered by the selection on this process */
- tnum_chunkf = 1;
- for (u=0; u<dim_rankf; u++) {
- dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1;
- tnum_chunkf = dim_chunks*tnum_chunkf;
- }
-
- /* Determine the minimum and maximum # of chunks for all processes */
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&max_chunkf,1,MPI_INT,MPI_MAX,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&min_chunkf,1,MPI_INT,MPI_MIN,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
-
- /* Let the rank==0 process determine if the same number of chunks will be operated on by all processes */
- if(mpi_rank == 0)
- num_chunks_same = (max_chunkf==min_chunkf);
-
- /* Broadcast the flag indicating the number of chunks are the same */
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&num_chunks_same,1,MPI_INT,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
-
- /* Can't handle case when number of chunks is different (yet) */
- if(!num_chunks_same)
- HGOTO_DONE(FALSE);
- } /* end if */
+ if(dset->shared->layout.type == H5D_CHUNKED)
+ if(dset->shared->dcpl_cache.pline.nused>0)
+ HGOTO_DONE(FALSE); /* Collective chunk I/O is not supported when I/O filters are present */
+
+
done:
FUNC_LEAVE_NOAPI(ret_value);
+
} /* H5D_mpio_opt_possible() */
@@ -370,281 +264,68 @@ done:
FUNC_LEAVE_NOAPI(ret_value);
} /* end H5D_mpio_spaces_xfer() */
-
-static herr_t
-H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info,
- size_t elmt_size,
- const H5S_t *file_space,
- const H5S_t *mem_space,
- void *_buf /*out*/,
- hbool_t do_write )
-{
- haddr_t addr; /* Address of dataset (or selection) within file */
- size_t mpi_buf_count, mpi_file_count; /* Number of "objects" to transfer */
- hsize_t mpi_buf_offset, mpi_file_offset; /* Offset within dataset where selection (ie. MPI type) begins */
- MPI_Datatype mpi_buf_type, mpi_file_type; /* MPI types for buffer (memory) and file */
- hbool_t mbt_is_derived=0, /* Whether the buffer (memory) type is derived and needs to be free'd */
- mft_is_derived=0; /* Whether the file type is derived and needs to be free'd */
- hbool_t plist_is_setup=0; /* Whether the dxpl has been customized */
- uint8_t *buf=(uint8_t *)_buf; /* Alias for pointer arithmetic */
- int mpi_code; /* MPI return code */
- herr_t ret_value = SUCCEED; /* Return value */
-
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_spaces_span_xfer);
-
- /* Check args */
- assert (io_info);
- assert (io_info->dset);
- assert (file_space);
- assert (mem_space);
- assert (buf);
- assert (IS_H5FD_MPIO(io_info->dset->ent.file));
-
- /* Make certain we have the correct type of property list */
- assert(TRUE==H5P_isa_class(io_info->dxpl_id,H5P_DATASET_XFER));
-
-
- /* create the MPI buffer type */
- if(H5S_SELECT_IS_REGULAR(mem_space)==TRUE){
- if (H5S_mpio_space_type( mem_space, elmt_size,
- /* out: */
- &mpi_buf_type,
- &mpi_buf_count,
- &mpi_buf_offset,
- &mbt_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
- }
- else {
- if (H5S_mpio_space_span_type( mem_space, elmt_size,
- /* out: */
- &mpi_buf_type,
- &mpi_buf_count,
- &mpi_buf_offset,
- &mbt_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
- }
-
- /* create the MPI file type */
-
- if(H5S_SELECT_IS_REGULAR(file_space)== TRUE){
- if ( H5S_mpio_space_type( file_space, elmt_size,
- /* out: */
- &mpi_file_type,
- &mpi_file_count,
- &mpi_file_offset,
- &mft_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
- }
- else {
- if ( H5S_mpio_space_span_type( file_space, elmt_size,
- /* out: */
- &mpi_file_type,
- &mpi_file_count,
- &mpi_file_offset,
- &mft_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
- }
-
- /* Get the base address of the contiguous dataset or the chunk */
- if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS)
- addr = H5D_contig_get_addr(io_info->dset) + mpi_file_offset;
- else {
- haddr_t chunk_addr; /* for collective chunk IO */
- assert(io_info->dset->shared->layout.type == H5D_CHUNKED);
- chunk_addr=H5D_istore_get_addr(io_info,NULL);
- addr = H5F_BASE_ADDR(io_info->dset->ent.file) + chunk_addr + mpi_file_offset;
- }
-
- /*
- * Pass buf type, file type to the file driver. Request an MPI type
- * transfer (instead of an elementary byteblock transfer).
- */
- if(H5FD_mpi_setup_collective(io_info->dxpl_id, mpi_buf_type, mpi_file_type)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties");
- plist_is_setup=1;
-
- /* Adjust the buffer pointer to the beginning of the selection */
- buf+=mpi_buf_offset;
-
- /* transfer the data */
- if (do_write) {
- if (H5F_block_write(io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0)
- HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed");
- }
- else {
- if (H5F_block_read (io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0)
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed");
- }
-
-done:
- /* Reset the dxpl settings */
- if(plist_is_setup) {
- if(H5FD_mpi_teardown_collective(io_info->dxpl_id)<0)
- HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values");
- } /* end if */
-
- /* free the MPI buf and file types */
- if (mbt_is_derived) {
- if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_buf_type )))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
- }
- if (mft_is_derived) {
- if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_file_type )))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
- }
-
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5D_mpio_spaces_span_xfer() */
-
/*-------------------------------------------------------------------------
- * Function: H5D_mpio_spaces_read
+ * Function: H5D_mpio_select_read
*
* Purpose: MPI-IO function to read directly from app buffer to file.
*
* Return: non-negative on success, negative on failure.
*
- * Programmer: rky 980813
+ * Programmer:
*
* Modifications:
*
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize the xfer.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
- *
*-------------------------------------------------------------------------
*/
herr_t
-H5D_mpio_spaces_read(H5D_io_info_t *io_info,
+H5D_mpio_select_read(H5D_io_info_t *io_info,
size_t UNUSED nelmts, size_t elmt_size,
const H5S_t *file_space, const H5S_t *mem_space,
void *buf/*out*/)
{
herr_t ret_value;
- FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_read);
+ FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_select_read);
- ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space,
+
+ ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space,
mem_space, buf, 0/*read*/);
FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5D_mpio_spaces_read() */
+} /* end H5D_mpio_select_read() */
/*-------------------------------------------------------------------------
- * Function: H5D_mpio_spaces_write
+ * Function: H5D_mpio_select_write
*
* Purpose: MPI-IO function to write directly from app buffer to file.
*
* Return: non-negative on success, negative on failure.
*
- * Programmer: rky 980813
+ * Programmer:
*
* Modifications:
*
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize the xfer.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
*
*-------------------------------------------------------------------------
*/
herr_t
-H5D_mpio_spaces_write(H5D_io_info_t *io_info,
+H5D_mpio_select_write(H5D_io_info_t *io_info,
size_t UNUSED nelmts, size_t elmt_size,
const H5S_t *file_space, const H5S_t *mem_space,
const void *buf)
{
herr_t ret_value;
- FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_write);
+ FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_select_write);
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space,
- mem_space, (void*)buf, 1/*write*/);
+ ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space,
+ mem_space, (void*)buf, 1/*write*/);
FUNC_LEAVE_NOAPI(ret_value);
} /* end H5D_mpio_spaces_write() */
-
-/*-------------------------------------------------------------------------
- * Function: H5D_mpio_spaces_span_read
- *
- * Purpose: MPI-IO function to read directly from app buffer to file for
- span-tree
- *
- * Return: non-negative on success, negative on failure.
- *
- * Programmer: KY
- *
- * Modifications:
- *
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5D_mpio_spaces_span_read(H5D_io_info_t *io_info,
- size_t UNUSED nelmts,
- size_t elmt_size,
- const H5S_t *file_space,
- const H5S_t *mem_space,
- void *buf/*out*/)
-{
- herr_t ret_value;
-
- FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_read);
-
- ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space,
- mem_space, buf, 0/*read*/);
-
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5D_mpio_spaces_read() */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5D_mpio_spaces_span_write
- *
- * Purpose: MPI-IO function to write directly from app buffer to file.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Programmer: KY
-
- *
- * Modifications:
- *
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize the xfer.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5D_mpio_spaces_span_write(H5D_io_info_t *io_info,
- size_t UNUSED nelmts,
- size_t elmt_size,
- const H5S_t *file_space,
- const H5S_t *mem_space,
- const void *buf)
-{
- herr_t ret_value;
-
- FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_write);
-
- /*OKAY: CAST DISCARDS CONST QUALIFIER*/
- ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space,
- mem_space, (void*)buf, 1/*write*/);
-
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5D_mpio_spaces_span_write() */
#endif /* H5_HAVE_PARALLEL */
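
Both the chunk-count test removed from H5D_mpio_opt_possible() above and the new H5D_mpio_get_mini_chunk() in H5Dio.c rely on the same arithmetic: in each dimension a selection spans (end/chunk_dim - start/chunk_dim) + 1 chunks, and the per-dimension counts multiply. A small worked example with made-up sizes (plain C, nothing here is HDF5 API):

#include <stdio.h>

int main(void)
{
    /* Hypothetical 2-D dataset with 10x10 chunks and a hyperslab
     * selection spanning rows 5..24 and columns 5..12. */
    unsigned long chunk_dim[2] = {10, 10};
    unsigned long start[2]     = {5, 5};
    unsigned long end[2]       = {24, 12};
    unsigned long nchunks = 1;

    /* Rows cross chunks 0..2 (3 chunks), columns cross chunks 0..1 (2). */
    for (int u = 0; u < 2; u++)
        nchunks *= (end[u] / chunk_dim[u] - start[u] / chunk_dim[u]) + 1;

    printf("selection overlaps %lu chunks\n", nchunks);   /* prints 6 */
    return 0;
}
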
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index a4b4574..7b2c9e3 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -100,8 +100,13 @@ typedef struct H5D_io_info_t {
H5D_t *dset; /* Pointer to dataset being operated on */
const H5D_dxpl_cache_t *dxpl_cache; /* Pointer to cache DXPL info */
hid_t dxpl_id; /* Original DXPL ID */
+#ifdef H5_HAVE_PARALLEL
+ hid_t dp_dxpl_id; /* Duplicated DXPL ID, forced to independent transfer mode */
+ H5D_dxpl_cache_t *dp_dxpl_cache; /* Cache for the duplicated DXPL */
+#endif
const H5D_storage_t *store; /* Dataset storage info */
H5D_io_ops_t ops; /* I/O operation function pointers */
+ H5D_io_ops_t ops_sca; /* I/O operation function pointers for independent access */
#ifdef H5S_DEBUG
H5S_iostats_t *stats; /* I/O statistics */
#endif /* H5S_DEBUG */
@@ -276,6 +281,20 @@ H5_DLL ssize_t H5D_efl_writevv(const H5D_io_info_t *io_info,
const void *buf);
#ifdef H5_HAVE_PARALLEL
+
+/* MPI-IO read function; selects either the regular or irregular read path */
+H5_DLL herr_t H5D_mpio_select_read(H5D_io_info_t *io_info,
+ size_t nelmts, size_t elmt_size,
+ const struct H5S_t *file_space, const struct H5S_t *mem_space,
+ void *buf/*out*/);
+
+/* MPI-IO write function; selects either the regular or irregular write path */
+H5_DLL herr_t H5D_mpio_select_write(H5D_io_info_t *io_info,
+ size_t nelmts, size_t elmt_size,
+ const struct H5S_t *file_space, const struct H5S_t *mem_space,
+ const void *buf);
+
+
/* MPI-IO function to read directly from app buffer to file rky980813 */
H5_DLL herr_t H5D_mpio_spaces_read(H5D_io_info_t *io_info,
size_t nelmts, size_t elmt_size,
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index 5ccf842..a9b90ee 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -462,190 +462,7 @@ done:
FUNC_LEAVE_NOAPI(ret_value);
}
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_space_type
- *
- * Purpose: Translate an HDF5 dataspace selection into an MPI type.
- * Currently handle only hyperslab and "all" selections.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Outputs: *new_type the MPI type corresponding to the selection
- * *count how many objects of the new_type in selection
- * (useful if this is the buffer type for xfer)
- * *extra_offset Number of bytes of offset within dataset
- * *is_derived_type 0 if MPI primitive type, 1 if derived
- *
- * Programmer: rky 980813
- *
- * Modifications:
- *
- * Quincey Koziol, June 18, 2002
- * Added 'extra_offset' parameter
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
- /* out: */
- MPI_Datatype *new_type,
- size_t *count,
- hsize_t *extra_offset,
- hbool_t *is_derived_type )
-{
- herr_t ret_value = SUCCEED;
-
- FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_type);
-
- /* Check args */
- assert (space);
-
- /* Creat MPI type based on the kind of selection */
- switch (H5S_GET_EXTENT_TYPE(space)) {
- case H5S_NULL:
- case H5S_SCALAR:
- case H5S_SIMPLE:
- switch(H5S_GET_SELECT_TYPE(space)) {
- case H5S_SEL_NONE:
- if ( H5S_mpio_none_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- case H5S_SEL_ALL:
- if ( H5S_mpio_all_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- case H5S_SEL_POINTS:
- /* not yet implemented */
- ret_value = FAIL;
- break;
-
- case H5S_SEL_HYPERSLABS:
- if(H5S_mpio_hyper_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- default:
- assert("unknown selection type" && 0);
- break;
- } /* end switch */
- break;
-
- case H5S_COMPLEX:
- /* not yet implemented */
- HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet");
-
- default:
- assert("unknown data space type" && 0);
- break;
- }
-
-done:
- FUNC_LEAVE_NOAPI(ret_value);
-}
-
-
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_space_span_type
- *
- * Purpose: Translate an HDF5 dataspace selection into a general
- MPI derived datatype, the selection is implemented with
- span-tree.
- *
- * Currently handle only hyperslab and "all" selections.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Outputs: *new_type the MPI type corresponding to the selection
- * *count how many objects of the new_type in selection
- * (useful if this is the buffer type for xfer)
- * *extra_offset Number of bytes of offset within dataset
- * *is_derived_type 0 if MPI primitive type, 1 if derived
- *
- * Programmer: KY
- *
- * Modifications:
- *
- * Quincey Koziol, June 18, 2002
- * Added 'extra_offset' parameter
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5S_mpio_space_span_type( const H5S_t *space,
- size_t elmt_size,/* out: */
- MPI_Datatype *new_type,
- size_t *count,
- hsize_t *extra_offset,
- hbool_t *is_derived_type )
-{
- herr_t ret_value = SUCCEED;
-
- FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_span_type);
-
- /* Check args */
- assert (space);
-
- /* Creat MPI type based on the kind of selection */
- switch (H5S_GET_EXTENT_TYPE(space)) {
- case H5S_NULL:
- case H5S_SCALAR:
- case H5S_SIMPLE:
- switch(H5S_GET_SELECT_TYPE(space)) {
- case H5S_SEL_NONE:
- if ( H5S_mpio_none_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- case H5S_SEL_ALL:
- if ( H5S_mpio_all_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- case H5S_SEL_POINTS:
- /* not yet implemented */
- ret_value = FAIL;
- break;
-
- case H5S_SEL_HYPERSLABS:
- if(H5S_mpio_span_hyper_type( space, elmt_size,
- /* out: */ new_type, count, extra_offset, is_derived_type )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
- break;
-
- default:
- assert("unknown selection type" && 0);
- break;
- } /* end switch */
- break;
-
- case H5S_COMPLEX:
- /* not yet implemented */
- HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet");
-
- default:
- assert("unknown data space type" && 0);
- break;
- }
-
-done:
- FUNC_LEAVE_NOAPI(ret_value);
-}
-
-/* The following codes have been used by Kent to test
- general collective derived datatype functionality.
- It should NOT be called by other routines except with
- macro #ifdef KENT #endif
- Nov. 11th, 2004 */
@@ -682,20 +499,24 @@ H5S_mpio_span_hyper_type( const H5S_t *space,
herr_t ret_value = SUCCEED;
MPI_Aint extent,lb;
+
FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5S_mpio_span_hyper_type);
/* Check args */
assert (space);
/* assert(sizeof(MPI_Aint) >= sizeof(elmt_size)); not sure the reason*/
-
+
+
rank = space->extent.rank;
/* size = HDcalloc((size_t)rank,sizeof(hsize_t)); */
if (0==elmt_size)
goto empty;
size = space->extent.size;
-
+ if(size == 0)
+ goto empty;
+
odown = space->select.sel_info.hslab->span_lst;
if(odown == NULL)
goto empty;
@@ -904,4 +725,99 @@ static herr_t H5S_obtain_datatype(const hsize_t size[],
FUNC_LEAVE_NOAPI(ret_value);
}
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_space_type
+ *
+ * Purpose: Translate an HDF5 dataspace selection into an MPI type.
+ * Currently handle only hyperslab and "all" selections.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Outputs: *new_type the MPI type corresponding to the selection
+ * *count how many objects of the new_type in selection
+ * (useful if this is the buffer type for xfer)
+ * *extra_offset Number of bytes of offset within dataset
+ * *is_derived_type 0 if MPI primitive type, 1 if derived
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ * Quincey Koziol, June 18, 2002
+ * Added 'extra_offset' parameter
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *is_derived_type )
+{
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_type);
+
+ /* Check args */
+ assert (space);
+
+ /* Creat MPI type based on the kind of selection */
+ switch (H5S_GET_EXTENT_TYPE(space)) {
+ case H5S_NULL:
+ case H5S_SCALAR:
+ case H5S_SIMPLE:
+ switch(H5S_GET_SELECT_TYPE(space)) {
+ case H5S_SEL_NONE:
+ if ( H5S_mpio_none_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ break;
+
+ case H5S_SEL_ALL:
+ if ( H5S_mpio_all_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ break;
+
+ case H5S_SEL_POINTS:
+ /* not yet implemented */
+ ret_value = FAIL;
+ break;
+
+ case H5S_SEL_HYPERSLABS:
+ if((H5S_SELECT_IS_REGULAR(space) == TRUE)) {
+ if(H5S_mpio_hyper_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, is_derived_type )<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ }
+ else {
+ if(H5S_mpio_span_hyper_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, is_derived_type )<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ }
+ break;
+
+ default:
+ assert("unknown selection type" && 0);
+ break;
+ } /* end switch */
+ break;
+
+ case H5S_COMPLEX:
+ /* not yet implemented */
+ HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet");
+
+ default:
+ assert("unknown data space type" && 0);
+ break;
+ }
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value);
+}
+
#endif /* H5_HAVE_PARALLEL */
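
From the application side, nothing in this patch changes the calling sequence: a program still opens the file with the MPI-IO driver, selects a hyperslab per rank, and requests H5FD_MPIO_COLLECTIVE on the transfer property list; the library now decides per chunk whether to honour that request or quietly fall back to independent I/O. A minimal caller-side sketch follows; the file name, dataset shape, and the 1.6-era five-argument H5Dcreate() are illustrative assumptions, error checks are omitted, and it should be run with at most 10 MPI ranks.

#include <mpi.h>
#include <hdf5.h>

int main(int argc, char *argv[])
{
    int rank;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* Open the file through the MPI-IO VFD */
    hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
    hid_t file = H5Fcreate("chunked.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);

    /* 100x100 dataset stored in 10x10 chunks */
    hsize_t dims[2]  = {100, 100};
    hsize_t chunk[2] = {10, 10};
    hid_t fspace = H5Screate_simple(2, dims, NULL);
    hid_t dcpl   = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_chunk(dcpl, 2, chunk);
    hid_t dset = H5Dcreate(file, "data", H5T_NATIVE_INT, fspace, dcpl);

    /* Each rank writes ten full rows: rows [10*rank, 10*rank+10) */
    hsize_t start[2] = {10 * (hsize_t)rank, 0};
    hsize_t count[2] = {10, 100};
    H5Sselect_hyperslab(fspace, H5S_SELECT_SET, start, NULL, count, NULL);
    hid_t mspace = H5Screate_simple(2, count, NULL);

    int buf[10][100];
    for (int i = 0; i < 10; i++)
        for (int j = 0; j < 100; j++)
            buf[i][j] = rank;

    /* Request collective transfer; the chunk loop may still fall back */
    hid_t dxpl = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
    H5Dwrite(dset, H5T_NATIVE_INT, mspace, fspace, dxpl, buf);

    H5Pclose(dxpl); H5Sclose(mspace); H5Dclose(dset);
    H5Pclose(dcpl); H5Sclose(fspace); H5Fclose(file); H5Pclose(fapl);
    MPI_Finalize();
    return 0;
}
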