summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2005-08-13 18:09:57 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2005-08-13 18:09:57 (GMT)
commit45f1eba2132c179e92d7144c6ff8713573ad24fc (patch)
tree09b34ed34f71dccc89a1b6b169284c3f936a325a /src
parent2efc3027ec480c9d0c9c6cdb08da5ac3afa38761 (diff)
downloadhdf5-45f1eba2132c179e92d7144c6ff8713573ad24fc.zip
hdf5-45f1eba2132c179e92d7144c6ff8713573ad24fc.tar.gz
hdf5-45f1eba2132c179e92d7144c6ff8713573ad24fc.tar.bz2
[svn-r11241] Purpose:
Code cleanup Description: Fix logic error in previous checkin and also finish refactoring I/O initialization, including simplifying all the collective & parallel cases into a more unified mechanism. Platforms tested: FreeBSD 4.11 (sleipnir) w/ & w/o parallel Linux 2.4 (mir)
Diffstat (limited to 'src')
-rw-r--r--src/H5Dio.c447
-rw-r--r--src/H5Dmpio.c82
-rw-r--r--src/H5Dpkg.h5
3 files changed, 175 insertions, 359 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 6956c23..df9e1b7 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -102,8 +102,6 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
static herr_t H5D_ioinfo_make_ind(H5D_io_info_t *io_info);
static herr_t H5D_ioinfo_make_coll(H5D_io_info_t *io_info);
static herr_t H5D_ioinfo_term(H5D_io_info_t *io_info);
-static htri_t H5D_get_collective_io_consensus(const H5F_t *file,
- const htri_t local_opinion, const unsigned flags);
static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
const fm_map *fm, int *min_chunkf);
#endif /* H5_HAVE_PARALLEL */
@@ -111,7 +109,7 @@ static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
/* I/O info operations */
static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache,
hid_t dxpl_id, const H5S_t *mem_space, const H5S_t *file_space,
- H5T_path_t *tpath, unsigned flags, H5D_io_info_t *io_info);
+ H5T_path_t *tpath, H5D_io_info_t *io_info);
/* Chunk operations */
static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type,
@@ -644,19 +642,11 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
-#ifdef H5_HAVE_PARALLEL
-#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- int prop_value,new_value;
- htri_t check_prop;
-#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-#endif /*H5_HAVE_PARALLEL*/
+ hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */
H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */
H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */
- unsigned sconv_flags=0; /* Flags for the space conversion */
herr_t ret_value = SUCCEED; /* Return value */
-
-
FUNC_ENTER_NOAPI_NOINIT(H5D_read)
/* check args */
@@ -682,11 +672,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only")
-
- /* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */
- /* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */
- if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file))
- sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE;
#endif /*H5_HAVE_PARALLEL*/
/* Make certain that the number of elements in each selection is the same */
@@ -744,51 +729,10 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (NULL==(tpath=H5T_path_find(dataset->shared->type, mem_type, NULL, NULL, dxpl_id)))
HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types")
- /* Set the storage flags for the space conversion check */
- switch(dataset->shared->layout.type) {
- case H5D_COMPACT:
- sconv_flags |= H5S_CONV_STORAGE_COMPACT;
- break;
-
- case H5D_CONTIGUOUS:
- sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS;
- break;
-
- case H5D_CHUNKED:
- sconv_flags |= H5S_CONV_STORAGE_CHUNKED;
- break;
-
- default:
- assert(0 && "Unhandled layout type!");
- } /* end switch */
-
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
-
-#ifdef H5_HAVE_PARALLEL
-#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- /**** Test for collective chunk IO
- notice the following code should be removed after
- a more general collective chunk IO algorithm is applied.
- */
- if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
- check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
- if(check_prop < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
- if(check_prop > 0) {
- if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
- if(!io_info.use_par_opt_io) {
- new_value = 0;
- if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
- }
- }
- }
- /* end Test for collective chunk IO */
-#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-#endif /*H5_HAVE_PARALLEL*/
+ io_info_init = TRUE;
/* Determine correct I/O routine to invoke */
if(dataset->shared->layout.type!=H5D_CHUNKED) {
@@ -805,8 +749,9 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Shut down io_info struct */
- if (H5D_ioinfo_term(&io_info) < 0)
- HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
+ if (io_info_init)
+ if(H5D_ioinfo_term(&io_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@@ -868,15 +813,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
-#ifdef H5_HAVE_PARALLEL
-#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- int prop_value,new_value;
- htri_t check_prop;
-#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-#endif /*H5_HAVE_PARALLEL*/
+ hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */
H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */
H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */
- unsigned sconv_flags=0; /* Flags for the space conversion */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_write)
@@ -897,22 +836,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
dataset->shared->checked_filters = TRUE;
}
- /* If MPI based VFD is used, no VL datatype support yet. */
- /* This is because they use the global heap in the file and we don't */
- /* support parallel access of that yet */
- if (IS_H5FD_MPI(dataset->ent.file) && H5T_detect_class(mem_type, H5T_VLEN)>0)
- HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet")
- /* If MPI based VFD is used, no VL datatype support yet. */
- /* This is because they use the global heap in the file and we don't */
- /* support parallel access of that yet */
- /* We should really use H5T_detect_class() here, but it will be difficult
- * to detect the type of the reference if it is nested... -QAK
- */
- if (IS_H5FD_MPI(dataset->ent.file) &&
- H5T_get_class(mem_type, TRUE)==H5T_REFERENCE &&
- H5T_get_ref_type(mem_type)==H5R_DATASET_REGION)
- HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet")
-
/* Check if we are allowed to write to this file */
if (0==(H5F_get_intent(dataset->ent.file) & H5F_ACC_RDWR))
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "no write intent on file")
@@ -921,6 +844,32 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (H5D_get_dxpl_cache(dxpl_id,&dxpl_cache)<0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
+ /* Various MPI based checks */
+ if (IS_H5FD_MPI(dataset->ent.file)) {
+ /* If MPI based VFD is used, no VL datatype support yet. */
+ /* This is because they use the global heap in the file and we don't */
+ /* support parallel access of that yet */
+ if(H5T_detect_class(mem_type, H5T_VLEN)>0)
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet")
+
+ /* If MPI based VFD is used, no VL datatype support yet. */
+ /* This is because they use the global heap in the file and we don't */
+ /* support parallel access of that yet */
+ /* We should really use H5T_detect_class() here, but it will be difficult
+ * to detect the type of the reference if it is nested... -QAK
+ */
+ if (H5T_get_class(mem_type, TRUE)==H5T_REFERENCE &&
+ H5T_get_ref_type(mem_type)==H5R_DATASET_REGION)
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet")
+ } /* end if */
+#ifdef H5_HAVE_PARALLEL
+ else {
+ /* Collective access is not permissible without a MPI based VFD */
+ if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE)
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only")
+ } /* end else */
+#endif /*H5_HAVE_PARALLEL*/
+
if (!file_space)
file_space = dataset->shared->space;
if (!mem_space)
@@ -929,17 +878,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src dataspace has invalid selection")
H5_ASSIGN_OVERFLOW(nelmts,snelmts,hssize_t,hsize_t);
-#ifdef H5_HAVE_PARALLEL
- /* Collective access is not permissible without a MPI based VFD */
- if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
- HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only")
-
- /* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */
- /* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */
- if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file))
- sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE;
-#endif /*H5_HAVE_PARALLEL*/
-
/* Make certain that the number of elements in each selection is the same */
if (nelmts!=(hsize_t)H5S_GET_SELECT_NPOINTS(file_space))
HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src and dest data spaces have different sizes")
@@ -986,51 +924,10 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (NULL==(tpath=H5T_path_find(mem_type, dataset->shared->type, NULL, NULL, dxpl_id)))
HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types")
- /* Set the storage flags for the space conversion check */
- switch(dataset->shared->layout.type) {
- case H5D_COMPACT:
- sconv_flags |= H5S_CONV_STORAGE_COMPACT;
- break;
-
- case H5D_CONTIGUOUS:
- sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS;
- break;
-
- case H5D_CHUNKED:
- sconv_flags |= H5S_CONV_STORAGE_CHUNKED;
- break;
-
- default:
- assert(0 && "Unhandled layout type!");
- } /* end switch */
-
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
-
-#ifdef H5_HAVE_PARALLEL
-#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
- /**** Test for collective chunk IO
- notice the following code should be removed after
- a more general collective chunk IO algorithm is applied.
- */
- if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
- check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
- if(check_prop < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
- if(check_prop > 0) {
- if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
- if(!io_info.use_par_opt_io) {
- new_value = 0;
- if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
- }
- }
- }
- /* end Test for collective chunk IO */
-#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-#endif /*H5_HAVE_PARALLEL*/
+ io_info_init = TRUE;
/* Determine correct I/O routine to invoke */
if(dataset->shared->layout.type!=H5D_CHUNKED) {
@@ -1062,8 +959,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Shut down io_info struct */
- if (H5D_ioinfo_term(&io_info) < 0)
- HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
+ if (io_info_init)
+ if(H5D_ioinfo_term(&io_info) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@@ -1711,32 +1609,25 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
int mpi_code; /* MPI error code */
int all_regular = 0; /* If this chunk's selections are regular on all processes */
+ /* Determine if this process has regular selections */
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
is_regular = 1;
else
is_regular = 0;
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ /* Determine if all processes have regular selections */
+ if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
- if(!is_regular) {
+ /* For regular selection,
+ if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
+ unless spaces for all processors are regular, independent read operation should be performed.*/
+ if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
- }
- else {
- /* For regular selection,
- if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
- unless spaces for all processors are regular, independent read operation should be performed.*/
- if(!all_regular) {
- /* Switch to independent I/O (temporarily) */
- make_ind = TRUE;
- make_coll = TRUE;
- } /* end if */
- } /* end else */
+ } /* end if */
} /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
} /* end if */
@@ -2104,32 +1995,25 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
int mpi_code; /* MPI error code */
int all_regular = 0; /* If this chunk's selections are regular on all processes */
+ /* Determine if this process has regular selections */
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
- is_regular = 1;
+ is_regular = 1;
else
is_regular = 0;
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ /* Determine if all processes have regular selections */
+ if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
- if(!is_regular) {
+ /* For regular selection,
+ if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
+ unless spaces for all processors are regular, independent read operation should be performed.*/
+ if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
- }
- else {
- /* For regular selection,
- if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
- unless spaces for all processors are regular, independent read operation should be performed.*/
- if(!all_regular) {
- /* Switch to independent I/O (temporarily) */
- make_ind = TRUE;
- make_coll = TRUE;
- } /* end if */
- } /* end else */
+ } /* end if */
} /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
} /* end if */
@@ -3203,103 +3087,6 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5D_get_collective_io_consensus
- *
- * Purpose: Compare notes with all other processes involved in this I/O
- * and see if all are go for collective I/O.
- *
- * If all are, return TRUE.
- *
- * If any process can't manage collective I/O, then collective
- * I/O is impossible, and we return FALSE.
- *
- * If the flags indicate that collective I/O is impossible,
- * skip the interprocess communication and just return FALSE.
- *
- * In any error is detected, return FAIL.
- *
- * Return: Success: TRUE or FALSE
- *
- * Failure: FAIL
- *
- * Programmer: JRM -- 8/30/04
- *
- * Modifications:
- *
- * None.
- *
- *-------------------------------------------------------------------------
- */
-
-#ifdef H5_HAVE_PARALLEL
-static htri_t
-H5D_get_collective_io_consensus(const H5F_t *file,
- const htri_t local_opinion,
- const unsigned flags)
-{
- htri_t ret_value = FAIL; /* will update if successful */
- MPI_Comm comm;
- int int_local_opinion;
- int consensus;
- int mpi_result;
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_get_collective_io_consensus);
-
- HDassert ( ( local_opinion == TRUE ) || ( local_opinion == FALSE ) );
-
- /* Don't do the interprocess communication unless the Parallel I/O
- * conversion flag is set -- there may not be other processes to
- * talk to.
- */
- if ( ! ( flags & flags&H5S_CONV_PAR_IO_POSSIBLE ) ) {
-
- HGOTO_DONE(FALSE);
- }
-
- comm = H5F_mpi_get_comm(file);
-
- if ( comm == MPI_COMM_NULL )
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, \
- "can't retrieve MPI communicator")
-
- if ( local_opinion == TRUE ) {
-
- int_local_opinion = 1;
-
- } else {
-
- int_local_opinion = 0;
- }
-
- mpi_result = MPI_Allreduce((void *)(&int_local_opinion),
- (void *)(&consensus),
- 1,
- MPI_INT,
- MPI_LAND,
- comm);
-
- if ( mpi_result != MPI_SUCCESS )
- HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_result)
-
- if ( consensus ) {
-
- ret_value = TRUE;
-
- } else {
-
- ret_value = FALSE;
- }
-
-done:
-
- FUNC_LEAVE_NOAPI(ret_value);
-
-} /* H5D_get_collective_io_consensus() */
-
-#endif /* H5_HAVE_PARALLEL */
-
-
-/*-------------------------------------------------------------------------
* Function: H5D_ioinfo_init
*
* Purpose: Routine for determining correct I/O operations for
@@ -3328,16 +3115,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
- *tpath, unsigned
-#ifndef H5_HAVE_PARALLEL
- UNUSED
-#endif /* H5_HAVE_PARALLEL */
- flags,
+ *tpath,
H5D_io_info_t *io_info)
{
-#ifdef H5_HAVE_PARALLEL
- htri_t opt; /* Flag whether a selection is optimizable */
-#endif /* H5_HAVE_PARALLEL */
herr_t ret_value = SUCCEED; /* Return value */
#if defined H5_HAVE_PARALLEL || defined H5S_DEBUG
@@ -3364,7 +3144,12 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
io_info->ops=dset->shared->io_ops;
#ifdef H5_HAVE_PARALLEL
+ /* Start in the "not modified" xfer_mode state */
+ io_info->xfer_mode_changed = FALSE;
+
if(IS_H5FD_MPI(dset->ent.file)) {
+ htri_t opt; /* Flag whether a selection is optimizable */
+
/* Get MPI communicator */
if((io_info->comm = H5F_mpi_get_comm(dset->ent.file)) == MPI_COMM_NULL)
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
@@ -3372,72 +3157,69 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
/*
* Check if we can set direct MPI-IO read/write functions
*/
- opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags);
+ opt=H5D_mpio_opt_possible(io_info, mem_space, file_space, tpath);
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace ");
- opt = H5D_get_collective_io_consensus(dset->ent.file, opt, flags);
- if ( opt == FAIL )
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "check for collective I/O consensus failed.");
-
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */
io_info->ops.read = H5D_mpio_select_read;
io_info->ops.write = H5D_mpio_select_write;
-
- /* Indicate that the I/O will use collective */
- io_info->use_par_opt_io=TRUE;
} /* end if */
else {
/* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
- /* Indicate that the I/O will _NOT_ be parallel, use independent IO */
- io_info->use_par_opt_io=FALSE;
- } /* end else */
-
- /* Start in the "not modified" state */
- io_info->xfer_mode_changed=FALSE;
-
- /* If we can't or won't be doing collective I/O, but the user
- * asked for collective I/O, change the request to use independent I/O
- */
- if(!(io_info->use_par_opt_io && H5T_path_noop(tpath) &&
- H5Z_xform_noop(dxpl_cache->data_xform_prop))
- && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
- H5P_genplist_t *dx_plist; /* Data transer property list */
-
- /* Get the dataset transfer property list */
- if (NULL == (dx_plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
-
- /* Change the xfer_mode to independent for handling the I/O */
- io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
- if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
-
- /* Set the pointers to the non-MPI-specific routines */
- io_info->ops.read = H5D_select_read;
- io_info->ops.write = H5D_select_write;
-
- /* Indicate that the transfer mode should be restored before returning
- * to user.
+ /* If we won't be doing collective I/O, but the user asked for
+ * collective I/O, change the request to use independent I/O, but
+ * mark it so that we remember to revert the change.
*/
- io_info->xfer_mode_changed=TRUE;
- } /* end if */
+ if(io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+
+ /* Get the dataset transfer property list */
+ if (NULL == (dx_plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
+
+ /* Change the xfer_mode to independent for handling the I/O */
+ io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
+ if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
+
+ /* Indicate that the transfer mode should be restored before returning
+ * to user.
+ */
+ io_info->xfer_mode_changed = TRUE;
+ } /* end if */
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ /**** Test for collective chunk IO
+ notice the following code should be removed after
+ a more general collective chunk IO algorithm is applied.
+ (This property is only reset for independent I/O)
+ */
+ if(dset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
+ htri_t check_prop;
+
+ check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
+ if(check_prop < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
+ if(check_prop > 0) {
+ int prop_value = 0;
+
+ if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
+ HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
+ } /* end if */
+ } /* end if */
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+ } /* end else */
} /* end if */
else {
- /* Indicate that the I/O will _NOT_ be parallel */
- io_info->use_par_opt_io=FALSE;
-
/* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
-
- /* Set the "not modified" state */
- io_info->xfer_mode_changed=FALSE;
} /* end else */
#else /* H5_HAVE_PARALLEL */
io_info->ops.read = H5D_select_read;
@@ -3570,13 +3352,14 @@ done:
static herr_t
H5D_ioinfo_term(H5D_io_info_t *io_info)
{
- H5P_genplist_t *dx_plist; /* Data transer property list */
herr_t ret_value = SUCCEED; /*return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_term)
/* Check if we need to revert the change to the xfer mode */
- if (io_info->use_par_opt_io && io_info->xfer_mode_changed) {
+ if (io_info->xfer_mode_changed) {
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+
/* Get the dataset transfer property list */
if (NULL == (dx_plist = H5I_object(io_info->dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
@@ -3619,13 +3402,9 @@ H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
/* Get the number of chunks to perform I/O on */
num_chunkf = H5SL_count(fm->fsel);
- /* Determine the minimum and maximum # of chunks for all processes */
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&num_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,io_info->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
-
- /* Broadcast the flag indicating the number of chunks are the same */
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,io_info->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ /* Determine the minimum # of chunks for all processes */
+ if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&num_chunkf, min_chunkf, 1, MPI_INT, MPI_MIN, io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
done:
FUNC_LEAVE_NOAPI(ret_value);
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 572aaca..479db17 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -24,7 +24,6 @@
#define H5D_PACKAGE /*suppress error about including H5Dpkg */
-
#include "H5private.h" /* Generic Functions */
#include "H5Dpkg.h" /* Datasets */
#include "H5Eprivate.h" /* Error handling */
@@ -61,35 +60,51 @@ H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size,
*-------------------------------------------------------------------------
*/
htri_t
-H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags)
+H5D_mpio_opt_possible( const H5D_io_info_t *io_info,
+ const H5S_t *mem_space, const H5S_t *file_space, const H5T_path_t *tpath)
{
+ int local_opinion = TRUE; /* This process's idea of whether to perform collective I/O or not */
+ int consensus; /* Consensus opinion of all processes */
+ int mpi_code; /* MPI error code */
htri_t ret_value=TRUE;
FUNC_ENTER_NOAPI(H5D_mpio_opt_possible, FAIL);
/* Check args */
- assert(dset);
+ assert(io_info);
assert(mem_space);
assert(file_space);
- /* Parallel I/O conversion flag must be set, if it is not collective IO, go to false. */
- if(!(flags&H5S_CONV_PAR_IO_POSSIBLE))
+ /* For independent I/O, get out quickly and don't try to form consensus */
+ if (io_info->dxpl_cache->xfer_mode==H5FD_MPIO_INDEPENDENT)
HGOTO_DONE(FALSE);
+ /* Optimized MPI types flag must be set and it is must be collective IO */
+ /* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */
+ if (!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(io_info->dset->ent.file))) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
+
/* Check whether these are both simple or scalar dataspaces */
if (!((H5S_SIMPLE==H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space))
- && (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space))))
- HGOTO_DONE(FALSE);
-
+ && (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space)))) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
/* Can't currently handle point selections */
- if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space))
- HGOTO_DONE(FALSE);
+ if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space)) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
/* Dataset storage must be contiguous or chunked */
- if ((flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CONTIGUOUS &&
- (flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED)
- HGOTO_DONE(FALSE);
+ if (!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
+ io_info->dset->shared->layout.type == H5D_CHUNKED)) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
/*The handling of memory space is different for chunking
and contiguous storage,
@@ -102,18 +117,41 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f
support complicated MPI derived data type, we will
set use_par_opt_io = FALSE.
*/
- if(dset->shared->layout.type == H5D_CONTIGUOUS) {
-
#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) ||
- (H5S_SELECT_IS_REGULAR(mem_space) != TRUE))
- HGOTO_DONE(FALSE);
+ if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS)
+ if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) ||
+ (H5S_SELECT_IS_REGULAR(mem_space) != TRUE)) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
#endif
- }
- if(dset->shared->layout.type == H5D_CHUNKED)
- if(dset->shared->dcpl_cache.pline.nused>0)
- HGOTO_DONE(FALSE); /* Perform the independent write operation */
+ /* Don't allow collective operations if filters need to be applied */
+ if(io_info->dset->shared->layout.type == H5D_CHUNKED)
+ if(io_info->dset->shared->dcpl_cache.pline.nused>0) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
+
+ /* Don't allow collective operations if datatype conversions need to happen */
+ if(!H5T_path_noop(tpath)) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
+
+ /* Don't allow collective operations if data transform operations should occur */
+ if(!H5Z_xform_noop(io_info->dxpl_cache->data_xform_prop)) {
+ local_opinion = FALSE;
+ goto broadcast;
+ } /* end if */
+
+broadcast:
+ /* Form consensus opinion among all processes about whether to perform
+ * collective I/O */
+ if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_opinion, &consensus, 1, MPI_INT, MPI_LAND, io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
+
+ ret_value = consensus > 0 ? TRUE : FALSE;
done:
FUNC_LEAVE_NOAPI(ret_value);
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index cf416f3..609b590 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -105,7 +105,6 @@ typedef struct H5D_io_info_t {
hid_t dxpl_id; /* Original DXPL ID */
#ifdef H5_HAVE_PARALLEL
MPI_Comm comm; /* MPI communicator for file */
- hbool_t use_par_opt_io; /* Whether the 'optimized' I/O routines with be parallel */
hbool_t xfer_mode_changed; /* Whether the transfer mode was changed */
#endif /* H5_HAVE_PARALLEL */
const H5D_storage_t *store; /* Dataset storage info */
@@ -324,8 +323,8 @@ H5_DLL herr_t H5D_mpio_spaces_span_write(H5D_io_info_t *io_info,
/* MPI-IO function to check if a direct I/O transfer is possible between
* memory and the file */
-H5_DLL htri_t H5D_mpio_opt_possible(const H5D_t *dset, const H5S_t *mem_space,
- const H5S_t *file_space, const unsigned flags);
+H5_DLL htri_t H5D_mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *mem_space,
+ const H5S_t *file_space, const H5T_path_t *tpath);
#endif /* H5_HAVE_PARALLEL */
/* Testing functions */