summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJohn Mainzer <mainzer@hdfgroup.org>2004-08-31 22:12:07 (GMT)
committerJohn Mainzer <mainzer@hdfgroup.org>2004-08-31 22:12:07 (GMT)
commitda9fd4bb1800ee29266a9b1b2201fcab4b7d94f4 (patch)
tree2e3f812b4e8f8c17ca8e36e72298cfc21fe77ef4 /src
parent2202352d2840a4abbbd85ab654124194cade2b07 (diff)
downloadhdf5-da9fd4bb1800ee29266a9b1b2201fcab4b7d94f4.zip
hdf5-da9fd4bb1800ee29266a9b1b2201fcab4b7d94f4.tar.gz
hdf5-da9fd4bb1800ee29266a9b1b2201fcab4b7d94f4.tar.bz2
[svn-r9178] Purpose:
Fix parallel bug reported by Thomas Guignon, in which different processes became confused as to whether they were doing collective or individual I/O. Description: When one process had a point selection, and another didn't, the first concluded that the I/O was independent, while the second presumed that it was collective. A hang resulted. Solution: Get all processes involved in an I/O to compare notes. If all agree that the I/O is collective, they proceed with same. If any think the I/O should be independent, all use independent I/O. Note that this is an interim fix -- the correct solution is to support collective I/O on point selections. This will take a while. Platforms tested: copper h5committested Eirene (serial and parallel) In the parallel test on Eirene, I encountered a bug in h5repacktst. However the problem vanished on recompile. Since I couldn't reproduce it elsewhere, I went ahead with the checkin. Given my druthers, I would have liked to study the code more carefully before this check-in. However, there is some time pressure. The new code implementing the consensus check must not be executed unless MPI is initialized, and there is a communicator associated with the file. I think my guards against this case are adequate, but if we run into a hang or an illegal instruction error, this change should be suspect. Misc. update:
Diffstat (limited to 'src')
-rw-r--r--src/H5S.c119
1 files changed, 119 insertions, 0 deletions
diff --git a/src/H5S.c b/src/H5S.c
index 3ad61f8..ccc023c 100644
--- a/src/H5S.c
+++ b/src/H5S.c
@@ -33,6 +33,12 @@ static herr_t H5S_set_extent_simple (H5S_t *space, unsigned rank,
const hsize_t *dims, const hsize_t *max);
static htri_t H5S_is_simple(const H5S_t *sdim);
+#ifdef H5_HAVE_PARALLEL
+htri_t H5S_get_collective_io_consensus(const H5F_t *file,
+ const htri_t local_opinion,
+ const unsigned flags);
+#endif /* H5_HAVE_PARALLEL */
+
/* Interface initialization */
#define INTERFACE_INIT H5S_init_interface
static int interface_initialize_g = 0;
@@ -1376,6 +1382,103 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5S_get_collective_io_consensus
+ *
+ * Purpose: Compare notes with all other processes involved in this I/O
+ * and see if all are go for collective I/O.
+ *
+ * If all are, return TRUE.
+ *
+ * If any process can't manage collective I/O, then collective
+ * I/O is impossible, and we return FALSE.
+ *
+ * If the flags indicate that collective I/O is impossible,
+ * skip the interprocess communication and just return FALSE.
+ *
+ * If any error is detected, return FAIL.
+ *
+ * Return: Success: TRUE or FALSE
+ *
+ * Failure: FAIL
+ *
+ * Programmer: JRM -- 8/30/04
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+htri_t
+H5S_get_collective_io_consensus(const H5F_t *file,
+                                const htri_t local_opinion,
+                                const unsigned flags)
+{
+    htri_t   ret_value = FAIL;  /* stays FAIL unless a verdict is reached */
+    MPI_Comm comm;              /* communicator associated with the file */
+    int      int_local_opinion; /* local vote as a plain int for MPI */
+    int      consensus;         /* logical AND of every process's vote */
+    int      mpi_result;        /* return code of the MPI call */
+
+    /* This function returns htri_t, so the error value handed to the
+     * function-entry macro is FAIL rather than the pointer constant NULL,
+     * matching every other failure path below.
+     */
+    FUNC_ENTER_NOAPI(H5S_get_collective_io_consensus, FAIL);
+
+    /* The local vote must be a definite yes or no -- never FAIL. */
+    HDassert ( ( local_opinion == TRUE ) || ( local_opinion == FALSE ) );
+
+    /* Don't do the interprocess communication unless the Parallel I/O
+     * conversion flag is set -- there may not be other processes to
+     * talk to.  In that case collective I/O is simply not possible.
+     */
+    if ( ! ( flags & H5S_CONV_PAR_IO_POSSIBLE ) ) {
+
+        HGOTO_DONE(FALSE);
+    }
+
+    comm = H5F_mpi_get_comm(file);
+
+    if ( comm == MPI_COMM_NULL ) {
+
+        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL,
+                    "can't retrieve MPI communicator")
+    }
+
+    /* Reduce a plain int rather than an htri_t so that the buffer type
+     * matches the MPI_INT datatype passed to MPI_Allreduce.
+     */
+    int_local_opinion = ( local_opinion == TRUE ) ? 1 : 0;
+
+    /* MPI_LAND yields non-zero only if every process on the communicator
+     * voted non-zero; a single dissenting vote forces the result to 0 on
+     * all processes, so everyone reaches the same decision.
+     */
+    mpi_result = MPI_Allreduce(&int_local_opinion,
+                               &consensus,
+                               1,
+                               MPI_INT,
+                               MPI_LAND,
+                               comm);
+
+    if ( mpi_result != MPI_SUCCESS ) {
+
+        HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_result)
+    }
+
+    ret_value = consensus ? TRUE : FALSE;
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5S_get_collective_io_consensus() */
+
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
* Function: H5S_find
*
* Purpose: Given two data spaces (MEM_SPACE and FILE_SPACE) this
@@ -1403,6 +1506,10 @@ done:
* along with other data whose scope is the conversion path (like path
* statistics).
*
+ * John Mainzer, 8/30/04
+ * Modified code to check with all other processes that have the
+ * file open before OKing collective I/O.
+ *
*-------------------------------------------------------------------------
*/
H5S_conv_t *
@@ -1459,6 +1566,12 @@ const H5O_layout_t *layout
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, "invalid check for direct IO dataspace ");
+ opt = H5S_get_collective_io_consensus(file, opt, flags);
+
+ if ( opt == FAIL )
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, \
+ "check for collective I/O consensus failed.");
+
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */
@@ -1501,6 +1614,12 @@ const H5O_layout_t *layout
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, "invalid check for direct IO dataspace ");
+ opt = H5S_get_collective_io_consensus(file, opt, flags);
+
+ if ( opt == FAIL )
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, \
+ "check for collective I/O consensus failed.");
+
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */