From da9fd4bb1800ee29266a9b1b2201fcab4b7d94f4 Mon Sep 17 00:00:00 2001 From: John Mainzer Date: Tue, 31 Aug 2004 17:12:07 -0500 Subject: [svn-r9178] Purpose: Fix parallel bug reported by Thomas Guignon, in which different processes became confused as to whether they were doing collective or individual I/O. Description: When one process had a point selection, and another didn't, the first concluded that the I/O was independent, while the second presumed that it was collective. A hang resulted. Solution: Get all processes involved in an I/O to compare notes. If all agree that the I/O is collective, they proceed with same. If any think the I/O should be independent, all use independent I/O. Note that this is an interim fix -- the correct solution is to support collective I/O on point selections. This will take a while. Platforms tested: copper h5committested Eirene (serial and parallel) In the parallel test on Eirene, I encountered a bug in h5repacktst. However the problem vanished on recompile. Since I couldn't reproduce it elsewhere, I went ahead with the checkin. Given my druthers, I would have liked to study the code more carefully before this check-in. However, there is some time pressure. The new code implementing the consensus check must not be executed unless MPI is initialized, and there is a communicator associated with the file. I think my guards against this case are adequate, but if we run into a hang or an illegal instruction error, this change should be suspect. Misc. 
update: --- src/H5S.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) diff --git a/src/H5S.c b/src/H5S.c index 3ad61f8..ccc023c 100644 --- a/src/H5S.c +++ b/src/H5S.c @@ -33,6 +33,12 @@ static herr_t H5S_set_extent_simple (H5S_t *space, unsigned rank, const hsize_t *dims, const hsize_t *max); static htri_t H5S_is_simple(const H5S_t *sdim); +#ifdef H5_HAVE_PARALLEL +htri_t H5S_get_collective_io_consensus(const H5F_t *file, + const htri_t local_opinion, + const unsigned flags); +#endif /* H5_HAVE_PARALLEL */ + /* Interface initialization */ #define INTERFACE_INIT H5S_init_interface static int interface_initialize_g = 0; @@ -1376,6 +1382,103 @@ done: /*------------------------------------------------------------------------- + * Function: H5S_get_collective_io_consensus + * + * Purpose: Compare notes with all other processes involved in this I/O + * and see if all are go for collective I/O. + * + * If all are, return TRUE. + * + * If any process can't manage collective I/O, then collective + * I/O is impossible, and we return FALSE. + * + * If the flags indicate that collective I/O is impossible, + * skip the interprocess communication and just return FALSE. + * + * If any error is detected, return FAIL. + * + * Return: Success: TRUE or FALSE + * + * Failure: FAIL + * + * Programmer: JRM -- 8/30/04 + * + * Modifications: + * + * None. 
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+htri_t
+H5S_get_collective_io_consensus(const H5F_t *file,
+                                const htri_t local_opinion,
+                                const unsigned flags)
+{
+    htri_t   ret_value = FAIL;   /* will update if successful */
+    MPI_Comm comm;               /* communicator obtained from the file */
+    int      int_local_opinion;  /* local_opinion as 0/1 for the MPI_INT reduction */
+    int      consensus;          /* logical AND of the opinions across comm */
+    int      mpi_result;         /* return code of the MPI call */
+
+    FUNC_ENTER_NOAPI(H5S_get_collective_io_consensus, FAIL); /* htri_t function: error value is FAIL, not NULL */
+
+    HDassert ( ( local_opinion == TRUE ) || ( local_opinion == FALSE ) );
+
+    /* Don't do the interprocess communication unless the Parallel I/O
+     * conversion flag is set -- there may not be other processes to
+     * talk to.
+     */
+    if ( ! ( flags & H5S_CONV_PAR_IO_POSSIBLE ) ) {
+
+        HGOTO_DONE(FALSE);
+    }
+
+    comm = H5F_mpi_get_comm(file);
+
+    if ( comm == MPI_COMM_NULL )
+        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, \
+                    "can't retrieve MPI communicator")
+
+    if ( local_opinion == TRUE ) {
+
+        int_local_opinion = 1;
+
+    } else {
+
+        int_local_opinion = 0;
+    }
+
+    mpi_result = MPI_Allreduce((void *)(&int_local_opinion),
+                               (void *)(&consensus),
+                               1,
+                               MPI_INT,
+                               MPI_LAND, /* non-zero only if every process voted 1 */
+                               comm);
+
+    if ( mpi_result != MPI_SUCCESS )
+        HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_result)
+
+    if ( consensus ) {
+
+        ret_value = TRUE;
+
+    } else {
+
+        ret_value = FALSE;
+    }
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5S_get_collective_io_consensus() */
+
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
  * Function:	H5S_find
  *
  * Purpose:	Given two data spaces (MEM_SPACE and FILE_SPACE) this
@@ -1403,6 +1506,10 @@ done:
  *	along with other data whose scope is the conversion path (like path
  *	statistics).
  *
+ *      John Mainzer, 8/30/04
+ *      Modified code to check with all other processes that have the
+ *      file open before OKing collective I/O.
+ * *------------------------------------------------------------------------- */ H5S_conv_t * @@ -1459,6 +1566,12 @@ const H5O_layout_t *layout if(opt==FAIL) HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, "invalid check for direct IO dataspace "); + opt = H5S_get_collective_io_consensus(file, opt, flags); + + if ( opt == FAIL ) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, \ + "check for collective I/O consensus failed."); + /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { /* Set the pointers to the MPI-specific routines */ @@ -1501,6 +1614,12 @@ const H5O_layout_t *layout if(opt==FAIL) HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, "invalid check for direct IO dataspace "); + opt = H5S_get_collective_io_consensus(file, opt, flags); + + if ( opt == FAIL ) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, NULL, \ + "check for collective I/O consensus failed."); + /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { /* Set the pointers to the MPI-specific routines */ -- cgit v0.12