summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Quincey Koziol <koziol@hdfgroup.org> 2002-06-24 13:45:21 (GMT)
committer Quincey Koziol <koziol@hdfgroup.org> 2002-06-24 13:45:21 (GMT)
commit 02a2764a1d5ae228379e70835e155a00aa4e85ca (patch)
tree e480d65fa678f1cdc6525b609c187eeb6d099dd0
parent 5f486e6e4648a4aad2cbf97a632f14a5254b8c47 (diff)
download hdf5-02a2764a1d5ae228379e70835e155a00aa4e85ca.zip
hdf5-02a2764a1d5ae228379e70835e155a00aa4e85ca.tar.gz
hdf5-02a2764a1d5ae228379e70835e155a00aa4e85ca.tar.bz2
[svn-r5689] Purpose:
    Bug Fix

Description:
    When parallel I/O is used, the MPI-I/O VFL driver uses a "lazy" model to call MPI_File_set_view() in order to reduce the number of calls to this function. However, this is unsafe: if a collective I/O which uses MPI derived types (and thus uses MPI_File_set_view()) is immediately followed by an independent I/O, the code will attempt to call MPI_File_set_view() in order to switch back to the default view of the file. MPI_File_set_view() is a collective call, however, and this causes the application to hang.

Solution:
    Removed the "lazy" MPI_File_set_view() code. Instead, the file view is set when it is needed (with MPI derived types) and immediately set back to the default view before leaving the I/O routine.

Platforms tested:
    IRIX64 6.5 (modi4) w/parallel. Also tested with the latest development and release code for the SAF library, which now works correctly with this change. (Although the release branch of the SAF library seems to have a bug, this 1.4.4 release candidate code gets as far as the version the SAF library is released on top of (1.4.2-patch1, I believe).)
-rw-r--r-- release_docs/RELEASE.txt 3
-rw-r--r-- src/H5FDmpio.c 57
2 files changed, 39 insertions, 21 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index cfed11b..b24171c 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -71,6 +71,9 @@ New Features
o Parallel Library
==================
+ * Fixed bug in parallel I/O routines where a collective I/O which used
+ MPI derived types, followed by an independent I/O would cause the library
+ to hang. QAK 2002/06/24
* Added environment variable flag to control whether creating MPI derived
types is preferred or not. This can affect performance, depending on
which way the MPI-I/O library is optimized for. The default is set to
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 6724d18..09372f2 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -70,7 +70,6 @@ typedef struct H5FD_mpio_t {
haddr_t eof; /*end-of-file marker */
haddr_t eoa; /*end-of-address marker */
haddr_t last_eoa; /* Last known end-of-address marker */
- unsigned old_use_view; /*remember value of use_view */
} H5FD_mpio_t;
/* Prototypes */
@@ -1179,6 +1178,11 @@ H5FD_mpio_get_eof(H5FD_t *_file)
* the address of the dataset in MPI_File_set_view() calls, as
* necessary.
*
+ * Quincey Koziol - 2002/06/24
+ * Removed "lazy" MPI_File_set_view() calls, since they would fail
+ * if the first I/O was a collective I/O using MPI derived types
+ * and the next I/O was an independent I/O.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -1193,7 +1197,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int size_i, bytes_read, n;
- unsigned use_view_this_time=0, used_view_last_time;
+ unsigned use_view_this_time=0;
herr_t ret_value=SUCCEED;
FUNC_ENTER(H5FD_mpio_read, FAIL);
@@ -1266,20 +1270,14 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
}
/*
- * Don't bother to reset the view if we're not using the types this time,
- * and did we didn't use them last time either.
+ * Set the file view when we are using MPI derived types
*/
- used_view_last_time = file->old_use_view;
- if (used_view_last_time || /* change to new ftype or MPI_BYTE */
- use_view_this_time) { /* almost certainly a different ftype */
+ if (use_view_this_time) {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
- }
+ } /* end if */
- /* Keep the 'use view' flag around for the next I/O */
- file->old_use_view = use_view_this_time;
-
/* Read the data. */
assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) {
@@ -1335,6 +1333,15 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
/*
+ * Reset the file view when we used MPI derived types
+ */
+ if (use_view_this_time) {
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if (MPI_SUCCESS != MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
+ } /* end if */
+
+ /*
* This gives us zeroes beyond end of physical MPI file. What about
* reading past logical end of HDF5 file???
*/
@@ -1469,6 +1476,11 @@ done:
* the address of the dataset in MPI_File_set_view() calls, as
* necessary.
*
+ * Quincey Koziol - 2002/06/24
+ * Removed "lazy" MPI_File_set_view() calls, since they would fail
+ * if the first I/O was a collective I/O using MPI derived types
+ * and the next I/O was an independent I/O.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -1483,7 +1495,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int size_i, bytes_written;
- unsigned use_view_this_time=0, used_view_last_time;
+ unsigned use_view_this_time=0;
herr_t ret_value=SUCCEED;
FUNC_ENTER(H5FD_mpio_write, FAIL);
@@ -1556,20 +1568,14 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
}
/*
- * Don't bother to reset the view if we're not using the types this time,
- * and did we didn't use them last time either.
+ * Set the file view when we are using MPI derived types
*/
- used_view_last_time = file->old_use_view;
- if (used_view_last_time || /* change to new ftype or MPI_BYTE */
- use_view_this_time) { /* almost certainly a different ftype */
+ if (use_view_this_time) {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
- }
+ } /* end if */
- /* Keep the 'use view' flag around for the next I/O */
- file->old_use_view = use_view_this_time;
-
/* Only p<round> will do the actual write if all procs in comm write same data */
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
if (file->mpi_rank != file->mpi_round) {
@@ -1641,6 +1647,15 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
if (bytes_written<0 || bytes_written>size_i)
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed");
+ /*
+ * Reset the file view when we used MPI derived types
+ */
+ if (use_view_this_time) {
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if (MPI_SUCCESS != MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
+ } /* end if */
+
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
file->eof = HADDR_UNDEF;