summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert Kim Yates <rkyates@llnl.gov>1998-12-07 23:13:05 (GMT)
committerRobert Kim Yates <rkyates@llnl.gov>1998-12-07 23:13:05 (GMT)
commitbf250c6bb28b13f3a4f986a6eab52c5c70ee84b6 (patch)
treef0ca8e2b051e7655bab30c06a9398e742b212858
parentb58e9a6253c5b0a5beff9bd444f68cf64c2533e9 (diff)
downloadhdf5-bf250c6bb28b13f3a4f986a6eab52c5c70ee84b6.zip
hdf5-bf250c6bb28b13f3a4f986a6eab52c5c70ee84b6.tar.gz
hdf5-bf250c6bb28b13f3a4f986a6eab52c5c70ee84b6.tar.bz2
[svn-r960] Added interprocess coordination in H5F_istore_allocate around calls to
H5F_istore_lock and H5F_istore_inlock to prevent race between reading and writing data chunks that caused "holes" (i.e., sequences of 0s) in chunked datasets.
-rw-r--r--src/H5Distore.c18
-rw-r--r--src/H5Fistore.c18
-rw-r--r--src/H5Fmpio.c94
-rw-r--r--src/H5Fprivate.h2
4 files changed, 132 insertions, 0 deletions
diff --git a/src/H5Distore.c b/src/H5Distore.c
index 89d087c..4d2d0dd 100644
--- a/src/H5Distore.c
+++ b/src/H5Distore.c
@@ -2054,6 +2054,10 @@ H5F_istore_get_addr (H5F_t *f, const H5O_layout_t *layout,
* rky 980923
* Added barrier to preclude racing with data writes.
*
+ * rky 19981207
+ * Added Wait-Signal wrapper around unlock-lock critical region
+ * to prevent race condition (unlock reads, lock writes the chunk).
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -2118,6 +2122,14 @@ H5F_istore_allocate (H5F_t *f, const H5O_layout_t *layout,
* Lock the chunk, copy from application to chunk, then unlock the
* chunk.
*/
+
+ /* rky 981207 Serialize access to this critical region. */
+ if (SUCCEED!=
+ H5PC_Wait_for_left_neighbor(f->shared->access_parms->u.mpio.comm))
+ {
+ HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
+ "unable to lock the data chunk");
+ }
if (NULL==(chunk=H5F_istore_lock (f, layout, split_ratios, pline,
fill, chunk_offset, FALSE,
&idx_hint))) {
@@ -2130,6 +2142,12 @@ H5F_istore_allocate (H5F_t *f, const H5O_layout_t *layout,
HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
"uanble to unlock raw data chunk");
}
+ if (SUCCEED!=
+ H5PC_Signal_right_neighbor(f->shared->access_parms->u.mpio.comm))
+ {
+ HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
+ "unable to unlock the data chunk");
+ }
#ifdef NO
} else {
#ifdef AKC
diff --git a/src/H5Fistore.c b/src/H5Fistore.c
index 89d087c..4d2d0dd 100644
--- a/src/H5Fistore.c
+++ b/src/H5Fistore.c
@@ -2054,6 +2054,10 @@ H5F_istore_get_addr (H5F_t *f, const H5O_layout_t *layout,
* rky 980923
* Added barrier to preclude racing with data writes.
*
+ * rky 19981207
+ * Added Wait-Signal wrapper around unlock-lock critical region
+ * to prevent race condition (unlock reads, lock writes the chunk).
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -2118,6 +2122,14 @@ H5F_istore_allocate (H5F_t *f, const H5O_layout_t *layout,
* Lock the chunk, copy from application to chunk, then unlock the
* chunk.
*/
+
+ /* rky 981207 Serialize access to this critical region. */
+ if (SUCCEED!=
+ H5PC_Wait_for_left_neighbor(f->shared->access_parms->u.mpio.comm))
+ {
+ HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
+ "unable to lock the data chunk");
+ }
if (NULL==(chunk=H5F_istore_lock (f, layout, split_ratios, pline,
fill, chunk_offset, FALSE,
&idx_hint))) {
@@ -2130,6 +2142,12 @@ H5F_istore_allocate (H5F_t *f, const H5O_layout_t *layout,
HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
"uanble to unlock raw data chunk");
}
+ if (SUCCEED!=
+ H5PC_Signal_right_neighbor(f->shared->access_parms->u.mpio.comm))
+ {
+ HRETURN_ERROR (H5E_IO, H5E_WRITEERROR, FAIL,
+ "unable to unlock the data chunk");
+ }
#ifdef NO
} else {
#ifdef AKC
diff --git a/src/H5Fmpio.c b/src/H5Fmpio.c
index b12072b..a9ac303 100644
--- a/src/H5Fmpio.c
+++ b/src/H5Fmpio.c
@@ -992,5 +992,99 @@ H5F_haddr_to_MPIOff( haddr_t addr, MPI_Offset *mpi_off )
return (ret_val);
}
+
+/*-------------------------------------------------------------------------
+ * Function: H5PC_Wait_for_left_neighbor
+ *
+ * Purpose: Blocks until (empty) msg is received
+ * from immediately lower-rank neighbor.
+ * In conjunction with Signal_right_neighbor,
+ * useful for enforcing 1-process-at-at-time access
+ * to critical regions to avoid race conditions
+ * (though it is overkill to require that the processes
+ * be allowed to proceed strictly in order of their rank).
+ *
+ * NOTE: This routine doesn't read or write any file,
+ * just performs interprocess coordination.
+ * It really should reside in a separate package of such routines.
+ *
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ *
+ * Programmer: rky
+ * 19981207
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PC_Wait_for_left_neighbor( MPI_Comm comm )
+{
+ char msgbuf[1];
+ int myid, mpi_err;
+ MPI_Status rcvstat;
+
+ FUNC_ENTER (H5PC_Wait_for_left_neighbor, FAIL);
+
+ mpi_err = MPI_Comm_rank( comm, &myid );
+ if (MPI_SUCCESS!=mpi_err)
+ HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed");
+ /* p0 has no left neighbor; all other procs wait for msg */
+ if (myid != 0) {
+ mpi_err = MPI_Recv( &msgbuf, 1, MPI_CHAR, myid-1, MPI_ANY_TAG, comm,
+ &rcvstat );
+ if (MPI_SUCCESS!=mpi_err)
+ HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Recv failed");
+ }
+ FUNC_LEAVE (SUCCEED);
+} /* H5PC_Wait_for_left_neighbor */
+
+/*-------------------------------------------------------------------------
+ * Function: H5PC_Signal_right_neighbor
+ *
+ * Purpose: Blocks until (empty) msg is received
+ * from immediately lower-rank neighbor.
+ * In conjunction with Wait_for_left_neighbor,
+ * useful for enforcing 1-process-at-at-time access
+ * to critical regions to avoid race conditions
+ * (though it is overkill to require that the processes
+ * be allowed to proceed strictly in order of their rank).
+ *
+ * NOTE: This routine doesn't read or write any file,
+ * just performs interprocess coordination.
+ * It really should reside in a separate package of such routines.
+ *
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ *
+ * Programmer: rky
+ * 19981207
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PC_Signal_right_neighbor( MPI_Comm comm )
+{
+ char msgbuf[1];
+ int myid, numprocs, mpi_err;
+
+ FUNC_ENTER (H5PC_Signal_right_neighbor, FAIL);
+
+ mpi_err = MPI_Comm_size( comm, &numprocs );
+ if (MPI_SUCCESS!=mpi_err)
+ HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed");
+ mpi_err = MPI_Comm_rank( comm, &myid );
+ if (MPI_SUCCESS!=mpi_err)
+ HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed");
+ if (myid != (numprocs-1)) {
+ mpi_err = MPI_Send( &msgbuf, 0/*empty msg*/, MPI_CHAR, myid+1, 0, comm);
+ if (MPI_SUCCESS!=mpi_err)
+ HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Send failed");
+ }
+ FUNC_LEAVE (SUCCEED);
+} /* H5PC_Signal_right_neighbor */
#endif /* HAVE_PARALLEL */
diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h
index 6b5f0df..3014d1c 100644
--- a/src/H5Fprivate.h
+++ b/src/H5Fprivate.h
@@ -654,6 +654,8 @@ herr_t H5F_addr_pack(H5F_t *f, haddr_t *addr, const long objno[2]);
/* Functions for MPI-IO */
#ifdef HAVE_PARALLEL
htri_t H5F_mpio_tas_allsame(H5F_low_t *lf, hbool_t newval );
+herr_t H5PC_Wait_for_left_neighbor( MPI_Comm comm );
+herr_t H5PC_Signal_right_neighbor( MPI_Comm comm );
#endif /* HAVE_PARALLEL */
#endif