summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Quincey Koziol <koziol@hdfgroup.org> 2003-11-20 14:37:11 (GMT)
committer: Quincey Koziol <koziol@hdfgroup.org> 2003-11-20 14:37:11 (GMT)
commit: 1b27db1755be9cef391ee93a0e3c03360cd0c8ff (patch)
tree: 3e413f6e2e0f9e3a141dabdb0708d6e9dc955b15
parent: 649ddde342329272100b74c7be0cd3c9fc5f8793 (diff)
download: hdf5-1b27db1755be9cef391ee93a0e3c03360cd0c8ff.zip
hdf5-1b27db1755be9cef391ee93a0e3c03360cd0c8ff.tar.gz
hdf5-1b27db1755be9cef391ee93a0e3c03360cd0c8ff.tar.bz2
[svn-r7861] Purpose:
    Bug fix

Description:
    Our previous "optimization" of metadata writing, which only wrote metadata
    from one process, was abusing MPI-I/O. After some consultation with
    Rob Ross and Rajeev Thakur, Albert & I have come up with a solution...

Solution:
    Instead of only writing from one process, issue a collective write
    operation with all processes for metadata writes.

Platforms tested:
    FreeBSD 4.9 (sleipnir)
    h5committest
-rw-r--r--  src/H5F.c            2
-rw-r--r--  src/H5FDmpio.c      21
-rw-r--r--  src/H5FDmpiposix.c  12
-rw-r--r--  src/H5Fpkg.h         2
4 files changed, 37 insertions, 0 deletions
diff --git a/src/H5F.c b/src/H5F.c
index 63d6408..586e416 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -232,6 +232,7 @@ H5F_init_interface(void)
FUNC_ENTER_NOINIT(H5F_init_interface);
+#ifdef OLD_METADATA_WRITE
#ifdef H5_HAVE_PARALLEL
{
/* Allow MPI buf-and-file-type optimizations? */
@@ -241,6 +242,7 @@ H5F_init_interface(void)
}
}
#endif /* H5_HAVE_PARALLEL */
+#endif /* OLD_METADATA_WRITE */
/*
* Initialize the atom group for the file IDs. There are two groups:
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index dc98bc2..492cba3 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -149,6 +149,7 @@ static int H5FD_mpio_Debug[256] =
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
#endif
+#ifdef OLD_METADATA_WRITE
/* Global var to allow elimination of redundant metadata writes
* to be controlled by the value of an environment variable. */
/* Use the elimination by default unless this is the Intel Red machine */
@@ -157,6 +158,7 @@ hbool_t H5_mpi_1_metawrite_g = TRUE;
#else
hbool_t H5_mpi_1_metawrite_g = FALSE;
#endif
+#endif /* OLD_METADATA_WRITE */
/* Interface initialization */
#define PABLO_MASK H5FD_mpio_mask
@@ -1852,6 +1854,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
+#ifdef OLD_METADATA_WRITE
/* Only p<round> will do the actual write if all procs in comm write same metadata */
if (H5_mpi_1_metawrite_g) {
if (file->mpi_rank != H5_PAR_META_WRITE) {
@@ -1865,6 +1868,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HGOTO_DONE(SUCCEED) /* skip the actual write */
}
}
+#else /* OLD_METADATA_WRITE */
+ /* Remember that views are used */
+ use_view_this_time=TRUE;
+
+ /*
+ * Set the file view when we are using MPI derived types
+ */
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+
+ /* When using types, use the address as the displacement for
+ * MPI_File_set_view and reset the address for the read to zero
+ */
+ mpi_off=0;
+#endif /* OLD_METADATA_WRITE */
} /* end if */
/* Write the data. */
@@ -1941,6 +1960,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
file->eof = HADDR_UNDEF;
done:
+#ifdef OLD_METADATA_WRITE
/* Guard against getting into metadate broadcast in failure cases */
if(ret_value!=FAIL) {
/* if only p<round> writes, need to broadcast the ret_value to other processes */
@@ -1949,6 +1969,7 @@ done:
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code);
} /* end if */
} /* end if */
+#endif /* OLD_METADATA_WRITE */
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c
index 4920ade..8bca0bf 100644
--- a/src/H5FDmpiposix.c
+++ b/src/H5FDmpiposix.c
@@ -224,6 +224,7 @@ static const H5FD_class_t H5FD_mpiposix_g = {
H5FD_FLMAP_SINGLE /*fl_map */
};
+#ifdef OLD_METADATA_WRITE
/* Global var to allow elimination of redundant metadata writes
* to be controlled by the value of an environment variable. */
/* Use the elimination by default unless this is the Intel Red machine */
@@ -232,6 +233,7 @@ hbool_t H5_mpiposix_1_metawrite_g = TRUE;
#else
hbool_t H5_mpiposix_1_metawrite_g = FALSE;
#endif
+#endif /* OLD_METADATA_WRITE */
/* Interface initialization */
#define PABLO_MASK H5FD_mpiposix_mask
@@ -1424,7 +1426,9 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
/* Only p<round> will do the actual write if all procs in comm write same metadata */
+#ifdef OLD_METADATA_WRITE
if (H5_mpiposix_1_metawrite_g)
+#endif /* OLD_METADATA_WRITE */
if (file->mpi_rank != H5_PAR_META_WRITE)
HGOTO_DONE(SUCCEED) /* skip the actual write */
} /* end if */
@@ -1498,11 +1502,19 @@ done:
} /* end if */
/* Guard against getting into metadata broadcast in failure cases */
else {
+#ifdef OLD_METADATA_WRITE
/* if only p<round> writes, need to broadcast the ret_value to other processes */
if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) {
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code);
} /* end if */
+#else /* OLD_METADATA_WRITE */
+ /* if only p<round> writes, need to broadcast the ret_value to other processes */
+ if (type!=H5FD_MEM_DRAW) {
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code);
+ } /* end if */
+#endif /* OLD_METADATA_WRITE */
} /* end else */
FUNC_LEAVE_NOAPI(ret_value);
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index 631d482..257e990 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -185,11 +185,13 @@ struct H5F_t {
H5F_mtab_t mtab; /* File mount table */
};
+#ifdef OLD_METADATA_WRITE
#ifdef H5_HAVE_PARALLEL
/* Whether a single process writes metadata */
H5_DLLVAR hbool_t H5_mpi_1_metawrite_g;
H5_DLLVAR hbool_t H5_mpiposix_1_metawrite_g;
#endif /* H5_HAVE_PARALLEL */
+#endif /* OLD_METADATA_WRITE */
/* Private functions, not part of the publicly documented API */
#ifdef NOT_YET