summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlbert Cheng <acheng@hdfgroup.org>1998-04-13 04:31:23 (GMT)
committerAlbert Cheng <acheng@hdfgroup.org>1998-04-13 04:31:23 (GMT)
commit33a49221fccc5adc7ec9c5a556be4f6cb72f4e34 (patch)
tree9eab81feeb1694e8700bcb2e282d4df673bc2dbd
parent1a7e4bcd6c61b8f89001b8de9946995e579e1e79 (diff)
downloadhdf5-33a49221fccc5adc7ec9c5a556be4f6cb72f4e34.zip
hdf5-33a49221fccc5adc7ec9c5a556be4f6cb72f4e34.tar.gz
hdf5-33a49221fccc5adc7ec9c5a556be4f6cb72f4e34.tar.bz2
[svn-r343] First implementation of collective access support.
H5D.c Changed H5D_write and H5D_read to recognize collective transfer requests. Current algorithm puts the xfer-mode in the file->mpio structure. Not thread safe. Need to revise it to pass transfer parameter as a function parameters to the lower levels. H5Dprivate.h H5Dpublic.h Added xfer_mode to the transfer property list structure. Added defination of transfer modes, H5D_XFER_INDEPENDENT and H5D_XFER_COLLECTIVE. H5Farray.c Added code to handle collective access requests. Currently works for one contiguous block at a time. Need to revise it with the MPIO low level calls to combine all blocks as one truely collective call. H5P.c H5Ppublic.h Added H5Pset_xfer and H5Pget_xfer routines that can set and get the transfer mode of a data transfer property list.
-rw-r--r--src/H5D.c120
-rw-r--r--src/H5Dprivate.h3
-rw-r--r--src/H5Dpublic.h8
-rw-r--r--src/H5Farray.c77
-rw-r--r--src/H5P.c105
-rw-r--r--src/H5Ppublic.h2
6 files changed, 313 insertions, 2 deletions
diff --git a/src/H5D.c b/src/H5D.c
index 6d043ea..b06a9cf 100644
--- a/src/H5D.c
+++ b/src/H5D.c
@@ -67,6 +67,9 @@ const H5D_xfer_t H5D_xfer_dflt = {
NULL, /* Type conversion buffer or NULL */
NULL, /* Background buffer or NULL */
H5T_BKG_NO, /* Type of background buffer needed */
+#ifdef HAVE_PARALLEL
+ H5D_XFER_INDEPENDENT, /* Independent data transfer */
+#endif
};
/* Interface initialization? */
@@ -1108,6 +1111,9 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
size_t target_size; /*desired buffer size */
size_t request_nelmts; /*requested strip mine */
H5T_bkg_t need_bkg; /*type of background buf*/
+#ifdef HAVE_PARALLEL
+ int access_mode_saved = -1;
+#endif
FUNC_ENTER(H5D_read, FAIL);
@@ -1149,6 +1155,52 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
"unable to convert from file to memory data space");
}
+#ifdef HAVE_PARALLEL
+ /*
+ * Check if collective data transfer requested.
+ */
+ if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){
+ /* verify that the file can support collective access. */
+ /* The check may not be necessarily since collective access */
+ /* can always be simulated by independent access. */
+ /* Nevertheless, must check driver is MPIO before using those */
+ /* access_mode which exists only for MPIO case. */
+ if (dataset->ent.file->shared->access_parms.driver == H5F_LOW_MPIO){
+ /* Supports only no conversion, type or space, for now. */
+ if (H5T_conv_noop==tconv_func &&
+ NULL!=sconv_func->read) {
+ /*
+ * -AKC-
+ * "plant" the collective access mode into the file information
+ * so that the lower level mpio routines know to use collective
+ * access.
+ * This is not thread-safe, is a klutch for now.
+ * Should change all the I/O routines to pass along the xfer
+ * property list to the low level I/O for proper execution.
+ * Make it to work now. Must fix it later.
+ * -AKC-
+ */
+#ifdef AKC
+ printf("%s: collective access requested\n", FUNC);
+ printf("%s: current f->access_mode = %x\n", FUNC,
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode);
+#endif
+ access_mode_saved = dataset->ent.file->shared->access_parms.u.mpio.access_mode;
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode = H5ACC_COLLECTIVE;
+ status = (sconv_func->read)(dataset->ent.file, &(dataset->layout),
+ &(dataset->create_parms->efl),
+ H5T_get_size (dataset->type), file_space,
+ mem_space, buf/*out*/);
+ if (status>=0) goto succeed;
+ HGOTO_ERROR (H5E_DATASET, H5E_READERROR, FAIL,
+ "collective read failed");
+ }
+ }
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
+ "collective access not permissible");
+ }
+#endif /*HAVE_PARALLEL*/
+
/*
* If there is no type conversion then try reading directly into the
@@ -1293,6 +1345,15 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
if (bkg_buf && NULL==xfer_parms->bkg_buf) {
H5MM_xfree (bkg_buf);
}
+#ifdef HAVE_PARALLEL
+ /*
+ * Check if collective data transfer requested.
+ * If so, need to restore the access mode. Shouldnot needed.
+ */
+ if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode = access_mode_saved;
+ }
+#endif /*HAVE_PARALLEL*/
FUNC_LEAVE(ret_value);
}
@@ -1335,6 +1396,9 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
size_t target_size; /*desired buffer size */
size_t request_nelmts; /*requested strip mine */
H5T_bkg_t need_bkg; /*type of background buf*/
+#ifdef HAVE_PARALLEL
+ int access_mode_saved = -1;
+#endif
FUNC_ENTER(H5D_write, FAIL);
@@ -1376,6 +1440,53 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
"unable to convert from memory to file data space");
}
+#ifdef HAVE_PARALLEL
+ /*
+ * Check if collective data transfer requested.
+ */
+ if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){
+ /* verify that the file can support collective access. */
+ /* The check may not be necessarily since collective access */
+ /* can always be simulated by independent access. */
+ /* Nevertheless, must check driver is MPIO before using those */
+ /* access_mode which exists only for MPIO case. */
+ if (dataset->ent.file->shared->access_parms.driver == H5F_LOW_MPIO){
+ /* Supports only no conversion, type or space, for now. */
+ if (H5T_conv_noop==tconv_func &&
+ NULL!=sconv_func->write) {
+ /*
+ * -AKC-
+ * "plant" the collective access mode into the file information
+ * so that the lower level mpio routines know to use collective
+ * access.
+ * This is not thread-safe, is a klutch for now.
+ * Should change all the I/O routines to pass along the xfer
+ * property list to the low level I/O for proper execution.
+ * Make it to work now. Must fix it later.
+ * -AKC-
+ */
+#ifdef AKC
+ printf("%s: collective access requested\n", FUNC);
+ printf("%s: current f->access_mode = %x\n", FUNC,
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode);
+#endif
+ access_mode_saved = dataset->ent.file->shared->access_parms.u.mpio.access_mode;
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode = H5ACC_COLLECTIVE;
+ status = (sconv_func->write)(dataset->ent.file, &(dataset->layout),
+ &(dataset->create_parms->efl),
+ H5T_get_size (dataset->type), file_space,
+ mem_space, buf);
+ if (status>=0) goto succeed;
+ HGOTO_ERROR (H5E_DATASET, H5E_WRITEERROR, FAIL,
+ "collective write failed");
+ }
+ }
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
+ "collective access not permissible");
+ }
+#endif /*HAVE_PARALLEL*/
+
+
/*
* If there is no type conversion then try writing directly from
* application buffer to file.
@@ -1522,6 +1633,15 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
if (bkg_buf && NULL==xfer_parms->bkg_buf) {
H5MM_xfree (bkg_buf);
}
+#ifdef HAVE_PARALLEL
+ /*
+ * Check if collective data transfer requested.
+ * If so, need to restore the access mode. Shouldnot needed.
+ */
+ if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){
+ dataset->ent.file->shared->access_parms.u.mpio.access_mode = access_mode_saved;
+ }
+#endif /*HAVE_PARALLEL*/
FUNC_LEAVE(ret_value);
}
diff --git a/src/H5Dprivate.h b/src/H5Dprivate.h
index 6f0c23c..f39e5de 100644
--- a/src/H5Dprivate.h
+++ b/src/H5Dprivate.h
@@ -54,6 +54,9 @@ typedef struct H5D_xfer_t {
void *tconv_buf; /*type conversion buffer or null */
void *bkg_buf; /*background buffer or null */
H5T_bkg_t need_bkg; /*type of background buffer needed */
+#ifdef HAVE_PARALLEL
+ H5D_transfer_t xfer_mode; /*independent or collective transfer */
+#endif
} H5D_xfer_t;
typedef struct H5D_t H5D_t;
diff --git a/src/H5Dpublic.h b/src/H5Dpublic.h
index bfaacd2..634b5b9 100644
--- a/src/H5Dpublic.h
+++ b/src/H5Dpublic.h
@@ -31,6 +31,14 @@ typedef enum H5D_layout_t {
H5D_NLAYOUTS = 3 /*this one must be last! */
} H5D_layout_t;
+#ifdef HAVE_PARALLEL
+/* Values for the data transfer property */
+typedef enum H5D_transfer_t {
+ H5D_XFER_INDEPENDENT, /*Independent data transfer */
+ H5D_XFER_COLLECTIVE /*Collective data transfer */
+} H5D_transfer_t;
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/src/H5Farray.c b/src/H5Farray.c
index 138a20f..76f09d8 100644
--- a/src/H5Farray.c
+++ b/src/H5Farray.c
@@ -130,6 +130,9 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout,
haddr_t addr; /*address in file */
intn i, j; /*counters */
hbool_t carray; /*carry for subtraction */
+#ifdef HAVE_PARALLEL
+ intn is_collective; /*collective access flag*/
+#endif
FUNC_ENTER (H5F_arr_read, FAIL);
@@ -145,6 +148,19 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout,
/* Make a local copy of size so we can modify it */
H5V_vector_cpy (layout->ndims, hslab_size, _hslab_size);
+#ifdef HAVE_PARALLEL
+ is_collective = (f->shared->access_parms.driver==H5F_LOW_MPIO
+ && f->shared->access_parms.u.mpio.access_mode==H5ACC_COLLECTIVE);
+ if (is_collective){
+#ifdef AKC
+ printf("%s: collective read requested\n", FUNC);
+#endif
+ if (layout->type != H5D_CONTIGUOUS)
+ HRETURN_ERROR (H5E_DATASET, H5E_READERROR, FAIL,
+ "collective access on non-contiguous datasets not supported yet");
+ }
+#endif
+
switch (layout->type) {
case H5D_CONTIGUOUS:
ndims = layout->ndims;
@@ -190,6 +206,29 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout,
* Now begin to walk through the array, copying data from disk to
* memory.
*/
+#ifdef HAVE_PARALLEL
+ if (is_collective){
+ /* Currently supports same number of collective access.
+ * Need to be changed LATER to combine all reads into one
+ * collective MPIO call.
+ */
+ long max, min, temp;
+
+ temp = nelmts;
+ assert(temp==nelmts); /* verify no overflow */
+ MPI_Allreduce(&temp, &max, 1, MPI_LONG, MPI_MAX,
+ f->shared->access_parms.u.mpio.comm);
+ MPI_Allreduce(&temp, &min, 1, MPI_LONG, MPI_MIN,
+ f->shared->access_parms.u.mpio.comm);
+#ifdef AKC
+printf("nelmnts=%ld, min=%ld, max=%ld\n", temp, min, max);
+#endif
+ if (max != min)
+ HRETURN_ERROR(H5E_DATASET, H5E_READERROR, FAIL,
+ "collective access with unequal number of blocks not supported yet");
+ }
+#endif
+
for (z=0; z<nelmts; z++) {
/* Read from file */
@@ -291,6 +330,9 @@ H5F_arr_write (H5F_t *f, const struct H5O_layout_t *layout,
haddr_t addr; /*address in file */
intn i, j; /*counters */
hbool_t carray; /*carry for subtraction */
+#ifdef HAVE_PARALLEL
+ intn is_collective; /*collective access flag*/
+#endif
FUNC_ENTER (H5F_arr_write, FAIL);
@@ -306,6 +348,18 @@ H5F_arr_write (H5F_t *f, const struct H5O_layout_t *layout,
/* Make a local copy of _size so we can modify it */
H5V_vector_cpy (layout->ndims, hslab_size, _hslab_size);
+#ifdef HAVE_PARALLEL
+ is_collective = (f->shared->access_parms.driver==H5F_LOW_MPIO
+ && f->shared->access_parms.u.mpio.access_mode==H5ACC_COLLECTIVE);
+ if (is_collective){
+#ifdef AKC
+ printf("%s: collective write requested\n", FUNC);
+#endif
+ if (layout->type != H5D_CONTIGUOUS)
+ HRETURN_ERROR (H5E_DATASET, H5E_WRITEERROR, FAIL,
+ "collective access on non-contiguous datasets not supported yet");
+ }
+#endif
switch (layout->type) {
case H5D_CONTIGUOUS:
@@ -352,6 +406,29 @@ H5F_arr_write (H5F_t *f, const struct H5O_layout_t *layout,
* Now begin to walk through the array, copying data from memory to
* disk.
*/
+#ifdef HAVE_PARALLEL
+ if (is_collective){
+ /* Currently supports same number of collective access.
+ * Need to be changed LATER to combine all writes into one
+ * collective MPIO call.
+ */
+ long max, min, temp;
+
+ temp = nelmts;
+ assert(temp==nelmts); /* verify no overflow */
+ MPI_Allreduce(&temp, &max, 1, MPI_LONG, MPI_MAX,
+ f->shared->access_parms.u.mpio.comm);
+ MPI_Allreduce(&temp, &min, 1, MPI_LONG, MPI_MIN,
+ f->shared->access_parms.u.mpio.comm);
+#ifdef AKC
+printf("nelmnts=%ld, min=%ld, max=%ld\n", temp, min, max);
+#endif
+ if (max != min)
+ HRETURN_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+ "collective access with unequal number of blocks not supported yet");
+ }
+#endif
+
for (z=0; z<nelmts; z++) {
/* Write to file */
diff --git a/src/H5P.c b/src/H5P.c
index 25f9f31..d06aa2a 100644
--- a/src/H5P.c
+++ b/src/H5P.c
@@ -1850,6 +1850,7 @@ H5Pget_preserve (hid_t plist_id)
+#ifdef HAVE_PARALLEL
/*-------------------------------------------------------------------------
* Function: H5Pset_mpi
*
@@ -1903,7 +1904,6 @@ H5Pget_preserve (hid_t plist_id)
*
*-------------------------------------------------------------------------
*/
-#ifdef HAVE_PARALLEL
herr_t
H5Pset_mpi (hid_t tid, MPI_Comm comm, MPI_Info info, unsigned access_mode)
{
@@ -1964,6 +1964,7 @@ H5Pset_mpi (hid_t tid, MPI_Comm comm, MPI_Info info, unsigned access_mode)
#endif /*HAVE_PARALLEL*/
+#ifdef HAVE_PARALLEL
/*-------------------------------------------------------------------------
* Function: H5Pget_mpi
*
@@ -1983,7 +1984,6 @@ H5Pset_mpi (hid_t tid, MPI_Comm comm, MPI_Info info, unsigned access_mode)
*
*-------------------------------------------------------------------------
*/
-#ifdef HAVE_PARALLEL
herr_t
H5Pget_mpi (hid_t tid, MPI_Comm *comm, MPI_Info *info, unsigned *access_mode)
{
@@ -2012,6 +2012,107 @@ H5Pget_mpi (hid_t tid, MPI_Comm *comm, MPI_Info *info, unsigned *access_mode)
#endif /*HAVE_PARALLEL*/
+#ifdef HAVE_PARALLEL
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_xfer
+ *
+ * Signature: herr_t H5Pset_xfer(hid_t tid, H5D_transfer_t data_xfer_mode)
+ *
+ * Purpose: Set the transfer mode of the dataset transfer property list.
+ * The list can then be used to control the I/O transfer mode
+ * during dataset accesses. This function is available only
+ * in the parallel HDF5 library and is not a collective function.
+ *
+ * Parameters:
+ * hid_t tid
+ * ID of a dataset transfer property list
+ * H5D_transfer_t data_xfer_mode
+ * data transfer modes:
+ * H5ACC_INDEPENDENT
+ * Use independent I/O access.
+ * H5ACC_COLLECTIVE
+ * Use MPI collective I/O access.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: Albert Cheng
+ * April 2, 1998
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_xfer (hid_t tid, H5D_transfer_t data_xfer_mode)
+{
+ H5D_xfer_t *plist = NULL;
+
+ FUNC_ENTER(H5Pset_xfer, FAIL);
+
+ /* Check arguments */
+ if (H5P_DATASET_XFER != H5Pget_class(tid) ||
+ NULL == (plist = H5I_object(tid))) {
+ HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL,
+ "not a dataset transfer property list");
+ }
+
+ switch (data_xfer_mode){
+ case H5D_XFER_INDEPENDENT:
+ case H5D_XFER_COLLECTIVE:
+ plist->xfer_mode = data_xfer_mode;
+ break;
+ default:
+ HRETURN_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL,
+ "invalid dataset transfer mode");
+ }
+
+ FUNC_LEAVE(SUCCEED);
+}
+#endif /*HAVE_PARALLEL*/
+
+
+#ifdef HAVE_PARALLEL
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_xfer
+ *
+ * Purpose: Reads the transfer mode current set in the property list.
+ * This function is available only in the parallel HDF5 library
+ * and is not a collective function.
+ *
+ * Return: Success: SUCCEED
+ *
+ * Failure: FAIL
+ *
+ * Programmer: Albert Cheng
+ * April 2, 1998
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_xfer (hid_t tid, H5D_transfer_t *data_xfer_mode)
+{
+ H5D_xfer_t *plist = NULL;
+
+ FUNC_ENTER (H5Pget_xfer, FAIL);
+
+ /* Check arguments */
+ if (H5P_DATASET_XFER != H5Pget_class(tid) ||
+ NULL == (plist = H5I_object(tid))) {
+ HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL,
+ "not a dataset transfer property list");
+ }
+
+ *data_xfer_mode = plist->xfer_mode;
+
+ FUNC_LEAVE (SUCCEED);
+}
+#endif /*HAVE_PARALLEL*/
+
+
/*--------------------------------------------------------------------------
NAME
H5Pcopy
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 28d2b80..6f11dce 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -93,6 +93,8 @@ herr_t H5Pset_mpi (hid_t tid, MPI_Comm comm, MPI_Info info,
unsigned access_mode);
herr_t H5Pget_mpi (hid_t tid, MPI_Comm *comm/*out*/, MPI_Info *info/*out*/,
unsigned *access_mode/*out*/);
+herr_t H5Pset_xfer (hid_t tid, H5D_transfer_t data_xfer_mode);
+herr_t H5Pget_xfer (hid_t tid, H5D_transfer_t *data_xfer_mode/*out*/);
#endif
#ifdef __cplusplus