From 33a49221fccc5adc7ec9c5a556be4f6cb72f4e34 Mon Sep 17 00:00:00 2001 From: Albert Cheng Date: Sun, 12 Apr 1998 23:31:23 -0500 Subject: [svn-r343] First implementation of collective access support. H5D.c Changed H5D_write and H5D_read to recognize collective transfer requests. Current algorithm puts the xfer-mode in the file->mpio structure. Not thread safe. Need to revise it to pass transfer parameter as a function parameters to the lower levels. H5Dprivate.h H5Dpublic.h Added xfer_mode to the transfer property list structure. Added defination of transfer modes, H5D_XFER_INDEPENDENT and H5D_XFER_COLLECTIVE. H5Farray.c Added code to handle collective access requests. Currently works for one contiguous block at a time. Need to revise it with the MPIO low level calls to combine all blocks as one truely collective call. H5P.c H5Ppublic.h Added H5Pset_xfer and H5Pget_xfer routines that can set and get the transfer mode of a data transfer property list. --- src/H5D.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/H5Dprivate.h | 3 ++ src/H5Dpublic.h | 8 ++++ src/H5Farray.c | 77 +++++++++++++++++++++++++++++++++++ src/H5P.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++- src/H5Ppublic.h | 2 + 6 files changed, 313 insertions(+), 2 deletions(-) diff --git a/src/H5D.c b/src/H5D.c index 6d043ea..b06a9cf 100644 --- a/src/H5D.c +++ b/src/H5D.c @@ -67,6 +67,9 @@ const H5D_xfer_t H5D_xfer_dflt = { NULL, /* Type conversion buffer or NULL */ NULL, /* Background buffer or NULL */ H5T_BKG_NO, /* Type of background buffer needed */ +#ifdef HAVE_PARALLEL + H5D_XFER_INDEPENDENT, /* Independent data transfer */ +#endif }; /* Interface initialization? */ @@ -1108,6 +1111,9 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, size_t target_size; /*desired buffer size */ size_t request_nelmts; /*requested strip mine */ H5T_bkg_t need_bkg; /*type of background buf*/ +#ifdef HAVE_PARALLEL + int access_mode_saved = -1; +#endif FUNC_ENTER(H5D_read, FAIL); @@ -1149,6 +1155,52 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, "unable to convert from file to memory data space"); } +#ifdef HAVE_PARALLEL + /* + * Check if collective data transfer requested. + */ + if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){ + /* verify that the file can support collective access. */ + /* The check may not be necessarily since collective access */ + /* can always be simulated by independent access. */ + /* Nevertheless, must check driver is MPIO before using those */ + /* access_mode which exists only for MPIO case. */ + if (dataset->ent.file->shared->access_parms.driver == H5F_LOW_MPIO){ + /* Supports only no conversion, type or space, for now. */ + if (H5T_conv_noop==tconv_func && + NULL!=sconv_func->read) { + /* + * -AKC- + * "plant" the collective access mode into the file information + * so that the lower level mpio routines know to use collective + * access. + * This is not thread-safe, is a klutch for now. + * Should change all the I/O routines to pass along the xfer + * property list to the low level I/O for proper execution. + * Make it to work now. Must fix it later. + * -AKC- + */ +#ifdef AKC + printf("%s: collective access requested\n", FUNC); + printf("%s: current f->access_mode = %x\n", FUNC, + dataset->ent.file->shared->access_parms.u.mpio.access_mode); +#endif + access_mode_saved = dataset->ent.file->shared->access_parms.u.mpio.access_mode; + dataset->ent.file->shared->access_parms.u.mpio.access_mode = H5ACC_COLLECTIVE; + status = (sconv_func->read)(dataset->ent.file, &(dataset->layout), + &(dataset->create_parms->efl), + H5T_get_size (dataset->type), file_space, + mem_space, buf/*out*/); + if (status>=0) goto succeed; + HGOTO_ERROR (H5E_DATASET, H5E_READERROR, FAIL, + "collective read failed"); + } + } + HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, + "collective access not permissible"); + } +#endif /*HAVE_PARALLEL*/ + /* * If there is no type conversion then try reading directly into the @@ -1293,6 +1345,15 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, if (bkg_buf && NULL==xfer_parms->bkg_buf) { H5MM_xfree (bkg_buf); } +#ifdef HAVE_PARALLEL + /* + * Check if collective data transfer requested. + * If so, need to restore the access mode. Shouldnot needed. + */ + if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){ + dataset->ent.file->shared->access_parms.u.mpio.access_mode = access_mode_saved; + } +#endif /*HAVE_PARALLEL*/ FUNC_LEAVE(ret_value); } @@ -1335,6 +1396,9 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, size_t target_size; /*desired buffer size */ size_t request_nelmts; /*requested strip mine */ H5T_bkg_t need_bkg; /*type of background buf*/ +#ifdef HAVE_PARALLEL + int access_mode_saved = -1; +#endif FUNC_ENTER(H5D_write, FAIL); @@ -1376,6 +1440,53 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, "unable to convert from memory to file data space"); } +#ifdef HAVE_PARALLEL + /* + * Check if collective data transfer requested. + */ + if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){ + /* verify that the file can support collective access. */ + /* The check may not be necessarily since collective access */ + /* can always be simulated by independent access. */ + /* Nevertheless, must check driver is MPIO before using those */ + /* access_mode which exists only for MPIO case. */ + if (dataset->ent.file->shared->access_parms.driver == H5F_LOW_MPIO){ + /* Supports only no conversion, type or space, for now. */ + if (H5T_conv_noop==tconv_func && + NULL!=sconv_func->write) { + /* + * -AKC- + * "plant" the collective access mode into the file information + * so that the lower level mpio routines know to use collective + * access. + * This is not thread-safe, is a klutch for now. + * Should change all the I/O routines to pass along the xfer + * property list to the low level I/O for proper execution. + * Make it to work now. Must fix it later. + * -AKC- + */ +#ifdef AKC + printf("%s: collective access requested\n", FUNC); + printf("%s: current f->access_mode = %x\n", FUNC, + dataset->ent.file->shared->access_parms.u.mpio.access_mode); +#endif + access_mode_saved = dataset->ent.file->shared->access_parms.u.mpio.access_mode; + dataset->ent.file->shared->access_parms.u.mpio.access_mode = H5ACC_COLLECTIVE; + status = (sconv_func->write)(dataset->ent.file, &(dataset->layout), + &(dataset->create_parms->efl), + H5T_get_size (dataset->type), file_space, + mem_space, buf); + if (status>=0) goto succeed; + HGOTO_ERROR (H5E_DATASET, H5E_WRITEERROR, FAIL, + "collective write failed"); + } + } + HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, + "collective access not permissible"); + } +#endif /*HAVE_PARALLEL*/ + + /* * If there is no type conversion then try writing directly from * application buffer to file. @@ -1522,6 +1633,15 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, if (bkg_buf && NULL==xfer_parms->bkg_buf) { H5MM_xfree (bkg_buf); } +#ifdef HAVE_PARALLEL + /* + * Check if collective data transfer requested. + * If so, need to restore the access mode. Shouldnot needed. + */ + if (xfer_parms->xfer_mode == H5D_XFER_COLLECTIVE){ + dataset->ent.file->shared->access_parms.u.mpio.access_mode = access_mode_saved; + } +#endif /*HAVE_PARALLEL*/ FUNC_LEAVE(ret_value); } diff --git a/src/H5Dprivate.h b/src/H5Dprivate.h index 6f0c23c..f39e5de 100644 --- a/src/H5Dprivate.h +++ b/src/H5Dprivate.h @@ -54,6 +54,9 @@ typedef struct H5D_xfer_t { void *tconv_buf; /*type conversion buffer or null */ void *bkg_buf; /*background buffer or null */ H5T_bkg_t need_bkg; /*type of background buffer needed */ +#ifdef HAVE_PARALLEL + H5D_transfer_t xfer_mode; /*independent or collective transfer */ +#endif } H5D_xfer_t; typedef struct H5D_t H5D_t; diff --git a/src/H5Dpublic.h b/src/H5Dpublic.h index bfaacd2..634b5b9 100644 --- a/src/H5Dpublic.h +++ b/src/H5Dpublic.h @@ -31,6 +31,14 @@ typedef enum H5D_layout_t { H5D_NLAYOUTS = 3 /*this one must be last! */ } H5D_layout_t; +#ifdef HAVE_PARALLEL +/* Values for the data transfer property */ +typedef enum H5D_transfer_t { + H5D_XFER_INDEPENDENT, /*Independent data transfer */ + H5D_XFER_COLLECTIVE /*Collective data transfer */ +} H5D_transfer_t; +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/src/H5Farray.c b/src/H5Farray.c index 138a20f..76f09d8 100644 --- a/src/H5Farray.c +++ b/src/H5Farray.c @@ -130,6 +130,9 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout, haddr_t addr; /*address in file */ intn i, j; /*counters */ hbool_t carray; /*carry for subtraction */ +#ifdef HAVE_PARALLEL + intn is_collective; /*collective access flag*/ +#endif FUNC_ENTER (H5F_arr_read, FAIL); @@ -145,6 +148,19 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout, /* Make a local copy of size so we can modify it */ H5V_vector_cpy (layout->ndims, hslab_size, _hslab_size); +#ifdef HAVE_PARALLEL + is_collective = (f->shared->access_parms.driver==H5F_LOW_MPIO + && f->shared->access_parms.u.mpio.access_mode==H5ACC_COLLECTIVE); + if (is_collective){ +#ifdef AKC + printf("%s: collective read requested\n", FUNC); +#endif + if (layout->type != H5D_CONTIGUOUS) + HRETURN_ERROR (H5E_DATASET, H5E_READERROR, FAIL, + "collective access on non-contiguous datasets not supported yet"); + } +#endif + switch (layout->type) { case H5D_CONTIGUOUS: ndims = layout->ndims; @@ -190,6 +206,29 @@ H5F_arr_read (H5F_t *f, const struct H5O_layout_t *layout, * Now begin to walk through the array, copying data from disk to * memory. */ +#ifdef HAVE_PARALLEL + if (is_collective){ + /* Currently supports same number of collective access. + * Need to be changed LATER to combine all reads into one + * collective MPIO call. + */ + long max, min, temp; + + temp = nelmts; + assert(temp==nelmts); /* verify no overflow */ + MPI_Allreduce(&temp, &max, 1, MPI_LONG, MPI_MAX, + f->shared->access_parms.u.mpio.comm); + MPI_Allreduce(&temp, &min, 1, MPI_LONG, MPI_MIN, + f->shared->access_parms.u.mpio.comm); +#ifdef AKC +printf("nelmnts=%ld, min=%ld, max=%ld\n", temp, min, max); +#endif + if (max != min) + HRETURN_ERROR(H5E_DATASET, H5E_READERROR, FAIL, + "collective access with unequal number of blocks not supported yet"); + } +#endif + for (z=0; zndims, hslab_size, _hslab_size); +#ifdef HAVE_PARALLEL + is_collective = (f->shared->access_parms.driver==H5F_LOW_MPIO + && f->shared->access_parms.u.mpio.access_mode==H5ACC_COLLECTIVE); + if (is_collective){ +#ifdef AKC + printf("%s: collective write requested\n", FUNC); +#endif + if (layout->type != H5D_CONTIGUOUS) + HRETURN_ERROR (H5E_DATASET, H5E_WRITEERROR, FAIL, + "collective access on non-contiguous datasets not supported yet"); + } +#endif switch (layout->type) { case H5D_CONTIGUOUS: @@ -352,6 +406,29 @@ H5F_arr_write (H5F_t *f, const struct H5O_layout_t *layout, * Now begin to walk through the array, copying data from memory to * disk. */ +#ifdef HAVE_PARALLEL + if (is_collective){ + /* Currently supports same number of collective access. + * Need to be changed LATER to combine all writes into one + * collective MPIO call. + */ + long max, min, temp; + + temp = nelmts; + assert(temp==nelmts); /* verify no overflow */ + MPI_Allreduce(&temp, &max, 1, MPI_LONG, MPI_MAX, + f->shared->access_parms.u.mpio.comm); + MPI_Allreduce(&temp, &min, 1, MPI_LONG, MPI_MIN, + f->shared->access_parms.u.mpio.comm); +#ifdef AKC +printf("nelmnts=%ld, min=%ld, max=%ld\n", temp, min, max); +#endif + if (max != min) + HRETURN_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "collective access with unequal number of blocks not supported yet"); + } +#endif + for (z=0; zxfer_mode = data_xfer_mode; + break; + default: + HRETURN_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, + "invalid dataset transfer mode"); + } + + FUNC_LEAVE(SUCCEED); +} +#endif /*HAVE_PARALLEL*/ + + +#ifdef HAVE_PARALLEL +/*------------------------------------------------------------------------- + * Function: H5Pget_xfer + * + * Purpose: Reads the transfer mode current set in the property list. + * This function is available only in the parallel HDF5 library + * and is not a collective function. + * + * Return: Success: SUCCEED + * + * Failure: FAIL + * + * Programmer: Albert Cheng + * April 2, 1998 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_xfer (hid_t tid, H5D_transfer_t *data_xfer_mode) +{ + H5D_xfer_t *plist = NULL; + + FUNC_ENTER (H5Pget_xfer, FAIL); + + /* Check arguments */ + if (H5P_DATASET_XFER != H5Pget_class(tid) || + NULL == (plist = H5I_object(tid))) { + HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, + "not a dataset transfer property list"); + } + + *data_xfer_mode = plist->xfer_mode; + + FUNC_LEAVE (SUCCEED); +} +#endif /*HAVE_PARALLEL*/ + + /*-------------------------------------------------------------------------- NAME H5Pcopy diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h index 28d2b80..6f11dce 100644 --- a/src/H5Ppublic.h +++ b/src/H5Ppublic.h @@ -93,6 +93,8 @@ herr_t H5Pset_mpi (hid_t tid, MPI_Comm comm, MPI_Info info, unsigned access_mode); herr_t H5Pget_mpi (hid_t tid, MPI_Comm *comm/*out*/, MPI_Info *info/*out*/, unsigned *access_mode/*out*/); +herr_t H5Pset_xfer (hid_t tid, H5D_transfer_t data_xfer_mode); +herr_t H5Pget_xfer (hid_t tid, H5D_transfer_t *data_xfer_mode/*out*/); #endif #ifdef __cplusplus -- cgit v0.12