From 1702d75b3a49465b184362e088cf120b1b4817f3 Mon Sep 17 00:00:00 2001
From: MuQun Yang
Date: Thu, 11 Nov 2004 16:00:01 -0500
Subject: [svn-r9519] Purpose: Add code for general MPI derived datatypes, in
 order to better incorporate new fixes of the HDF5 library.

Description:
Note: this code has NOT been tested for general use; do not call these
functions in your own HDF5 library development. The code is also
stand-alone, so it should not affect other library code.

Solution:

Platforms tested:
Heping (C and parallel, Linux 2.4, MPICH 1.2.6)
Arabica (C, C++, Fortran, Solaris 2.7)
Copper (C, C++, Fortran, AIX 5.1; NOTE: C++ failed, apparently not due to
this recent check-in)

Misc. update:
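
For background on the pattern the new H5Smpio.c code implements: the
span-tree translation bottoms out in MPI_Type_hindexed over byte blocks
for the lowest dimension and wraps each higher dimension's spans in
MPI_Type_hvector. A minimal stand-alone sketch of that pattern follows;
the 4x10-byte slab layout is hypothetical and not taken from this patch:

    #include <mpi.h>

    int main(int argc, char **argv)
    {
        int          blocklen[2] = {3, 2};
        MPI_Aint     disp[2]     = {2, 7};
        MPI_Datatype row_type, slab_type;

        MPI_Init(&argc, &argv);

        /* Lowest dimension: one row selects bytes [2,5) and [7,9) of a
         * 10-byte row, as hindexed blocks of MPI_BYTE */
        MPI_Type_hindexed(2, blocklen, disp, MPI_BYTE, &row_type);

        /* One dimension up: repeat the row pattern for 4 consecutive
         * rows, striding by the full 10-byte row width */
        MPI_Type_hvector(4, 1, (MPI_Aint)10, row_type, &slab_type);
        MPI_Type_commit(&slab_type);

        /* slab_type could now serve as the file or buffer type of a
         * collective transfer */

        MPI_Type_free(&row_type);
        MPI_Type_free(&slab_type);
        MPI_Finalize();
        return 0;
    }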
---
 src/H5Dio.c      |   6 ++
 src/H5Dmpio.c    | 189 +++++++++++++++++++++++++++++++++++
 src/H5Dpkg.h     |  12 +++
 src/H5Smpio.c    | 299 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/H5Sprivate.h |   9 ++
 5 files changed, 515 insertions(+)

diff --git a/src/H5Dio.c b/src/H5Dio.c
index 941801c..0eb0dd7 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -3315,7 +3315,13 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
     } /* end if */
     else {
         /* Indicate that the I/O will _NOT_ be parallel */
+
+#ifdef KYANG
+        io_info->ops.read = H5D_mpio_spaces_span_read;
+        io_info->ops.write = H5D_mpio_spaces_span_write;
+#else
         *use_par_opt_io=FALSE;
+#endif
 #endif /* H5_HAVE_PARALLEL */
 
     io_info->ops.read = H5D_select_read;
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 08c6a48..4dfb17b 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -45,6 +45,12 @@
 H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size,
     void *buf/*out*/, hbool_t do_write);
+
+static herr_t
+H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size,
+    const H5S_t *file_space, const H5S_t *mem_space,
+    void *buf/*out*/,
+    hbool_t do_write);
 
 /*-------------------------------------------------------------------------
  * Function:	H5D_mpio_opt_possible
@@ -364,6 +370,107 @@ done:
     FUNC_LEAVE_NOAPI(ret_value);
 } /* end H5D_mpio_spaces_xfer() */
 
+
+/** NOTE: The following function has NOT been tested; do not call it
+    until this note is removed. Nov. 11, 2004, KY **/
+
+static herr_t
+H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size,
+    const H5S_t *file_space, const H5S_t *mem_space,
+    void *_buf /*out*/, hbool_t do_write )
+{
+    haddr_t      addr;                  /* Address of dataset (or selection) within file */
+    size_t       mpi_buf_count, mpi_file_count;     /* Number of "objects" to transfer */
+    hsize_t      mpi_buf_offset, mpi_file_offset;   /* Offset within dataset where selection (i.e. MPI type) begins */
+    MPI_Datatype mpi_buf_type, mpi_file_type;       /* MPI types for buffer (memory) and file */
+    hbool_t      mbt_is_derived=0,      /* Whether the buffer (memory) type is derived and needs to be freed */
+                 mft_is_derived=0;      /* Whether the file type is derived and needs to be freed */
+    hbool_t      plist_is_setup=0;      /* Whether the dxpl has been customized */
+    uint8_t     *buf=(uint8_t *)_buf;   /* Alias for pointer arithmetic */
+    int          mpi_code;              /* MPI return code */
+    herr_t       ret_value = SUCCEED;   /* Return value */
+
+    FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_spaces_span_xfer);
+
+    /* Check args */
+    assert (io_info);
+    assert (io_info->dset);
+    assert (file_space);
+    assert (mem_space);
+    assert (buf);
+    assert (IS_H5FD_MPIO(io_info->dset->ent.file));
+    /* Make certain we have the correct type of property list */
+    assert(TRUE==H5P_isa_class(io_info->dxpl_id,H5P_DATASET_XFER));
+
+    /* create the MPI buffer type */
+    if (H5S_mpio_space_span_type( mem_space, elmt_size,
+                                  /* out: */
+                                  &mpi_buf_type,
+                                  &mpi_buf_count,
+                                  &mpi_buf_offset,
+                                  &mbt_is_derived )<0)
+        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
+
+    /* create the MPI file type */
+    if ( H5S_mpio_space_span_type( file_space, elmt_size,
+                                   /* out: */
+                                   &mpi_file_type,
+                                   &mpi_file_count,
+                                   &mpi_file_offset,
+                                   &mft_is_derived )<0)
+        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
+
+    /* Get the base address of the contiguous dataset or the chunk */
+    if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS)
+        addr = H5D_contig_get_addr(io_info->dset) + mpi_file_offset;
+    else {
+        haddr_t   chunk_addr; /* for collective chunk IO */
+
+        assert(io_info->dset->shared->layout.type == H5D_CHUNKED);
+        chunk_addr=H5D_istore_get_addr(io_info,NULL);
+        addr = H5F_BASE_ADDR(io_info->dset->ent.file) + chunk_addr + mpi_file_offset;
+    }
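+
+    /* For the span-tree hyperslab case the counts above come back as 1,
+     * with the derived type describing the entire selection; the "none"
+     * and "all" cases fall back to MPI_BYTE with a plain byte count
+     * (0 for "none"). */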
+
+    /*
+     * Pass buf type, file type to the file driver. Request an MPI type
+     * transfer (instead of an elementary byte-block transfer).
+     */
+    if(H5FD_mpi_setup_collective(io_info->dxpl_id, mpi_buf_type, mpi_file_type)<0)
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties");
+    plist_is_setup=1;
+
+    /* Adjust the buffer pointer to the beginning of the selection */
+    buf+=mpi_buf_offset;
+
+    /* transfer the data */
+    if (do_write) {
+        if (H5F_block_write(io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0)
+            HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed");
+    } else {
+        if (H5F_block_read (io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0)
+            HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed");
+    }
+
+done:
+    /* Reset the dxpl settings */
+    if(plist_is_setup) {
+        if(H5FD_mpi_teardown_collective(io_info->dxpl_id)<0)
+            HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values");
+    } /* end if */
+
+    /* free the MPI buf and file types */
+    if (mbt_is_derived) {
+        if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_buf_type )))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
+    }
+    if (mft_is_derived) {
+        if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_file_type )))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5D_mpio_spaces_span_xfer() */
+
 /*-------------------------------------------------------------------------
  * Function:	H5D_mpio_spaces_read
@@ -438,4 +545,86 @@ H5D_mpio_spaces_write(H5D_io_info_t *io_info,
     FUNC_LEAVE_NOAPI(ret_value);
 } /* end H5D_mpio_spaces_write() */
+
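+/* The two entry points below are thin wrappers that forward to
+ * H5D_mpio_spaces_span_xfer() with do_write set to 0 (read) or 1 (write). */
+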
+/*-------------------------------------------------------------------------
+ * Function:	H5D_mpio_spaces_span_read
+ *
+ * Purpose:	MPI-IO function to read directly from the file into the
+ *		application buffer, using an MPI derived datatype built
+ *		from a span tree.
+ *
+ * Return:	non-negative on success, negative on failure.
+ *
+ * Programmer:	KY
+ *		Note: do not call this routine until this note is removed.
+ *		11/11/2004, KY
+ *
+ * Modifications:
+ *
+ *	rky 980918
+ *	Added must_convert parameter to let caller know we can't optimize
+ *	the xfer.
+ *
+ *	QAK - 2002/04/02
+ *	Removed the must_convert parameter and moved preconditions to the
+ *	H5S_mpio_opt_possible() routine
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5D_mpio_spaces_span_read(H5D_io_info_t *io_info,
+    size_t UNUSED nelmts, size_t elmt_size,
+    const H5S_t *file_space, const H5S_t *mem_space,
+    void *buf/*out*/)
+{
+    herr_t ret_value;
+
+    FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_read);
+
+    ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space,
+        mem_space, buf, 0/*read*/);
+
+    FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5D_mpio_spaces_span_read() */
+
+
+/*-------------------------------------------------------------------------
+ * Function:	H5D_mpio_spaces_span_write
+ *
+ * Purpose:	MPI-IO function to write directly from the application
+ *		buffer to the file, using an MPI derived datatype built
+ *		from a span tree.
+ *
+ * Return:	non-negative on success, negative on failure.
+ *
+ * Programmer:	KY
+ *		Note: do not call this function until this note is removed.
+ *		KY, 11/11/04
+ *
+ * Modifications:
+ *
+ *	rky 980918
+ *	Added must_convert parameter to let caller know we can't optimize
+ *	the xfer.
+ *
+ *	QAK - 2002/04/02
+ *	Removed the must_convert parameter and moved preconditions to the
+ *	H5S_mpio_opt_possible() routine
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5D_mpio_spaces_span_write(H5D_io_info_t *io_info,
+    size_t UNUSED nelmts, size_t elmt_size,
+    const H5S_t *file_space, const H5S_t *mem_space,
+    const void *buf)
+{
+    herr_t ret_value;
+
+    FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_write);
+
+    /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+    ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space,
+        mem_space, (void*)buf, 1/*write*/);
+
+    FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5D_mpio_spaces_span_write() */
 #endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index 499ceb0..68c8742 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -288,6 +288,18 @@ H5_DLL herr_t H5D_mpio_spaces_write(H5D_io_info_t *io_info,
     const struct H5S_t *file_space, const struct H5S_t *mem_space,
     const void *buf);
 
+/* MPI-IO function to read directly from file to app buffer rky980813 */
+H5_DLL herr_t H5D_mpio_spaces_span_read(H5D_io_info_t *io_info,
+    size_t nelmts, size_t elmt_size,
+    const struct H5S_t *file_space, const struct H5S_t *mem_space,
+    void *buf/*out*/);
+
+/* MPI-IO function to write directly from app buffer to file rky980813 */
+H5_DLL herr_t H5D_mpio_spaces_span_write(H5D_io_info_t *io_info,
+    size_t nelmts, size_t elmt_size,
+    const struct H5S_t *file_space, const struct H5S_t *mem_space,
+    const void *buf);
+
 /* MPI-IO function to check if a direct I/O transfer is possible between
  * memory and the file */
 H5_DLL htri_t H5D_mpio_opt_possible(const H5D_t *file, const H5S_t *mem_space,
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index dab77ee..0f5b075 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -61,6 +61,18 @@ H5S_mpio_hyper_type( const H5S_t *space, size_t elmt_size,
     hsize_t *extra_offset,
     hbool_t *is_derived_type );
 
+static herr_t
+H5S_mpio_span_hyper_type( const H5S_t *space, size_t elmt_size,
+    /* out: */
+    MPI_Datatype *new_type,
+    size_t *count,
+    hsize_t *extra_offset,
+    hbool_t *is_derived_type );
+
+static herr_t obtain_datatype(const hsize_t size[],
+    H5S_hyper_span_t* span, MPI_Datatype *span_type,
+    size_t elmt_size, int dimindex);
+
 /*-------------------------------------------------------------------------
  * Function:	H5S_mpio_all_type
@@ -539,4 +551,291 @@ H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
 done:
     FUNC_LEAVE_NOAPI(ret_value);
 }
+
+
+/*-------------------------------------------------------------------------
+ * Function:	H5S_mpio_space_span_type
+ *
+ * Purpose:	Translate an HDF5 dataspace selection into a general MPI
+ *		derived datatype built from a span tree.
+ *
+ *		Currently handles only "none", "all" and hyperslab
+ *		selections.
+ *
+ * Return:	non-negative on success, negative on failure.
+ *
+ * Outputs:	*new_type	  the MPI type corresponding to the selection
+ *		*count		  how many objects of the new_type in selection
+ *				  (useful if this is the buffer type for xfer)
+ *		*extra_offset	  Number of bytes of offset within dataset
+ *		*is_derived_type  0 if MPI primitive type, 1 if derived
+ *
+ * Programmer:	KY
+ *
+ * Modifications:
+ *
+ *	Quincey Koziol, June 18, 2002
+ *	Added 'extra_offset' parameter
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_space_span_type( const H5S_t *space, size_t elmt_size,
+    /* out: */
+    MPI_Datatype *new_type,
+    size_t *count,
+    hsize_t *extra_offset,
+    hbool_t *is_derived_type )
+{
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_span_type);
+
+    /* Check args */
+    assert (space);
+
+    /* Create MPI type based on the kind of selection */
+    switch (H5S_GET_EXTENT_TYPE(space)) {
+        case H5S_NULL:
+        case H5S_SCALAR:
+        case H5S_SIMPLE:
+            switch(H5S_GET_SELECT_TYPE(space)) {
+                case H5S_SEL_NONE:
+                    if ( H5S_mpio_none_type( space, elmt_size,
+                        /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
+                        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"none\" selection to MPI type");
+                    break;
+
+                case H5S_SEL_ALL:
+                    if ( H5S_mpio_all_type( space, elmt_size,
+                        /* out: */ new_type, count, extra_offset, is_derived_type ) <0)
+                        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+                    break;
+
+                case H5S_SEL_POINTS:
+                    /* not yet implemented */
+                    ret_value = FAIL;
+                    break;
+
+                case H5S_SEL_HYPERSLABS:
+                    if(H5S_mpio_span_hyper_type( space, elmt_size,
+                        /* out: */ new_type, count, extra_offset, is_derived_type )<0)
+                        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert hyperslab selection to MPI type");
+                    break;
+
+                default:
+                    assert("unknown selection type" && 0);
+                    break;
+            } /* end switch */
+            break;
+
+        case H5S_COMPLEX:
+            /* not yet implemented */
+            HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet");
+
+        default:
+            assert("unknown data space type" && 0);
+            break;
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/* The following code has been used by Kent to test general collective
+   derived datatype functionality. It should NOT be called by other
+   routines except inside #ifdef KENT ... #endif.
+   Nov. 11, 2004 */
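+
+/*-------------------------------------------------------------------------
+ * Function:	H5S_mpio_span_hyper_type
+ *
+ * Purpose:	Translate a hyperslab selection described by a span tree
+ *		into an MPI derived datatype, by calling obtain_datatype()
+ *		on the head of the span list.
+ *
+ * Return:	non-negative on success, negative on failure.
+ *-------------------------------------------------------------------------
+ */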
+static herr_t
+H5S_mpio_span_hyper_type( const H5S_t *space, size_t elmt_size,
+    /* out: */
+    MPI_Datatype *new_type,
+    size_t *count,
+    hsize_t *extra_offset,
+    hbool_t *is_derived_type )
+{
+    MPI_Datatype           span_type;
+    H5S_hyper_span_t      *ospan;
+    H5S_hyper_span_info_t *odown;
+    hsize_t               *size;
+    int                    rank;
+    herr_t                 ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_span_hyper_type);
+
+    /* Check args */
+    assert (space);
+    /* assert(sizeof(MPI_Aint) >= sizeof(elmt_size));?? */
+
+    /* Only for simple extent
+     * rank = space->extent.u.simple.rank;
+     */
+    rank = space->extent.rank;
+
+    if (0==elmt_size)
+        goto empty;
+
+    /* Alias the extent's dimension sizes; this pointer is not owned
+     * here and must not be freed */
+    size = space->extent.size;
+
+    odown = space->select.sel_info.hslab->span_lst;
+    if(odown == NULL)
+        goto empty;
+    ospan = odown->head;
+    if(ospan == NULL)
+        goto empty;
+
+    if(obtain_datatype(size, ospan, &span_type, elmt_size, rank)<0)
+        HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't obtain MPI derived datatype");
+
+    *new_type = span_type;
+    /* fill in the remaining return values */
+    *count = 1;
+    *extra_offset = 0;
+    *is_derived_type = 1;
+    HGOTO_DONE(SUCCEED);
+
+empty:
+    /* special case: empty hyperslab */
+    *new_type = MPI_BYTE;
+    *count = 0;
+    *extra_offset = 0;
+    *is_derived_type = 0;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
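+
+
+/*-------------------------------------------------------------------------
+ * Function:	obtain_datatype
+ *
+ * Purpose:	Recursively build an MPI derived datatype from a hyperslab
+ *		span tree: the lowest dimension becomes an MPI_Type_hindexed
+ *		of byte blocks, each span of a higher dimension wraps the
+ *		datatype of its child spans in an MPI_Type_hvector, and the
+ *		per-span types are combined with MPI_Type_struct.
+ *
+ * Return:	non-negative on success, negative on failure.
+ *-------------------------------------------------------------------------
+ */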
+static herr_t obtain_datatype(const hsize_t size[], H5S_hyper_span_t* span, MPI_Datatype *span_type,
+    size_t elmt_size, int dimindex)
+{
+    int                    innercount, outercount;
+    MPI_Datatype           bas_type, temp_type, tempinner_type;
+    MPI_Datatype          *inner_type;
+    int                   *blocklen;
+    MPI_Aint              *disp;
+    MPI_Aint               stride;
+    MPI_Aint               extent, lb;
+    H5S_hyper_span_info_t *down;
+    H5S_hyper_span_t      *tspan;
+    hsize_t                total_lowd, total_lowd1;
+    int                    i;
+
+    assert(span);
+    down  = span->down;
+    tspan = span;
+
+    /* Count the spans at this level */
+    outercount = 0;
+    while(tspan) {
+        tspan = tspan->next;
+        outercount++;
+    }
+
+    blocklen   = (int *)HDcalloc((size_t)outercount, sizeof(int));
+    disp       = (MPI_Aint *)HDcalloc((size_t)outercount, sizeof(MPI_Aint));
+    inner_type = (MPI_Datatype *)HDcalloc((size_t)outercount, sizeof(MPI_Datatype));
+
+    tspan = span;
+    outercount = 0;
+
+    if(down == NULL) {
+        /* Lowest dimension: each span is a contiguous run of elements,
+         * described as hindexed blocks of an elmt_size-byte base type */
+        if(dimindex > 1)
+            printf("wrong area \n");
+        MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &bas_type);
+        MPI_Type_commit(&bas_type);
+
+        while(tspan) {
+            disp[outercount]     = (MPI_Aint)elmt_size * tspan->low;
+            blocklen[outercount] = (int)tspan->nelem;
+            tspan = tspan->next;
+            outercount++;
+        }
+
+        MPI_Type_hindexed(outercount, blocklen, disp, bas_type, span_type);
+    }
+    else { /* dimindex is the rank of the dimension */
+
+        if(dimindex < 2)
+            printf("something is wrong \n");
+
+        /* Calculate the total bytes of the lower dimensions */
+        total_lowd  = 1; /* one dimension down */
+        total_lowd1 = 1; /* two dimensions down */
+
+        for(i = 0; i < dimindex-1; i++)
+            total_lowd = total_lowd * size[i];
+
+        for(i = 0; i < dimindex-2; i++)
+            total_lowd1 = total_lowd1 * size[i];
+
+        HDfprintf(stdout, " one dimension down size %Hu", total_lowd);
+        HDfprintf(stdout, " two dimensions down size %Hu", total_lowd1);
+
+        while(tspan) {
+            /* Displacement should be in bytes and should carry dimension
+             * information: first use an MPI vector type to build the
+             * derived datatype for this span only, then calculate the
+             * displacement in bytes for this dimension */
+            disp[outercount]     = (MPI_Aint)(tspan->low * total_lowd * elmt_size);
+            blocklen[outercount] = 1;
+
+            /* generate the inner derived datatype recursively */
+            obtain_datatype(size, tspan->down->head, &temp_type, elmt_size, dimindex-1);
+
+#ifdef H5_HAVE_MPI2
+            MPI_Type_get_extent(temp_type, &lb, &extent);
+#else
+            MPI_Type_lb(temp_type, &lb);
+            MPI_Type_extent(temp_type, &extent);
+#endif
+
+            /* build the inner vector datatype; the stride runs from the
+             * first element of one block to the first element of the
+             * next, i.e. one full row of the lower dimensions */
+            /* stride = total_lowd1 * (size[dimindex-1]*elmt_size-extent-lb);
+             * -- wrong: not measured between first elements of blocks */
+            stride     = (MPI_Aint)(total_lowd * elmt_size);
+            innercount = (int)tspan->nelem;
+            MPI_Type_hvector(innercount, 1, stride, temp_type, &tempinner_type);
+            MPI_Type_commit(&tempinner_type);
+            MPI_Type_free(&temp_type);
+            inner_type[outercount] = tempinner_type;
+            outercount++;
+            tspan = tspan->next;
+        }
+
+        /* combine the per-span vector datatypes into the whole type */
+        MPI_Type_struct(outercount, blocklen, disp, inner_type, span_type);
+    }
+
+    MPI_Type_commit(span_type);
+    if(down == NULL)
+        MPI_Type_free(&bas_type);
+    else {
+        for(i = 0; i < outercount; i++)
+            MPI_Type_free(&inner_type[i]);
+    }
+
+    HDfree(inner_type);
+    HDfree(blocklen);
+    HDfree(disp);
+
+    return SUCCEED;
+}
 
 #endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h
index a0fdf09..61eb9a9 100644
--- a/src/H5Sprivate.h
+++ b/src/H5Sprivate.h
@@ -279,6 +279,15 @@ H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
     size_t *count,
     hsize_t *extra_offset,
     hbool_t *is_derived_type );
+
+H5_DLL herr_t
+H5S_mpio_space_span_type( const H5S_t *space, size_t elmt_size,
+    /* out: */
+    MPI_Datatype *new_type,
+    size_t *count,
+    hsize_t *extra_offset,
+    hbool_t *is_derived_type );
+
 #endif /* H5_HAVE_PARALLEL */
 #endif /* _H5Sprivate_H */
--
cgit v0.12