-rw-r--r--  src/H5Dio.c   | 530
-rw-r--r--  src/H5Dmpio.c | 399
-rw-r--r--  src/H5Dpkg.h  |  19
-rw-r--r--  src/H5Smpio.c | 286
4 files changed, 652 insertions(+), 582 deletions(-)
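The decision logic this patch adds to H5D_chunk_read/H5D_chunk_write and H5D_mpio_get_mini_chunk rests on a small MPI consensus step: each process computes a local quantity (the number of chunks its selection touches, or a flag saying its selection is regular), the values are reduced to rank 0, and the result is broadcast so that every process takes the same collective-versus-independent branch for a given chunk. The following standalone sketch is illustrative only (it is not library code, and the function name and surrounding main are invented); it shows that Reduce-plus-Bcast pattern in isolation.

/* Sketch of the consensus pattern used by the patch (illustrative only). */
#include <mpi.h>
#include <stdio.h>

/* Every rank passes in its local chunk count; every rank gets back the
 * global minimum, mirroring the MPI_Reduce + MPI_Bcast pair in
 * H5D_mpio_get_mini_chunk. */
static int
min_chunks_across_ranks(int local_chunks, MPI_Comm comm)
{
    int min_chunks = 0;

    /* Rank 0 computes the global minimum ... */
    MPI_Reduce(&local_chunks, &min_chunks, 1, MPI_INT, MPI_MIN, 0, comm);
    /* ... and shares it, so all ranks agree on the same threshold. */
    MPI_Bcast(&min_chunks, 1, MPI_INT, 0, comm);
    return min_chunks;
}

int
main(int argc, char *argv[])
{
    int rank, local, global;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    local  = rank + 1;            /* stand-in for a per-rank chunk count */
    global = min_chunks_across_ranks(local, MPI_COMM_WORLD);
    printf("rank %d: agreed minimum = %d\n", rank, global);
    MPI_Finalize();
    return 0;
}

A single MPI_Allreduce with MPI_MIN would be equivalent here; the patch keeps the explicit Reduce/Bcast pair and applies the same MIN reduction to the per-chunk "is the selection regular on every rank" flag when complex MPI derived datatypes are not available.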
diff --git a/src/H5Dio.c b/src/H5Dio.c index f1d12f8..fa3f49e 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -108,13 +108,24 @@ static htri_t H5D_get_collective_io_consensus(const H5F_t *file, const htri_t local_opinion, const unsigned flags); + +static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset, + const H5S_t *mem_space, + const H5S_t *file_space, + int *min_chunkf); +static herr_t +H5D_obtain_duplicate_pid(hid_t dxpl_id, + hid_t* dp_id, + H5D_dxpl_cache_t **cache); + #endif /* H5_HAVE_PARALLEL */ /* I/O info operations */ static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, - const H5S_t *mem_space, const H5S_t *file_space, - unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info); + hid_t dp_dxpl_id, H5D_dxpl_cache_t *dp_dxpl_cache, + const H5S_t *mem_space, const H5S_t *file_space, + unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info); /* Chunk operations */ static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type, @@ -648,8 +659,13 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ + H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */ + H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */ + hid_t dp_id; #ifdef H5_HAVE_PARALLEL - hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + H5FD_mpio_xfer_t xfer_mode; + #ifdef H5_HAVE_INSTRUMENTED_LIBRARY int prop_value,new_value; htri_t check_prop; @@ -660,6 +676,8 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, unsigned sconv_flags=0; /* Flags for the space conversion */ herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT(H5D_read) /* check args */ @@ -682,6 +700,7 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") #ifdef H5_HAVE_PARALLEL + /* Collective access is not permissible without a MPI based VFD */ if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file)) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only") @@ -765,8 +784,15 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, assert(0 && "Unhandled layout type!"); } /* end switch */ +#ifdef H5_HAVE_PARALLEL + /* Obtain duplicate property list id. This is used to handle + collective chunk IO. 
*/ + + if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't obtain duplicated property id") +#endif /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") #ifdef H5_HAVE_PARALLEL @@ -877,8 +903,13 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ + hid_t dp_id; + H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */ + H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */ #ifdef H5_HAVE_PARALLEL - hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + H5FD_mpio_xfer_t xfer_mode; + int mpi_rank; #ifdef H5_HAVE_INSTRUMENTED_LIBRARY int prop_value,new_value; htri_t check_prop; @@ -1013,9 +1044,12 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, default: assert(0 && "Unhandled layout type!"); } /* end switch */ - +#ifdef H5_HAVE_PARALLEL + if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't obtain duplicated property id") +#endif /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") #ifdef H5_HAVE_PARALLEL @@ -1654,6 +1688,12 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, H5D_storage_t store; /*union of EFL and chunk pointer in file space */ herr_t ret_value = SUCCEED; /*return value */ +#ifdef H5_HAVE_PARALLEL + int count_chunk,mpi_rank, mpi_code,min_num_chunk,is_regular,all_regular; + hid_t temp_id; + MPI_Comm comm; +#endif + FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read) /* Map elements between file and memory for each chunk*/ @@ -1679,7 +1719,13 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, /* Get first node in chunk skip list */ chunk_node=H5SL_first(fm.fsel); - +#ifdef H5_HAVE_PARALLEL + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk") + } + count_chunk = 0; +#endif /* Iterate through chunks to be operated on */ while(chunk_node) { H5D_chunk_info_t *chunk_info; /* chunk information */ @@ -1691,15 +1737,148 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, store.chunk.offset = chunk_info->coords; store.chunk.index = chunk_info->index; - /* Perform the actual read operation */ - status = (io_info->ops.read)(io_info, +#ifdef H5_HAVE_PARALLEL + + count_chunk++; + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + /* If the number of chunk is greater than minimum number of chunk, + Do independent read */ + + if(count_chunk <= min_num_chunk) { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + 
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && + H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) + is_regular = 1; + else is_regular = 0; + + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) +#endif + } + + if(count_chunk > min_num_chunk) { + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + + + else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)|| + (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + /* Perform the independent read operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, chunk_info->chunk_points, H5T_get_size(dataset->shared->type), chunk_info->fspace, chunk_info->mspace, buf); - - /* Check return value from optimized read */ - if (status<0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; +#else + + /* Perform the actual collective read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif + } + + else { + /* For regular selection, + if MPI_COMPLEX_DERIVED_DATATYPE is not defined, + unless spaces for all processors are regular, independent read operation should be performed.*/ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(!all_regular) { + + /* Perform the independent read operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + + else { + /* For regular collective read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + } +#else + + /* For regular collective read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + 
chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif + } + + } + else { + /* For regular independent read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + } + +#else + /* Perform the actual read operation for sequential*/ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif /* Get the next chunk node in the skip list */ chunk_node=H5SL_next(chunk_node); @@ -1937,6 +2116,8 @@ done: * Hacked on it a lot. :-) * Leon Arber: 4/20/04 * Added support for data transforms. + * Kent Yang: 8/10/04 + * Added support for collective chunk IO. * *------------------------------------------------------------------------- */ @@ -1973,6 +2154,13 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, H5D_storage_t store; /*union of EFL and chunk pointer in file space */ herr_t ret_value = SUCCEED; /*return value */ +#ifdef H5_HAVE_PARALLEL + hid_t temp_id; + int count_chunk,mpi_rank,mpi_code,min_num_chunk,is_regular,all_regular = 0; + MPI_Comm comm; + +#endif + FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write) /* Map elements between file and memory for each chunk*/ @@ -1990,6 +2178,14 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, #ifdef H5S_DEBUG H5_timer_begin(&timer); #endif + +#ifdef H5_HAVE_PARALLEL + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk") + } + count_chunk = 0; +#endif /* Get first node in chunk skip list */ chunk_node=H5SL_first(fm.fsel); @@ -2004,15 +2200,136 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, store.chunk.offset = chunk_info->coords; store.chunk.index = chunk_info->index; - /* Perform the actual write operation */ - status = (io_info->ops.write)(io_info, +#ifdef H5_HAVE_PARALLEL + + count_chunk++; + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + /* If the number of chunk is greater than minimum number of chunk, + Do independent write */ + + if(count_chunk <= min_num_chunk) { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && + H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) + is_regular = 1; + else is_regular = 0; + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, 
"MPI_Bcast failed", mpi_code) +#endif + } + if(count_chunk > min_num_chunk) { + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + fflush(stdout); + status = (io_info->ops_sca.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + io_info->dxpl_id = temp_id; + } + + + else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)|| + (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + /* Perform the independent write operation */ + + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.write)(io_info, chunk_info->chunk_points, H5T_get_size(dataset->shared->type), chunk_info->fspace, chunk_info->mspace, buf); - - /* Check return value from optimized write */ - if (status<0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + io_info->dxpl_id = temp_id; +#else + + /* Perform the actual collective write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") +#endif + } + + else { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(!all_regular) { + + /* Perform the independent write operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + else { + /* For regular selection, perform the collective write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + } +#else + + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + +#endif + } + } + else { + /* For independent parallel write*/ + /* Perform the write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + } + +#else + /* Perform the actual write operation for sequential*/ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + 
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") +#endif /* Get the next chunk node in the skip list */ chunk_node=H5SL_next(chunk_node); @@ -3270,7 +3587,16 @@ done: */ static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, - const H5S_t + hid_t +#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) + UNUSED +#endif /* H5_HAVE_PARALLEL */ + dp_dxpl_id, H5D_dxpl_cache_t + +#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) + UNUSED +#endif /* H5_HAVE_PARALLEL */ +*dp_dxpl_cache,const H5S_t #if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) UNUSED #endif /* H5_HAVE_PARALLEL */ @@ -3320,6 +3646,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, /* * Check if we can set direct MPI-IO read/write functions */ + io_info->dp_dxpl_id = dp_dxpl_id; + io_info->dp_dxpl_cache = dp_dxpl_cache; + opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags); if(opt==FAIL) HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace "); @@ -3333,28 +3662,20 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { /* Set the pointers to the MPI-specific routines */ - if((H5S_SELECT_IS_REGULAR(file_space) == TRUE) && - (H5S_SELECT_IS_REGULAR(mem_space) == TRUE)){ - io_info->ops.read = H5D_mpio_spaces_read; - io_info->ops.write = H5D_mpio_spaces_write; - } - - #ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS - else { - io_info->ops.read = H5D_mpio_spaces_span_read; - io_info->ops.write = H5D_mpio_spaces_span_write; - } - #endif - /* Indicate that the I/O will be parallel */ - *use_par_opt_io=TRUE; - } /* end if */ + io_info->ops.read = H5D_mpio_select_read; + io_info->ops.write = H5D_mpio_select_write; + io_info->ops_sca.read = H5D_select_read; + io_info->ops_sca.write = H5D_select_write; + *use_par_opt_io=TRUE; + /* Indicate that the I/O will use collective */ + } + /* end if */ else { - /* Indicate that the I/O will _NOT_ be parallel */ + /* Indicate that the I/O will _NOT_ be parallel, use independent IO */ *use_par_opt_io=FALSE; io_info->ops.read = H5D_select_read; io_info->ops.write = H5D_select_write; - } /* end else */ #else io_info->ops.read = H5D_select_read; @@ -3372,3 +3693,136 @@ done: #endif /* H5_HAVE_PARALLEL || H5S_DEBUG */ FUNC_LEAVE_NOAPI(ret_value) } /* end H5D_ioinfo_init() */ + + +#ifdef H5_HAVE_PARALLEL + + +/*------------------------------------------------------------------------- + * Function: H5D_mpio_get_mini_chunk + * + * Purpose: Routine for obtaining minimum number of chunks to cover + hyperslab selection selected by all processors. 
+ * + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: + * + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset, + const H5S_t *mem_space, + const H5S_t *file_space, + int *min_chunkf) { + + + hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */ + hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */ + endf[H5S_MAX_RANK]; /* Selection end bounds */ + unsigned dim_rankf; /* Number of dimensions of file dataspace */ + int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */ + tnum_chunkf, /* Number of chunks selection overlaps */ + max_chunkf, /* Maximum number of chunks selection overlaps */ + num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */ + unsigned dim_chunks; /* Temporary number of chunks in a dimension */ + MPI_Comm comm; /* MPI communicator for file */ + int mpi_rank; /* Rank in MPI communicator */ + int mpi_code; /* MPI return code */ + unsigned u; /* Local index variable */ + herr_t ret_value; + + ret_value = SUCCEED; + FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_get_mini_chunk); + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + + + dim_rankf = H5S_GET_EXTENT_NDIMS(file_space); + + if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks"); + + for(u=0; u < dset->shared->layout.u.chunk.ndims; u++) + chunk_dim[u] = dset->shared->layout.u.chunk.dim[u]; + + + /* Compute the number of chunks covered by the selection on this process */ + tnum_chunkf = 1; + for (u=0; u<dim_rankf; u++) { + dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1; + tnum_chunkf = dim_chunks*tnum_chunkf; + } + + /* Determine the minimum and maximum # of chunks for all processes */ + + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + + + /* Broadcast the flag indicating the number of chunks are the same */ + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + + done: + FUNC_LEAVE_NOAPI(ret_value); + +} + + +/*------------------------------------------------------------------------- + * Function: H5D_obtain_duplicate_pid + * + * Purpose: Routine for obtaining a copy property list ID of + data transfer property. 
+ + * + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: + * + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +static herr_t H5D_obtain_duplicate_pid(hid_t dxpl_id, + hid_t* dp_id, + H5D_dxpl_cache_t **cache) +{ + + H5FD_mpio_xfer_t xfer_mode; + H5P_genplist_t *dp_dx_plist; /* Data transer property list */ + herr_t ret_value=SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT(H5D_obtain_duplicate_pid) + + *dp_id = H5Pcopy(dxpl_id); + + /* printf("inside function dp id %d\n",*dp_id);*/ + /* Get the dataset transfer property list */ + if (NULL == (dp_dx_plist = H5I_object(*dp_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list") + + xfer_mode = H5FD_MPIO_INDEPENDENT; + if(H5P_set (dp_dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode") + + /* Fill the DXPL cache values for later use */ + if (H5D_get_dxpl_cache(*dp_id,cache)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") + + done: + FUNC_LEAVE_NOAPI(ret_value) + +} +#endif /*H5_HAVE_PARALLEL*/ diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index e216d85..f85c551 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -43,12 +43,6 @@ H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size, void *buf/*out*/, hbool_t do_write); -/* For irregular hyperslab selection. */ -static herr_t -H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size, - const H5S_t *file_space, const H5S_t *mem_space, - void *buf/*out*/, - hbool_t do_write); /*------------------------------------------------------------------------- * Function: H5D_mpio_opt_possible @@ -66,10 +60,10 @@ H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, size_t elmt_size, * *------------------------------------------------------------------------- */ + htri_t H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags) { - htri_t c1,c2; /* Flags whether a selection is optimizable */ htri_t ret_value=TRUE; FUNC_ENTER_NOAPI(H5D_mpio_opt_possible, FAIL); @@ -88,16 +82,7 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f && (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space)))) HGOTO_DONE(FALSE); - /* Check whether both selections are "regular" */ - /*#ifndef KYANG*/ -#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS - c1=H5S_SELECT_IS_REGULAR(file_space); - c2=H5S_SELECT_IS_REGULAR(mem_space); - if(c1==FAIL || c2==FAIL) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for single selection blocks"); - if(c1==FALSE || c2==FALSE) - HGOTO_DONE(FALSE); -#endif + /* Can't currently handle point selections */ if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space)) HGOTO_DONE(FALSE); @@ -107,126 +92,35 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f (flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED) HGOTO_DONE(FALSE); - if ((flags&H5S_CONV_STORAGE_MASK)==H5S_CONV_STORAGE_CHUNKED) { - hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */ - hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */ - endf[H5S_MAX_RANK]; /* Selection end bounds */ - unsigned dim_rankf; /* Number of dimensions of file dataspace */ - int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */ - tnum_chunkf, /* Number of chunks selection overlaps */ - 
max_chunkf, /* Maximum number of chunks selection overlaps */ - min_chunkf, /* Minimum number of chunks selection overlaps */ - num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */ - unsigned dim_chunks; /* Temporary number of chunks in a dimension */ - MPI_Comm comm; /* MPI communicator for file */ - int mpi_rank; /* Rank in MPI communicator */ - int mpi_code; /* MPI return code */ - unsigned u; /* Local index variable */ - - /* Disallow collective I/O if there are any I/O filters on chunks */ - if(dset->shared->dcpl_cache.pline.nused>0) - HGOTO_DONE(FALSE) - - /* Getting MPI communicator and rank */ - if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") - if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") - - /* Currently collective chunking storage - inside HDF5 is supported for either one of the following two cases: - 1. All the hyperslabs for one process is inside one chunk. - 2. For single hyperslab selection, the number of chunks that covered - the single selection for all processes should be equal. - KY, 2004/7/14 - */ - - /* Quincey, please read. - This is maybe redundant, I think only when both memory and file space be SCALAR - space, the collective IO can work. Otherwise, SELECT_POINT will be reached,collective - IO shouldn't work. - Please clarify and correct the code on the following, - Quincey said that it was probably okay if only one data space is SCALAR, - Still keep the code here until we added more tests later. - Kent */ - if(H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR ==H5S_GET_EXTENT_TYPE(file_space)) { - if(!(H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space) && H5S_SCALAR ==H5S_GET_EXTENT_TYPE(file_space))) - HGOTO_DONE(FALSE) - else - HGOTO_DONE(TRUE) - } /* end if */ - - dim_rankf = H5S_GET_EXTENT_NDIMS(file_space); - - if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks"); - - for(u=0; u < dset->shared->layout.u.chunk.ndims; u++) - chunk_dim[u] = dset->shared->layout.u.chunk.dim[u]; - - /* Case 1: check whether all hyperslab in this process is inside one chunk. - Note: we don't handle when starting point is less than zero since that may cover - two chunks. */ - - /*for file space checking*/ - pcheck_hyper = 1; - for (u=0; u<dim_rankf; u++) - if(endf[u]/chunk_dim[u]!=startf[u]/chunk_dim[u]) { - pcheck_hyper = 0; - break; - } - - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&pcheck_hyper,&check_hyper,1,MPI_INT,MPI_LAND,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&check_hyper,1,MPI_INT,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) - - /*if check_hyper is true, condition for collective IO case is fulfilled, no - need to do further test. */ - if(check_hyper) - HGOTO_DONE(TRUE); + /*The handling of memory space is different for chunking + and contiguous storage, + For contigous storage, mem_space and file_space won't + change when it it is doing disk IO. + For chunking storage, mem_space will change for different + chunks. So for chunking storage, whether we can use + collective IO will defer until the each chunk IO is reached. + For contiguous storage, if we find the MPI-IO cannot + support complicated MPI derived data type, we will + set use_par_opt_io = FALSE. 
+ */ + if(dset->shared->layout.type == H5D_CONTIGUOUS) { + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) || + (H5S_SELECT_IS_REGULAR(mem_space) != TRUE)) + HGOTO_DONE(FALSE); +#endif + } - /* Case 2:Check whether the number of chunks that covered the single hyperslab is the same. - If not,no collective chunk IO. - KY, 2004/7/14 - */ - - c1 = H5S_SELECT_IS_SINGLE(file_space); - c2 = H5S_SELECT_IS_SINGLE(mem_space); - - if(c1==FAIL || c2 ==FAIL) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for single selection blocks"); - if(c1==FALSE || c2 ==FALSE) - HGOTO_DONE(FALSE); - - /* Compute the number of chunks covered by the selection on this process */ - tnum_chunkf = 1; - for (u=0; u<dim_rankf; u++) { - dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1; - tnum_chunkf = dim_chunks*tnum_chunkf; - } - - /* Determine the minimum and maximum # of chunks for all processes */ - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&max_chunkf,1,MPI_INT,MPI_MAX,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&min_chunkf,1,MPI_INT,MPI_MIN,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - - /* Let the rank==0 process determine if the same number of chunks will be operated on by all processes */ - if(mpi_rank == 0) - num_chunks_same = (max_chunkf==min_chunkf); - - /* Broadcast the flag indicating the number of chunks are the same */ - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&num_chunks_same,1,MPI_INT,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) - - /* Can't handle case when number of chunks is different (yet) */ - if(!num_chunks_same) - HGOTO_DONE(FALSE); - } /* end if */ + if(dset->shared->layout.type == H5D_CHUNKED) + if(dset->shared->dcpl_cache.pline.nused>0) + HGOTO_DONE(FALSE); /* Perform the independent write operation */ + + done: FUNC_LEAVE_NOAPI(ret_value); + } /* H5D_mpio_opt_possible() */ @@ -370,281 +264,68 @@ done: FUNC_LEAVE_NOAPI(ret_value); } /* end H5D_mpio_spaces_xfer() */ - -static herr_t -H5D_mpio_spaces_span_xfer(H5D_io_info_t *io_info, - size_t elmt_size, - const H5S_t *file_space, - const H5S_t *mem_space, - void *_buf /*out*/, - hbool_t do_write ) -{ - haddr_t addr; /* Address of dataset (or selection) within file */ - size_t mpi_buf_count, mpi_file_count; /* Number of "objects" to transfer */ - hsize_t mpi_buf_offset, mpi_file_offset; /* Offset within dataset where selection (ie. 
MPI type) begins */ - MPI_Datatype mpi_buf_type, mpi_file_type; /* MPI types for buffer (memory) and file */ - hbool_t mbt_is_derived=0, /* Whether the buffer (memory) type is derived and needs to be free'd */ - mft_is_derived=0; /* Whether the file type is derived and needs to be free'd */ - hbool_t plist_is_setup=0; /* Whether the dxpl has been customized */ - uint8_t *buf=(uint8_t *)_buf; /* Alias for pointer arithmetic */ - int mpi_code; /* MPI return code */ - herr_t ret_value = SUCCEED; /* Return value */ - - - FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_spaces_span_xfer); - - /* Check args */ - assert (io_info); - assert (io_info->dset); - assert (file_space); - assert (mem_space); - assert (buf); - assert (IS_H5FD_MPIO(io_info->dset->ent.file)); - - /* Make certain we have the correct type of property list */ - assert(TRUE==H5P_isa_class(io_info->dxpl_id,H5P_DATASET_XFER)); - - - /* create the MPI buffer type */ - if(H5S_SELECT_IS_REGULAR(mem_space)==TRUE){ - if (H5S_mpio_space_type( mem_space, elmt_size, - /* out: */ - &mpi_buf_type, - &mpi_buf_count, - &mpi_buf_offset, - &mbt_is_derived )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type"); - } - else { - if (H5S_mpio_space_span_type( mem_space, elmt_size, - /* out: */ - &mpi_buf_type, - &mpi_buf_count, - &mpi_buf_offset, - &mbt_is_derived )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type"); - } - - /* create the MPI file type */ - - if(H5S_SELECT_IS_REGULAR(file_space)== TRUE){ - if ( H5S_mpio_space_type( file_space, elmt_size, - /* out: */ - &mpi_file_type, - &mpi_file_count, - &mpi_file_offset, - &mft_is_derived )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type"); - } - else { - if ( H5S_mpio_space_span_type( file_space, elmt_size, - /* out: */ - &mpi_file_type, - &mpi_file_count, - &mpi_file_offset, - &mft_is_derived )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type"); - } - - /* Get the base address of the contiguous dataset or the chunk */ - if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS) - addr = H5D_contig_get_addr(io_info->dset) + mpi_file_offset; - else { - haddr_t chunk_addr; /* for collective chunk IO */ - assert(io_info->dset->shared->layout.type == H5D_CHUNKED); - chunk_addr=H5D_istore_get_addr(io_info,NULL); - addr = H5F_BASE_ADDR(io_info->dset->ent.file) + chunk_addr + mpi_file_offset; - } - - /* - * Pass buf type, file type to the file driver. Request an MPI type - * transfer (instead of an elementary byteblock transfer). 
- */ - if(H5FD_mpi_setup_collective(io_info->dxpl_id, mpi_buf_type, mpi_file_type)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties"); - plist_is_setup=1; - - /* Adjust the buffer pointer to the beginning of the selection */ - buf+=mpi_buf_offset; - - /* transfer the data */ - if (do_write) { - if (H5F_block_write(io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0) - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed"); - } - else { - if (H5F_block_read (io_info->dset->ent.file, H5FD_MEM_DRAW, addr, mpi_buf_count, io_info->dxpl_id, buf) <0) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed"); - } - -done: - /* Reset the dxpl settings */ - if(plist_is_setup) { - if(H5FD_mpi_teardown_collective(io_info->dxpl_id)<0) - HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values"); - } /* end if */ - - /* free the MPI buf and file types */ - if (mbt_is_derived) { - if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_buf_type ))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); - } - if (mft_is_derived) { - if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_file_type ))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); - } - - FUNC_LEAVE_NOAPI(ret_value); -} /* end H5D_mpio_spaces_span_xfer() */ - /*------------------------------------------------------------------------- - * Function: H5D_mpio_spaces_read + * Function: H5D_mpio_select_read * * Purpose: MPI-IO function to read directly from app buffer to file. * * Return: non-negative on success, negative on failure. * - * Programmer: rky 980813 + * Programmer: * * Modifications: * - * rky 980918 - * Added must_convert parameter to let caller know we can't optimize the xfer. - * - * QAK - 2002/04/02 - * Removed the must_convert parameter and move preconditions to - * H5S_mpio_opt_possible() routine - * *------------------------------------------------------------------------- */ herr_t -H5D_mpio_spaces_read(H5D_io_info_t *io_info, +H5D_mpio_select_read(H5D_io_info_t *io_info, size_t UNUSED nelmts, size_t elmt_size, const H5S_t *file_space, const H5S_t *mem_space, void *buf/*out*/) { herr_t ret_value; - FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_read); + FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_select_read); - ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space, + + ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space, mem_space, buf, 0/*read*/); FUNC_LEAVE_NOAPI(ret_value); -} /* end H5D_mpio_spaces_read() */ +} /* end H5D_mpio_select_read() */ /*------------------------------------------------------------------------- - * Function: H5D_mpio_spaces_write + * Function: H5D_mpio_select_write * * Purpose: MPI-IO function to write directly from app buffer to file. * * Return: non-negative on success, negative on failure. * - * Programmer: rky 980813 + * Programmer: * * Modifications: * - * rky 980918 - * Added must_convert parameter to let caller know we can't optimize the xfer. 
- * - * QAK - 2002/04/02 - * Removed the must_convert parameter and move preconditions to - * H5S_mpio_opt_possible() routine * *------------------------------------------------------------------------- */ herr_t -H5D_mpio_spaces_write(H5D_io_info_t *io_info, +H5D_mpio_select_write(H5D_io_info_t *io_info, size_t UNUSED nelmts, size_t elmt_size, const H5S_t *file_space, const H5S_t *mem_space, const void *buf) { herr_t ret_value; - FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_write); + FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_select_write); /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space, - mem_space, (void*)buf, 1/*write*/); + ret_value = H5D_mpio_spaces_xfer(io_info, elmt_size, file_space, + mem_space, (void*)buf, 1/*write*/); FUNC_LEAVE_NOAPI(ret_value); } /* end H5D_mpio_spaces_write() */ - -/*------------------------------------------------------------------------- - * Function: H5D_mpio_spaces_span_read - * - * Purpose: MPI-IO function to read directly from app buffer to file for - span-tree - * - * Return: non-negative on success, negative on failure. - * - * Programmer: KY - * - * Modifications: - * - * - *------------------------------------------------------------------------- - */ -herr_t -H5D_mpio_spaces_span_read(H5D_io_info_t *io_info, - size_t UNUSED nelmts, - size_t elmt_size, - const H5S_t *file_space, - const H5S_t *mem_space, - void *buf/*out*/) -{ - herr_t ret_value; - - FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_read); - - ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space, - mem_space, buf, 0/*read*/); - - FUNC_LEAVE_NOAPI(ret_value); -} /* end H5D_mpio_spaces_read() */ - - -/*------------------------------------------------------------------------- - * Function: H5D_mpio_spaces_span_write - * - * Purpose: MPI-IO function to write directly from app buffer to file. - * - * Return: non-negative on success, negative on failure. - * - * Programmer: KY - - * - * Modifications: - * - * rky 980918 - * Added must_convert parameter to let caller know we can't optimize the xfer. 
- * - * QAK - 2002/04/02 - * Removed the must_convert parameter and move preconditions to - * H5S_mpio_opt_possible() routine - * - *------------------------------------------------------------------------- - */ -herr_t -H5D_mpio_spaces_span_write(H5D_io_info_t *io_info, - size_t UNUSED nelmts, - size_t elmt_size, - const H5S_t *file_space, - const H5S_t *mem_space, - const void *buf) -{ - herr_t ret_value; - - FUNC_ENTER_NOAPI_NOFUNC(H5D_mpio_spaces_span_write); - - /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - ret_value = H5D_mpio_spaces_span_xfer(io_info, elmt_size, file_space, - mem_space, (void*)buf, 1/*write*/); - - FUNC_LEAVE_NOAPI(ret_value); -} /* end H5D_mpio_spaces_span_write() */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index a4b4574..7b2c9e3 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -100,8 +100,13 @@ typedef struct H5D_io_info_t { H5D_t *dset; /* Pointer to dataset being operated on */ const H5D_dxpl_cache_t *dxpl_cache; /* Pointer to cache DXPL info */ hid_t dxpl_id; /* Original DXPL ID */ +#ifdef H5_HAVE_PARALLEL + hid_t dp_dxpl_id; + H5D_dxpl_cache_t *dp_dxpl_cache; +#endif const H5D_storage_t *store; /* Dataset storage info */ H5D_io_ops_t ops; /* I/O operation function pointers */ + H5D_io_ops_t ops_sca; #ifdef H5S_DEBUG H5S_iostats_t *stats; /* I/O statistics */ #endif /* H5S_DEBUG */ @@ -276,6 +281,20 @@ H5_DLL ssize_t H5D_efl_writevv(const H5D_io_info_t *io_info, const void *buf); #ifdef H5_HAVE_PARALLEL + +/* MPI-IO function to read , it will select either regular or irregular read */ +H5_DLL herr_t H5D_mpio_select_read(H5D_io_info_t *io_info, + size_t nelmts, size_t elmt_size, + const struct H5S_t *file_space, const struct H5S_t *mem_space, + void *buf/*out*/); + +/* MPI-IO function to read , it will select either regular or irregular read */ +H5_DLL herr_t H5D_mpio_select_write(H5D_io_info_t *io_info, + size_t nelmts, size_t elmt_size, + const struct H5S_t *file_space, const struct H5S_t *mem_space, + const void *buf); + + /* MPI-IO function to read directly from app buffer to file rky980813 */ H5_DLL herr_t H5D_mpio_spaces_read(H5D_io_info_t *io_info, size_t nelmts, size_t elmt_size, diff --git a/src/H5Smpio.c b/src/H5Smpio.c index 5ccf842..a9b90ee 100644 --- a/src/H5Smpio.c +++ b/src/H5Smpio.c @@ -462,190 +462,7 @@ done: FUNC_LEAVE_NOAPI(ret_value); } - -/*------------------------------------------------------------------------- - * Function: H5S_mpio_space_type - * - * Purpose: Translate an HDF5 dataspace selection into an MPI type. - * Currently handle only hyperslab and "all" selections. - * - * Return: non-negative on success, negative on failure. 
- * - * Outputs: *new_type the MPI type corresponding to the selection - * *count how many objects of the new_type in selection - * (useful if this is the buffer type for xfer) - * *extra_offset Number of bytes of offset within dataset - * *is_derived_type 0 if MPI primitive type, 1 if derived - * - * Programmer: rky 980813 - * - * Modifications: - * - * Quincey Koziol, June 18, 2002 - * Added 'extra_offset' parameter - * - *------------------------------------------------------------------------- - */ -herr_t -H5S_mpio_space_type( const H5S_t *space, size_t elmt_size, - /* out: */ - MPI_Datatype *new_type, - size_t *count, - hsize_t *extra_offset, - hbool_t *is_derived_type ) -{ - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_type); - - /* Check args */ - assert (space); - - /* Creat MPI type based on the kind of selection */ - switch (H5S_GET_EXTENT_TYPE(space)) { - case H5S_NULL: - case H5S_SCALAR: - case H5S_SIMPLE: - switch(H5S_GET_SELECT_TYPE(space)) { - case H5S_SEL_NONE: - if ( H5S_mpio_none_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type ) <0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - case H5S_SEL_ALL: - if ( H5S_mpio_all_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type ) <0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - case H5S_SEL_POINTS: - /* not yet implemented */ - ret_value = FAIL; - break; - - case H5S_SEL_HYPERSLABS: - if(H5S_mpio_hyper_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - default: - assert("unknown selection type" && 0); - break; - } /* end switch */ - break; - - case H5S_COMPLEX: - /* not yet implemented */ - HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet"); - - default: - assert("unknown data space type" && 0); - break; - } - -done: - FUNC_LEAVE_NOAPI(ret_value); -} - - - -/*------------------------------------------------------------------------- - * Function: H5S_mpio_space_span_type - * - * Purpose: Translate an HDF5 dataspace selection into a general - MPI derived datatype, the selection is implemented with - span-tree. - * - * Currently handle only hyperslab and "all" selections. - * - * Return: non-negative on success, negative on failure. 
- * - * Outputs: *new_type the MPI type corresponding to the selection - * *count how many objects of the new_type in selection - * (useful if this is the buffer type for xfer) - * *extra_offset Number of bytes of offset within dataset - * *is_derived_type 0 if MPI primitive type, 1 if derived - * - * Programmer: KY - * - * Modifications: - * - * Quincey Koziol, June 18, 2002 - * Added 'extra_offset' parameter - * - *------------------------------------------------------------------------- - */ -herr_t -H5S_mpio_space_span_type( const H5S_t *space, - size_t elmt_size,/* out: */ - MPI_Datatype *new_type, - size_t *count, - hsize_t *extra_offset, - hbool_t *is_derived_type ) -{ - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_span_type); - - /* Check args */ - assert (space); - - /* Creat MPI type based on the kind of selection */ - switch (H5S_GET_EXTENT_TYPE(space)) { - case H5S_NULL: - case H5S_SCALAR: - case H5S_SIMPLE: - switch(H5S_GET_SELECT_TYPE(space)) { - case H5S_SEL_NONE: - if ( H5S_mpio_none_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type ) <0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - case H5S_SEL_ALL: - if ( H5S_mpio_all_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type ) <0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - case H5S_SEL_POINTS: - /* not yet implemented */ - ret_value = FAIL; - break; - - case H5S_SEL_HYPERSLABS: - if(H5S_mpio_span_hyper_type( space, elmt_size, - /* out: */ new_type, count, extra_offset, is_derived_type )<0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); - break; - - default: - assert("unknown selection type" && 0); - break; - } /* end switch */ - break; - - case H5S_COMPLEX: - /* not yet implemented */ - HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet"); - - default: - assert("unknown data space type" && 0); - break; - } - -done: - FUNC_LEAVE_NOAPI(ret_value); -} - -/* The following codes have been used by Kent to test - general collective derived datatype functionality. - It should NOT be called by other routines except with - macro #ifdef KENT #endif - Nov. 11th, 2004 */ @@ -682,20 +499,24 @@ H5S_mpio_span_hyper_type( const H5S_t *space, herr_t ret_value = SUCCEED; MPI_Aint extent,lb; + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5S_mpio_span_hyper_type); /* Check args */ assert (space); /* assert(sizeof(MPI_Aint) >= sizeof(elmt_size)); not sure the reason*/ - + + rank = space->extent.rank; /* size = HDcalloc((size_t)rank,sizeof(hsize_t)); */ if (0==elmt_size) goto empty; size = space->extent.size; - + if(size == 0) + goto empty; + odown = space->select.sel_info.hslab->span_lst; if(odown == NULL) goto empty; @@ -904,4 +725,99 @@ static herr_t H5S_obtain_datatype(const hsize_t size[], FUNC_LEAVE_NOAPI(ret_value); } + + +/*------------------------------------------------------------------------- + * Function: H5S_mpio_space_type + * + * Purpose: Translate an HDF5 dataspace selection into an MPI type. + * Currently handle only hyperslab and "all" selections. + * + * Return: non-negative on success, negative on failure. 
+ * + * Outputs: *new_type the MPI type corresponding to the selection + * *count how many objects of the new_type in selection + * (useful if this is the buffer type for xfer) + * *extra_offset Number of bytes of offset within dataset + * *is_derived_type 0 if MPI primitive type, 1 if derived + * + * Programmer: rky 980813 + * + * Modifications: + * + * Quincey Koziol, June 18, 2002 + * Added 'extra_offset' parameter + * + *------------------------------------------------------------------------- + */ +herr_t +H5S_mpio_space_type( const H5S_t *space, size_t elmt_size, + /* out: */ + MPI_Datatype *new_type, + size_t *count, + hsize_t *extra_offset, + hbool_t *is_derived_type ) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_space_type); + + /* Check args */ + assert (space); + + /* Creat MPI type based on the kind of selection */ + switch (H5S_GET_EXTENT_TYPE(space)) { + case H5S_NULL: + case H5S_SCALAR: + case H5S_SIMPLE: + switch(H5S_GET_SELECT_TYPE(space)) { + case H5S_SEL_NONE: + if ( H5S_mpio_none_type( space, elmt_size, + /* out: */ new_type, count, extra_offset, is_derived_type ) <0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); + break; + + case H5S_SEL_ALL: + if ( H5S_mpio_all_type( space, elmt_size, + /* out: */ new_type, count, extra_offset, is_derived_type ) <0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); + break; + + case H5S_SEL_POINTS: + /* not yet implemented */ + ret_value = FAIL; + break; + + case H5S_SEL_HYPERSLABS: + if((H5S_SELECT_IS_REGULAR(space) == TRUE)) { + if(H5S_mpio_hyper_type( space, elmt_size, + /* out: */ new_type, count, extra_offset, is_derived_type )<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); + } + else { + if(H5S_mpio_span_hyper_type( space, elmt_size, + /* out: */ new_type, count, extra_offset, is_derived_type )<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type"); + } + break; + + default: + assert("unknown selection type" && 0); + break; + } /* end switch */ + break; + + case H5S_COMPLEX: + /* not yet implemented */ + HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "complex data spaces are not supported yet"); + + default: + assert("unknown data space type" && 0); + break; + } + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + #endif /* H5_HAVE_PARALLEL */ |
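For reference, the path this patch exercises is reached whenever an application writes a chunked dataset with a collective dataset-transfer property list: per chunk, H5D_chunk_write then either issues the collective H5D_mpio_select_write or falls back to H5D_select_write through the duplicated independent-mode DXPL created by H5D_obtain_duplicate_pid. The minimal caller below is a sketch only — the file name, dataset name and sizes are invented, error checking is omitted, and it uses the old five-argument H5Dcreate signature matching this code base (newer releases expose it as H5Dcreate1).

/* Illustrative parallel caller (not part of the patch); assumes the run
 * uses a rank count that divides 1024 evenly. */
#include <mpi.h>
#include <hdf5.h>

int
main(int argc, char *argv[])
{
    int     mpi_rank, mpi_size, i;
    hid_t   fapl, file, dcpl, fspace, mspace, dset, dxpl;
    hsize_t dims[1] = {1024}, chunkd[1] = {64};
    hsize_t start[1], count[1];
    int     buf[1024];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

    /* File access through the MPI-IO driver */
    fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
    file = H5Fcreate("coll_chunk.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);

    /* Chunked layout, so H5D_chunk_write is the routine exercised */
    dcpl = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_chunk(dcpl, 1, chunkd);
    fspace = H5Screate_simple(1, dims, NULL);
    dset = H5Dcreate(file, "data", H5T_NATIVE_INT, fspace, dcpl);

    /* Each rank selects a regular, contiguous hyperslab of the file space */
    count[0] = dims[0] / (hsize_t)mpi_size;
    start[0] = count[0] * (hsize_t)mpi_rank;
    H5Sselect_hyperslab(fspace, H5S_SELECT_SET, start, NULL, count, NULL);
    mspace = H5Screate_simple(1, count, NULL);
    for (i = 0; i < (int)count[0]; i++)
        buf[i] = mpi_rank;

    /* Collective transfer mode: per chunk, the library now chooses between
     * the collective path and the duplicated independent DXPL. */
    dxpl = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
    H5Dwrite(dset, H5T_NATIVE_INT, mspace, fspace, dxpl, buf);

    H5Pclose(dxpl);
    H5Pclose(dcpl);
    H5Pclose(fapl);
    H5Sclose(mspace);
    H5Sclose(fspace);
    H5Dclose(dset);
    H5Fclose(file);
    MPI_Finalize();
    return 0;
}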