diff options
Diffstat (limited to 'src/H5Dio.c')
-rw-r--r-- | src/H5Dio.c | 530 |
1 files changed, 492 insertions, 38 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c index f1d12f8..fa3f49e 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -108,13 +108,24 @@ static htri_t H5D_get_collective_io_consensus(const H5F_t *file, const htri_t local_opinion, const unsigned flags); + +static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset, + const H5S_t *mem_space, + const H5S_t *file_space, + int *min_chunkf); +static herr_t +H5D_obtain_duplicate_pid(hid_t dxpl_id, + hid_t* dp_id, + H5D_dxpl_cache_t **cache); + #endif /* H5_HAVE_PARALLEL */ /* I/O info operations */ static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, - const H5S_t *mem_space, const H5S_t *file_space, - unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info); + hid_t dp_dxpl_id, H5D_dxpl_cache_t *dp_dxpl_cache, + const H5S_t *mem_space, const H5S_t *file_space, + unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info); /* Chunk operations */ static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type, @@ -648,8 +659,13 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ + H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */ + H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */ + hid_t dp_id; #ifdef H5_HAVE_PARALLEL - hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + H5FD_mpio_xfer_t xfer_mode; + #ifdef H5_HAVE_INSTRUMENTED_LIBRARY int prop_value,new_value; htri_t check_prop; @@ -660,6 +676,8 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, unsigned sconv_flags=0; /* Flags for the space conversion */ herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT(H5D_read) /* check args */ @@ -682,6 +700,7 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") #ifdef H5_HAVE_PARALLEL + /* Collective access is not permissible without a MPI based VFD */ if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file)) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only") @@ -765,8 +784,15 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, assert(0 && "Unhandled layout type!"); } /* end switch */ +#ifdef H5_HAVE_PARALLEL + /* Obtain duplicate property list id. This is used to handle + collective chunk IO. */ + + if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't obtain duplicated property id") +#endif /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") #ifdef H5_HAVE_PARALLEL @@ -877,8 +903,13 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ + hid_t dp_id; + H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */ + H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */ #ifdef H5_HAVE_PARALLEL - hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ + H5FD_mpio_xfer_t xfer_mode; + int mpi_rank; #ifdef H5_HAVE_INSTRUMENTED_LIBRARY int prop_value,new_value; htri_t check_prop; @@ -1013,9 +1044,12 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, default: assert(0 && "Unhandled layout type!"); } /* end switch */ - +#ifdef H5_HAVE_PARALLEL + if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't obtain duplicated property id") +#endif /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") #ifdef H5_HAVE_PARALLEL @@ -1654,6 +1688,12 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, H5D_storage_t store; /*union of EFL and chunk pointer in file space */ herr_t ret_value = SUCCEED; /*return value */ +#ifdef H5_HAVE_PARALLEL + int count_chunk,mpi_rank, mpi_code,min_num_chunk,is_regular,all_regular; + hid_t temp_id; + MPI_Comm comm; +#endif + FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read) /* Map elements between file and memory for each chunk*/ @@ -1679,7 +1719,13 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, /* Get first node in chunk skip list */ chunk_node=H5SL_first(fm.fsel); - +#ifdef H5_HAVE_PARALLEL + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk") + } + count_chunk = 0; +#endif /* Iterate through chunks to be operated on */ while(chunk_node) { H5D_chunk_info_t *chunk_info; /* chunk information */ @@ -1691,15 +1737,148 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, store.chunk.offset = chunk_info->coords; store.chunk.index = chunk_info->index; - /* Perform the actual read operation */ - status = (io_info->ops.read)(io_info, +#ifdef H5_HAVE_PARALLEL + + count_chunk++; + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + /* If the number of chunk is greater than minimum number of chunk, + Do independent read */ + + if(count_chunk <= min_num_chunk) { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && + H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) + is_regular = 1; + else is_regular = 0; + + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) +#endif + } + + if(count_chunk > min_num_chunk) { + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + + + else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)|| + (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + /* Perform the independent read operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, chunk_info->chunk_points, H5T_get_size(dataset->shared->type), chunk_info->fspace, chunk_info->mspace, buf); - - /* Check return value from optimized read */ - if (status<0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; +#else + + /* Perform the actual collective read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif + } + + else { + /* For regular selection, + if MPI_COMPLEX_DERIVED_DATATYPE is not defined, + unless spaces for all processors are regular, independent read operation should be performed.*/ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(!all_regular) { + + /* Perform the independent read operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.read)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + + else { + /* For regular collective read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + } +#else + + /* For regular collective read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif + } + + } + else { + /* For regular independent read in parallel*/ + /* Perform the read operation */ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + } + +#else + /* Perform the actual read operation for sequential*/ + status = (io_info->ops.read)(io_info, + chunk_info->chunk_points, + H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") +#endif /* Get the next chunk node in the skip list */ chunk_node=H5SL_next(chunk_node); @@ -1937,6 +2116,8 @@ done: * Hacked on it a lot. :-) * Leon Arber: 4/20/04 * Added support for data transforms. + * Kent Yang: 8/10/04 + * Added support for collective chunk IO. * *------------------------------------------------------------------------- */ @@ -1973,6 +2154,13 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, H5D_storage_t store; /*union of EFL and chunk pointer in file space */ herr_t ret_value = SUCCEED; /*return value */ +#ifdef H5_HAVE_PARALLEL + hid_t temp_id; + int count_chunk,mpi_rank,mpi_code,min_num_chunk,is_regular,all_regular = 0; + MPI_Comm comm; + +#endif + FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write) /* Map elements between file and memory for each chunk*/ @@ -1990,6 +2178,14 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, #ifdef H5S_DEBUG H5_timer_begin(&timer); #endif + +#ifdef H5_HAVE_PARALLEL + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk") + } + count_chunk = 0; +#endif /* Get first node in chunk skip list */ chunk_node=H5SL_first(fm.fsel); @@ -2004,15 +2200,136 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, store.chunk.offset = chunk_info->coords; store.chunk.index = chunk_info->index; - /* Perform the actual write operation */ - status = (io_info->ops.write)(io_info, +#ifdef H5_HAVE_PARALLEL + + count_chunk++; + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) { + /* If the number of chunk is greater than minimum number of chunk, + Do independent write */ + + if(count_chunk <= min_num_chunk) { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && + H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) + is_regular = 1; + else is_regular = 0; + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) +#endif + } + if(count_chunk > min_num_chunk) { + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + fflush(stdout); + status = (io_info->ops_sca.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + io_info->dxpl_id = temp_id; + } + + + else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE)|| + (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)){ + +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + /* Perform the independent write operation */ + + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.write)(io_info, chunk_info->chunk_points, H5T_get_size(dataset->shared->type), chunk_info->fspace, chunk_info->mspace, buf); - - /* Check return value from optimized write */ - if (status<0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + io_info->dxpl_id = temp_id; +#else + + /* Perform the actual collective write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") +#endif + } + + else { +#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS + if(!all_regular) { + + /* Perform the independent write operation */ + temp_id = io_info->dxpl_id; + io_info->dxpl_id = io_info->dp_dxpl_id; + status = (io_info->ops_sca.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized read */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + io_info->dxpl_id = temp_id; + } + else { + /* For regular selection, perform the collective write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + } +#else + + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + +#endif + } + } + else { + /* For independent parallel write*/ + /* Perform the write operation */ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + } + +#else + /* Perform the actual write operation for sequential*/ + status = (io_info->ops.write)(io_info, + chunk_info->chunk_points, H5T_get_size(dataset->shared->type), + chunk_info->fspace, chunk_info->mspace, + buf); + /* Check return value from optimized write */ + if (status<0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") +#endif /* Get the next chunk node in the skip list */ chunk_node=H5SL_next(chunk_node); @@ -3270,7 +3587,16 @@ done: */ static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, - const H5S_t + hid_t +#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) + UNUSED +#endif /* H5_HAVE_PARALLEL */ + dp_dxpl_id, H5D_dxpl_cache_t + +#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) + UNUSED +#endif /* H5_HAVE_PARALLEL */ +*dp_dxpl_cache,const H5S_t #if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG) UNUSED #endif /* H5_HAVE_PARALLEL */ @@ -3320,6 +3646,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, /* * Check if we can set direct MPI-IO read/write functions */ + io_info->dp_dxpl_id = dp_dxpl_id; + io_info->dp_dxpl_cache = dp_dxpl_cache; + opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags); if(opt==FAIL) HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace "); @@ -3333,28 +3662,20 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { /* Set the pointers to the MPI-specific routines */ - if((H5S_SELECT_IS_REGULAR(file_space) == TRUE) && - (H5S_SELECT_IS_REGULAR(mem_space) == TRUE)){ - io_info->ops.read = H5D_mpio_spaces_read; - io_info->ops.write = H5D_mpio_spaces_write; - } - - #ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS - else { - io_info->ops.read = H5D_mpio_spaces_span_read; - io_info->ops.write = H5D_mpio_spaces_span_write; - } - #endif - /* Indicate that the I/O will be parallel */ - *use_par_opt_io=TRUE; - } /* end if */ + io_info->ops.read = H5D_mpio_select_read; + io_info->ops.write = H5D_mpio_select_write; + io_info->ops_sca.read = H5D_select_read; + io_info->ops_sca.write = H5D_select_write; + *use_par_opt_io=TRUE; + /* Indicate that the I/O will use collective */ + } + /* end if */ else { - /* Indicate that the I/O will _NOT_ be parallel */ + /* Indicate that the I/O will _NOT_ be parallel, use independent IO */ *use_par_opt_io=FALSE; io_info->ops.read = H5D_select_read; io_info->ops.write = H5D_select_write; - } /* end else */ #else io_info->ops.read = H5D_select_read; @@ -3372,3 +3693,136 @@ done: #endif /* H5_HAVE_PARALLEL || H5S_DEBUG */ FUNC_LEAVE_NOAPI(ret_value) } /* end H5D_ioinfo_init() */ + + +#ifdef H5_HAVE_PARALLEL + + +/*------------------------------------------------------------------------- + * Function: H5D_mpio_get_mini_chunk + * + * Purpose: Routine for obtaining minimum number of chunks to cover + hyperslab selection selected by all processors. + * + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: + * + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset, + const H5S_t *mem_space, + const H5S_t *file_space, + int *min_chunkf) { + + + hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */ + hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */ + endf[H5S_MAX_RANK]; /* Selection end bounds */ + unsigned dim_rankf; /* Number of dimensions of file dataspace */ + int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */ + tnum_chunkf, /* Number of chunks selection overlaps */ + max_chunkf, /* Maximum number of chunks selection overlaps */ + num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */ + unsigned dim_chunks; /* Temporary number of chunks in a dimension */ + MPI_Comm comm; /* MPI communicator for file */ + int mpi_rank; /* Rank in MPI communicator */ + int mpi_code; /* MPI return code */ + unsigned u; /* Local index variable */ + herr_t ret_value; + + ret_value = SUCCEED; + FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_get_mini_chunk); + /* Getting MPI communicator and rank */ + if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") + if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank") + + + dim_rankf = H5S_GET_EXTENT_NDIMS(file_space); + + if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks"); + + for(u=0; u < dset->shared->layout.u.chunk.ndims; u++) + chunk_dim[u] = dset->shared->layout.u.chunk.dim[u]; + + + /* Compute the number of chunks covered by the selection on this process */ + tnum_chunkf = 1; + for (u=0; u<dim_rankf; u++) { + dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1; + tnum_chunkf = dim_chunks*tnum_chunkf; + } + + /* Determine the minimum and maximum # of chunks for all processes */ + + if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) + + + /* Broadcast the flag indicating the number of chunks are the same */ + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + + done: + FUNC_LEAVE_NOAPI(ret_value); + +} + + +/*------------------------------------------------------------------------- + * Function: H5D_obtain_duplicate_pid + * + * Purpose: Routine for obtaining a copy property list ID of + data transfer property. + + * + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: + * + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +static herr_t H5D_obtain_duplicate_pid(hid_t dxpl_id, + hid_t* dp_id, + H5D_dxpl_cache_t **cache) +{ + + H5FD_mpio_xfer_t xfer_mode; + H5P_genplist_t *dp_dx_plist; /* Data transer property list */ + herr_t ret_value=SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT(H5D_obtain_duplicate_pid) + + *dp_id = H5Pcopy(dxpl_id); + + /* printf("inside function dp id %d\n",*dp_id);*/ + /* Get the dataset transfer property list */ + if (NULL == (dp_dx_plist = H5I_object(*dp_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list") + + xfer_mode = H5FD_MPIO_INDEPENDENT; + if(H5P_set (dp_dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode") + + /* Fill the DXPL cache values for later use */ + if (H5D_get_dxpl_cache(*dp_id,cache)<0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") + + done: + FUNC_LEAVE_NOAPI(ret_value) + +} +#endif /*H5_HAVE_PARALLEL*/ |