author    Quincey Koziol <koziol@hdfgroup.org>  2004-08-02 17:56:37 (GMT)
committer Quincey Koziol <koziol@hdfgroup.org>  2004-08-02 17:56:37 (GMT)
commit    34cbb564af98b82c9b9f392fa33787094be4c74f (patch)
tree      d86ac0112734470cec82b4b26eacf2d798f61a3d /src/H5Smpio.c
parent    af7def1357e2e8fa43e6abe1adc768219c2df620 (diff)
[svn-r8989] Purpose:
    Backport feature

Description:
    Backport Kent's collective chunk I/O work to date into the release
    branch.  Also, minor code cleanups, etc.

Platforms tested:
    FreeBSD 4.10 (sleipnir) w/parallel
    IRIX64 6.5 (modi4)
    h5committested
Diffstat (limited to 'src/H5Smpio.c')
-rw-r--r--  src/H5Smpio.c  |  355
1 file changed, 122 insertions(+), 233 deletions(-)
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index c8aab2d..bf8f717 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -21,7 +21,6 @@
* I didn't make them portable.
*/
-#define H5F_PACKAGE /*suppress error about including H5Fpkg */
#define H5S_PACKAGE /*suppress error about including H5Spkg */
/* Pablo information */
@@ -29,10 +28,12 @@
#define PABLO_MASK H5S_mpio_mask
#include "H5private.h" /* Generic Functions */
+#include "H5Dprivate.h" /* Datasets */
#include "H5Eprivate.h" /* Error handling */
-#include "H5Fpkg.h" /* Files */
+#include "H5Fprivate.h" /* File access */
#include "H5FDprivate.h" /* File drivers */
#include "H5Iprivate.h" /* IDs */
+#include "H5Oprivate.h" /* Object headers */
#include "H5Pprivate.h" /* Property lists */
#include "H5Spkg.h" /* Dataspaces */
@@ -63,17 +64,6 @@ H5S_mpio_hyper_type( const H5S_t *space, size_t elmt_size,
size_t *count,
hsize_t *extra_offset,
hbool_t *is_derived_type );
-static herr_t
-H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
- /* out: */
- MPI_Datatype *new_type,
- size_t *count,
- hsize_t *extra_offset,
- hbool_t *is_derived_type );
-static herr_t
-H5S_mpio_spaces_xfer(H5F_t *f, const H5D_t *dset, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- hid_t dxpl_id, void *buf/*out*/, hbool_t do_write);
/*-------------------------------------------------------------------------
@@ -522,7 +512,7 @@ done:
*
*-------------------------------------------------------------------------
*/
-static herr_t
+herr_t
H5S_mpio_space_type( const H5S_t *space, size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
@@ -586,221 +576,6 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5S_mpio_spaces_xfer
- *
- * Purpose: Use MPI-IO to transfer data efficiently
- * directly between app buffer and file.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Programmer: rky 980813
- *
- * Notes:
- * For collective data transfer only, since this would eventually call
- * H5FD_mpio_setup to set up an eventual MPI_File_set_view call in
- * H5FD_mpio_read or H5FD_mpio_write. MPI_File_set_view is a collective
- * call, so letting independent data transfer use this route would result
- * in hanging (see the sketch after this function).
- *
- * The preconditions for calling this routine are located in the
- * H5S_mpio_opt_possible() routine, which determines whether this routine
- * can be called for a given dataset transfer.
- *
- * Modifications:
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize
- * the xfer.
- *
- * Albert Cheng, 001123
- * Include the MPI_type freeing as part of cleanup code.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
- *
- * QAK - 2002/06/17
- * Removed 'disp' parameter from H5FD_mpio_setup routine and use the
- * address of the dataset in MPI_File_set_view() calls, as necessary.
- *
- * QAK - 2002/06/18
- * Removed 'dc_plist' parameter, since it was not used. Also, switch to
- * getting the 'extra_offset' setting for each selection.
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5S_mpio_spaces_xfer(H5F_t *f, const H5D_t *dset, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- hid_t dxpl_id, void *_buf /*out*/,
- hbool_t do_write )
-{
- haddr_t addr; /* Address of dataset (or selection) within file */
- size_t mpi_buf_count, mpi_file_count; /* Number of "objects" to transfer */
- hsize_t mpi_buf_offset, mpi_file_offset; /* Offset within dataset where selection (i.e., MPI type) begins */
- MPI_Datatype mpi_buf_type, mpi_file_type; /* MPI types for buffer (memory) and file */
- hbool_t mbt_is_derived=0, /* Whether the buffer (memory) type is derived and needs to be free'd */
- mft_is_derived=0; /* Whether the file type is derived and needs to be free'd */
- hbool_t plist_is_setup=0; /* Whether the dxpl has been customized */
- uint8_t *buf=(uint8_t *)_buf; /* Alias for pointer arithmetic */
- int mpi_code; /* MPI return code */
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_NOAPI_NOINIT(H5S_mpio_spaces_xfer);
-
- /* Check args */
- assert (f);
- assert (dset);
- assert (file_space);
- assert (mem_space);
- assert (buf);
- assert (IS_H5FD_MPIO(f));
- /* Make certain we have the correct type of property list */
- assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
-
- /* create the MPI buffer type */
- if (H5S_mpio_space_type( mem_space, elmt_size,
- /* out: */
- &mpi_buf_type,
- &mpi_buf_count,
- &mpi_buf_offset,
- &mbt_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
-
- /* create the MPI file type */
- if ( H5S_mpio_space_type( file_space, elmt_size,
- /* out: */
- &mpi_file_type,
- &mpi_file_count,
- &mpi_file_offset,
- &mft_is_derived )<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
-
- addr = H5D_contig_get_addr(dset) + mpi_file_offset;
-#ifdef H5Smpi_DEBUG
- HDfprintf(stderr, "spaces_xfer: relative addr=%a\n", addr );
-#endif
-
- /*
- * Pass buf type, file type to the file driver. Request an MPI type
- * transfer (instead of an elementary byteblock transfer).
- */
- if(H5FD_mpi_setup_collective(dxpl_id, mpi_buf_type, mpi_file_type)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties");
- plist_is_setup=1;
-
- /* Adjust the buffer pointer to the beginning of the selection */
- buf+=mpi_buf_offset;
-
- /* transfer the data */
- if (do_write) {
- if (H5F_block_write(f, H5FD_MEM_DRAW, addr, mpi_buf_count, dxpl_id, buf) <0)
- HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed");
- } else {
- if (H5F_block_read (f, H5FD_MEM_DRAW, addr, mpi_buf_count, dxpl_id, buf) <0)
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed");
- }
-
-done:
- /* Reset the dxpl settings */
- if(plist_is_setup) {
- if(H5FD_mpi_teardown_collective(dxpl_id)<0)
- HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values");
- } /* end if */
-
- /* free the MPI buf and file types */
- if (mbt_is_derived) {
- if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_buf_type )))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
- }
- if (mft_is_derived) {
- if (MPI_SUCCESS != (mpi_code= MPI_Type_free( &mpi_file_type )))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
- }
-
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5S_mpio_spaces_xfer() */
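The notes above hinge on MPI_File_set_view being a collective operation. The following is a minimal standalone sketch of that pattern, not HDF5 code: the file name "example.dat" and the per-rank block of 4 ints are made up for illustration. Every rank must reach the set-view and collective-write calls, which is why an independent transfer routed through the removed routine would hang.

/* Standalone sketch of the collective MPI-IO pattern (assumed example;
 * file name and block size are hypothetical). */
#include <mpi.h>

int main(int argc, char *argv[])
{
    MPI_File   fh;
    int        rank;
    int        data[4];        /* each rank contributes 4 ints */
    MPI_Offset disp;
    int        i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    for (i = 0; i < 4; i++)
        data[i] = rank * 4 + i;

    MPI_File_open(MPI_COMM_WORLD, "example.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    /* Collective call: *all* ranks must reach this point, each with its
     * own displacement.  A code path that skipped it on some ranks would
     * hang here. */
    disp = (MPI_Offset)rank * 4 * (MPI_Offset)sizeof(int);
    MPI_File_set_view(fh, disp, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);

    /* Collective write of each rank's block */
    MPI_File_write_all(fh, data, 4, MPI_INT, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}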
-
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_spaces_read
- *
- * Purpose: MPI-IO function to read directly from app buffer to file.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Programmer: rky 980813
- *
- * Modifications:
- *
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize the xfer.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5S_mpio_spaces_read(H5F_t *f, const H5D_dxpl_cache_t UNUSED *dxpl_cache, hid_t dxpl_id,
- H5D_t *dset, const H5D_storage_t UNUSED *store,
- size_t UNUSED nelmts, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- void *buf/*out*/)
-{
- herr_t ret_value;
-
- FUNC_ENTER_NOAPI(H5S_mpio_spaces_read, FAIL);
-
- ret_value = H5S_mpio_spaces_xfer(f, dset, elmt_size, file_space,
- mem_space, dxpl_id, buf, 0/*read*/);
-
-done:
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5S_mpio_spaces_read() */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_spaces_write
- *
- * Purpose: MPI-IO function to write directly from app buffer to file.
- *
- * Return: non-negative on success, negative on failure.
- *
- * Programmer: rky 980813
- *
- * Modifications:
- *
- * rky 980918
- * Added must_convert parameter to let caller know we can't optimize the xfer.
- *
- * QAK - 2002/04/02
- * Removed the must_convert parameter and move preconditions to
- * H5S_mpio_opt_possible() routine
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5S_mpio_spaces_write(H5F_t *f, const H5D_dxpl_cache_t UNUSED *dxpl_cache, hid_t dxpl_id,
- H5D_t *dset, const H5D_storage_t UNUSED *store,
- size_t UNUSED nelmts, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- const void *buf)
-{
- herr_t ret_value;
-
- FUNC_ENTER_NOAPI(H5S_mpio_spaces_write, FAIL);
-
- /*OKAY: CAST DISCARDS CONST QUALIFIER*/
- ret_value = H5S_mpio_spaces_xfer(f, dset, elmt_size, file_space,
- mem_space, dxpl_id, (void*)buf, 1/*write*/);
-
-done:
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5S_mpio_spaces_write() */
-
-
-/*-------------------------------------------------------------------------
* Function: H5S_mpio_opt_possible
*
 * Purpose: Checks if a direct I/O transfer is possible between memory and
@@ -817,7 +592,7 @@ done:
*-------------------------------------------------------------------------
*/
htri_t
-H5S_mpio_opt_possible( const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags)
+H5S_mpio_opt_possible( const H5F_t *file, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags,const H5O_layout_t *layout)
{
htri_t c1,c2; /* Flags whether a selection is optimizable */
htri_t ret_value=TRUE;
@@ -849,12 +624,126 @@ H5S_mpio_opt_possible( const H5S_t *mem_space, const H5S_t *file_space, const un
if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space))
HGOTO_DONE(FALSE);
- /* Dataset storage must be contiguous currently */
- if ((flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CONTIGUOUS)
+ /* Dataset storage must be contiguous or chunked */
+ if ((flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CONTIGUOUS &&
+ (flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED)
HGOTO_DONE(FALSE);
+ if ((flags&H5S_CONV_STORAGE_MASK)==H5S_CONV_STORAGE_CHUNKED) {
+ hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
+ hssize_t startf[H5S_MAX_RANK], /* Selection start bounds */
+ endf[H5S_MAX_RANK]; /* Selection end bounds */
+ unsigned dim_rankf; /* Number of dimensions of file dataspace */
+ int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */
+ tnum_chunkf, /* Number of chunks selection overlaps */
+ max_chunkf, /* Maximum number of chunks selection overlaps */
+ min_chunkf, /* Minimum number of chunks selection overlaps */
+ num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */
+ unsigned dim_chunks; /* Temporary number of chunks in a dimension */
+ MPI_Comm comm; /* MPI communicator for file */
+ int mpi_rank; /* Rank in MPI communicator */
+ int mpi_code; /* MPI return code */
+ unsigned u; /* Local index variable */
+
+ /* Getting MPI communicator and rank */
+ if((comm = H5F_mpi_get_comm(file))==MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
+ if((mpi_rank = H5F_mpi_get_rank(file))<0)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
+
+ /* Currently, collective I/O on chunked storage
+ inside HDF5 is supported in either of the following two cases:
+ 1. All the hyperslabs for one process are inside one chunk.
+ 2. For a single hyperslab selection, the number of chunks that cover
+ the selection must be equal for all processes.
+ KY, 2004/7/14
+ */
+
+ /* Quincey, please read.
+ This may be redundant: I think collective IO can only work when both
+ the memory and file dataspaces are SCALAR; otherwise SELECT_POINT
+ would be reached and collective IO shouldn't work.
+ Please clarify and correct the following code.
+ Quincey said that it was probably okay if only one dataspace is SCALAR,
+ so keep this code here until we add more tests later.
+ Kent */
+ if(H5S_SCALAR==mem_space->extent.type || H5S_SCALAR ==file_space->extent.type) {
+ if(!(H5S_SCALAR==mem_space->extent.type && H5S_SCALAR ==file_space->extent.type))
+ HGOTO_DONE(FALSE)
+ else
+ HGOTO_DONE(TRUE)
+ } /* end if */
+
+ dim_rankf = file_space->extent.rank;
+
+ if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks");
+
+ for(u=0; u < layout->u.chunk.ndims; u++)
+ chunk_dim[u] = layout->u.chunk.dim[u];
+
+ /* Case 1: check whether all hyperslabs in this process are inside one chunk.
+ Note: we don't handle the case when the starting point is less than zero,
+ since that may cover two chunks. */
+
+ /* For file space checking */
+ pcheck_hyper = 1;
+ for (u=0; u<dim_rankf; u++)
+ if(endf[u]/chunk_dim[u]!=startf[u]/chunk_dim[u]) {
+ pcheck_hyper = 0;
+ break;
+ }
+
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&pcheck_hyper,&check_hyper,1,MPI_INT,MPI_LAND,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&check_hyper,1,MPI_INT,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+
+ /* If check_hyper is true, the condition for the collective IO case is
+ fulfilled; no need for further tests. */
+ if(check_hyper)
+ HGOTO_DONE(TRUE);
+
+ /* Case 2: Check whether the number of chunks that cover the single
+ hyperslab is the same for all processes. If not, no collective chunk IO.
+ KY, 2004/7/14
+ */
+
+ c1 = H5S_SELECT_IS_SINGLE(file_space);
+ c2 = H5S_SELECT_IS_SINGLE(mem_space);
+
+ if(c1==FAIL || c2 ==FAIL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for single selection blocks");
+ if(c1==FALSE || c2 ==FALSE)
+ HGOTO_DONE(FALSE);
+
+ /* Compute the number of chunks covered by the selection on this process */
+ tnum_chunkf = 1;
+ for (u=0; u<dim_rankf; u++) {
+ dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1;
+ tnum_chunkf = dim_chunks*tnum_chunkf;
+ }
+
+ /* Determine the minimum and maximum # of chunks for all processes */
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&max_chunkf,1,MPI_INT,MPI_MAX,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,&min_chunkf,1,MPI_INT,MPI_MIN,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+
+ /* Let the rank==0 process determine if the same number of chunks will be operated on by all processes */
+ if(mpi_rank == 0)
+ num_chunks_same = (max_chunkf==min_chunkf);
+
+ /* Broadcast the flag indicating the number of chunks are the same */
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&num_chunks_same,1,MPI_INT,0,comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+
+ /* Can't handle case when number of chunks is different (yet) */
+ if(!num_chunks_same)
+ HGOTO_DONE(FALSE);
+ }
+
done:
FUNC_LEAVE_NOAPI(ret_value);
} /* H5S_mpio_opt_possible() */
-
#endif /* H5_HAVE_PARALLEL */
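For reference, the per-dimension arithmetic behind the two chunked-storage checks added above can be worked through in isolation. Below is a minimal sketch with made-up chunk dimensions and selection bounds; the variable names mirror those in H5S_mpio_opt_possible() but nothing here is HDF5 code.

/* Worked example of the chunk-coverage arithmetic (hypothetical values). */
#include <stdio.h>

int main(void)
{
    long chunk_dim[2] = {10, 10};  /* hypothetical 10x10 chunks */
    long startf[2]    = {12, 3};   /* hypothetical selection start bounds */
    long endf[2]      = {18, 9};   /* hypothetical selection end bounds */
    int  in_one_chunk = 1;         /* mirrors pcheck_hyper */
    long tnum_chunkf  = 1;         /* mirrors tnum_chunkf */
    int  u;

    for (u = 0; u < 2; u++) {
        /* Case 1 test: start and end fall in the same chunk exactly when
         * their chunk indices (integer division) agree */
        if (endf[u] / chunk_dim[u] != startf[u] / chunk_dim[u])
            in_one_chunk = 0;

        /* Case 2 count: chunks spanned per dimension, multiplied together */
        tnum_chunkf *= (endf[u] / chunk_dim[u] - startf[u] / chunk_dim[u]) + 1;
    }

    /* Here 18/10 == 12/10 == 1 and 9/10 == 3/10 == 0, so the selection
     * lies entirely in chunk (1,0) and tnum_chunkf == 1 */
    printf("in one chunk: %d, chunks covered: %ld\n", in_one_chunk, tnum_chunkf);
    return 0;
}

Each process runs this test on its own selection; the MPI_Reduce/MPI_Bcast pairs in the code above then combine the per-process results (logical AND for case 1, min/max comparison for case 2) so every rank reaches the same collective-vs-independent decision.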