summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5Smpio.c557
1 files changed, 557 insertions, 0 deletions
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
new file mode 100644
index 0000000..f91738f
--- /dev/null
+++ b/src/H5Smpio.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright (C) 1998 NCSA
+ * All rights reserved.
+ *
+ * Programmer: rky 980813
+ *
+ * Purpose: Functions to read/write directly between app buffer and file.
+ *
+ * Beware of the ifdef'ed print statements.
+ * I didn't make them portable.
+ */
+
+#include <H5private.h>
+#include <H5Eprivate.h>
+#include <H5Sprivate.h>
+
+/* Interface initialization */
+#define PABLO_MASK H5S_all_mask
+#define INTERFACE_INIT NULL
+static intn interface_initialize_g = FALSE;
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_all_type
+ *
+ * Purpose: Translate an HDF5 "all" selection into an MPI type.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Outputs: *new_type the MPI type corresponding to the selection
+ * *count how many objects of the new_type in selection
+ * (useful if this is the buffer type for xfer)
+ * *is_derived_type 0 if MPI primitive type, 1 if derived
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_all_type( const H5S_t *space, const hsize_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ hsize_t *count,
+ hbool_t *is_derived_type )
+{
+ hsize_t total_bytes;
+ int i;
+
+ FUNC_ENTER (H5S_mpio_all_type, FAIL);
+
+ /* Check args */
+ assert (space);
+
+ /* Just treat the entire extent as a block of bytes */
+ total_bytes = elmt_size;
+ for (i=0; i<space->extent.u.simple.rank; ++i) {
+ total_bytes *= space->extent.u.simple.size[i];
+ }
+
+ /* fill in the return values */
+ *new_type = MPI_BYTE;
+ *count = total_bytes;
+ *is_derived_type = 0;
+
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "Leave %s total_bytes=%lld\n", FUNC, total_bytes );
+#endif
+ FUNC_LEAVE (SUCCEED);
+} /* H5S_mpio_all_type() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_hyper_type
+ *
+ * Purpose: Translate an HDF5 hyperslab selection into an MPI type.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Outputs: *new_type the MPI type corresponding to the selection
+ * *count how many objects of the new_type in selection
+ * (useful if this is the buffer type for xfer)
+ * *is_derived_type 0 if MPI primitive type, 1 if derived
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_hyper_type( const H5S_t *space, const hsize_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ hsize_t *count,
+ hbool_t *is_derived_type )
+{
+ struct dim { /* less hassle than malloc/free & ilk */
+ hssize_t start;
+ hsize_t strid;
+ hsize_t block;
+ hsize_t xtent;
+ hsize_t count;
+ } d[32];
+
+ int i, err, new_rank, num_to_collapse;
+ H5S_hyper_dim_t *diminfo; /* [rank] */
+ intn rank;
+ MPI_Datatype inner_type, outer_type;
+ MPI_Aint s[2]; /* array of displacements for struct type */
+ MPI_Aint extent_len, start_Aint; /* for calculating d[1] */
+
+ FUNC_ENTER (H5S_mpio_hyper_type, FAIL);
+
+ /* Check and abbreviate args */
+ assert (space);
+ diminfo = space->select.sel_info.hyper.diminfo;
+ assert (diminfo);
+ rank = space->extent.u.simple.rank;
+ assert (rank >= 0);
+
+ /* make a local copy of the dimension info so we can transform them */
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "rank=%d ", rank );
+#endif
+ assert(rank<=32); /* within array bounds */
+ for ( i=0; i<rank; ++i) {
+ d[i].start = diminfo[i].start;
+ d[i].strid = diminfo[i].stride;
+ d[i].block = diminfo[i].block;
+ d[i].count = diminfo[i].count;
+ d[i].xtent = space->extent.u.simple.size[i];
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout,
+ "hyper_type: start=%lld count=%lld stride=%lld block=%lld xtent=%lld\n",
+ d[i].start, d[i].count, d[i].strid, d[i].block, d[i].xtent );
+#endif
+ }
+
+ /* Create a type covering the selected hyperslab.
+ * Multidimensional dataspaces are stored in row-major order.
+ * The type is built from the inside out, going from the
+ * fastest-changing (i.e., inner) dimension * to the slowest (outer). */
+
+ /* Optimization: check for contiguous inner dimensions.
+ * Supposing the dimensions were numbered from 1 to rank, we find that
+ *
+ * dim d=rank is contiguous if:
+ * stride[d] = block[d]
+ * and count[d] * block[d] = extent.u.simple.size[d]
+ *
+ * (i.e., there's no overlap or gaps and the entire extent is filled.)
+ *
+ * dim d (1<=d<rank) is contiguous if:
+ * dim d+1 is contiguous
+ * and stride[d] = block[d]
+ * and count[d] * block[d] = extent.u.simple.size[d]
+ *
+ * There is also a weak sense in which the first noncollapsible dim
+ * is contiguous if it consists of a single unbroken range,
+ * and we also take advantage of that.
+ */
+
+ /* figure out how many dimensions we can eliminate */
+ /* This loop examines contiguity from the inside out. */
+ for ( i=0; i<rank; ++i) {
+ if ((d[rank-i].strid != d[rank-i].block)
+ ||
+ (d[rank-i].count*d[rank-i].block) != space->extent.u.simple.size[rank-i]) {
+ break;
+ }
+ } /* end for */
+ num_to_collapse = (i)? i-1: 0;
+ assert(0<=num_to_collapse && num_to_collapse<rank);
+ new_rank = rank - num_to_collapse;
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "hyper_type: new_rank=%d\n", new_rank );
+#endif
+
+ /* To collapse dims, we only need to transform the dimension info */
+ for (i=0; i<num_to_collapse; ++i) {
+ d[rank-i-1].block *= d[rank-i].strid;
+ d[rank-i-1].strid *= d[rank-i].strid;
+ d[rank-i-1].xtent *= d[rank-i].strid;
+ assert( d[rank-i].start == 0 );
+ /* d[rank-i-1].start stays unchanged */
+ /* d[rank-i-1].count stays unchanged */
+ }
+ /* check for possibility to coalesce blocks of the innermost dimension */
+ if (d[new_rank-1].strid == d[new_rank-1].block) {
+ /* transform the smaller blocks to 1 larger block of combined size */
+ d[new_rank-1].block *= d[new_rank-1].count;
+ d[new_rank-1].count = 1;
+ }
+
+ /* initialize induction variables */
+ s[0] = 0; /* stays constant */
+ /* create contig type for inner contig dims */
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "hyper_type: Making contig type %lld MPI_BYTEs\n", elmt_size );
+#endif
+ err = MPI_Type_contiguous( elmt_size, MPI_BYTE, &inner_type );
+ if (err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't create MPI contiguous type");
+ }
+
+ /* construct the type by walking the hyperslab dims from the inside out */
+ for ( i=new_rank-1; i>=0; --i) {
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "hyper_type: i=%d Making vector type\n count=%lld block=%lld stride=%lld\n", i, d[i].count, d[i].block, d[i].strid );
+#endif
+ err = MPI_Type_vector( d[i].count, /* count */
+ d[i].block, /* blocklength */
+ d[i].strid, /* stride */
+ inner_type, /* old type */
+ &outer_type ); /* new type */
+ if (err) {
+ MPI_Type_free( &inner_type ); /* free before abort */
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't create MPI vector type");
+ }
+ /* from here to end of loop, inner_type actually will get the value
+ * of the outermost type: it will be inner for the next iteration */
+ if (0 == d[i].start) {
+ /* don't need to compensate for the start displacement */
+ MPI_Type_free( &inner_type ); /* old inner no longer needed */
+ inner_type = outer_type; /* prepare for next iter */
+ } else {
+ /* need to compensate for the start displacement */
+ int b[2]; /* array of rep counts */
+ MPI_Datatype t[2]; /* array of MPI types */
+
+ /* fill in the b, d, and t arrays, length is 2 */
+ /* b gives rep count for each type in t */
+ b[0] = 1;
+ b[1] = 1;
+
+ /* s gives the byte displacement for each "field" by induction:
+ * for 0<=i<rank-1 s[1]_i = start[i]*extent_length[i+1];
+ * with base case s[1]_(rank-1) = elmt_size (i decreasing).
+ * (Assuming dimension index increases as we go deeper in.)
+ * Note that in this loop, extent_length[i+1] is the extent length
+ * of the inner type (i.e., the type constructed in previous trip).
+ */
+ err = MPI_Type_extent( inner_type, &extent_len );
+ if (err) {
+ MPI_Type_free( &inner_type ); /* free before abort */
+ MPI_Type_free( &outer_type ); /* free before abort */
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
+ "couldn't get extent of MPI type");
+ }
+ start_Aint = (MPI_Aint)(d[i].start);
+ if (start_Aint != d[i].start) {
+ MPI_Type_free( &inner_type ); /* free before abort */
+ MPI_Type_free( &outer_type ); /* free before abort */
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "start value overflows MPI_Aint");
+ }
+ s[1] = start_Aint * extent_len;
+
+ /* t gives the MPI types for the "fields" */
+ /* I think we could do without the LB type and just have
+ * one "field" consisting of the vector type with displacement,
+ * but I guess there's no harm in doing it this way. */
+ t[0] = MPI_LB;
+ t[1] = outer_type; /* the just-created vector type */
+ /* Create new struct type to compensate for start displacement.
+ * The struct's first "field" is the displacement,
+ * and its second "field" is the just-created vector type */
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "hyper_type: i=%d Making struct type\n b[1]=%d d[1]=%lld\n", i, b[1], (long long)s[1] );
+#endif
+ err = MPI_Type_struct( 2, b, s, t, &inner_type/*becomes outer*/ );
+ MPI_Type_free( &outer_type ); /* no longer needed */
+ if (err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't create MPI struct type");
+ }
+ } /* end else */
+ /* at this point, inner_type is actually the outermost type */
+ } /* end for */
+
+ /* here inner_type is actually the outermost type, even for 0-trip loop */
+ *new_type = inner_type; /* return the just-constructed type */
+ err = MPI_Type_commit( new_type );
+ if (err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't commit MPI vector type");
+ }
+
+ /* fill in the remaining return values */
+ *count = 1; /* only have to move one of these suckers! */
+ *is_derived_type = 1;
+
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "Leave %s\n", FUNC );
+#endif
+ FUNC_LEAVE (SUCCEED);
+} /* H5S_mpio_hyper_type() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_space_type
+ *
+ * Purpose: Translate an HDF5 dataspace selection into an MPI type.
+ * Currently handle only hyperslab and "all" selections.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Outputs: *new_type the MPI type corresponding to the selection
+ * *count how many objects of the new_type in selection
+ * (useful if this is the buffer type for xfer)
+ * *is_derived_type 0 if MPI primitive type, 1 if derived
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ hsize_t *count,
+ hbool_t *is_derived_type )
+{
+ int err;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER (H5S_mpio_space_type, FAIL);
+
+ /* Check args */
+ assert (space);
+
+ /* Creat MPI type based on the kind of selection */
+ switch (space->extent.type) {
+ case H5S_SCALAR:
+ /* not yet implemented */
+ ret_value = FAIL;
+ break;
+
+ case H5S_SIMPLE:
+ switch(space->select.type) {
+ case H5S_SEL_NONE:
+ case H5S_SEL_ALL:
+ err = H5S_mpio_all_type( space, elmt_size,
+ /* out: */ new_type, count, is_derived_type );
+ if (err<0) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ }
+ break;
+
+ case H5S_SEL_POINTS:
+ /* not yet implemented */
+ ret_value = FAIL;
+ break;
+
+ case H5S_SEL_HYPERSLABS:
+ err = H5S_mpio_hyper_type( space, elmt_size,
+ /* out: */ new_type, count, is_derived_type );
+ if (err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
+ }
+ break;
+
+ default:
+ assert("unknown selection type" && 0);
+ break;
+ } /* end switch */
+ break;
+
+ case H5S_COMPLEX:
+ /* not yet implemented */
+ ret_value = FAIL;
+ break;
+
+ default:
+ assert("unknown data space type" && 0);
+ break;
+ }
+
+ FUNC_LEAVE (ret_value);
+} /* H5S_mpio_space_type() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_spaces_xfer
+ *
+ * Purpose: Use MPI-IO to transfer data efficiently
+ * directly between app buffer and file.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_spaces_xfer (H5F_t *f, const struct H5O_layout_t *layout,
+ const struct H5O_pline_t *pline,
+ const struct H5O_efl_t *efl, size_t elmt_size,
+ const H5S_t *file_space, const H5S_t *mem_space,
+ const H5D_transfer_t xfer_mode, void *buf /*out*/,
+ const hbool_t do_write )
+{
+ int err;
+ haddr_t disp, addr;
+ size_t mpi_count;
+ hsize_t mpi_buf_count, mpi_unused_count;
+ hsize_t elmt_hsize;
+ MPI_Datatype mpi_buf_type, mpi_file_type;
+ hbool_t mbt_is_derived, mft_is_derived;
+
+ FUNC_ENTER (H5S_mpio_spaces_xfer, FAIL);
+
+ /* Check args */
+ assert (f);
+ assert (layout);
+ assert (file_space);
+ assert (mem_space);
+ assert (buf);
+ assert (f->shared->access_parms->driver == H5F_LOW_MPIO);
+
+ /* INCOMPLETE!!! rky 980816 */
+ /* Currently can only handle H5D_CONTIGUOUS layout */
+ if (layout->type != H5D_CONTIGUOUS) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL,"can only handle contiguous layout");
+ }
+
+ /* create the MPI buffer type */
+ elmt_hsize = (hsize_t)elmt_size;
+ if (elmt_hsize != elmt_size)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"element size overflows hsize_t");
+ err = H5S_mpio_space_type( mem_space, elmt_size,
+ /* out: */
+ &mpi_buf_type,
+ &mpi_buf_count,
+ &mbt_is_derived );
+ if (MPI_SUCCESS != err)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
+ /* pass the buf type to low-level write via access_parms */
+ f->shared->access_parms->u.mpio.btype = mpi_buf_type;
+
+ /* create the MPI file type */
+ err = H5S_mpio_space_type( file_space, elmt_size,
+ /* out: */
+ &mpi_file_type,
+ &mpi_unused_count,
+ &mft_is_derived );
+ if (MPI_SUCCESS != err)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
+ /* pass the file type to low-level write via access_parms */
+ f->shared->access_parms->u.mpio.ftype = mpi_file_type;
+
+ /* calculate the absolute base addr (i.e., the file view disp) */
+ disp = f->shared->base_addr;
+ H5F_addr_add( &disp, &(layout->addr) );
+ f->shared->access_parms->u.mpio.disp = disp;
+#ifdef H5Smpi_DEBUG
+ fprintf(stdout, "spaces_xfer: disp=%lld\n", disp.offset );
+#endif
+
+ /* Effective address determined by base addr and the MPI file type */
+ H5F_addr_reset( &addr ); /* set to 0 */
+
+ /* request a dataspace xfer (instead of an elementary byteblock xfer) */
+ f->shared->access_parms->u.mpio.use_types = 1;
+
+ /* transfer the data */
+ mpi_count = (size_t)mpi_buf_count;
+ if (mpi_count != mpi_buf_count)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"transfer size overflows size_t");
+ if (do_write) {
+ err = H5F_low_write( f->shared->lf, f->shared->access_parms,
+ xfer_mode, &addr, mpi_count, buf );
+ if (err) HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed");
+ } else {
+ err = H5F_low_read ( f->shared->lf, f->shared->access_parms,
+ xfer_mode, &addr, mpi_count, buf );
+ if (err) HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed");
+ }
+
+ /* free the MPI buf and file types */
+ if (mbt_is_derived) {
+ err = MPI_Type_free( &mpi_buf_type );
+ if (MPI_SUCCESS != err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't free MPI file type");
+ }
+ }
+ if (mft_is_derived) {
+ err = MPI_Type_free( &mpi_file_type );
+ if (MPI_SUCCESS != err) {
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,"couldn't free MPI file type");
+ }
+ }
+
+ FUNC_LEAVE (SUCCEED);
+} /* H5S_mpio_spaces_xfer() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_spaces_read
+ *
+ * Purpose: MPI-IO function to read directly from app buffer to file.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_spaces_read (H5F_t *f, const struct H5O_layout_t *layout,
+ const struct H5O_pline_t *pline,
+ const struct H5O_efl_t *efl, size_t elmt_size,
+ const H5S_t *file_space, const H5S_t *mem_space,
+ const H5D_transfer_t xfer_mode, void *buf /*out*/ )
+{
+ herr_t ret_value = FAIL;
+
+ FUNC_ENTER (H5S_mpio_spaces_read, FAIL);
+
+ ret_value = H5S_mpio_spaces_xfer( f, layout, pline, efl, elmt_size,
+ file_space, mem_space, xfer_mode, (void*)buf,
+ 0 /*read*/ );
+
+ FUNC_LEAVE (ret_value);
+} /* H5S_mpio_spaces_read() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_spaces_write
+ *
+ * Purpose: MPI-IO function to write directly from app buffer to file.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Programmer: rky 980813
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5S_mpio_spaces_write(H5F_t *f, const struct H5O_layout_t *layout,
+ const struct H5O_pline_t *pline,
+ const struct H5O_efl_t *efl, size_t elmt_size,
+ const H5S_t *file_space, const H5S_t *mem_space,
+ const H5D_transfer_t xfer_mode, const void *buf )
+{
+ herr_t ret_value = FAIL;
+
+ FUNC_ENTER (H5S_mpio_spaces_write, FAIL);
+
+ ret_value = H5S_mpio_spaces_xfer( f, layout, pline, efl, elmt_size,
+ file_space, mem_space, xfer_mode, (void*)buf,
+ 1 /*write*/ );
+
+ FUNC_LEAVE (ret_value);
+} /* H5S_mpio_spaces_write() */