/*
 * Copyright (C) 1998 NCSA
 *                    All rights reserved.
 *
 * Programmer:  rky 980813
 *
 * Purpose:	Functions to read/write directly between app buffer and file.
 *
 *		Beware of the ifdef'ed print statements.
 *		I didn't make them portable.
 */
#include <H5private.h>
#include <H5Eprivate.h>
#include <H5Sprivate.h>

#ifndef HAVE_PARALLEL
/*
 * The H5S_mpio_xxxx functions are for parallel I/O only and are
 * valid only when HAVE_PARALLEL is #defined.  This empty #ifndef
 * body is used to allow this source file be included in the serial
 * distribution.
 * Some compilers/linkers may complain about "empty" object file.
 * If that happens, uncomment the following statement to pacify
 * them.
 */
/* const hbool_t H5S_mpio_avail = FALSE; */
#else /* HAVE_PARALLEL */

/* Interface initialization */
#define PABLO_MASK	H5S_all_mask
#define INTERFACE_INIT	NULL
static intn		interface_initialize_g = FALSE;

/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_all_type
 *
 * Purpose:	Translate an HDF5 "all" selection into an MPI type.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
		   /* out: */
		   MPI_Datatype *new_type,
		   hsize_t *count,
		   hbool_t *is_derived_type )
{
    hsize_t	total_bytes;
    int		i;

    FUNC_ENTER (H5S_mpio_all_type, FAIL);

    /* Check args */
    assert (space);

    /* Just treat the entire extent as a block of bytes */
    total_bytes = (hsize_t)elmt_size;
    for (i=0; i<space->extent.u.simple.rank; ++i) {
	total_bytes *= space->extent.u.simple.size[i];
    }

    /* fill in the return values */
    *new_type = MPI_BYTE;
    *count = total_bytes;
    *is_derived_type = 0;

#ifdef H5Smpi_DEBUG
    fprintf(stdout, "Leave %s total_bytes=%lld\n",
	    FUNC, (long long)total_bytes );
#endif
    FUNC_LEAVE (SUCCEED);
} /* H5S_mpio_all_type() */
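/*
 * Example (illustrative only, not part of the original interface):
 * for a rank-2 simple dataspace of 10 x 20 elements, each 4 bytes,
 * H5S_mpio_all_type() computes
 *	total_bytes = 4 * 10 * 20 = 800
 * and returns new_type = MPI_BYTE, count = 800, is_derived_type = 0,
 * i.e. the whole extent is transferred as one contiguous run of bytes.
 */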
/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_hyper_type
 *
 * Purpose:	Translate an HDF5 hyperslab selection into an MPI type.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_hyper_type( const H5S_t *space, const size_t elmt_size,
		     /* out: */
		     MPI_Datatype *new_type,
		     hsize_t *count,
		     hbool_t *is_derived_type )
{
    struct dim {	/* less hassle than malloc/free & ilk */
	hssize_t start;
	hsize_t  strid;
	hsize_t  block;
	hsize_t  xtent;
	hsize_t  count;
    } d[32];

    int			i, err, new_rank, num_to_collapse;
    H5S_hyper_dim_t	*diminfo;	/* [rank] */
    intn		rank;
    MPI_Datatype	inner_type, outer_type;
    MPI_Aint		s[2];	/* array of displacements for struct type */
    MPI_Aint		extent_len, start_Aint;	/* for calculating s[1] */

    FUNC_ENTER (H5S_mpio_hyper_type, FAIL);

    /* Check and abbreviate args */
    assert (space);
    diminfo = space->select.sel_info.hyper.diminfo;
    assert (diminfo);
    rank = space->extent.u.simple.rank;
    assert (rank >= 0);

    /* make a local copy of the dimension info so we can transform them */
    assert(rank<=32);	/* within array bounds */
    for ( i=0; i<rank; ++i) {
	d[i].start = diminfo[i].start;
	d[i].strid = diminfo[i].stride;
	d[i].block = diminfo[i].block;
	d[i].count = diminfo[i].count;
	d[i].xtent = space->extent.u.simple.size[i];
#ifdef H5Smpi_DEBUG
	fprintf(stdout,
	    "hyper_type: start=%lld stride=%lld count=%lld block=%lld xtent=%lld",
	    (long long)d[i].start, (long long)d[i].strid,
	    (long long)d[i].count, (long long)d[i].block,
	    (long long)d[i].xtent );
	if (i==0)
	    fprintf(stdout, " rank=%d\n", rank );
	else
	    fprintf(stdout, "\n" );
#endif
    }

    /* Create a type covering the selected hyperslab.
     * Multidimensional dataspaces are stored in row-major order.
     * The type is built from the inside out, going from the
     * fastest-changing (i.e., inner) dimension to the slowest (outer).
     */
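    /*
     * Example (illustrative only): for a rank-2 selection whose inner
     * dimension is d[1] = {start=2, strid=4, count=3, block=2, xtent=16},
     * the inside-out construction below first builds a contiguous byte
     * type for one element, then a vector of 3 blocks of 2 elements with
     * stride 4, then shifts that row type by the start of 2 elements,
     * and finally wraps the result in a vector for the outer dimension.
     */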
    /* Optimization: check for contiguous inner dimensions.
     * Supposing the dimensions were numbered from 1 to rank, we find that
     *
     * dim d=rank is contiguous if:
     *    stride[d] = block[d]
     *  and  count[d] * block[d] = extent.u.simple.size[d]
     *
     * (i.e., there's no overlap or gaps and the entire extent is filled.)
     *
     * dim d (1<=d<rank) is contiguous if:
     *    dim d+1 is contiguous
     *  and  stride[d] = block[d]
     *  and  count[d] * block[d] = extent.u.simple.size[d]
     *
     * Each contiguous inner dimension can be collapsed into the dimension
     * just outside it, reducing the rank of the type to be constructed.
     */

    /* find how many inner dimensions are contiguous */
    for (i=0; i<rank; ++i) {
	if (d[rank-i-1].strid != d[rank-i-1].block
	    || d[rank-i-1].count*d[rank-i-1].block !=
		space->extent.u.simple.size[rank-i-1]) {
	    break;
	}
    } /* end for */
    num_to_collapse = i;
    num_to_collapse = 0;	/* rky 980827 DEBUG Temporary change to
				 * prevent collapsing dims until further
				 * testing of collapse */
    assert(0<=num_to_collapse && num_to_collapse<rank);
    new_rank = rank - num_to_collapse;

    /* collapse the contiguous dimensions into the dimension just outside */
    for (i=rank-1; i>=new_rank; --i) {
	d[i-1].block *= d[i].strid;
	d[i-1].strid *= d[i].strid;
	d[i-1].xtent *= d[i].strid;
	assert( d[i].start == 0 );
	/* d[i-1].start stays unchanged */
	/* d[i-1].count stays unchanged */
    }

    /* check for possibility to coalesce blocks of the uncoalesced dimensions */
    for (i=0; i<new_rank; ++i) {
	if (d[i].strid == d[i].block) {
	    /* stride equals block size: adjacent blocks can be
	     * coalesced into one larger block */
	    d[i].block *= d[i].count;
	    d[i].strid  = d[i].block;
	    d[i].count  = 1;
	}
    }

    /* create the base type: a contiguous run of bytes for one element */
    err = MPI_Type_contiguous( (int)elmt_size, MPI_BYTE, &inner_type );
    if (err) {
	HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
		      "couldn't create MPI contiguous type");
    }

    /* construct the type by walking the hyperslab dimensions
     * from the inside (fastest-changing) out */
    for ( i=new_rank-1; i>=0; --i) {
#ifdef H5Smpi_DEBUG
	fprintf(stdout,
	    "hyper_type: i=%d Making vector type\n"
	    " count=%lld block=%lld stride=%lld\n",
	    i, (long long)d[i].count, (long long)d[i].block,
	    (long long)d[i].strid );
#endif
	err = MPI_Type_vector( (int)(d[i].count),	/* count */
			       (int)(d[i].block),	/* blocklength */
			       (MPI_Aint)(d[i].strid),	/* stride */
			       inner_type,		/* old type */
			       &outer_type );		/* new type */
	if (err) {
	    MPI_Type_free( &inner_type );	/* free before abort */
	    HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
			  "couldn't create MPI vector type");
	}

	/* from here to end of loop, inner_type actually will get the value
	 * of the outermost type: it will be inner for the next iteration */
	if (0 == d[i].start) {
	    /* don't need to compensate for the start displacement */
	    MPI_Type_free( &inner_type );	/* old inner no longer needed */
	    inner_type = outer_type;		/* prepare for next iter */
	} else {
	    /* need to compensate for the start displacement */
	    int		 b[2];	/* array of rep counts */
	    MPI_Datatype t[2];	/* array of MPI types */

	    /* fill in the b, s, and t arrays, length is 2 */
	    /* b gives rep count for each type in t */
	    b[0] = 1;
	    b[1] = 1;

	    /* s gives the byte displacement for each "field" by induction:
	     * for 0<=j<2, s[j+1] = s[j] + b[j]*extent(t[j]) */
	    s[0] = 0;
	    err = MPI_Type_extent( inner_type, &extent_len );
	    if (err) {
		MPI_Type_free( &inner_type );
		MPI_Type_free( &outer_type );
		HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
			      "couldn't get extent of MPI type");
	    }
	    start_Aint = (MPI_Aint)(d[i].start);
	    s[1] = start_Aint * extent_len;

	    /* t gives the MPI type of each "field":
	     * a lower bound of 0 plus the displaced vector type */
	    t[0] = MPI_LB;
	    t[1] = outer_type;

	    MPI_Type_free( &inner_type );	/* old inner no longer needed */
	    err = MPI_Type_struct( 2, b, s, t, &inner_type );
	    if (err) {
		MPI_Type_free( &outer_type );
		HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
			      "couldn't create MPI struct type");
	    }
	    MPI_Type_free( &outer_type );	/* absorbed into inner_type */
	}
    } /* end for */

    /* commit the final type so it can be used for the transfer */
    err = MPI_Type_commit( &inner_type );
    if (err) {
	HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
		      "couldn't commit MPI type");
    }

    /* fill in the return values */
    *new_type = inner_type;
    *count = 1;		/* only one instance of the derived type to move */
    *is_derived_type = 1;

#ifdef H5Smpi_DEBUG
    fprintf(stdout, "Leave %s\n", FUNC );
#endif
    FUNC_LEAVE (SUCCEED);
} /* H5S_mpio_hyper_type() */

/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_space_type
 *
 * Purpose:	Translate an HDF5 dataspace selection into an MPI type.
 *		Currently handles only simple dataspaces with "all" and
 *		hyperslab selections.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Outputs:	*new_type	  the MPI type corresponding to the selection
 *		*count		  how many objects of the new_type in selection
 *				  (useful if this is the buffer type for xfer)
 *		*is_derived_type  0 if MPI primitive type, 1 if derived
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
		     /* out: */
		     MPI_Datatype *new_type,
		     hsize_t *count,
		     hbool_t *is_derived_type )
{
    herr_t	ret_value = SUCCEED;
    int		err;

    FUNC_ENTER (H5S_mpio_space_type, FAIL);

    /* Check args */
    assert (space);

    /* select the type translator according to the dataspace class */
    switch (space->extent.type) {
    case H5S_SCALAR:
	/* not yet implemented */
	ret_value = FAIL;
	break;

    case H5S_SIMPLE:
	switch(space->select.type) {
	case H5S_SEL_NONE:
	case H5S_SEL_ALL:
	    err = H5S_mpio_all_type( space, elmt_size,
				     /* out: */
				     new_type, count, is_derived_type );
	    if (err<0) {
		HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
			      "couldn't convert \"all\" selection to MPI type");
	    }
	    break;

	case H5S_SEL_POINTS:
	    /* not yet implemented */
	    ret_value = FAIL;
	    break;

	case H5S_SEL_HYPERSLABS:
	    err = H5S_mpio_hyper_type( space, elmt_size,
				       /* out: */
				       new_type, count, is_derived_type );
	    if (err<0) {
		HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
			      "couldn't convert hyperslab selection to MPI type");
	    }
	    break;

	default:
	    assert("unknown selection type" && 0);
	    break;
	} /* end switch */
	break;

    case H5S_COMPLEX:
	/* not yet implemented */
	ret_value = FAIL;
	break;

    default:
	assert("unknown data space type" && 0);
	break;
    }

    FUNC_LEAVE (ret_value);
} /* H5S_mpio_space_type() */
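/*
 * Example (illustrative only): for a simple dataspace with an "all"
 * selection, H5S_mpio_space_type() returns new_type = MPI_BYTE with
 * count equal to the byte size of the extent and is_derived_type = 0.
 * For a hyperslab selection it returns one committed derived type with
 * count = 1 and is_derived_type = 1, so the caller must eventually
 * MPI_Type_free() the type (see H5S_mpio_spaces_xfer below).
 */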
/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_spaces_xfer
 *
 * Purpose:	Use MPI-IO to transfer data efficiently
 *		directly between app buffer and file.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_spaces_xfer (H5F_t *f, const struct H5O_layout_t *layout,
		      const struct H5O_pline_t *pline,
		      const struct H5O_efl_t *efl, size_t elmt_size,
		      const H5S_t *file_space, const H5S_t *mem_space,
		      const H5D_transfer_t xfer_mode, void *buf /*out*/,
		      const hbool_t do_write )
{
    herr_t	 ret_value = SUCCEED;
    int		 err;
    haddr_t	 disp, addr;
    size_t	 mpi_count;
    hsize_t	 mpi_buf_count, mpi_unused_count;
    MPI_Datatype mpi_buf_type, mpi_file_type;
    hbool_t	 mbt_is_derived, mft_is_derived;

    FUNC_ENTER (H5S_mpio_spaces_xfer, FAIL);

    /* Check args */
    assert (f);
    assert (layout);
    assert (file_space);
    assert (mem_space);
    assert (buf);
    assert (f->shared->access_parms->driver == H5F_LOW_MPIO);

    /* INCOMPLETE!!!  rky 980816 */
    /* Currently can only handle H5D_CONTIGUOUS layout */
    if (layout->type != H5D_CONTIGUOUS) {
#ifdef H5S_DEBUG
	fprintf (stderr,
	    "H5S: can only create MPI datatype for hyperslab selection "
	    "when layout is contiguous.\n" );
#endif
	ret_value = FAIL;
	goto done;
    }

    /* create the MPI buffer type */
    err = H5S_mpio_space_type( mem_space, elmt_size,
			       /* out: */
			       &mpi_buf_type, &mpi_buf_count,
			       &mbt_is_derived );
    if (err<0)
	HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
		      "couldn't create MPI buf type");
    /* pass the buf type to low-level write via access_parms */
    f->shared->access_parms->u.mpio.btype = mpi_buf_type;

    /* create the MPI file type */
    err = H5S_mpio_space_type( file_space, elmt_size,
			       /* out: */
			       &mpi_file_type, &mpi_unused_count,
			       &mft_is_derived );
    if (err<0)
	HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
		      "couldn't create MPI file type");
    /* pass the file type to low-level write via access_parms */
    f->shared->access_parms->u.mpio.ftype = mpi_file_type;

    /* calculate the absolute base addr (i.e., the file view disp) */
    disp = f->shared->base_addr;
    H5F_addr_add( &disp, &(layout->addr) );
    f->shared->access_parms->u.mpio.disp = disp;
#ifdef H5Smpi_DEBUG
    fprintf(stdout, "spaces_xfer: disp=%lld\n", (long long)disp.offset );
#endif

    /* Effective address is determined by the base addr and the MPI file
     * type, so the address passed to the low-level layer is zero */
    H5F_addr_reset( &addr );	/* set to 0 */

    /* request a dataspace xfer (instead of an elementary byteblock xfer) */
    f->shared->access_parms->u.mpio.use_types = 1;

    /* transfer the data */
    mpi_count = (size_t)mpi_buf_count;
    if (mpi_count != mpi_buf_count)
	HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
		      "transfer size overflows size_t");
    if (do_write) {
	err = H5F_low_write( f->shared->lf, f->shared->access_parms,
			     xfer_mode, &addr, mpi_count, buf );
	if (err) {
	    HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "MPI write failed");
	}
    } else {
	err = H5F_low_read ( f->shared->lf, f->shared->access_parms,
			     xfer_mode, &addr, mpi_count, buf );
	if (err) {
	    HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, "MPI read failed");
	}
    }

    /* free the MPI buf and file types */
    if (mbt_is_derived) {
	err = MPI_Type_free( &mpi_buf_type );
	if (MPI_SUCCESS != err) {
	    HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
			  "couldn't free MPI buf type");
	}
    }
    if (mft_is_derived) {
	err = MPI_Type_free( &mpi_file_type );
	if (MPI_SUCCESS != err) {
	    HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
			  "couldn't free MPI file type");
	}
    }

done:
    FUNC_LEAVE (ret_value);
} /* H5S_mpio_spaces_xfer() */
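/*
 * Example (illustrative only): if the file's base address is 512 and the
 * dataset's contiguous layout begins at offset 2048, the view
 * displacement computed above is disp = 512 + 2048 = 2560.  The MPI-IO
 * layer applies mpi_file_type at that displacement, which is why addr
 * is reset to zero before calling H5F_low_read/H5F_low_write.
 */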
/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_spaces_read
 *
 * Purpose:	MPI-IO function to read directly from file to app buffer.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_spaces_read (H5F_t *f, const struct H5O_layout_t *layout,
		      const struct H5O_pline_t *pline,
		      const struct H5O_efl_t *efl, size_t elmt_size,
		      const H5S_t *file_space, const H5S_t *mem_space,
		      const H5D_transfer_t xfer_mode, void *buf /*out*/ )
{
    herr_t ret_value = FAIL;

    FUNC_ENTER (H5S_mpio_spaces_read, FAIL);

    ret_value = H5S_mpio_spaces_xfer( f, layout, pline, efl, elmt_size,
				      file_space, mem_space, xfer_mode,
				      (void*)buf, 0 /*read*/ );

    FUNC_LEAVE (ret_value);
} /* H5S_mpio_spaces_read() */

/*-------------------------------------------------------------------------
 * Function:	H5S_mpio_spaces_write
 *
 * Purpose:	MPI-IO function to write directly from app buffer to file.
 *
 * Return:	non-negative on success, negative on failure.
 *
 * Programmer:	rky 980813
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
herr_t
H5S_mpio_spaces_write(H5F_t *f, const struct H5O_layout_t *layout,
		      const struct H5O_pline_t *pline,
		      const struct H5O_efl_t *efl, size_t elmt_size,
		      const H5S_t *file_space, const H5S_t *mem_space,
		      const H5D_transfer_t xfer_mode, const void *buf )
{
    herr_t ret_value = FAIL;

    FUNC_ENTER (H5S_mpio_spaces_write, FAIL);

    ret_value = H5S_mpio_spaces_xfer( f, layout, pline, efl, elmt_size,
				      file_space, mem_space, xfer_mode,
				      (void*)buf, 1 /*write*/ );

    FUNC_LEAVE (ret_value);
} /* H5S_mpio_spaces_write() */

#endif /* HAVE_PARALLEL */