diff options
-rw-r--r-- | src/H5Fmpio.c | 262 |
1 files changed, 210 insertions, 52 deletions
diff --git a/src/H5Fmpio.c b/src/H5Fmpio.c index f71b653..7647031 100644 --- a/src/H5Fmpio.c +++ b/src/H5Fmpio.c @@ -99,10 +99,10 @@ static H5F_low_t *H5F_mpio_open(const char *name, const H5F_access_t *access_parms, uintn flags, H5F_search_t *key/*out*/); static herr_t H5F_mpio_close(H5F_low_t *lf, const H5F_access_t *access_parms); -static herr_t H5F_mpio_read(H5F_low_t *lf, const H5F_access_t *access_parms, +static herr_t H5F_mpio_read(H5F_low_t *lf, H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, uint8 *buf/*out*/); -static herr_t H5F_mpio_write(H5F_low_t *lf, const H5F_access_t *access_parms, +static herr_t H5F_mpio_write(H5F_low_t *lf, H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, const uint8 *buf); static herr_t H5F_mpio_flush(H5F_low_t *lf, const H5F_access_t *access_parms); @@ -113,8 +113,21 @@ const H5F_low_class_t H5F_LOW_MPIO_g[1] = {{ H5F_mpio_access, /*access method */ H5F_mpio_open, /*open method */ H5F_mpio_close, /*close method */ + + /* rky 980816 + * this is ugly, but removing the const modifier from access_parms + * in the parameter list of the write function in H5F_low_class_t + * would propagate to a lot of functions that don't change that param */ + (int(*)(struct H5F_low_t *lf, const H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, uint8 *buf)) H5F_mpio_read, /*read method */ + + /* rky 980816 + * this is ugly, but removing the const modifier from access_parms + * in the parameter list of the write function in H5F_low_class_t + * would propagate to a lot of functions that don't change that param */ + (int(*)(struct H5F_low_t *lf, const H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, const uint8 *buf)) H5F_mpio_write, /*write method */ + H5F_mpio_flush, /*flush method */ NULL, /*extend method */ NULL, /*alloc method */ @@ -207,9 +220,9 @@ H5F_mpio_access(const char *name, const H5F_access_t *access_parms, int mode, /* (char*) name is okay since MPI_File_open will not change it. */ mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*) name, mpi_mode, access_parms->u.mpio.info, &fh ); - if (mpierr == MPI_SUCCESS) { + if (MPI_SUCCESS == mpierr) { mpierr = MPI_File_close( &fh ); - if (mpierr != MPI_SUCCESS) + if (MPI_SUCCESS != mpierr) HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_File_close failed"); ret_val = TRUE; } else if (mode == F_OK) { @@ -217,9 +230,9 @@ H5F_mpio_access(const char *name, const H5F_access_t *access_parms, int mode, mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*)name, MPI_MODE_WRONLY, access_parms->u.mpio.info, &fh ); - if (mpierr == MPI_SUCCESS) { + if (MPI_SUCCESS == mpierr) { mpierr = MPI_File_close( &fh ); - if (mpierr != MPI_SUCCESS) + if (MPI_SUCCESS != mpierr) HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_File_close failed"); ret_val = TRUE; } @@ -317,7 +330,7 @@ H5F_mpio_open(const char *name, const H5F_access_t *access_parms, uintn flags, #endif mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*)name, mpi_amode, access_parms->u.mpio.info, &fh); - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, mpierrmsg ); } @@ -325,7 +338,7 @@ H5F_mpio_open(const char *name, const H5F_access_t *access_parms, uintn flags, /* truncate the file, if requested */ if (flags&H5F_ACC_TRUNC) { mpierr = MPI_File_set_size( fh, (MPI_Offset)0 ); - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_File_close( &fh ); HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, "MPI_File_set_size failed trying to truncate file" ); @@ -340,7 +353,7 @@ H5F_mpio_open(const char *name, const H5F_access_t *access_parms, uintn flags, lf->u.mpio.f = fh; H5F_addr_reset(&(lf->eof)); mpierr = MPI_File_get_size( fh, &size ); - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_File_close( &(lf->u.mpio.f) ); MPI_Error_string( mpierr, mpierrmsg, &msglen ); HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, mpierrmsg ); @@ -411,7 +424,7 @@ H5F_mpio_close(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms) mpierr = MPI_File_close( &(lf->u.mpio.f) ); /* MPI_File_close sets lf->u.mpio.f to MPI_FILE_NULL */ - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); HRETURN_ERROR(H5E_IO, H5E_CLOSEERROR, FAIL, mpierrmsg ); } @@ -426,20 +439,28 @@ H5F_mpio_close(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms) /*------------------------------------------------------------------------- * Function: H5F_mpio_read * - * Purpose: Reads SIZE bytes beginning at address ADDR in file LF and - * places them in buffer BUF. Reading past the logical or - * physical end of file returns zeros instead of failing. + * Purpose: Depending on a field in access params, either: + * - Writes SIZE bytes from the beginning of BUF into file LF + * at file address ADDR. + * - Reads SIZE bytes beginning at address ADDR in file LF + * and places them in buffer BUF. + * - Uses the (potentially complex) file and buffer types + * to effect the transfer. + * This can allow MPI to coalesce requests from + * different processes (collective or independent). + * + * Reading past the end of the MPI file + * returns zeros instead of failing. * * Errors: * IO READERROR MPI_File_read_at failed. * IO READERROR MPI_Get_count failed * * Return: Success: SUCCEED - * * Failure: FAIL + * (use_types and old_use_types in the access params are altered) * - * Programmer: - * January 30, 1998 + * Programmer: rky 980130 * * Modifications: * @@ -452,19 +473,24 @@ H5F_mpio_close(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms) * Albert Cheng, June 1, 1998 * Added xfer_mode to control independent or collective MPI read. * + * rky 980816 + * Use btype, ftype, and disp from access parms. + * The guts of H5F_mpio_read and H5F_mpio_write + * should be replaced by a single dual-purpose routine. + * *------------------------------------------------------------------------- */ static herr_t -H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, +H5F_mpio_read(H5F_low_t *lf, H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, uint8 *buf/*out*/) { - MPI_Offset mpi_off; - int size_i, bytes_read, n; - MPI_Status mpi_stat; - int mpierr; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - int msglen; + MPI_Offset mpi_off, mpi_disp; + MPI_Status mpi_stat; + MPI_Datatype buf_type, file_type; + int mpierr, msglen, size_i, bytes_read, n; + int use_types_this_time, used_types_last_time; + char mpierrmsg[MPI_MAX_ERROR_STRING]; FUNC_ENTER(H5F_mpio_read, FAIL); #ifdef H5Fmpio_DEBUG @@ -472,14 +498,14 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, fprintf(stdout, "Entering H5F_mpio_read\n" ); #endif - /* numeric conversion of offset and size */ + /* some numeric conversions */ if (SUCCEED != H5F_haddr_to_MPIOff( *addr, &mpi_off )) { - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert addr to MPIOffset" ); } size_i = (int)size; if (size_i != size) { /* check type conversion */ - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert size to int" ); } @@ -489,12 +515,53 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, mpi_off, size_i ); #endif + /* Set up for a fancy xfer using complex types, or single byte block. + * We wouldn't need to rely on the use_types field + * if MPI semantics allowed us to test that btype=ftype=MPI_BYTE + * (or even MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention). + */ + use_types_this_time = access_parms->u.mpio.use_types; + if (access_parms->u.mpio.use_types) { + /* prepare for a full-blown xfer using btype, ftype, and disp */ + buf_type = access_parms->u.mpio.btype; + file_type = access_parms->u.mpio.ftype; + if (SUCCEED != + H5F_haddr_to_MPIOff( access_parms->u.mpio.disp, &mpi_disp)) { + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert addr to MPIOffset" ); + } + } else { + /* Prepare for a simple xfer of a contiguous block of bytes. + * The btype, ftype, and disp fields are not used. */ + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + mpi_disp = 0; /* mpi_off is sufficient */ + } + + /* Don't bother to reset the view if we're not using the types this time, + * and did we didn't use them last time either. */ + used_types_last_time = access_parms->u.mpio.old_use_types; + if (used_types_last_time /* change to new ftype or MPI_BYTE */ + || use_types_this_time) /* almost certainly a different ftype */ { + mpierr = MPI_File_set_view( lf->u.mpio.f, mpi_disp, + MPI_BYTE, file_type, + "native", access_parms->u.mpio.info ); + if (MPI_SUCCESS != mpierr) { + MPI_Error_string( mpierr, mpierrmsg, &msglen ); + HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); + } + } + /* We always set the use_types flag to 0 because the + * default is not to use types next time, + * unless someone explicitly requests it by setting this flag to !=0. */ + access_parms->u.mpio.old_use_types = use_types_this_time; + access_parms->u.mpio.use_types = 0; + /* Read the data. */ switch (xfer_mode){ case H5D_XFER_INDEPENDENT: case H5D_XFER_DFLT: mpierr = MPI_File_read_at ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, MPI_BYTE, &mpi_stat ); + size_i, buf_type, &mpi_stat ); break; case H5D_XFER_COLLECTIVE: @@ -502,13 +569,13 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, printf("%s: using MPIO collective mode\n", FUNC); #endif mpierr = MPI_File_read_at_all ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, MPI_BYTE, &mpi_stat ); + size_i, buf_type, &mpi_stat ); break; default: HRETURN_ERROR(H5E_IO, H5E_BADVALUE, FAIL, "invalid file access mode"); } - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, mpierrmsg ); } @@ -521,9 +588,9 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, "In H5F_mpio_read after Get_count size_i=%d bytes_read=%d\n", size_i, bytes_read ); #endif - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, mpierrmsg ); + HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); } #define MPI_KLUGE0202 @@ -538,9 +605,18 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, "MPI_Get_count returned invalid count" ); } - /* read zeroes past the end of the file */ - if ((n=(size_i-bytes_read)) > 0) { - HDmemset( buf+bytes_read, 0, (size_t)n ); + /* beyond end of file, pretend we're reading zeroes */ + if (use_types_this_time) { + /* BUG rky 980816 Not implemented yet. How to do it??? */ + HRETURN_ERROR(H5E_IO, H5E_UNSUPPORTED, FAIL, + "haven't implemented reading zeroes beyond end of file" ); + } else { + /* BUG??? rky 980816 + * This gives us zeroes beyond end of physical MPI file. + * What about reading past logical end of HDF5 file??? */ + if ((n=(size_i-bytes_read)) > 0) { + HDmemset( buf+bytes_read, 0, (size_t)n ); + } } #ifdef H5Fmpio_DEBUG @@ -548,20 +624,25 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, fprintf(stdout, "Leaving H5F_mpio_read\n" ); #endif FUNC_LEAVE(SUCCEED); -} +} /* H5F_mpio_read */ /*------------------------------------------------------------------------- * Function: H5F_mpio_write * - * Purpose: Writes SIZE bytes from the beginning of BUF into file LF at - * file address ADDR. + * Purpose: Depending on a field in access params, either: + * - Writes SIZE bytes from the beginning of BUF into file LF + * at file address ADDR. + * - Uses the (potentially complex) file and buffer types + * to effect the transfer. + * This can allow MPI to coalesce requests from + * different processes (collective or independent). * * Errors: * IO WRITEERROR MPI_File_write_at failed. * * Return: Success: SUCCEED - * * Failure: FAIL + * (use_types and old_use_types in the access params are altered) * * Programmer: * January 30, 1998 @@ -580,17 +661,24 @@ H5F_mpio_read(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, * Albert Cheng, June 1, 1998 * Added xfer_mode to control independent or collective MPI write. * + * rky 980816 + * Use btype, ftype, and disp from access parms. + * The guts of H5F_mpio_read and H5F_mpio_write + * should be replaced by a single dual-purpose routine. + * *------------------------------------------------------------------------- */ static herr_t -H5F_mpio_write(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, +H5F_mpio_write(H5F_low_t *lf, H5F_access_t *access_parms, const H5D_transfer_t xfer_mode, const haddr_t *addr, size_t size, const uint8 *buf) { - MPI_Offset mpi_off; - MPI_Status mpi_stat; - int mpierr, msglen, size_i; - char mpierrmsg[MPI_MAX_ERROR_STRING]; + MPI_Offset mpi_off, mpi_disp; + MPI_Status mpi_stat; + MPI_Datatype buf_type, file_type; + int mpierr, msglen, size_i, bytes_written; + int use_types_this_time, used_types_last_time; + char mpierrmsg[MPI_MAX_ERROR_STRING]; FUNC_ENTER(H5F_mpio_write, FAIL); #ifdef H5Fmpio_DEBUG @@ -598,14 +686,18 @@ H5F_mpio_write(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, fprintf(stdout, "Entering H5F_mpio_write\n" ); #endif - /* numeric conversion of offset and size */ + /* some numeric conversions */ if (SUCCEED != H5F_haddr_to_MPIOff( *addr, &mpi_off )) { - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, + "couldn't convert addr to MPIOffset" ); + } + if (SUCCEED!=H5F_haddr_to_MPIOff( access_parms->u.mpio.disp, &mpi_disp)) { + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert addr to MPIOffset" ); } size_i = (int)size; if (size_i != size) { /* check type conversion */ - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert size to int" ); } @@ -615,12 +707,53 @@ H5F_mpio_write(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, mpi_off, size_i ); #endif + /* Set up for a fancy xfer using complex types, or single byte block. + * We wouldn't need to rely on the use_types field + * if MPI semantics allowed us to test that btype=ftype=MPI_BYTE + * (or even MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention). + */ + use_types_this_time = access_parms->u.mpio.use_types; + if (use_types_this_time) { + /* prepare for a full-blown xfer using btype, ftype, and disp */ + buf_type = access_parms->u.mpio.btype; + file_type = access_parms->u.mpio.ftype; + if (SUCCEED != + H5F_haddr_to_MPIOff( access_parms->u.mpio.disp, &mpi_disp)) { + HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, "couldn't convert addr to MPIOffset" ); + } + } else { + /* Prepare for a simple xfer of a contiguous block of bytes. + * The btype, ftype, and disp fields are not used. */ + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + mpi_disp = 0; /* mpi_off is sufficient */ + } + + /* Don't bother to reset the view if we're not using the types this time, + * and did we didn't use them last time either. */ + used_types_last_time = access_parms->u.mpio.old_use_types; + if (used_types_last_time /* change to new ftype or MPI_BYTE */ + || use_types_this_time) /* almost certainly a different ftype */ { + mpierr = MPI_File_set_view( lf->u.mpio.f, mpi_disp, + MPI_BYTE, file_type, + "native", access_parms->u.mpio.info ); + if (MPI_SUCCESS != mpierr) { + MPI_Error_string( mpierr, mpierrmsg, &msglen ); + HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); + } + } + /* We always set the use_types flag to 0 because the + * default is not to use types next time, + * unless someone explicitly requests it by setting this flag to !=0. */ + access_parms->u.mpio.old_use_types = use_types_this_time; + access_parms->u.mpio.use_types = 0; + /* Write the data. */ switch (xfer_mode){ case H5D_XFER_INDEPENDENT: case H5D_XFER_DFLT: mpierr = MPI_File_write_at ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, MPI_BYTE, &mpi_stat ); + size_i, buf_type, &mpi_stat ); break; case H5D_XFER_COLLECTIVE: @@ -628,15 +761,40 @@ H5F_mpio_write(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, printf("%s: using MPIO collective mode\n", FUNC); #endif mpierr = MPI_File_write_at_all( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, MPI_BYTE, &mpi_stat ); + size_i, buf_type, &mpi_stat ); break; default: HRETURN_ERROR(H5E_IO, H5E_BADVALUE, FAIL, "invalid file access mode"); } - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, mpierrmsg ); + HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, mpierrmsg ); + } + + /* How many bytes were actually written? */ + mpierr = MPI_Get_count( &mpi_stat, MPI_BYTE, &bytes_written ); +#ifdef H5Fmpio_DEBUG + if (H5F_mpio_Debug[(int)'c']) + fprintf(stdout, + "In H5F_mpio_write after Get_count size_i=%d bytes_written=%d\n", + size_i, bytes_written ); +#endif + if (MPI_SUCCESS != mpierr) { + MPI_Error_string( mpierr, mpierrmsg, &msglen ); + HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); + } + +#define MPI_KLUGE0202 +#ifdef MPI_KLUGE0202 + /* KLUGE rky 980202 MPI_Get_count incorrectly returns negative count; + fake a complete write */ + bytes_written = size_i; /* KLUGE rky 980202 */ +#endif + + if ((bytes_written<0) || (bytes_written > size_i)) { + HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, + "MPI_Get_count returned invalid count" ); } #ifdef H5Fmpio_DEBUG @@ -644,7 +802,7 @@ H5F_mpio_write(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms, fprintf(stdout, "Leaving H5F_mpio_write\n" ); #endif FUNC_LEAVE(SUCCEED); -} +} /* H5F_mpio_write */ /*------------------------------------------------------------------------- * Function: H5F_mpio_flush @@ -682,7 +840,7 @@ H5F_mpio_flush(H5F_low_t *lf, const H5F_access_t __unused__ *access_parms) #endif mpierr = MPI_File_sync( lf->u.mpio.f ); - if (mpierr != MPI_SUCCESS) { + if (MPI_SUCCESS != mpierr) { MPI_Error_string( mpierr, mpierrmsg, &msglen ); HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, mpierrmsg ); } |