From d7bde16f45fac765f45172d88a1a9cd44a1f95fa Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Tue, 16 Sep 2003 12:33:00 -0500 Subject: [svn-r7480] Purpose: Bug fix Description: The MPI_File_set_size() routine on ASCI Red is not able to extend files so that they are larger than 2GB. Solution: Add an extra macro which controls whether MPI_File_set_size() can handle >2GB offsets or if our "older" way of reading a byte, then writing a byte at the appropriate offset should be used. Platforms tested: FreeBSD 4.9 (sleipnir) h5committest --- config/intel-osf1 | 4 ++ configure | 23 +++++++++ configure.in | 22 ++++++++- release_docs/RELEASE.txt | 2 + src/H5FDmpio.c | 26 +++++------ src/H5config.h.in | 6 ++- testpar/t_mdset.c | 118 +++++++++++++++++++++++++++++++++++++++++++++++ testpar/testphdf5.c | 39 ++++++++++++++-- testpar/testphdf5.h | 2 + 9 files changed, 223 insertions(+), 19 deletions(-) diff --git a/config/intel-osf1 b/config/intel-osf1 index 196b311..69fc63a 100644 --- a/config/intel-osf1 +++ b/config/intel-osf1 @@ -141,3 +141,7 @@ ac_cv_sizeof_uint_fast32_t=${ac_cv_sizeof_uint_fast32_t='0'} ac_cv_sizeof_uint_fast64_t=${ac_cv_sizeof_uint_fast64_t='0'} hdf5_cv_system_scope_threads=${hdf5_cv_system_scope_threads="no"} + +# Hard set MPI_File_set_size() working for files over 2GB to no +hdf5_cv_mpi_file_set_size_big=${hdf5_cv_mpi_file_set_size_big='no'} + diff --git a/configure b/configure index 1a28d9c..bafd73b 100755 --- a/configure +++ b/configure @@ -32989,6 +32989,29 @@ echo "${ECHO_T}yes" >&6 echo "$as_me:$LINENO: result: no" >&5 echo "${ECHO_T}no" >&6 fi + + echo "$as_me:$LINENO: checking if MPI_File_set_size works for files over 2GB" >&5 +echo $ECHO_N "checking if MPI_File_set_size works for files over 2GB... 
$ECHO_C" >&6 + if test "${hdf5_cv_mpi_file_set_size_big+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + hdf5_cv_mpi_file_set_size_big=yes +fi + + + if test ${hdf5_cv_mpi_file_set_size_big} = "yes"; then + +cat >>confdefs.h <<\_ACEOF +#define MPI_FILE_SET_SIZE_BIG 1 +_ACEOF + + echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 + fi + fi diff --git a/configure.in b/configure.in index 363a00d..2f83597 100644 --- a/configure.in +++ b/configure.in @@ -1683,7 +1683,7 @@ int main(void) if test ${hdf5_cv_malloc_works} = "yes"; then AC_DEFINE([MALLOC_WORKS], [1], - [Define if your system has a working \`malloc' function.]) + [Define if your system's \`malloc' function returns a valid pointer for 0-byte allocations.]) AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no]) @@ -2296,8 +2296,26 @@ dnl else AC_MSG_RESULT(no) fi -fi + dnl ---------------------------------------------------------------------- + dnl Set the flag to indicate that the MPI_File_set_size() function + dnl works with files over 2GB, unless it's already set in the cache. 
+ dnl (This flag should be set for all machines, except for ASCI Red, where + dnl the cache value is set in its config file) + dnl + AC_MSG_CHECKING([if MPI_File_set_size works for files over 2GB]) + AC_CACHE_VAL([hdf5_cv_mpi_file_set_size_big], [hdf5_cv_mpi_file_set_size_big=yes]) + + if test ${hdf5_cv_mpi_file_set_size_big} = "yes"; then + AC_DEFINE([MPI_FILE_SET_SIZE_BIG], [1], + [Define if your system's \`MPI_File_set_size' function works for files over 2GB.]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi + +fi + dnl ---------------------------------------------------------------------- dnl Turn on internal I/O filters by setting macros in header files dnl Internal I/O filters are contained entirely within the library and do diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index b501cb6..39cb855 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -74,6 +74,8 @@ Bug Fixes since HDF5-1.6.0 release Library ------- + - Revert changes which caused files >2GB to fail when created with + MPI-I/O file driver on certain platforms. QAK - 2003/09/16 - Allow compound datatypes to grow in size. SLU - 2003/09/10 - Detect if a type is already packed before attempting to pack it again or check if it is locked. 
SLU - 2003/09/10 diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 503641a..5bd714b 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -2026,10 +2026,10 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) int mpi_code; /* mpi return code */ MPI_Offset mpi_off; herr_t ret_value=SUCCEED; -#ifdef OLD_WAY +#ifndef H5_MPI_FILE_SET_SIZE_BIG uint8_t byte=0; MPI_Status mpi_stat; -#endif /* OLD_WAY */ +#endif /* H5_MPI_FILE_SET_SIZE_BIG */ FUNC_ENTER_NOAPI(H5FD_mpio_flush, FAIL) @@ -2040,17 +2040,24 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) assert(file); assert(H5FD_MPIO==file->pub.driver_id); -#ifdef OLD_WAY +#ifndef H5_MPI_FILE_SET_SIZE_BIG /* Portably initialize MPI status variable */ HDmemset(&mpi_stat,0,sizeof(MPI_Status)); -#endif /* OLD_WAY */ +#endif /* H5_MPI_FILE_SET_SIZE_BIG */ /* Extend the file to make sure it's large enough, then sync. * Unfortunately, keeping track of EOF is an expensive operation, so * we can't just check whether EOFeoa>file->last_eoa) { -#ifdef OLD_WAY +#ifdef H5_MPI_FILE_SET_SIZE_BIG + if (H5FD_mpio_haddr_to_MPIOff(file->eoa, &mpi_off)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset") + + /* Extend the file's size */ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code) +#else /* H5_MPI_FILE_SET_SIZE_BIG */ if (0==file->mpi_rank) { if (H5FD_mpio_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset") @@ -2059,13 +2066,7 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) } /* end if */ -#else /* OLD_WAY */ - if (H5FD_mpio_haddr_to_MPIOff(file->eoa, &mpi_off)<0) - HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, 
"cannot convert from haddr_t to MPI_Offset") - - /* Extend the file's size */ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code) +#endif /* H5_MPI_FILE_SET_SIZE_BIG */ /* Don't let any proc return until all have extended the file. * (Prevents race condition where some processes go ahead and write @@ -2075,7 +2076,6 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) */ if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) -#endif /* OLD_WAY */ /* Update the 'last' eoa value */ file->last_eoa=file->eoa; diff --git a/src/H5config.h.in b/src/H5config.h.in index 083f2ee..ea14802 100644 --- a/src/H5config.h.in +++ b/src/H5config.h.in @@ -352,6 +352,10 @@ /* Define if your system has a working `malloc' function. */ #undef MALLOC_WORKS +/* Define if your system's `MPI_File_set_size' function works for files over + 2GB. */ +#undef MPI_FILE_SET_SIZE_BIG + /* Define if shared writing must be disabled (CodeWarrior only) */ #undef NO_SHARED_WRITING @@ -370,7 +374,7 @@ /* Define to the version of this package. */ #undef PACKAGE_VERSION -/* Width for printf() for type `long long' or `__int64', us. `ll' */ +/* Width for printf() for type `long long' or `__int64', use `ll' */ #undef PRINTF_LL_WIDTH /* The size of a `char', as computed by sizeof. */ diff --git a/testpar/t_mdset.c b/testpar/t_mdset.c index b8bf4b6..3d40307 100644 --- a/testpar/t_mdset.c +++ b/testpar/t_mdset.c @@ -211,6 +211,124 @@ void compact_dataset(char *filename) H5Fclose(iof); } +/* Example of using PHDF5 to create "large" datasets. (>2GB, >4GB, >8GB) + * Actual data is _not_ written to these datasets. Dataspaces are exact + * sizes (2GB, 4GB, etc.), but the metadata for the file pushes the file over + * the boundary of interest. 
+ */ +void big_dataset(const char *filename) +{ + int mpi_size, mpi_rank; /* MPI info */ + hbool_t use_gpfs = FALSE; /* Don't use GPFS stuff for this test */ + hid_t iof, /* File ID */ + fapl, /* File access property list ID */ + dataset, /* Dataset ID */ + filespace; /* Dataset's dataspace ID */ + hsize_t file_dims [4]; /* Dimensions of dataspace */ + char dname[]="dataset"; /* Name of dataset */ + MPI_Offset file_size; /* Size of file on disk */ + herr_t ret; /* Generic return value */ + + MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank); + MPI_Comm_size (MPI_COMM_WORLD, &mpi_size); + + VRFY((mpi_size <= SIZE), "mpi_size <= SIZE"); + + fapl = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type, use_gpfs); + VRFY((fapl >= 0), "create_faccess_plist succeeded"); + + /* + * Create >2GB HDF5 file + */ + iof = H5Fcreate (filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + VRFY((iof >= 0), "H5Fcreate succeeded"); + + /* Define dataspace for 2GB dataspace */ + file_dims[0]= 2; + file_dims[1]= 1024; + file_dims[2]= 1024; + file_dims[3]= 1024; + filespace = H5Screate_simple (4, file_dims, NULL); + VRFY((filespace >= 0), "H5Screate_simple succeeded"); + + dataset = H5Dcreate (iof, dname, H5T_NATIVE_UCHAR, filespace, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate succeeded"); + + /* Close all file objects */ + ret=H5Dclose (dataset); + VRFY((ret >= 0), "H5Dclose succeeded"); + ret=H5Sclose (filespace); + VRFY((ret >= 0), "H5Sclose succeeded"); + ret=H5Fclose (iof); + VRFY((ret >= 0), "H5Fclose succeeded"); + + /* Check that file of the correct size was created */ + file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((file_size == 2147485696ULL), "File is correct size"); + + /* + * Create >4GB HDF5 file + */ + iof = H5Fcreate (filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + VRFY((iof >= 0), "H5Fcreate succeeded"); + + /* Define dataspace for 4GB dataspace */ + file_dims[0]= 4; + file_dims[1]= 1024; + file_dims[2]= 1024; + file_dims[3]= 1024; + 
filespace = H5Screate_simple (4, file_dims, NULL); + VRFY((filespace >= 0), "H5Screate_simple succeeded"); + + dataset = H5Dcreate (iof, dname, H5T_NATIVE_UCHAR, filespace, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate succeeded"); + + /* Close all file objects */ + ret=H5Dclose (dataset); + VRFY((ret >= 0), "H5Dclose succeeded"); + ret=H5Sclose (filespace); + VRFY((ret >= 0), "H5Sclose succeeded"); + ret=H5Fclose (iof); + VRFY((ret >= 0), "H5Fclose succeeded"); + + /* Check that file of the correct size was created */ + file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((file_size == 4294969344ULL), "File is correct size"); + + /* + * Create >8GB HDF5 file + */ + iof = H5Fcreate (filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + VRFY((iof >= 0), "H5Fcreate succeeded"); + + /* Define dataspace for 8GB dataspace */ + file_dims[0]= 8; + file_dims[1]= 1024; + file_dims[2]= 1024; + file_dims[3]= 1024; + filespace = H5Screate_simple (4, file_dims, NULL); + VRFY((filespace >= 0), "H5Screate_simple succeeded"); + + dataset = H5Dcreate (iof, dname, H5T_NATIVE_UCHAR, filespace, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate succeeded"); + + /* Close all file objects */ + ret=H5Dclose (dataset); + VRFY((ret >= 0), "H5Dclose succeeded"); + ret=H5Sclose (filespace); + VRFY((ret >= 0), "H5Sclose succeeded"); + ret=H5Fclose (iof); + VRFY((ret >= 0), "H5Fclose succeeded"); + + /* Check that file of the correct size was created */ + file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((file_size == 8589936640ULL), "File is correct size"); + + /* Close fapl */ + ret=H5Pclose (fapl); + VRFY((ret >= 0), "H5Pclose succeeded"); +} + /* Write multiple groups with a chunked dataset in each group collectively. * These groups and datasets are for testing independent read later. 
*/ diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index 6a4a0b2..ff44424 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -44,9 +44,10 @@ int doread=1; /* read test */ int dowrite=1; /* write test */ int docompact=1; /* compact dataset test */ int doindependent=1; /* independent test */ +unsigned dobig=1; /* "big" dataset tests */ /* FILENAME and filenames must have the same number of names */ -const char *FILENAME[8]={ +const char *FILENAME[9]={ "ParaEg1", "ParaEg2", "ParaEg3", @@ -54,8 +55,9 @@ const char *FILENAME[8]={ "ParaMgroup", "ParaCompact", "ParaIndividual", + "ParaBig", NULL}; -char filenames[8][PATH_MAX]; +char filenames[9][PATH_MAX]; hid_t fapl; /* file access property list */ #ifdef USE_PAUSE @@ -177,6 +179,8 @@ parse_options(int argc, char **argv) break; case 'i': doindependent = 0; break; + case 'b': dobig = 0; + break; case 'v': verbose = 1; break; case 'f': if (--argc < 1) { @@ -326,6 +330,27 @@ create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type, return (ret_pl); } +/* + * Check the size of a file using MPI routines; returns 0 if any MPI call fails + */ +MPI_Offset +h5_mpi_get_file_size(const char *filename, MPI_Comm comm, MPI_Info info) +{ + MPI_File fh; /* MPI file handle */ + MPI_Offset size=0; /* File size to return */ + + if (MPI_SUCCESS != MPI_File_open(comm, (char*)filename, MPI_MODE_RDONLY, info, &fh)) + goto done; + + if (MPI_SUCCESS != (MPI_File_get_size(fh, &size))) + size=0; /* no early exit here: fh is open and must still be closed below */ + + if (MPI_SUCCESS != MPI_File_close(&fh)) + size=0; + +done: + return(size); +} int main(int argc, char **argv) { @@ -445,7 +470,15 @@ int main(int argc, char **argv) MPI_BANNER("Independent test skipped"); } - if (!(dowrite || doread || ndatasets || ngroups || docompact)){ + if (dobig && sizeof(MPI_Offset)>4){ + MPI_BANNER("big dataset test..."); + big_dataset(filenames[7]); + } + else { + MPI_BANNER("big dataset test skipped"); + } + + if (!(dowrite || doread || ndatasets || ngroups || docompact || doindependent || dobig)){ usage(); nerrors++; } diff 
--git a/testpar/testphdf5.h b/testpar/testphdf5.h index afbce3e..b6f5dc2 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -123,6 +123,7 @@ extern int facc_type; /*Test file access type */ /* prototypes */ hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type, hbool_t use_gpfs); +MPI_Offset h5_mpi_get_file_size(const char *filename, MPI_Comm comm, MPI_Info info); void multiple_dset_write(char *filename, int ndatasets); void multiple_group_write(char *filename, int ngroups); void multiple_group_read(char *filename, int ngroups); @@ -140,6 +141,7 @@ void dataset_readAll(char *filename); void extend_readInd(char *filename); void extend_readAll(char *filename); void compact_dataset(char *filename); +void big_dataset(const char *filename); int dataset_vrfy(hssize_t start[], hsize_t count[], hsize_t stride[], hsize_t block[], DATATYPE *dataset, DATATYPE *original); #endif /* PHDF5TEST_H */ -- cgit v0.12