From ea1714b3035b9de6cac885508c9f4d882c817b3f Mon Sep 17 00:00:00 2001 From: Glenn Song <43005495+glennsong09@users.noreply.github.com> Date: Tue, 24 Oct 2023 12:51:55 -0500 Subject: Fix H5Pset_evict_on_close failing regardless of actual parallel use (#3761) Allow H5Pset_evict_on_close to be called regardless of whether a parallel build of HDF5 is being used Fail during file opens if H5Pset_evict_on_close has been set to true on the given File Access Property List and the size of the MPI communicator being used is greater than 1 --- src/H5Fint.c | 18 ++++++++-- src/H5Pfapl.c | 7 +--- test/evict_on_close.c | 92 --------------------------------------------------- testpar/t_file.c | 59 +++++++++++++++++++++++++++++++++ testpar/testphdf5.c | 3 ++ testpar/testphdf5.h | 1 + 6 files changed, 80 insertions(+), 100 deletions(-) diff --git a/src/H5Fint.c b/src/H5Fint.c index 014f619..4093b4b 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -1968,6 +1968,22 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id) HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get minimum raw data fraction of page buffer"); } /* end if */ + /* Get the evict on close setting */ + if (H5P_get(a_plist, H5F_ACS_EVICT_ON_CLOSE_FLAG_NAME, &evict_on_close) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get evict on close value"); + +#ifdef H5_HAVE_PARALLEL + /* Check for evict on close in parallel (currently unsupported) */ + assert(file->shared); + if (H5F_SHARED_HAS_FEATURE(file->shared, H5FD_FEAT_HAS_MPI)) { + int mpi_size = H5F_shared_mpi_get_size(file->shared); + + if ((mpi_size > 1) && evict_on_close) + HGOTO_ERROR(H5E_FILE, H5E_UNSUPPORTED, NULL, + "evict on close is currently not supported in parallel HDF5"); + } +#endif + /* * Read or write the file superblock, depending on whether the file is * empty or not. @@ -2046,8 +2062,6 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id) * or later, verify that the access property list value matches the value * in shared file structure. */ - if (H5P_get(a_plist, H5F_ACS_EVICT_ON_CLOSE_FLAG_NAME, &evict_on_close) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get evict on close value"); if (shared->nrefs == 1) shared->evict_on_close = evict_on_close; else if (shared->nrefs > 1) { diff --git a/src/H5Pfapl.c b/src/H5Pfapl.c index dc122af..e7c1fb3 100644 --- a/src/H5Pfapl.c +++ b/src/H5Pfapl.c @@ -4848,7 +4848,7 @@ H5P__facc_mdc_log_location_close(const char H5_ATTR_UNUSED *name, size_t H5_ATTR *------------------------------------------------------------------------- */ herr_t -H5Pset_evict_on_close(hid_t fapl_id, hbool_t H5_ATTR_PARALLEL_UNUSED evict_on_close) +H5Pset_evict_on_close(hid_t fapl_id, hbool_t evict_on_close) { H5P_genplist_t *plist; /* property list pointer */ herr_t ret_value = SUCCEED; /* return value */ @@ -4864,14 +4864,9 @@ H5Pset_evict_on_close(hid_t fapl_id, hbool_t H5_ATTR_PARALLEL_UNUSED evict_on_cl if (NULL == (plist = (H5P_genplist_t *)H5I_object(fapl_id))) HGOTO_ERROR(H5E_ID, H5E_BADID, FAIL, "can't find object for ID"); -#ifndef H5_HAVE_PARALLEL /* Set value */ if (H5P_set(plist, H5F_ACS_EVICT_ON_CLOSE_FLAG_NAME, &evict_on_close) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set evict on close property"); -#else - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, - "evict on close is currently not supported in parallel HDF5"); -#endif /* H5_HAVE_PARALLEL */ done: FUNC_LEAVE_API(ret_value) diff --git a/test/evict_on_close.c b/test/evict_on_close.c index 9ca7f9f..db2a962 100644 --- a/test/evict_on_close.c +++ b/test/evict_on_close.c @@ -32,12 +32,6 @@ #include "H5Ipkg.h" #include "H5VLprivate.h" /* Virtual Object Layer */ -/* Evict on close is not supported under parallel at this time. - * In the meantime, we just run a simple check that EoC can't be - * enabled in parallel HDF5. - */ -#ifndef H5_HAVE_PARALLEL - /* Uncomment to manually inspect cache states */ /* (Requires debug build of the library) */ /* #define EOC_MANUAL_INSPECTION */ @@ -974,89 +968,3 @@ error: exit(EXIT_FAILURE); } /* end main() */ - -#else - -/*------------------------------------------------------------------------- - * Function: check_evict_on_close_parallel_fail() - * - * Purpose: Verify that the H5Pset_evict_on_close() call fails in - * parallel HDF5. - * - * Return: SUCCEED/FAIL - * - *------------------------------------------------------------------------- - */ -static herr_t -check_evict_on_close_parallel_fail(void) -{ - hid_t fapl_id = H5I_INVALID_HID; - bool evict_on_close; - herr_t status; - - TESTING("evict on close fails in parallel"); - - /* Create a fapl */ - if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) - TEST_ERROR; - - /* Set the evict on close property (should fail)*/ - evict_on_close = true; - H5E_BEGIN_TRY - { - status = H5Pset_evict_on_close(fapl_id, evict_on_close); - } - H5E_END_TRY - if (status >= 0) - FAIL_PUTS_ERROR("H5Pset_evict_on_close() did not fail in parallel HDF5."); - - /* close fapl */ - if (H5Pclose(fapl_id) < 0) - TEST_ERROR; - - PASSED(); - return SUCCEED; - -error: - H5_FAILED(); - return FAIL; - -} /* check_evict_on_close_parallel_fail() */ - -/*------------------------------------------------------------------------- - * Function: main (parallel version) - * - * Return: EXIT_FAILURE/EXIT_SUCCESS - * - *------------------------------------------------------------------------- - */ -int -main(void) -{ - unsigned nerrors = 0; /* number of test errors */ - - printf("Testing evict-on-close cache behavior\n"); - - /* Initialize */ - h5_reset(); - - /* Test that EoC fails in parallel HDF5 */ - nerrors += check_evict_on_close_parallel_fail() < 0 ? 1 : 0; - - if (nerrors) - goto error; - - printf("All evict-on-close tests passed.\n"); - printf("Note that EoC is not supported under parallel so most tests are skipped.\n"); - - exit(EXIT_SUCCESS); - -error: - - printf("***** %u evict-on-close test%s FAILED! *****\n", nerrors, nerrors > 1 ? "S" : ""); - - exit(EXIT_FAILURE); - -} /* main() - parallel */ - -#endif /* H5_HAVE_PARALLEL */ diff --git a/testpar/t_file.c b/testpar/t_file.c index a6a541b..700ccc2 100644 --- a/testpar/t_file.c +++ b/testpar/t_file.c @@ -1060,3 +1060,62 @@ test_invalid_libver_bounds_file_close_assert(void) ret = H5Pclose(fcpl_id); VRFY((SUCCEED == ret), "H5Pclose"); } + +/* + * Tests that H5Pevict_on_close properly succeeds in serial/one rank and fails when + * called by multiple ranks. + */ +void +test_evict_on_close_parallel_unsupp(void) +{ + const char *filename = NULL; + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + hid_t fid = H5I_INVALID_HID; + hid_t fapl_id = H5I_INVALID_HID; + herr_t ret; + + filename = (const char *)GetTestParameters(); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + /* setup file access plist */ + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + VRFY((fapl_id != H5I_INVALID_HID), "H5Pcreate"); + ret = H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_V18); + VRFY((SUCCEED == ret), "H5Pset_libver_bounds"); + + ret = H5Pset_evict_on_close(fapl_id, true); + VRFY((SUCCEED == ret), "H5Pset_evict_on_close"); + + /* test on 1 rank */ + ret = H5Pset_fapl_mpio(fapl_id, MPI_COMM_SELF, info); + VRFY((SUCCEED == ret), "H5Pset_fapl_mpio"); + + if (mpi_rank == 0) { + fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + VRFY((SUCCEED == ret), "H5Fcreate"); + ret = H5Fclose(fid); + VRFY((SUCCEED == ret), "H5Fclose"); + } + + VRFY((MPI_SUCCESS == MPI_Barrier(MPI_COMM_WORLD)), "MPI_Barrier"); + + /* test on multiple ranks if we have them */ + if (mpi_size > 1) { + ret = H5Pset_fapl_mpio(fapl_id, comm, info); + VRFY((SUCCEED == ret), "H5Pset_fapl_mpio"); + + H5E_BEGIN_TRY + { + fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + } + H5E_END_TRY + VRFY((fid == H5I_INVALID_HID), "H5Fcreate"); + } + + ret = H5Pclose(fapl_id); + VRFY((SUCCEED == ret), "H5Pclose"); +} diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index 2d85e1a..2428c71 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -366,6 +366,9 @@ main(int argc, char **argv) AddTest("invlibverassert", test_invalid_libver_bounds_file_close_assert, NULL, "Invalid libver bounds assertion failure", PARATESTFILE); + AddTest("evictparassert", test_evict_on_close_parallel_unsupp, NULL, "Evict on close in parallel failure", + PARATESTFILE); + AddTest("idsetw", dataset_writeInd, NULL, "dataset independent write", PARATESTFILE); AddTest("idsetr", dataset_readInd, NULL, "dataset independent read", PARATESTFILE); diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h index 5699760..6bbdb0d 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -233,6 +233,7 @@ void zero_dim_dset(void); void test_file_properties(void); void test_delete(void); void test_invalid_libver_bounds_file_close_assert(void); +void test_evict_on_close_parallel_unsupp(void); void multiple_dset_write(void); void multiple_group_write(void); void multiple_group_read(void); -- cgit v0.12