From 6578c452b5d7b0db82d3cf4e58f8a2a11e909b52 Mon Sep 17 00:00:00 2001 From: vchoi-hdfgroup <55293060+vchoi-hdfgroup@users.noreply.github.com> Date: Tue, 24 Oct 2023 21:48:28 -0500 Subject: Preserve MPI-I/O file hints when fapl is closed (#3755) * Fix for issue #3025: Save the MPI info in the file struct so H5Fget_access_plist() can retrieve it from there. --- src/H5Fint.c | 23 +++++++++--- src/H5Fpkg.h | 1 + testpar/t_file.c | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++ testpar/testphdf5.c | 2 ++ testpar/testphdf5.h | 1 + 5 files changed, 124 insertions(+), 5 deletions(-) diff --git a/src/H5Fint.c b/src/H5Fint.c index 4093b4b..439fa4f 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -402,7 +402,6 @@ H5F_get_access_plist(H5F_t *f, bool app_ref) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag"); if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { MPI_Comm mpi_comm; - MPI_Info mpi_info; /* Retrieve and set MPI communicator */ if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f))) @@ -410,10 +409,8 @@ H5F_get_access_plist(H5F_t *f, bool app_ref) if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator"); - /* Retrieve and set MPI info object */ - if (H5P_get(old_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info object"); - if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0) + /* Retrieve MPI info object */ + if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object"); } #endif /* H5_HAVE_PARALLEL */ @@ -1133,6 +1130,12 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F /* initialize point of no return */ f->shared->point_of_no_return = false; +#ifdef H5_HAVE_PARALLEL + /* Initialize this just in case we fail before setting this field and */ + /* we try to call H5_mpi_info_free() on uninitialized memory in H5F__dest() */ + f->shared->mpi_info = MPI_INFO_NULL; +#endif /* H5_HAVE_PARALLEL */ + /* Copy the file creation and file access property lists into the * new file handle. We do this early because some values might need * to change as the file is being opened. @@ -1209,6 +1212,8 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag"); if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag"); + if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object"); #endif /* H5_HAVE_PARALLEL */ if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) < 0) @@ -1414,6 +1419,14 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure) f->shared->efc = NULL; } /* end if */ +#ifdef H5_HAVE_PARALLEL + if (f->shared->mpi_info != MPI_INFO_NULL) { + /* Free MPI info saved in the file struct */ + if (H5_mpi_info_free(&f->shared->mpi_info) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "can't free MPI info"); + } +#endif + /* With the shutdown modifications, the contents of the metadata cache * should be clean at this point, with the possible exception of the * the superblock and superblock extension. diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index bc5c90b..e81b250 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -359,6 +359,7 @@ struct H5F_shared_t { #ifdef H5_HAVE_PARALLEL H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */ bool coll_md_write; /* Do all metadata writes collectively */ + MPI_Info mpi_info; /* MPI info */ #endif /* H5_HAVE_PARALLEL */ }; diff --git a/testpar/t_file.c b/testpar/t_file.c index 700ccc2..ce55270 100644 --- a/testpar/t_file.c +++ b/testpar/t_file.c @@ -1119,3 +1119,105 @@ test_evict_on_close_parallel_unsupp(void) ret = H5Pclose(fapl_id); VRFY((SUCCEED == ret), "H5Pclose"); } + +/* + * Verify that MPI I/O hints are preserved after closing the file access property list + * as described in issue #3025 + * This is a test program from the user. + */ +void +test_fapl_preserve_hints(void) +{ + hid_t fid = H5I_INVALID_HID; /* HDF5 file ID */ + hid_t fapl_id = H5I_INVALID_HID; /* File access plist */ + const char *filename; + + int nkeys_used; + bool same = false; + + MPI_Info info = MPI_INFO_NULL; + const char *key = "hdf_info_fapl"; + const char *value = "xyz"; + + MPI_Info info_used = MPI_INFO_NULL; + int flag = -1; + char value_used[20]; + char key_used[20]; + + int i; + herr_t ret; /* Generic return value */ + int mpi_ret; /* MPI return value */ + + filename = (const char *)GetTestParameters(); + + /* set up MPI parameters */ + mpi_ret = MPI_Info_create(&info); + VRFY((mpi_ret >= 0), "MPI_Info_create succeeded"); + + mpi_ret = MPI_Info_set(info, key, value); + VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_set succeeded"); + + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + VRFY((fapl_id != H5I_INVALID_HID), "H5Pcreate"); + + ret = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, info); + VRFY((ret >= 0), "H5Pset_fapl_mpio"); + + fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + VRFY((fid != H5I_INVALID_HID), "H5Fcreate succeeded"); + + ret = H5Pclose(fapl_id); + VRFY((ret >= 0), "H5Pclose succeeded"); + + fapl_id = H5Fget_access_plist(fid); + VRFY((fapl_id != H5I_INVALID_HID), "H5Fget_access_plist succeeded"); + + ret = H5Pget_fapl_mpio(fapl_id, NULL, &info_used); + VRFY((ret >= 0), "H5Pget_fapl_mpio succeeded"); + + VRFY((info_used != MPI_INFO_NULL), "H5Pget_fapl_mpio"); + + mpi_ret = MPI_Info_get_nkeys(info_used, &nkeys_used); + VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get_nkeys succeeded"); + + /* Loop over the # of keys */ + for (i = 0; i < nkeys_used; i++) { + + /* Memset the buffers to zero */ + memset(key_used, 0, 20); + memset(value_used, 0, 20); + + /* Get the nth key */ + mpi_ret = MPI_Info_get_nthkey(info_used, i, key_used); + VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get_nthkey succeeded"); + + if (!strcmp(key_used, key)) { + + mpi_ret = MPI_Info_get(info_used, key_used, 20, value_used, &flag); + VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_get succeeded"); + + if (!strcmp(value_used, value)) { + + /* Both key_used and value_used are the same */ + same = true; + break; + } + } + } /* end for */ + + VRFY((same == true), "key_used and value_used are the same"); + + ret = H5Pclose(fapl_id); + VRFY((ret >= 0), "H5Pclose succeeded"); + + ret = H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded"); + + /* Free the MPI info object */ + mpi_ret = MPI_Info_free(&info); + VRFY((mpi_ret >= 0), "MPI_Info_free succeeded"); + + mpi_ret = MPI_Info_free(&info_used); + VRFY((mpi_ret >= 0), "MPI_Info_free succeeded"); + +} /* end test_fapl_preserve_hints() */ diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index 2428c71..985c3de 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -368,6 +368,8 @@ main(int argc, char **argv) AddTest("evictparassert", test_evict_on_close_parallel_unsupp, NULL, "Evict on close in parallel failure", PARATESTFILE); + AddTest("fapl_preserve", test_fapl_preserve_hints, NULL, "preserve MPI I/O hints after fapl closed", + PARATESTFILE); AddTest("idsetw", dataset_writeInd, NULL, "dataset independent write", PARATESTFILE); AddTest("idsetr", dataset_readInd, NULL, "dataset independent read", PARATESTFILE); diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h index 6bbdb0d..45f1a94 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -234,6 +234,7 @@ void test_file_properties(void); void test_delete(void); void test_invalid_libver_bounds_file_close_assert(void); void test_evict_on_close_parallel_unsupp(void); +void test_fapl_preserve_hints(void); void multiple_dset_write(void); void multiple_group_write(void); void multiple_group_read(void); -- cgit v0.12