From b2363a8195408331797cd32820fbb0dfc288f646 Mon Sep 17 00:00:00 2001
From: raylu-hdf <60487644+raylu-hdf@users.noreply.github.com>
Date: Tue, 12 Jul 2022 10:55:34 -0500
Subject: H5Oflush fails for parallel (#1876)

* H5Oflush causes H5Fclose to trigger an assertion failure in the metadata
  cache for parallel. This commit makes sure H5Oflush fails for parallel
  until this problem is solved in the future.

* Committing clang-format changes

* Changed the use of H5F_get_driver_id to H5F_HAS_FEATURE.

Co-authored-by: songyulu
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
---
 src/H5Dint.c              |   5 ++
 src/H5Oflush.c            |   5 ++
 src/H5VLnative_datatype.c |   5 ++
 src/H5VLnative_group.c    |   5 ++
 testpar/CMakeLists.txt    |   1 +
 testpar/Makefile.am       |   2 +-
 testpar/t_oflush.c        | 118 ++++++++++++++++++++++++++++++++++++++++++++++
 testpar/testphdf5.c       |   1 +
 testpar/testphdf5.h       |   1 +
 9 files changed, 142 insertions(+), 1 deletion(-)
 create mode 100644 testpar/t_oflush.c

diff --git a/src/H5Dint.c b/src/H5Dint.c
index ee49464..0407a2e 100644
--- a/src/H5Dint.c
+++ b/src/H5Dint.c
@@ -3223,6 +3223,11 @@ H5D__flush(H5D_t *dset, hid_t dset_id)
     HDassert(dset);
     HDassert(dset->shared);
 
+    /* Currently, H5Oflush causes H5Fclose to trigger an assertion failure in metadata cache.
+     * Leave this situation for the future solution */
+    if (H5F_HAS_FEATURE(dset->oloc.file, H5FD_FEAT_HAS_MPI))
+        HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "H5Oflush isn't supported for parallel")
+
     /* Flush any dataset information still cached in memory */
     if (H5D__flush_real(dset) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTFLUSH, FAIL, "unable to flush cached dataset info")
diff --git a/src/H5Oflush.c b/src/H5Oflush.c
index 116a04b..1d31973 100644
--- a/src/H5Oflush.c
+++ b/src/H5Oflush.c
@@ -75,6 +75,11 @@ H5O_flush(H5O_loc_t *oloc, hid_t obj_id)
 
     FUNC_ENTER_NOAPI(FAIL)
 
+    /* Currently, H5Oflush causes H5Fclose to trigger an assertion failure in metadata cache.
+     * Leave this situation for the future solution */
+    if (H5F_HAS_FEATURE(oloc->file, H5FD_FEAT_HAS_MPI))
+        HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, FAIL, "H5Oflush isn't supported for parallel")
+
     /* Get the object pointer */
     if (NULL == (obj_ptr = H5VL_object(obj_id)))
         HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "invalid object identifier")
diff --git a/src/H5VLnative_datatype.c b/src/H5VLnative_datatype.c
index bf6f37c..027b4ac 100644
--- a/src/H5VLnative_datatype.c
+++ b/src/H5VLnative_datatype.c
@@ -237,6 +237,11 @@ H5VL__native_datatype_specific(void *obj, H5VL_datatype_specific_args_t *args, h
     switch (args->op_type) {
         /* H5VL_DATATYPE_FLUSH */
        case H5VL_DATATYPE_FLUSH: {
+            /* Currently, H5Oflush causes H5Fclose to trigger an assertion failure in metadata cache.
+             * Leave this situation for the future solution */
+            if (H5F_HAS_FEATURE(dt->oloc.file, H5FD_FEAT_HAS_MPI))
+                HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "H5Oflush isn't supported for parallel")
+
             if (H5O_flush_common(&dt->oloc, args->args.flush.type_id) < 0)
                 HGOTO_ERROR(H5E_DATATYPE, H5E_CANTFLUSH, FAIL, "unable to flush datatype")
diff --git a/src/H5VLnative_group.c b/src/H5VLnative_group.c
index 54f8337..08ac2aa 100644
--- a/src/H5VLnative_group.c
+++ b/src/H5VLnative_group.c
@@ -280,6 +280,11 @@ H5VL__native_group_specific(void *obj, H5VL_group_specific_args_t *args, hid_t H
 
         /* H5Gflush */
         case H5VL_GROUP_FLUSH: {
+            /* Currently, H5Oflush causes H5Fclose to trigger an assertion failure in metadata cache.
+             * Leave this situation for the future solution */
+            if (H5F_HAS_FEATURE(grp->oloc.file, H5FD_FEAT_HAS_MPI))
+                HGOTO_ERROR(H5E_SYM, H5E_UNSUPPORTED, FAIL, "H5Oflush isn't supported for parallel")
+
             if (H5O_flush_common(&grp->oloc, args->args.flush.grp_id) < 0)
                 HGOTO_ERROR(H5E_SYM, H5E_CANTFLUSH, FAIL, "unable to flush group")
 
diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt
index 32f4a0f..15723c9 100644
--- a/testpar/CMakeLists.txt
+++ b/testpar/CMakeLists.txt
@@ -18,6 +18,7 @@ set (testphdf5_SOURCES
     ${HDF5_TEST_PAR_SOURCE_DIR}/t_filter_read.c
     ${HDF5_TEST_PAR_SOURCE_DIR}/t_prop.c
     ${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md_read.c
+    ${HDF5_TEST_PAR_SOURCE_DIR}/t_oflush.c
 )
 
 #-- Adding test for testhdf5
diff --git a/testpar/Makefile.am b/testpar/Makefile.am
index cbde0c1..ff4a3dd 100644
--- a/testpar/Makefile.am
+++ b/testpar/Makefile.am
@@ -37,7 +37,7 @@ check_PROGRAMS = $(TEST_PROG_PARA) t_pflush1 t_pflush2
 
 testphdf5_SOURCES=testphdf5.c t_dset.c t_file.c t_file_image.c t_mdset.c \
                   t_ph5basic.c t_coll_chunk.c t_span_tree.c t_chunk_alloc.c t_filter_read.c \
-                  t_prop.c t_coll_md_read.c
+                  t_prop.c t_coll_md_read.c t_oflush.c
 
 # The tests all depend on the hdf5 library and the test library
 LDADD = $(LIBH5TEST) $(LIBHDF5)
diff --git a/testpar/t_oflush.c b/testpar/t_oflush.c
new file mode 100644
index 0000000..1a4ee69
--- /dev/null
+++ b/testpar/t_oflush.c
@@ -0,0 +1,118 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group.                                               *
+ * Copyright by the Board of Trustees of the University of Illinois.         *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * This file is part of HDF5. The full HDF5 copyright notice, including      *
+ * terms governing use, modification, and redistribution, is contained in    *
+ * the COPYING file, which can be found at the root of the source code       *
+ * distribution tree, or in https://www.hdfgroup.org/licenses.               *
+ * If you do not have access to either file, you may request a copy from     *
+ * help@hdfgroup.org.                                                        *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/* Test for H5Oflush. For the current design, H5Oflush doesn't work correctly
+ * with parallel. It causes an assertion failure in metadata cache during
+ * H5Fclose. This test makes sure H5Oflush fails for dataset, group, and named
+ * datatype properly until the problem is solved.
+ */
+
+#include "testphdf5.h"
+#include "H5Dprivate.h"
+#include "H5private.h"
+
+#define DATASETNAME "IntArray"
+#define NX          5
+#define NY          6
+#define RANK        2
+
+void
+test_oflush(void)
+{
+    int         mpi_size, mpi_rank;
+    hid_t       file, dataset;
+    hid_t       dataspace;
+    hid_t       fapl_id;
+    const char *filename;
+    hid_t       gid, dtype_flush;
+    hsize_t     dimsf[2];
+    herr_t      ret;
+    int         data[NX][NY];
+    int         i, j;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+
+    /* Make sure MPIO driver is used */
+    fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, FACC_MPIO);
+    VRFY((fapl_id >= 0), "fapl creation succeeded");
+
+    /* Data buffer initialization */
+    for (j = 0; j < NX; j++)
+        for (i = 0; i < NY; i++)
+            data[j][i] = i + j;
+
+    filename = GetTestParameters();
+
+    file = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+    VRFY((file >= 0), "file creation succeeded");
+
+    /* Describe the size of the array and create the data space for fixed
+     * size dataset */
+    dimsf[0] = NX;
+    dimsf[1] = NY;
+
+    dataspace = H5Screate_simple(RANK, dimsf, NULL);
+    VRFY((dataspace >= 0), "data space creation succeeded");
+
+    /* Create a new dataset within the file using defined dataspace and
+     * datatype and default dataset creation properties */
+    dataset = H5Dcreate2(file, DATASETNAME, H5T_NATIVE_INT, dataspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    VRFY((dataset >= 0), "dataset creation succeeded");
+
+    /* Write the data to the dataset using default transfer properties */
+    ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, data);
+    VRFY((ret >= 0), "dataset write succeeded");
+
+    /* Make sure H5Oflush fails with dataset */
+    H5E_BEGIN_TRY
+    {
+        ret = H5Oflush(dataset);
+    }
+    H5E_END_TRY
+    VRFY((ret < 0), "H5Oflush should fail as expected");
+
+    H5Sclose(dataspace);
+    H5Dclose(dataset);
+
+    /* Create a group */
+    gid = H5Gcreate(file, "group", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+    VRFY((gid >= 0), "group creation succeeded");
+
+    /* Make sure H5Oflush fails with group */
+    H5E_BEGIN_TRY
+    {
+        ret = H5Oflush(gid);
+    }
+    H5E_END_TRY
+    VRFY((ret < 0), "H5Oflush should fail as expected");
+
+    H5Gclose(gid);
+
+    /* Create a named datatype */
+    dtype_flush = H5Tcopy(H5T_NATIVE_INT);
+    H5Tcommit(file, "dtype", dtype_flush, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+    /* Make sure H5Oflush fails with named datatype */
+    H5E_BEGIN_TRY
+    {
+        ret = H5Oflush(dtype_flush);
+    }
+    H5E_END_TRY
+    VRFY((ret < 0), "H5Oflush should fail as expected");
+
+    H5Tclose(dtype_flush);
+
+    /* Close and release resources */
+    H5Fclose(file);
+    H5Pclose(fapl_id);
+}
diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c
index d7b5305..0f284f9 100644
--- a/testpar/testphdf5.c
+++ b/testpar/testphdf5.c
@@ -351,6 +351,7 @@ main(int argc, char **argv)
     AddTest("mpiodup", test_fapl_mpio_dup, NULL, "fapl_mpio duplicate", NULL);
 
     AddTest("split", test_split_comm_access, NULL, "dataset using split communicators", PARATESTFILE);
+    AddTest("h5oflusherror", test_oflush, NULL, "H5Oflush failure", PARATESTFILE);
 
 #ifdef PB_OUT /* temporary: disable page buffering when parallel */
     AddTest("page_buffer", test_page_buffer_access, NULL, "page buffer usage in parallel", PARATESTFILE);
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index 16f45d3..1fbc105 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -294,6 +294,7 @@ void test_dense_attr(void);
 void test_partial_no_selection_coll_md_read(void);
 void test_multi_chunk_io_addrmap_issue(void);
 void test_link_chunk_io_sort_chunk_issue(void);
+void test_oflush(void);
 
 /* commonly used prototypes */
 hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);
-- 
cgit v0.12
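
Note for applications: after this patch, H5Oflush returns failure whenever the file was opened through the MPI-IO driver, so parallel code that previously flushed individual objects needs another path. The sketch below is not part of the change above; it is a minimal, hypothetical example (file name, dataset name, and error handling invented here) showing one way to guard the call using only public HDF5 API (H5Pget_driver, H5FD_MPIO, H5Fflush), assuming an MPI-enabled HDF5 build.

#include <stdio.h>
#include <mpi.h>
#include "hdf5.h"

int
main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* Open the file through the MPI-IO driver */
    hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);

    hid_t   file = H5Fcreate("example.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
    hsize_t dims[1] = {10};
    hid_t   space = H5Screate_simple(1, dims, NULL);
    hid_t   dset  = H5Dcreate2(file, "data", H5T_NATIVE_INT, space, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);

    /* Per-object flush is only attempted for serial access; with MPI-IO,
     * fall back to flushing the whole file, which remains supported. */
    if (H5Pget_driver(fapl) != H5FD_MPIO) {
        if (H5Oflush(dset) < 0)
            fprintf(stderr, "H5Oflush failed\n");
    }
    else {
        H5Fflush(file, H5F_SCOPE_GLOBAL);
    }

    H5Dclose(dset);
    H5Sclose(space);
    H5Fclose(file);
    H5Pclose(fapl);

    MPI_Finalize();
    return 0;
}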