diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2023-02-21 15:30:45 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-21 15:30:45 (GMT) |
commit | d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08 (patch) | |
tree | 9880bfcd20524f93471585191e64e4d527eec758 | |
parent | 3dcee39ceda225798289336f3b3abcd96e83c9c1 (diff) | |
download | hdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.zip hdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.tar.gz hdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.tar.bz2 |
Fix issue with collective metadata writes of global heap data (#2480) (#2486)
-rw-r--r-- | release_docs/RELEASE.txt | 15 |
-rw-r--r-- | src/H5Cmpio.c | 8 |
-rw-r--r-- | testpar/CMakeLists.txt | 2 |
-rw-r--r-- | testpar/Makefile.am | 2 |
-rw-r--r-- | testpar/t_coll_md.c (renamed from testpar/t_coll_md_read.c) | 79 |
-rw-r--r-- | testpar/testphdf5.c | 2 |
-rw-r--r-- | testpar/testphdf5.h | 1 |
7 files changed, 104 insertions, 5 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index a261621..9405ce6 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -109,7 +109,20 @@ Bug Fixes since HDF5-1.14.0 release =================================== Library ------- - - + - Fixed an issue with collective metadata writes of global heap data + + New test failures in parallel netCDF started occurring with debug + builds of HDF5 due to an assertion failure and this was reported in + GitHub issue #2433. The assertion failure began happening after the + collective metadata write pathway in the library was updated to use + vector I/O so that parallel-enabled HDF5 Virtual File Drivers (other + than the existing MPI I/O VFD) can support collective metadata writes. + + The assertion failure was fixed by updating collective metadata writes + to treat global heap metadata as raw data, as done elsewhere in the + library. + + (JTH - 2023/02/16, GH #2433) Java Library diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c index 6af346c..cfd0780 100644 --- a/src/H5Cmpio.c +++ b/src/H5Cmpio.c @@ -1003,6 +1003,10 @@ H5C__collective_write(H5F_t *f) bufs[0] = base_buf; types[0] = entry_ptr->type->mem_type; + /* Treat global heap as raw data */ + if (types[0] == H5FD_MEM_GHEAP) + types[0] = H5FD_MEM_DRAW; + node = H5SL_next(node); i = 1; while (node) { @@ -1016,6 +1020,10 @@ H5C__collective_write(H5F_t *f) bufs[i] = entry_ptr->image_ptr; types[i] = entry_ptr->type->mem_type; + /* Treat global heap as raw data */ + if (types[i] == H5FD_MEM_GHEAP) + types[i] = H5FD_MEM_DRAW; + /* Advance to next node & array location */ node = H5SL_next(node); i++; diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt index c950b1b..d876a21 100644 --- a/testpar/CMakeLists.txt +++ b/testpar/CMakeLists.txt @@ -17,7 +17,7 @@ set (testphdf5_SOURCES ${HDF5_TEST_PAR_SOURCE_DIR}/t_chunk_alloc.c ${HDF5_TEST_PAR_SOURCE_DIR}/t_filter_read.c ${HDF5_TEST_PAR_SOURCE_DIR}/t_prop.c - 
${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md_read.c + ${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md.c ${HDF5_TEST_PAR_SOURCE_DIR}/t_oflush.c ) diff --git a/testpar/Makefile.am b/testpar/Makefile.am index 0506961..539750a 100644 --- a/testpar/Makefile.am +++ b/testpar/Makefile.am @@ -44,7 +44,7 @@ check_PROGRAMS = $(TEST_PROG_PARA) t_pflush1 t_pflush2 testphdf5_SOURCES=testphdf5.c t_dset.c t_file.c t_file_image.c t_mdset.c \ t_ph5basic.c t_coll_chunk.c t_span_tree.c t_chunk_alloc.c t_filter_read.c \ - t_prop.c t_coll_md_read.c t_oflush.c + t_prop.c t_coll_md.c t_oflush.c # The tests all depend on the hdf5 library and the test library LDADD = $(LIBH5TEST) $(LIBHDF5) diff --git a/testpar/t_coll_md_read.c b/testpar/t_coll_md.c index e402428..aa72486 100644 --- a/testpar/t_coll_md_read.c +++ b/testpar/t_coll_md.c @@ -11,8 +11,9 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* - * A test suite to test HDF5's collective metadata read capabilities, as enabled - * by making a call to H5Pset_all_coll_metadata_ops(). + * A test suite to test HDF5's collective metadata read and write capabilities, + * as enabled by making a call to H5Pset_all_coll_metadata_ops() and/or + * H5Pset_coll_metadata_write(). */ #include "testphdf5.h" @@ -38,6 +39,10 @@ #define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue" #define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1 +#define COLL_GHEAP_WRITE_ATTR_NELEMS 10 +#define COLL_GHEAP_WRITE_ATTR_NAME "coll_gheap_write_attr" +#define COLL_GHEAP_WRITE_ATTR_DIMS 1 + /* * A test for issue HDFFV-10501. A parallel hang was reported which occurred * in linked-chunk I/O when collective metadata reads are enabled and some ranks @@ -524,3 +529,73 @@ test_link_chunk_io_sort_chunk_issue(void) VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded"); VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded"); } + +/* + * A test for GitHub issue #2433 which causes a collective metadata write + * of global heap data. 
This test is meant to ensure that global heap data + * gets correctly mapped as raw data during a collective metadata write + * using vector I/O. + * + * An assertion exists in the library that should be triggered if global + * heap data is not correctly mapped as raw data. + */ +void +test_collective_global_heap_write(void) +{ + const char *filename; + hsize_t attr_dims[COLL_GHEAP_WRITE_ATTR_DIMS]; + hid_t file_id = H5I_INVALID_HID; + hid_t fapl_id = H5I_INVALID_HID; + hid_t attr_id = H5I_INVALID_HID; + hid_t vl_type = H5I_INVALID_HID; + hid_t fspace_id = H5I_INVALID_HID; + hvl_t vl_data; + int mpi_rank, mpi_size; + int data_buf[COLL_GHEAP_WRITE_ATTR_NELEMS]; + + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); + + filename = GetTestParameters(); + + fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type); + VRFY((fapl_id >= 0), "create_faccess_plist succeeded"); + + /* + * Even though the testphdf5 framework currently sets collective metadata + * writes on the FAPL, we call it here just to be sure this is futureproof, + * since demonstrating this issue relies upon it. 
+ */ + VRFY((H5Pset_coll_metadata_write(fapl_id, true) >= 0), "Set collective metadata writes succeeded"); + + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + VRFY((file_id >= 0), "H5Fcreate succeeded"); + + attr_dims[0] = 1; + + fspace_id = H5Screate_simple(COLL_GHEAP_WRITE_ATTR_DIMS, attr_dims, NULL); + VRFY((fspace_id >= 0), "H5Screate_simple succeeded"); + + vl_type = H5Tvlen_create(H5T_NATIVE_INT); + VRFY((vl_type >= 0), "H5Tvlen_create succeeded"); + + vl_data.len = COLL_GHEAP_WRITE_ATTR_NELEMS; + vl_data.p = data_buf; + + /* + * Create a variable-length attribute that will get written to the global heap + */ + attr_id = H5Acreate2(file_id, COLL_GHEAP_WRITE_ATTR_NAME, vl_type, fspace_id, H5P_DEFAULT, H5P_DEFAULT); + VRFY((attr_id >= 0), "H5Acreate2 succeeded"); + + for (size_t i = 0; i < COLL_GHEAP_WRITE_ATTR_NELEMS; i++) + data_buf[i] = (int)i; + + VRFY((H5Awrite(attr_id, vl_type, &vl_data) >= 0), "H5Awrite succeeded"); + + VRFY((H5Sclose(fspace_id) >= 0), "H5Sclose succeeded"); + VRFY((H5Tclose(vl_type) >= 0), "H5Sclose succeeded"); + VRFY((H5Aclose(attr_id) >= 0), "H5Aclose succeeded"); + VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded"); + VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded"); +} diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index cc32dee..e7befd6 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -502,6 +502,8 @@ main(int argc, char **argv) "Collective MD read with multi chunk I/O (H5D__chunk_addrmap)", PARATESTFILE); AddTest("LC_coll_MD_read", test_link_chunk_io_sort_chunk_issue, NULL, "Collective MD read with link chunk I/O (H5D__sort_chunk)", PARATESTFILE); + AddTest("GH_coll_MD_wr", test_collective_global_heap_write, NULL, + "Collective MD write of global heap data", PARATESTFILE); /* Display testing information */ TestInfo(argv[0]); diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h index 14b8297..2a21ee6 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -293,6 +293,7 @@ 
void test_dense_attr(void); void test_partial_no_selection_coll_md_read(void); void test_multi_chunk_io_addrmap_issue(void); void test_link_chunk_io_sort_chunk_issue(void); +void test_collective_global_heap_write(void); void test_oflush(void); /* commonly used prototypes */ |