summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2023-02-21 15:30:45 (GMT)
committerGitHub <noreply@github.com>2023-02-21 15:30:45 (GMT)
commitd8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08 (patch)
tree9880bfcd20524f93471585191e64e4d527eec758
parent3dcee39ceda225798289336f3b3abcd96e83c9c1 (diff)
downloadhdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.zip
hdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.tar.gz
hdf5-d8fd9c2f79ad0aceb0b55b33b7480a4063b8cf08.tar.bz2
Fix issue with collective metadata writes of global heap data (#2480) (#2486)
-rw-r--r--release_docs/RELEASE.txt15
-rw-r--r--src/H5Cmpio.c8
-rw-r--r--testpar/CMakeLists.txt2
-rw-r--r--testpar/Makefile.am2
-rw-r--r--testpar/t_coll_md.c (renamed from testpar/t_coll_md_read.c)79
-rw-r--r--testpar/testphdf5.c2
-rw-r--r--testpar/testphdf5.h1
7 files changed, 104 insertions, 5 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index a261621..9405ce6 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -109,7 +109,20 @@ Bug Fixes since HDF5-1.14.0 release
===================================
Library
-------
- -
+ - Fixed an issue with collective metadata writes of global heap data
+
+ New test failures in parallel netCDF started occurring with debug
+ builds of HDF5 due to an assertion failure and this was reported in
+ GitHub issue #2433. The assertion failure began happening after the
+ collective metadata write pathway in the library was updated to use
+ vector I/O so that parallel-enabled HDF5 Virtual File Drivers (other
+ than the existing MPI I/O VFD) can support collective metadata writes.
+
+ The assertion failure was fixed by updating collective metadata writes
+ to treat global heap metadata as raw data, as done elsewhere in the
+ library.
+
+ (JTH - 2023/02/16, GH #2433)
Java Library
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c
index 6af346c..cfd0780 100644
--- a/src/H5Cmpio.c
+++ b/src/H5Cmpio.c
@@ -1003,6 +1003,10 @@ H5C__collective_write(H5F_t *f)
bufs[0] = base_buf;
types[0] = entry_ptr->type->mem_type;
+ /* Treat global heap as raw data */
+ if (types[0] == H5FD_MEM_GHEAP)
+ types[0] = H5FD_MEM_DRAW;
+
node = H5SL_next(node);
i = 1;
while (node) {
@@ -1016,6 +1020,10 @@ H5C__collective_write(H5F_t *f)
bufs[i] = entry_ptr->image_ptr;
types[i] = entry_ptr->type->mem_type;
+ /* Treat global heap as raw data */
+ if (types[i] == H5FD_MEM_GHEAP)
+ types[i] = H5FD_MEM_DRAW;
+
/* Advance to next node & array location */
node = H5SL_next(node);
i++;
diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt
index c950b1b..d876a21 100644
--- a/testpar/CMakeLists.txt
+++ b/testpar/CMakeLists.txt
@@ -17,7 +17,7 @@ set (testphdf5_SOURCES
${HDF5_TEST_PAR_SOURCE_DIR}/t_chunk_alloc.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_filter_read.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_prop.c
- ${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md_read.c
+ ${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_oflush.c
)
diff --git a/testpar/Makefile.am b/testpar/Makefile.am
index 0506961..539750a 100644
--- a/testpar/Makefile.am
+++ b/testpar/Makefile.am
@@ -44,7 +44,7 @@ check_PROGRAMS = $(TEST_PROG_PARA) t_pflush1 t_pflush2
testphdf5_SOURCES=testphdf5.c t_dset.c t_file.c t_file_image.c t_mdset.c \
t_ph5basic.c t_coll_chunk.c t_span_tree.c t_chunk_alloc.c t_filter_read.c \
- t_prop.c t_coll_md_read.c t_oflush.c
+ t_prop.c t_coll_md.c t_oflush.c
# The tests all depend on the hdf5 library and the test library
LDADD = $(LIBH5TEST) $(LIBHDF5)
diff --git a/testpar/t_coll_md_read.c b/testpar/t_coll_md.c
index e402428..aa72486 100644
--- a/testpar/t_coll_md_read.c
+++ b/testpar/t_coll_md.c
@@ -11,8 +11,9 @@
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
- * A test suite to test HDF5's collective metadata read capabilities, as enabled
- * by making a call to H5Pset_all_coll_metadata_ops().
+ * A test suite to test HDF5's collective metadata read and write capabilities,
+ * as enabled by making a call to H5Pset_all_coll_metadata_ops() and/or
+ * H5Pset_coll_metadata_write().
*/
#include "testphdf5.h"
@@ -38,6 +39,10 @@
#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue"
#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1
+#define COLL_GHEAP_WRITE_ATTR_NELEMS 10
+#define COLL_GHEAP_WRITE_ATTR_NAME "coll_gheap_write_attr"
+#define COLL_GHEAP_WRITE_ATTR_DIMS 1
+
/*
* A test for issue HDFFV-10501. A parallel hang was reported which occurred
* in linked-chunk I/O when collective metadata reads are enabled and some ranks
@@ -524,3 +529,73 @@ test_link_chunk_io_sort_chunk_issue(void)
VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
}
+
+/*
+ * A test for GitHub issue #2433 which causes a collective metadata write
+ * of global heap data. This test is meant to ensure that global heap data
+ * gets correctly mapped as raw data during a collective metadata write
+ * using vector I/O.
+ *
+ * An assertion exists in the library that should be triggered if global
+ * heap data is not correctly mapped as raw data.
+ */
+void
+test_collective_global_heap_write(void)
+{
+ const char *filename;
+ hsize_t attr_dims[COLL_GHEAP_WRITE_ATTR_DIMS];
+ hid_t file_id = H5I_INVALID_HID;
+ hid_t fapl_id = H5I_INVALID_HID;
+ hid_t attr_id = H5I_INVALID_HID;
+ hid_t vl_type = H5I_INVALID_HID;
+ hid_t fspace_id = H5I_INVALID_HID;
+ hvl_t vl_data;
+ int mpi_rank, mpi_size;
+ int data_buf[COLL_GHEAP_WRITE_ATTR_NELEMS];
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+
+ filename = GetTestParameters();
+
+ fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
+ VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
+
+ /*
+ * Even though the testphdf5 framework currently sets collective metadata
+ * writes on the FAPL, we call it here just to be sure this is futureproof,
+ * since demonstrating this issue relies upon it.
+ */
+ VRFY((H5Pset_coll_metadata_write(fapl_id, true) >= 0), "Set collective metadata writes succeeded");
+
+ file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+ VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+ attr_dims[0] = 1; /* the attribute holds a single variable-length element */
+
+ fspace_id = H5Screate_simple(COLL_GHEAP_WRITE_ATTR_DIMS, attr_dims, NULL);
+ VRFY((fspace_id >= 0), "H5Screate_simple succeeded");
+
+ vl_type = H5Tvlen_create(H5T_NATIVE_INT);
+ VRFY((vl_type >= 0), "H5Tvlen_create succeeded");
+
+ vl_data.len = COLL_GHEAP_WRITE_ATTR_NELEMS;
+ vl_data.p = data_buf; /* data_buf is filled in below, before H5Awrite reads it */
+
+ /*
+ * Create a variable-length attribute that will get written to the global heap
+ */
+ attr_id = H5Acreate2(file_id, COLL_GHEAP_WRITE_ATTR_NAME, vl_type, fspace_id, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((attr_id >= 0), "H5Acreate2 succeeded");
+
+ for (size_t i = 0; i < COLL_GHEAP_WRITE_ATTR_NELEMS; i++)
+ data_buf[i] = (int)i;
+
+ VRFY((H5Awrite(attr_id, vl_type, &vl_data) >= 0), "H5Awrite succeeded");
+
+ VRFY((H5Sclose(fspace_id) >= 0), "H5Sclose succeeded");
+ VRFY((H5Tclose(vl_type) >= 0), "H5Tclose succeeded");
+ VRFY((H5Aclose(attr_id) >= 0), "H5Aclose succeeded");
+ VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
+}
diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c
index cc32dee..e7befd6 100644
--- a/testpar/testphdf5.c
+++ b/testpar/testphdf5.c
@@ -502,6 +502,8 @@ main(int argc, char **argv)
"Collective MD read with multi chunk I/O (H5D__chunk_addrmap)", PARATESTFILE);
AddTest("LC_coll_MD_read", test_link_chunk_io_sort_chunk_issue, NULL,
"Collective MD read with link chunk I/O (H5D__sort_chunk)", PARATESTFILE);
+ AddTest("GH_coll_MD_wr", test_collective_global_heap_write, NULL,
+ "Collective MD write of global heap data", PARATESTFILE);
/* Display testing information */
TestInfo(argv[0]);
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index 14b8297..2a21ee6 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -293,6 +293,7 @@ void test_dense_attr(void);
void test_partial_no_selection_coll_md_read(void);
void test_multi_chunk_io_addrmap_issue(void);
void test_link_chunk_io_sort_chunk_issue(void);
+void test_collective_global_heap_write(void);
void test_oflush(void);
/* commonly used prototypes */