summaryrefslogtreecommitdiffstats
path: root/testpar
diff options
context:
space:
mode:
Diffstat (limited to 'testpar')
-rw-r--r--testpar/CMakeLists.txt6
-rw-r--r--testpar/CMakeTests.cmake78
-rw-r--r--testpar/CMakeVFDTests.cmake30
-rw-r--r--testpar/t_cache.c16
-rw-r--r--testpar/t_coll_md_read.c336
-rw-r--r--testpar/t_dset.c2
-rw-r--r--testpar/t_shapesame.c32
-rw-r--r--testpar/t_span_tree.c28
-rw-r--r--testpar/testphdf5.c5
-rw-r--r--testpar/testphdf5.h2
10 files changed, 464 insertions, 71 deletions
diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt
index 0b3cbe3..96ce0c0 100644
--- a/testpar/CMakeLists.txt
+++ b/testpar/CMakeLists.txt
@@ -47,7 +47,7 @@ set (H5P_TESTS
t_mpi
t_bigio
t_cache
- t_cache_image
+ #t_cache_image
t_pflush1
t_pflush2
t_pread
@@ -58,8 +58,8 @@ set (H5P_TESTS
t_filters_parallel
)
-foreach (testp ${H5P_TESTS})
- ADD_H5P_EXE(${testp})
+foreach (h5_testp ${H5P_TESTS})
+ ADD_H5P_EXE(${h5_testp})
endforeach ()
include (CMakeTests.cmake)
diff --git a/testpar/CMakeTests.cmake b/testpar/CMakeTests.cmake
index a0d7f59..214801b 100644
--- a/testpar/CMakeTests.cmake
+++ b/testpar/CMakeTests.cmake
@@ -15,17 +15,83 @@
### T E S T I N G ###
##############################################################################
##############################################################################
+# Remove any output file left over from previous test run
+add_test (NAME MPI_TEST-clear-testphdf5-objects
+ COMMAND ${CMAKE_COMMAND}
+ -E remove
+ ParaTest.h5
+ WORKING_DIRECTORY
+ ${HDF5_TEST_PAR_BINARY_DIR}
+)
+set_tests_properties (MPI_TEST-clear-testphdf5-objects PROPERTIES FIXTURES_SETUP par_clear_testphdf5)
-add_test (NAME TEST_PAR_testphdf5 COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:testphdf5> ${MPIEXEC_POSTFLAGS})
+set (SKIP_testphdf5 "")
+#if (HDF5_OPENMPI_VERSION_SKIP)
+# set (SKIP_testphdf5 "${SKIP_testphdf5};-x;ecdsetw")
+#endif ()
-foreach (testp ${H5P_TESTS})
- add_test (NAME TEST_PAR_${testp} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:${testp}> ${MPIEXEC_POSTFLAGS})
+add_test (NAME MPI_TEST_testphdf5 COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:testphdf5> ${MPIEXEC_POSTFLAGS} ${SKIP_testphdf5})
+set_tests_properties (MPI_TEST_testphdf5 PROPERTIES
+ FIXTURES_REQUIRED par_clear_testphdf5
+ ENVIRONMENT "HDF5_ALARM_SECONDS=3600;srcdir=${HDF5_TEST_PAR_BINARY_DIR}"
+ WORKING_DIRECTORY ${HDF5_TEST_PAR_BINARY_DIR}
+)
+if (last_test)
+ set_tests_properties (MPI_TEST_testphdf5 PROPERTIES DEPENDS ${last_test})
+endif ()
+set (last_test "MPI_TEST_testphdf5")
+
+#if (HDF5_OPENMPI_VERSION_SKIP)
+# list (REMOVE_ITEM H5P_TESTS t_shapesame)
+#endif ()
+
+set (test_par_CLEANFILES
+ t_cache_image_00.h5
+ t_cache_image_01.h5
+ t_cache_image_02.h5
+ flush.h5
+ noflush.h5
+ reloc_t_pread_data_file.h5
+ reloc_t_pread_group_0_file.h5
+ reloc_t_pread_group_1_file.h5
+ shutdown.h5
+ after_mpi_fin.h5
+ #the following should have been removed by the programs
+ bigio_test.h5
+ CacheTestDummy.h5
+ t_filters_parallel.h5
+ MPItest.h5
+ ShapeSameTest.h5
+)
+
+# Remove any output file left over from previous test run
+add_test (NAME MPI_TEST-clear-objects
+ COMMAND ${CMAKE_COMMAND}
+ -E remove
+ ${test_par_CLEANFILES}
+ WORKING_DIRECTORY
+ ${HDF5_TEST_PAR_BINARY_DIR}
+)
+set_tests_properties (MPI_TEST-clear-objects PROPERTIES FIXTURES_SETUP par_clear_objects)
+
+foreach (h5_testp ${H5P_TESTS})
+ add_test (NAME MPI_TEST_${h5_testp} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${MPIEXEC_MAX_NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:${h5_testp}> ${MPIEXEC_POSTFLAGS})
+ set_tests_properties (MPI_TEST_${h5_testp} PROPERTIES
+ FIXTURES_REQUIRED par_clear_objects
+ ENVIRONMENT "HDF5_ALARM_SECONDS=3600;srcdir=${HDF5_TEST_PAR_BINARY_DIR}"
+ WORKING_DIRECTORY ${HDF5_TEST_PAR_BINARY_DIR}
+ )
+ if (last_test)
+ set_tests_properties (MPI_TEST_${h5_testp} PROPERTIES DEPENDS ${last_test})
+ endif ()
+ set (last_test "MPI_TEST_${h5_testp}")
endforeach ()
# The t_pflush1 test is hard-coded to fail.
-set_tests_properties (TEST_PAR_t_pflush1 PROPERTIES WILL_FAIL "true")
-#set_property (TEST TEST_PAR_t_pflush1 PROPERTY PASS_REGULAR_EXPRESSION "PASSED")
-set_tests_properties (TEST_PAR_t_pflush2 PROPERTIES DEPENDS TEST_PAR_t_pflush1)
+set_tests_properties (MPI_TEST_t_pflush1 PROPERTIES WILL_FAIL "true")
+#set_property (TEST MPI_TEST_t_pflush1 PROPERTY PASS_REGULAR_EXPRESSION "PASSED")
+set_tests_properties (MPI_TEST_t_pflush2 PROPERTIES DEPENDS MPI_TEST_t_pflush1)
+set_tests_properties (MPI_TEST_t_prestart PROPERTIES DEPENDS MPI_TEST_t_pshutdown)
##############################################################################
##############################################################################
diff --git a/testpar/CMakeVFDTests.cmake b/testpar/CMakeVFDTests.cmake
index b6b065f..c0b848b 100644
--- a/testpar/CMakeVFDTests.cmake
+++ b/testpar/CMakeVFDTests.cmake
@@ -33,25 +33,41 @@
set (VFD_LIST ${VFD_LIST} direct)
endif ()
+foreach (vfdtest ${VFD_LIST})
+ file (MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/${vfdtest}")
+endforeach ()
+
macro (ADD_VFD_TEST vfdname resultcode)
if (NOT HDF5_ENABLE_USING_MEMCHECKER)
- foreach (test ${H5P_VFD_TESTS})
+ foreach (h5_test ${H5P_VFD_TESTS})
+ add_test (
+ NAME MPI_TEST_VFD-${vfdname}-${h5_test}-clear-objects
+ COMMAND ${CMAKE_COMMAND}
+ -E remove
+ ${vfdname}-shared/${vfdname}-${h5_test}.out
+ ${vfdname}-shared/${vfdname}-${h5_test}.out.err
+ )
add_test (
- NAME TEST_PAR_VFD-${vfdname}-${test}
+ NAME MPI_TEST_VFD-${vfdname}-${h5_test}
COMMAND "${CMAKE_COMMAND}"
- -D "TEST_PROGRAM=$<TARGET_FILE:${test}>"
+ -D "TEST_PROGRAM=$<TARGET_FILE:${h5_test}>"
-D "TEST_ARGS:STRING="
-D "TEST_VFD:STRING=${vfdname}"
-D "TEST_EXPECT=${resultcode}"
- -D "TEST_OUTPUT=${test}"
- -D "TEST_FOLDER=${PROJECT_BINARY_DIR}"
+ -D "TEST_OUTPUT=${vfdname}-${h5_test}.out"
+ -D "TEST_FOLDER=${PROJECT_BINARY_DIR}/${vfdname}"
-P "${HDF_RESOURCES_DIR}/vfdTest.cmake"
)
+ set_tests_properties (MPI_TEST_VFD-${vfdname}-${h5_test} PROPERTIES
+ DEPENDS MPI_TEST_VFD-${vfdname}-${h5_test}-clear-objects
+ ENVIRONMENT "srcdir=${HDF5_TEST_PAR_BINARY_DIR}/${vfdname}"
+ WORKING_DIRECTORY ${HDF5_TEST_PAR_BINARY_DIR}/${vfdname}
+ )
endforeach ()
endif ()
endmacro ()
# Run test with different Virtual File Driver
- foreach (vfd ${VFD_LIST})
- ADD_VFD_TEST (${vfd} 0)
+ foreach (h5_vfd ${VFD_LIST})
+ ADD_VFD_TEST (${h5_vfd} 0)
endforeach ()
diff --git a/testpar/t_cache.c b/testpar/t_cache.c
index 7488728..41c95e2 100644
--- a/testpar/t_cache.c
+++ b/testpar/t_cache.c
@@ -7225,7 +7225,7 @@ smoke_check_6(int metadata_write_strategy)
/* some error occured in the server -- report failure */
nerrors++;
if ( verbose ) {
- HDfprintf(stdout, "%d:%s: server_main() failed.\n",
+ HDfprintf(stdout, "%d:%s: server_main() failed.\n",
world_mpi_rank, FUNC);
}
}
@@ -7241,7 +7241,7 @@ smoke_check_6(int metadata_write_strategy)
fid = -1;
cache_ptr = NULL;
if ( verbose ) {
- HDfprintf(stdout, "%d:%s: setup_cache_for_test() failed.\n",
+ HDfprintf(stdout, "%d:%s: setup_cache_for_test() failed.\n",
world_mpi_rank, FUNC);
}
}
@@ -7250,7 +7250,7 @@ smoke_check_6(int metadata_write_strategy)
virt_num_data_entries = NUM_DATA_ENTRIES;
/* insert the first half collectively */
- file_ptr->coll_md_read = H5P_USER_TRUE;
+ H5CX_set_coll_metadata_read(TRUE);
for ( i = 0; i < virt_num_data_entries/2; i++ )
{
struct datum * entry_ptr;
@@ -7271,7 +7271,7 @@ smoke_check_6(int metadata_write_strategy)
}
/* insert the other half independently */
- file_ptr->coll_md_read = H5P_USER_FALSE;
+ H5CX_set_coll_metadata_read(FALSE);
for ( i = virt_num_data_entries/2; i < virt_num_data_entries; i++ )
{
struct datum * entry_ptr;
@@ -7291,7 +7291,7 @@ smoke_check_6(int metadata_write_strategy)
HDassert(cache_ptr->max_cache_size*0.8 > cache_ptr->coll_list_size);
}
- /* flush the file */
+ /* flush the file */
if ( H5Fflush(fid, H5F_SCOPE_GLOBAL) < 0 ) {
nerrors++;
if ( verbose ) {
@@ -7301,7 +7301,7 @@ smoke_check_6(int metadata_write_strategy)
}
/* Protect the first half of the entries collectively */
- file_ptr->coll_md_read = H5P_USER_TRUE;
+ H5CX_set_coll_metadata_read(TRUE);
for ( i = 0; i < (virt_num_data_entries / 2); i++ )
{
struct datum * entry_ptr;
@@ -7322,13 +7322,13 @@ smoke_check_6(int metadata_write_strategy)
}
/* protect the other half independently */
- file_ptr->coll_md_read = H5P_USER_FALSE;
+ H5CX_set_coll_metadata_read(FALSE);
for ( i = virt_num_data_entries/2; i < virt_num_data_entries; i++ )
{
struct datum * entry_ptr;
entry_ptr = &(data[i]);
- lock_entry(file_ptr, i);
+ lock_entry(file_ptr, i);
if(FALSE != entry_ptr->header.coll_access) {
nerrors++;
diff --git a/testpar/t_coll_md_read.c b/testpar/t_coll_md_read.c
index f945d2b..912388c 100644
--- a/testpar/t_coll_md_read.c
+++ b/testpar/t_coll_md_read.c
@@ -32,6 +32,14 @@
#define PARTIAL_NO_SELECTION_Y_DIM_SCALE 5
#define PARTIAL_NO_SELECTION_X_DIM_SCALE 5
+#define MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS 2
+
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_NO_SEL_PROCESS (mpi_rank == mpi_size - 1)
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue"
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE 20000
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE 1
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1
+
/*
* A test for issue HDFFV-10501. A parallel hang was reported which occurred
* in linked-chunk I/O when collective metadata reads are enabled and some ranks
@@ -57,13 +65,13 @@ void test_partial_no_selection_coll_md_read(void)
hsize_t stride[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t count[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t block[PARTIAL_NO_SELECTION_DATASET_NDIMS];
- hid_t file_id = -1;
- hid_t fapl_id = -1;
- hid_t dset_id = -1;
- hid_t dcpl_id = -1;
- hid_t dxpl_id = -1;
- hid_t fspace_id = -1;
- hid_t mspace_id = -1;
+ hid_t file_id = H5I_INVALID_HID;
+ hid_t fapl_id = H5I_INVALID_HID;
+ hid_t dset_id = H5I_INVALID_HID;
+ hid_t dcpl_id = H5I_INVALID_HID;
+ hid_t dxpl_id = H5I_INVALID_HID;
+ hid_t fspace_id = H5I_INVALID_HID;
+ hid_t mspace_id = H5I_INVALID_HID;
int mpi_rank, mpi_size;
void *data = NULL;
void *read_buf = NULL;
@@ -86,7 +94,7 @@ void test_partial_no_selection_coll_md_read(void)
file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
VRFY((file_id >= 0), "H5Fcreate succeeded");
- dataset_dims = malloc(PARTIAL_NO_SELECTION_DATASET_NDIMS * sizeof(*dataset_dims));
+ dataset_dims = HDmalloc(PARTIAL_NO_SELECTION_DATASET_NDIMS * sizeof(*dataset_dims));
VRFY((dataset_dims != NULL), "malloc succeeded");
dataset_dims[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE * mpi_size;
@@ -129,7 +137,7 @@ void test_partial_no_selection_coll_md_read(void)
mspace_id = H5Screate_simple(1, sel_dims, NULL);
VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
- data = calloc(1, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
+ data = HDcalloc(1, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
VRFY((data != NULL), "calloc succeeded");
dxpl_id = H5Pcreate(H5P_DATASET_XFER);
@@ -151,7 +159,7 @@ void test_partial_no_selection_coll_md_read(void)
*/
VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
- read_buf = malloc(count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
+ read_buf = HDmalloc(count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
VRFY((read_buf != NULL), "malloc succeeded");
/*
@@ -171,21 +179,321 @@ void test_partial_no_selection_coll_md_read(void)
* Check data integrity just to be sure.
*/
if (!PARTIAL_NO_SELECTION_NO_SEL_PROCESS) {
- VRFY((!memcmp(data, read_buf, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int))), "memcmp succeeded");
+ VRFY((!HDmemcmp(data, read_buf, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int))), "memcmp succeeded");
+ }
+
+ if (dataset_dims) {
+ HDfree(dataset_dims);
+ dataset_dims = NULL;
}
+ if (data) {
+ HDfree(data);
+ data = NULL;
+ }
+
+ if (read_buf) {
+ HDfree(read_buf);
+ read_buf = NULL;
+ }
+
+ VRFY((H5Sclose(fspace_id) >= 0), "H5Sclose succeeded");
+ VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded");
+ VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Dclose(dset_id) >= 0), "H5Dclose succeeded");
+ VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
+}
+
+/*
+ * A test for HDFFV-10562 which attempts to verify that using multi-chunk
+ * I/O with collective metadata reads enabled doesn't causes issues due to
+ * collective metadata reads being made only by process 0 in H5D__chunk_addrmap().
+ *
+ * Failure in this test may either cause a hang, or, due to how the MPI calls
+ * pertaining to this issue might mistakenly match up, may cause an MPI error
+ * message similar to:
+ *
+ * #008: H5Dmpio.c line 2546 in H5D__obtain_mpio_mode(): MPI_BCast failed
+ * major: Internal error (too specific to document in detail)
+ * minor: Some MPI function failed
+ * #009: H5Dmpio.c line 2546 in H5D__obtain_mpio_mode(): Message truncated, error stack:
+ *PMPI_Bcast(1600)..................: MPI_Bcast(buf=0x1df98e0, count=18, MPI_BYTE, root=0, comm=0x84000006) failed
+ *MPIR_Bcast_impl(1452).............:
+ *MPIR_Bcast(1476)..................:
+ *MPIR_Bcast_intra(1249)............:
+ *MPIR_SMP_Bcast(1088)..............:
+ *MPIR_Bcast_binomial(239)..........:
+ *MPIDI_CH3U_Receive_data_found(131): Message from rank 0 and tag 2 truncated; 2616 bytes received but buffer size is 18
+ * major: Internal error (too specific to document in detail)
+ * minor: MPI Error String
+ *
+ */
+void test_multi_chunk_io_addrmap_issue(void)
+{
+ const char *filename;
+ hsize_t start[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+ hsize_t stride[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+ hsize_t count[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+ hsize_t block[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+ hsize_t dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {10, 5};
+ hsize_t chunk_dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {5, 5};
+ hsize_t max_dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {H5S_UNLIMITED, H5S_UNLIMITED};
+ hid_t file_id = H5I_INVALID_HID;
+ hid_t fapl_id = H5I_INVALID_HID;
+ hid_t dset_id = H5I_INVALID_HID;
+ hid_t dcpl_id = H5I_INVALID_HID;
+ hid_t dxpl_id = H5I_INVALID_HID;
+ hid_t space_id = H5I_INVALID_HID;
+ void *read_buf = NULL;
+ int mpi_rank;
+ int data[5][5] = { {0, 1, 2, 3, 4},
+ {0, 1, 2, 3, 4},
+ {0, 1, 2, 3, 4},
+ {0, 1, 2, 3, 4},
+ {0, 1, 2, 3, 4} };
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+ filename = GetTestParameters();
+
+ fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
+ VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
+
+ /*
+ * Even though the testphdf5 framework currently sets collective metadata reads
+ * on the FAPL, we call it here just to be sure this is futureproof, since
+ * demonstrating this issue relies upon it.
+ */
+ VRFY((H5Pset_all_coll_metadata_ops(fapl_id, true) >= 0), "Set collective metadata reads succeeded");
+
+ file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+ VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+ space_id = H5Screate_simple(MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS, dims, max_dims);
+ VRFY((space_id >= 0), "H5Screate_simple succeeded");
+
+ dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+ VRFY((dcpl_id >= 0), "H5Pcreate succeeded");
+
+ VRFY((H5Pset_chunk(dcpl_id, MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded");
+
+ dset_id = H5Dcreate2(file_id, "dset", H5T_NATIVE_INT, space_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+ VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+ dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+ VRFY((dxpl_id >= 0), "H5Pcreate succeeded");
+
+ VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
+ VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_MULTI_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
+
+ start[1] = 0;
+ stride[0] = stride[1] = 1;
+ count[0] = count[1] = 5;
+ block[0] = block[1] = 1;
+
+ if (mpi_rank == 0)
+ start[0] = 0;
+ else
+ start[0] = 5;
+
+ VRFY((H5Sselect_hyperslab(space_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+ if (mpi_rank != 0)
+ VRFY((H5Sselect_none(space_id) >= 0), "H5Sselect_none succeeded");
+
+ VRFY((H5Dwrite(dset_id, H5T_NATIVE_INT, H5S_ALL, space_id, dxpl_id, data) >= 0), "H5Dwrite succeeded");
+
+ VRFY((H5Fflush(file_id, H5F_SCOPE_GLOBAL) >= 0), "H5Fflush succeeded");
+
+ read_buf = HDmalloc(50 * sizeof(int));
+ VRFY((read_buf != NULL), "malloc succeeded");
+
+ VRFY((H5Dread(dset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "H5Dread succeeded");
+
+ if (read_buf) {
+ HDfree(read_buf);
+ read_buf = NULL;
+ }
+
+ VRFY((H5Sclose(space_id) >= 0), "H5Sclose succeeded");
+ VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Dclose(dset_id) >= 0), "H5Dclose succeeded");
+ VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
+ VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
+}
+
+/*
+ * A test for HDFFV-10562 which attempts to verify that using linked-chunk
+ * I/O with collective metadata reads enabled doesn't cause issues due to
+ * collective metadata reads being made only by process 0 in H5D__sort_chunk().
+ *
+ * NOTE: Due to the way that the threshold value which pertains to this test
+ * is currently calculated within HDF5, there are several conditions that this
+ * test must maintain. Refer to the function H5D__sort_chunk in H5Dmpio.c for
+ * a better idea of why.
+ *
+ * Condition 1: We need to make sure that the test always selects every single
+ * chunk in the dataset. It is fine if the selection is split up among multiple
+ * ranks, but their combined selection must cover the whole dataset.
+ *
+ * Condition 2: The number of chunks in the dataset divided by the number of MPI
+ * ranks must exceed or equal 10000. In other words, each MPI rank must be
+ * responsible for 10000 or more unique chunks.
+ *
+ * Condition 3: This test will currently only be reliably reproducable for 2 or 3
+ * MPI ranks. The threshold value calculated reduces to a constant 100 / mpi_size,
+ * and is compared against a default value of 30%.
+ *
+ * Failure in this test may either cause a hang, or, due to how the MPI calls
+ * pertaining to this issue might mistakenly match up, may cause an MPI error
+ * message similar to:
+ *
+ * #008: H5Dmpio.c line 2338 in H5D__sort_chunk(): MPI_BCast failed
+ * major: Internal error (too specific to document in detail)
+ * minor: Some MPI function failed
+ * #009: H5Dmpio.c line 2338 in H5D__sort_chunk(): Other MPI error, error stack:
+ *PMPI_Bcast(1600)........: MPI_Bcast(buf=0x7eae610, count=320000, MPI_BYTE, root=0, comm=0x84000006) failed
+ *MPIR_Bcast_impl(1452)...:
+ *MPIR_Bcast(1476)........:
+ *MPIR_Bcast_intra(1249)..:
+ *MPIR_SMP_Bcast(1088)....:
+ *MPIR_Bcast_binomial(250): message sizes do not match across processes in the collective routine: Received 2096 but expected 320000
+ * major: Internal error (too specific to document in detail)
+ * minor: MPI Error String
+ */
+void test_link_chunk_io_sort_chunk_issue(void)
+{
+ const char *filename;
+ hsize_t *dataset_dims = NULL;
+ hsize_t max_dataset_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+ hsize_t sel_dims[1];
+ hsize_t chunk_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS] = { LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS };
+ hsize_t start[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+ hsize_t stride[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+ hsize_t count[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+ hsize_t block[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+ hid_t file_id = H5I_INVALID_HID;
+ hid_t fapl_id = H5I_INVALID_HID;
+ hid_t dset_id = H5I_INVALID_HID;
+ hid_t dcpl_id = H5I_INVALID_HID;
+ hid_t dxpl_id = H5I_INVALID_HID;
+ hid_t fspace_id = H5I_INVALID_HID;
+ hid_t mspace_id = H5I_INVALID_HID;
+ int mpi_rank, mpi_size;
+ void *data = NULL;
+ void *read_buf = NULL;
+
+ MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+
+ filename = GetTestParameters();
+
+ fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
+ VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
+
+ /*
+ * Even though the testphdf5 framework currently sets collective metadata reads
+ * on the FAPL, we call it here just to be sure this is futureproof, since
+ * demonstrating this issue relies upon it.
+ */
+ VRFY((H5Pset_all_coll_metadata_ops(fapl_id, true) >= 0), "Set collective metadata reads succeeded");
+
+ file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+ VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+ dataset_dims = HDmalloc(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS * sizeof(*dataset_dims));
+ VRFY((dataset_dims != NULL), "malloc succeeded");
+
+ dataset_dims[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE * mpi_size * LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE;
+ max_dataset_dims[0] = H5S_UNLIMITED;
+
+ fspace_id = H5Screate_simple(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, dataset_dims, max_dataset_dims);
+ VRFY((fspace_id >= 0), "H5Screate_simple succeeded");
+
+ /*
+ * Set up chunking on the dataset in order to reproduce the problem.
+ */
+ dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+ VRFY((dcpl_id >= 0), "H5Pcreate succeeded");
+
+ VRFY((H5Pset_chunk(dcpl_id, LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded");
+
+ dset_id = H5Dcreate2(file_id, LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME, H5T_NATIVE_INT, fspace_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+ VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+ /*
+ * Setup hyperslab selection to split the dataset among the ranks.
+ *
+ * The ranks will write rows across the dataset.
+ */
+ stride[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE;
+ count[0] = (dataset_dims[0] / LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) / mpi_size;
+ start[0] = count[0] * mpi_rank;
+ block[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE;
+
+ VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+
+ sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE);
+
+ mspace_id = H5Screate_simple(1, sel_dims, NULL);
+ VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
+
+ data = HDcalloc(1, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+ VRFY((data != NULL), "calloc succeeded");
+
+ dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+ VRFY((dxpl_id >= 0), "H5Pcreate succeeded");
+
+ /*
+ * Enable collective access for the data transfer.
+ */
+ VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
+
+ VRFY((H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, data) >= 0), "H5Dwrite succeeded");
+
+ VRFY((H5Fflush(file_id, H5F_SCOPE_GLOBAL) >= 0), "H5Fflush succeeded");
+
+ /*
+ * Ensure that linked-chunk I/O is performed since this is
+ * the particular code path where the issue lies and we don't
+ * want the library doing multi-chunk I/O behind our backs.
+ */
+ VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
+
+ read_buf = HDmalloc(count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+ VRFY((read_buf != NULL), "malloc succeeded");
+
+ VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+
+ sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE);
+
+ VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded");
+
+ mspace_id = H5Screate_simple(1, sel_dims, NULL);
+ VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
+
+ read_buf = HDrealloc(read_buf, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+ VRFY((read_buf != NULL), "realloc succeeded");
+
+ /*
+ * Finally have each rank read their section of data back from the dataset.
+ */
+ VRFY((H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, read_buf) >= 0), "H5Dread succeeded");
+
if (dataset_dims) {
- free(dataset_dims);
+ HDfree(dataset_dims);
dataset_dims = NULL;
}
if (data) {
- free(data);
+ HDfree(data);
data = NULL;
}
if (read_buf) {
- free(read_buf);
+ HDfree(read_buf);
read_buf = NULL;
}
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 35501d8..d4e556d 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2659,8 +2659,10 @@ compress_readAll(void)
nerrors++;
}
+#if MPI_VERSION >= 3
ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
VRFY((ret >= 0), "H5Dwrite succeeded");
+#endif
ret = H5Pclose(xfer_plist);
VRFY((ret >= 0), "H5Pclose succeeded");
diff --git a/testpar/t_shapesame.c b/testpar/t_shapesame.c
index eddbada..f5282bd 100644
--- a/testpar/t_shapesame.c
+++ b/testpar/t_shapesame.c
@@ -948,9 +948,9 @@ contig_hs_dr_pio_test__d2m_l2s(struct hs_dr_pio_test_vars_t * tv_ptr)
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->small_ds_slice_sid,
+ check = H5S__select_shape_same_test(tv_ptr->small_ds_slice_sid,
tv_ptr->file_large_ds_sid_0);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* Read selection from disk */
@@ -1216,9 +1216,9 @@ contig_hs_dr_pio_test__d2m_s2l(struct hs_dr_pio_test_vars_t * tv_ptr)
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->file_small_ds_sid_0,
+ check = H5S__select_shape_same_test(tv_ptr->file_small_ds_sid_0,
tv_ptr->mem_large_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* Read selection from disk */
@@ -1531,9 +1531,9 @@ contig_hs_dr_pio_test__m2d_l2s(struct hs_dr_pio_test_vars_t * tv_ptr)
* memory slice through the cube selection and the
* on disk full square selections as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->file_small_ds_sid_0,
+ check = H5S__select_shape_same_test(tv_ptr->file_small_ds_sid_0,
tv_ptr->mem_large_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed.");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed.");
/* write the slice from the in memory large data set to the
@@ -1864,9 +1864,9 @@ contig_hs_dr_pio_test__m2d_s2l(struct hs_dr_pio_test_vars_t * tv_ptr)
* on disk slice through the large data set selection
* as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->mem_small_ds_sid,
+ check = H5S__select_shape_same_test(tv_ptr->mem_small_ds_sid,
tv_ptr->file_large_ds_sid_0);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* write the small data set slice from memory to the
@@ -3149,9 +3149,9 @@ ckrbrd_hs_dr_pio_test__d2m_l2s(struct hs_dr_pio_test_vars_t * tv_ptr)
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->small_ds_slice_sid,
+ check = H5S__select_shape_same_test(tv_ptr->small_ds_slice_sid,
tv_ptr->file_large_ds_sid_0);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* Read selection from disk */
@@ -3415,9 +3415,9 @@ ckrbrd_hs_dr_pio_test__d2m_s2l(struct hs_dr_pio_test_vars_t * tv_ptr)
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->file_small_ds_sid_0,
+ check = H5S__select_shape_same_test(tv_ptr->file_small_ds_sid_0,
tv_ptr->mem_large_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* Read selection from disk */
@@ -3800,9 +3800,9 @@ ckrbrd_hs_dr_pio_test__m2d_l2s(struct hs_dr_pio_test_vars_t * tv_ptr)
* large dataset and the checkerboard selection of the process
* slice of the small data set as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->file_small_ds_sid_1,
+ check = H5S__select_shape_same_test(tv_ptr->file_small_ds_sid_1,
tv_ptr->mem_large_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed.");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed.");
/* write the checker board selection of the slice from the in
@@ -4155,9 +4155,9 @@ ckrbrd_hs_dr_pio_test__m2d_s2l(struct hs_dr_pio_test_vars_t * tv_ptr)
* on disk slice through the large data set selection
* as having the same shape.
*/
- check = H5S_select_shape_same_test(tv_ptr->mem_small_ds_sid,
+ check = H5S__select_shape_same_test(tv_ptr->mem_small_ds_sid,
tv_ptr->file_large_ds_sid_1);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed");
/* write the small data set slice from memory to the
diff --git a/testpar/t_span_tree.c b/testpar/t_span_tree.c
index 02d2cca..cc79af2 100644
--- a/testpar/t_span_tree.c
+++ b/testpar/t_span_tree.c
@@ -2334,9 +2334,9 @@ lower_dim_size_comp_test__run_test(const int chunk_edge_size,
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(mem_large_ds_sid,
+ check = H5S__select_shape_same_test(mem_large_ds_sid,
file_small_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed (1)");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed (1)");
ret = H5Dread(small_dataset,
@@ -2455,9 +2455,9 @@ lower_dim_size_comp_test__run_test(const int chunk_edge_size,
/* verify that H5S_select_shape_same() reports the two
* selections as having the same shape.
*/
- check = H5S_select_shape_same_test(mem_small_ds_sid,
+ check = H5S__select_shape_same_test(mem_small_ds_sid,
file_large_ds_sid);
- VRFY((check == TRUE), "H5S_select_shape_same_test passed (2)");
+ VRFY((check == TRUE), "H5S__select_shape_same_test passed (2)");
ret = H5Dread(large_dataset,
@@ -2622,31 +2622,27 @@ void
lower_dim_size_comp_test(void)
{
/* const char *fcnName = "lower_dim_size_comp_test()"; */
- int chunk_edge_size = 0;
- int use_collective_io = 1;
- hid_t dset_type = H5T_NATIVE_UINT;
+ int chunk_edge_size = 0;
+ int use_collective_io;
+
#if 0
HDsleep(60);
#endif
- HDcompile_assert(sizeof(uint32_t) == sizeof(unsigned));
- for ( use_collective_io = (hbool_t)0;
- (int)use_collective_io <= 1;
- (hbool_t)(use_collective_io++) ) {
+ HDcompile_assert(sizeof(uint32_t) == sizeof(unsigned));
+ for(use_collective_io = 0; use_collective_io <= 1; use_collective_io++) {
chunk_edge_size = 0;
lower_dim_size_comp_test__run_test(chunk_edge_size,
(hbool_t)use_collective_io,
- dset_type);
-
+ H5T_NATIVE_UINT);
chunk_edge_size = 5;
lower_dim_size_comp_test__run_test(chunk_edge_size,
(hbool_t)use_collective_io,
- dset_type);
- }
+ H5T_NATIVE_UINT);
+ } /* end for */
return;
-
} /* lower_dim_size_comp_test() */
diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c
index 69b66ae..999e17a 100644
--- a/testpar/testphdf5.c
+++ b/testpar/testphdf5.c
@@ -549,7 +549,10 @@ int main(int argc, char **argv)
AddTest("noselcollmdread", test_partial_no_selection_coll_md_read, NULL,
"Collective Metadata read with some ranks having no selection", PARATESTFILE);
-
+ AddTest("MC coll MD read", test_multi_chunk_io_addrmap_issue, NULL,
+ "Collective MD read with multi chunk I/O (H5D__chunk_addrmap)", PARATESTFILE);
+ AddTest("LC coll MD read", test_link_chunk_io_sort_chunk_issue, NULL,
+ "Collective MD read with link chunk I/O (H5D__sort_chunk)", PARATESTFILE);
/* Display testing information */
TestInfo(argv[0]);
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index 176574e..4409221 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -295,6 +295,8 @@ void compress_readAll(void);
#endif /* H5_HAVE_FILTER_DEFLATE */
void test_dense_attr(void);
void test_partial_no_selection_coll_md_read(void);
+void test_multi_chunk_io_addrmap_issue(void);
+void test_link_chunk_io_sort_chunk_issue(void);
/* commonly used prototypes */
hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);