author    John Mainzer <mainzer@hdfgroup.org>  2010-11-18 20:56:25 (GMT)
committer John Mainzer <mainzer@hdfgroup.org>  2010-11-18 20:56:25 (GMT)
commit    8ed20b39d6684b59f4c8a7adb46bf2dfb876c9e1 (patch)
tree      f7ab14717fc0c5145fbe8ce0f7434412e0130015
parent    0bb0aa86e71049ae91234a91822968222b53c6cf (diff)
[svn-r19825]
Checked in a fix for the failure in the shape same tests that appeared after Quincey's recent massage of the test code.

The problem was a race condition created when Quincey re-worked the code selecting either collective or independent I/O. Previously, when independent I/O was selected in the test, I had used H5Pset_dxpl_mpio() and H5Pset_dxpl_mpio_collective_opt() to select collective semantics with independent I/O going on under the hood. Quincey modified this to call H5Pset_dxpl_mpio() only when collective I/O was selected, and to do nothing in the independent I/O case. As a result, processes were able to race ahead and modify the initial values of the data set before some processes had verified that the initialization was correct.

Solved the problem by adding barriers, and by making all of the barriers dependent on independent I/O being selected.

Tested parallel on amani and phoenix. h5committested. Note that parallel on amani and h5committest on heiwa failed several times before I got a clean pass without code changes. The failures on amani appeared to be timeouts caused by contention for the machine -- worryingly, they occurred in the shape same tests. However, given the subsequent passes, and passes on jam and phoenix, I am going ahead with the commit. The failure on heiwa was in the fheap test; I don't see how this can be related to changes in testpar, and in any case it went away on the second try.
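For reference, a minimal standalone sketch of the two patterns described above, not taken from the test code itself: setup_xfer_plist() and sync_before_verify() are hypothetical helper names invented for illustration. The first shows the pre-rework use of H5Pset_dxpl_mpio() plus H5Pset_dxpl_mpio_collective_opt() to get collective semantics with independent I/O under the hood; the second shows the barrier guard this commit adds, executed only when independent I/O is selected, on the assumption (as in the commit) that the collective path already synchronizes the participating processes.

    #include <assert.h>
    #include <mpi.h>
    #include <hdf5.h>

    /* Hypothetical helper: build a dataset transfer property list for either
     * plain collective I/O or collective semantics with independent I/O
     * performed under the hood.
     */
    static hid_t
    setup_xfer_plist(hbool_t use_collective_io)
    {
        hid_t  xfer_plist;
        herr_t ret;

        xfer_plist = H5Pcreate(H5P_DATASET_XFER);
        if ( xfer_plist < 0 )
            return -1;

        /* collective semantics in both cases ... */
        ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);

        if ( ( ret >= 0 ) && ( ! use_collective_io ) ) {
            /* ... but do the actual I/O independently */
            ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,
                                                  H5FD_MPIO_INDIVIDUAL_IO);
        }

        return ( ret >= 0 ) ? xfer_plist : -1;
    }

    /* Hypothetical helper: independent I/O provides no implicit
     * synchronization, so barrier before any process verifies (or later
     * modifies) just-written data; skip the barrier on the collective path.
     */
    static void
    sync_before_verify(hbool_t use_collective_io)
    {
        if ( ! use_collective_io ) {
            int mrc = MPI_Barrier(MPI_COMM_WORLD);
            assert(mrc == MPI_SUCCESS);
        }
    }

This is the shape of every hunk in the diff below: each unconditional MPI_Barrier() after a write, and each newly added barrier before data is modified, becomes conditional on independent I/O being in use.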
-rw-r--r--  testpar/t_rank_projection.c  48
1 file changed, 38 insertions(+), 10 deletions(-)
diff --git a/testpar/t_rank_projection.c b/testpar/t_rank_projection.c
index c0b0d19..7159ee0 100644
--- a/testpar/t_rank_projection.c
+++ b/testpar/t_rank_projection.c
@@ -505,9 +505,11 @@ contig_hyperslab_dr_pio_test__run_test(const int test_num,
/* sync with the other processes before checking data */
- mrc = MPI_Barrier(MPI_COMM_WORLD);
- VRFY((mrc==MPI_SUCCESS), "Sync after small dataset writes");
+ if ( ! use_collective_io ) {
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after small dataset writes");
+ }
/* read the small data set back to verify that it contains the
* expected data. Note that each process reads in the entire
@@ -611,8 +613,11 @@ contig_hyperslab_dr_pio_test__run_test(const int test_num,
/* sync with the other processes before checking data */
- mrc = MPI_Barrier(MPI_COMM_WORLD);
- VRFY((mrc==MPI_SUCCESS), "Sync after large dataset writes");
+ if ( ! use_collective_io ) {
+
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after large dataset writes");
+ }
/* read the small data set back to verify that it contains the
@@ -628,7 +633,7 @@ contig_hyperslab_dr_pio_test__run_test(const int test_num,
VRFY((ret >= 0), "H5Dread() large_dataset initial read succeeded");
- /* verify that the correct data was written to the small data set */
+ /* verify that the correct data was written to the large data set */
expected_value = 0;
mis_match = FALSE;
ptr_1 = large_ds_buf_1;
@@ -646,6 +651,15 @@ contig_hyperslab_dr_pio_test__run_test(const int test_num,
VRFY( (mis_match == FALSE), "large ds init data good.");
+ /* sync with the other processes before changing data */
+
+ if ( ! use_collective_io ) {
+
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after initial values check");
+ }
+
+
/* first, verify that we can read from disk correctly using selections
* of different rank that H5S_select_shape_same() views as being of the
* same shape.
@@ -2800,7 +2814,6 @@ checker_board_hyperslab_dr_pio_test__run_test(const int test_num,
VRFY((ret != FAIL), "H5Dcreate2() large_dataset succeeded");
-
/* setup xfer property list */
xfer_plist = H5Pcreate(H5P_DATASET_XFER);
VRFY((xfer_plist >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded");
@@ -2810,6 +2823,7 @@ checker_board_hyperslab_dr_pio_test__run_test(const int test_num,
VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
}
+
/* setup selection to write initial data to the small and large data sets */
start[0] = mpi_rank;
stride[0] = 2 * (mpi_size + 1);
@@ -2870,9 +2884,11 @@ checker_board_hyperslab_dr_pio_test__run_test(const int test_num,
/* sync with the other processes before checking data */
- mrc = MPI_Barrier(MPI_COMM_WORLD);
- VRFY((mrc==MPI_SUCCESS), "Sync after small dataset writes");
+ if ( ! use_collective_io ) {
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after small dataset writes");
+ }
/* read the small data set back to verify that it contains the
* expected data. Note that each process reads in the entire
@@ -2976,8 +2992,11 @@ checker_board_hyperslab_dr_pio_test__run_test(const int test_num,
/* sync with the other processes before checking data */
- mrc = MPI_Barrier(MPI_COMM_WORLD);
- VRFY((mrc==MPI_SUCCESS), "Sync after large dataset writes");
+ if ( ! use_collective_io ) {
+
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after large dataset writes");
+ }
/* read the small data set back to verify that it contains the
@@ -3009,6 +3028,15 @@ checker_board_hyperslab_dr_pio_test__run_test(const int test_num,
expected_value++;
}
VRFY( (mis_match == FALSE), "large ds init data good.");
+
+ /* sync with the other processes before changing data */
+
+ if ( ! use_collective_io ) {
+
+ mrc = MPI_Barrier(MPI_COMM_WORLD);
+ VRFY((mrc==MPI_SUCCESS), "Sync after initial values check");
+ }
+
/***********************************/
/***** INITIALIZATION COMPLETE *****/