summaryrefslogtreecommitdiffstats
path: root/testpar
diff options
context:
space:
mode:
authorMuQun Yang <ymuqun@hdfgroup.org>2006-03-09 14:52:09 (GMT)
committerMuQun Yang <ymuqun@hdfgroup.org>2006-03-09 14:52:09 (GMT)
commit0f0a5eb91807e3541aacba55905cf72ac19f0aad (patch)
tree56dd9b8a897a3959237480ab046809b830c5ad16 /testpar
parent3ba9425ab266840c7bc8812a68d29e21abffa4f5 (diff)
downloadhdf5-0f0a5eb91807e3541aacba55905cf72ac19f0aad.zip
hdf5-0f0a5eb91807e3541aacba55905cf72ac19f0aad.tar.gz
hdf5-0f0a5eb91807e3541aacba55905cf72ac19f0aad.tar.bz2
[svn-r12044] Purpose:
Add new tests. Description: Collective IO doesn't work on some platforms/MPI-IO packages when more than one process has no contribution to the IO. Solution: 1. Add a collective IO test to verify the correctness of the library when more than one process has no contribution to the IO. 2. Add a similar MPI-IO test in t_mpi to help us maintain the library on more platforms. Platforms tested: heping, mir, copper Misc. update:
Diffstat (limited to 'testpar')
-rw-r--r--testpar/t_coll_chunk.c2
-rw-r--r--testpar/t_mpi.c260
2 files changed, 236 insertions, 26 deletions
diff --git a/testpar/t_coll_chunk.c b/testpar/t_coll_chunk.c
index f403b53..fbc3505 100644
--- a/testpar/t_coll_chunk.c
+++ b/testpar/t_coll_chunk.c
@@ -470,7 +470,7 @@ ccslab_set(int mpi_rank,
block[1] = 1;
stride[0] = 1;
stride[1] = 1;
- count[0] = ((mpi_rank == (mpi_size-1))?0:SPACE_DIM1/mpi_size);
+ count[0] = ((mpi_rank >= MAX(1,(mpi_size-2)))?0:SPACE_DIM1/mpi_size);
count[1] = SPACE_DIM2;
start[0] = mpi_rank*count[0];
start[1] = 0;
diff --git a/testpar/t_mpi.c b/testpar/t_mpi.c
index f2889d5..1ec47f5 100644
--- a/testpar/t_mpi.c
+++ b/testpar/t_mpi.c
@@ -830,6 +830,177 @@ static int test_mpio_derived_dtype(char *filename) {
return retcode;
}
/*
+
+Function: test_mpio_special_collective
+
+Test Whether collective IO is still working when more than one process
+has no contribution to IO. To properly test this case, at least FOUR
+processes are needed.
+
+1. Details for the test:
+1) Create one derived datatype with MPI_Type_hindexed:
+
+2) Choose at least two processes that contribute nothing to the IO by
+ setting the buffer count passed to MPI_File_write_at_all to 0.
+3) Choosing at least two processes to have real contributions for IO.
+4) Do collective IO.
+
+2. This test will fail with an MPI-IO package that doesn't support this case,
+for example mpich 1.2.6.
+
+If this bug has been fixed in a previously non-working package, this test prints a message telling the developer to change
+the configuration-specific file of HDF5 so that the configuration setting enables support for special collective IO (currently this setting covers only special collective IO).
+
+If it turns out that a previously working MPI-IO package no longer works, this test also prints a message reporting the failure so that
+we can turn off the support for special collective IO (currently this setting covers only special collective IO).
+*/
+
+/* Collective write where every rank other than 0 and 1 contributes no data.
+ * filename: file opened (created if needed) collectively on MPI_COMM_WORLD.
+ * Returns 1 on an MPI failure, or when the write succeeds although
+ * H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS is not defined; otherwise returns 0. */
+static int test_mpio_special_collective(char *filename) {
+    int mpi_size, mpi_rank;
+    MPI_File fh;
+    MPI_Datatype etype,buftype,filetype;
+    char mpi_err_str[MPI_MAX_ERROR_STRING];
+    int mpi_err_strlen;
+    int mpi_err;
+    char writedata[2*DIMSIZE]; /* the fill loop below stores 2*DIMSIZE bytes */
+    char *buf;
+    int i;
+    int count,bufcount;
+    int blocklens[2];
+    MPI_Aint offsets[2];
+    MPI_Offset mpi_off;
+    MPI_Status mpi_stat;
+    int retcode;
+
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    retcode = 0;
+
+    /* ranks 0 and 1 write one derived-type element; all others write nothing */
+    etype = MPI_BYTE;
+    if(mpi_rank == 0 || mpi_rank == 1) {
+        count = DIMSIZE;
+        bufcount = 1;
+    }
+    else {
+        count = 0;
+        bufcount = 0;
+    }
+
+    /* two blocks of count bytes; the second starts mpi_size*count bytes after the first */
+    blocklens[0] = count;
+    offsets[0] = mpi_rank*count;
+    blocklens[1] = count;
+    offsets[1] = (mpi_size+mpi_rank)*count;
+
+    if(count !=0) {
+        if((mpi_err= MPI_Type_hindexed(2,blocklens,offsets,etype,&filetype))
+           != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+        if((mpi_err=MPI_Type_commit(&filetype))!=MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+        /* NOTE(review): buftype reuses the file offsets, so it appears to address
+         * memory beyond writedata's 2*DIMSIZE bytes -- confirm the intended layout. */
+        if((mpi_err= MPI_Type_hindexed(2,blocklens,offsets,etype,&buftype))
+           != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+        if((mpi_err=MPI_Type_commit(&buftype))!=MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+    }
+    else {
+        filetype = MPI_BYTE;
+        buftype = MPI_BYTE;
+    }
+
+    /* Open a file */
+    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
+                                 MPI_MODE_RDWR | MPI_MODE_CREATE ,
+                                 MPI_INFO_NULL, &fh))
+        != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_open failed (%s)\n", mpi_err_str);
+        return 1;
+    }
+
+    /* each process writes some data */
+    for (i=0; i < 2*DIMSIZE; i++)
+        writedata[i] = mpi_rank*DIMSIZE + i;
+
+    mpi_off = 0;
+    if((mpi_err = MPI_File_set_view(fh, mpi_off, MPI_BYTE, filetype, "native", MPI_INFO_NULL))
+       != MPI_SUCCESS) {
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
+        return 1;
+    }
+
+    buf = writedata;
+    if ((mpi_err = MPI_File_write_at_all(fh, mpi_off, buf, bufcount, buftype,
+                                         &mpi_stat))
+        != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_write_at_all offset(%ld), bytes (%d), failed (%s)\n",
+               (long) mpi_off, bufcount, mpi_err_str);
+        return 1;
+    }
+
+    if ((mpi_err = MPI_File_close(&fh))
+        != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_close failed (%s)\n", mpi_err_str);
+        return 1;
+    }
+
+    mpi_err = MPI_Barrier(MPI_COMM_WORLD);
+#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
+    if(retcode != 0) {
+        if(mpi_rank == 0) {
+            printf("special collective IO is NOT working at this platform\n");
+            printf("Go back to hdf5/config and find the corresponding\n");
+            printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n");
+            printf("hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'}\n");
+            printf(" at the end of the file.\n");
+            printf(" Please report to hdfhelp@ncsa.uiuc.edu about this problem.\n");
+        }
+        retcode = 1;
+    }
+#else
+    if(retcode == 0) {
+        if(mpi_rank == 0) {
+            printf(" This is NOT an error, What it really says is\n");
+            printf("special collective IO is WORKING at this platform\n");
+            printf(" Go back to hdf5/config and find the corresponding \n");
+            printf(" configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n");
+            printf("hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'}\n");
+            printf(" at the end of the file.\n");
+            printf("Please report to hdfhelp@ncsa.uiuc.edu about this problem.\n");
+        }
+        retcode = 1;
+    }
+#endif
+    return retcode;
+}
+
+/*
* parse the command line options
*/
static int
@@ -947,31 +1118,6 @@ main(int argc, char **argv)
/* set alarm. */
ALARM_ON;
- /*=======================================
- * MPIO complicated derived datatype test
- *=======================================*/
- /* test_mpio_derived_dtype often hangs when fails.
- * Do not run it if it is known NOT working unless ask to
- * run explicitly by high verbose mode.
- */
-#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- MPI_BANNER("MPIO complicated derived datatype test...");
- ret_code = test_mpio_derived_dtype(filenames[0]);
-#else
- if (VERBOSE_HI){
- MPI_BANNER("MPIO complicated derived datatype test...");
- ret_code = test_mpio_derived_dtype(filenames[0]);
- }else{
- MPI_BANNER("MPIO complicated derived datatype test SKIPPED.");
- ret_code = 0; /* fake ret_code */
- }
-#endif
- ret_code = errors_sum(ret_code);
- if (mpi_rank==0 && ret_code > 0){
- printf("***FAILED with %d total errors\n", ret_code);
- nerrors += ret_code;
- }
-
/*=======================================
* MPIO 1 write Many read test
@@ -1028,6 +1174,70 @@ main(int argc, char **argv)
nerrors += ret_code;
}
+ /*=======================================
+ * MPIO complicated derived datatype test
+ *=======================================*/
+ /* test_mpio_derived_dtype often hangs when fails.
+ * Do not run it if it is known NOT working unless ask to
+ * run explicitly by high verbose mode.
+ */
+#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ MPI_BANNER("MPIO complicated derived datatype test...");
+ ret_code = test_mpio_derived_dtype(filenames[0]);
+#else
+ if (VERBOSE_HI){
+ MPI_BANNER("MPIO complicated derived datatype test...");
+ ret_code = test_mpio_derived_dtype(filenames[0]);
+ }else{
+ MPI_BANNER("MPIO complicated derived datatype test SKIPPED.");
+ ret_code = 0; /* fake ret_code */
+ }
+#endif
+ ret_code = errors_sum(ret_code);
+ if (mpi_rank==0 && ret_code > 0){
+ printf("***FAILED with %d total errors\n", ret_code);
+ nerrors += ret_code;
+ }
+
+ /*=======================================
+ * MPIO special collective IO test
+ *=======================================*/
+ /* test_special_collective_io often hangs when fails.
+ * Do not run it if it is known NOT working unless ask to
+ * run explicitly by high verbose mode.
+ */
+ if(mpi_size !=4){
+ MPI_BANNER("MPIO special collective io test SKIPPED.");
+ if(mpi_rank == 0){
+ printf("Use FOUR processes to run this test\n");
+ printf("If you still see the <test SKIPPED>, use <-vh> option to verify the test\n");
+ }
+ ret_code = 0;
+ goto sc_finish;
+ }
+
+#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
+ MPI_BANNER("MPIO special collective io test...");
+ ret_code = test_mpio_special_collective(filenames[0]);
+
+#else
+ if (VERBOSE_HI){
+ MPI_BANNER("MPIO special collective io test...");
+ ret_code = test_mpio_special_collective(filenames[0]);
+ }else{
+ MPI_BANNER("MPIO special collective io test SKIPPED.");
+ ret_code = 0; /* fake ret_code */
+ }
+#endif
+
+sc_finish:
+ ret_code = errors_sum(ret_code);
+ if (mpi_rank==0 && ret_code > 0){
+ printf("***FAILED with %d total errors\n", ret_code);
+ nerrors += ret_code;
+ }
+
+
finish:
/* make sure all processes are finished before final report, cleanup
* and exit.