summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlbert Cheng <acheng@hdfgroup.org>2011-01-16 09:44:22 (GMT)
committerAlbert Cheng <acheng@hdfgroup.org>2011-01-16 09:44:22 (GMT)
commite6622c86a87c58e9158b512d6d6e873d8f85ea5f (patch)
treee110aa563f7e16d1f5fc57182bf488ce62e51727
parent1ed956f2cbabeea56d286089cd06f0697050ab1a (diff)
downloadhdf5-e6622c86a87c58e9158b512d6d6e873d8f85ea5f.zip
hdf5-e6622c86a87c58e9158b512d6d6e873d8f85ea5f.tar.gz
hdf5-e6622c86a87c58e9158b512d6d6e873d8f85ea5f.tar.bz2
[svn-r19959] Bug: the VRFY macro, for some reason, called MPI_Finalize() when it encountered
an error and wanted to exit the test program. This was not good, because if only a subset of processes calls MPI_Finalize(), the other processes will likely hang. That happened on AIX, where the test would wait until the alarm signal killed the processes. Definitely a waste of time. Solution: Changed it to call MPI_Abort. That exposed another problem. HDF5 sets up atexit post-processing to try to close unclosed objects, release resources, etc. But if the MPI processes have encountered an error and have been aborted, it is unlikely that any further MPI calls can function properly. E.g., it would attempt to free some communicators in the HDF5 MPIO file handle and would hang again. Solution: call H5dont_atexit() to disable any atexit post-processing. This must be done early, i.e. before calling H5open. This is added to each parallel test main program. testpar.h: Changed macros VRFY and MESG. Added comments too. testphdf5.c: t_mpi.c: t_cache.c: t_shapesame.c: Added H5dont_atexit. Tested: h5committest.
-rw-r--r--testpar/t_cache.c8
-rw-r--r--testpar/t_mpi.c8
-rw-r--r--testpar/t_shapesame.c9
-rw-r--r--testpar/testpar.h37
-rw-r--r--testpar/testphdf5.c9
5 files changed, 57 insertions, 14 deletions
diff --git a/testpar/t_cache.c b/testpar/t_cache.c
index 066e763..3c626d3 100644
--- a/testpar/t_cache.c
+++ b/testpar/t_cache.c
@@ -7236,6 +7236,14 @@ main(int argc, char **argv)
world_server_mpi_rank = mpi_size - 1;
world_mpi_comm = MPI_COMM_WORLD;
+ /* Attempt to turn off atexit post processing so that in case errors
+ * happen during the test and the process is aborted, it will not get
+ * hang in the atexit post processing in which it may try to make MPI
+ * calls. By then, MPI calls may not work.
+ */
+ if (H5dont_atexit() < 0){
+ printf("Failed to turn off atexit processing. Continue.\n", mpi_rank);
+ };
H5open();
express_test = do_express_test();
diff --git a/testpar/t_mpi.c b/testpar/t_mpi.c
index 9033e70..c3e04de 100644
--- a/testpar/t_mpi.c
+++ b/testpar/t_mpi.c
@@ -1137,6 +1137,14 @@ main(int argc, char **argv)
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+ /* Attempt to turn off atexit post processing so that in case errors
+ * happen during the test and the process is aborted, it will not get
+ * hang in the atexit post processing in which it may try to make MPI
+ * calls. By then, MPI calls may not work.
+ */
+ if (H5dont_atexit() < 0){
+ printf("Failed to turn off atexit processing. Continue.\n", mpi_rank);
+ };
H5open();
if (parse_options(argc, argv) != 0){
if (MAINPROCESS)
diff --git a/testpar/t_shapesame.c b/testpar/t_shapesame.c
index 00b6ee5..8f89f9f 100644
--- a/testpar/t_shapesame.c
+++ b/testpar/t_shapesame.c
@@ -4753,6 +4753,15 @@ int main(int argc, char **argv)
printf("PHDF5 TESTS START\n");
printf("===================================\n");
}
+
+ /* Attempt to turn off atexit post processing so that in case errors
+ * happen during the test and the process is aborted, it will not get
+ * hang in the atexit post processing in which it may try to make MPI
+ * calls. By then, MPI calls may not work.
+ */
+ if (H5dont_atexit() < 0){
+ printf("Failed to turn off atexit processing. Continue.\n", mpi_rank);
+ };
H5open();
h5_show_hostname();
diff --git a/testpar/testpar.h b/testpar/testpar.h
index 02fc915..ce11204 100644
--- a/testpar/testpar.h
+++ b/testpar/testpar.h
@@ -26,25 +26,36 @@
/* Define some handy debugging shorthands, routines, ... */
/* debugging tools */
-#define MESG(x) \
- if (VERBOSE_MED) printf("%s\n", x); \
+/* Print message mesg if verbose level is at least medium and
+ * mesg is not an empty string.
+ */
+#define MESG(mesg) \
+ if (VERBOSE_MED && *mesg != '\0') \
+ printf("%s\n", mesg)
+/*
+ * VRFY: Verify if the condition val is true.
+ * If it is true, then call MESG to print mesg, depending on the verbose
+ * level.
+ * If val is not true, it prints error messages and if the verbose
+ * level is lower than medium, it calls MPI_Abort to abort the program.
+ * If verbose level is at least medium, it will not abort.
+ * This will allow program to continue and can be used for debugging.
+ * (The "do {...} while(0)" is to group all the statements as one unit.)
+ */
#define VRFY(val, mesg) do { \
if (val) { \
- if (*mesg != '\0') { \
- MESG(mesg); \
- } \
+ MESG(mesg); \
} else { \
printf("Proc %d: ", mpi_rank); \
- printf("*** PHDF5 ERROR ***\n"); \
- printf(" Assertion (%s) failed at line %4d in %s\n", \
+ printf("*** Parallel ERROR ***\n"); \
+ printf(" VRFY (%s) failed at line %4d in %s\n", \
mesg, (int)__LINE__, __FILE__); \
++nerrors; \
fflush(stdout); \
- if (!VERBOSE_MED) { \
- printf("aborting MPI process\n"); \
- MPI_Finalize(); \
- exit(nerrors); \
+ if (!VERBOSE_MED) { \
+ printf("aborting MPI processes\n"); \
+ MPI_Abort(MPI_COMM_WORLD, 1); \
} \
} \
} while(0)
@@ -56,9 +67,7 @@
*/
#define INFO(val, mesg) do { \
if (val) { \
- if (*mesg != '\0') { \
- MESG(mesg); \
- } \
+ MESG(mesg); \
} else { \
printf("Proc %d: ", mpi_rank); \
printf("*** PHDF5 REMARK (not an error) ***\n"); \
diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c
index 26358a5..8b24f87 100644
--- a/testpar/testphdf5.c
+++ b/testpar/testphdf5.c
@@ -335,6 +335,15 @@ int main(int argc, char **argv)
printf("PHDF5 TESTS START\n");
printf("===================================\n");
}
+
+ /* Attempt to turn off atexit post processing so that in case errors
+ * happen during the test and the process is aborted, it will not get
+ * hang in the atexit post processing in which it may try to make MPI
+ * calls. By then, MPI calls may not work.
+ */
+ if (H5dont_atexit() < 0){
+ printf("Failed to turn off atexit processing. Continue.\n", mpi_rank);
+ };
H5open();
h5_show_hostname();