# Some systems will dump some messages to stdout for various reasons.
# Remove them from the stdout result file.
#
# Arguments:
#   $1 - name of the file to be filtered; it is rewritten in place.
# Globals:
#   result_file, tmp_file (written).  They are plain globals because the
#   function sticks to POSIX sh constructs (no `local'); the script's
#   shebang is not visible from here.
# Returns:
#   0 on success.  1 if $1 is not a readable regular file or the filtered
#   copy cannot be produced; in both cases $1 has not been touched.  1 as
#   well if copying the filtered text back into $1 fails.
#
# Cases of filter needed:
#  1. Sandia Red-Storm
#     yod always prints these two lines at the beginning.
#       LibLustre: NAL NID: 0004a605 (5)
#       Lustre: OBD class driver Build Version: 1, info@clusterfs.com
#  2. LANL Lambda
#     mpijob mpirun -np always adds an extra line at the end like:
#       P4 procgroup file is /users/acheng/.lsbatch/host10524.l82
STDOUT_FILTER() {
    result_file=$1
    # Keep the scratch file next to the result file instead of using a
    # predictable name in world-writable /tmp.
    tmp_file=${result_file}.filter.$$

    if [ ! -f "$result_file" ] || [ ! -r "$result_file" ]; then
        printf '%s\n' "STDOUT_FILTER: cannot read '$result_file'" >&2
        return 1
    fi

    # One sed pass covers all cases.  The result file is only overwritten
    # after sed has succeeded, so a failure can no longer truncate it.
    # The copy-back goes through a redirection (not mv) so that the result
    # file keeps its identity, as with the original in-place rewrite.
    if sed -e '/^LibLustre:/d' \
           -e '/^Lustre:/d' \
           -e '/^P4 procgroup file is/d' \
           < "$result_file" > "$tmp_file" &&
       cat < "$tmp_file" > "$result_file"
    then
        rm -f -- "$tmp_file"
        return 0
    fi

    # cleanup after a failed pass
    rm -f -- "$tmp_file"
    return 1
}
# Some systems will dump some messages to stderr for various reasons.
# Remove them from the stderr result file.
#
# Arguments:
#   $1 - name of the file to be filtered; it is rewritten in place.
# Globals:
#   pmode (read)  - when non-empty, the parallel-only filters (cases 1, 2
#                   and 4 below, plus srun messages) are applied as well.
#   result_file, tmp_file (written).  They are plain globals because the
#   function sticks to POSIX sh constructs (no `local'); the script's
#   shebang is not visible from here.
# Returns:
#   0 on success.  1 if $1 is not a readable regular file or the filtered
#   copy cannot be produced; in both cases $1 has not been touched.  1 as
#   well if copying the filtered text back into $1 fails.
#
# Cases of filter needed:
#  1. MPE:
#     In parallel mode and if MPE library is used, it prints the following
#     two message lines whether the MPE tracing is used or not.
#       Writing logfile.
#       Finished writing logfile.
#  2. LANL MPI:
#     The LANL MPI will print some messages like the following,
#       LA-MPI: *** mpirun (1.5.10)
#       LA-MPI: *** 3 process(es) on 2 host(s): 2*fln21 1*fln22
#       LA-MPI: *** libmpi (1.5.10)
#       LA-MPI: *** Copyright 2001-2004, ACL, Los Alamos National Laboratory
#  3. h5diff debug output:
#     Debug output all have prefix "h5diff debug: ".
#  4. AIX system prints messages like these when it is aborting:
#       ERROR: 0031-300 Forcing all remote tasks to exit due to exit code 1 in task 0
#       ERROR: 0031-250 task 4: Terminated
#       ERROR: 0031-250 task 3: Terminated
#       ERROR: 0031-250 task 2: Terminated
#       ERROR: 0031-250 task 1: Terminated
#  5. LLNL Blue-Gene mpirun prints messages like these when it exits non-zero:
#       BE_MPI (ERROR): The error message in the job record is as follows:
#       BE_MPI (ERROR): "killed by exit(1) on node 0"
#     NOTE(review): the sed pattern below requires a space in front of
#     "BE_MPI", so the real lines presumably carry a prefix - confirm.
STDERR_FILTER() {
    result_file=$1
    # Keep the scratch file next to the result file instead of using a
    # predictable name in world-writable /tmp.
    tmp_file=${result_file}.filter.$$

    if [ ! -f "$result_file" ] || [ ! -r "$result_file" ]; then
        printf '%s\n' "STDERR_FILTER: cannot read '$result_file'" >&2
        return 1
    fi

    # Collect the sed expressions in the positional parameters; inside a
    # function they are restored on return, so the caller is not affected.
    # Always applied: LLNL Blue-Gene error messages (mpirun is used in both
    # serial and parallel modes) and h5diff debug output.
    set -- -e '/ BE_MPI (ERROR): /d' -e '/^h5diff debug: /d'

    # Parallel mode only: MPE messages, LANL MPI messages, LLNL srun
    # messages and AIX error messages.  The `.' in the logfile patterns is
    # an unescaped regex wildcard, kept exactly as before.
    if test -n "${pmode:-}"; then
        set -- "$@" \
            -e '/^Writing logfile./d' -e '/^Finished writing logfile./d' \
            -e '/^LA-MPI:/d' -e '/^srun:/d' -e '/^ERROR:/d'
    fi

    # All expressions are independent line deletions, so a single pass is
    # equivalent to applying them one after another.  The result file is
    # only overwritten after sed has succeeded, so a failure can no longer
    # truncate it.
    if sed "$@" < "$result_file" > "$tmp_file" &&
       cat < "$tmp_file" > "$result_file"
    then
        rm -f -- "$tmp_file"
        return 0
    fi

    # clean up temporary files after a failed pass
    rm -f -- "$tmp_file"
    return 1
}
If a test fails then increment # the `nerrors' global variable and (if $verbose is set) display up to $NLINS @@ -64,6 +148,8 @@ TOOLTEST() { expect="$srcdir/../testfiles/$1" actual="../testfiles/`basename $1 .ls`.out" actual_err="../testfiles/`basename $1 .ls`.err" + actual_sav=${actual}-sav + actual_err_sav=${actual_err}-sav shift retvalexpect=$1 shift @@ -81,6 +167,11 @@ TOOLTEST() { ) >$actual 2>$actual_err exitcode=$? + # save actual and actual_err in case they are needed later. + cp $actual $actual_sav + STDOUT_FILTER $actual + cp $actual_err $actual_err_sav + STDERR_FILTER $actual_err cat $actual_err >> $actual if [ $h5haveexitcode = 'yes' -a $exitcode -ne $retvalexpect ]; then echo "*FAILED*" @@ -107,7 +198,7 @@ TOOLTEST() { # Clean up output file if test -z "$HDF5_NOCLEANUP"; then - rm -f $actual $actual_err + rm -f $actual $actual_err $actual_sav $actual_err_sav fi } -- cgit v0.12