From fe1a5b4279bbbf705c6d3b3886ffe2a963bd07ec Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Thu, 20 Dec 2018 03:03:16 -0800 Subject: Added a shell script so we can run the parallel flush test on OpenMPI. --- MANIFEST | 1 + config/conclude.am | 2 +- configure.ac | 8 +++++++- release_docs/INSTALL_parallel | 17 ++++++++++++---- test/h5test.c | 2 +- testpar/Makefile.am | 12 +++++++++-- testpar/testpflush.sh.in | 47 +++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 testpar/testpflush.sh.in diff --git a/MANIFEST b/MANIFEST index d840e73..c89ad44 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1282,6 +1282,7 @@ ./testpar/t_span_tree.c ./testpar/t_init_term.c ./testpar/testpar.h +./testpar/testpflush.sh.in ./testpar/testphdf5.c ./testpar/testphdf5.h diff --git a/config/conclude.am b/config/conclude.am index d0dbf3a..9da1da5 100644 --- a/config/conclude.am +++ b/config/conclude.am @@ -249,7 +249,7 @@ build-check-p: $(LIB) $(PROGS) $(chk_TESTS) echo "**** Hint ****"; \ echo "Parallel test files reside in the current directory" \ "by default."; \ - echo "Set HDF5_PARAPREFIX to use another directory. E.g.,"; \ + echo "Set HDF5_PARAPREFIX to use another directory. e.g.,"; \ echo " HDF5_PARAPREFIX=/PFS/user/me"; \ echo " export HDF5_PARAPREFIX"; \ echo " make check"; \ diff --git a/configure.ac b/configure.ac index 0a1370f..3416705 100644 --- a/configure.ac +++ b/configure.ac @@ -841,9 +841,13 @@ fi ## command). The value of this variable is ## substituted in *.in files. ## +## RUNPARALLELSCRIPT -- Identical to RUNPARALLEL but without +## the special makefile protection for environment +## variables. AC_SUBST([PARALLEL]) AC_SUBST([RUNSERIAL]) AC_SUBST([RUNPARALLEL]) +AC_SUBST([RUNPARALLELSCRIPT]) AC_SUBST([TESTPARALLEL]) ## ---------------------------------------------------------------------- @@ -2511,9 +2515,10 @@ case "X-$enable_parallel" in PAC_PROG_FC_MPI_CHECK fi - ## Set RUNPARALLEL to mpiexec if not set yet. 
+ ## Set RUNPARALLEL and RUNPARALLELSCRIPT to mpiexec if not set yet. if test "X$PARALLEL" = "Xyes" -a -z "$RUNPARALLEL"; then RUNPARALLEL="mpiexec -n \$\${NPROCS:=6}" + RUNPARALLELSCRIPT="mpiexec -n ${NPROCS:=6}" fi ;; @@ -3444,6 +3449,7 @@ AC_CONFIG_FILES([src/libhdf5.settings test/test_filter_plugin.sh test/test_usecases.sh testpar/Makefile + testpar/testpflush.sh tools/Makefile tools/lib/Makefile tools/src/Makefile diff --git a/release_docs/INSTALL_parallel b/release_docs/INSTALL_parallel index f32fffc..1bf1dd4 100644 --- a/release_docs/INSTALL_parallel +++ b/release_docs/INSTALL_parallel @@ -100,9 +100,9 @@ qsub -I -q debug -l mppwidth=8 mkdir build-hdf5; cd build-hdf5/ - configure HDF5: - RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared + RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" RUNPARALLELSCRIPT="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared - RUNSERIAL and RUNPARALLEL tells the library how it should launch programs that are part of the build procedure. + RUNSERIAL, RUNPARALLEL, and RUNPARALLELSCRIPT tell the library how it should launch programs that are part of the build procedure. - Compile HDF5: gmake @@ -155,12 +155,21 @@ to run a parallel application on one processor and on many processors. If the compiler is `mpicc' and the user hasn't specified values for RUNSERIAL and RUNPARALLEL then configure chooses `mpiexec' from the same directory as `mpicc': - RUNSERIAL: /usr/local/mpi/bin/mpiexec -np 1 - RUNPARALLEL: /usr/local/mpi/bin/mpiexec -np $${NPROCS:=6} + RUNSERIAL: mpiexec -n 1 + RUNPARALLEL: mpiexec -n $${NPROCS:=6} The `$${NPROCS:=6}' will be substituted with the value of the NPROCS environment variable at the time `make check' is run (or the value 6). +RUNPARALLELSCRIPT is identical to RUNPARALLEL but is used in parallel shell +scripts and lacks the environment variable protection. 
+ + RUNPARALLELSCRIPT: mpiexec -n ${NPROCS:=6} + +Note that some MPI implementations (e.g. OpenMPI 4.0) disallow oversubscribing +nodes by default so you'll have to either set NPROCS equal to the number of +processors available (or fewer) or redefine RUNPARALLEL(SCRIPT) with appropriate +flag(s) (--oversubscribe in OpenMPI). 4. Parallel test suite ---------------------- diff --git a/test/h5test.c b/test/h5test.c index 0226aa7..f943ad0 100644 --- a/test/h5test.c +++ b/test/h5test.c @@ -652,7 +652,7 @@ h5_fixname_real(const char *base_name, hid_t fapl, const char *_suffix, printf("*** Hint ***\n" "You can use environment variable HDF5_PARAPREFIX to " "run parallel test files in a\n" - "different directory or to add file type prefix. E.g.,\n" + "different directory or to add file type prefix. e.g.,\n" " HDF5_PARAPREFIX=pfs:/PFS/user/me\n" " export HDF5_PARAPREFIX\n" "*** End of Hint ***\n"); diff --git a/testpar/Makefile.am b/testpar/Makefile.am index 7262ca6..a11099d 100644 --- a/testpar/Makefile.am +++ b/testpar/Makefile.am @@ -21,11 +21,19 @@ include $(top_srcdir)/config/commence.am AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/test +# Test scripts-- +# testpflush.sh: +TEST_SCRIPT_PARA = testpflush.sh +SCRIPT_DEPEND = t_pflush1$(EXEEXT) t_pflush2$(EXEEXT) + +check_SCRIPTS = $(TEST_SCRIPT_PARA) + # Test programs. These are our main targets. 
# -TEST_PROG_PARA=t_mpi t_bigio testphdf5 t_cache t_cache_image t_pflush1 t_pflush2 t_pread t_pshutdown t_prestart t_init_term t_shapesame t_filters_parallel +TEST_PROG_PARA=t_mpi t_bigio testphdf5 t_cache t_cache_image t_pread t_pshutdown t_prestart t_init_term t_shapesame t_filters_parallel -check_PROGRAMS = $(TEST_PROG_PARA) +# t_pflush1 and t_pflush2 are used by testpflush.sh +check_PROGRAMS = $(TEST_PROG_PARA) t_pflush1 t_pflush2 testphdf5_SOURCES=testphdf5.c t_dset.c t_file.c t_file_image.c t_mdset.c \ t_ph5basic.c t_coll_chunk.c t_span_tree.c t_chunk_alloc.c t_filter_read.c \ diff --git a/testpar/testpflush.sh.in b/testpar/testpflush.sh.in new file mode 100644 index 0000000..e1eb1ef --- /dev/null +++ b/testpar/testpflush.sh.in @@ -0,0 +1,47 @@ +#! /bin/sh +# +# Copyright by The HDF Group. +# Copyright by the Board of Trustees of the University of Illinois. +# All rights reserved. +# +# This file is part of HDF5. The full HDF5 copyright notice, including +# terms governing use, modification, and redistribution, is contained in +# the COPYING file, which can be found at the root of the source code +# distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. +# If you do not have access to either file, you may request a copy from +# help@hdfgroup.org. +# +# +# Test script for the parallel flush test +# +# The parallel flush test uses two programs to test flush operations +# in parallel HDF5. The first program purposely exits without calling +# MPI_Finalize(), which is an error under the MPI standard and mpiexec +# in some implementations will return an error code even though all +# processes exit successfully. This script lets us swallow the error +# from the first program. +# +# True errors in the first program will be detected as errors in the +# second program, so watch out for that. +# +# Programmer: Dana Robinson +# Fall 2018 + +# The build (current) directory might be different than the source directory. 
+if test -z "$srcdir"; then + srcdir=. +fi + +# ========================================== +# Run the first parallel flush test program +# (note that we ignore any errors here) +# ========================================== +@RUNPARALLELSCRIPT@ ./t_pflush1 + +# =========================================== +# Run the second parallel flush test program +# The return code of this call is the return +# code of the script. +# =========================================== +@RUNPARALLELSCRIPT@ ./t_pflush2 + -- cgit v0.12 From 64eb1489de409b08026faa112fd1baad98ef1bcc Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Thu, 20 Dec 2018 03:43:53 -0800 Subject: Added a helpful message to the flush script. --- testpar/testpflush.sh.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/testpar/testpflush.sh.in b/testpar/testpflush.sh.in index e1eb1ef..f2ce029 100644 --- a/testpar/testpflush.sh.in +++ b/testpar/testpflush.sh.in @@ -36,8 +36,14 @@ fi # Run the first parallel flush test program # (note that we ignore any errors here) # ========================================== +echo "*** NOTE ***********************************************************" +echo "You may see complaints from mpiexec et al. that not all processes" +echo "called MPI_Finalize(). This is an intended characteristic of the" +echo "test and should not be considered an error." +echo "********************************************************************" @RUNPARALLELSCRIPT@ ./t_pflush1 + # =========================================== # Run the second parallel flush test program # The return code of this call is the return -- cgit v0.12 From 4faca62679ef0850794934b31a091c1f079aca0c Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Mon, 31 Dec 2018 05:04:30 -0800 Subject: Eliminated the need for a separate script variable. 
--- configure.ac | 7 +------ testpar/testpflush.sh.in | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 89adcae..9b890fd 100644 --- a/configure.ac +++ b/configure.ac @@ -841,13 +841,9 @@ fi ## command). The value of this variable is ## substituted in *.in files. ## -## RUNPARALLELSCRIPT -- Identical to RUNPARALLEL but without -## the special makefile protection for environment -## variables. AC_SUBST([PARALLEL]) AC_SUBST([RUNSERIAL]) AC_SUBST([RUNPARALLEL]) -AC_SUBST([RUNPARALLELSCRIPT]) AC_SUBST([TESTPARALLEL]) ## ---------------------------------------------------------------------- @@ -2481,10 +2477,9 @@ case "X-$enable_parallel" in PAC_PROG_FC_MPI_CHECK fi - ## Set RUNPARALLEL and RUNPARALLELSCRIPT to mpiexec if not set yet. + ## Set RUNPARALLEL to mpiexec if not set yet. if test "X$PARALLEL" = "Xyes" -a -z "$RUNPARALLEL"; then RUNPARALLEL="mpiexec -n \$\${NPROCS:=6}" - RUNPARALLELSCRIPT="mpiexec -n ${NPROCS:=6}" fi ;; diff --git a/testpar/testpflush.sh.in b/testpar/testpflush.sh.in index f2ce029..02f0e26 100644 --- a/testpar/testpflush.sh.in +++ b/testpar/testpflush.sh.in @@ -32,6 +32,17 @@ if test -z "$srcdir"; then srcdir=. fi +# Turn the $$ we use to avoid Autotools munging into $ +# +# The RUNPARALLEL string is single-quoted so the shell passes the $$ +# through literally instead of expanding it to the shell's pid, and +# sed matches it with bracket expressions because backslash escapes +# are rewritten inside backquotes. This lets library builders keep +# specifying a single RUNPARALLEL command for both scripts and +# Makefiles. Expansion of ${NPROCS:=6} and friends is deferred to the +# eval calls below. RUNPARALLEL must not contain single quotes. 
+RUNPARALLELSCRIPT=`echo '@RUNPARALLEL@' | sed 's/[$][$]/$/g'` + # ========================================== # Run the first parallel flush test program # (note that we ignore any errors here) @@ -41,7 +52,7 @@ echo "You may see complaints from mpiexec et al. that not all processes" echo "called MPI_Finalize(). This is an intended characteristic of the" echo "test and should not be considered an error." echo "********************************************************************" -@RUNPARALLELSCRIPT@ ./t_pflush1 +eval ${RUNPARALLELSCRIPT} ./t_pflush1 # =========================================== @@ -49,5 +60,5 @@ echo "********************************************************************" # The return code of this call is the return # code of the script. # =========================================== -@RUNPARALLELSCRIPT@ ./t_pflush2 +eval ${RUNPARALLELSCRIPT} ./t_pflush2 -- cgit v0.12 From d6c2a96ac2f103d90b96d5b39814810e6a31ef99 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Mon, 31 Dec 2018 05:07:30 -0800 Subject: Updated the parallel install docs. --- release_docs/INSTALL_parallel | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/release_docs/INSTALL_parallel b/release_docs/INSTALL_parallel index 1bf1dd4..d3d7830 100644 --- a/release_docs/INSTALL_parallel +++ b/release_docs/INSTALL_parallel @@ -100,9 +100,9 @@ qsub -I -q debug -l mppwidth=8 mkdir build-hdf5; cd build-hdf5/ - configure HDF5: - RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" RUNPARALLELSCRIPT="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared + RUNSERIAL="aprun -q -n 1" RUNPARALLEL="aprun -q -n 6" FC=ftn CC=cc /path/to/source/configure --enable-fortran --enable-parallel --disable-shared - RUNSERIAL, RUNPARALLEL, and RUNPARALLELSCRIPT tell the library how it should launch programs that are part of the build procedure. 
+ RUNSERIAL and RUNPARALLEL tell the library how it should launch programs that are part of the build procedure. - Compile HDF5: gmake @@ -161,14 +161,9 @@ RUNPARALLEL then configure chooses `mpiexec' from the same directory as `mpicc': The `$${NPROCS:=6}' will be substituted with the value of the NPROCS environment variable at the time `make check' is run (or the value 6). -RUNPARALLELSCRIPT is identical to RUNPARALLEL but is used in parallel shell -scripts and lacks the environment variable protection. - - RUNPARALLELSCRIPT: mpiexec -n ${NPROCS:=6} - Note that some MPI implementations (e.g. OpenMPI 4.0) disallow oversubscribing nodes by default so you'll have to either set NPROCS equal to the number of -processors available (or fewer) or redefine RUNPARALLEL(SCRIPT) with appropriate +processors available (or fewer) or redefine RUNPARALLEL with appropriate flag(s) (--oversubscribe in OpenMPI). 4. Parallel test suite -- cgit v0.12