From aab23acc05664e1d0c066e5b6518535446a8eb89 Mon Sep 17 00:00:00 2001 From: Albert Cheng Date: Wed, 14 Jul 2010 12:07:50 -0500 Subject: [svn-r19070] Document update: ID 1921 change mpirun to mpiexec Changed the use of mpirun to mpiexec which is the MPI-2 official standard. INSTALL_parallel: Also bring in previous update of Red Storm support. --- release_docs/INSTALL_parallel | 90 +++++++++++++++++++++++++++++++++++-------- release_docs/RELEASE.txt | 3 ++ 2 files changed, 78 insertions(+), 15 deletions(-) diff --git a/release_docs/INSTALL_parallel b/release_docs/INSTALL_parallel index e8f8bad..04643b2 100644 --- a/release_docs/INSTALL_parallel +++ b/release_docs/INSTALL_parallel @@ -51,17 +51,21 @@ parallel HDF5 with one of the above, just set CC as it and configure. The "--enable-parallel" is optional in this case. $ CC=/usr/local/mpi/bin/mpicc ./configure --prefix= - $ make - $ make check + $ make # build the library + $ make check # verify the correctness + # Read the Details section about parallel tests. $ make install 2.2. IBM SP ----------- -Make sure your environment variables are set correctly to compile and execute -a single process mpi applications for the SP machine. Unfortunately, the -setting varies from machine to machine. E.g., the following works for the -IBM SP machine at LLNL. +During the build stage, the H5detect is compiled and executed to generate +the source file H5Tinit.c which is compiled as part of the HDF5 library. In +parallel mode, make sure your environment variables are set correctly to +execute a single process mpi application. Otherwise, multiple processes +attempt to write to the same H5Tinit.c file, resulting in a scrambled +source file. Unfortunately, the setting varies from machine to machine. +E.g., the following works for the IBM SP machine at LLNL. setenv MP_PROCS 1 setenv MP_NODES 1 @@ -104,6 +108,30 @@ Linux kernels 2.4 and greater. 2.4. 
Red Storm (Cray XT3) (for v1.8 and later) ------------------------- +Both serial and parallel HDF5 are supported in Red Storm. + +2.4.1 Building serial HDF5 for Red Storm +------------------------------------------ +The following steps are for building the serial HDF5 for the Red Storm +compute nodes. They would probably work for other Cray XT3 systems but have +not been verified. + +# Assume you already have a copy of HDF5 source code in directory `hdf5' and +# want to install the binary in directory `/project/hdf5/hdf5'. + +$ cd hdf5 +$ bin/yodconfigure configure +$ env RUNSERIAL="yod -sz 1" \ + CC=cc FC=ftn CXX=CC \ + ./configure --prefix=/project/hdf5/hdf5 +$ make +$ make check + +# if all is well, install the binary. +$ make install + +2.4.2 Building parallel HDF5 for Red Storm +------------------------------------------ The following steps are for building the Parallel HDF5 for the Red Storm compute nodes. They would probably work for other Cray XT3 systems but have not been verified. @@ -115,15 +143,47 @@ not been verified. $ cd hdf5 $ bin/yodconfigure configure $ env RUNSERIAL="yod -sz 1" RUNPARALLEL="yod -sz 3" \ - CC="mpicc -DRED_STORM" F9X=mpif90 \ - ./configure --disable-hl --without-zlib --disable-stream-vfd \ - --enable-parallel --prefix=/project/hdf5/phdf5 + CC=cc FC=ftn \ + ./configure --enable-parallel --prefix=/project/hdf5/phdf5 $ make $ make check # if all is well, install the binary. $ make install +2.4.3 Red Storm known problems +------------------------------ +For Red Storm, a Cray XT3 system, the yod command sometimes gives the +message, "yod allocation delayed for node recovery". This interferes with +test suites that do not expect to see this message. To bypass this problem, +I launch the executables with a command shell script called "myyod" which +consists of the following lines. (You should set $RUNSERIAL and $RUNPARALLEL +to use myyod instead of yod.) 
+==== myyod ======= +#!/bin/sh +# sleep 2 seconds to allow time for the node recovery else it pops the +# message, +# yod allocation delayed for node recovery +sleep 2 +yod $* +==== end of myyod ======= + +For Red Storm, a Cray XT3 system, the tools/h5ls/testh5ls.sh will fail on +the test "Testing h5ls -w80 -r -g tgroup.h5". This test is +expected to fail and exit with a non-zero code but the yod command does +not propagate the exit code of the executables. Yod always returns 0 if it +can launch the executable. The test suite shell expects a non-zero exit code for +this particular test, therefore it concludes the test has failed when it +receives 0 from yod. To bypass this problem for now, change the following +lines in the tools/h5ls/testh5ls.sh. +======== Original ========= +# The following combination of arguments is expected to return an error message +# and return value 1 +TOOLTEST tgroup-1.ls 1 -w80 -r -g tgroup.h5 +======== Skip the test ========= +echo SKIP TOOLTEST tgroup-1.ls 1 -w80 -r -g tgroup.h5 +======== end of bypass ======== + 3. Detail explanation --------------------- @@ -169,10 +229,10 @@ For example, If a parallel library is being built then configure attempts to determine how to run a parallel application on one processor and on many processors. If the compiler is `mpicc' and the user hasn't specified values for RUNSERIAL and -RUNPARALLEL then configure chooses `mpirun' from the same directory as `mpicc': +RUNPARALLEL then configure chooses `mpiexec' from the same directory as `mpicc': - RUNSERIAL: /usr/local/mpi/bin/mpirun -np 1 - RUNPARALLEL: /usr/local/mpi/bin/mpirun -np $${NPROCS:=3} + RUNSERIAL: /usr/local/mpi/bin/mpiexec -np 1 + RUNPARALLEL: /usr/local/mpi/bin/mpiexec -np $${NPROCS:=3} The `$${NPROCS:=3}' will be substituted with the value of the NPROCS environment variable at the time `make check' is run (or the value 3). @@ -218,7 +278,7 @@ Appendix A. 
Sample programs --------------------------- Here are sample MPI-IO C and Fortran programs. You may use them to run simple tests of your MPI compilers and the parallel file system. The MPI commands -used here are mpicc, mpif90 and mpirun. Replace them with the commands of +used here are mpicc, mpif90 and mpiexec. Replace them with the commands of your system. The programs assume they run in the parallel file system. Thus they create @@ -229,10 +289,10 @@ programs to use a different file name. Example compiling and running: % mpicc Sample_mpio.c -o c.out -% mpirun -np 4 c.out +% mpiexec -np 4 c.out % mpif90 Sample_mpio.f90 -o f.out -% mpirun -np 4 f.out +% mpiexec -np 4 f.out ==> Sample_mpio.c <== diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 7c6bd8f..6af47e9 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -382,6 +382,9 @@ Bug Fixes since HDF5-1.8.0 release Configuration ------------- + - Removed recognition of the parallel compilers of LAM(hcc) and + ChMPIon(cmpicc) since we have no access to these two MPI implementations + and cannot verify their correctness. (AKC - 2010/7/14 - Bug 1921) - Removed the following config files, as we no longer support them: config/dec-osf*, config/hpux11.00, config/irix5.x, config/powerpc-ibm-aix4.x config/rs6000-ibm-aix5.x config/unicos* -- cgit v0.12