From ec695cf34f7256d7af9bbc2fcf5366da0119fcea Mon Sep 17 00:00:00 2001 From: Scot Breitenfeld Date: Tue, 7 Nov 2023 09:50:03 -0600 Subject: Renamed h5fuse.sh to h5fuse (#3834) * provide an alternative to mapfile for older bash --- configure.ac | 4 +- release_docs/RELEASE.txt | 6 + testpar/t_subfiling_vfd.c | 4 +- utils/Makefile.am | 2 +- utils/subfiling_vfd/CMakeLists.txt | 8 +- utils/subfiling_vfd/h5fuse.in | 279 +++++++++++++++++++++++++++++++++++++ utils/subfiling_vfd/h5fuse.sh.in | 279 ------------------------------------- 7 files changed, 294 insertions(+), 288 deletions(-) create mode 100755 utils/subfiling_vfd/h5fuse.in delete mode 100755 utils/subfiling_vfd/h5fuse.sh.in diff --git a/configure.ac b/configure.ac index 44ab43a..30f5d2c 100644 --- a/configure.ac +++ b/configure.ac @@ -4236,10 +4236,10 @@ AC_CONFIG_FILES([Makefile hl/fortran/examples/Makefile hl/fortran/examples/run-hlfortran-ex.sh]) -AC_CONFIG_FILES([utils/subfiling_vfd/h5fuse.sh], [chmod +x utils/subfiling_vfd/h5fuse.sh]) +AC_CONFIG_FILES([utils/subfiling_vfd/h5fuse], [chmod +x utils/subfiling_vfd/h5fuse]) if test -n "$TESTPARALLEL"; then if test "X$SUBFILING_VFD" = "Xyes"; then - AC_CONFIG_LINKS([testpar/h5fuse.sh:utils/subfiling_vfd/h5fuse.sh]) + AC_CONFIG_LINKS([testpar/h5fuse:utils/subfiling_vfd/h5fuse]) fi fi diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 5dd36ea..5e85378 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -920,6 +920,12 @@ Bug Fixes since HDF5-1.14.0 release Tools ----- + + - Renamed h5fuse.sh to h5fuse + + Addresses Discussion #3791 + + - Fixed an issue with unmatched MPI messages in ph5diff The "manager" MPI rank in ph5diff was unintentionally sending "program end" diff --git a/testpar/t_subfiling_vfd.c b/testpar/t_subfiling_vfd.c index 72613a3..45cb363 100644 --- a/testpar/t_subfiling_vfd.c +++ b/testpar/t_subfiling_vfd.c @@ -1898,7 +1898,7 @@ test_subfiling_h5fuse(void) if (MAINPROCESS) { FILE *h5fuse_script; - h5fuse_script = fopen("h5fuse.sh", "r"); + h5fuse_script = fopen("h5fuse", "r"); if (h5fuse_script) fclose(h5fuse_script); else @@ -2014,7 +2014,7 @@ test_subfiling_h5fuse(void) SUBF_FILENAME, file_inode); args[0] = strdup("env"); - args[1] = strdup("./h5fuse.sh"); + args[1] = strdup("./h5fuse"); args[2] = strdup("-q"); args[3] = strdup("-f"); args[4] = tmp_filename; diff --git a/utils/Makefile.am b/utils/Makefile.am index 229712e..740f5c3 100644 --- a/utils/Makefile.am +++ b/utils/Makefile.am @@ -41,6 +41,6 @@ endif # All subdirectories SUBDIRS=$(MIRROR_VFD_DIR) $(TESTUTIL_DIR) $(TOOLS_DIR) -bin_SCRIPTS = subfiling_vfd/h5fuse.sh +bin_SCRIPTS = subfiling_vfd/h5fuse include $(top_srcdir)/config/conclude.am diff --git a/utils/subfiling_vfd/CMakeLists.txt b/utils/subfiling_vfd/CMakeLists.txt index 3acdc6b..62b6255 100644 --- a/utils/subfiling_vfd/CMakeLists.txt +++ b/utils/subfiling_vfd/CMakeLists.txt @@ -1,20 +1,20 @@ cmake_minimum_required (VERSION 3.18) project (HDF5_UTILS_SUBFILINGVFD C) -configure_file (${HDF5_UTILS_SUBFILINGVFD_SOURCE_DIR}/h5fuse.sh.in ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse.sh @ONLY) +configure_file (${HDF5_UTILS_SUBFILINGVFD_SOURCE_DIR}/h5fuse.in ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse @ONLY) -# Copy h5fuse.sh to testpar directory for subfiling tests +# Copy h5fuse to testpar directory for subfiling tests if (HDF5_ENABLE_PARALLEL AND HDF5_TEST_PARALLEL) file ( COPY - ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse.sh + ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse DESTINATION ${HDF5_TEST_PAR_BINARY_DIR} ) endif () install ( - FILES ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse.sh + FILES ${HDF5_UTILS_SUBFILINGVFD_BINARY_DIR}/h5fuse DESTINATION ${HDF5_INSTALL_BIN_DIR} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE COMPONENT utilsapplications diff --git a/utils/subfiling_vfd/h5fuse.in b/utils/subfiling_vfd/h5fuse.in new file mode 100755 index 0000000..a4c6a05 --- /dev/null +++ b/utils/subfiling_vfd/h5fuse.in @@ -0,0 +1,279 @@ +#!/usr/bin/env bash +# +# Copyright by The HDF Group. +# All rights reserved. +# +# This file is part of HDF5. The full HDF5 copyright notice, including +# terms governing use, modification, and redistribution, is contained in +# the COPYING file, which can be found at the root of the source code +# distribution tree, or in https://www.hdfgroup.org/licenses. +# If you do not have access to either file, you may request a copy from +# help@hdfgroup.org. +# +BLD='\033[1m' +GRN='\033[0;32m' +RED='\033[0;31m' +CYN='\033[0;36m' +NC='\033[0m' # No Color + +############################################################ +# Usage # +############################################################ +function usage { + echo "" + # Display usage + echo "Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling + configuration file either as a command-line argument or the script will + search for the *.config file in the current directory." + echo "" + echo "usage: h5fuse [-f filename] [-h] [-p] [-q] [-r] [-v] " + echo "-f filename Subfile configuration file." + echo "-h Print this help." + echo "-q Quiet all output. [no]" + echo "-p h5fuse is being run in parallel, with more than one rank. [no]" + echo "-r Remove subfiles after being processed. [no]" + echo "-v Verbose output. [no]" + echo "" +} + +function gen_mpi { + +# Program to determine MPI rank and size if being run in parallel (-p). + +cat > "${c_src}" << EOL +#include +#include +int main() { + MPI_Init(NULL, NULL); + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + printf("%d %d", world_rank, world_size); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); +} +EOL + +} + +function fuse { + +# function for fusing the files + +mpi_rank=0 +mpi_size=1 +nstart=1 +nend=$nsubfiles + +if [ "$parallel" == "true" ]; then + + hex=$(hexdump -n 16 -v -e '/1 "%02X"' /dev/urandom) + c_exec="h5fuse_"${hex} + c_src=${c_exec}.c + + # Generate and compile an MPI program to get MPI rank and size + if [ ! -f "${c_src}" ]; then + gen_mpi + CC=@CC@ + ${CC} "${c_src}" -o "${c_exec}" + fi + wait + rank_size=$(./"${c_exec}") + read -r mpi_rank mpi_size <<<"$rank_size" + + rm -f "${c_src}" "${c_exec}" + + # Divide the subfiles among the ranks + iwork1=$(( nsubfiles / mpi_size )) + iwork2=$(( nsubfiles % mpi_size )) + min=$(( mpi_rank < iwork2 ? mpi_rank : iwork2 )) + nstart=$(( mpi_rank * iwork1 + 1 + min )) + nend=$(( nstart + iwork1 - 1 )) + if [ $iwork2 -gt "$mpi_rank" ]; then + nend=$(( nend + 1 )) + fi +fi + +############################################################ +# COMBINE SUBFILES INTO AN HDF5 FILE # +############################################################ +icnt=1 +skip=0 +seek=0 +seek_cnt=0 +for i in "${subfiles[@]}"; do + + subfile="${subfile_dir}/${i}" + + # bs=BYTES read and write up to BYTES bytes at a time; overrides ibs and obs + # ibs=BYTES read up to BYTES bytes at a time + # obs=BYTES write BYTES bytes at a time + # seek=N skip N obs-sized blocks at start of output + # skip=N skip N ibs-sized blocks at start of input + + status=1 + fsize=${subfiles_size[icnt-1]} + if [ "$fsize" -eq "0" ]; then + seek_cnt=$((seek_cnt+1)) + seek=$seek_cnt + if [ "$rm_subf" == "true" ]; then + if [ -f "${subfile}" ]; then + \rm -f "$subfile" + fi + fi + else + if [ $icnt -ge "$nstart" ] && [ $icnt -le "$nend" ]; then + records_left=$fsize + while [ "$status" -gt 0 ]; do + if [ $((skip*stripe_size)) -le "$fsize" ] && [ "$records_left" -gt 0 ]; then + EXEC="dd count=1 bs=$stripe_size if=$subfile of=$hdf5_file skip=$skip seek=$seek conv=notrunc" + if [ "$verbose" == "true" ]; then + echo -e "$GRN $EXEC $NC" + fi + err=$( $EXEC 2>&1 1>/dev/null ) + if [ $? -ne 0 ]; then + echo -e "$CYN ERR: dd Utility Failed $NC" + echo -e "$CYN MSG: $err $NC" + exit $FAILED + fi + records_left=$((records_left-stripe_size)) + skip=$((skip+1)) + seek=$((seek_cnt+skip*nsubfiles)) + else + status=0 + skip=0 + fi + done; wait + if [ "$rm_subf" == "true" ]; then + \rm -f "$subfile" + fi + fi + seek_cnt=$((seek_cnt+1)) + seek=$seek_cnt + fi + icnt=$(( icnt +1 )) +done; wait + +} + +############################################################ +############################################################ +# Main program # +############################################################ +############################################################ + +############################################################ +# Process the input options. Add options as needed. # +############################################################ +# Get the options +file_config="" +verbose="false" +quiet="false" +rm_subf="false" +parallel="false" +while getopts "hpqrvf:" option; do + case $option in + f) # subfiling configuration file + file_config=$OPTARG;; + h) # display Help + usage + exit;; + p) # HDF5 fused file + parallel="true";; + q) # quiet all output + quiet="true";; + r) # remove completed subfiles + rm_subf="true";; + v) # verbose output + verbose="true";; + \?) # Invalid option + echo -e "$RED ERROR: Invalid option ${BLD}-${OPTARG}${RED} $NC" + usage + exit 1;; + * ) usage + exit 1;; + esac +done + +FAILED=1 +############################################################ +# Configure file checks # +############################################################ +# +SUBF_CONFDIR="${H5FD_SUBFILING_CONFIG_FILE_PREFIX:-$PWD}" + +# Try to find the config file +if [ -z "$file_config" ]; then + nfiles=$(find "$SUBF_CONFDIR" -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m) + if [[ "$nfiles" != "1" ]]; then + if [[ "$nfiles" == "0" ]]; then + echo -e "$RED Failed to find .config file in ${SUBF_CONFDIR} $NC" + usage + exit $FAILED + else + echo -e "$RED More than one .config file found in ${SUBF_CONFDIR} $NC" + usage + exit $FAILED + fi + fi + file_config=$(find "${SUBF_CONFDIR}" -maxdepth 1 -type f -iname '*.config') +fi + +if [ ! -f "$file_config" ]; then + echo -e "${RED} configuration file ${BLD}$file_config${NC} ${RED}does not exist. $NC" + exit $FAILED +fi + +stripe_size=$(grep "stripe_size=" "$file_config" | cut -d "=" -f2) +if test -z "$stripe_size"; then + echo -e "$RED failed to find stripe_size in $file_config $NC" + exit $FAILED +fi + +hdf5_file="$(grep "hdf5_file=" "$file_config" | cut -d "=" -f2)" +if test -z "$hdf5_file"; then + echo -e "$RED failed to find hdf5 output file in $file_config $NC" + exit $FAILED +fi + +subfile_dir="$(grep "subfile_dir=" "$file_config" | cut -d "=" -f2)" +if test -z "$subfile_dir"; then + echo -e "$RED failed to find subfile directory in $file_config $NC" + exit $FAILED +fi + +subfs=$(sed -e '1,/subfile_dir=/d' "$file_config") +if command -v mapfile > /dev/null; then + # For bash 4.4+ + mapfile -t subfiles <<< "$subfs" +else + while IFS= read -r line; do + subfiles+=("$line") + done <<< "$subfs" +fi +if [ ${#subfiles[@]} -eq 0 ]; then + echo -e "$RED failed to find subfiles list in $file_config $NC" + exit $FAILED +fi +nsubfiles=${#subfiles[@]} + +# Get the number of local subfiles +subfiles_loc=() +subfiles_size=() +for i in "${subfiles[@]}"; do + subfile="${subfile_dir}/${i}" + if [ -f "${subfile}" ]; then + subfiles_loc+=("$subfile") + subfiles_size+=($(wc -c "${subfile}" | awk '{print $1}')) + else + subfiles_size+=(0) + fi +done + +if [ "$quiet" == "false" ]; then + TIMEFORMAT="COMPLETION TIME = %R s" + time fuse +else + fuse +fi diff --git a/utils/subfiling_vfd/h5fuse.sh.in b/utils/subfiling_vfd/h5fuse.sh.in deleted file mode 100755 index 6f4bf61..0000000 --- a/utils/subfiling_vfd/h5fuse.sh.in +++ /dev/null @@ -1,279 +0,0 @@ -#!/usr/bin/env bash -# -# Copyright by The HDF Group. -# All rights reserved. -# -# This file is part of HDF5. The full HDF5 copyright notice, including -# terms governing use, modification, and redistribution, is contained in -# the COPYING file, which can be found at the root of the source code -# distribution tree, or in https://www.hdfgroup.org/licenses. -# If you do not have access to either file, you may request a copy from -# help@hdfgroup.org. -# -BLD='\033[1m' -GRN='\033[0;32m' -RED='\033[0;31m' -CYN='\033[0;36m' -NC='\033[0m' # No Color - -############################################################ -# Usage # -############################################################ -function usage { - echo "" - # Display usage - echo "Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling - configuration file either as a command-line argument or the script will - search for the *.config file in the current directory." - echo "" - echo "usage: h5fuse.sh [-f filename] [-h] [-p] [-q] [-r] [-v] " - echo "-f filename Subfile configuration file." - echo "-h Print this help." - echo "-q Quiet all output. [no]" - echo "-p h5fuse.sh is being run in parallel, with more than one rank. [no]" - echo "-r Remove subfiles after being processed. [no]" - echo "-v Verbose output. [no]" - echo "" -} - -function gen_mpi { - -# Program to determine MPI rank and size if being run in parallel (-p). - -cat > "${c_src}" << EOL -#include -#include -int main() { - MPI_Init(NULL, NULL); - int world_size; - MPI_Comm_size(MPI_COMM_WORLD, &world_size); - int world_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); - printf("%d %d", world_rank, world_size); - MPI_Barrier(MPI_COMM_WORLD); - MPI_Finalize(); -} -EOL - -} - -function fuse { - -# function for fusing the files - -mpi_rank=0 -mpi_size=1 -nstart=1 -nend=$nsubfiles - -if [ "$parallel" == "true" ]; then - - hex=$(hexdump -n 16 -v -e '/1 "%02X"' /dev/urandom) - c_exec="h5fuse_"${hex} - c_src=${c_exec}.c - - # Generate and compile an MPI program to get MPI rank and size - if [ ! -f "${c_src}" ]; then - gen_mpi - CC=@CC@ - ${CC} "${c_src}" -o "${c_exec}" - fi - wait - rank_size=$(./"${c_exec}") - read -r mpi_rank mpi_size <<<"$rank_size" - - rm -f "${c_src}" "${c_exec}" - - # Divide the subfiles among the ranks - iwork1=$(( nsubfiles / mpi_size )) - iwork2=$(( nsubfiles % mpi_size )) - min=$(( mpi_rank < iwork2 ? mpi_rank : iwork2 )) - nstart=$(( mpi_rank * iwork1 + 1 + min )) - nend=$(( nstart + iwork1 - 1 )) - if [ $iwork2 -gt "$mpi_rank" ]; then - nend=$(( nend + 1 )) - fi -fi - -############################################################ -# COMBINE SUBFILES INTO AN HDF5 FILE # -############################################################ -icnt=1 -skip=0 -seek=0 -seek_cnt=0 -for i in "${subfiles[@]}"; do - - subfile="${subfile_dir}/${i}" - - # bs=BYTES read and write up to BYTES bytes at a time; overrides ibs and obs - # ibs=BYTES read up to BYTES bytes at a time - # obs=BYTES write BYTES bytes at a time - # seek=N skip N obs-sized blocks at start of output - # skip=N skip N ibs-sized blocks at start of input - - status=1 - fsize=${subfiles_size[icnt-1]} - if [ "$fsize" -eq "0" ]; then - seek_cnt=$((seek_cnt+1)) - seek=$seek_cnt - if [ "$rm_subf" == "true" ]; then - if [ -f "${subfile}" ]; then - \rm -f "$subfile" - fi - fi - else - if [ $icnt -ge "$nstart" ] && [ $icnt -le "$nend" ]; then - records_left=$fsize - while [ "$status" -gt 0 ]; do - if [ $((skip*stripe_size)) -le "$fsize" ] && [ "$records_left" -gt 0 ]; then - EXEC="dd count=1 bs=$stripe_size if=$subfile of=$hdf5_file skip=$skip seek=$seek conv=notrunc" - if [ "$verbose" == "true" ]; then - echo -e "$GRN $EXEC $NC" - fi - err=$( $EXEC 2>&1 1>/dev/null ) - if [ $? -ne 0 ]; then - echo -e "$CYN ERR: dd Utility Failed $NC" - echo -e "$CYN MSG: $err $NC" - exit $FAILED - fi - records_left=$((records_left-stripe_size)) - skip=$((skip+1)) - seek=$((seek_cnt+skip*nsubfiles)) - else - status=0 - skip=0 - fi - done; wait - if [ "$rm_subf" == "true" ]; then - \rm -f "$subfile" - fi - fi - seek_cnt=$((seek_cnt+1)) - seek=$seek_cnt - fi - icnt=$(( icnt +1 )) -done; wait - -} - -############################################################ -############################################################ -# Main program # -############################################################ -############################################################ - -############################################################ -# Process the input options. Add options as needed. # -############################################################ -# Get the options -file_config="" -verbose="false" -quiet="false" -rm_subf="false" -parallel="false" -while getopts "hpqrvf:" option; do - case $option in - f) # subfiling configuration file - file_config=$OPTARG;; - h) # display Help - usage - exit;; - p) # HDF5 fused file - parallel="true";; - q) # quiet all output - quiet="true";; - r) # remove completed subfiles - rm_subf="true";; - v) # verbose output - verbose="true";; - \?) # Invalid option - echo -e "$RED ERROR: Invalid option ${BLD}-${OPTARG}${RED} $NC" - usage - exit 1;; - * ) usage - exit 1;; - esac -done - -FAILED=1 -############################################################ -# Configure file checks # -############################################################ -# -SUBF_CONFDIR="${H5FD_SUBFILING_CONFIG_FILE_PREFIX:-$PWD}" - -# Try to find the config file -if [ -z "$file_config" ]; then - nfiles=$(find "$SUBF_CONFDIR" -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m) - if [[ "$nfiles" != "1" ]]; then - if [[ "$nfiles" == "0" ]]; then - echo -e "$RED Failed to find .config file in ${SUBF_CONFDIR} $NC" - usage - exit $FAILED - else - echo -e "$RED More than one .config file found in ${SUBF_CONFDIR} $NC" - usage - exit $FAILED - fi - fi - file_config=$(find "${SUBF_CONFDIR}" -maxdepth 1 -type f -iname '*.config') -fi - -if [ ! -f "$file_config" ]; then - echo -e "${RED} configuration file ${BLD}$file_config${NC} ${RED}does not exist. $NC" - exit $FAILED -fi - -stripe_size=$(grep "stripe_size=" "$file_config" | cut -d "=" -f2) -if test -z "$stripe_size"; then - echo -e "$RED failed to find stripe_size in $file_config $NC" - exit $FAILED -fi - -hdf5_file="$(grep "hdf5_file=" "$file_config" | cut -d "=" -f2)" -if test -z "$hdf5_file"; then - echo -e "$RED failed to find hdf5 output file in $file_config $NC" - exit $FAILED -fi - -subfile_dir="$(grep "subfile_dir=" "$file_config" | cut -d "=" -f2)" -if test -z "$subfile_dir"; then - echo -e "$RED failed to find subfile directory in $file_config $NC" - exit $FAILED -fi - -subfs=$(sed -e '1,/subfile_dir=/d' "$file_config") -if command -v mapfile > /dev/null; then - # For bash 4.4+ - mapfile -t subfiles <<< "$subfs" -else - while IFS= read -r line; do - subfiles+=("$line") - done <<< "$subfs" -fi -if [ ${#subfiles[@]} -eq 0 ]; then - echo -e "$RED failed to find subfiles list in $file_config $NC" - exit $FAILED -fi -nsubfiles=${#subfiles[@]} - -# Get the number of local subfiles -subfiles_loc=() -subfiles_size=() -for i in "${subfiles[@]}"; do - subfile="${subfile_dir}/${i}" - if [ -f "${subfile}" ]; then - subfiles_loc+=("$subfile") - subfiles_size+=($(wc -c "${subfile}" | awk '{print $1}')) - else - subfiles_size+=(0) - fi -done - -if [ "$quiet" == "false" ]; then - TIMEFORMAT="COMPLETION TIME = %R s" - time fuse -else - fuse -fi -- cgit v0.12