#!/bin/bash
#
# Copyright by The HDF Group.
# All rights reserved.
#
# This file is part of HDF5. The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the COPYING file, which can be found at the root of the source code
# distribution tree, or in https://www.hdfgroup.org/licenses.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
#
# h5fuse.sh: combine the subfiles listed in a subfiling configuration file
# into a single HDF5 file.
#
# The configuration file must provide "stripe_size=", "hdf5_file=" and
# "subfile_dir=" entries; every line after the "subfile_dir=" line is read
# as the name of one subfile. The output file is assembled by appending one
# stripe_size block from each subfile in turn (round-robin) with dd until
# every subfile has been consumed.
#
# Relies on GNU tools: find -printf, date +%N, dd oflag=append, fuser.

# ANSI escape sequences used to colorize messages (expanded by `echo -e`).
BLD='\033[1m'
GRN='\033[0;32m'
RED='\033[0;31m'
PUR='\033[0;35m'
NC='\033[0m' # No Color

############################################################
# Usage                                                    #
############################################################
function usage() {
  echo ""
  # Display usage
  echo "Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling configuration file either as a command-line argument, or the script will search for the *.config file in the current directory."
  echo ""
  echo "usage: h5fuse.sh [-h] [-f filename]"
  echo "-h Print this help."
  echo "-f filename Subfile configuration file."
  echo ""
}

############################################################
############################################################
# Main program                                             #
############################################################
############################################################

############################################################
# Process the input options. Add options as needed.        #
############################################################
# Get the options
file_config=""

# The leading ':' selects getopts' silent mode so this script prints its own
# diagnostics. Only -f takes an argument; -h is a plain flag.
while getopts ":hf:" option; do
  case $option in
    h) # display Help
      usage
      exit 0;;
    f) # subfiling configuration file
      file_config=$OPTARG;;
    :) # Option is missing its required argument
      echo -e "$RED ERROR: Option ${BLD}-${OPTARG}${RED} requires an argument $NC"
      usage
      exit 1;;
    \?) # Invalid option
      echo -e "$RED ERROR: Invalid option ${BLD}-${OPTARG}${RED} $NC"
      usage
      exit 1;;
    *)
      usage
      exit 1;;
  esac
done

FAILED=1
nfiles=1

############################################################
# Configure file checks                                    #
############################################################
if [ -z "$file_config" ]; then
  # No -f given: exactly one *.config file must exist in the current
  # directory. One '.' is printed per match and the characters are counted.
  nfiles=$(find . -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m)
  if [[ "$nfiles" != "1" ]]; then
    if [[ "$nfiles" == "0" ]]; then
      echo -e "$RED Failed to find .config file in current directory. $NC"
      usage
      exit $FAILED
    else
      echo -e "$RED More than one .config file found in current directory. $NC"
      usage
      exit $FAILED
    fi
  fi
  file_config=$(find . -maxdepth 1 -type f -iname "*.config")
fi

if [ ! -f "$file_config" ]; then
  echo -e "${RED} configuration file ${BLD}$file_config${NC} ${RED}does not exist. $NC"
  exit $FAILED
fi

stripe_size=$(grep "stripe_size=" "$file_config" | cut -d "=" -f2)
if test -z "$stripe_size"; then
  echo -e "$RED failed to find stripe_size in $file_config $NC"
  exit $FAILED
fi
# stripe_size feeds shell arithmetic and dd's bs=. It must be a positive
# decimal integer: zero would make the fuse loop below spin forever, and a
# leading zero would be interpreted as octal by the shell.
if [[ ! "$stripe_size" =~ ^[1-9][0-9]*$ ]]; then
  echo -e "$RED invalid stripe_size \"$stripe_size\" in $file_config $NC"
  exit $FAILED
fi

hdf5_file="$(grep "hdf5_file=" "$file_config" | cut -d "=" -f2)"
if test -z "$hdf5_file"; then
  echo -e "$RED failed to find hdf5 output file in $file_config $NC"
  exit $FAILED
fi

subfile_dir="$(grep "subfile_dir=" "$file_config" | cut -d "=" -f2)"
if test -z "$subfile_dir"; then
  echo -e "$RED failed to find subfile directory in $file_config $NC"
  exit $FAILED
fi

# Everything after the "subfile_dir=" line is the subfile list, one name per
# line. `read` trims surrounding whitespace; blank lines are skipped. Reading
# line by line avoids word-splitting and glob expansion of the names.
subfiles=()
while read -r line || [[ -n "$line" ]]; do
  if [[ -n "$line" ]]; then
    subfiles+=("$line")
  fi
done < <(sed -e '1,/subfile_dir=/d' "$file_config")

if [ ${#subfiles[@]} -eq 0 ]; then
  echo -e "$RED failed to find subfiles list in $file_config $NC"
  exit $FAILED
fi

rm -f "$hdf5_file"

## COMBINE SUBFILES INTO AN HDF5 FILE ##

skip=0                    # index of the stripe copied from each subfile this pass
remaining=${#subfiles[@]} # number of subfiles that still have data to copy
START="$(date +%s%N)"

while [ "$remaining" -gt 0 ]; do
  # The for loop iterates over a snapshot of the list taken at loop start;
  # icnt tracks the current element's index in the live (possibly shrunk)
  # array so that exhausted subfiles can be spliced out.
  icnt=0
  for i in "${subfiles[@]}"; do
    subfile="${subfile_dir}/${i}"

    # Verify the file exists
    if [ ! -f "${subfile}" ]; then
      echo -e "$RED ERROR: file \"${subfile}\" does not exist. $NC"
      exit $FAILED
    fi

    # Verify the file is not being accessed by a process
    t_max=60
    t_sleep=1
    t_elapsed=0
    while fuser -s "${subfile}"; do
      if [[ $((t_elapsed % 5)) -eq 0 ]]; then
        echo -e "$GRN waiting for process to finish accessing file \"${subfile}\" ... [${t_elapsed}s/${t_max}s] $NC"
      fi
      sleep $t_sleep
      t_elapsed=$((t_elapsed+t_sleep))
      if [[ $t_elapsed -ge $t_max ]]; then
        echo -e "$RED ERROR: file \"${subfile}\" still has process accessing it after ${t_elapsed}s $NC"
        exit $FAILED
      fi
    done

    fsize=$(wc -c < "${subfile}")
    if (( skip * stripe_size <= fsize )); then
      # Build the command as an argv array so paths containing whitespace or
      # glob characters reach dd intact.
      dd_cmd=(dd count=1 "bs=$stripe_size" "if=$subfile" "of=$hdf5_file"
              "skip=$skip" oflag=append conv=notrunc)
      echo -e "$GRN ${dd_cmd[*]} $NC"
      # Run dd in the foreground: blocks are appended, so the order of the
      # copies defines the layout of the output file. Capture stderr only
      # and abort on failure instead of leaving a silently corrupt file.
      if ! err=$("${dd_cmd[@]}" 2>&1 >/dev/null); then
        echo -e "$RED ERROR: dd failed for file \"${subfile}\" $NC"
        printf '%s\n' "$err"
        exit $FAILED
      fi
      icnt=$((icnt+1))
    else
      # This subfile has no more stripes: drop it from the live list.
      subfiles=("${subfiles[@]:0:icnt}" "${subfiles[@]:$((icnt+1))}")
      remaining=${#subfiles[@]}
    fi
  done
  skip=$((skip+1))
done

END=$(( $(date +%s%N) - START ))
# Convert the elapsed nanoseconds to seconds with four decimal places.
DURATION_SEC=$(awk -v p="$END" -v q=0.000000001 'BEGIN{printf "%.4f", p * q}')
echo -e "$PUR COMPLETION TIME = $DURATION_SEC s $NC"