summaryrefslogtreecommitdiffstats
path: root/utils/subfiling_vfd
diff options
context:
space:
mode:
Diffstat (limited to 'utils/subfiling_vfd')
-rw-r--r--utils/subfiling_vfd/CMakeLists.txt11
-rwxr-xr-xutils/subfiling_vfd/h5fuse.sh89
-rwxr-xr-xutils/subfiling_vfd/h5fuse.sh.in165
3 files changed, 265 insertions, 0 deletions
diff --git a/utils/subfiling_vfd/CMakeLists.txt b/utils/subfiling_vfd/CMakeLists.txt
new file mode 100644
index 0000000..696c9da
--- /dev/null
+++ b/utils/subfiling_vfd/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required (VERSION 3.10)
+project (HDF5_UTILS_SUBFILINGVFD C)
+
+configure_file (${HDF5_UTILS_SUBFILINGVFD_SOURCE_DIR}/h5fuse.sh.in ${HDF5_BINARY_DIR}/h5fuse.sh @ONLY)
+
+install (
+ FILES ${HDF5_BINARY_DIR}/h5fuse.sh
+ DESTINATION ${HDF5_INSTALL_BIN_DIR}
+ PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE
+ COMPONENT utilsapplications
+)
diff --git a/utils/subfiling_vfd/h5fuse.sh b/utils/subfiling_vfd/h5fuse.sh
new file mode 100755
index 0000000..8170586
--- /dev/null
+++ b/utils/subfiling_vfd/h5fuse.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+
+# Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling
+# configuration file either as a command-line argument, or the script will
+# search for the *.config file in the current directory.
+
+GRN='\033[0;32m'
+RED='\033[0;31m'
+PUR='\033[0;35m'
+NC='\033[0m' # No Color
+
+## CONFIG FILE CHECKS ##
+
+if [ $# -eq 0 ]; then
+ nfiles=$(find . -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m)
+ if [[ "$nfiles" != "1" ]]; then
+ echo -e "$RED More than one .config file found in current directory. $NC"
+ exit 1
+ fi
+ file_config=$(find . -maxdepth 1 -type f -iname "*.config")
+else
+ file_config=$1
+fi
+
+if [ ! -f "$file_config" ]; then
+ echo -e "$RED $file_config does not exist. $NC"
+ exit 1
+fi
+
+stripe_size=$(grep "stripe_size=" $file_config | cut -d "=" -f2)
+if test -z "$stripe_size"; then
+ echo -e "$RED failed to find stripe_size in $file_config $NC"
+ exit 1
+fi
+
+subfiles=( $( sed -e '1,/hdf5_file=/d' $file_config ) )
+#for i in "${subfiles[@]}"; do
+# echo "$i"
+#done
+if test -z "$subfiles"; then
+ echo -e "$RED failed to find subfiles list in $file_config $NC"
+ exit 1
+fi
+
+hdf5_file=$(grep "hdf5_file=" $file_config | cut -d "=" -f2)
+if test -z "$hdf5_file"; then
+ echo -e "$RED failed to find hdf5 output file in $file_config $NC"
+ exit 1
+fi
+
+rm -f $hdf5_file
+
+## COMBINE SUBFILES INTO AN HDF5 FILE ##
+
+skip=0
+status=$nfiles
+START="$(date +%s%N)"
+while [ $status -gt 0 ]; do
+ icnt=0
+ for i in "${subfiles[@]}"; do
+ fsize=$(wc -c $i | awk '{print $1}')
+ if [ $(($skip*$stripe_size)) -le $fsize ]; then
+ EXEC="dd count=1 bs=$stripe_size if=$i of=$hdf5_file skip=$skip oflag=append conv=notrunc"
+ echo -e "$GRN $EXEC $NC"
+ err="$( $EXEC 2>&1 > /dev/null &)"
+ icnt=$(($icnt+1))
+ else
+ subfiles=("${subfiles[@]:0:$icnt}" "${subfiles[@]:$(($icnt+1))}")
+ status=${#subfiles[@]}
+ fi
+ done; wait
+ skip=$(($skip+1))
+done
+END=$[ $(date +%s%N) - ${START} ]
+DURATION_SEC=$(awk -vp=$END -vq=0.000000001 'BEGIN{printf "%.4f" ,p * q}')
+echo -e "$PUR COMPLETION TIME = $DURATION_SEC s $NC"
+
+
diff --git a/utils/subfiling_vfd/h5fuse.sh.in b/utils/subfiling_vfd/h5fuse.sh.in
new file mode 100755
index 0000000..b526b0b
--- /dev/null
+++ b/utils/subfiling_vfd/h5fuse.sh.in
@@ -0,0 +1,165 @@
+#!/bin/bash
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+BLD='\033[1m'
+GRN='\033[0;32m'
+RED='\033[0;31m'
+PUR='\033[0;35m'
+NC='\033[0m' # No Color
+
+############################################################
+# Usage #
+############################################################
+function usage() {
+ echo ""
+ # Display usage
+ echo "Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling
+ configuration file either as a command-line argument, or the script will
+ search for the *.config file in the current directory."
+ echo ""
+ echo "usage: h5fuse.sh [-h] [-f filename]"
+ echo "-h Print this help."
+ echo "-f filename Subfile configuration file."
+ echo ""
+}
+
+############################################################
+############################################################
+# Main program #
+############################################################
+############################################################
+
+############################################################
+# Process the input options. Add options as needed. #
+############################################################
+# Get the options
+file_config=""
+
+while getopts ":h:f:" option; do
+ case $option in
+ h) # display Help
+ usage
+ exit;;
+ f) # subfiling configureation file
+ file_config=$OPTARG;;
+ \?) # Invalid option
+ echo -e "$RED ERROR: Invalid option ${BLD}-${OPTARG}${RED} $NC"
+ usage
+ exit 1;;
+ * ) usage
+ exit 1;;
+ esac
+done
+
+FAILED=1
+nfiles=1
+############################################################
+# Configure file checks #
+############################################################
+if [ -z "$file_config" ]; then
+ nfiles=$(find . -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m)
+ if [[ "$nfiles" != "1" ]]; then
+ if [[ "$nfiles" == "0" ]]; then
+ echo -e "$RED Failed to find .config file in current directory. $NC"
+ usage
+ exit $FAILED
+ else
+ echo -e "$RED More than one .config file found in current directory. $NC"
+ usage
+ exit $FAILED
+ fi
+ fi
+ file_config=$(find . -maxdepth 1 -type f -iname "*.config")
+fi
+
+if [ ! -f "$file_config" ]; then
+ echo -e "${RED} configuration file ${BLD}$file_config${NC} ${RED}does not exist. $NC"
+ exit $FAILED
+fi
+
+stripe_size=$(grep "stripe_size=" "$file_config" | cut -d "=" -f2)
+if test -z "$stripe_size"; then
+ echo -e "$RED failed to find stripe_size in $file_config $NC"
+ exit $FAILED
+fi
+
+hdf5_file="$(grep "hdf5_file=" "$file_config" | cut -d "=" -f2)"
+if test -z "$hdf5_file"; then
+ echo -e "$RED failed to find hdf5 output file in $file_config $NC"
+ exit $FAILED
+fi
+
+subfile_dir="$(grep "subfile_dir=" "$file_config" | cut -d "=" -f2)"
+if test -z "$subfile_dir"; then
+ echo -e "$RED failed to find subfile directory in $file_config $NC"
+ exit $FAILED
+fi
+
+subfiles=( $( sed -e '1,/subfile_dir=/d' "$file_config" ) )
+#for i in "${subfiles[@]}"; do
+# echo "$i"
+#done
+if [ ${#subfiles[@]} -eq 0 ]; then
+ echo -e "$RED failed to find subfiles list in $file_config $NC"
+ exit $FAILED
+fi
+
+rm -f "$hdf5_file"
+
+## COMBINE SUBFILES INTO AN HDF5 FILE ##
+
+skip=0
+status=$nfiles
+START="$(date +%s%N)"
+while [ "$status" -gt 0 ]; do
+ icnt=0
+ for i in "${subfiles[@]}"; do
+ subfile="${subfile_dir}/${i}"
+ # Verify the file exists
+ if [ ! -f "${subfile}" ]; then
+ echo -e "$RED ERROR: file \"${subfile}\" does not exist. $NC"
+ exit $FAILED
+ fi
+
+ # Verify the file is not being accessed by a process
+ t_max=60
+ t_sleep=1
+ t_elapsed=0
+
+ while fuser -s "${subfile}"; do
+ if [[ $((t_elapsed % 5)) -eq 0 ]]; then
+ echo -e "$GRN waiting for process to finish accessing file \"${subfile}\" ... [${t_elapsed}s/${t_max}s] $NC"
+ fi
+ sleep $t_sleep
+ t_elapsed=$((t_elapsed+t_sleep))
+ if [[ $t_elapsed -ge $t_max ]]; then
+ echo -e "$RED ERROR: file \"${subfile}\" still has process accessing it after ${t_elapsed}s $NC"
+ exit $FAILED
+ fi
+ done
+
+ fsize=$(wc -c "${subfile}" | awk '{print $1}')
+ if [ $((skip*stripe_size)) -le "$fsize" ]; then
+ EXEC="dd count=1 bs=$stripe_size if=$subfile of=$hdf5_file skip=$skip oflag=append conv=notrunc"
+ echo -e "$GRN $EXEC $NC"
+ err="$( $EXEC 2>&1 > /dev/null &)"
+ icnt=$((icnt+1))
+ else
+ subfiles=("${subfiles[@]:0:icnt}" "${subfiles[@]:$((icnt+1))}")
+ status=${#subfiles[@]}
+ fi
+ done; wait
+ skip=$((skip+1))
+done
+END=$(( $(date +%s%N) - START ))
+DURATION_SEC=$(awk -vp="$END" -vq=0.000000001 'BEGIN{printf "%.4f" ,p * q}')
+echo -e "$PUR COMPLETION TIME = $DURATION_SEC s $NC"