summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorScot Breitenfeld <brtnfld@hdfgroup.org>2022-01-11 16:51:26 (GMT)
committerGitHub <noreply@github.com>2022-01-11 16:51:26 (GMT)
commit2bd950648e3f4f036c787f0f8a851ed9aa503d50 (patch)
treee19716b20d0f006b17a3cf4ebc38f4b9e93d74c3
parent65b1670861c62228cc68f98d4356edd180cfcb1f (diff)
downloadhdf5-2bd950648e3f4f036c787f0f8a851ed9aa503d50.zip
hdf5-2bd950648e3f4f036c787f0f8a851ed9aa503d50.tar.gz
hdf5-2bd950648e3f4f036c787f0f8a851ed9aa503d50.tar.bz2
Added script to join subfiles into a single HDF5 file (#1350)
-rw-r--r--MANIFEST3
-rwxr-xr-xutils/subfiling_vfd/h5fuse.sh89
2 files changed, 92 insertions, 0 deletions
diff --git a/MANIFEST b/MANIFEST
index 34d937f..a837154 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -3011,6 +3011,9 @@
./utils/mirror_vfd/mirror_server_stop.c
./utils/mirror_vfd/mirror_writer.c
+# Subfiling VFD utilities
+./utils/subfiling_vfd/h5fuse.sh
+
# test utilities
./utils/test/Makefile.am
./utils/test/swmr_check_compat_vfd.c
diff --git a/utils/subfiling_vfd/h5fuse.sh b/utils/subfiling_vfd/h5fuse.sh
new file mode 100755
index 0000000..8170586
--- /dev/null
+++ b/utils/subfiling_vfd/h5fuse.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+
+# Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling
+# configuration file either as a command-line argument, or the script will
+# search for the *.config file in the current directory.
+
+GRN='\033[0;32m'
+RED='\033[0;31m'
+PUR='\033[0;35m'
+NC='\033[0m' # No Color
+
+## CONFIG FILE CHECKS ##
+
+if [ $# -eq 0 ]; then
+ nfiles=$(find . -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m)
+ if [[ "$nfiles" != "1" ]]; then
+ echo -e "$RED More than one .config file found in current directory. $NC"
+ exit 1
+ fi
+ file_config=$(find . -maxdepth 1 -type f -iname "*.config")
+else
+ file_config=$1
+fi
+
+if [ ! -f "$file_config" ]; then
+ echo -e "$RED $file_config does not exist. $NC"
+ exit 1
+fi
+
+stripe_size=$(grep "stripe_size=" $file_config | cut -d "=" -f2)
+if test -z "$stripe_size"; then
+ echo -e "$RED failed to find stripe_size in $file_config $NC"
+ exit 1
+fi
+
+subfiles=( $( sed -e '1,/hdf5_file=/d' $file_config ) )
+#for i in "${subfiles[@]}"; do
+# echo "$i"
+#done
+if test -z "$subfiles"; then
+ echo -e "$RED failed to find subfiles list in $file_config $NC"
+ exit 1
+fi
+
+hdf5_file=$(grep "hdf5_file=" $file_config | cut -d "=" -f2)
+if test -z "$hdf5_file"; then
+ echo -e "$RED failed to find hdf5 output file in $file_config $NC"
+ exit 1
+fi
+
+rm -f $hdf5_file
+
+## COMBINE SUBFILES INTO AN HDF5 FILE ##
+
+skip=0
+status=$nfiles
+START="$(date +%s%N)"
+while [ $status -gt 0 ]; do
+ icnt=0
+ for i in "${subfiles[@]}"; do
+ fsize=$(wc -c $i | awk '{print $1}')
+ if [ $(($skip*$stripe_size)) -le $fsize ]; then
+ EXEC="dd count=1 bs=$stripe_size if=$i of=$hdf5_file skip=$skip oflag=append conv=notrunc"
+ echo -e "$GRN $EXEC $NC"
+ err="$( $EXEC 2>&1 > /dev/null &)"
+ icnt=$(($icnt+1))
+ else
+ subfiles=("${subfiles[@]:0:$icnt}" "${subfiles[@]:$(($icnt+1))}")
+ status=${#subfiles[@]}
+ fi
+ done; wait
+ skip=$(($skip+1))
+done
+END=$[ $(date +%s%N) - ${START} ]
+DURATION_SEC=$(awk -vp=$END -vq=0.000000001 'BEGIN{printf "%.4f" ,p * q}')
+echo -e "$PUR COMPLETION TIME = $DURATION_SEC s $NC"
+
+