summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrawarren <richardwarren2@verizon.net>2021-11-29 21:25:23 (GMT)
committerGitHub <noreply@github.com>2021-11-29 21:25:23 (GMT)
commit720ddb20f347f5ea4e573c44f64e1886d1dc1038 (patch)
treebdd32da8424488f6d10221518c08ed907ac2be18
parent9cdc6d58bdc0a8bce74559d15fae1284beb82033 (diff)
downloadhdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.zip
hdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.tar.gz
hdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.tar.bz2
Add support for parallel tools based on the 3rd party library mpiFileUtils (libMFU) … (#1177)
Adds tool h5dwalk and configure options to enable building it. Co-authored-by: Richard Warren <Richard.Warren@hdfgroup.org> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Richard.Warren <richard.warren@jelly.ad.hdfgroup.org> Co-authored-by: Larry Knox <lrknox@hdfgroup.org>
-rw-r--r--CMakeLists.txt25
-rw-r--r--MANIFEST20
-rw-r--r--Makefile.am4
-rw-r--r--config/cmake/FindCIRCLE.cmake49
-rw-r--r--config/cmake/FindDTCMP.cmake48
-rw-r--r--config/cmake/FindMFU.cmake100
-rw-r--r--configure.ac142
-rw-r--r--release_docs/RELEASE.txt19
-rw-r--r--utils/CMakeLists.txt5
-rw-r--r--utils/Makefile.am8
-rw-r--r--utils/tools/CMakeLists.txt12
-rw-r--r--utils/tools/Makefile.am38
-rw-r--r--utils/tools/h5dwalk/CMakeLists.txt66
-rw-r--r--utils/tools/h5dwalk/Makefile.am37
-rw-r--r--utils/tools/h5dwalk/h5dwalk.142
-rw-r--r--utils/tools/h5dwalk/h5dwalk.c1712
-rw-r--r--utils/tools/test/CMakeLists.txt8
-rw-r--r--utils/tools/test/Makefile.am32
-rw-r--r--utils/tools/test/h5dwalk/CMakeLists.txt15
-rw-r--r--utils/tools/test/h5dwalk/CMakeTests.cmake56
-rw-r--r--utils/tools/test/h5dwalk/Makefile.am43
-rw-r--r--utils/tools/test/h5dwalk/copy_demo_files.sh.in86
-rw-r--r--utils/tools/test/h5dwalk/help.h5dwalk13
-rw-r--r--utils/tools/test/h5dwalk/testh5dwalk.sh.in249
24 files changed, 2825 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2b5ff7f..dcee4f9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1007,6 +1007,31 @@ endif ()
# Option to build HDF5 Utilities
#-----------------------------------------------------------------------------
if (EXISTS "${HDF5_SOURCE_DIR}/utils" AND IS_DIRECTORY "${HDF5_SOURCE_DIR}/utils")
+ option (HDF5_BUILD_PARALLEL_TOOLS "Build Parallel HDF5 Tools" OFF) # opt-in; only acted on when HDF5_ENABLE_PARALLEL is also set
+ if (HDF5_BUILD_PARALLEL_TOOLS AND HDF5_ENABLE_PARALLEL)
+   list (APPEND CMAKE_PREFIX_PATH "${HDF_RESOURCES_DIR}") # was the literal text "$HDF_RESOURCES_DIR"; append so user-supplied prefixes survive
+   find_package(MFU REQUIRED)
+   if (MFU_FOUND)
+     message(STATUS "LL_PATH=${LL_PATH}")
+     set (H5_HAVE_LIBMFU 1)
+     set (H5_HAVE_MFU_H 1)
+     list (APPEND CMAKE_REQUIRED_INCLUDES "${MFU_INCLUDE_DIR}") # append: keep include dirs registered before this point
+     set (MFU_LIBRARY_DEBUG "${MFU_LIBRARY}") # "$MFU_LIBRARY" without braces is never expanded by CMake
+     set (MFU_LIBRARY_RELEASE "${MFU_LIBRARY}")
+   endif ()
+   find_package(CIRCLE REQUIRED)
+   if (CIRCLE_FOUND)
+     set (H5_HAVE_LIBCIRCLE 1)
+     set (H5_HAVE_CIRCLE_H 1)
+     list (APPEND CMAKE_REQUIRED_INCLUDES "${CIRCLE_INCLUDE_DIR}") # append: do not drop the MFU include dir added above
+   endif ()
+   find_package(DTCMP REQUIRED)
+   if (DTCMP_FOUND)
+     set (H5_HAVE_LIBDTCMP 1)
+     set (H5_HAVE_DTCMP_H 1)
+     list (APPEND CMAKE_REQUIRED_INCLUDES "${DTCMP_INCLUDE_DIR}") # append: do not drop the MFU/CIRCLE include dirs
+   endif ()
+ endif ()
add_subdirectory (utils)
endif ()
diff --git a/MANIFEST b/MANIFEST
index 1cbb856..1550a94 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -202,6 +202,10 @@
./config/intel-warnings/win-general
./config/intel-warnings/ifort-general
+./config/cmake/FindMFU.cmake
+./config/cmake/FindDTCMP.cmake
+./config/cmake/FindCIRCLE.cmake
+
./config/site-specific/BlankForm
./doc/branches-explained.md
@@ -3001,6 +3005,22 @@
./utils/test/Makefile.am
./utils/test/swmr_check_compat_vfd.c
+# parallel tools (h5dwalk) and tests
+./utils/tools/CMakeLists.txt
+./utils/tools/Makefile.am
+./utils/tools/h5dwalk/CMakeLists.txt
+./utils/tools/h5dwalk/Makefile.am
+./utils/tools/h5dwalk/h5dwalk.1
+./utils/tools/h5dwalk/h5dwalk.c
+./utils/tools/test/CMakeLists.txt
+./utils/tools/test/Makefile.am
+./utils/tools/test/h5dwalk/CMakeLists.txt
+./utils/tools/test/h5dwalk/CMakeTests.cmake
+./utils/tools/test/h5dwalk/Makefile.am
+./utils/tools/test/h5dwalk/copy_demo_files.sh.in
+./utils/tools/test/h5dwalk/help.h5dwalk
+./utils/tools/test/h5dwalk/testh5dwalk.sh.in
+
# high level libraries
./hl/Makefile.am
./hl/examples/Makefile.am
diff --git a/Makefile.am b/Makefile.am
index 8518114..2a544f4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -88,9 +88,9 @@ else
TOOLS_DIR=
endif
-SUBDIRS = src $(TESTSERIAL_DIR) $(TESTPARALLEL_DIR) bin utils $(TOOLS_DIR) . \
+SUBDIRS = src $(TESTSERIAL_DIR) $(TESTPARALLEL_DIR) bin $(TOOLS_DIR) utils . \
$(CXX_DIR) $(FORTRAN_DIR) $(JAVA_DIR) $(HDF5_HL_DIR)
-DIST_SUBDIRS = src test testpar utils tools . c++ fortran hl examples java
+DIST_SUBDIRS = src test testpar tools utils . c++ fortran hl examples java
# Some files generated during configure that should be cleaned
DISTCLEANFILES=config/stamp1 config/stamp2
diff --git a/config/cmake/FindCIRCLE.cmake b/config/cmake/FindCIRCLE.cmake
new file mode 100644
index 0000000..b36d76c
--- /dev/null
+++ b/config/cmake/FindCIRCLE.cmake
@@ -0,0 +1,49 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindCIRCLE
+----------
+
+Find the native CIRCLE includes and library
+
+This module defines
+
+::
+
+ CIRCLE_INCLUDE_DIR, where to find libcircle.h, etc.
+ CIRCLE_LIBRARIES, the libraries required to use CIRCLE.
+ CIRCLE_FOUND, If false, do not try to use CIRCLE.
+
+also defined, but not for general use are
+
+::
+
+ CIRCLE_LIBRARY, where to find the CIRCLE library.
+#]=======================================================================]
+
+if(DEFINED ENV{MFU_ROOT}) # derive the header/library search hints from the MFU_ROOT prefix
+  set(ENV{MFU_INCLUDE} "$ENV{MFU_ROOT}/include")
+  set(ENV{MFU_LIB} "$ENV{MFU_ROOT}/lib")
+  set(ENV{MFU_LIB64} "$ENV{MFU_ROOT}/lib64")
+elseif(NOT CIRCLE_FIND_QUIETLY) # print the hint unless the caller asked for find_package(CIRCLE QUIET)
+  message("CIRCLE_LIBRARY: If you have problems building this library,\nconsider setting the MFU_ROOT environment variable to indicate\nwhere to find the support libraries and header files!")
+endif()
+
+find_path(CIRCLE_INCLUDE_DIR # directory that contains libcircle.h
+  NAMES libcircle.h
+  HINTS ENV MFU_INCLUDE)
+
+find_library(CIRCLE_LIBRARY # the circle library; both lib and lib64 under MFU_ROOT are hinted
+  NAMES circle
+  HINTS ENV MFU_LIB ENV MFU_LIB64
+  )
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(CIRCLE REQUIRED_VARS CIRCLE_LIBRARY CIRCLE_INCLUDE_DIR) # sets CIRCLE_FOUND from the two cache entries
+
+if(CIRCLE_FOUND)
+  set(CIRCLE_LIBRARIES ${CIRCLE_LIBRARY} )
+endif()
+
+mark_as_advanced(CIRCLE_INCLUDE_DIR CIRCLE_LIBRARY)
diff --git a/config/cmake/FindDTCMP.cmake b/config/cmake/FindDTCMP.cmake
new file mode 100644
index 0000000..b95ef20
--- /dev/null
+++ b/config/cmake/FindDTCMP.cmake
@@ -0,0 +1,48 @@
+# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindDTCMP
+---------
+
+Find the native DTCMP includes and library
+
+This module defines
+
+::
+
+ DTCMP_INCLUDE_DIR, where to find dtcmp.h, etc.
+ DTCMP_LIBRARIES, the libraries required to use DTCMP.
+ DTCMP_FOUND, If false, do not try to use DTCMP.
+
+also defined, but not for general use are
+
+::
+
+ DTCMP_LIBRARY, where to find the DTCMP library.
+#]=======================================================================]
+
+if(DEFINED ENV{MFU_ROOT}) # derive the header/library search hints from the MFU_ROOT prefix
+  set(ENV{MFU_INCLUDE} "$ENV{MFU_ROOT}/include")
+  set(ENV{MFU_LIB} "$ENV{MFU_ROOT}/lib")
+  set(ENV{MFU_LIB64} "$ENV{MFU_ROOT}/lib64")
+elseif(NOT DTCMP_FIND_QUIETLY) # print the hint unless the caller asked for find_package(DTCMP QUIET)
+  message("DTCMP_LIBRARY: If you have problems building this library,\nconsider setting the MFU_ROOT environment variable to indicate\nwhere to find the support libraries and header files!")
+endif()
+
+find_path(DTCMP_INCLUDE_DIR # directory that contains dtcmp.h
+  NAMES dtcmp.h
+  HINTS ENV MFU_INCLUDE)
+
+find_library(DTCMP_LIBRARY # the dtcmp library; both lib and lib64 under MFU_ROOT are hinted
+  NAMES dtcmp
+  HINTS ENV MFU_LIB ENV MFU_LIB64)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(DTCMP REQUIRED_VARS DTCMP_LIBRARY DTCMP_INCLUDE_DIR) # sets DTCMP_FOUND from the two cache entries
+
+if(DTCMP_FOUND)
+  set(DTCMP_LIBRARIES ${DTCMP_LIBRARY} )
+endif()
+
+mark_as_advanced(DTCMP_INCLUDE_DIR DTCMP_LIBRARY)
diff --git a/config/cmake/FindMFU.cmake b/config/cmake/FindMFU.cmake
new file mode 100644
index 0000000..37699e2
--- /dev/null
+++ b/config/cmake/FindMFU.cmake
@@ -0,0 +1,100 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+#########################################################################
+
+# - Derived from the FindTiff.cmake and FindJPEG.cmake modules that are included with CMake
+# FindMFU
+
+# Find the native MFU includes and library
+
+# Imported targets
+##################
+
+# This module defines the following :prop_tgt:`IMPORTED` targets:
+#
+# MFU::MFU
+# The MFU library, if found.
+#
+# Result variables
+###################
+
+# This module will set the following variables in your project:
+
+# MFU_FOUND, true if the MFU headers and libraries were found.
+# MFU_INCLUDE_DIR, the directory containing the MFU headers.
+# MFU_INCLUDE_DIRS, the directory containing the MFU headers.
+# MFU_LIBRARIES, libraries to link against to use MFU.
+
+# Cache variables
+#################
+
+# The following variables may also be set:
+
+# MFU_LIBRARY, where to find the MFU library.
+# message (STATUS "Finding MFU library and headers..." )
+#########################################################################
+
+
+
+find_path(MFU_INCLUDE_DIR # directory that contains mfu.h
+  NAMES mfu.h
+  HINTS "$ENV{MFU_ROOT}/include"
+)
+find_library(MFU_LIBRARY # hint both lib64 and lib, matching LL_PATH below
+  NAMES mfu
+  HINTS "$ENV{MFU_ROOT}/lib64" "$ENV{MFU_ROOT}/lib"
+)
+
+if(NOT MFU_LIBRARY) # fallback: retry on the default search paths with alternative names
+  set(mfu_names ${MFU_NAMES} mfu libmfu)
+  find_library(MFU_LIBRARY NAMES ${mfu_names})
+  include(SelectLibraryConfigurations)
+  select_library_configurations(MFU)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(MFU
+  REQUIRED_VARS MFU_LIBRARY MFU_INCLUDE_DIR) # sets MFU_FOUND
+
+if(MFU_FOUND)
+  set(MFU_LIBRARIES "${MFU_LIBRARY}")
+  set(MFU_INCLUDE_DIRS "${MFU_INCLUDE_DIR}")
+  set(LL_PATH "$ENV{MFU_ROOT}/lib64:$ENV{MFU_ROOT}/lib") # colon-separated library dirs under MFU_ROOT
+  if(NOT TARGET MFU::MFU) # define the imported target only once
+    add_library(MFU::MFU UNKNOWN IMPORTED)
+    if(MFU_INCLUDE_DIRS)
+      set_target_properties(MFU::MFU PROPERTIES
+        INTERFACE_INCLUDE_DIRECTORIES "${MFU_INCLUDE_DIRS}")
+    endif()
+    if(EXISTS "${MFU_LIBRARY}")
+      set_target_properties(MFU::MFU PROPERTIES
+        IMPORTED_LINK_INTERFACE_LANGUAGES "C"
+        IMPORTED_LOCATION "${MFU_LIBRARY}")
+    endif()
+  endif()
+endif()
+
+# Report the results.
+if (NOT MFU_FOUND)
+  set (MFU_DIR_MESSAGE
+    "Mfu was not found. Make sure MFU_LIBRARY and MFU_INCLUDE_DIR are set or set the MFU_ROOT environment variable."
+  )
+  if (NOT MFU_FIND_QUIETLY)
+    if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.15.0") # the VERBOSE message level is only used on CMake >= 3.15
+      message (VERBOSE "${MFU_DIR_MESSAGE}")
+    endif ()
+  else ()
+    if (MFU_FIND_REQUIRED) # NOTE(review): find_package_handle_standard_args normally aborts first for REQUIRED packages - confirm this branch is still needed
+      message (FATAL_ERROR "Mfu was NOT found and is Required by this project")
+    endif ()
+  endif ()
+endif ()
diff --git a/configure.ac b/configure.ac
index 8559792..ca82d29 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1574,6 +1574,7 @@ case "X-$withval" in
;;
esac
+
## ----------------------------------------------------------------------
## Make the external filters list available to *.in files
## At this point it's unset (no external filters by default) but it
@@ -1682,6 +1683,7 @@ fi
## command-line switch. The value is an include path and/or a library path.
## If the library path is specified then it must be preceded by a comma.
##
+AC_SUBST([LL_PATH])
AC_SUBST([USE_FILTER_SZIP]) USE_FILTER_SZIP="no"
AC_ARG_WITH([szlib],
[AS_HELP_STRING([--with-szlib=DIR],
@@ -1766,7 +1768,7 @@ if test "x$HAVE_SZLIB" = "xyes" -a "x$HAVE_SZLIB_H" = "xyes"; then
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$szlib_lib"
fi
- AC_SUBST([LL_PATH]) LL_PATH="$LD_LIBRARY_PATH"
+ LL_PATH="$LD_LIBRARY_PATH"
AC_CACHE_VAL([hdf5_cv_szlib_can_encode],
[AC_RUN_IFELSE(
@@ -3032,6 +3034,138 @@ if test -n "$PARALLEL"; then
fi
## ----------------------------------------------------------------------
+## Build parallel tools if parallel tools, parallel, and build tools options
+## are all enabled.
+##
+AC_SUBST([PARALLEL_TOOLS])
+
+## Default is no parallel tools
+PARALLEL_TOOLS=no
+
+AC_MSG_CHECKING([parallel tools])
+AC_ARG_ENABLE([parallel-tools],
+ [AS_HELP_STRING([--enable-parallel-tools],
+ [Enable building parallel tools.
+ [default=no]])],
+ [PARALLEL_TOOLS=$enableval])
+
+if test "X${PARALLEL_TOOLS}" = "Xyes"; then
+ if test "X${HDF5_TOOLS}" != "Xyes"; then
+ AC_MSG_ERROR([--enable-tools is required for --enable-parallel-tools])
+ fi
+ if test "X${PARALLEL}" != "Xyes"; then
+ AC_MSG_ERROR([--enable-parallel is required for --enable-parallel-tools])
+ fi
+fi
+
+case "X-$PARALLEL_TOOLS" in
+ X-|X-no)
+ AC_MSG_RESULT([no])
+ ;;
+ X-yes)
+ AC_MSG_RESULT([yes])
+ ;;
+ *)
+ ;;
+esac
+
+if test "X${PARALLEL_TOOLS}" = "Xyes"; then
+ ## Is the mpiFileUtils library (libmfu) required and available?
+ ##
+ AC_SUBST([H5DWALK_LDFLAGS])
+ AC_SUBST([H5DWALK_LIBS])
+ AC_SUBST([H5DWALK_CPPFLAGS])
+
+ H5DWALK_LDFLAGS="${H5DWALK_LDFLAGS}"
+
+ ## Default is not present
+ HAVE_LIBMFU=no
+
+ AC_ARG_WITH([libmfu],
+ [AS_HELP_STRING([--with-libmfu=DIR],
+ [Use the libmfu library [default=no]])],,
+ [withval=no])
+
+ case "X-$withval" in
+ X-yes)
+ HAVE_LIBMFU="yes"
+ AC_CHECK_HEADERS([mfu.h],, [unset HAVE_LIBMFU])
+ if test "x$HAVE_LIBMFU" = "xyes"; then
+ AC_CHECK_LIB([mfu], [mfu_init], [H5DWALK_LIBS="-lmfu"], [unset HAVE_LIBMFU])
+ fi
+ if test -z "$HAVE_LIBMFU" -a -n "$HDF5_CONFIG_ABORT"; then
+ AC_MSG_ERROR([couldn't find libmfu library])
+ fi
+ ;;
+ X-|X-no|X-none)
+ HAVE_LIBMFU="no"
+ AC_MSG_CHECKING([for libmfu library])
+ AC_MSG_RESULT([suppressed])
+ ;;
+ *)
+ HAVE_LIBMFU="yes"
+ case "$withval" in
+   *,*)
+     libmfu_inc="`echo $withval |cut -f1 -d,`"
+     libmfu_lib="`echo $withval |cut -f2 -d, -s`"; libcircle_lib="$libmfu_lib" # INC,LIB form: reuse the one lib dir so later -L and LD_LIBRARY_PATH entries are never empty
+     ;;
+   *)
+     if test -n "$withval"; then
+       libmfu_inc="$withval/include"
+       libmfu_lib="$withval/lib64"
+       libcircle_lib="$withval/lib"
+     fi
+     ;;
+ esac
+
+ saved_CPPFLAGS="$CPPFLAGS"
+ saved_AM_CPPFLAGS="$AM_CPPFLAGS"
+ saved_LDFLAGS="$LDFLAGS"
+ saved_AM_LDFLAGS="$AM_LDFLAGS"
+
+ ## For these checks we need the libmfu locations added to CPPFLAGS,
+ ## AM_CPPFLAGS, LDFLAGS, and AM_LDFLAGS. The third param should set them
+ ## back to these saved values. If the checks pass, then normally these four
+ ## flag variables would be updated, but in this case we put the changes in
+ ## variables specific to H5DWALK since they aren't used elsewhere.
+ if test -n "$libmfu_inc"; then
+ CPPFLAGS="$CPPFLAGS -I$libmfu_inc"
+ AM_CPPFLAGS="$AM_CPPFLAGS -I$libmfu_inc"
+ fi
+
+ if test -n "$libmfu_lib"; then
+ LDFLAGS="$LDFLAGS -L$libmfu_lib -L$libcircle_lib"
+ AM_LDFLAGS="$AM_LDFLAGS -L$libmfu_lib -L$libcircle_lib"
+ fi
+
+ if test "x$HAVE_LIBMFU" = "xyes"; then
+ AC_CHECK_LIB([mfu], [mfu_init],[H5DWALK_LIBS="-lmfu"], [CPPFLAGS="$saved_CPPFLAGS"; AM_CPPFLAGS="$saved_AM_CPPFLAGS"; LDFLAGS="$saved_LDFLAGS"; AM_LDFLAGS="$saved_AM_LDFLAGS"; unset HAVE_LIBMFU])
+ if test -n "$HAVE_LIBMFU"; then
+ AC_CHECK_HEADERS([mfu.h],[H5DWALK_CPPFLAGS="-I$libmfu_inc"],[CPPFLAGS="$saved_CPPFLAGS"; AM_CPPFLAGS="$saved_AM_CPPFLAGS"; LDFLAGS="$saved_LDFLAGS"; AM_LDFLAGS="$saved_AM_LDFLAGS"; unset HAVE_LIBMFU])
+ fi
+ fi
+
+ if test -z "$HAVE_LIBMFU" -a -n "$HDF5_CONFIG_ABORT"; then
+ AC_MSG_ERROR([couldn't find libmfu library])
+ else
+ H5DWALK_LDFLAGS="-L$libmfu_lib -L$libcircle_lib"
+ fi
+
+ if test -z "$LD_LIBRARY_PATH"; then
+ export LD_LIBRARY_PATH="$libmfu_lib:$libcircle_lib"
+ else
+ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$libmfu_lib:$libcircle_lib"
+ fi
+
+ LL_PATH="$LD_LIBRARY_PATH"
+ ;;
+ esac
+fi
+
+##
+AM_CONDITIONAL([PARALLEL_TOOLS_CONDITIONAL], [test "X$HAVE_LIBMFU" = "Xyes"])
+
+## ----------------------------------------------------------------------
## Check if the map API is enabled by --enable-map-api
##
AC_SUBST([MAP_API])
@@ -4037,6 +4171,12 @@ AC_CONFIG_FILES([src/libhdf5.settings
utils/Makefile
utils/mirror_vfd/Makefile
utils/test/Makefile
+ utils/tools/Makefile
+ utils/tools/h5dwalk/Makefile
+ utils/tools/test/Makefile
+ utils/tools/test/h5dwalk/Makefile
+ utils/tools/test/h5dwalk/copy_demo_files.sh
+ utils/tools/test/h5dwalk/testh5dwalk.sh
tools/Makefile
tools/lib/Makefile
tools/libtest/Makefile
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index e035d03..d309e63 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -47,6 +47,12 @@ New Features
Configuration:
-------------
+ - Added new configure option to support building parallel tools.
+ See Tools below (autotools - CMake):
+ --enable-parallel-tools HDF5_BUILD_PARALLEL_TOOLS
+
+ (RAW - 2021/10/25)
+
- Added new configure options to enable dimension scales APIs (H5DS*) to
use new object references with the native VOL connector (aka native HDF5
library). New references are always used for non-native terminal VOL
@@ -929,6 +935,19 @@ New Features
Tools:
------
+ - Added a new (unix ONLY) parallel meta tool 'h5dwalk', which utilizes the
+ mpifileutils (https://hpc.github.io/mpifileutils) open source utility
+ library to enable parallel execution of other HDF5 tools.
+ This approach can greatly enhance the serial hdf5 tool performance over large
+ collections of files by utilizing MPI parallelism to distribute an application
+ load over many independent MPI ranks and files.
+
+ An introduction to the mpifileutils library and initial 'User Guide' for
+ the new 'h5dwalk' tool can be found at:
+ https://github.com/HDFGroup/hdf5doc/tree/master/RFCs/HDF5/tools/parallel_tools
+
+ (RAW - 2021/10/25)
+
- Refactored the perform tools and removed depends on test library.
Moved the perf and h5perf tools from tools/test/perform to
diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 7c263d1..a248ce1 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -9,3 +9,8 @@ option (HDF5_BUILD_UTILS "Build HDF5 Utils" ON)
if (HDF5_BUILD_UTILS)
add_subdirectory (mirror_vfd)
endif ()
+
+#-- Add the tools subdirectory (h5dwalk and its tests) when both parallel options are enabled
+if (HDF5_BUILD_PARALLEL_TOOLS AND HDF5_ENABLE_PARALLEL)
+ add_subdirectory(tools)
+endif()
diff --git a/utils/Makefile.am b/utils/Makefile.am
index 876dfb1..cd63db4 100644
--- a/utils/Makefile.am
+++ b/utils/Makefile.am
@@ -32,7 +32,13 @@ else
MIRROR_VFD_DIR=
endif
+if BUILD_TOOLS_CONDITIONAL
+ TOOLS_DIR =tools
+else
+ TOOLS_DIR=
+endif
+
# All subdirectories
-SUBDIRS=$(MIRROR_VFD_DIR) $(TESTUTIL_DIR)
+SUBDIRS=$(MIRROR_VFD_DIR) $(TESTUTIL_DIR) $(TOOLS_DIR)
include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/CMakeLists.txt b/utils/tools/CMakeLists.txt
new file mode 100644
index 0000000..49562d7
--- /dev/null
+++ b/utils/tools/CMakeLists.txt
@@ -0,0 +1,12 @@
+cmake_minimum_required (VERSION 3.12)
+project (HDF5_UTILS_TOOLS C) # sub-project grouping the tools that live under utils/tools
+
+
+if (HDF5_BUILD_PARALLEL_TOOLS) # h5dwalk is added only when parallel tools were requested
+ add_subdirectory (h5dwalk)
+endif()
+
+#-- Add the tests (only when BUILD_TESTING is enabled)
+if (BUILD_TESTING)
+ add_subdirectory (test)
+endif()
diff --git a/utils/tools/Makefile.am b/utils/tools/Makefile.am
new file mode 100644
index 0000000..0c89aff
--- /dev/null
+++ b/utils/tools/Makefile.am
@@ -0,0 +1,38 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+##
+## Makefile.am
+## Run automake to generate a Makefile.in from this file.
+##
+#
+# Tools HDF5 Makefile(.in)
+#
+
+include $(top_srcdir)/config/commence.am
+
+if PARALLEL_TOOLS_CONDITIONAL
+ H5DWALK=h5dwalk
+else
+ H5DWALK=
+endif
+
+if BUILD_TESTS_CONDITIONAL
+ TESTSERIAL_DIR =test
+else
+ TESTSERIAL_DIR=
+endif
+
+CONFIG=ordered
+
+# All subdirectories
+SUBDIRS=$(H5DWALK) $(TESTSERIAL_DIR)
+
+include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/h5dwalk/CMakeLists.txt b/utils/tools/h5dwalk/CMakeLists.txt
new file mode 100644
index 0000000..244cc26
--- /dev/null
+++ b/utils/tools/h5dwalk/CMakeLists.txt
@@ -0,0 +1,66 @@
+cmake_minimum_required (VERSION 3.12)
+project (HDF5_UTILS_TOOLS_H5DWALK C)
+
+# --------------------------------------------------------------------
+# Add the h5dwalk executables (static and/or shared variants)
+# --------------------------------------------------------------------
+if (NOT ONLY_SHARED_LIBS) # static-linked variant
+  add_executable (h5dwalk ${HDF5_UTILS_TOOLS_H5DWALK_SOURCE_DIR}/h5dwalk.c)
+# add_custom_target(generate_demo ALL
+# DEPENDS "${HDF5_TOOLS_DIR}/test/demo_destfiles.test"
+# )
+  target_include_directories (h5dwalk PRIVATE "${HDF5_TOOLS_DIR}/lib;${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};${MFU_INCLUDE_DIR};${CIRCLE_INCLUDE_DIR};${DTCMP_INCLUDE_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") # h5dwalk.c includes mfu.h, libcircle.h and dtcmp.h, so list all three locations
+  target_compile_options(h5dwalk PRIVATE "${HDF5_CMAKE_C_FLAGS}")
+  TARGET_C_PROPERTIES (h5dwalk STATIC)
+  target_link_libraries (h5dwalk PRIVATE ${HDF5_TOOLS_LIB_TARGET} ${HDF5_LIB_TARGET} ${MFU_LIBRARY} "$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_LIBRARIES}>")
+  set_target_properties (h5dwalk PROPERTIES FOLDER tools)
+  set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5dwalk")
+
+  set (H5_DEP_EXECUTABLES h5dwalk) # collected for the install rules at the end of this file
+endif ()
+
+if (BUILD_SHARED_LIBS) # shared-linked variant
+  add_executable (h5dwalk-shared ${HDF5_UTILS_TOOLS_H5DWALK_SOURCE_DIR}/h5dwalk.c)
+  target_include_directories (h5dwalk-shared PRIVATE "${HDF5_TOOLS_DIR}/lib;${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};${MFU_INCLUDE_DIR};${CIRCLE_INCLUDE_DIR};${DTCMP_INCLUDE_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") # same include set as the static variant
+  target_compile_options(h5dwalk-shared PRIVATE "${HDF5_CMAKE_C_FLAGS}")
+  TARGET_C_PROPERTIES (h5dwalk-shared SHARED)
+  target_link_libraries (h5dwalk-shared PRIVATE ${HDF5_TOOLS_LIBSH_TARGET} ${HDF5_LIBSH_TARGET} ${MFU_LIBRARY} "$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_LIBRARIES}>")
+  set_target_properties (h5dwalk-shared PROPERTIES FOLDER tools)
+  set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5dwalk-shared")
+
+  set (H5_DEP_EXECUTABLES ${H5_DEP_EXECUTABLES} h5dwalk-shared)
+endif ()
+
+#-----------------------------------------------------------------------------
+# Add Target to clang-format
+#-----------------------------------------------------------------------------
+if (HDF5_ENABLE_FORMATTERS)
+  if (NOT ONLY_SHARED_LIBS) # format through whichever variant is built
+    clang_format (HDF5_H5DWALK_SRC_FORMAT h5dwalk)
+  else ()
+    clang_format (HDF5_H5DWALK_SRC_FORMAT h5dwalk-shared)
+  endif ()
+endif ()
+
+##############################################################################
+##############################################################################
+### I N S T A L L A T I O N ###
+##############################################################################
+##############################################################################
+
+#-----------------------------------------------------------------------------
+# Rules for Installation of tools using make Install target
+#-----------------------------------------------------------------------------
+if (HDF5_EXPORTED_TARGETS)
+  foreach (exec ${H5_DEP_EXECUTABLES})
+    INSTALL_PROGRAM_PDB (${exec} ${HDF5_INSTALL_BIN_DIR} toolsapplications)
+  endforeach ()
+
+  install (
+    TARGETS
+    ${H5_DEP_EXECUTABLES}
+    EXPORT
+    ${HDF5_EXPORTED_TARGETS}
+    RUNTIME DESTINATION ${HDF5_INSTALL_BIN_DIR} COMPONENT toolsapplications
+  )
+endif ()
diff --git a/utils/tools/h5dwalk/Makefile.am b/utils/tools/h5dwalk/Makefile.am
new file mode 100644
index 0000000..34cdb32
--- /dev/null
+++ b/utils/tools/h5dwalk/Makefile.am
@@ -0,0 +1,37 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+##
+## Makefile.am
+## Run automake to generate a Makefile.in from this file.
+#
+# HDF5 Library Makefile(.in)
+#
+
+include $(top_srcdir)/config/commence.am
+
+# Include src directory
+AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/tools/lib $(H5DWALK_CPPFLAGS)
+
+# These are our main targets, the tools
+# h5dwalk_SOURCES=h5dwalk.c $(TOOLSOURCES)
+bin_PROGRAMS=h5dwalk
+#bin_SCRIPTS=install-examples
+
+# Add h5dwalk specific linker flags here
+h5dwalk_LDFLAGS = $(LT_STATIC_EXEC) $(AM_LDFLAGS) $(H5DWALK_LDFLAGS)
+
+# No additional generated files need to be cleaned for h5dwalk
+CLEANFILES=
+
+# All programs rely on hdf5 library and h5tools library
+h5dwalk_LDADD=$(LIBH5TOOLS) $(LIBHDF5) $(H5DWALK_LIBS)
+
+include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/h5dwalk/h5dwalk.1 b/utils/tools/h5dwalk/h5dwalk.1
new file mode 100644
index 0000000..60e1080
--- /dev/null
+++ b/utils/tools/h5dwalk/h5dwalk.1
@@ -0,0 +1,42 @@
+.TH "h5dwalk" 1
+.SH NAME
+h5dwalk \- Provides a means of extending HDF5 tools by using parallelism on groups of files.
+.SH SYNOPSIS
+h5dwalk [OPTIONS] -T h5tool [H5TOOL_options...]
+.SH DESCRIPTION
+h5dwalk utilizes the mpiFileUtils library to invoke a selected HDF5 tool on a collection of files. The mpiFileUtils library provides the facilities to walk directory trees and provide a selection of files contained therein. This selection can be filtered in various ways. At present, h5dwalk filters the original file selection to include only HDF5 formatted files. The resulting collection or collections can be utilized as the file inputs to the selected h5tool.
+.SH OPTIONS
+.TP
+.B \-h
+or
+.B \-\-help
+Print a usage message and exit.
+.TP
+.B \-i
+or
+.B \-\-input filename
+Read command input from a file. Not yet implemented.
+.TP
+.B \-o
+or
+.B \-\-output filename
+Captures the hdf5 tool output into a named file.
+.TP
+.B \-l
+or
+.B \-\-log [file]
+Captures hdf5 tool output into individual log files. If an optional file (directory) is specified, then output from all tool instances will be written in the given file directory. Without the optional filename, each tool instance output will be captured in a new log file whose name is associated with the hdf5 tool that was run and is written in the current working directory.
+.TP
+.B \-E
+or
+.B \-\-error [file]
+Show all HDF5 error reporting. Behavior is similar to --log, i.e. errors can either be logged in a single named file or in individual tool specific files. Not yet implemented.
+.TP
+.B \-T
+or
+.B \-\-tool hdf5_tool
+Specifies the hdf5 tool that should be invoked for each file in a collection of files. The collection consists of individual HDF5 files found by walking a specified directory tree which is used in place of the normal tool filename argument. The '-T' option should appear on the command line just prior to the HDF5 tool argument options.
+.TP
+.SH "SEE ALSO"
+\&\fIh5dump\fR\|(1), \fIh5diff\fR\|(1), \fIh5repart\fR\|(1),
+\&\fIh5import\fR\|(1), \fIgif2h5\fR\|(1), \fIh52gif\fR\|(1), \fIh5perf\fR\|(1)
diff --git a/utils/tools/h5dwalk/h5dwalk.c b/utils/tools/h5dwalk/h5dwalk.c
new file mode 100644
index 0000000..1f42aed
--- /dev/null
+++ b/utils/tools/h5dwalk/h5dwalk.c
@@ -0,0 +1,1712 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include "H5private.h"
+#include "h5tools.h"
+#include "h5tools_utils.h"
+#include "hdf5.h"
+
+#include "libcircle.h"
+#include "dtcmp.h"
+#include "mfu.h"
+#include "mfu_flist.h"
+#include "mfu_errors.h"
+#include "mfu_flist_internal.h"
+
+/* Name of tool */
+#define PROGRAMNAME "h5dwalk"
+
+#ifdef DAOS_SUPPORT
+#include "mfu_daos.h"
+#endif
+
+static char *user_cmd = NULL; /* copy of argv[0], set by save_command() */
+static char mpierrstr[MPI_MAX_ERROR_STRING]; /* scratch text filled in by MPI_Error_string() */
+static int mpierrlen; /* length reported by MPI_Error_string() */
+static int sg_mpi_rank = 0; /* non-zero ranks stay silent in usage() */
+static int current_input_index = 0; /* used as the log-file suffix while processing_inputfile is set */
+static int processing_inputfile = 0; /* non-zero: log names always carry the "_<index>" suffix */
+
+static void dh5tool_flist_write_text(const char *name, mfu_flist bflist);
+static void run_command(int argc, char **argv, char *cmdline, const char *fname);
+static void add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count);
+static int process_input_file(char *inputname, int myrank, int size);
+static void usage(void);
+
+H5_ATTR_NORETURN void h5dwalk_exit(int status);
+
+/* keep stats during walk */
+uint64_t total_dirs = 0;
+uint64_t total_files = 0;
+uint64_t total_links = 0;
+uint64_t total_unknown = 0;
+uint64_t total_bytes = 0;
+/* Global flags which indicate whether we need
+ * to capture tool outputs into a file...
+ * Related to this is whether the stderr should
+ * be logged separately.
+ */
+#define BUFT_SIZE 131072 /* size in bytes of each buffer that captures tool output */
+/* FIXME: 'buft_max' should probably be configurable.. */
+size_t buft_max = 64; /* number of buf_t pointers allocated for buf_cache */
+size_t buft_count = 0; /* number of buf_cache entries currently in use */
+buf_t **buf_cache = NULL; /* captured tool output; allocated lazily by run_command() */
+
+int log_output_in_single_file = 0; /* non-zero: run_command() pipes tool output into buf_cache */
+char *output_log_file = NULL; /* presumably the --output file name; set outside this section */
+
+int log_stdout_in_file = 0; /* non-zero: run_command() writes one log file per tool run */
+char *txtlog = NULL; /* optional directory for those log files; NULL means the current directory */
+
+int log_errors_in_file = 0; /* non-zero: stderr of a logged run goes to a separate ".err" file */
+char *errlog = NULL; /* presumably the --error argument; set outside this section */
+
+int use_config_file = 0; /* number of '@file' arguments recorded by count_dirpaths() */
+int config_index[4] = { /* argv indices of those '@file' arguments */
+ 0,
+};
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define MAX_DISTRIBUTE_SEPARATORS 128 /* capacity of distribute_option.separators */
+struct distribute_option {
+ int separator_number; /* number of valid entries in separators[] */
+ uint64_t separators[MAX_DISTRIBUTE_SEPARATORS]; /* bin boundaries (file sizes), kept in ascending order */
+};
+
+static const char * s_opts = "hl*E*i:o:T:"; /* short options; mirrors the l_opts table below */
+static struct h5_long_options l_opts[] = {{"help", no_arg, 'h'},
+ {"log_text", optional_arg, 'l'},
+ {"error", optional_arg, 'E'},
+ {"input", require_arg, 'i'},
+ {"output", require_arg, 'o'},
+ {"tool", require_arg, 'T'},
+ {NULL, 0, '\0'}};
+/* Remember the program name: store a private copy of argv0 in 'user_cmd'. */
+static void
+save_command(const char *argv0)
+{
+    char *cmd_copy;
+
+    assert(argv0);
+
+    cmd_copy = HDstrdup(argv0);
+    user_cmd = cmd_copy;
+}
+
+/*
+ * Build the default histogram bin boundaries for a file list.
+ *
+ * option               - receives the boundaries in option->separators[]:
+ *                        [0] = 1 byte, then 2^10, 2^20, ... up to the first
+ *                        power of 2^10 that covers the largest file.
+ * flist, size          - local file list and the number of entries in it.
+ * separators           - out: index of the last boundary that was written.
+ * global_max_file_size - out: largest file size found on any rank.
+ *
+ * Collective: every rank must call this (it performs an MPI_Allreduce), and
+ * every rank prints the "Max File Size" line.
+ */
+static void
+create_default_separators(struct distribute_option *option, mfu_flist *flist, uint64_t *size,
+                          size_t *separators, uint64_t *global_max_file_size)
+{
+    uint64_t    local_max_file_size = 0;
+    uint64_t    max_magnitude_bin   = 0;
+    uint64_t    power;
+    size_t      bin;
+    double      max_size_tmp;
+    const char *max_size_units;
+
+    /* get local max file size for Allreduce */
+    for (uint64_t i = 0; i < *size; i++) {
+        uint64_t file_size = mfu_flist_file_get_size(*flist, i);
+        if (file_size > local_max_file_size)
+            local_max_file_size = file_size;
+    }
+
+    /* get the max file size across all ranks */
+    MPI_Allreduce(&local_max_file_size, global_max_file_size, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+    /* print and convert max file size to appropriate units */
+    mfu_format_bytes(*global_max_file_size, &max_size_tmp, &max_size_units);
+    HDprintf("Max File Size: %.3lf %s\n", max_size_tmp, max_size_units);
+
+    /* Round log2(max size) up to the next multiple of 10.  log2(0) is
+     * -infinity and converting that to an integer is undefined, so an empty
+     * list (or one holding only empty files) keeps the single 1-byte bin. */
+    if (*global_max_file_size > 0)
+        max_magnitude_bin = (uint64_t)(ceil(log2((double)(*global_max_file_size)) / 10) * 10);
+
+    /* first boundary: everything up to one byte */
+    option->separators[0] = 1;
+
+    /* the caller adds one more bin on top of this count */
+    *separators = (size_t)(max_magnitude_bin / 10);
+
+    /* remaining boundaries: 2^10, 2^20, ...; saturate instead of overflowing
+     * once the exponent no longer fits in 64 bits */
+    bin = 1;
+    for (power = 10; power <= max_magnitude_bin; power += 10) {
+        option->separators[bin] = (power < 64) ? ((uint64_t)1 << power) : UINT64_MAX;
+        bin++;
+    }
+}
+
+/*
+ * Map callback: assign list entry 'idx' to a rank in round-robin order.
+ * Returns idx modulo 'ranks'; 'flist' and 'args' are unused.
+ */
+static int
+h5dwalk_map_fn(mfu_flist flist __attribute__((unused)), uint64_t idx, int ranks,
+               void *args __attribute__((unused)))
+{
+    /* Take the remainder in 64 bits.  Narrowing idx to int first could wrap
+     * to a negative value for very large lists and yield a negative rank. */
+    return (int)(idx % (uint64_t)ranks);
+}
+
+/*
+ * Print a histogram of file sizes for the list *pflist.
+ *
+ * file_histogram - non-zero: derive the bin boundaries from the data with
+ *                  create_default_separators(); zero: use the boundaries
+ *                  already stored in 'option'.
+ * option         - bin boundaries (filled in here when file_histogram is set).
+ * pflist         - file list to summarize.
+ * rank           - caller's MPI rank; only rank 0 prints the table.
+ *
+ * Collective: all ranks must call this (MPI_Allreduce).  Always returns 0.
+ */
+static int
+print_flist_distribution(int file_histogram, struct distribute_option *option, mfu_flist *pflist, int rank)
+{
+    /* file list to use */
+    mfu_flist flist = *pflist;
+
+    /* local list size, and the global max file size (filled in by
+     * create_default_separators, not otherwise used here) */
+    uint64_t size = mfu_flist_size(flist);
+    uint64_t global_max_file_size;
+
+    size_t separators = 0;
+    if (file_histogram)
+        create_default_separators(option, &flist, &size, &separators, &global_max_file_size);
+    else
+        separators = (size_t)option->separator_number;
+
+    /* one bin per separator plus a final bin for everything larger */
+    size_t    nbins = separators + 1;
+    uint64_t *dist  = (uint64_t *)MFU_MALLOC(nbins * sizeof(uint64_t));
+    for (size_t b = 0; b < nbins; b++)
+        dist[b] = 0;
+
+    /* for each file, find the first separator that is >= its size; a file
+     * larger than every separator lands in the last bin */
+    for (uint64_t i = 0; i < size; i++) {
+        uint64_t file_size = mfu_flist_file_get_size(flist, i);
+        size_t   bin       = 0;
+
+        while (bin < separators && file_size > option->separators[bin])
+            bin++;
+        dist[bin]++;
+    }
+
+    /* get the total sum across all of the bins */
+    uint64_t *disttotal = (uint64_t *)MFU_MALLOC(nbins * sizeof(uint64_t));
+    MPI_Allreduce(dist, disttotal, (int)nbins, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+
+    /* Print the file distribution */
+    if (rank == 0) {
+        double      size_tmp;
+        const char *size_units;
+
+        HDprintf("%-27s %s\n", "Range", "Number");
+        for (size_t i = 0; i < nbins; i++) {
+            /* lower edge of the bin */
+            HDprintf("%s", "[ ");
+            if (i == 0) {
+                HDprintf("%7.3lf %3s", 0.000, "B");
+            }
+            else {
+                mfu_format_bytes((uint64_t)option->separators[i - 1], &size_tmp, &size_units);
+                HDprintf("%7.3lf %3s", size_tmp, size_units);
+            }
+
+            HDprintf("%s", " - ");
+
+            /* upper edge and count; with user-supplied separators the last
+             * bin is open-ended and is labelled MAX */
+            if (!file_histogram && i == separators) {
+                HDprintf("%10s ) %" PRIu64 "\n", "MAX", disttotal[i]);
+            }
+            else {
+                mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
+                HDprintf("%7.3lf %3s ) %" PRIu64 "\n", size_tmp, size_units, disttotal[i]);
+            }
+        }
+    }
+
+    /* free the memory used to hold bin counts */
+    mfu_free(&disttotal);
+    mfu_free(&dist);
+
+    return 0;
+}
+
+/*
+ * Insert 'separator' into option->separators[], keeping the array sorted in
+ * ascending order.
+ *
+ * Returns 0 on success.  Returns -1, leaving 'option' untouched, when the
+ * array is already full or when the value is already present.
+ */
+static int
+distribute_separator_add(struct distribute_option *option, uint64_t separator)
+{
+    int count = option->separator_number;
+    int low   = 0;
+    int high  = count;
+
+    if (count >= MAX_DISTRIBUTE_SEPARATORS) {
+        HDprintf("Too many separators");
+        return -1;
+    }
+
+    /* binary search for the first slot whose value is >= separator
+     * (ends with low == count when every stored value is smaller) */
+    while (low < high) {
+        int middle = low + (high - low) / 2;
+
+        if (option->separators[middle] < separator)
+            low = middle + 1;
+        else
+            high = middle;
+    }
+
+    /* reject duplicates before touching the array */
+    if (low < count && option->separators[low] == separator)
+        return -1;
+
+    /* open a gap at the insertion point */
+    if (low < count)
+        HDmemmove(&option->separators[low + 1], &option->separators[low],
+                  sizeof(*option->separators) * (size_t)(count - low));
+
+    option->separators[low] = separator;
+
+    /* only count the separator once it has really been stored */
+    option->separator_number = count + 1;
+    return 0;
+}
+
+/*
+ * Parse a distribution specification of the form "size" or
+ * "size:<sep>[,<sep>...]" into 'option'.
+ *
+ * Returns 0 on success and -1 when the keyword is missing, a separator cannot
+ * be converted by mfu_abtoull(), or distribute_separator_add() rejects it.
+ */
+static int
+distribution_parse(struct distribute_option *option, const char *string)
+{
+    static const char  keyword[] = "size";
+    const size_t       keylen    = sizeof(keyword) - 1;
+    unsigned long long value;
+    char *             copy;
+    char *             token;
+    char *             comma;
+    char *             end;
+    int                rc = 0;
+
+    /* the specification must start with the keyword */
+    if (strncmp(string, keyword, keylen) != 0)
+        return -1;
+
+    option->separator_number = 0;
+
+    /* bare keyword: no explicit separators */
+    if (string[keylen] == '\0')
+        return 0;
+
+    /* anything else must continue with ':' */
+    if (string[keylen] != ':')
+        return -1;
+
+    /* work on a private copy because the commas are overwritten below */
+    copy = HDstrdup(string);
+    end  = copy + strlen(string);
+
+    for (token = copy + keylen + 1; token != NULL && token < end; token = comma) {
+        /* cut the current token at the next comma, if there is one */
+        comma = strchr(token, ',');
+        if (comma != NULL)
+            *comma++ = '\0';
+
+        if (mfu_abtoull(token, &value) != MFU_SUCCESS) {
+            HDprintf("Invalid separator \"%s\"\n", token);
+            rc = -1;
+            break;
+        }
+
+        if (distribute_separator_add(option, value) != 0) {
+            HDprintf("Duplicated separator \"%llu\"\n", value);
+            rc = -1;
+            break;
+        }
+    }
+
+    mfu_free(&copy);
+    return rc;
+}
+
+/* Print the command-line help text to rawoutstream; only MPI rank 0 prints. */
+static void
+usage(void)
+{
+    if (sg_mpi_rank == 0) {
+        PRINTVALSTREAM(rawoutstream, "\n"
+                                     "Usage: h5dwalk [options] <path> ...\n");
+#ifdef DAOS_SUPPORT
+        PRINTVALSTREAM(rawoutstream, "\n"
+                                     "DAOS paths can be specified as:\n"
+                                     " daos://<pool>/<cont>[/<path>] | <UNS path>\n");
+#endif
+        PRINTVALSTREAM(rawoutstream, "\n"
+                                     "Options:\n");
+        PRINTVALSTREAM(rawoutstream, " -i, --input <file> - read list from file\n");
+        PRINTVALSTREAM(rawoutstream, " -o, --output <file> - write output summary to the named file.\n");
+        PRINTVALSTREAM(rawoutstream,
+                       " -E, --error <file> - write processed errors to file in text format\n");
+        PRINTVALSTREAM(
+            rawoutstream,
+            " -l, --log_text <dir> - write individual tool outputs to a file. Logs can be written to an "
+            "optional named directory.\n");
+        PRINTVALSTREAM(rawoutstream, " -T, --tool <executable> - name of the HDF5 tool to invoke\n");
+        PRINTVALSTREAM(rawoutstream, " -h, --help - print usage\n");
+        PRINTVALSTREAM(rawoutstream, "\n"
+                                     "For more information see https://mpifileutils.readthedocs.io. \n"
+                                     "\n");
+    }
+}
+
+/* Look up the file element stored at position 'idx' of 'flist'.
+ * Returns NULL when 'idx' is at or beyond the list's element count. */
+static elem_t *
+list_get_elem(flist_t *flist, uint64_t idx)
+{
+    uint64_t limit = flist->list_count;
+
+    if (idx >= limit)
+        return NULL;
+
+    return flist->list_index[idx];
+}
+
+#ifdef VERBOSE
+/* Print one line describing list entry 'idx' on stdout (used in print_files).
+ * With stat detail: mode, owner, group, size, modification time and name;
+ * without it: only the entry type and name. */
+static void
+print_file(mfu_flist flist, uint64_t idx)
+{
+    /* get filename */
+    const char *file = mfu_flist_file_get_name(flist, idx);
+
+    if (!mfu_flist_have_detail(flist)) {
+        /* no stat data available: report the type only */
+        const char *label;
+
+        switch (mfu_flist_file_get_type(flist, idx)) {
+            case MFU_TYPE_DIR:
+                label = "DIR";
+                break;
+            case MFU_TYPE_FILE:
+                label = "REG";
+                break;
+            case MFU_TYPE_LINK:
+                label = "LNK";
+                break;
+            default:
+                label = "UNK";
+                break;
+        }
+
+        HDprintf("Type=%s File=%s\n", label, file);
+        return;
+    }
+
+    /* collect the stat-derived fields */
+    mode_t      perms = (mode_t)mfu_flist_file_get_mode(flist, idx);
+    uint64_t    atime = mfu_flist_file_get_atime(flist, idx);
+    uint64_t    mtime = mfu_flist_file_get_mtime(flist, idx);
+    uint64_t    ctime = mfu_flist_file_get_ctime(flist, idx);
+    uint64_t    bytes = mfu_flist_file_get_size(flist, idx);
+    const char *owner = mfu_flist_file_get_username(flist, idx);
+    const char *group = mfu_flist_file_get_groupname(flist, idx);
+
+    /* format the three timestamps; if any of them fails, blank all of them */
+    char   atime_txt[30];
+    char   mtime_txt[30];
+    char   ctime_txt[30];
+    time_t atime_t = (time_t)atime;
+    time_t mtime_t = (time_t)mtime;
+    time_t ctime_t = (time_t)ctime;
+    size_t atime_len = strftime(atime_txt, sizeof(atime_txt) - 1, "%FT%T", localtime(&atime_t));
+    size_t mtime_len = strftime(mtime_txt, sizeof(mtime_txt) - 1, "%b %e %Y %H:%M", localtime(&mtime_t));
+    size_t ctime_len = strftime(ctime_txt, sizeof(ctime_txt) - 1, "%FT%T", localtime(&ctime_t));
+    if (atime_len == 0 || mtime_len == 0 || ctime_len == 0) {
+        atime_txt[0] = '\0';
+        mtime_txt[0] = '\0';
+        ctime_txt[0] = '\0';
+    }
+
+    /* permission string */
+    char perms_txt[11];
+    mfu_format_mode(perms, perms_txt);
+
+    /* size scaled to a convenient unit */
+    double      scaled;
+    const char *unit;
+    mfu_format_bytes(bytes, &scaled, &unit);
+
+    HDprintf("%s %s %s %7.3f %3s %s %s\n", perms_txt, owner, group, scaled, unit, mtime_txt, file);
+}
+
+/* TODO: move this somewhere or modify existing print_file */
+/*
+ * Format the description of list entry 'idx' into 'buffer' (at most
+ * 'bufsize' bytes, including the terminating NUL).  The text is the same
+ * as the line print_file() prints.
+ *
+ * Returns the value reported by snprintf(), i.e. the length the full line
+ * needs, which can exceed 'bufsize' when the output was truncated.  Returns
+ * 0 if formatting failed.
+ */
+static size_t
+print_file_text(mfu_flist flist, uint64_t idx, char *buffer, size_t bufsize)
+{
+    int rc;
+
+    /* get filename */
+    const char *file = mfu_flist_file_get_name(flist, idx);
+
+    if (mfu_flist_have_detail(flist)) {
+        /* collect the stat-derived fields */
+        mode_t      mode      = (mode_t)mfu_flist_file_get_mode(flist, idx);
+        uint64_t    acc       = mfu_flist_file_get_atime(flist, idx);
+        uint64_t    mod       = mfu_flist_file_get_mtime(flist, idx);
+        uint64_t    cre       = mfu_flist_file_get_ctime(flist, idx);
+        uint64_t    size      = mfu_flist_file_get_size(flist, idx);
+        const char *username  = mfu_flist_file_get_username(flist, idx);
+        const char *groupname = mfu_flist_file_get_groupname(flist, idx);
+
+        /* format the timestamps; if any of them fails, blank all of them */
+        char   access_s[30];
+        char   modify_s[30];
+        char   create_s[30];
+        time_t access_t  = (time_t)acc;
+        time_t modify_t  = (time_t)mod;
+        time_t create_t  = (time_t)cre;
+        size_t access_rc = strftime(access_s, sizeof(access_s) - 1, "%FT%T", localtime(&access_t));
+        size_t modify_rc = strftime(modify_s, sizeof(modify_s) - 1, "%b %e %Y %H:%M", localtime(&modify_t));
+        size_t create_rc = strftime(create_s, sizeof(create_s) - 1, "%FT%T", localtime(&create_t));
+        if (access_rc == 0 || modify_rc == 0 || create_rc == 0) {
+            access_s[0] = '\0';
+            modify_s[0] = '\0';
+            create_s[0] = '\0';
+        }
+
+        char mode_format[11];
+        mfu_format_mode(mode, mode_format);
+
+        double      size_tmp;
+        const char *size_units;
+        mfu_format_bytes(size, &size_tmp, &size_units);
+
+        rc = HDsnprintf(buffer, bufsize, "%s %s %s %7.3f %3s %s %s\n", mode_format, username, groupname,
+                        size_tmp, size_units, modify_s, file);
+    }
+    else {
+        /* no stat data available: report the type only */
+        const char * type_str = "UNK";
+        mfu_filetype type     = mfu_flist_file_get_type(flist, idx);
+
+        if (type == MFU_TYPE_DIR)
+            type_str = "DIR";
+        else if (type == MFU_TYPE_FILE)
+            type_str = "REG";
+        else if (type == MFU_TYPE_LINK)
+            type_str = "LNK";
+
+        rc = HDsnprintf(buffer, bufsize, "Type=%s File=%s\n", type_str, file);
+    }
+
+    /* snprintf reports failure with a negative value; casting that straight
+     * to size_t would turn it into an enormous byte count */
+    return (rc > 0) ? (size_t)rc : 0;
+}
+#endif
+
+/*
+ * Report how many bytes of captured output this rank holds in buf_cache.
+ *
+ * The total is computed as (size of one buffer * number of buffers) minus
+ * the free space left in the last buffer.  When at least one buffer exists,
+ * *bufsize is set to the size of one buffer; otherwise it is left untouched
+ * and 0 is returned.
+ */
+static size_t
+get_local_bufsize(uint64_t *bufsize)
+{
+    buf_t *tail;
+    size_t unused;
+
+    if (buft_count == 0)
+        return 0;
+
+    tail     = buf_cache[buft_count - 1];
+    unused   = tail->count;
+    *bufsize = (uint64_t)(tail->bufsize);
+
+    return (tail->bufsize * buft_count) - unused;
+}
+
+/*
+ * Write the tool output captured in buf_cache on every rank into the single
+ * shared file 'name', using collective MPI-IO.
+ *
+ * name   - path of the output file (created, then truncated to 0 bytes).
+ * bflist - file list handle; only used to report the global entry count.
+ *
+ * Each rank writes its buffers, in order, at a file offset equal to the
+ * number of bytes held by all lower ranks.  Like get_local_bufsize(), this
+ * relies on every buffer except the last one being completely full.
+ * Collective: every rank must call this.  Aborts through MFU_ABORT on any
+ * MPI-IO failure.
+ */
+static void
+dh5tool_flist_write_text(const char *name, mfu_flist bflist)
+{
+    /* convert handle to flist_t */
+    flist_t *flist = (flist_t *)bflist;
+
+    /* get our rank and size of the communicator */
+    int rank, ranks;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+    /* start timer */
+    double start_write = MPI_Wtime();
+
+    /* total list items */
+    uint64_t all_count = mfu_flist_global_size(flist);
+
+    /* report the filename we're writing to */
+    if (mfu_rank == 0) {
+        MFU_LOG(MFU_LOG_INFO, "Writing to output file: %s", name);
+    }
+
+    /* one collective write per cached buffer: 'maxwrite' is the size of one
+     * buffer, 'iters' the number of writes this rank needs */
+    uint64_t maxwrite    = 0;
+    size_t   local_total = get_local_bufsize(&maxwrite);
+    uint64_t iters       = 0;
+    if (local_total > 0 && maxwrite > 0) {
+        iters = (uint64_t)local_total / maxwrite;
+        if (iters * maxwrite < (uint64_t)local_total)
+            iters++;
+    }
+
+    /* get max iterations across all procs */
+    uint64_t all_iters;
+    MPI_Allreduce(&iters, &all_iters, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+    /* use mpi io hints to stripe across OSTs */
+    MPI_Info info;
+    MPI_Info_create(&info);
+
+    /* change number of ranks to string to pass to MPI_Info */
+    char str_buf[12];
+    HDsnprintf(str_buf, sizeof(str_buf), "%d", ranks);
+
+    /* no. of I/O devices for lustre striping is number of ranks */
+    MPI_Info_set(info, "striping_factor", str_buf);
+
+    /* open file */
+    MPI_Status  status;
+    MPI_File    fh;
+    const char *datarep = "native";
+    int         amode   = MPI_MODE_WRONLY | MPI_MODE_CREATE;
+
+    int mpirc = MPI_File_open(MPI_COMM_WORLD, (const char *)name, amode, info, &fh);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to open file for writing: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* truncate file to 0 bytes */
+    mpirc = MPI_File_set_size(fh, 0);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to truncate file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* set file view to be a plain sequence of bytes */
+    mpirc = MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* compute byte offset at which this rank starts writing */
+    uint64_t offset = 0;
+    uint64_t bytes  = (uint64_t)local_total;
+    MPI_Exscan(&bytes, &offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+    if (rank == 0) {
+        /* MPI leaves the exclusive-scan result undefined on rank 0 */
+        offset = 0;
+    }
+    MPI_Offset write_offset = (MPI_Offset)offset;
+
+    uint64_t written = 0;
+    char     nothing = 0; /* valid address for ranks that have no data left */
+    for (uint64_t iter = 0; iter < all_iters; iter++) {
+        /* compute number of bytes left to write */
+        uint64_t remaining   = (uint64_t)local_total - written;
+        int      write_count = 0;
+        char *   ptr         = &nothing;
+
+        /* Buffer number 'iter' holds the next chunk.  A rank that has run
+         * out of data still takes part in the collective call, writing
+         * zero bytes. */
+        if (remaining > 0) {
+            write_count = (remaining < maxwrite) ? (int)remaining : (int)maxwrite;
+            ptr         = (char *)buf_cache[iter]->buf;
+        }
+
+        /* collective write of file data */
+        mpirc = MPI_File_write_at_all(fh, write_offset, ptr, write_count, MPI_BYTE, &status);
+        if (mpirc != MPI_SUCCESS) {
+            MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+            MFU_ABORT(1, "Failed to write to file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+        }
+
+        /* update our offset into the file and the running byte count */
+        write_offset += (MPI_Offset)write_count;
+        written += (uint64_t)write_count;
+    }
+
+    /* close file */
+    mpirc = MPI_File_close(&fh);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to close file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* free mpi info */
+    MPI_Info_free(&info);
+
+    /* end timer */
+    double end_write = MPI_Wtime();
+
+    /* report write count, time, and rate */
+    if (mfu_rank == 0) {
+        double secs = end_write - start_write;
+        double rate = 0.0;
+        if (secs > 0.0) {
+            rate = ((double)all_count) / secs;
+        }
+        MFU_LOG(MFU_LOG_INFO, "Wrote %" PRIu64 " files in %.3lf seconds (%.3lf files/sec)", all_count, secs,
+                rate);
+    }
+
+    return;
+}
+
+/*
+ * Replace *pflist with a list that holds only the entries HDF5 can open.
+ *
+ * pflist    - in: the walked file list; out: the filtered list.  The input
+ *             list is freed.
+ * regex_exp - optional regular expression applied after the HDF5 check
+ *             (NULL to skip); 'exclude' and 'name' are passed through to
+ *             mfu_flist_filter_regex().
+ *
+ * Only regular files, links and entries of unknown type are probed with
+ * H5Fis_accessible(); everything else is dropped.
+ */
+static void
+filter_hdf_files(mfu_flist *pflist, char *regex_exp, int exclude, int name)
+{
+    mfu_flist flist    = *pflist;
+    mfu_flist eligible = mfu_flist_subset(flist);
+    uint64_t  files    = mfu_flist_size(flist);
+
+    for (uint64_t idx = 0; idx < files; idx++) {
+        mfu_filetype type = mfu_flist_file_get_type(flist, idx);
+
+        if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK || type == MFU_TYPE_UNKNOWN) {
+            const char *file = mfu_flist_file_get_name(flist, idx);
+
+            /* H5Fis_accessible() is tri-state: positive = HDF5 file,
+             * zero = not HDF5, negative = the check itself failed.
+             * Only a positive answer qualifies. */
+            if (H5Fis_accessible(file, H5P_DEFAULT) > 0)
+                mfu_flist_file_copy(flist, idx, eligible);
+        }
+    }
+
+    mfu_flist_summarize(eligible);
+
+    /* assume we'll use the full list of HDF5 files */
+    mfu_flist srclist = eligible;
+
+    /* filter the list if needed */
+    if (regex_exp != NULL) {
+        /* the regex filter returns a new list, so the intermediate one is
+         * no longer needed */
+        srclist = mfu_flist_filter_regex(eligible, regex_exp, exclude, name);
+        mfu_flist_free(&eligible);
+    }
+
+    mfu_flist_free(&flist);
+    *pflist = srclist;
+    return;
+}
+
+/*
+ * Populate 'new_flist' from a text file that lists one path per line.
+ *
+ * new_flist       - list that receives the entries.
+ * config_filename - file to read.
+ * myrank, size    - this rank and the number of ranks; paths that can be
+ *                   stat'ed are dealt out round-robin, so each rank inserts
+ *                   only every size-th one.
+ *
+ * Returns the number of listed paths that could be stat'ed (the same value
+ * on every rank), or -1 if the file cannot be opened.  Lines naming paths
+ * that cannot be stat'ed are skipped.
+ */
+static int
+fill_file_list(mfu_flist new_flist, const char *config_filename, int myrank, int size)
+{
+    int   index             = 0;
+    char  linebuf[PATH_MAX] = {
+        '\0',
+    };
+    FILE *config = HDfopen(config_filename, "r");
+
+    if (config == NULL)
+        return -1;
+
+    while (HDfgets(linebuf, sizeof(linebuf), config) != NULL) {
+        struct stat statbuf;
+        char *      eol = HDstrchr(linebuf, '\n');
+
+        /* drop the trailing newline */
+        if (eol)
+            *eol = '\0';
+
+        if (HDstat(linebuf, &statbuf) == 0) {
+            if (myrank == (index % size)) {
+                /* pass the file's real mode so the entry gets the right
+                 * type (an open flag such as O_RDONLY is not a mode) */
+                mfu_flist_insert_stat((flist_t *)new_flist, linebuf, statbuf.st_mode, &statbuf);
+            }
+            index++;
+        }
+        linebuf[0] = 0;
+    }
+
+    HDfclose(config);
+    return index;
+}
+
+/*
+ * Scan argv[startcnt .. argc-1] for arguments that name paths to walk.
+ *
+ * An argument counts as a path when it starts with '.' or '/', or when it
+ * contains a '/' and names an existing directory.  An argument of the form
+ * "@file", where 'file' is an existing regular file, is recorded in the
+ * global config_index[] / use_config_file instead.
+ *
+ * index_out - on a non-zero return, receives a malloc'ed array holding the
+ *             argv index of every path found (the caller frees it); left
+ *             untouched when 0 is returned.
+ *
+ * Returns the number of path arguments found.
+ */
+static int
+count_dirpaths(int argc, int startcnt, const char *argv[], int **index_out)
+{
+    const int   max_configs = (int)(sizeof(config_index) / sizeof(config_index[0]));
+    int         k;
+    int         path_cnt  = 0;
+    int         idx_count = (argc - startcnt);
+    int *       index     = NULL;
+    struct stat pathcheck;
+
+    if (idx_count <= 0)
+        return 0;
+
+    index = (int *)malloc((size_t)idx_count * sizeof(int));
+    assert(index);
+
+    for (k = startcnt; k < argc; k++) {
+        const char *arg = argv[k];
+        int         c   = *arg;
+
+        if ((c == '.') || (c == '/')) {
+            index[path_cnt++] = k;
+        }
+        else if (c == '@') {
+            /* "@file": a file that lists the inputs */
+            if ((stat(arg + 1, &pathcheck) == 0) && S_ISREG(pathcheck.st_mode)) {
+                if (use_config_file < max_configs) {
+                    config_index[use_config_file++] = k;
+                }
+                else if (sg_mpi_rank == 0) {
+                    /* config_index[] is a fixed-size table; do not write
+                     * past its end */
+                    HDprintf("Warning: ignoring input file '%s' (at most %d are supported)\n", arg + 1,
+                             max_configs);
+                }
+            }
+        }
+        else if (strchr(arg, '/') != NULL) {
+            if ((stat(arg, &pathcheck) == 0) && S_ISDIR(pathcheck.st_mode))
+                index[path_cnt++] = k;
+        }
+    }
+
+    if (path_cnt == 0) {
+        free(index);
+        return 0;
+    }
+
+    *index_out = index;
+    return path_cnt;
+}
+
+/*
+ * Make a private, NULL-terminated copy of the argument vector.
+ *
+ * argc, argv - the arguments to duplicate; argv[0] is also handed to
+ *              save_command().
+ * mfu_argc   - out: index of the first argument after the first one that
+ *              begins with "-T"; left untouched if there is no such argument.
+ * copy_len   - out: total bytes in the copied strings, including one
+ *              terminating NUL per argument.
+ *
+ * Returns the new vector; each string is allocated separately.
+ */
+static char **
+copy_args(int argc, const char *argv[], int *mfu_argc, int *copy_len)
+{
+    int    i, bytes_copied = 0;
+    int    check_mfu_args = 1;
+    char **argv_copy      = (char **)MFU_MALLOC((size_t)(argc + 2) * sizeof(char *));
+
+    assert(argv_copy);
+    assert(mfu_argc);
+    assert(copy_len);
+    save_command(argv[0]);
+
+    for (i = 0; i < argc; i++) {
+        /* duplicate each argument exactly once */
+        argv_copy[i] = HDstrdup(argv[i]);
+        bytes_copied += (int)(strlen(argv[i]) + 1);
+
+        /* everything after the first "-T" belongs to the HDF5 tool */
+        if (check_mfu_args && (HDstrncmp(argv[i], "-T", 2) == 0)) {
+            check_mfu_args = 0;
+            *mfu_argc      = i + 1;
+        }
+    }
+
+    argv_copy[i] = NULL;
+    *copy_len    = bytes_copied;
+    return argv_copy;
+}
+
+typedef struct hash_entry { /* one (file name, tool) pair tracked by get_copy_count() */
+ int hash; /* sum of the characters of the tool name */
+ char * name; /* private copy of the file name; NULL marks an unused table slot */
+ struct hash_entry *next; /* next entry that landed in the same table slot (collision chain) */
+ int nextCount; /* value get_copy_count() returns on the next match; starts at 1 */
+} hash_entry_t;
+
+#ifndef NAME_ENTRIES
+#define NAME_ENTRIES 4096 /* number of slots in filename_cache */
+#endif
+
+static hash_entry_t filename_cache[NAME_ENTRIES]; /* zero-initialized table, indexed by file-name hash */
+
+/*
+ * Count how often the pair (fname, appname) has been seen before.
+ *
+ * Returns 0 the first time a pair is seen, 1 the second time, and so on.
+ * Pairs are remembered in filename_cache, a chained hash table indexed by
+ * the sum of the characters of 'fname'.  Also returns 0 if a new table
+ * entry cannot be allocated.
+ */
+static int
+get_copy_count(char *fname, char *appname)
+{
+    unsigned int  filehash = 0;
+    int           apphash  = 0;
+    size_t        k;
+    hash_entry_t *entry;
+    hash_entry_t *last = NULL;
+
+    /* Sum the bytes as unsigned values: plain char may be signed, and a
+     * negative sum would produce a negative table index below. */
+    for (k = 0; fname[k] != '\0'; k++)
+        filehash += (unsigned char)fname[k];
+    for (k = 0; appname[k] != '\0'; k++)
+        apphash += (unsigned char)appname[k];
+
+    entry = &filename_cache[filehash % NAME_ENTRIES];
+
+    /* unused slot: first sighting of this pair */
+    if (entry->name == NULL) {
+        entry->hash      = apphash;
+        entry->name      = HDstrdup(fname);
+        entry->next      = NULL;
+        entry->nextCount = 1;
+        return 0;
+    }
+
+    /* walk the chain looking for the same pair */
+    for (; entry != NULL; entry = entry->next) {
+        if ((apphash == entry->hash) && (entry->name != NULL) && (strcmp(entry->name, fname) == 0))
+            return entry->nextCount++;
+        last = entry;
+    }
+
+    /* not found: append a new entry to the end of the chain */
+    entry = (hash_entry_t *)malloc(sizeof(hash_entry_t));
+    if (entry) {
+        entry->name      = HDstrdup(fname);
+        entry->hash      = apphash;
+        entry->next      = NULL;
+        entry->nextCount = 1;
+        last->next       = entry;
+    }
+    return 0;
+}
+
+#ifndef H5_HAVE_WINDOWS
+/*
+ * Return the buf_cache buffer that still has free space, creating the cache
+ * and/or a fresh BUFT_SIZE buffer when needed.  The returned buffer always
+ * has count > 0.  The cache grows if all 'buft_max' slots are taken.
+ */
+static buf_t *
+run_command_active_buffer(void)
+{
+    buf_t *cur = NULL;
+
+    if (buf_cache == NULL) {
+        buf_cache = (buf_t **)MFU_CALLOC(buft_max, sizeof(buf_t *));
+        assert(buf_cache != NULL);
+    }
+
+    if (buft_count > 0)
+        cur = buf_cache[buft_count - 1];
+
+    if (cur == NULL || cur->count == 0) {
+        /* no buffer yet, or the current one is full: start a new one */
+        if (buft_count >= buft_max) {
+            size_t  new_max = (buft_max > 0) ? (buft_max * 2) : 64;
+            buf_t **grown   = (buf_t **)MFU_CALLOC(new_max, sizeof(buf_t *));
+
+            assert(grown != NULL);
+            HDmemcpy(grown, buf_cache, buft_count * sizeof(buf_t *));
+            mfu_free(&buf_cache);
+            buf_cache = grown;
+            buft_max  = new_max;
+        }
+
+        cur = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+        assert(cur != NULL);
+        cur->buf = MFU_MALLOC(BUFT_SIZE);
+        assert(cur->buf != NULL);
+        cur->bufsize = BUFT_SIZE;
+        cur->count   = BUFT_SIZE; /* free bytes */
+        cur->chars   = 0;         /* used bytes */
+        cur->dt      = MPI_CHAR;
+
+        buf_cache[buft_count++] = cur;
+    }
+
+    return cur;
+}
+
+/* Append 'len' bytes to the output cache, spilling into new buffers as each
+ * one fills up. */
+static void
+run_command_cache_append(const char *data, size_t len)
+{
+    while (len > 0) {
+        buf_t *cur   = run_command_active_buffer();
+        size_t room  = (size_t)cur->count;
+        size_t chunk = (len < room) ? len : room;
+
+        HDmemcpy((char *)cur->buf + cur->chars, data, chunk);
+        cur->chars += chunk;
+        cur->count -= chunk;
+        data += chunk;
+        len -= chunk;
+    }
+}
+
+/* Run argv[] in a child process and collect the command line followed by the
+ * child's stdout and stderr in the output cache. */
+static void
+run_command_capture(char **argv, const char *cmdline)
+{
+    int   pipefd[2];
+    int   w_status = 0;
+    pid_t pid;
+
+    if (pipe(pipefd) == -1) {
+        perror("pipe");
+        exit(EXIT_FAILURE);
+    }
+
+    pid = fork();
+    if (pid == -1) {
+        perror("fork");
+        exit(EXIT_FAILURE);
+    }
+
+    if (pid == 0) {
+        /* child: send stdout and stderr down the pipe and become the tool */
+        close(pipefd[0]);
+        dup2(pipefd[1], fileno(stdout));
+        dup2(pipefd[1], fileno(stderr));
+        close(pipefd[1]);
+        execvp(argv[0], argv);
+
+        /* only reached when the tool could not be started; never fall back
+         * into the parent's code path */
+        perror(argv[0]);
+        _exit(127);
+    }
+
+    /* parent */
+    close(pipefd[1]);
+
+    /* Record the command line for the log! */
+    run_command_cache_append(cmdline, strlen(cmdline));
+
+    /* Read until end-of-file, i.e. until the child has closed its end of
+     * the pipe, so that no output is lost.  Any read error also ends the
+     * capture. */
+    for (;;) {
+        buf_t * cur = run_command_active_buffer();
+        ssize_t got = read(pipefd[0], (char *)cur->buf + cur->chars, (size_t)cur->count);
+
+        if (got <= 0)
+            break;
+        cur->chars += (uint64_t)got;
+        cur->count -= (uint64_t)got;
+    }
+    close(pipefd[0]);
+
+    /* reap the child */
+    waitpid(pid, &w_status, 0);
+}
+
+/* Run argv[] in a child process with stdout (and stderr) redirected to a log
+ * file named "<file>_<tool>.log[_<n>]".  The log goes into 'txtlog' if set,
+ * else the current directory.  With log_errors_in_file, stderr goes to the
+ * matching ".err" file instead. */
+static void
+run_command_logged(char **argv, const char *fname)
+{
+    char        logpath[2048];
+    char        logErrors[2048];
+    char        current_dir[2048];
+    char *      pathcopy = HDstrdup(fname);   /* basename() may modify its argument */
+    char *      toolcopy = HDstrdup(argv[0]);
+    char *      logbase  = HDstrdup(basename(pathcopy));
+    char *      thisapp  = HDstrdup(basename(toolcopy));
+    const char *dir      = txtlog;
+    const char *sep;
+    int         log_instance = -1;
+    int         use_suffix;
+    int         status = 0;
+    pid_t       pid;
+
+    /* which instance of this (file, tool) pair is it? */
+    if (processing_inputfile)
+        log_instance = current_input_index;
+    else
+        log_instance = get_copy_count(logbase, thisapp);
+    use_suffix = (log_instance > 0) || processing_inputfile;
+
+    /* directory that receives the logs */
+    if (dir == NULL || dir[0] == '\0') {
+        dir = HDgetcwd(current_dir, sizeof(current_dir));
+        if (dir == NULL)
+            dir = ".";
+    }
+    sep = (dir[strlen(dir) - 1] == '/') ? "" : "/";
+
+    /* The error log sits next to the regular log and differs only in its
+     * extension (".err" instead of ".log"). */
+    if (use_suffix) {
+        HDsnprintf(logpath, sizeof(logpath), "%s%s%s_%s.log_%d", dir, sep, logbase, thisapp, log_instance);
+        HDsnprintf(logErrors, sizeof(logErrors), "%s%s%s_%s.err_%d", dir, sep, logbase, thisapp,
+                   log_instance);
+    }
+    else {
+        HDsnprintf(logpath, sizeof(logpath), "%s%s%s_%s.log", dir, sep, logbase, thisapp);
+        HDsnprintf(logErrors, sizeof(logErrors), "%s%s%s_%s.err", dir, sep, logbase, thisapp);
+    }
+
+    if (mfu_debug_level == MFU_LOG_VERBOSE) {
+        HDprintf("\tCreating logfile: %s\n", logpath);
+        fflush(stdout);
+    }
+
+    pid = fork();
+    if (pid == 0) {
+        /* child: redirect output, then become the tool.  If a log cannot be
+         * opened the tool still runs with the inherited stream. */
+        int fd = open(logpath, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+
+        if (fd < 0)
+            perror(logpath);
+        else
+            dup2(fd, fileno(stdout));
+
+        if (log_errors_in_file) {
+            int efd = open(logErrors, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+
+            if (efd < 0)
+                perror(logErrors);
+            else {
+                dup2(efd, fileno(stderr));
+                close(efd);
+            }
+        }
+        else if (fd >= 0)
+            dup2(fd, fileno(stderr));
+
+        if (fd >= 0)
+            close(fd);
+
+        execvp(argv[0], argv);
+
+        /* only reached when the tool could not be started */
+        perror(argv[0]);
+        _exit(127);
+    }
+    else if (pid == -1) {
+        perror("fork");
+    }
+    else {
+        waitpid(pid, &status, 0);
+    }
+
+    free(logbase);
+    free(thisapp);
+    free(toolcopy);
+    free(pathcopy);
+}
+#endif /* !H5_HAVE_WINDOWS */
+
+/*
+ * Run one HDF5 tool invocation.
+ *
+ * argv    - NULL-terminated argument vector; argv[0] is the tool.
+ * cmdline - printable form of the command, recorded ahead of the captured
+ *           output.
+ * fname   - the input file the tool works on; used to name per-run log files.
+ *
+ * With log_output_in_single_file set, the tool's output is captured in the
+ * in-memory cache.  Otherwise, with log_stdout_in_file set, it is written to
+ * a per-run log file.  If neither flag is set, nothing is run.  fork/exec is
+ * not supported on Windows, where this reports an error and exits.
+ */
+static void
+run_command(int argc __attribute__((unused)), char **argv, char *cmdline, const char *fname)
+{
+#ifdef H5_HAVE_WINDOWS
+    (void)argv;
+    (void)cmdline;
+    (void)fname;
+    HDprintf("ERROR: %s %s: Unable to support fork/exec on WINDOWS\n", PROGRAMNAME, __func__);
+    h5dwalk_exit(EXIT_FAILURE);
+#else
+    if (log_output_in_single_file)
+        run_command_capture(argv, cmdline);
+    else if (log_stdout_in_file)
+        run_command_logged(argv, fname);
+#endif /* #ifdef H5_HAVE_WINDOWS */
+}
+
+int MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg);
+int MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg);
+
+/*
+ * MFU_PRED_EXEC
+ *
+ * Predicate callback that rebuilds the HDF5 tool command line for the item
+ * at position 'idx' of 'flist' and passes it to run_command().
+ *
+ * 'arg' points at a packed buffer: an int argument count, followed by the
+ * NUL-terminated tool name and then the remaining arguments.  An argument
+ * whose first byte is '&' is not text; the bytes after the '&' hold a raw
+ * mfu_flist handle, and the argument is replaced by the name of entry 'idx'
+ * in that list (or by this item's own name when that lookup fails).
+ *
+ * Always returns 0.
+ */
+int
+MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg)
+{
+    /* get file name for this item */
+    int         file_substituted = 0;
+    const char *fname            = mfu_flist_file_get_name(flist, idx);
+
+    char * toolname = NULL;
+    size_t b_offset;
+
+    /* get pointer to encoded argc count and argv array */
+    int * count_ptr = arg;
+    char *buf       = (char *)arg + sizeof(int);
+
+    /* get number of argv parameters */
+    int k = 0, count = *count_ptr;
+    toolname = buf;
+
+    /* allocate a char* for each item in the argv array, plus spare slots.
+     * No terminator is stored explicitly: as in the original code, the
+     * trailing NULL is expected to come from the MFU_CALLOC'ed memory.
+     */
+    char   cmdline[2048];
+    char **argv = (char **)MFU_CALLOC((size_t)(count + 2), sizeof(char *));
+
+    argv[k++] = HDstrdup(toolname);
+
+    HDmemset(cmdline, 0, sizeof(cmdline));
+    buf += HDstrlen(toolname) + 1;
+    /* Reconstruct the command line that the user provided for the h5tool */
+    for (k = 1; k < count; k++) {
+        if (buf[0] == '&') {
+            const char *fname_arg = NULL;
+            mfu_flist   flist_arg;
+            void *      check_ptr[2] = {NULL, NULL};
+
+            /* The handle may be unaligned inside the byte buffer, so copy it out */
+            HDmemcpy(check_ptr, &buf[1], sizeof(void *));
+            flist_arg = (mfu_flist)check_ptr[0];
+
+            /* +2 accounts for the '&' and the trailing zero pad */
+            buf += sizeof(mfu_flist *) + 2;
+            fname_arg = mfu_flist_file_get_name(flist_arg, idx);
+            if (fname_arg == NULL) {
+                HDprintf("[%d] Warning: Unable to resolve file_substitution %d (idx=%llu)\n", sg_mpi_rank,
+                         file_substituted, (unsigned long long)idx);
+                argv[k] = HDstrdup(fname);
+            }
+            else {
+                argv[k] = HDstrdup(fname_arg);
+                file_substituted++;
+            }
+        }
+        else {
+            argv[k] = HDstrdup(buf);
+            /* Advance by the source length so a failed strdup is not dereferenced */
+            buf += HDstrlen(buf) + 1;
+        }
+    }
+
+    /* Build the banner with bounded writes; an over-long command line is
+     * truncated instead of overrunning cmdline.
+     */
+    HDsnprintf(cmdline, sizeof(cmdline), "\n---------\nCommand:");
+    b_offset = strlen(cmdline);
+    for (k = 0; k < count; k++) {
+        HDsnprintf(&cmdline[b_offset], sizeof(cmdline) - b_offset, " %s", argv[k] ? argv[k] : "");
+        b_offset = strlen(cmdline);
+    }
+    HDsnprintf(&cmdline[b_offset], sizeof(cmdline) - b_offset, "\n");
+    run_command(count, argv, cmdline, fname);
+
+    /* Release every duplicated argument, then the array itself.  mfu_free()
+     * expects the ADDRESS of the pointer to release; handing it 'argv'
+     * directly would free only argv[0] and leak everything else.
+     */
+    for (k = 0; k < count + 2; k++)
+        HDfree(argv[k]);
+    mfu_free(&argv);
+
+    return 0;
+}
+
+/* Predicate callback: print the name of item 'idx' in 'flist' and return 1. */
+int
+MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg __attribute__((unused)))
+{
+    HDprintf("%s\n", mfu_flist_file_get_name(flist, idx));
+
+    return 1;
+}
+
+/*
+ * Walk the predicate chain starting at 'p' and stop at the first entry whose
+ * callback is MFU_PRED_PRINT or MFU_PRED_EXEC.  Nothing is modified.
+ */
+static void
+pred_commit(mfu_pred *p)
+{
+    mfu_pred *node;
+
+    for (node = p; node != NULL; node = node->next) {
+        if ((node->f == MFU_PRED_PRINT) || (node->f == MFU_PRED_EXEC))
+            return;
+    }
+}
+
+/*
+ * add_executable
+ *
+ * Run one already-tokenized command.
+ *
+ *   argc, argv  - the command and its arguments; argv must have room for one
+ *                 extra element because a NULL terminator is stored at
+ *                 argv[argc]
+ *   cmdstring   - the original, unsplit command text (used for the banner)
+ *   f_index     - positions in argv of the arguments that look like file paths
+ *   f_count     - number of valid entries in f_index
+ *
+ * The first file-like argument is passed to run_command() as the file path;
+ * when the command has none, argv[0] is used instead of reading an unset
+ * f_index entry.
+ */
+static void
+add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count)
+{
+    char  cmdline[2048];
+    char *filepath = argv[0];
+
+    if ((f_count > 0) && (f_index[0] >= 0) && (f_index[0] < argc))
+        filepath = argv[f_index[0]];
+
+    /* Bounded: cmdstring can be longer than this buffer */
+    HDsnprintf(cmdline, sizeof(cmdline), "\n---------\nCommand: %s\n", cmdstring);
+    argv[argc] = NULL;
+    run_command(argc, argv, cmdline, filepath);
+    return;
+}
+
+/*
+ * process_input_file
+ *
+ * Read 'inputname' one line at a time; every non-empty line is a complete
+ * command (tool name followed by its arguments).  Commands are distributed
+ * round-robin: the rank for which (command index % size) == myrank runs the
+ * command through add_executable().  Arguments after the tool name that
+ * begin with '.' or '/' are treated as file paths; those that stat()
+ * successfully are inserted into a local file list, which is written out
+ * when output_log_file is set.
+ *
+ * Returns -1 if the input file cannot be opened, 0 otherwise.
+ */
+static int
+process_input_file(char *inputname, int myrank, int size)
+{
+    int  index             = 0;
+    char linebuf[PATH_MAX] = {
+        '\0',
+    };
+    FILE *    config = HDfopen(inputname, "r");
+    mfu_flist flist1 = NULL;
+
+    if (config == NULL)
+        return -1;
+
+    flist1 = mfu_flist_new();
+
+    /* Flag the fact that we're processing an inputfile (script)
+     * so that we can generate a meaningful logfile name...
+     */
+    processing_inputfile = 1;
+
+    while (HDfgets(linebuf, sizeof(linebuf), config) != NULL) {
+        const char *delim   = " \n";
+        char *      cmdline = NULL;
+        char *      cmd     = NULL;
+        char *      arg     = NULL;
+        char *      argv[256];
+        int         fileindex[256] = {0};
+        int         filecount      = 0;
+        int         token          = 0;
+        /* add_executable() stores a NULL at argv[token]: keep one slot free */
+        const int   max_tokens = (int)(sizeof(argv) / sizeof(argv[0])) - 1;
+        struct stat statbuf;
+
+        char *eol = strchr(linebuf, '\n');
+        if (eol) {
+            *eol = '\0';
+        }
+        /* Keep an unsplit copy: strtok writes NULs into linebuf */
+        cmdline = HDstrdup(linebuf);
+        cmd     = HDstrtok(linebuf, delim);
+        if (cmd) {
+            arg = cmd;
+            while ((arg != NULL) && (token < max_tokens)) {
+                char c = arg[0];
+                if (token > 0) {
+                    if ((c == '.') || (c == '/')) {
+                        /* 'arg' looks to be a filepath */
+                        if (stat(arg, &statbuf) == 0) {
+                            mfu_flist_insert_stat(flist1, arg, O_RDONLY, &statbuf);
+                        }
+                        /* filecount < token here, so this cannot overrun fileindex */
+                        fileindex[filecount++] = token;
+                    }
+                }
+                argv[token++] = arg;
+                arg           = HDstrtok(NULL, delim);
+            }
+
+            if (myrank == (index % size)) {
+                if (arg != NULL) {
+                    /* Tokens remain: running a truncated command would be wrong */
+                    HDprintf("[%d] Warning: skipping input command %d: more than %d arguments\n", myrank,
+                             index, max_tokens);
+                }
+                else {
+                    current_input_index = index;
+                    add_executable(token, argv, cmdline ? cmdline : cmd, fileindex, filecount);
+                }
+            }
+            /* Every rank counts the command so the round-robin stays in step */
+            index++;
+        }
+        linebuf[0] = 0;
+        HDfree(cmdline);
+    }
+
+    if (output_log_file) {
+        dh5tool_flist_write_text(output_log_file, flist1);
+    }
+    HDfclose(config);
+
+    mfu_flist_free(&flist1);
+    return 0;
+}
+
+/*
+ * h5dwalk entry point.
+ *
+ * Flow, as implemented below:
+ *   1. Initialize MPI, libmfu and the h5tools library.
+ *   2. Parse h5dwalk's own options until either -i (input script) or -T
+ *      (tool selection) has been seen; everything after -T belongs to the tool.
+ *   3. With -i: hand the script to process_input_file() and exit.
+ *   4. Otherwise build up to two file lists (from config files or by walking
+ *      the directory paths found on the tool command line), keep only the
+ *      entries accepted by filter_hdf_files(), pack the tool arguments into
+ *      args_buf and run MFU_PRED_EXEC over the first list.
+ *   5. Optionally write the list to the --output file, then clean up.
+ */
+int
+main(int argc, const char *argv[])
+{
+ int i;
+ int rc = 0;
+
+ char *env_var = NULL;
+
+ /* initialize MPI */
+ MPI_Init(&argc, (char ***)&argv);
+ mfu_init();
+
+ /* Initialize h5tools lib */
+ h5tools_init();
+
+ h5tools_setprogname(PROGRAMNAME);
+ h5tools_setstatus(EXIT_SUCCESS);
+
+ /* get our rank and the size of comm_world */
+ int rank, ranks;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+ /* Assign the static global mpi_rank (for debugging) */
+ sg_mpi_rank = rank;
+
+ /* NOTE(review): disabled stub; env_var is only referenced inside this #if 0 block */
+#if 0
+ env_var = HDgetenv("HDF5_H5DWALK_PRINT_CMDLINE");
+ if (env_var) {
+ int enable = HDatoi(env_var);
+ if (enable) {
+
+ }
+ }
+#endif
+ /* pointer to mfu_walk_opts */
+ mfu_walk_opts_t *walk_opts = mfu_walk_opts_new();
+
+#ifdef DAOS_SUPPORT
+ /* DAOS vars */
+ daos_args_t *daos_args = daos_args_new();
+#endif
+
+ int args_byte_length = -1;
+ int mfu_argc = argc;
+ char * args_buf = NULL;
+ char **h5tool_argv = copy_args(argc, argv, &mfu_argc, &args_byte_length);
+
+ char *inputname = NULL;
+ char *outputname = NULL;
+ char *sortfields = NULL;
+ char *distribution = NULL;
+
+ int text = 0;
+ int h5tool_argc = 0;
+
+ mfu_debug_level = MFU_LOG_WARN;
+ /* NOTE(review): assumes copy_args() returned at least argc + 1 slots -- confirm */
+ h5tool_argv[argc] = 0;
+
+ /* The struct option declaration can found in bits/getopt_ext.h
+ * I've reproduced it here:
+ * struct option { char * name; int has_arg; int *flag; int val};
+ */
+ int opt;
+ int tool_selected = 0;
+ int tool_args_start = -1;
+ int last_mfu_arg = 0;
+
+ mfu_pred *pred_head = NULL;
+
+ /* The loop only ends once -i or -T has been seen; any value that matches
+ * no case below (presumably including end-of-options) prints the usage
+ * text and exits with EXIT_FAILURE.
+ */
+ while (!tool_selected) {
+ opt = H5_get_option(argc, argv, s_opts, l_opts);
+ switch ((char)opt) {
+ default:
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ break;
+ case 'i':
+ inputname = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ if (inputname)
+ tool_selected = 1;
+ break;
+ case 'o':
+ outputname = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ if (outputname) {
+ log_output_in_single_file = 1;
+ output_log_file = HDstrdup(H5_optarg);
+ text = 1; /* Format TXT, not HDF5 */
+ }
+ break;
+ case 'E':
+ log_errors_in_file = 1;
+ errlog = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ break;
+ case 'l':
+ log_stdout_in_file = 1;
+ if (H5_optarg)
+ txtlog = HDstrdup(H5_optarg);
+ break;
+ case 'T':
+ /* We need to stop parsing user options at this point.
+ * all remaining arguments should be utilized as the
+ * arguments to the selected HDF5 tools.
+ * We also want to avoid any misinterpretations if
+ * HDF5 tool options conflict with the MFU options.
+ */
+ tool_selected = 1;
+ tool_args_start = H5_optind;
+ h5tool_argc = argc - mfu_argc;
+ last_mfu_arg = H5_optind;
+ /* Don't allow any further parsing of arguments */
+ break;
+ case 'h':
+ usage();
+ h5dwalk_exit(EXIT_SUCCESS);
+ break;
+ case '?':
+ /* NOTE(review): this case exits with EXIT_SUCCESS, unlike 'default' */
+ usage();
+ h5dwalk_exit(EXIT_SUCCESS);
+ break;
+ }
+ }
+
+ if (inputname != NULL) {
+ if (tool_selected && (rank == 0)) {
+ if ((log_output_in_single_file == 0) && (log_stdout_in_file == 0))
+ puts("WARNING: When utilizing --input, the only other supported "
+ "runtime argument is --output or -l");
+ }
+ rc = process_input_file(inputname, rank, ranks);
+ /* NOTE(review): h5dwalk_exit() calls mfu_finalize() as well */
+ mfu_finalize();
+ h5dwalk_exit(rc);
+ }
+
+ /**************************************************************/
+ /* We might consider doing a tool specific argument checking */
+ /* to prevent runtime errors. We would also like to allow */
+ /* the same command line interface for parallel invocations */
+ /* so that users don't get confused. Effectively, we should */
+ /* strip out all MFU related arguments and retain copies of */
+ /* everything else to pass into a serial instance of the tool */
+ /* */
+ /* As we move forward, we might allow the HDF5 tool to be */
+ /* queried for an acceptable set of runtime arguments. */
+ /* This could be just a simple string to allow getopt_long */
+ /* to be invoked on the remaining command line arguments. */
+ /**************************************************************/
+
+ int *path_indices = NULL;
+ int numpaths = count_dirpaths(argc, tool_args_start, argv, &path_indices);
+
+ const char **argpaths = NULL;
+
+ /* store src and dest path strings */
+ const char *path1 = NULL;
+ const char *path2 = NULL;
+ size_t pathlen_total = 0;
+
+ if (numpaths && path_indices) {
+ argpaths = &argv[path_indices[0]];
+ }
+ /* pointer to mfu_file src and dest objects */
+ /* The dst object will only be used for tools which
+ * accept 2 (or more?) file arguments */
+ mfu_file_t *mfu_src_file = NULL;
+ mfu_file_t *mfu_dst_file = NULL;
+
+ /* first item is source and second is dest */
+ mfu_param_path *srcpath = NULL;
+ mfu_param_path *destpath = NULL;
+ mfu_param_path *paths = NULL;
+
+ mfu_flist flist1 = NULL;
+ mfu_flist flist2 = NULL;
+
+ /* allocate structure to define walk options */
+ if (use_config_file > 0) {
+ int count1 = 0, count2 = 0;
+ for (i = 0; i < use_config_file; i++) {
+ int index = config_index[i];
+ const char *config_file = argv[index];
+ /* config_file + 1 skips the first character of the argument
+ * (presumably a marker prefix -- confirm against copy_args) */
+ if (i == 0) {
+ flist1 = mfu_flist_new();
+ count1 = fill_file_list(flist1, config_file + 1, rank, ranks);
+ }
+ else if (i == 1) {
+ flist2 = mfu_flist_new();
+ count2 = fill_file_list(flist2, config_file + 1, rank, ranks);
+ }
+ }
+ if (count1 != count2) {
+ HDprintf("config files have different file counts: (1) %d and (2) %d\n", count1, count2);
+ }
+ }
+ else if (numpaths > 0) {
+
+ /* allocate space for each path */
+ paths = (mfu_param_path *)MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));
+ mfu_src_file = mfu_file_new();
+
+ /* process each path */
+ mfu_param_path_set_all((uint64_t)numpaths, (const char **)argpaths, paths, mfu_src_file, true);
+
+ /* don't allow user to specify input file with walk */
+ /* NOTE(review): inputname is always NULL here; the non-NULL case exited above */
+ if (inputname != NULL) {
+ if (paths) {
+ mfu_free(&paths);
+ }
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ }
+ }
+ else {
+ /* if we're not walking, we must be reading,
+ * and for that we need a file */
+ if (inputname == NULL) {
+ if (rank == 0) {
+ MFU_LOG(MFU_LOG_ERR, "Either a <path> or --input is required.");
+ }
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ }
+ }
+
+ /* NOTE(review): when use_config_file > 0 and numpaths > 0, 'paths' is still
+ * NULL here and flist1/flist2 are replaced -- confirm that cannot happen */
+ if (numpaths > 0) {
+ flist1 = mfu_flist_new();
+ srcpath = &paths[0];
+ path1 = srcpath->path;
+ pathlen_total += strlen(path1);
+ mfu_flist_walk_param_paths(1, srcpath, walk_opts, flist1, mfu_src_file);
+ }
+ if (numpaths > 1) {
+ flist2 = mfu_flist_new();
+ mfu_dst_file = mfu_file_new();
+ destpath = &paths[1];
+ path2 = destpath->path;
+ pathlen_total += HDstrlen(path2);
+ mfu_flist_walk_param_paths(1, destpath, walk_opts, flist2, mfu_dst_file);
+ }
+
+ if (tool_selected && (args_byte_length > 0)) {
+ pred_head = mfu_pred_new();
+ /* Buffer that carries the packed tool arguments to MFU_PRED_EXEC */
+ args_buf = (char *)HDmalloc((size_t)(args_byte_length + pathlen_total));
+ }
+
+ /* filter files to only include hdf5 files */
+ if (flist1) {
+ filter_hdf_files(&flist1, NULL, 0, 0);
+ }
+ if (flist2) {
+ filter_hdf_files(&flist2, NULL, 0, 0);
+ }
+
+ /* if (numpaths > 1)
+ * In a case where we require the list indices of files from multiple
+ * directories to match, we must utilize a mapping function.
+ * The question to answer is how does the mapping function work?
+ * The most probable is a sort function, e.g.
+ * 1) an alphabet sort?
+ * 2) sort by file size?
+ * 3) something else?
+ */
+ /* Pack the tool command line: an int count, then each argument as a
+ * NUL-terminated string.  Arguments that are config files or directory
+ * paths are replaced by '&' followed by the raw flist1/flist2 handle.
+ */
+ if (args_buf != NULL) {
+ int k = 0;
+ char *ptr = args_buf + sizeof(int);
+ *(int *)args_buf = h5tool_argc;
+ for (i = tool_args_start - 1; i < argc; i++) {
+ int copy_flist = -1;
+ /* NOTE(review): k indexes config_index and path_indices without a
+ * visible bound check -- confirm both hold enough entries */
+ if (i == config_index[k]) {
+ copy_flist = k;
+ }
+ else if (path_indices && (i == path_indices[k])) {
+ copy_flist = k;
+ }
+
+ /* Maybe copy one of the flist pointers */
+ if (copy_flist >= 0) {
+ /* The '&' indicates that what follows is a pointer */
+ *ptr++ = '&';
+ /* Select which argument list should be used */
+ if (k == 0) {
+ HDmemcpy(ptr, &flist1, sizeof(void *));
+ }
+ if (k == 1) {
+ HDmemcpy(ptr, &flist2, sizeof(void *));
+ }
+ ptr += sizeof(mfu_flist *);
+ k++;
+ }
+ else {
+ HDstrcpy(ptr, argv[i]);
+ ptr += HDstrlen(argv[i]);
+ }
+ *ptr++ = 0;
+ }
+ *ptr++ = 0;
+
+ mfu_pred_add(pred_head, MFU_PRED_EXEC, (void *)args_buf);
+ pred_commit(pred_head);
+ }
+
+ /* apply predicates to each item in list */
+ mfu_flist flist3 = mfu_flist_filter_pred(flist1, pred_head);
+
+ /* print summary statistics of flist */
+ mfu_flist_print_summary(flist1);
+
+ /* write data to cache file */
+ if (outputname != NULL) {
+ if (!text) {
+ if (rank == 0) {
+ puts("ouput capture needs to be a text formated file");
+ }
+ }
+ else {
+ dh5tool_flist_write_text(outputname, flist1);
+ }
+ }
+
+#ifdef DAOS_SUPPORT
+ /* NOTE(review): 'mfu_file' is not declared in this function -- confirm it exists */
+ daos_cleanup(daos_args, mfu_file, NULL);
+#endif
+
+ /* free users, groups, and files objects */
+ mfu_flist_free(&flist1);
+ if (flist2)
+ mfu_flist_free(&flist2);
+ if (flist3)
+ mfu_flist_free(&flist3);
+
+ /* free memory allocated for options */
+ mfu_free(&distribution);
+ mfu_free(&sortfields);
+ mfu_free(&outputname);
+ mfu_free(&inputname);
+
+ /* free the path parameters */
+ mfu_param_path_free_all((uint64_t)numpaths, paths);
+
+ /* free memory allocated to hold params */
+ mfu_free(&paths);
+
+ /* free the walk options */
+ mfu_walk_opts_delete(&walk_opts);
+
+ /* delete file object */
+ mfu_file_delete(&mfu_src_file);
+
+ /* NOTE(review): args_buf, pred_head, mfu_dst_file, h5tool_argv and
+ * path_indices are not released in this function */
+ h5tools_close();
+ /* shut down MPI */
+ mfu_finalize();
+ MPI_Finalize();
+
+ return rc;
+}
+
+/*-------------------------------------------------------------------------
+ * Function: h5dwalk_exit
+ *
+ * Purpose: Close the tools library, finalize libmfu and (when MPI is
+ *          still active) MPI, then terminate the process.
+ *
+ * Return: Does not return; the process exits with 'status'.
+ *
+ * Programmer: Albert Cheng
+ * Date: Feb 6, 2005
+ *
+ * Comments: MPI_Initialized() keeps reporting true after MPI_Finalize()
+ *           has run, so MPI_Finalized() is consulted as well to avoid
+ *           finalizing MPI a second time.
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+H5_ATTR_NORETURN void
+h5dwalk_exit(int status)
+{
+    int mpi_initialized = 0;
+    int mpi_finalized   = 0;
+
+    h5tools_close();
+    mfu_finalize();
+
+    /* Only finalize MPI when it was started and has not been shut down yet */
+    MPI_Initialized(&mpi_initialized);
+    MPI_Finalized(&mpi_finalized);
+    if (mpi_initialized && !mpi_finalized)
+        MPI_Finalize();
+
+    HDexit(status);
+}
diff --git a/utils/tools/test/CMakeLists.txt b/utils/tools/test/CMakeLists.txt
new file mode 100644
index 0000000..0f5335d
--- /dev/null
+++ b/utils/tools/test/CMakeLists.txt
@@ -0,0 +1,8 @@
+cmake_minimum_required (VERSION 3.12)
+project (HDF5_TOOLS_TEST C)
+
+#-- Add the h5dwalk tests (only when the parallel tools are being built)
+if (HDF5_BUILD_PARALLEL_TOOLS)
+ add_subdirectory (h5dwalk)
+endif()
+
diff --git a/utils/tools/test/Makefile.am b/utils/tools/test/Makefile.am
new file mode 100644
index 0000000..88104f2
--- /dev/null
+++ b/utils/tools/test/Makefile.am
@@ -0,0 +1,32 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+##
+## Makefile.am
+## Run automake to generate a Makefile.in from this file.
+##
+#
+# Tools HDF5 Makefile(.in)
+#
+
+include $(top_srcdir)/config/commence.am
+
+# Descend into the h5dwalk test directory only when the parallel tools
+# are enabled; otherwise H5DWALK is empty and SUBDIRS lists nothing.
+if PARALLEL_TOOLS_CONDITIONAL
+ H5DWALK=h5dwalk
+else
+ H5DWALK=
+endif
+
+CONFIG=ordered
+
+# All subdirectories
+SUBDIRS=$(H5DWALK)
+
+include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/test/h5dwalk/CMakeLists.txt b/utils/tools/test/h5dwalk/CMakeLists.txt
new file mode 100644
index 0000000..5f6c992
--- /dev/null
+++ b/utils/tools/test/h5dwalk/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required (VERSION 3.12)
+# Name the language explicitly: project() without a language list enables
+# both C and CXX, and nothing in this directory needs a C++ compiler.
+project (HDF5_TOOLS_TEST_H5DWALK C)
+
+# Rule that runs copy_demo_files.sh to populate the demo/test files.
+# NOTE(review): no target in this file depends on the OUTPUT below, so the
+# rule only runs if something elsewhere consumes it -- confirm.
+if (HDF5_BUILD_PARALLEL_TOOLS)
+  add_custom_command (
+      OUTPUT ${HDF5_TOOLS_DIR}/test/demo_destfiles.test
+      COMMAND bash -c ${HDF5_TOOLS_SRC_H5DWALK_SOURCE_DIR}/copy_demo_files.sh
+      ARGS ${HDF5_TOOLS_DIR}/test ${CMAKE_BINARY_DIR}/bin
+      DEPENDS ${HDF5_TOOLS_SRC_H5DWALK_SOURCE_DIR}/copy_demo_files.sh
+      VERBATIM
+  )
+endif ()
+
+# Register the CTest tests for h5dwalk
+if (HDF5_TEST_TOOLS AND HDF5_TEST_SERIAL)
+  include (CMakeTests.cmake)
+endif ()
diff --git a/utils/tools/test/h5dwalk/CMakeTests.cmake b/utils/tools/test/h5dwalk/CMakeTests.cmake
new file mode 100644
index 0000000..b9e52c5
--- /dev/null
+++ b/utils/tools/test/h5dwalk/CMakeTests.cmake
@@ -0,0 +1,56 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+
+##############################################################################
+##############################################################################
+### T E S T I N G ###
+##############################################################################
+##############################################################################
+
+ # Working directory for the tests registered by ADD_H5_TEST below
+ file (MAKE_DIRECTORY "${PROJECT_BINARY_DIR}/testfiles")
+
+
+##############################################################################
+##############################################################################
+### T H E T E S T S M A C R O S ###
+##############################################################################
+##############################################################################
+
+ # ADD_H5_TEST (resultfile resultcode [args...])
+ #
+ # Register the CTest test H5DWALK-<resultfile>, which runs h5dwalk with the
+ # remaining arguments inside ${PROJECT_BINARY_DIR}/testfiles.
+ #   resultfile - base name: output goes to <resultfile>.out and is checked
+ #                against the reference file <resultfile>.h5dwalk
+ #   resultcode - expected exit code ("1" marks the memchecker variant WILL_FAIL)
+ #
+ # Example: ADD_H5_TEST (help-1 0 -h)
+   macro (ADD_H5_TEST resultfile resultcode)
+     if (HDF5_ENABLE_USING_MEMCHECKER)
+       # With a memory checker the tool is run directly, without the
+       # runTest.cmake wrapper, so its output is not compared
+       add_test (NAME H5DWALK-${resultfile} COMMAND ${CMAKE_CROSSCOMPILING_EMULATOR} $<TARGET_FILE:h5dwalk${tgt_file_ext}> ${ARGN})
+       set_tests_properties (H5DWALK-${resultfile} PROPERTIES
+           WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/testfiles")
+       if ("${resultcode}" STREQUAL "1")
+         set_tests_properties (H5DWALK-${resultfile} PROPERTIES WILL_FAIL "true")
+       endif ()
+     else ()
+       # Run the tool through runTest.cmake, which receives the expected exit
+       # code and the output/reference file names below
+       add_test (
+           NAME H5DWALK-${resultfile}
+           COMMAND "${CMAKE_COMMAND}"
+               -D "TEST_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}"
+               -D "TEST_PROGRAM=$<TARGET_FILE:h5dwalk${tgt_file_ext}>"
+               -D "TEST_ARGS=${ARGN}"
+               -D "TEST_FOLDER=${PROJECT_BINARY_DIR}/testfiles"
+               -D "TEST_OUTPUT=${resultfile}.out"
+               -D "TEST_EXPECT=${resultcode}"
+               -D "TEST_REFERENCE=${resultfile}.h5dwalk"
+               -D "TEST_LIBRARY_DIRECTORY=${LL_PATH}"
+               -P "${HDF_RESOURCES_EXT_DIR}/runTest.cmake"
+       )
+     endif ()
+   endmacro ()
+
+ # Usage text test: run "h5dwalk -h" and expect exit code 0.
+ # NOTE(review): the macro uses help-1.h5dwalk as reference file, and nothing
+ # in this file places it in the testfiles folder -- confirm it is provided.
+ ADD_H5_TEST(help-1 0 -h)
diff --git a/utils/tools/test/h5dwalk/Makefile.am b/utils/tools/test/h5dwalk/Makefile.am
new file mode 100644
index 0000000..c32dd0f
--- /dev/null
+++ b/utils/tools/test/h5dwalk/Makefile.am
@@ -0,0 +1,43 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+##
+## Makefile.am
+## Run automake to generate a Makefile.in from this file.
+#
+# HDF5 Library Makefile(.in)
+#
+
+include $(top_srcdir)/config/commence.am
+
+# Include src directory
+AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/tools/lib
+
+# Create the demo/test input files by sourcing copy_demo_files.sh.
+# NOTE(review): the script is sourced without a "./" prefix, so the shell may
+# look it up through PATH -- confirm this works in out-of-tree builds.
+install-examples:
+ @echo "Creating demo files" && \
+ . copy_demo_files.sh $(top_srcdir)/src
+
+bin_SCRIPTS:install-examples
+
+#test script and program
+TEST_SCRIPT=testh5dwalk.sh copy_demo_files.sh
+check_SCRIPTS=$(TEST_SCRIPT)
+SCRIPT_DEPEND=../../h5dwalk/h5dwalk$(EXEEXT)
+
+# No additional files to clean in this directory
+CLEANFILES=
+
+# These were generated by configure. Remove them only when distclean.
+DISTCLEANFILES=testh5dwalk.sh copy_demo_files.sh
+
+# All programs rely on hdf5 library and h5tools library
+LDADD=$(LIBH5TOOLS) $(LIBHDF5)
+
+include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/test/h5dwalk/copy_demo_files.sh.in b/utils/tools/test/h5dwalk/copy_demo_files.sh.in
new file mode 100644
index 0000000..f20bf43
--- /dev/null
+++ b/utils/tools/test/h5dwalk/copy_demo_files.sh.in
@@ -0,0 +1,86 @@
+#! /bin/sh
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+srcdir=@srcdir@
+TOP_BUILDDIR=..
+
+# Determine if backward compatibility options enabled
+DEPRECATED_SYMBOLS="yes"
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+
+CP='cp'
+
+THIS_DIR=`pwd`
+SRC_TOOLS_DIR=$srcdir/../../../../tools
+
+nerrors=0
+verbose=yes
+exit_code=$EXIT_SUCCESS
+
+
+# Add Testing files into the local testfiles directory::
+TESTDIR=./testfiles
+test -d $TESTDIR || mkdir $TESTDIR
+
+echo "HDF5 \"$THIS_DIR/testfiles/h5diff_basic1.h5\" {" > "$THIS_DIR"/testfiles/h5diff_basic1.h5_h5dump.txt
+echo "FILE_CONTENTS {
+ group /
+ group /g1
+ dataset /g1/d1
+ dataset /g1/d2
+ dataset /g1/dset1
+ dataset /g1/dset10
+ dataset /g1/dset11
+ dataset /g1/dset12
+ dataset /g1/dset3
+ dataset /g1/dset5
+ dataset /g1/dset6
+ dataset /g1/dset7
+ dataset /g1/dset8
+ dataset /g1/dset9
+ dataset /g1/fp1
+ dataset /g1/fp15
+ dataset /g1/fp16
+ dataset /g1/fp17
+ dataset /g1/fp18
+ dataset /g1/fp18_COPY
+ dataset /g1/fp19
+ dataset /g1/fp19_COPY
+ dataset /g1/fp2
+ dataset /g1/fp20
+ dataset /g1/fp20_COPY
+ dataset /g1/ld
+ }
+}" >> "$THIS_DIR"/testfiles/h5diff_basic1.h5_h5dump.txt
+
+# Write a default help-1.txt (expected output of 'h5dwalk -h').
+# The copy of help.h5dwalk below overwrites this file when it succeeds.
+echo "
+Usage: h5dwalk [options] <path> ...
+
+Options:
+ -i, --input <file> - read list from file
+ -o, --output <file> - write output summary to the named file.
+ -E, --error <file> - write processed errors to file in text format
+ -l, --log_text <dir> - write individual tool outputs to a file. Logs can be written to an optional named directory.
+ -T, --tool <executable> - name of the HDF5 tool to invoke
+ -h, --help - print usage
+
+For more information see https://mpifileutils.readthedocs.io.
+" > "$THIS_DIR"/testfiles/help-1.txt
+
+# Install the reference usage text as the expected output for both
+# '-h' (help-1.txt) and '--help' (help-2.txt)
+$CP "$srcdir"/help.h5dwalk "$THIS_DIR"/testfiles/help-1.txt
+$CP "$srcdir"/help.h5dwalk "$THIS_DIR"/testfiles/help-2.txt
+# Make a copy of a simple HDF5 datafile which will be used as input for h5dump -n (see the expected output above)
+$CP "$SRC_TOOLS_DIR"/test/h5diff/testfiles/h5diff_basic1.h5 "$THIS_DIR"/testfiles
diff --git a/utils/tools/test/h5dwalk/help.h5dwalk b/utils/tools/test/h5dwalk/help.h5dwalk
new file mode 100644
index 0000000..986cbba
--- /dev/null
+++ b/utils/tools/test/h5dwalk/help.h5dwalk
@@ -0,0 +1,13 @@
+
+Usage: h5dwalk [options] <path> ...
+
+Options:
+ -i, --input <file> - read list from file
+ -o, --output <file> - write output summary to the named file.
+ -E, --error <file> - write processed errors to file in text format
+ -l, --log_text <dir> - write individual tool outputs to a file. Logs can be written to an optional named directory.
+ -T, --tool <executable> - name of the HDF5 tool to invoke
+ -h, --help - print usage
+
+For more information see https://mpifileutils.readthedocs.io.
+
diff --git a/utils/tools/test/h5dwalk/testh5dwalk.sh.in b/utils/tools/test/h5dwalk/testh5dwalk.sh.in
new file mode 100644
index 0000000..4f6dbde
--- /dev/null
+++ b/utils/tools/test/h5dwalk/testh5dwalk.sh.in
@@ -0,0 +1,249 @@
+#! /bin/sh
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+#
+
+# Tests for the h5dwalk tool
+
+#
+#
+
+srcdir=@srcdir@
+
+# Determine which filters are available
+USE_FILTER_SZIP="@USE_FILTER_SZIP@"
+USE_FILTER_DEFLATE="@USE_FILTER_DEFLATE@"
+
+
+TESTNAME=h5dwalk
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+
+THIS_DIR="`pwd`"
+ROOTDIR="`cd ../../../..; pwd`"
+cd "$THIS_DIR"
+TOP_DIR="$ROOTDIR"
+
+
+H5DWALK=../../h5dwalk/h5dwalk
+H5DWALK_BIN="$TOP_DIR/utils/tools/h5dwalk/h5dwalk"
+
+
+H5DUMP="$TOP_DIR/src/h5dump/h5dump"
+H5DUMP_BIN="$TOP_DIR/tools/src/h5dump/h5dump"
+
+RM='rm -rf'
+CMP='cmp -s'
+DIFF='diff -c'
+CP='cp'
+DIRNAME='dirname'
+LS='ls'
+AWK='awk'
+WC='wc'
+
+nerrors=0
+verbose=yes
+
+export LD_LIBRARY_PATH=@LL_PATH@
+
+# source dirs
+SRC_TOOLS="$TOP_DIR/tools/test"
+SRC_TOOLS_TESTFILES="$SRC_TOOLS/testfiles"
+
+# testfiles source dirs for tools
+SRC_H5LS_TESTFILES="$SRC_TOOLS_TESTFILES"
+SRC_H5DUMP_TESTFILES="$SRC_TOOLS_TESTFILES"
+SRC_H5DIFF_TESTFILES="$SRC_TOOLS/h5diff/testfiles"
+SRC_H5COPY_TESTFILES="$SRC_TOOLS/h5copy/testfiles"
+SRC_H5REPACK_TESTFILES="$SRC_TOOLS/h5repack/testfiles"
+SRC_H5JAM_TESTFILES="$SRC_TOOLS/h5jam/testfiles"
+SRC_H5DWALK_TESTFILES="$SRC_TOOLS/h5dwalk/testfiles"
+SRC_H5IMPORT_TESTFILES="$SRC_TOOLS/h5import/testfiles"
+
+TESTDIR=./testfiles
+test -d $TESTDIR || mkdir $TESTDIR
+
+echo "SRC_H5DIFF_TESTFILES = $SRC_H5DIFF_TESTFILES"
+echo "Creating demo files"
+. ./copy_demo_files.sh
+
+
+
+CLEAN_TESTFILES_AND_TESTDIR()
+{
+ echo "cleaning logfiles"
+ $RM $TESTDIR/*log*
+}
+
+# Print a one-line message beginning with the word "Testing", cut to at
+# most 70 characters and without a trailing newline, so that the result
+# (PASSED/FAILED) can be printed on the same line.
+#
+TESTING() {
+ SPACES=" "
+ echo "Testing $* $SPACES" | cut -c1-70 | tr -d '\012'
+}
+
+# Run h5dwalk with the given arguments and print PASSED or *FAILED*.
+# The first argument names the expected-output file inside $TESTDIR; the
+# remaining arguments are passed to the tool. Standard output is captured
+# in `<name>.out' and standard error in `<name>.out.err'. When the
+# expected-output file does not exist, the captured standard error is
+# compared with `<name>.err' instead. On a mismatch the `nerrors' global
+# variable is incremented and (if $verbose is set) the difference is
+# displayed. The captured files are not removed if $HDF5_NOCLEANUP has a
+# non-empty value.
+#
+TOOLTEST() {
+ expect="$TESTDIR/$1"
+ expect_err="$TESTDIR/`basename $1`.err"
+ actual="$TESTDIR/`basename $1`.out"
+ actual_err="$TESTDIR/`basename $1`.out.err"
+ actual_sav=${actual}-sav
+ actual_err_sav=${actual_err}-sav
+ shift
+
+ # Run test.
+
+ TESTING $H5DWALK $@
+ (
+ cd $TESTDIR
+ $RUNSERIAL $H5DWALK_BIN $@
+ ) 1> $actual 2> $actual_err
+
+ # save actual and actual_err in case they are needed later.
+ cp $actual $actual_sav
+ cp $actual_err $actual_err_sav
+
+ if [ ! -f $expect ]; then
+ # Compare error files if the expect file doesn't exist.
+ if $CMP $expect_err $actual_err; then
+ echo " PASSED"
+ else
+ echo "*FAILED*"
+ echo " Expected result (*.err) differs from actual result (*.out.err)"
+ nerrors="`expr $nerrors + 1`"
+ test yes = "$verbose" && $DIFF $expect_err $actual_err |sed 's/^/ /'
+ fi
+ elif $CMP $expect $actual; then
+ echo " PASSED"
+ else
+ echo "*FAILED*"
+ echo " Expected result (*.ddl) differs from actual result (*.out)"
+ nerrors="`expr $nerrors + 1`"
+ test yes = "$verbose" && $DIFF $expect $actual |sed 's/^/ /'
+ fi
+
+ # Clean up output file
+ if test -z "$HDF5_NOCLEANUP"; then
+ rm -f $actual $actual_err $actual_sav $actual_err_sav
+ fi
+}
+
+# Run h5dwalk with the given arguments and validate the log it produces.
+# The first argument is the base name: `$TESTDIR/<name>.txt' holds the
+# expected text and `$TESTDIR/<name>.log' receives the output. The check
+# compares the number of lines of the two files. A failure increments the
+# `nerrors' global variable so that the script exits with a failure status.
+#
+TOOL_LOGTEST() {
+    expect="$TESTDIR/`basename $1`.txt"
+    expect_err="$TESTDIR/`basename $1`.err"
+    actual="$TESTDIR/`basename $1`.log"
+    actual_err="$TESTDIR/`basename $1`.out.err"
+    actual_sav=${actual}-sav
+    actual_err_sav=${actual_err}-sav
+    shift
+
+    echo "running logtest"
+
+    # Run test.
+    TESTING $H5DWALK $@
+    (
+        cd $TESTDIR
+        $RUNSERIAL $H5DWALK_BIN $@
+
+    ) 1> $actual 2> $actual_err
+    expect_len="`wc -l < $expect`"
+
+    if [ ! -f $actual ]; then
+        echo "*FAILED*"
+        echo " The expected .log file is missing"
+        echo " Perhaps the test failed to run?"
+        nerrors="`expr $nerrors + 1`"
+    else
+        actual_len="`wc -l < $actual`"
+        if [ $actual_len -eq $expect_len ]; then
+            echo " PASSED"
+        else
+            echo "*FAILED*"
+            echo " The generated .log file length does not match the expected length. $actual_len != $expect_len"
+            nerrors="`expr $nerrors + 1`"
+        fi
+    fi
+
+    # Clean up output file
+    if test -z "$HDF5_NOCLEANUP"; then
+        rm -f $actual $actual_err $actual_sav $actual_err_sav
+    fi
+}
+
+TOOL_CHK_LOGLEN() {
+ expect=$1
+ shift
+
+ echo "running tool_chk_loglen"
+
+ # Run test.
+ TESTING $H5DWALK $@
+ (
+ cd $TESTDIR
+ $RUNSERIAL $H5DWALK_BIN $@
+ )
+
+ expect_len="`wc -l < $expect`"
+ if [ "$expect_len" -gt 0 ]; then
+ echo " PASSED"
+ else
+ echo "*FAILED*"
+ echo " The generated .log file is empty!."
+ fi
+
+ # Clean up output file
+ if test -z "$HDF5_NOCLEANUP"; then
+ rm -f $expect
+ fi
+}
+
+
+# Print a "SKIP" message for a test that is not run; the arguments are the
+# ones the test would have passed to h5dwalk.
+SKIP() {
+    TESTING $H5DWALK $@
+    echo " -SKIP-"
+}
+
+
+
+##############################################################################
+##############################################################################
+### T H E T E S T S ###
+##############################################################################
+##############################################################################
+
+# Usage text, short and long option
+TOOLTEST help-1.txt -h
+TOOLTEST help-2.txt --help
+# Run 'h5dump -n' on one file with per-tool logging (-l) enabled
+TOOL_LOGTEST h5diff_basic1.h5_h5dump -l -T $H5DUMP_BIN -n ./h5diff_basic1.h5
+# Run 'h5dump -n' on the current directory and write the summary with -o
+TOOL_CHK_LOGLEN showme-h5dump.log -o `pwd`/showme-h5dump.log -T $H5DUMP_BIN -n `pwd`
+
+
+#
+#
+# Clean up temporary files/directories
+CLEAN_TESTFILES_AND_TESTDIR
+
+# Exit status reflects the number of failures counted in nerrors
+if test $nerrors -eq 0 ; then
+ echo "All $TESTNAME tests passed."
+ exit $EXIT_SUCCESS
+else
+ echo "$TESTNAME tests failed with $nerrors errors."
+ exit $EXIT_FAILURE
+fi
+