summaryrefslogtreecommitdiffstats
path: root/utils/tools/h5dwalk
diff options
context:
space:
mode:
authorrawarren <richardwarren2@verizon.net>2021-11-29 21:25:23 (GMT)
committerGitHub <noreply@github.com>2021-11-29 21:25:23 (GMT)
commit720ddb20f347f5ea4e573c44f64e1886d1dc1038 (patch)
treebdd32da8424488f6d10221518c08ed907ac2be18 /utils/tools/h5dwalk
parent9cdc6d58bdc0a8bce74559d15fae1284beb82033 (diff)
downloadhdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.zip
hdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.tar.gz
hdf5-720ddb20f347f5ea4e573c44f64e1886d1dc1038.tar.bz2
Add support for parallel tools based on the 3rd party library mpiFileUtils (libMFU) … (#1177)
Adds tool h5dwalk and configure options to enable building it. Co-authored-by: Richard Warren <Richard.Warren@hdfgroup.org> Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Richard.Warren <richard.warren@jelly.ad.hdfgroup.org> Co-authored-by: Larry Knox <lrknox@hdfgroup.org>
Diffstat (limited to 'utils/tools/h5dwalk')
-rw-r--r--utils/tools/h5dwalk/CMakeLists.txt66
-rw-r--r--utils/tools/h5dwalk/Makefile.am37
-rw-r--r--utils/tools/h5dwalk/h5dwalk.142
-rw-r--r--utils/tools/h5dwalk/h5dwalk.c1712
4 files changed, 1857 insertions, 0 deletions
diff --git a/utils/tools/h5dwalk/CMakeLists.txt b/utils/tools/h5dwalk/CMakeLists.txt
new file mode 100644
index 0000000..244cc26
--- /dev/null
+++ b/utils/tools/h5dwalk/CMakeLists.txt
@@ -0,0 +1,66 @@
+cmake_minimum_required (VERSION 3.12)
+project (HDF5_UTILS_TOOLS_H5DWALK C)
+
+# --------------------------------------------------------------------
+# Add the h5dwalk executable (static variant: it links
+# ${HDF5_TOOLS_LIB_TARGET} and ${HDF5_LIB_TARGET}).
+# The target is skipped when ONLY_SHARED_LIBS is set.
+# No test executables are defined in this file.
+# --------------------------------------------------------------------
+if (NOT ONLY_SHARED_LIBS)
+ add_executable (h5dwalk ${HDF5_UTILS_TOOLS_H5DWALK_SOURCE_DIR}/h5dwalk.c)
+# add_custom_target(generate_demo ALL
+# DEPENDS "${HDF5_TOOLS_DIR}/test/demo_destfiles.test"
+# )
+ # Header search path: tools library, HDF5 source and generated headers,
+ # ${CIRCLE_INCLUDE_DIR} (presumably the libcircle/mpiFileUtils headers -
+ # confirm where it is set), and the MPI include directories, which the
+ # generator expression adds only when HDF5_ENABLE_PARALLEL is true.
+ target_include_directories (h5dwalk PRIVATE "${HDF5_TOOLS_DIR}/lib;${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};${CIRCLE_INCLUDE_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
+ target_compile_options(h5dwalk PRIVATE "${HDF5_CMAKE_C_FLAGS}")
+ # TARGET_C_PROPERTIES is a project-defined helper (not defined in this file).
+ TARGET_C_PROPERTIES (h5dwalk STATIC)
+ # ${MFU_LIBRARY} supplies the mpiFileUtils library; the MPI libraries are
+ # linked only when HDF5_ENABLE_PARALLEL is true.
+ target_link_libraries (h5dwalk PRIVATE ${HDF5_TOOLS_LIB_TARGET} ${HDF5_LIB_TARGET} ${MFU_LIBRARY} "$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_LIBRARIES}>")
+ set_target_properties (h5dwalk PROPERTIES FOLDER tools)
+ # Append this target to the project-wide list of utilities to export.
+ set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5dwalk")
+
+ # H5_DEP_EXECUTABLES collects the targets handled by the install rules below.
+ set (H5_DEP_EXECUTABLES h5dwalk)
+endif ()
+
+# --------------------------------------------------------------------
+# Add the h5dwalk-shared executable: same source file as h5dwalk, but
+# linked against ${HDF5_TOOLS_LIBSH_TARGET} and ${HDF5_LIBSH_TARGET}.
+# Only built when BUILD_SHARED_LIBS is set.
+# --------------------------------------------------------------------
+if (BUILD_SHARED_LIBS)
+ add_executable (h5dwalk-shared ${HDF5_UTILS_TOOLS_H5DWALK_SOURCE_DIR}/h5dwalk.c)
+ # MPI include directories are added only when HDF5_ENABLE_PARALLEL is true.
+ target_include_directories (h5dwalk-shared PRIVATE "${HDF5_TOOLS_DIR}/lib;${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};${CIRCLE_INCLUDE_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
+ target_compile_options(h5dwalk-shared PRIVATE "${HDF5_CMAKE_C_FLAGS}")
+ # TARGET_C_PROPERTIES is a project-defined helper (not defined in this file).
+ TARGET_C_PROPERTIES (h5dwalk-shared SHARED)
+ # MPI libraries are linked only when HDF5_ENABLE_PARALLEL is true.
+ target_link_libraries (h5dwalk-shared PRIVATE ${HDF5_TOOLS_LIBSH_TARGET} ${HDF5_LIBSH_TARGET} ${MFU_LIBRARY} "$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_LIBRARIES}>")
+ set_target_properties (h5dwalk-shared PROPERTIES FOLDER tools)
+ # Append this target to the project-wide list of utilities to export.
+ set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5dwalk-shared")
+
+ # Add to (rather than replace) the list so both variants get installed.
+ set (H5_DEP_EXECUTABLES ${H5_DEP_EXECUTABLES} h5dwalk-shared)
+endif ()
+
+#-----------------------------------------------------------------------------
+# Add Target to clang-format
+#
+# Only one formatting target is registered: it is attached to h5dwalk when
+# that executable exists (NOT ONLY_SHARED_LIBS), otherwise to h5dwalk-shared.
+# clang_format is a project-defined helper (not defined in this file).
+#-----------------------------------------------------------------------------
+if (HDF5_ENABLE_FORMATTERS)
+ if (NOT ONLY_SHARED_LIBS)
+ clang_format (HDF5_H5DWALK_SRC_FORMAT h5dwalk)
+ else ()
+ clang_format (HDF5_H5DWALK_SRC_FORMAT h5dwalk-shared)
+ endif ()
+endif ()
+
+##############################################################################
+##############################################################################
+### I N S T A L L A T I O N ###
+##############################################################################
+##############################################################################
+
+#-----------------------------------------------------------------------------
+# Rules for Installation of tools using make Install target
+#
+# Nothing is installed unless HDF5_EXPORTED_TARGETS is set. Every executable
+# collected in H5_DEP_EXECUTABLES is installed into ${HDF5_INSTALL_BIN_DIR}
+# as part of the "toolsapplications" component and added to the
+# ${HDF5_EXPORTED_TARGETS} export set.
+#-----------------------------------------------------------------------------
+if (HDF5_EXPORTED_TARGETS)
+ # INSTALL_PROGRAM_PDB is a project-defined helper (not defined in this file);
+ # presumably it installs the debug-symbol (.pdb) file of each program.
+ foreach (exec ${H5_DEP_EXECUTABLES})
+ INSTALL_PROGRAM_PDB (${exec} ${HDF5_INSTALL_BIN_DIR} toolsapplications)
+ endforeach ()
+
+ install (
+ TARGETS
+ ${H5_DEP_EXECUTABLES}
+ EXPORT
+ ${HDF5_EXPORTED_TARGETS}
+ RUNTIME DESTINATION ${HDF5_INSTALL_BIN_DIR} COMPONENT toolsapplications
+ )
+endif ()
diff --git a/utils/tools/h5dwalk/Makefile.am b/utils/tools/h5dwalk/Makefile.am
new file mode 100644
index 0000000..34cdb32
--- /dev/null
+++ b/utils/tools/h5dwalk/Makefile.am
@@ -0,0 +1,37 @@
+#
+# Copyright by The HDF Group.
+# All rights reserved.
+#
+# This file is part of HDF5. The full HDF5 copyright notice, including
+# terms governing use, modification, and redistribution, is contained in
+# the COPYING file, which can be found at the root of the source code
+# distribution tree, or in https://www.hdfgroup.org/licenses.
+# If you do not have access to either file, you may request a copy from
+# help@hdfgroup.org.
+##
+## Makefile.am
+## Run automake to generate a Makefile.in from this file.
+#
+# HDF5 Library Makefile(.in)
+#
+
+include $(top_srcdir)/config/commence.am
+
+# Add the HDF5 src and tools/lib directories to the include path, plus any
+# extra preprocessor flags supplied through H5DWALK_CPPFLAGS
+AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/tools/lib $(H5DWALK_CPPFLAGS)
+
+# This is our main target, the h5dwalk tool
+# h5dwalk_SOURCES=h5dwalk.c $(TOOLSOURCES)
+bin_PROGRAMS=h5dwalk
+#bin_SCRIPTS=install-examples
+
+# Add h5dwalk specific linker flags here
+h5dwalk_LDFLAGS = $(LT_STATIC_EXEC) $(AM_LDFLAGS) $(H5DWALK_LDFLAGS)
+
+# No additional files are listed for cleaning
+CLEANFILES=
+
+# h5dwalk relies on the hdf5 library and the h5tools library, plus the
+# extra libraries supplied through H5DWALK_LIBS
+h5dwalk_LDADD=$(LIBH5TOOLS) $(LIBHDF5) $(H5DWALK_LIBS)
+
+include $(top_srcdir)/config/conclude.am
diff --git a/utils/tools/h5dwalk/h5dwalk.1 b/utils/tools/h5dwalk/h5dwalk.1
new file mode 100644
index 0000000..60e1080
--- /dev/null
+++ b/utils/tools/h5dwalk/h5dwalk.1
@@ -0,0 +1,42 @@
+.TH "h5dwalk" 1
+.SH NAME
+h5dwalk \- Provides a means of extending HDF5 tools by using parallelism on groups of files.
+.SH SYNOPSIS
+h5dwalk [OPTIONS] -T h5tool [H5TOOL_options...]
+.SH DESCRIPTION
+h5dwalk utilizes the mpiFileUtils library to invoke a selected HDF5 tool on a collection of files. The mpiFileUtils library provides the facilities to walk directory trees and provide a selection of files contained therein. This selection can be filtered in various ways. At present, h5dwalk filters the original file selection to include only HDF5 formatted files. The resulting collection or collections can be utilized as the file inputs to the selected h5tool.
+.SH OPTIONS
+.TP
+.B \-h
+or
+.B \-\-help
+Print a usage message and exit.
+.TP
+.B \-i
+or
+.B \-\-input filename
+Read command input from a file. Not yet implemented.
+.TP
+.B \-o
+or
+.B \-\-output filename
+Captures the hdf5 tool output into a named file.
+.TP
+.B \-l
+or
+.B \-\-log [file]
+Captures hdf5 tool output into individual log files. If an optional file (directory) is specified, then output from all tool instances will be written in the given directory. Without the optional filename, each tool instance's output will be captured in a new log file whose name is associated with the hdf5 tool that was run and is written in the current working directory.
+.TP
+.B \-E
+or
+.B \-\-error [file]
+Show all HDF5 error reporting. Behavior is similar to --log, i.e. errors can either be logged in a single named file or in individual tool specific files. Not yet implemented.
+.TP
+.B \-T
+or
+.B \-\-tool hdf5_tool
+Specifies the hdf5 tool that should be invoked for each file in a collection of files. The collection consists of individual HDF5 files found by walking a specified directory tree which is used in place of the normal tool filename argument. The '-T' option should appear on the command line just prior to the HDF5 tool argument options.
+.TP
+.SH "SEE ALSO"
+\&\fIh5dump\fR\|(1), \fIh5diff\fR\|(1), \fIh5repart\fR\|(1),
+\&\fIh5import\fR\|(1), \fIgif2h5\fR\|(1), \fIh52gif\fR\|(1), \fIh5perf\fR\|(1)
diff --git a/utils/tools/h5dwalk/h5dwalk.c b/utils/tools/h5dwalk/h5dwalk.c
new file mode 100644
index 0000000..1f42aed
--- /dev/null
+++ b/utils/tools/h5dwalk/h5dwalk.c
@@ -0,0 +1,1712 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include "H5private.h"
+#include "h5tools.h"
+#include "h5tools_utils.h"
+#include "hdf5.h"
+
+#include "libcircle.h"
+#include "dtcmp.h"
+#include "mfu.h"
+#include "mfu_flist.h"
+#include "mfu_errors.h"
+#include "mfu_flist_internal.h"
+
+/* Name of tool */
+#define PROGRAMNAME "h5dwalk"
+
+#ifdef DAOS_SUPPORT
+#include "mfu_daos.h"
+#endif
+
+/* Copy of argv[0]; set by save_command() */
+static char *user_cmd = NULL;
+/* Scratch storage filled by MPI_Error_string() when an MPI call fails */
+static char mpierrstr[MPI_MAX_ERROR_STRING];
+static int mpierrlen;
+/* usage() prints only when this is 0; presumably holds this process's MPI
+ * rank - confirm where it is assigned */
+static int sg_mpi_rank = 0;
+/* NOTE(review): the next two are not referenced in the code visible here;
+ * presumably they track progress through an input (config) file - confirm */
+static int current_input_index = 0;
+static int processing_inputfile = 0;
+
+/* Forward declarations of file-local helpers */
+static void dh5tool_flist_write_text(const char *name, mfu_flist bflist);
+static void run_command(int argc, char **argv, char *cmdline, const char *fname);
+static void add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count);
+static int process_input_file(char *inputname, int myrank, int size);
+static void usage(void);
+
+/* Declared noreturn: h5dwalk_exit() does not return to its caller */
+H5_ATTR_NORETURN void h5dwalk_exit(int status);
+
+/* keep stats during walk */
+uint64_t total_dirs = 0;
+uint64_t total_files = 0;
+uint64_t total_links = 0;
+uint64_t total_unknown = 0;
+uint64_t total_bytes = 0;
+
+/* Cache of output buffers: 'buf_cache' is an array of buffer pointers and
+ * 'buft_count' is the number currently in use (get_local_bufsize() reads
+ * buf_cache[buft_count - 1]).
+ * NOTE(review): BUFT_SIZE is presumably the size in bytes of each cached
+ * buffer and 'buft_max' the capacity of 'buf_cache' - confirm at the
+ * allocation site.
+ */
+#define BUFT_SIZE 131072
+/* FIXME: 'buft_max' should probably be configurable.. */
+size_t buft_max = 64;
+size_t buft_count = 0;
+buf_t **buf_cache = NULL;
+
+/* global flags which indicate whether we need
+ * to capture tool outputs into a file...
+ * Related to this is whether the stderr should
+ * be logged separately.
+ * Each flag is paired with the file name that follows it.
+ */
+int log_output_in_single_file = 0;
+char *output_log_file = NULL;
+
+int log_stdout_in_file = 0;
+char *txtlog = NULL;
+
+int log_errors_in_file = 0;
+char *errlog = NULL;
+
+/* Number of '@file' arguments accepted by count_dirpaths(); the argv
+ * position of each one is stored in config_index[], which has room for
+ * four entries.
+ */
+int use_config_file = 0;
+int config_index[4] = {
+ 0,
+};
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+/* Capacity of the separators[] array below */
+#define MAX_DISTRIBUTE_SEPARATORS 128
+/* File-size histogram configuration.
+ *
+ * separator_number  number of entries of separators[] that are in use
+ * separators        bin boundaries (file sizes in bytes);
+ *                   distribute_separator_add() keeps them sorted in
+ *                   ascending order without duplicates
+ */
+struct distribute_option {
+ int separator_number;
+ uint64_t separators[MAX_DISTRIBUTE_SEPARATORS];
+};
+
+/* Command-line option tables.
+ *
+ * s_opts lists the short option letters. Judging by the matching l_opts
+ * entries, a trailing '*' marks an optional argument (l, E) and a trailing
+ * ':' a required one (i, o, T) - confirm against the h5tools option parser.
+ * l_opts maps each long option name to its short letter; the all-NULL/zero
+ * entry terminates the table.
+ */
+static const char * s_opts = "hl*E*i:o:T:";
+static struct h5_long_options l_opts[] = {{"help", no_arg, 'h'},
+ {"log_text", optional_arg, 'l'},
+ {"error", optional_arg, 'E'},
+ {"input", require_arg, 'i'},
+ {"output", require_arg, 'o'},
+ {"tool", require_arg, 'T'},
+ {NULL, 0, '\0'}};
+/* Record the program invocation name.
+ *
+ * Stores a duplicate of 'argv0' (made with HDstrdup) in the file-scope
+ * 'user_cmd' variable. 'argv0' must not be NULL.
+ */
+static void
+save_command(const char *argv0)
+{
+    assert(argv0 != NULL);
+
+    user_cmd = HDstrdup(argv0);
+}
+
+/* Build the default histogram bin boundaries from the largest file size.
+ *
+ * option               receives the boundaries in option->separators[]:
+ *                      entry 0 is 1 byte, entry k (k >= 1) is 2^(10*k) bytes
+ * flist, size          local file list and its element count
+ * separators           out: index of the last boundary that was filled in
+ * global_max_file_size out: largest file size over all ranks
+ *
+ * Collective over MPI_COMM_WORLD (uses MPI_Allreduce); every rank prints the
+ * "Max File Size" line.
+ */
+static void
+create_default_separators(struct distribute_option *option, mfu_flist *flist, uint64_t *size,
+                          size_t *separators, uint64_t *global_max_file_size)
+{
+    uint64_t    local_max_file_size = 0;
+    uint64_t    max_magnitude_bin   = 0;
+    double      max_size_tmp;
+    const char *max_size_units;
+
+    /* get local max file size for Allreduce */
+    for (uint64_t i = 0; i < *size; i++) {
+        uint64_t file_size = mfu_flist_file_get_size(*flist, i);
+        if (file_size > local_max_file_size) {
+            local_max_file_size = file_size;
+        }
+    }
+
+    /* get the max file size across all ranks */
+    MPI_Allreduce(&local_max_file_size, global_max_file_size, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+    /* print and convert max file size to appropriate units */
+    mfu_format_bytes(*global_max_file_size, &max_size_tmp, &max_size_units);
+    HDprintf("Max File Size: %.3lf %s\n", max_size_tmp, max_size_units);
+
+    /* Round log2(max size) up to the next multiple of 10.
+     * log2(0) is -infinity and converting that to an unsigned integer is
+     * undefined, so sizes 0 and 1 are mapped straight to magnitude 0
+     * (which is what log2(1) yields anyway). */
+    if (*global_max_file_size > 1) {
+        max_magnitude_bin = (uint64_t)((ceil(log2((double)(*global_max_file_size)) / 10)) * 10);
+    }
+
+    /* first boundary: 1 byte */
+    option->separators[0] = 1;
+
+    /* index of the last boundary written by the loop below */
+    *separators = (size_t)(max_magnitude_bin / 10);
+
+    /* remaining boundaries: 2^10, 2^20, ... up to 2^max_magnitude_bin.
+     * 2^70 does not fit in 64 bits, so that boundary saturates. */
+    uint64_t power = 10;
+    for (int i = 1; power <= max_magnitude_bin; i++) {
+        option->separators[i] = (power < 64) ? ((uint64_t)1 << power) : UINT64_MAX;
+        power += 10;
+    }
+}
+
+/* Map function handed to mpiFileUtils: assigns list element 'idx' to a rank
+ * in round-robin order (idx modulo ranks). 'flist' and 'args' are unused.
+ *
+ * The modulo is taken on the full 64-bit index; narrowing 'idx' to int first
+ * could produce a negative (invalid) rank for indices above INT_MAX.
+ * Assumes ranks > 0.
+ */
+static int
+h5dwalk_map_fn(mfu_flist flist __attribute__((unused)), uint64_t idx, int ranks,
+               void *args __attribute__((unused)))
+{
+    return (int)(idx % (uint64_t)ranks);
+}
+
+/* Print a histogram of file sizes for the given file list.
+ *
+ * file_histogram  non-zero: derive the bin boundaries from the largest file
+ *                 (create_default_separators); zero: use the boundaries
+ *                 already stored in 'option'
+ * option          histogram configuration (boundaries and their count)
+ * pflist          pointer to the file list to examine
+ * rank            MPI rank of the caller; only rank 0 prints the table
+ *
+ * Collective over MPI_COMM_WORLD. Always returns 0.
+ */
+static int
+print_flist_distribution(int file_histogram, struct distribute_option *option, mfu_flist *pflist, int rank)
+{
+    /* file list to use */
+    mfu_flist flist = *pflist;
+
+    /* local element count and (histogram mode only) the global max size */
+    uint64_t size                 = mfu_flist_size(flist);
+    uint64_t global_max_file_size = 0;
+    size_t   separators           = 0;
+
+    if (file_histogram) {
+        /* create default separators */
+        create_default_separators(option, &flist, &size, &separators, &global_max_file_size);
+    }
+    else {
+        separators = (size_t)option->separator_number;
+    }
+
+    /* one counter per bin; there are separators + 1 bins because the last
+     * one collects everything above the final boundary */
+    uint64_t *dist = (uint64_t *)MFU_MALLOC((separators + 1) * sizeof(uint64_t));
+    for (size_t i = 0; i <= separators; i++) {
+        dist[i] = 0;
+    }
+
+    /* a file belongs to the first bin whose boundary is >= its size; when no
+     * boundary qualifies the index ends up at the last (overflow) bin */
+    for (uint64_t i = 0; i < size; i++) {
+        uint64_t file_size = mfu_flist_file_get_size(flist, i);
+        size_t   bin       = 0;
+
+        while (bin < separators && file_size > option->separators[bin]) {
+            bin++;
+        }
+        dist[bin]++;
+    }
+
+    /* get the total sum across all of the bins */
+    uint64_t *disttotal = (uint64_t *)MFU_MALLOC((separators + 1) * sizeof(uint64_t));
+    MPI_Allreduce(dist, disttotal, (int)(separators + 1), MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+
+    /* Print the file distribution */
+    if (rank == 0) {
+        double      size_tmp;
+        const char *size_units;
+
+        HDprintf("%-27s %s\n", "Range", "Number");
+        for (size_t i = 0; i <= separators; i++) {
+            /* lower bound of the range */
+            HDprintf("%s", "[ ");
+            if (i == 0) {
+                HDprintf("%7.3lf %3s", 0.000, "B");
+            }
+            else {
+                mfu_format_bytes((uint64_t)option->separators[i - 1], &size_tmp, &size_units);
+                HDprintf("%7.3lf %3s", size_tmp, size_units);
+            }
+
+            HDprintf("%s", " - ");
+
+            /* upper bound and count; with user-supplied separators the last
+             * bin has no upper boundary and is labelled MAX */
+            if (!file_histogram && i == separators) {
+                HDprintf("%10s ) %" PRIu64 "\n", "MAX", disttotal[i]);
+            }
+            else {
+                mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
+                HDprintf("%7.3lf %3s ) %" PRIu64 "\n", size_tmp, size_units, disttotal[i]);
+            }
+        }
+    }
+
+    /* free the memory used to hold bin counts */
+    mfu_free(&disttotal);
+    mfu_free(&dist);
+
+    return 0;
+}
+
+/* Insert 'separator' into the sorted option->separators[] array.
+ *
+ * A binary search locates the insertion position; later entries are shifted
+ * up by one slot to make room.
+ *
+ * Returns 0 on success. Returns -1, leaving 'option' unmodified, when the
+ * array is already full (a message is printed) or when 'separator' is
+ * already present.
+ */
+static int
+distribute_separator_add(struct distribute_option *option, uint64_t separator)
+{
+    int count = option->separator_number;
+    int low   = 0;
+    int high  = count;
+
+    /* check capacity before touching the count so that a failed call does
+     * not leave separator_number out of step with the array contents */
+    if (count >= MAX_DISTRIBUTE_SEPARATORS) {
+        HDprintf("Too many separators");
+        return -1;
+    }
+
+    /* lower-bound search: first position whose value is >= separator
+     * (ends at 'count' when every stored value is smaller) */
+    while (low < high) {
+        int middle = low + (high - low) / 2;
+
+        if (option->separators[middle] < separator)
+            low = middle + 1;
+        else
+            high = middle;
+    }
+
+    /* duplicates are rejected */
+    if (low < count && option->separators[low] == separator)
+        return -1;
+
+    /* shift the tail up by one slot to open the insertion position */
+    if (low < count)
+        HDmemmove(&option->separators[low + 1], &option->separators[low],
+                  sizeof(*option->separators) * (size_t)(count - low));
+
+    option->separators[low]  = separator;
+    option->separator_number = count + 1;
+    return 0;
+}
+
+/* Parse a distribution specification of the form "size" or
+ * "size:<sep>[,<sep>...]" into 'option'.
+ *
+ * The separator count in 'option' is reset to zero once the "size" keyword
+ * has been recognized. Each comma separated value is converted with
+ * mfu_abtoull() and inserted with distribute_separator_add().
+ *
+ * Returns 0 on success and -1 when the keyword is missing, the character
+ * after it is not ':', a value cannot be converted, or a value is rejected
+ * by distribute_separator_add().
+ */
+static int
+distribution_parse(struct distribute_option *option, const char *string)
+{
+    const size_t       keyword_len = strlen("size");
+    unsigned long long value;
+    char *             copy;
+    char *             token;
+    char *             following;
+    const char *       end;
+    int                rc = 0;
+
+    if (strncmp(string, "size", keyword_len) != 0)
+        return -1;
+
+    option->separator_number = 0;
+
+    /* bare "size": nothing further to parse */
+    if (strlen(string) == keyword_len)
+        return 0;
+
+    if (string[keyword_len] != ':')
+        return -1;
+
+    /* tokenize a private copy, since the commas are overwritten in place */
+    copy = HDstrdup(string);
+    end  = copy + strlen(string);
+
+    for (token = copy + keyword_len + 1; token != NULL && token < end; token = following) {
+        /* terminate the current token and remember where the next starts */
+        following = strchr(token, ',');
+        if (following != NULL)
+            *following++ = '\0';
+
+        if (mfu_abtoull(token, &value) != MFU_SUCCESS) {
+            HDprintf("Invalid separator \"%s\"\n", token);
+            rc = -1;
+            break;
+        }
+
+        if (distribute_separator_add(option, value)) {
+            HDprintf("Duplicated separator \"%llu\"\n", value);
+            rc = -1;
+            break;
+        }
+    }
+
+    mfu_free(&copy);
+    return rc;
+}
+
+/* Print the command-line help text to 'rawoutstream'.
+ * Only the process whose sg_mpi_rank is 0 prints; all others do nothing.
+ */
+static void
+usage(void)
+{
+    if (sg_mpi_rank == 0) {
+        PRINTVALSTREAM(rawoutstream, "\n");
+        PRINTVALSTREAM(rawoutstream, "Usage: h5dwalk [options] <path> ...\n");
+#ifdef DAOS_SUPPORT
+        PRINTVALSTREAM(rawoutstream, "\n");
+        PRINTVALSTREAM(rawoutstream, "DAOS paths can be specified as:\n");
+        PRINTVALSTREAM(rawoutstream, " daos://<pool>/<cont>[/<path>] | <UNS path>\n");
+#endif
+        PRINTVALSTREAM(rawoutstream, "\n");
+        PRINTVALSTREAM(rawoutstream, "Options:\n");
+        PRINTVALSTREAM(rawoutstream, " -i, --input <file> - read list from file\n");
+        PRINTVALSTREAM(rawoutstream, " -o, --output <file> - write output summary to the named file.\n");
+        PRINTVALSTREAM(rawoutstream,
+                       " -E, --error <file> - write processed errors to file in text format\n");
+        PRINTVALSTREAM(
+            rawoutstream,
+            " -l, --log_text <dir> - write individual tool outputs to a file. Logs can be written to an "
+            "optional named directory.\n");
+        PRINTVALSTREAM(rawoutstream, " -T, --tool <executable> - name of the HDF5 tool to invoke\n");
+        PRINTVALSTREAM(rawoutstream, " -h, --help - print usage\n");
+        PRINTVALSTREAM(rawoutstream, "\n");
+        PRINTVALSTREAM(rawoutstream, "For more information see https://mpifileutils.readthedocs.io. \n");
+        PRINTVALSTREAM(rawoutstream, "\n");
+    }
+}
+
+/* Look up a file element by position.
+ * Returns flist->list_index[idx] when idx is below flist->list_count,
+ * NULL otherwise. */
+static elem_t *
+list_get_elem(flist_t *flist, uint64_t idx)
+{
+    const uint64_t limit = flist->list_count;
+
+    return (idx < limit) ? flist->list_index[idx] : NULL;
+}
+
+#ifdef VERBOSE
+/* Print a one-line description of list element 'idx' to standard output.
+ *
+ * When the list carries stat detail the line holds the formatted mode,
+ * user name, group name, size, modification time and path; otherwise it
+ * holds only a three-letter type tag and the path.
+ */
+static void
+print_file(mfu_flist flist, uint64_t idx)
+{
+    /* get filename */
+    const char *path = mfu_flist_file_get_name(flist, idx);
+
+    if (!mfu_flist_have_detail(flist)) {
+        /* no stat detail: report the element type only */
+        const char *tag;
+
+        switch (mfu_flist_file_get_type(flist, idx)) {
+            case MFU_TYPE_DIR:
+                tag = "DIR";
+                break;
+            case MFU_TYPE_FILE:
+                tag = "REG";
+                break;
+            case MFU_TYPE_LINK:
+                tag = "LNK";
+                break;
+            default:
+                tag = "UNK";
+                break;
+        }
+
+        HDprintf("Type=%s File=%s\n", tag, path);
+        return;
+    }
+
+    /* gather the per-file attributes */
+    mode_t      mode      = (mode_t)mfu_flist_file_get_mode(flist, idx);
+    time_t      access_t  = (time_t)mfu_flist_file_get_atime(flist, idx);
+    time_t      modify_t  = (time_t)mfu_flist_file_get_mtime(flist, idx);
+    time_t      create_t  = (time_t)mfu_flist_file_get_ctime(flist, idx);
+    uint64_t    nbytes    = mfu_flist_file_get_size(flist, idx);
+    const char *username  = mfu_flist_file_get_username(flist, idx);
+    const char *groupname = mfu_flist_file_get_groupname(flist, idx);
+
+    /* format the three timestamps; only the modification time is printed,
+     * but a failure of any conversion blanks all three strings */
+    char   access_s[30];
+    char   modify_s[30];
+    char   create_s[30];
+    size_t access_rc = strftime(access_s, sizeof(access_s) - 1, "%FT%T", localtime(&access_t));
+    size_t modify_rc = strftime(modify_s, sizeof(modify_s) - 1, "%b %e %Y %H:%M", localtime(&modify_t));
+    size_t create_rc = strftime(create_s, sizeof(create_s) - 1, "%FT%T", localtime(&create_t));
+
+    if (access_rc == 0 || modify_rc == 0 || create_rc == 0) {
+        access_s[0] = '\0';
+        modify_s[0] = '\0';
+        create_s[0] = '\0';
+    }
+
+    /* permission string */
+    char mode_format[11];
+    mfu_format_mode(mode, mode_format);
+
+    /* size scaled to a readable unit */
+    double      scaled_size;
+    const char *size_units;
+    mfu_format_bytes(nbytes, &scaled_size, &size_units);
+
+    HDprintf("%s %s %s %7.3f %3s %s %s\n", mode_format, username, groupname, scaled_size, size_units,
+             modify_s, path);
+}
+
+/* TODO: move this somewhere or modify existing print_file */
+/* Format a one-line description of list element 'idx' into 'buffer'.
+ *
+ * Produces the same text as print_file() but writes at most 'bufsize'
+ * bytes into 'buffer' instead of printing.
+ *
+ * Returns the value reported by HDsnprintf() converted to size_t, i.e. the
+ * length the complete line needs, which can exceed 'bufsize' when the
+ * output was truncated.
+ */
+static size_t
+print_file_text(mfu_flist flist, uint64_t idx, char *buffer, size_t bufsize)
+{
+    size_t numbytes = 0;
+
+    /* store types as strings for print_file */
+    char type_str_unknown[] = "UNK";
+    char type_str_dir[]     = "DIR";
+    char type_str_file[]    = "REG";
+    char type_str_link[]    = "LNK";
+
+    /* get filename */
+    const char *file = mfu_flist_file_get_name(flist, idx);
+
+    if (mfu_flist_have_detail(flist)) {
+        /* get mode */
+        mode_t mode = (mode_t)mfu_flist_file_get_mode(flist, idx);
+
+        uint64_t    acc       = mfu_flist_file_get_atime(flist, idx);
+        uint64_t    mod       = mfu_flist_file_get_mtime(flist, idx);
+        uint64_t    cre       = mfu_flist_file_get_ctime(flist, idx);
+        uint64_t    size      = mfu_flist_file_get_size(flist, idx);
+        const char *username  = mfu_flist_file_get_username(flist, idx);
+        const char *groupname = mfu_flist_file_get_groupname(flist, idx);
+
+        char   access_s[30];
+        char   modify_s[30];
+        char   create_s[30];
+        time_t access_t  = (time_t)acc;
+        time_t modify_t  = (time_t)mod;
+        time_t create_t  = (time_t)cre;
+        size_t access_rc = strftime(access_s, sizeof(access_s) - 1, "%FT%T", localtime(&access_t));
+        size_t modify_rc = strftime(modify_s, sizeof(modify_s) - 1, "%b %e %Y %H:%M", localtime(&modify_t));
+        size_t create_rc = strftime(create_s, sizeof(create_s) - 1, "%FT%T", localtime(&create_t));
+        if (access_rc == 0 || modify_rc == 0 || create_rc == 0) {
+            /* error: blank all three time strings */
+            access_s[0] = '\0';
+            modify_s[0] = '\0';
+            create_s[0] = '\0';
+        }
+
+        char mode_format[11];
+        mfu_format_mode(mode, mode_format);
+
+        double      size_tmp;
+        const char *size_units;
+        mfu_format_bytes(size, &size_tmp, &size_units);
+
+        /* 'snHDprintf' is not a function; the bounded formatter is HDsnprintf */
+        numbytes = (size_t)HDsnprintf(buffer, bufsize, "%s %s %s %7.3f %3s %s %s\n", mode_format, username,
+                                      groupname, size_tmp, size_units, modify_s, file);
+    }
+    else {
+        /* get type */
+        mfu_filetype type     = mfu_flist_file_get_type(flist, idx);
+        char *       type_str = type_str_unknown;
+        if (type == MFU_TYPE_DIR) {
+            type_str = type_str_dir;
+        }
+        else if (type == MFU_TYPE_FILE) {
+            type_str = type_str_file;
+        }
+        else if (type == MFU_TYPE_LINK) {
+            type_str = type_str_link;
+        }
+
+        numbytes = (size_t)HDsnprintf(buffer, bufsize, "Type=%s File=%s\n", type_str, file);
+    }
+
+    return numbytes;
+}
+#endif
+
+/* Report how much output data this process holds in 'buf_cache'.
+ *
+ * With at least one cached buffer, stores the last buffer's 'bufsize' in
+ * *bufsize and returns bufsize * buft_count minus the last buffer's 'count'
+ * field. With no cached buffers, returns 0 and leaves *bufsize untouched.
+ */
+static size_t
+get_local_bufsize(uint64_t *bufsize)
+{
+    if (buft_count == 0)
+        return 0;
+
+    buf_t *tail     = buf_cache[buft_count - 1];
+    size_t tail_gap = tail->count;
+
+    *bufsize = (uint64_t)(tail->bufsize);
+    return (tail->bufsize * buft_count) - tail_gap;
+}
+
+/* Write the cached tool output of every rank into one shared text file.
+ *
+ * name    path of the output file (created if needed, truncated to 0 bytes)
+ * bflist  file list; used only for the global file count in the final report
+ *
+ * Collective over MPI_COMM_WORLD. Each rank writes the contents of its
+ * 'buf_cache' buffers at a byte offset equal to the total held by all
+ * lower ranks. Any MPI-IO failure aborts through MFU_ABORT.
+ */
+static void
+dh5tool_flist_write_text(const char *name, mfu_flist bflist)
+{
+    /* convert handle to flist_t */
+    flist_t *flist = (flist_t *)bflist;
+
+    /* get our rank and size of the communicator */
+    int rank, ranks;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+    /* start timer */
+    double start_write = MPI_Wtime();
+
+    /* total list items */
+    uint64_t all_count = mfu_flist_global_size(flist);
+
+    /* report the filename we're writing to */
+    if (mfu_rank == 0) {
+        MFU_LOG(MFU_LOG_INFO, "Writing to output file: %s", name);
+    }
+
+    /* One collective write is issued per cached buffer: 'maxwrite' is the
+     * buffer size reported by get_local_bufsize() and 'iters' the number of
+     * buffers needed to hold 'local_total' bytes. */
+    uint64_t maxwrite    = 0;
+    size_t   local_total = get_local_bufsize(&maxwrite);
+    uint64_t iters       = 0;
+    if (local_total > 0 && maxwrite > 0) {
+        iters = (uint64_t)local_total / maxwrite;
+        if (iters * maxwrite < (uint64_t)local_total) {
+            iters++;
+        }
+    }
+
+    /* get max iterations across all procs */
+    uint64_t all_iters;
+    MPI_Allreduce(&iters, &all_iters, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+    /* use mpi io hints to stripe across OSTs */
+    MPI_Info info;
+    MPI_Info_create(&info);
+
+    /* change number of ranks to string to pass to MPI_Info
+     * (this must format INTO str_buf, not use it as a format string) */
+    char str_buf[12];
+    HDsnprintf(str_buf, sizeof(str_buf), "%d", ranks);
+
+    /* no. of I/O devices for lustre striping is number of ranks */
+    MPI_Info_set(info, "striping_factor", str_buf);
+
+    /* open file */
+    MPI_Status  status;
+    MPI_File    fh;
+    const char *datarep = "native";
+    int         amode   = MPI_MODE_WRONLY | MPI_MODE_CREATE;
+
+    int mpirc = MPI_File_open(MPI_COMM_WORLD, (const char *)name, amode, info, &fh);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to open file for writing: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* truncate file to 0 bytes */
+    mpirc = MPI_File_set_size(fh, 0);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to truncate file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* set file view to be sequence of datatypes past header */
+    mpirc = MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* compute byte offset to write our element */
+    uint64_t offset = 0;
+    uint64_t bytes  = (uint64_t)local_total;
+    MPI_Exscan(&bytes, &offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+    if (rank == 0) {
+        /* MPI leaves the Exscan result undefined on rank 0 */
+        offset = 0;
+    }
+    MPI_Offset write_offset = (MPI_Offset)offset;
+
+    /* Every rank must take part in all 'all_iters' collective writes, even
+     * after it has run out of data; such ranks contribute zero bytes from a
+     * dummy location instead of indexing past the end of 'buf_cache'. */
+    char     dummy   = '\0';
+    uint64_t written = 0;
+    while (all_iters > 0) {
+        /* compute number of bytes left to write */
+        uint64_t remaining = (uint64_t)local_total - written;
+
+        /* compute count we'll write in this iteration */
+        int write_count = (int)maxwrite;
+        if (remaining < maxwrite) {
+            write_count = (int)remaining;
+        }
+
+        /* select the cached buffer that holds the next unwritten bytes;
+         * buffer k holds bytes [k * maxwrite, (k + 1) * maxwrite) */
+        char *ptr = &dummy;
+        if (write_count > 0) {
+            size_t idx = (size_t)(written / maxwrite);
+
+            if (idx < buft_count && buf_cache[idx] != NULL && buf_cache[idx]->buf != NULL) {
+                ptr = buf_cache[idx]->buf;
+            }
+            else {
+                write_count = 0;
+            }
+        }
+
+        /* collective write of file data */
+        mpirc = MPI_File_write_at_all(fh, write_offset, ptr, write_count, MPI_BYTE, &status);
+        if (mpirc != MPI_SUCCESS) {
+            MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+            MFU_ABORT(1, "Failed to write to file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+        }
+
+        /* update our offset into the file */
+        write_offset += (MPI_Offset)write_count;
+
+        /* update number of bytes written so far */
+        written += (uint64_t)write_count;
+
+        /* decrement our collective write loop counter */
+        all_iters--;
+    }
+
+    /* close file */
+    mpirc = MPI_File_close(&fh);
+    if (mpirc != MPI_SUCCESS) {
+        MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+        MFU_ABORT(1, "Failed to close file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+    }
+
+    /* free mpi info */
+    MPI_Info_free(&info);
+
+    /* end timer */
+    double end_write = MPI_Wtime();
+
+    /* report write count, time, and rate */
+    if (mfu_rank == 0) {
+        double secs = end_write - start_write;
+        double rate = 0.0;
+        if (secs > 0.0) {
+            rate = ((double)all_count) / secs;
+        }
+        MFU_LOG(MFU_LOG_INFO, "Wrote %" PRIu64 " files in %.3lf seconds (%.3lf files/sec)", all_count, secs,
+                rate);
+    }
+
+    return;
+}
+
+/* Replace *pflist with a list holding only the entries HDF5 can open.
+ *
+ * pflist     in: list to filter; out: the filtered list (the input list is
+ *            freed)
+ * regex_exp  optional regular expression applied to the HDF5 entries
+ *            (NULL for none)
+ * exclude    passed through to mfu_flist_filter_regex()
+ * name       passed through to mfu_flist_filter_regex()
+ *
+ * Only entries of type file, link or unknown are probed with
+ * H5Fis_accessible(); all other types are dropped.
+ */
+static void
+filter_hdf_files(mfu_flist *pflist, char *regex_exp, int exclude, int name)
+{
+    mfu_flist flist    = *pflist;
+    mfu_flist eligible = mfu_flist_subset(flist);
+    uint64_t  files    = mfu_flist_size(flist);
+
+    for (uint64_t idx = 0; idx < files; idx++) {
+        mfu_filetype type = mfu_flist_file_get_type(flist, idx);
+
+        if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK || type == MFU_TYPE_UNKNOWN) {
+            const char *file = mfu_flist_file_get_name(flist, idx);
+
+            /* H5Fis_accessible() is tri-state: positive = accessible,
+             * zero = not accessible, negative = the check itself failed.
+             * Only a positive answer qualifies the file. */
+            if (H5Fis_accessible(file, H5P_DEFAULT) > 0)
+                mfu_flist_file_copy(flist, idx, eligible);
+        }
+    }
+
+    mfu_flist_summarize(eligible);
+
+    /* assume we'll hand back the full list of HDF5 files */
+    mfu_flist srclist = eligible;
+
+    /* filter the list if needed */
+    if (regex_exp != NULL) {
+        /* the regex filter returns a new list; the intermediate one is no
+         * longer needed afterwards */
+        srclist = mfu_flist_filter_regex(eligible, regex_exp, exclude, name);
+        mfu_flist_free(&eligible);
+    }
+
+    mfu_flist_free(&flist);
+    *pflist = srclist;
+    return;
+}
+
+/* Populate 'new_flist' from a text file that names one path per line.
+ *
+ * Each line that can be stat'ed counts as an entry; entries are dealt out
+ * round-robin, so this process inserts only those whose running index
+ * modulo 'size' equals 'myrank'. Lines that cannot be stat'ed are skipped
+ * and do not advance the index.
+ *
+ * Returns the number of stat-able lines, or -1 when the file cannot be
+ * opened.
+ *
+ * NOTE(review): O_RDONLY is passed where mfu_flist_insert_stat() appears to
+ * take a file mode - confirm against the mpiFileUtils prototype.
+ */
+static int
+fill_file_list(mfu_flist new_flist, const char *config_filename, int myrank, int size)
+{
+    char  linebuf[PATH_MAX] = {
+        '\0',
+    };
+    int   valid_entries = 0;
+    FILE *config        = HDfopen(config_filename, "r");
+
+    if (NULL == config)
+        return -1;
+
+    /* the line buffer is cleared after every pass, including skipped ones */
+    for (; HDfgets(linebuf, sizeof(linebuf), config) != NULL; linebuf[0] = 0) {
+        struct stat statbuf;
+        char *      newline = HDstrchr(linebuf, '\n');
+
+        /* strip the trailing newline, if any */
+        if (newline != NULL)
+            *newline = '\0';
+
+        if (HDstat(linebuf, &statbuf) != 0)
+            continue;
+
+        if ((valid_entries % size) == myrank) {
+            mfu_flist_insert_stat((flist_t *)new_flist, linebuf, O_RDONLY, &statbuf);
+        }
+        valid_entries++;
+    }
+
+    HDfclose(config);
+    return valid_entries;
+}
+
+/* Classify the command-line arguments from 'startcnt' onwards.
+ *
+ * An argument counts as a path when it starts with '.' or '/', or when it
+ * contains a '/' and names an existing directory. An argument of the form
+ * "@file", where 'file' is an existing regular file, is recorded as a
+ * configuration file: its argv position goes into config_index[] and
+ * 'use_config_file' is incremented.
+ *
+ * argc, argv  the argument vector
+ * startcnt    first argv position to examine
+ * index_out   out: malloc'ed array with the argv positions of the paths;
+ *             written only when at least one path was found (the caller
+ *             frees it)
+ *
+ * Returns the number of paths found.
+ */
+static int
+count_dirpaths(int argc, int startcnt, const char *argv[], int **index_out)
+{
+    const int   max_config = (int)(sizeof(config_index) / sizeof(config_index[0]));
+    int         k;
+    int         path_cnt  = 0;
+    int         idx_count = (argc - startcnt);
+    int *       index     = NULL;
+    struct stat pathcheck;
+
+    if (idx_count <= 0)
+        return 0;
+
+    index = (int *)malloc((size_t)idx_count * sizeof(int));
+    assert(index);
+    if (index == NULL) /* still handled when assertions are compiled out */
+        return 0;
+
+    for (k = startcnt; k < argc; k++) {
+        int c = *argv[k];
+
+        if ((c == '.') || (c == '/')) {
+            index[path_cnt++] = k;
+        }
+        else if (c == '@') {
+            const char *configFile = argv[k] + 1;
+
+            if (stat(configFile, &pathcheck) == 0) {
+                if (S_ISREG(pathcheck.st_mode)) {
+                    /* config_index[] has a fixed capacity; never write
+                     * past its end */
+                    if (use_config_file < max_config)
+                        config_index[use_config_file++] = k;
+                    else
+                        HDprintf("Too many configuration files; ignoring \"%s\"\n", argv[k]);
+                }
+            }
+        }
+        else if (strchr(argv[k], '/') != NULL) {
+            if (stat(argv[k], &pathcheck) == 0) {
+                if (S_ISDIR(pathcheck.st_mode))
+                    index[path_cnt++] = k;
+            }
+        }
+    }
+
+    /* nothing found: release the array and leave *index_out untouched */
+    if (path_cnt == 0) {
+        free(index);
+        return 0;
+    }
+
+    *index_out = index;
+    return path_cnt;
+}
+
+/*
+ * Duplicate the command line.
+ *
+ * Returns a NULL-terminated array (allocated with MFU_MALLOC) of strdup'ed
+ * copies of argv[0 .. argc-1].
+ *
+ *   *mfu_argc  is set to (index of the first argument starting with "-T") + 1;
+ *              it is left untouched when no such argument exists.
+ *   *copy_len  receives the total number of bytes in all arguments, counting
+ *              one terminating NUL per argument.
+ *
+ * argv[0] is also handed to save_command().
+ */
+static char **
+copy_args(int argc, const char *argv[], int *mfu_argc, int *copy_len)
+{
+    int    i;
+    int    bytes_copied   = 0;
+    int    check_mfu_args = 1;
+    char **argv_copy      = NULL;
+
+    assert(mfu_argc);
+    assert(copy_len);
+
+    /* one slot per argument plus room for the terminating NULL entry */
+    argv_copy = (char **)MFU_MALLOC((size_t)(argc + 2) * sizeof(char *));
+    assert(argv_copy);
+    save_command(argv[0]);
+
+    for (i = 0; i < argc; i++) {
+        /* Duplicate each argument exactly once; duplicating it a second time
+         * into the same slot would leak the first copy.
+         */
+        argv_copy[i] = HDstrdup(argv[i]);
+        bytes_copied += (int)(strlen(argv[i]) + 1);
+        if (check_mfu_args && (HDstrncmp(argv[i], "-T", 2) == 0)) {
+            check_mfu_args = 0;
+            *mfu_argc      = i + 1;
+        }
+    }
+    argv_copy[i] = NULL;
+    *copy_len    = bytes_copied;
+    return argv_copy;
+}
+
+/*
+ * One entry of the (file name, application) cache that get_copy_count()
+ * uses to number repeated log files.
+ */
+struct hash_entry {
+    int                hash;      /* additive hash of the application name */
+    char              *name;      /* heap copy of the file name; NULL marks an unused table slot */
+    struct hash_entry *next;      /* next entry in this slot's collision chain */
+    int                nextCount; /* value returned by the next lookup of this entry */
+};
+typedef struct hash_entry hash_entry_t;
+
+/* Number of slots in the cache; may be overridden at compile time */
+#if !defined(NAME_ENTRIES)
+#define NAME_ENTRIES 4096
+#endif
+
+static hash_entry_t filename_cache[NAME_ENTRIES];
+
+/*
+ * Return how many times the pair (fname, appname) has been seen before and
+ * record this occurrence: 0 on the first call for a pair, then 1, 2, ...
+ *
+ * The table slot is chosen from an additive hash of 'fname'; entries within
+ * a slot are told apart by an additive hash of 'appname' together with a
+ * string comparison of the file name.  If memory for a new entry cannot be
+ * obtained the occurrence is not recorded and 0 is returned.
+ */
+static int
+get_copy_count(char *fname, char *appname)
+{
+    unsigned int  filehash = 0;
+    int           apphash  = 0;
+    const char   *p;
+    hash_entry_t *entry;
+    hash_entry_t *tail = NULL;
+
+    /* Sum the bytes as unsigned values: with a signed 'char', bytes >= 0x80
+     * would drive the sum (and therefore the table index) negative.
+     */
+    for (p = fname; *p != '\0'; p++)
+        filehash += (unsigned char)*p;
+    for (p = appname; *p != '\0'; p++)
+        apphash += (unsigned char)*p;
+
+    entry = &filename_cache[filehash % NAME_ENTRIES];
+
+    /* Unused slot: this is the first occurrence */
+    if (entry->name == NULL) {
+        entry->name      = HDstrdup(fname);
+        entry->hash      = apphash;
+        entry->next      = NULL;
+        entry->nextCount = 1;
+        return 0;
+    }
+
+    /* Walk the slot's chain looking for this (file, application) pair */
+    for (; entry != NULL; entry = entry->next) {
+        if ((entry->hash == apphash) && (strcmp(entry->name, fname) == 0))
+            return entry->nextCount++;
+        tail = entry;
+    }
+
+    /* Collision with different entries: append a new one to the chain */
+    entry = (hash_entry_t *)malloc(sizeof(hash_entry_t));
+    if (entry != NULL) {
+        entry->name = HDstrdup(fname);
+        if (entry->name == NULL) {
+            /* never link an entry without a name; later lookups strcmp() it */
+            free(entry);
+        }
+        else {
+            entry->hash      = apphash;
+            entry->next      = NULL;
+            entry->nextCount = 1;
+            tail->next       = entry;
+        }
+    }
+    return 0;
+}
+
+#ifndef H5_HAVE_WINDOWS
+/* Allocate an empty BUFT_SIZE capture buffer and append it to the cache 'bufs'. */
+static buf_t *
+new_capture_buffer(buf_t **bufs)
+{
+    buf_t *fresh;
+
+    /* NOTE(review): the cache array is allocated below with buft_max slots;
+     * this assumes buft_max is still its capacity -- confirm nothing else
+     * resizes it.
+     */
+    assert((long)buft_count < (long)buft_max);
+#ifdef VERBOSE
+    HDprintf("[%d] Allocate a new read buffer:: buft_count=%d\n", sg_mpi_rank, buft_count);
+#endif
+    fresh = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+    assert(fresh != NULL);
+    fresh->buf = MFU_MALLOC(BUFT_SIZE);
+    assert(fresh->buf != NULL);
+    fresh->bufsize = BUFT_SIZE;
+    fresh->count   = BUFT_SIZE; /* free bytes remaining */
+    fresh->chars   = 0;         /* bytes used so far */
+    fresh->dt      = MPI_CHAR;
+
+    bufs[buft_count++] = fresh;
+    return fresh;
+}
+
+/* Append 'len' bytes of 'data' to the capture buffers, starting in 'cur' and
+ * spilling into newly allocated buffers as needed.  Returns the buffer that
+ * received the last byte.
+ */
+static buf_t *
+capture_append(buf_t **bufs, buf_t *cur, const char *data, size_t len)
+{
+    while (len > 0) {
+        size_t chunk;
+
+        if (cur->count == 0)
+            cur = new_capture_buffer(bufs);
+
+        chunk = (len < (size_t)cur->count) ? len : (size_t)cur->count;
+        HDmemcpy((char *)cur->buf + cur->chars, data, chunk);
+        cur->chars += chunk;
+        cur->count -= chunk;
+        data += chunk;
+        len -= chunk;
+    }
+    return cur;
+}
+#endif /* !H5_HAVE_WINDOWS */
+
+/*
+ * Run the tool described by the NULL-terminated 'argv' in a child process.
+ *
+ *   argv     argument vector; argv[0] is the program handed to execvp()
+ *   cmdline  printable form of the command, recorded ahead of the output
+ *            when the output is captured in memory
+ *   fname    file the tool operates on; only its basename is used, to build
+ *            the per-command log file name
+ *
+ * Two modes are selected by file-level flags:
+ *   - log_output_in_single_file: stdout and stderr of the child are read
+ *     through a pipe and appended, after 'cmdline', to the buf_cache buffers.
+ *   - log_stdout_in_file: stdout (and stderr, unless log_errors_in_file sends
+ *     it to a separate ".err" file) is redirected to
+ *     <dir>/<basename(fname)>_<basename(argv[0])>.log[_<n>], where <dir> is
+ *     'txtlog' or, if that is unset, the current working directory.
+ * When neither flag is set the command is not run.
+ *
+ * The function returns once the child has terminated.  Not supported on
+ * Windows (reports an error and exits).
+ */
+static void
+run_command(int argc __attribute__((unused)), char **argv, char *cmdline, const char *fname)
+{
+#ifdef H5_HAVE_WINDOWS
+    (void)argv;
+    (void)cmdline;
+    (void)fname;
+    HDprintf("ERROR: %s %s: Unable to support fork/exec on WINDOWS\n", PROGRAMNAME, __func__);
+    h5dwalk_exit(EXIT_FAILURE);
+#else
+    if (log_output_in_single_file) {
+        pid_t   pid;
+        int     pipefd[2];
+        buf_t  *thisbuft = NULL;
+        buf_t **bufs     = buf_cache;
+
+        /* Create the buffer cache on first use */
+        if (bufs == NULL) {
+            bufs = (buf_t **)MFU_CALLOC(buft_max, sizeof(buf_t *));
+            assert(bufs != NULL);
+            buf_cache = bufs;
+        }
+
+        /* Continue in the most recent buffer if it still has room */
+        if (buft_count > 0)
+            thisbuft = bufs[buft_count - 1];
+        if ((thisbuft == NULL) || (thisbuft->count == 0)) {
+            thisbuft = new_capture_buffer(bufs);
+        }
+        else if (thisbuft->buf == NULL) {
+            thisbuft->buf = MFU_MALLOC(BUFT_SIZE);
+            assert(thisbuft->buf != NULL);
+            thisbuft->bufsize = BUFT_SIZE;
+            thisbuft->count   = BUFT_SIZE;
+            thisbuft->dt      = MPI_CHAR;
+        }
+
+        if (pipe(pipefd) == -1) {
+            perror("pipe");
+            exit(EXIT_FAILURE);
+        }
+        pid = fork();
+        if (pid == -1) {
+            perror("fork");
+            exit(EXIT_FAILURE);
+        }
+        if (pid == 0) {
+            /* Child: send stdout and stderr into the pipe and run the tool */
+            close(pipefd[0]);
+            dup2(pipefd[1], fileno(stdout));
+            dup2(pipefd[1], fileno(stderr));
+            if ((pipefd[1] != fileno(stdout)) && (pipefd[1] != fileno(stderr)))
+                close(pipefd[1]);
+            execvp(argv[0], argv);
+            /* Only reached when exec failed.  Leave immediately so the child
+             * never continues executing the parent's code.
+             */
+            perror("execvp");
+            _exit(EXIT_FAILURE);
+        }
+
+        /* Parent */
+        close(pipefd[1]);
+
+        /* Record the command line for the log! */
+        thisbuft = capture_append(bufs, thisbuft, cmdline, strlen(cmdline));
+
+        /* Collect the child's output until it closes its end of the pipe */
+        for (;;) {
+            ssize_t nread;
+
+            if (thisbuft->count == 0)
+                thisbuft = new_capture_buffer(bufs);
+
+            nread = read(pipefd[0], (char *)thisbuft->buf + thisbuft->chars, (size_t)thisbuft->count);
+            if (nread > 0) {
+                thisbuft->chars += (size_t)nread;
+                thisbuft->count -= (size_t)nread;
+            }
+            else {
+                if (nread < 0)
+                    perror("read");
+                break; /* end of output, or an unrecoverable read error */
+            }
+        }
+        close(pipefd[0]);
+
+        /* Reap the child, whether it exited normally or was killed */
+        waitpid(pid, NULL, 0);
+    }
+    else if (log_stdout_in_file) {
+        int         log_instance = -1;
+        int         numbered;
+        pid_t       pid;
+        char        filepath[1024];
+        char        logpath[2048];
+        char        logErrors[2048];
+        char        current_dir[2048];
+        const char *logdir = txtlog;
+        const char *sep    = "/";
+        char       *logbase;
+        char       *thisapp;
+
+        /* basename() may modify its argument, so work on a bounded copy of
+         * the file name */
+        snprintf(filepath, sizeof(filepath), "%s", fname ? fname : "");
+        logbase = HDstrdup(basename(filepath));
+        thisapp = HDstrdup(basename(argv[0]));
+        if ((logbase == NULL) || (thisapp == NULL)) {
+            perror("strdup");
+            free(logbase);
+            free(thisapp);
+            return;
+        }
+
+        /* Commands from an input script are numbered by their line index;
+         * otherwise repeated (file, tool) pairs get an increasing suffix.
+         */
+        if (processing_inputfile)
+            log_instance = current_input_index;
+        else
+            log_instance = get_copy_count(logbase, thisapp);
+        numbered = (log_instance > 0) || processing_inputfile;
+
+        /* Choose the log directory and whether a '/' separator is needed */
+        if ((logdir == NULL) || (logdir[0] == '\0')) {
+            logdir = HDgetcwd(current_dir, sizeof(current_dir));
+            if (logdir == NULL)
+                logdir = ".";
+        }
+        else if (logdir[strlen(logdir) - 1] == '/') {
+            sep = "";
+        }
+
+        if (numbered)
+            snprintf(logpath, sizeof(logpath), "%s%s%s_%s.log_%d", logdir, sep, logbase, thisapp,
+                     log_instance);
+        else
+            snprintf(logpath, sizeof(logpath), "%s%s%s_%s.log", logdir, sep, logbase, thisapp);
+
+        if (log_errors_in_file) {
+            /* We co-locate the error logs in the same directories as the regular log files.
+             * The name is a copy of logpath with ".log" replaced by ".err"; any
+             * "_<n>" suffix that follows the extension is kept.
+             */
+            char *ext;
+
+            snprintf(logErrors, sizeof(logErrors), "%s", logpath);
+            ext = strrchr(logErrors, '.');
+            if ((ext != NULL) && (strncmp(ext, ".log", 4) == 0)) {
+                HDmemcpy(ext + 1, "err", 3);
+            }
+            else {
+                /* logpath was truncated; fall back to replacing the tail */
+                size_t err_len = strlen(logErrors);
+                if (err_len >= 3)
+                    HDmemcpy(&logErrors[err_len - 3], "err", 3);
+            }
+        }
+        if (mfu_debug_level == MFU_LOG_VERBOSE) {
+            HDprintf("\tCreating logfile: %s\n", logpath);
+            fflush(stdout);
+        }
+
+        pid = fork();
+        if (pid == -1) {
+            perror("fork");
+        }
+        else if (pid == 0) {
+            /* Child: redirect the output streams into the log file(s) */
+            int fd = open(logpath, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+
+            if (fd < 0) {
+                perror(logpath);
+                _exit(EXIT_FAILURE);
+            }
+            dup2(fd, fileno(stdout));
+            if (log_errors_in_file) {
+                int efd = open(logErrors, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+
+                if (efd < 0) {
+                    perror(logErrors);
+                    _exit(EXIT_FAILURE);
+                }
+                dup2(efd, fileno(stderr));
+                close(efd);
+            }
+            else
+                dup2(fd, fileno(stderr));
+            close(fd);
+            execvp(argv[0], argv);
+            /* Only reached when exec failed; never fall back into the parent's code */
+            perror("execvp");
+            _exit(EXIT_FAILURE);
+        }
+        else {
+            waitpid(pid, NULL, 0);
+        }
+
+        free(logbase);
+        free(thisapp);
+    } /* else if(log_stdout_in_file) */
+#endif /* #ifdef H5_HAVE_WINDOWS */
+}
+
+int MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg);
+int MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg);
+
+/*
+ * Predicate callback that runs the selected tool on item 'idx' of 'flist'.
+ *
+ * 'arg' points to the buffer encoded by main():
+ *     int   count                    number of entries that follow
+ *     char  toolname[]               NUL-terminated
+ *     then (count - 1) entries, each either
+ *         a NUL-terminated argument string, or
+ *         '&' followed by the bytes of an mfu_flist handle and one NUL pad
+ * A '&' entry is replaced by the name of item 'idx' in the referenced list;
+ * if that name cannot be resolved the name from 'flist' is used instead.
+ * (A literal tool argument that starts with '&' cannot be told apart from
+ * such an entry.)
+ *
+ * The rebuilt argument vector is passed to run_command().  Always returns 0.
+ */
+int
+MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg)
+{
+    /* get file name for this item */
+    const char *fname            = mfu_flist_file_get_name(flist, idx);
+    int         file_substituted = 0;
+
+    /* get pointer to encoded argc count and argv array */
+    int        *count_ptr = arg;
+    char       *buf       = (char *)arg + sizeof(int);
+    const char *toolname  = buf;
+
+    /* get number of argv parameters */
+    int    count = *count_ptr;
+    int    k;
+    size_t b_offset;
+    size_t limit;
+    char   cmdline[2048];
+    char **argv;
+
+    if (count < 1)
+        return 0;
+
+    /* allocate a char* for each item in the argv array,
+     * plus one more for a trailing NULL
+     * 'count' in this case is the number of args, so
+     * so we add (+1) for the toolname and another (+1)
+     * for the trailing NULL to terminate the list
+     */
+    argv = (char **)MFU_CALLOC((size_t)(count + 2), sizeof(char *));
+
+    argv[0] = HDstrdup(toolname);
+    buf += HDstrlen(toolname) + 1;
+
+    /* Reconstruct the command line that the user provided for the h5tool */
+    for (k = 1; k < count; k++) {
+        if (buf[0] == '&') {
+            const char *fname_arg    = NULL;
+            mfu_flist   flist_arg    = NULL;
+            void       *check_ptr[2] = {NULL, NULL};
+
+            HDmemcpy(check_ptr, &buf[1], sizeof(void *));
+            flist_arg = (mfu_flist)check_ptr[0];
+
+            /* +2 accounts for the '&' and the trailing zero pad */
+            buf += sizeof(mfu_flist *) + 2;
+            if (flist_arg != NULL)
+                fname_arg = mfu_flist_file_get_name(flist_arg, idx);
+            if (fname_arg == NULL) {
+                HDprintf("[%d] Warning: Unable to resolve file_substitution %d (idx=%ld)\n", sg_mpi_rank,
+                         file_substituted, (long)idx);
+                argv[k] = HDstrdup(fname);
+            }
+            else {
+                argv[k] = HDstrdup(fname_arg);
+                file_substituted++;
+            }
+        }
+        else {
+            argv[k] = HDstrdup(buf);
+            buf += HDstrlen(buf) + 1;
+        }
+    }
+
+    /* Build the printable command line; one byte is held back so that the
+     * closing newline always fits, even if the arguments get truncated.
+     */
+    limit = sizeof(cmdline) - 1;
+    snprintf(cmdline, limit, "\n---------\nCommand:");
+    b_offset = strlen(cmdline);
+    for (k = 0; (k < count) && (b_offset + 1 < limit); k++) {
+        snprintf(&cmdline[b_offset], limit - b_offset, " %s", argv[k] ? argv[k] : "");
+        b_offset += strlen(&cmdline[b_offset]);
+    }
+    cmdline[b_offset++] = '\n';
+    cmdline[b_offset]   = '\0';
+
+    run_command(count, argv, cmdline, fname);
+
+    /* Release every duplicated argument, then the vector itself.  mfu_free()
+     * takes the ADDRESS of the pointer to release (as in its other uses in
+     * this file).
+     */
+    for (k = 0; k < count; k++)
+        HDfree(argv[k]);
+    mfu_free(&argv);
+
+    return 0;
+}
+
+/*
+ * Predicate callback that prints the name of item 'idx' of 'flist' on its
+ * own line.  'arg' is ignored.  Always returns 1.
+ */
+int
+MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg __attribute__((unused)))
+{
+    HDprintf("%s\n", mfu_flist_file_get_name(flist, idx));
+    return 1;
+}
+
+/*
+ * Walk the predicate list starting at 'p' and stop at the first entry whose
+ * callback is MFU_PRED_PRINT or MFU_PRED_EXEC.  The list is not modified and
+ * nothing is returned.
+ */
+static void
+pred_commit(mfu_pred *p)
+{
+    mfu_pred *node;
+
+    for (node = p; node != NULL; node = node->next) {
+        if ((node->f == MFU_PRED_EXEC) || (node->f == MFU_PRED_PRINT))
+            return;
+    }
+}
+
+/*
+ * Run one command taken from an input script.
+ *
+ *   argc, argv  tokens of the command; argv must have room for argv[argc],
+ *               which is set to NULL here
+ *   cmdstring   the unparsed command text, used for the log header
+ *   f_index     positions in argv of the tokens that look like file paths
+ *   f_count     number of valid entries in f_index
+ *
+ * The first file argument (or, when the command has none, argv[0]) is passed
+ * to run_command() as the name from which the log file name is derived.
+ */
+static void
+add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count)
+{
+    char        cmdline[2048];
+    const char *fname = argv[0];
+    int         len;
+
+    /* Only trust f_index[0] when the caller actually recorded a file */
+    if ((f_count > 0) && (f_index != NULL) && (f_index[0] >= 0) && (f_index[0] < argc))
+        fname = argv[f_index[0]];
+
+    /* The script line may be longer than the header buffer: bound the copy
+     * and keep the closing newline even when the text gets truncated.
+     */
+    len = snprintf(cmdline, sizeof(cmdline), "\n---------\nCommand: %s\n", cmdstring);
+    if (len < 0)
+        cmdline[0] = '\0';
+    else if ((size_t)len >= sizeof(cmdline))
+        cmdline[sizeof(cmdline) - 2] = '\n';
+
+    argv[argc] = NULL;
+    run_command(argc, argv, cmdline, fname);
+    return;
+}
+
+/*
+ * Execute the commands listed (one per line) in the script 'inputname'.
+ *
+ * Each non-empty line is split on blanks into a command and its arguments.
+ * Line N (counting non-empty lines from 0) is executed by the rank for which
+ * (N % size) == myrank.  Every argument after the command that starts with
+ * '.' or '/' is treated as a file path; paths that can be stat'ed are
+ * inserted into a file list, which is written to 'output_log_file' when that
+ * is set.
+ *
+ * Returns 0 on success, or -1 if the script cannot be opened or memory for a
+ * line copy cannot be obtained.
+ */
+static int
+process_input_file(char *inputname, int myrank, int size)
+{
+    enum { MAX_TOKENS = 256 };
+
+    int  rc               = 0;
+    int  index            = 0;
+    char linebuf[PATH_MAX] = {
+        '\0',
+    };
+    FILE     *config = HDfopen(inputname, "r");
+    mfu_flist flist1 = NULL;
+
+    if (config == NULL)
+        return -1;
+
+    flist1 = mfu_flist_new();
+
+    /* Flag the fact that we're processing an inputfile (script)
+     * so that we can generate a meaningful logfile name...
+     */
+    processing_inputfile = 1;
+
+    while (HDfgets(linebuf, sizeof(linebuf), config) != NULL) {
+        const char *delim   = " \n";
+        char       *cmdline = NULL;
+        char       *arg     = NULL;
+        char       *argv[MAX_TOKENS];
+        int         fileindex[MAX_TOKENS] = {0}; /* defaults to argv[0] when no file is found */
+        int         filecount             = 0;
+        int         token                 = 0;
+        struct stat statbuf;
+
+        char *eol = strchr(linebuf, '\n');
+        if (eol) {
+            *eol = '\0';
+        }
+
+        /* strtok() modifies linebuf, so keep a pristine copy for the log */
+        cmdline = HDstrdup(linebuf);
+        if (cmdline == NULL) {
+            rc = -1;
+            break;
+        }
+
+        for (arg = HDstrtok(linebuf, delim); arg != NULL; arg = HDstrtok(NULL, delim)) {
+            /* keep one slot free for the NULL that terminates argv */
+            if (token >= MAX_TOKENS - 1) {
+                if (myrank == 0)
+                    HDprintf("Warning: command has more than %d tokens; the rest are ignored\n",
+                             MAX_TOKENS - 1);
+                break;
+            }
+            if ((token > 0) && ((arg[0] == '.') || (arg[0] == '/'))) {
+                /* 'arg' looks to be a filepath */
+                if (stat(arg, &statbuf) == 0) {
+                    mfu_flist_insert_stat(flist1, arg, O_RDONLY, &statbuf);
+                }
+                fileindex[filecount++] = token;
+            }
+            argv[token++] = arg;
+        }
+
+        if (token > 0) {
+            if (myrank == (index % size)) {
+                current_input_index = index;
+                add_executable(token, argv, cmdline, fileindex, filecount);
+            }
+            index++;
+        }
+        linebuf[0] = 0;
+        HDfree(cmdline);
+    }
+
+    if (output_log_file) {
+        dh5tool_flist_write_text(output_log_file, flist1);
+    }
+    HDfclose(config);
+
+    mfu_flist_free(&flist1);
+    return rc;
+}
+
+/*
+ * h5dwalk entry point.
+ *
+ * Parses the h5dwalk options up to and including -T (tool selection) or -i
+ * (input script).  With -i the script is processed and the program exits.
+ * Otherwise the arguments after -T are scanned for "@file" lists and
+ * directory paths, up to two file lists are built and filtered, the tool
+ * command line is encoded into a buffer and MFU_PRED_EXEC is applied to
+ * every item of the first list.
+ */
+int
+main(int argc, const char *argv[])
+{
+    int i;
+    int rc = 0;
+
+    /* initialize MPI */
+    MPI_Init(&argc, (char ***)&argv);
+    mfu_init();
+
+    /* Initialize h5tools lib */
+    h5tools_init();
+
+    h5tools_setprogname(PROGRAMNAME);
+    h5tools_setstatus(EXIT_SUCCESS);
+
+    /* get our rank and the size of comm_world */
+    int rank, ranks;
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+    /* Assign the static global mpi_rank (for debugging) */
+    sg_mpi_rank = rank;
+
+    /* pointer to mfu_walk_opts */
+    mfu_walk_opts_t *walk_opts = mfu_walk_opts_new();
+
+#ifdef DAOS_SUPPORT
+    /* DAOS vars */
+    daos_args_t *daos_args = daos_args_new();
+#endif
+
+    int    args_byte_length = -1;
+    int    mfu_argc         = argc;
+    char  *args_buf         = NULL;
+    char **h5tool_argv      = copy_args(argc, argv, &mfu_argc, &args_byte_length);
+
+    char *inputname    = NULL;
+    char *outputname   = NULL;
+    char *sortfields   = NULL;
+    char *distribution = NULL;
+
+    int text        = 0;
+    int h5tool_argc = 0;
+
+    mfu_debug_level   = MFU_LOG_WARN;
+    h5tool_argv[argc] = 0;
+
+    /* The struct option declaration can found in bits/getopt_ext.h
+     * I've reproduced it here:
+     * struct option { char * name; int has_arg; int *flag; int val};
+     */
+    int opt;
+    int tool_selected   = 0;
+    int tool_args_start = -1;
+    int last_mfu_arg    = 0;
+
+    mfu_pred *pred_head = NULL;
+
+    while (!tool_selected) {
+        opt = H5_get_option(argc, argv, s_opts, l_opts);
+        switch ((char)opt) {
+            default:
+                usage();
+                h5dwalk_exit(EXIT_FAILURE);
+                break;
+            case 'i':
+                inputname    = HDstrdup(H5_optarg);
+                last_mfu_arg = H5_optind;
+                if (inputname)
+                    tool_selected = 1;
+                break;
+            case 'o':
+                outputname   = HDstrdup(H5_optarg);
+                last_mfu_arg = H5_optind;
+                if (outputname) {
+                    log_output_in_single_file = 1;
+                    output_log_file           = HDstrdup(H5_optarg);
+                    text                      = 1; /* Format TXT, not HDF5 */
+                }
+                break;
+            case 'E':
+                log_errors_in_file = 1;
+                /* guard the argument the same way the 'l' option does */
+                if (H5_optarg)
+                    errlog = HDstrdup(H5_optarg);
+                last_mfu_arg = H5_optind;
+                break;
+            case 'l':
+                log_stdout_in_file = 1;
+                if (H5_optarg)
+                    txtlog = HDstrdup(H5_optarg);
+                break;
+            case 'T':
+                /* We need to stop parsing user options at this point.
+                 * all remaining arguments should be utilized as the
+                 * arguments to the selected HDF5 tools.
+                 * We also want to avoid any misinterpretations if
+                 * HDF5 tool options conflict with the MFU options.
+                 */
+                tool_selected   = 1;
+                tool_args_start = H5_optind;
+                h5tool_argc     = argc - mfu_argc;
+                last_mfu_arg    = H5_optind;
+                /* Don't allow any further parsing of arguments */
+                break;
+            case 'h':
+                usage();
+                h5dwalk_exit(EXIT_SUCCESS);
+                break;
+            case '?':
+                usage();
+                h5dwalk_exit(EXIT_SUCCESS);
+                break;
+        }
+    }
+    (void)last_mfu_arg;
+
+    if (inputname != NULL) {
+        if (tool_selected && (rank == 0)) {
+            if ((log_output_in_single_file == 0) && (log_stdout_in_file == 0))
+                puts("WARNING: When utilizing --input, the only other supported "
+                     "runtime argument is --output or -l");
+        }
+        rc = process_input_file(inputname, rank, ranks);
+        /* h5dwalk_exit() shuts down the tools library, MFU and MPI */
+        h5dwalk_exit(rc);
+    }
+
+    /**************************************************************/
+    /* We might consider doing a tool specific argument checking  */
+    /* to prevent runtime errors.  We would also like to allow    */
+    /* the same command line interface for parallel invocations   */
+    /* so that users don't get confused.  Effectively, we should  */
+    /* strip out all MFU related arguments and retain copies of   */
+    /* everything else to pass into a serial instance of the tool */
+    /*                                                            */
+    /* As we move forward, we might allow the HDF5 tool to be     */
+    /* queried for an acceptable set of runtime arguments.        */
+    /* This could be just a simple string to allow getopt_long    */
+    /* to be invoked on the remaining command line arguments.     */
+    /**************************************************************/
+
+    int *path_indices = NULL;
+    int  numpaths     = count_dirpaths(argc, tool_args_start, argv, &path_indices);
+
+    const char **argpaths = NULL;
+
+    /* store src and dest path strings */
+    const char *path1         = NULL;
+    const char *path2         = NULL;
+    size_t      pathlen_total = 0;
+
+    /* Gather the path arguments into their own array: they are not
+     * necessarily adjacent to each other on the command line.
+     */
+    if ((numpaths > 0) && (path_indices != NULL)) {
+        argpaths = (const char **)HDmalloc((size_t)numpaths * sizeof(const char *));
+        if (argpaths == NULL) {
+            if (rank == 0) {
+                MFU_LOG(MFU_LOG_ERR, "Unable to allocate the path list.");
+            }
+            h5dwalk_exit(EXIT_FAILURE);
+        }
+        for (i = 0; i < numpaths; i++)
+            argpaths[i] = argv[path_indices[i]];
+    }
+
+    /* pointer to mfu_file src and dest objects */
+    /* The dst object will only be used for tools which
+     * accept 2 (or more?) file arguments */
+    mfu_file_t *mfu_src_file = NULL;
+    mfu_file_t *mfu_dst_file = NULL;
+
+    /* first item is source and second is dest */
+    mfu_param_path *srcpath  = NULL;
+    mfu_param_path *destpath = NULL;
+    mfu_param_path *paths    = NULL;
+
+    mfu_flist flist1 = NULL;
+    mfu_flist flist2 = NULL;
+
+    if (use_config_file > 0) {
+        /* File lists come from "@file" arguments; only the first two are used */
+        int count1 = 0, count2 = 0;
+        for (i = 0; i < use_config_file; i++) {
+            int         index       = config_index[i];
+            const char *config_file = argv[index];
+            if (i == 0) {
+                flist1 = mfu_flist_new();
+                count1 = fill_file_list(flist1, config_file + 1, rank, ranks);
+            }
+            else if (i == 1) {
+                flist2 = mfu_flist_new();
+                count2 = fill_file_list(flist2, config_file + 1, rank, ranks);
+            }
+        }
+        /* the counts can only be compared when there are two lists */
+        if ((use_config_file > 1) && (count1 != count2)) {
+            HDprintf("config files have different file counts: (1) %d and (2) %d\n", count1, count2);
+        }
+    }
+    else if (numpaths > 0) {
+
+        /* allocate space for each path */
+        paths        = (mfu_param_path *)MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));
+        mfu_src_file = mfu_file_new();
+
+        /* process each path */
+        mfu_param_path_set_all((uint64_t)numpaths, argpaths, paths, mfu_src_file, true);
+    }
+    else {
+        /* if we're not walking, we must be reading,
+         * and for that we need a file (--input was handled above) */
+        if (rank == 0) {
+            MFU_LOG(MFU_LOG_ERR, "Either a <path> or --input is required.");
+        }
+        usage();
+        h5dwalk_exit(EXIT_FAILURE);
+    }
+
+    /* Walk the directories.  'paths' is only allocated when no config file
+     * is in use, so test it rather than numpaths alone.
+     */
+    if ((paths != NULL) && (numpaths > 0)) {
+        flist1  = mfu_flist_new();
+        srcpath = &paths[0];
+        path1   = srcpath->path;
+        pathlen_total += strlen(path1);
+        mfu_flist_walk_param_paths(1, srcpath, walk_opts, flist1, mfu_src_file);
+    }
+    if ((paths != NULL) && (numpaths > 1)) {
+        flist2       = mfu_flist_new();
+        mfu_dst_file = mfu_file_new();
+        destpath     = &paths[1];
+        path2        = destpath->path;
+        pathlen_total += HDstrlen(path2);
+        mfu_flist_walk_param_paths(1, destpath, walk_opts, flist2, mfu_dst_file);
+    }
+
+    if (tool_selected && (args_byte_length > 0)) {
+        /* Upper bound for the encoded command: the int header, every
+         * argument with its NUL, room for each argument to be replaced by
+         * '&' + pointer + NUL, and the final terminator.
+         */
+        size_t args_buf_size = sizeof(int) + (size_t)args_byte_length + pathlen_total +
+                               (size_t)argc * (sizeof(void *) + 2) + 1;
+
+        pred_head = mfu_pred_new();
+        args_buf  = (char *)HDmalloc(args_buf_size);
+    }
+
+    /* filter files to only include hdf5 files */
+    if (flist1) {
+        filter_hdf_files(&flist1, NULL, 0, 0);
+    }
+    if (flist2) {
+        filter_hdf_files(&flist2, NULL, 0, 0);
+    }
+
+    /* if (numpaths > 1)
+     * In a case where we require the list indices of files from multiple
+     * directories to match, we must utilize a mapping function.
+     * The question to answer is how does the mapping function work?
+     * The most probable is a sort function, e.g.
+     * 1) an alphabet sort?
+     * 2) sort by file size?
+     * 3) something else?
+     */
+    if ((args_buf != NULL) && (tool_args_start >= 1)) {
+        int   k      = 0;
+        char *ptr    = args_buf + sizeof(int);
+        *(int *)args_buf = h5tool_argc;
+        for (i = tool_args_start - 1; i < argc; i++) {
+            mfu_flist subst = NULL;
+
+            /* Only the first two list arguments can be substituted, and only
+             * the valid entries of the index tables may be consulted.
+             */
+            if (k < 2) {
+                int is_config = (k < use_config_file) && (i == config_index[k]);
+                int is_path   = (path_indices != NULL) && (k < numpaths) && (i == path_indices[k]);
+
+                if (is_config || is_path)
+                    subst = (k == 0) ? flist1 : flist2;
+            }
+
+            /* Maybe copy one of the flist pointers */
+            if (subst != NULL) {
+                /* The '&' indicates that what follows is a pointer */
+                *ptr++ = '&';
+                HDmemcpy(ptr, &subst, sizeof(void *));
+                ptr += sizeof(mfu_flist *);
+                k++;
+            }
+            else {
+                HDstrcpy(ptr, argv[i]);
+                ptr += HDstrlen(argv[i]);
+            }
+            *ptr++ = 0;
+        }
+        *ptr++ = 0;
+
+        mfu_pred_add(pred_head, MFU_PRED_EXEC, (void *)args_buf);
+        pred_commit(pred_head);
+    }
+
+    /* apply predicates to each item in list */
+    mfu_flist flist3 = mfu_flist_filter_pred(flist1, pred_head);
+
+    /* print summary statistics of flist */
+    mfu_flist_print_summary(flist1);
+
+    /* write data to cache file */
+    if (outputname != NULL) {
+        if (!text) {
+            if (rank == 0) {
+                puts("ouput capture needs to be a text formated file");
+            }
+        }
+        else {
+            dh5tool_flist_write_text(outputname, flist1);
+        }
+    }
+
+#ifdef DAOS_SUPPORT
+    daos_cleanup(daos_args, mfu_src_file, NULL);
+#endif
+
+    /* free users, groups, and files objects */
+    mfu_flist_free(&flist1);
+    if (flist2)
+        mfu_flist_free(&flist2);
+    if (flist3)
+        mfu_flist_free(&flist3);
+
+    /* free memory allocated for options */
+    mfu_free(&distribution);
+    mfu_free(&sortfields);
+    mfu_free(&outputname);
+    mfu_free(&inputname);
+
+    /* free the path parameters */
+    mfu_param_path_free_all((uint64_t)numpaths, paths);
+
+    /* free memory allocated to hold params */
+    mfu_free(&paths);
+
+    /* free the argv positions and the gathered path list */
+    free(path_indices);
+    HDfree((void *)argpaths);
+
+    /* free the walk options */
+    mfu_walk_opts_delete(&walk_opts);
+
+    /* delete file objects */
+    mfu_file_delete(&mfu_src_file);
+    if (mfu_dst_file)
+        mfu_file_delete(&mfu_dst_file);
+
+    h5tools_close();
+    /* shut down MPI */
+    mfu_finalize();
+    MPI_Finalize();
+
+    return rc;
+}
+
+/*-------------------------------------------------------------------------
+ * Function:    h5dwalk_exit
+ *
+ * Purpose:     Close the tools library, shut down MFU and (when it is still
+ *              active) MPI, then terminate the process with 'status'.
+ *
+ * Return:      none (does not return)
+ *
+ * Programmer:  Albert Cheng
+ * Date:        Feb 6, 2005
+ *
+ * Comments:    MPI_Initialized() keeps reporting true after MPI_Finalize()
+ *              has been called, so MPI_Finalized() is consulted as well to
+ *              make sure MPI is never finalized twice.
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+H5_ATTR_NORETURN void
+h5dwalk_exit(int status)
+{
+    int mpi_initialized = 0;
+    int mpi_finalized   = 0;
+
+    h5tools_close();
+    mfu_finalize();
+
+    /* Check to see whether we need to call MPI_Finalize */
+    MPI_Initialized(&mpi_initialized);
+    MPI_Finalized(&mpi_finalized);
+    if (mpi_initialized && !mpi_finalized)
+        MPI_Finalize();
+
+    HDexit(status);
+}