summaryrefslogtreecommitdiffstats
path: root/utils/tools/h5dwalk/h5dwalk.c
diff options
context:
space:
mode:
Diffstat (limited to 'utils/tools/h5dwalk/h5dwalk.c')
-rw-r--r--utils/tools/h5dwalk/h5dwalk.c1714
1 files changed, 1714 insertions, 0 deletions
diff --git a/utils/tools/h5dwalk/h5dwalk.c b/utils/tools/h5dwalk/h5dwalk.c
new file mode 100644
index 0000000..5a22d75
--- /dev/null
+++ b/utils/tools/h5dwalk/h5dwalk.c
@@ -0,0 +1,1714 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include "H5private.h"
+#include "h5tools.h"
+#include "h5tools_utils.h"
+#include "hdf5.h"
+
+#include "libcircle.h"
+#include "dtcmp.h"
+#include "mfu.h"
+#include "mfu_flist.h"
+#include "mfu_errors.h"
+#include "mfu_flist_internal.h"
+
+/* Name of tool */
+#define PROGRAMNAME "h5dwalk"
+
+#ifdef DAOS_SUPPORT
+#include "mfu_daos.h"
+#endif
+
+static char *user_cmd = NULL;
+static char mpierrstr[MPI_MAX_ERROR_STRING];
+static int mpierrlen;
+static int sg_mpi_rank = 0;
+static int current_input_index = 0;
+static int processing_inputfile = 0;
+
+static void dh5tool_flist_write_text(const char *name, mfu_flist bflist);
+static void run_command(int argc, char **argv, char *cmdline, const char *fname);
+static void add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count);
+static int process_input_file(char *inputname, int myrank, int size);
+static void usage(void);
+
+H5_ATTR_NORETURN void h5dwalk_exit(int status);
+
+/* keep stats during walk */
+uint64_t total_dirs = 0;
+uint64_t total_files = 0;
+uint64_t total_links = 0;
+uint64_t total_unknown = 0;
+uint64_t total_bytes = 0;
+/* global flags which indicate whether we need
+ * to capture tool outputs into a file...
+ * Related to this is whether the stderr should
+ * be logged separately.
+ */
+#define BUFT_SIZE 131072
+/* FIXME: 'buft_max' should probably be configurable.. */
+size_t buft_max = 64;
+size_t buft_count = 0;
+buf_t **buf_cache = NULL;
+
+int log_output_in_single_file = 0;
+char *output_log_file = NULL;
+
+int log_stdout_in_file = 0;
+char *txtlog = NULL;
+
+int log_errors_in_file = 0;
+char *errlog = NULL;
+
+int use_config_file = 0;
+int config_index[4] = {
+ 0,
+};
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define MAX_DISTRIBUTE_SEPARATORS 128
+struct distribute_option {
+ int separator_number;
+ uint64_t separators[MAX_DISTRIBUTE_SEPARATORS];
+};
+
+static const char * s_opts = "hl*E*i:o:T:";
+static struct h5_long_options l_opts[] = {{"help", no_arg, 'h'},
+ {"log_text", optional_arg, 'l'},
+ {"error", optional_arg, 'E'},
+ {"input", require_arg, 'i'},
+ {"output", require_arg, 'o'},
+ {"tool", require_arg, 'T'},
+ {NULL, 0, '\0'}};
+static void
+save_command(const char *argv0)
+{
+ assert(argv0);
+ user_cmd = HDstrdup(argv0);
+}
+
+static void
+create_default_separators(struct distribute_option *option, mfu_flist *flist, uint64_t *size,
+ size_t *separators, uint64_t *global_max_file_size)
+{
+ /* get local max file size for Allreduce */
+ uint64_t local_max_file_size = 0;
+ for (uint64_t i = 0; i < *size; i++) {
+ uint64_t file_size = mfu_flist_file_get_size(*flist, i);
+ if (file_size > local_max_file_size) {
+ local_max_file_size = file_size;
+ }
+ }
+
+ /* get the max file size across all ranks */
+ MPI_Allreduce(&local_max_file_size, global_max_file_size, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+ /* print and convert max file size to appropriate units */
+ double max_size_tmp;
+ const char *max_size_units;
+ mfu_format_bytes(*global_max_file_size, &max_size_tmp, &max_size_units);
+ HDprintf("Max File Size: %.3lf %s\n", max_size_tmp, max_size_units);
+
+ /* round next_pow_2 to next multiple of 10 */
+ uint64_t max_magnitude_bin = (uint64_t)((ceil(log2((double)(*global_max_file_size)) / 10)) * 10);
+
+ /* get bin ranges based on max file size */
+ option->separators[0] = 1;
+
+ /* plus one is for zero count bin */
+ *separators = (size_t)(max_magnitude_bin / 10);
+ uint64_t power = 10;
+ for (int i = 1; power <= max_magnitude_bin; i++) {
+ double raised_2 = pow(2, (double)(power));
+ option->separators[i] = (uint64_t)raised_2;
+ power += 10;
+ }
+}
+
+static int
+h5dwalk_map_fn(mfu_flist flist __attribute__((unused)), uint64_t idx, int ranks,
+ void *args __attribute__((unused)))
+{
+ int rank = (int)((int)idx % ranks);
+ return rank;
+}
+
+static int
+print_flist_distribution(int file_histogram, struct distribute_option *option, mfu_flist *pflist, int rank)
+{
+ /* file list to use */
+ mfu_flist flist = *pflist;
+
+ /* get local size for each rank, and max file sizes */
+ uint64_t size = mfu_flist_size(flist);
+ uint64_t global_max_file_size;
+
+ size_t separators = 0;
+ if (file_histogram) {
+ /* create default separators */
+ create_default_separators(option, &flist, &size, &separators, &global_max_file_size);
+ }
+ else {
+ separators = (size_t)option->separator_number;
+ }
+
+ /* allocate a count for each bin, initialize the bin counts to 0
+ * it is separator + 1 because the last bin is the last separator
+ * to the DISTRIBUTE_MAX */
+ uint64_t *dist = (uint64_t *)MFU_MALLOC((separators + 1) * sizeof(uint64_t));
+
+ /* initialize the bin counts to 0 */
+ for (size_t i = 0; i <= separators; i++) {
+ dist[i] = 0;
+ }
+
+ /* for each file, identify appropriate bin and increment its count */
+ for (size_t i = 0; i < size; i++) {
+ /* get the size of the file */
+ uint64_t file_size = mfu_flist_file_get_size(flist, i);
+
+ /* loop through the bins and find the one the file belongs to,
+ * set last bin to -1, if a bin is not found while looping through the
+ * list of file size separators, then it belongs in the last bin
+ * so (last file size - MAX bin) */
+ int64_t max_bin_flag = -1;
+ for (size_t j = 0; j < separators; j++) {
+ if (file_size <= option->separators[j]) {
+ /* found the bin set bin index & increment its count */
+ dist[j]++;
+
+ /* a file for this bin was found so can't belong to
+ * last bin (so set the flag) & exit the loop */
+ max_bin_flag = 1;
+ break;
+ }
+ }
+
+ /* if max_bin_flag is still -1 then the file belongs to the last bin */
+ if (max_bin_flag < 0) {
+ dist[separators]++;
+ }
+ }
+
+ /* get the total sum across all of the bins */
+ uint64_t *disttotal = (uint64_t *)MFU_MALLOC((separators + 1) * sizeof(uint64_t));
+ MPI_Allreduce(dist, disttotal, (int)(separators + 1), MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+
+ /* Print the file distribution */
+ if (rank == 0) {
+ /* number of files in a bin */
+ uint64_t number;
+ double size_tmp;
+ const char *size_units;
+ HDprintf("%-27s %s\n", "Range", "Number");
+ for (size_t i = 0; i <= separators; i++) {
+ HDprintf("%s", "[ ");
+ if (i == 0) {
+ HDprintf("%7.3lf %3s", 0.000, "B");
+ }
+ else {
+ mfu_format_bytes((uint64_t)option->separators[i - 1], &size_tmp, &size_units);
+ HDprintf("%7.3lf %3s", size_tmp, size_units);
+ }
+
+ printf("%s", " - ");
+
+ if (file_histogram) {
+ mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
+ number = disttotal[i];
+ mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
+ HDprintf("%7.3lf %3s ) %" PRIu64 "\n", size_tmp, size_units, number);
+ }
+ else {
+ if (i == separators) {
+ number = disttotal[i];
+ HDprintf("%10s ) %" PRIu64 "\n", "MAX", number);
+ }
+ else {
+ number = disttotal[i];
+ mfu_format_bytes((uint64_t)option->separators[i], &size_tmp, &size_units);
+ HDprintf("%7.3lf %3s ) %" PRIu64 "\n", size_tmp, size_units, number);
+ }
+ }
+ }
+ }
+
+ /* free the memory used to hold bin counts */
+ mfu_free(&disttotal);
+ mfu_free(&dist);
+
+ return 0;
+}
+
+/* * Search the right position to insert the separator * If the separator exists already, return failure *
+ * Otherwise, locate the right position, and move the array forward to save the separator.
+ */
+static int
+distribute_separator_add(struct distribute_option *option, uint64_t separator)
+{
+ int low = 0;
+ int high;
+ int middle;
+ int pos;
+ int count;
+
+ count = option->separator_number;
+ option->separator_number++;
+ if (option->separator_number > MAX_DISTRIBUTE_SEPARATORS) {
+ HDprintf("Too many separators");
+ return -1;
+ }
+
+ if (count == 0) {
+ option->separators[0] = separator;
+ return 0;
+ }
+
+ high = count - 1;
+ while (low < high) {
+ middle = (high - low) / 2 + low;
+ if (option->separators[middle] == separator)
+ return -1;
+ /* In the left half */
+ else if (option->separators[middle] < separator)
+ low = middle + 1;
+ /* In the right half */
+ else
+ high = middle;
+ }
+ assert(low == high);
+ if (option->separators[low] == separator)
+ return -1;
+
+ if (option->separators[low] < separator)
+ pos = low + 1;
+ else
+ pos = low;
+
+ if (pos < count)
+ HDmemmove(&option->separators[low + 1], &option->separators[low],
+ sizeof(*option->separators) * (uint64_t)(count - pos));
+
+ option->separators[pos] = separator;
+ return 0;
+}
+
+static int
+distribution_parse(struct distribute_option *option, const char *string)
+{
+ char * ptr;
+ char * next;
+ unsigned long long separator;
+ char * str;
+ int status = 0;
+
+ if (strncmp(string, "size", strlen("size")) != 0) {
+ return -1;
+ }
+
+ option->separator_number = 0;
+ if (strlen(string) == strlen("size")) {
+ return 0;
+ }
+
+ if (string[strlen("size")] != ':') {
+ return -1;
+ }
+
+ str = HDstrdup(string);
+ /* Parse separators */
+ ptr = str + strlen("size:");
+ next = ptr;
+ while (ptr && ptr < str + strlen(string)) {
+ next = strchr(ptr, ',');
+ if (next != NULL) {
+ *next = '\0';
+ next++;
+ }
+
+ if (mfu_abtoull(ptr, &separator) != MFU_SUCCESS) {
+ HDprintf("Invalid separator \"%s\"\n", ptr);
+ status = -1;
+ goto out;
+ }
+
+ if (distribute_separator_add(option, separator)) {
+ HDprintf("Duplicated separator \"%llu\"\n", separator);
+ status = -1;
+ goto out;
+ }
+
+ ptr = next;
+ }
+
+out:
+ mfu_free(&str);
+ return status;
+}
+
+static void
+usage(void)
+{
+ if (sg_mpi_rank)
+ return;
+
+ PRINTVALSTREAM(rawoutstream, "\n");
+ PRINTVALSTREAM(rawoutstream, "Usage: h5dwalk [options] <path> ...\n");
+#ifdef DAOS_SUPPORT
+ PRINTVALSTREAM(rawoutstream, "\n");
+ PRINTVALSTREAM(rawoutstream, "DAOS paths can be specified as:\n");
+ PRINTVALSTREAM(rawoutstream, " daos://<pool>/<cont>[/<path>] | <UNS path>\n");
+#endif
+ PRINTVALSTREAM(rawoutstream, "\n");
+ PRINTVALSTREAM(rawoutstream, "Options:\n");
+ PRINTVALSTREAM(rawoutstream, " -i, --input <file> - read list from file\n");
+ PRINTVALSTREAM(rawoutstream, " -o, --output <file> - write output summary to the named file.\n");
+ PRINTVALSTREAM(rawoutstream,
+ " -E, --error <file> - write processed errors to file in text format\n");
+ PRINTVALSTREAM(
+ rawoutstream,
+ " -l, --log_text <dir> - write individual tool outputs to a file. Logs can be written to an "
+ "optional named directory.\n");
+ PRINTVALSTREAM(rawoutstream, " -T, --tool <executable> - name of the HDF5 tool to invoke\n");
+ PRINTVALSTREAM(rawoutstream, " -h, --help - print usage\n");
+ PRINTVALSTREAM(rawoutstream, "\n");
+ PRINTVALSTREAM(rawoutstream, "For more information see https://mpifileutils.readthedocs.io. \n");
+ PRINTVALSTREAM(rawoutstream, "\n");
+}
+
+/* given an index, return pointer to that file element,
+ * NULL if index is not in range */
+static elem_t *
+list_get_elem(flist_t *flist, uint64_t idx)
+{
+ /* return pointer to element if index is within range */
+ uint64_t max = flist->list_count;
+ if (idx < max) {
+ elem_t *elem = flist->list_index[idx];
+ return elem;
+ }
+ return NULL;
+}
+
+#ifdef VERBOSE
+/* print information about a file given the index and rank (used in print_files) */
+static void
+print_file(mfu_flist flist, uint64_t idx)
+{
+ /* store types as strings for print_file */
+ char type_str_unknown[] = "UNK";
+ char type_str_dir[] = "DIR";
+ char type_str_file[] = "REG";
+ char type_str_link[] = "LNK";
+
+ /* get filename */
+ const char *file = mfu_flist_file_get_name(flist, idx);
+
+ if (mfu_flist_have_detail(flist)) {
+ /* get mode */
+ mode_t mode = (mode_t)mfu_flist_file_get_mode(flist, idx);
+ uint64_t acc = mfu_flist_file_get_atime(flist, idx);
+ uint64_t mod = mfu_flist_file_get_mtime(flist, idx);
+ uint64_t cre = mfu_flist_file_get_ctime(flist, idx);
+ uint64_t size = mfu_flist_file_get_size(flist, idx);
+ const char *username = mfu_flist_file_get_username(flist, idx);
+ const char *groupname = mfu_flist_file_get_groupname(flist, idx);
+
+ char access_s[30];
+ char modify_s[30];
+ char create_s[30];
+ time_t access_t = (time_t)acc;
+ time_t modify_t = (time_t)mod;
+ time_t create_t = (time_t)cre;
+ size_t access_rc = strftime(access_s, sizeof(access_s) - 1, "%FT%T", localtime(&access_t));
+ size_t modify_rc = strftime(modify_s, sizeof(modify_s) - 1, "%b %e %Y %H:%M", localtime(&modify_t));
+ size_t create_rc = strftime(create_s, sizeof(create_s) - 1, "%FT%T", localtime(&create_t));
+ if (access_rc == 0 || modify_rc == 0 || create_rc == 0) {
+ /* error */
+ access_s[0] = '\0';
+ modify_s[0] = '\0';
+ create_s[0] = '\0';
+ }
+
+ char mode_format[11];
+ mfu_format_mode(mode, mode_format);
+
+ double size_tmp;
+ const char *size_units;
+ mfu_format_bytes(size, &size_tmp, &size_units);
+
+ HDprintf("%s %s %s %7.3f %3s %s %s\n", mode_format, username, groupname, size_tmp, size_units,
+ modify_s, file);
+ }
+ else {
+ /* get type */
+ mfu_filetype type = mfu_flist_file_get_type(flist, idx);
+ char * type_str = type_str_unknown;
+ if (type == MFU_TYPE_DIR) {
+ type_str = type_str_dir;
+ }
+ else if (type == MFU_TYPE_FILE) {
+ type_str = type_str_file;
+ }
+ else if (type == MFU_TYPE_LINK) {
+ type_str = type_str_link;
+ }
+
+ HDprintf("Type=%s File=%s\n", type_str, file);
+ }
+}
+
+/* TODO: move this somewhere or modify existing print_file */
+/* print information about a file given the index and rank (used in print_files) */
+static size_t
+print_file_text(mfu_flist flist, uint64_t idx, char *buffer, size_t bufsize)
+{
+ size_t numbytes = 0;
+
+ /* store types as strings for print_file */
+ char type_str_unknown[] = "UNK";
+ char type_str_dir[] = "DIR";
+ char type_str_file[] = "REG";
+ char type_str_link[] = "LNK";
+
+ /* get filename */
+ const char *file = mfu_flist_file_get_name(flist, idx);
+
+ if (mfu_flist_have_detail(flist)) {
+ /* get mode */
+ mode_t mode = (mode_t)mfu_flist_file_get_mode(flist, idx);
+
+ uint64_t acc = mfu_flist_file_get_atime(flist, idx);
+ uint64_t mod = mfu_flist_file_get_mtime(flist, idx);
+ uint64_t cre = mfu_flist_file_get_ctime(flist, idx);
+ uint64_t size = mfu_flist_file_get_size(flist, idx);
+ const char *username = mfu_flist_file_get_username(flist, idx);
+ const char *groupname = mfu_flist_file_get_groupname(flist, idx);
+
+ char access_s[30];
+ char modify_s[30];
+ char create_s[30];
+ time_t access_t = (time_t)acc;
+ time_t modify_t = (time_t)mod;
+ time_t create_t = (time_t)cre;
+ size_t access_rc = strftime(access_s, sizeof(access_s) - 1, "%FT%T", localtime(&access_t));
+ size_t modify_rc = strftime(modify_s, sizeof(modify_s) - 1, "%b %e %Y %H:%M", localtime(&modify_t));
+ size_t create_rc = strftime(create_s, sizeof(create_s) - 1, "%FT%T", localtime(&create_t));
+ if (access_rc == 0 || modify_rc == 0 || create_rc == 0) {
+ /* error */
+ access_s[0] = '\0';
+ modify_s[0] = '\0';
+ create_s[0] = '\0';
+ }
+
+ char mode_format[11];
+ mfu_format_mode(mode, mode_format);
+
+ double size_tmp;
+ const char *size_units;
+ mfu_format_bytes(size, &size_tmp, &size_units);
+
+ numbytes = (size_t)snHDprintf(buffer, bufsize, "%s %s %s %7.3f %3s %s %s\n", mode_format, username,
+ groupname, size_tmp, size_units, modify_s, file);
+ }
+ else {
+ /* get type */
+ mfu_filetype type = mfu_flist_file_get_type(flist, idx);
+ char * type_str = type_str_unknown;
+ if (type == MFU_TYPE_DIR) {
+ type_str = type_str_dir;
+ }
+ else if (type == MFU_TYPE_FILE) {
+ type_str = type_str_file;
+ }
+ else if (type == MFU_TYPE_LINK) {
+ type_str = type_str_link;
+ }
+
+ numbytes = (size_t)snHDprintf(buffer, bufsize, "Type=%s File=%s\n", type_str, file);
+ }
+
+ return numbytes;
+}
+#endif
+
+static size_t
+get_local_bufsize(uint64_t *bufsize)
+{
+ size_t total = 0;
+ if (buft_count > 0) {
+ buf_t *lastbuf = buf_cache[buft_count - 1];
+ size_t remaining = lastbuf->count;
+ total = (lastbuf->bufsize * buft_count) - remaining;
+ *bufsize = (uint64_t)(lastbuf->bufsize);
+ }
+ return total;
+}
+
+static void
+dh5tool_flist_write_text(const char *name, mfu_flist bflist)
+{
+ /* convert handle to flist_t */
+ flist_t *flist = (flist_t *)bflist;
+
+ /* get our rank and size of the communicator */
+ int rank, ranks;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+ /* start timer */
+ double start_write = MPI_Wtime();
+
+ /* total list items */
+ uint64_t all_count = mfu_flist_global_size(flist);
+
+ /* report the filename we're writing to */
+ if (mfu_rank == 0) {
+ MFU_LOG(MFU_LOG_INFO, "Writing to output file: %s", name);
+ }
+
+ uint64_t idx = 0;
+ char * ptr = NULL;
+
+ /* if we block things up into 128MB chunks, how many iterations
+ * to write everything? */
+ // uint64_t maxwrite = 128 * 1024 * 1024;
+ uint64_t maxwrite = 0;
+ size_t local_total = get_local_bufsize(&maxwrite);
+ uint64_t iters = 0;
+ if (local_total > 0)
+ iters = (uint64_t)local_total / maxwrite;
+
+ if (iters * maxwrite < (uint64_t)local_total) {
+ iters++;
+ }
+
+ /* get max iterations across all procs */
+ uint64_t all_iters;
+ MPI_Allreduce(&iters, &all_iters, 1, MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+ /* use mpi io hints to stripe across OSTs */
+ MPI_Info info;
+ MPI_Info_create(&info);
+
+ /* change number of ranks to string to pass to MPI_Info */
+ char str_buf[12];
+ HDprintf(str_buf, "%d", ranks);
+
+ /* no. of I/O devices for lustre striping is number of ranks */
+ MPI_Info_set(info, "striping_factor", str_buf);
+
+ /* open file */
+ MPI_Status status;
+ MPI_File fh;
+ const char *datarep = "native";
+ int amode = MPI_MODE_WRONLY | MPI_MODE_CREATE;
+
+ int mpirc = MPI_File_open(MPI_COMM_WORLD, (const char *)name, amode, info, &fh);
+ if (mpirc != MPI_SUCCESS) {
+ MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+ MFU_ABORT(1, "Failed to open file for writing: `%s' rc=%d %s", name, mpirc, mpierrstr);
+ }
+
+ /* truncate file to 0 bytes */
+ mpirc = MPI_File_set_size(fh, 0);
+ if (mpirc != MPI_SUCCESS) {
+ MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+ MFU_ABORT(1, "Failed to truncate file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+ }
+
+ /* set file view to be sequence of datatypes past header */
+ mpirc = MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL);
+ if (mpirc != MPI_SUCCESS) {
+ MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+ MFU_ABORT(1, "Failed to set view on file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+ }
+
+ /* compute byte offset to write our element */
+ uint64_t offset = 0;
+ uint64_t bytes = (uint64_t)local_total;
+ MPI_Exscan(&bytes, &offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+ MPI_Offset write_offset = (MPI_Offset)offset;
+
+ uint64_t written = 0;
+ while (all_iters > 0) {
+ /* compute number of bytes left to write */
+ uint64_t remaining = (uint64_t)local_total - written;
+
+ /* maybe Incr pointer to our next buffer */
+ if (remaining == 0) {
+ idx++;
+ if (buf_cache[idx]->buf == NULL) {
+ }
+ }
+
+ /* compute count we'll write in this iteration */
+ int write_count = (int)maxwrite;
+ if (remaining < maxwrite) {
+ write_count = (int)remaining;
+ }
+ /* Get the buffer to output to the selected file */
+ ptr = buf_cache[idx]->buf;
+
+ /* collective write of file data */
+ mpirc = MPI_File_write_at_all(fh, write_offset, ptr, write_count, MPI_BYTE, &status);
+ if (mpirc != MPI_SUCCESS) {
+ MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+ MFU_ABORT(1, "Failed to write to file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+ }
+
+ /* update our offset into the file */
+ write_offset += (MPI_Offset)write_count;
+
+ /* update number of bytes written so far */
+ written += (uint64_t)write_count;
+
+ /* update pointer into our buffer */
+ ptr += write_count;
+
+ /* decrement our collective write loop counter */
+ all_iters--;
+ }
+
+ /* free buffer */
+ // mfu_free(&buf);
+
+ /* close file */
+ mpirc = MPI_File_close(&fh);
+ if (mpirc != MPI_SUCCESS) {
+ MPI_Error_string(mpirc, mpierrstr, &mpierrlen);
+ MFU_ABORT(1, "Failed to close file: `%s' rc=%d %s", name, mpirc, mpierrstr);
+ }
+
+ /* free mpi info */
+ MPI_Info_free(&info);
+
+ /* end timer */
+ double end_write = MPI_Wtime();
+
+ /* report write count, time, and rate */
+ if (mfu_rank == 0) {
+ double secs = end_write - start_write;
+ double rate = 0.0;
+ if (secs > 0.0) {
+ rate = ((double)all_count) / secs;
+ }
+ MFU_LOG(MFU_LOG_INFO, "Wrote %lu files in %.3lf seconds (%.3lf files/sec)", all_count, secs, rate);
+ }
+
+ return;
+}
+
+static void
+filter_hdf_files(mfu_flist *pflist, char *regex_exp, int exclude, int name)
+{
+ mfu_flist flist = *pflist;
+ mfu_flist eligible = mfu_flist_subset(flist);
+ uint64_t idx = 0;
+ uint64_t files = mfu_flist_size(flist);
+ while (idx < files) {
+ mfu_filetype type = mfu_flist_file_get_type(flist, idx);
+ if (type == MFU_TYPE_FILE || type == MFU_TYPE_LINK || type == MFU_TYPE_UNKNOWN) {
+ const char *file = mfu_flist_file_get_name(flist, idx);
+ int accessible = H5Fis_accessible(file, H5P_DEFAULT);
+ if (accessible)
+ mfu_flist_file_copy(flist, idx, eligible);
+ }
+ idx++;
+ }
+
+ mfu_flist_summarize(eligible);
+
+ /* assume we'll use the full list */
+ // mfu_flist srclist = flist;
+ mfu_flist srclist = eligible;
+
+ /* filter the list if needed */
+ mfu_flist filtered_flist = MFU_FLIST_NULL;
+ if (regex_exp != NULL) {
+ /* filter the list based on regex */
+ filtered_flist = mfu_flist_filter_regex(eligible, regex_exp, exclude, name);
+
+ /* update our source list to use the filtered list instead of the original */
+ srclist = filtered_flist;
+ }
+
+ mfu_flist_free(&flist);
+ *pflist = srclist;
+ return;
+}
+
+static int
+fill_file_list(mfu_flist new_flist, const char *config_filename, int myrank, int size)
+{
+ int index = 0;
+ char linebuf[PATH_MAX] = {
+ '\0',
+ };
+ FILE *config = HDfopen(config_filename, "r");
+ if (config == NULL)
+ return -1;
+ while (HDfgets(linebuf, sizeof(linebuf), config) != NULL) {
+ struct stat statbuf;
+ char * eol = HDstrchr(linebuf, '\n');
+ if (eol)
+ *eol = '\0';
+ if (HDstat(linebuf, &statbuf) == 0) {
+ if (myrank == (index % size)) {
+ mfu_flist_insert_stat((flist_t *)new_flist, linebuf, O_RDONLY, &statbuf);
+ }
+ index++;
+ }
+ linebuf[0] = 0;
+ }
+ HDfclose(config);
+ return index;
+}
+
+static int
+count_dirpaths(int argc, int startcnt, const char *argv[], int **index_out)
+{
+ int k;
+ int path_cnt = 0;
+ int idx_count = (argc - startcnt);
+ int * index = NULL;
+ struct stat pathcheck;
+
+ if (idx_count > 0) {
+ index = (int *)malloc((size_t)(argc - startcnt) * sizeof(int));
+ assert(index);
+ }
+ else
+ return 0;
+
+ for (k = startcnt; k < argc; k++) {
+ char *slash = NULL;
+ int c = *argv[k];
+ if ((c == '.') || (c == '/')) {
+ index[path_cnt++] = k;
+ }
+ else if ((c == '@')) {
+ const char *configFile = argv[k] + 1;
+ if (stat(configFile, &pathcheck) == 0) {
+ if (S_ISREG(pathcheck.st_mode)) {
+ config_index[use_config_file++] = k;
+ }
+ }
+ }
+ else if ((slash = strchr(argv[k], '/')) != NULL) {
+ if (stat(argv[k], &pathcheck) == 0) {
+ if (S_ISDIR(pathcheck.st_mode))
+ index[path_cnt++] = k;
+ }
+ }
+ }
+ if ((path_cnt == 0) && (index != NULL)) {
+ free(index);
+ return 0;
+ }
+ *index_out = index;
+ return path_cnt;
+}
+
+static char **
+copy_args(int argc, const char *argv[], int *mfu_argc, int *copy_len)
+{
+ int i, bytes_copied = 0;
+ int check_mfu_args = 1;
+ char **argv_copy = (char **)MFU_MALLOC((size_t)(argc + 2) * sizeof(char **));
+ assert(argv_copy);
+ assert(mfu_argc);
+ assert(copy_len);
+ save_command(argv[0]);
+
+ for (i = 0; i < argc; i++) {
+ argv_copy[i] = HDstrdup(argv[i]);
+ bytes_copied += (int)(strlen(argv[i]) + 1);
+ argv_copy[i] = HDstrdup(argv[i]);
+ if (check_mfu_args && (HDstrncmp(argv[i], "-T", 2) == 0)) {
+ check_mfu_args = 0;
+ *mfu_argc = i + 1;
+ }
+ }
+ argv_copy[i] = 0;
+ *copy_len = bytes_copied;
+ return argv_copy;
+}
+
+typedef struct hash_entry {
+ int hash;
+ char * name;
+ struct hash_entry *next; /* table Collision */
+ int nextCount;
+} hash_entry_t;
+
+#ifndef NAME_ENTRIES
+#define NAME_ENTRIES 4096
+#endif
+
+static hash_entry_t filename_cache[NAME_ENTRIES];
+
+static int
+get_copy_count(char *fname, char *appname)
+{
+ int filehash = 0, apphash = 0;
+ size_t k, applen = strlen(appname);
+ size_t filelen = strlen(fname);
+ int hash_index;
+
+ for (k = 0; k < filelen; k++) {
+ filehash += fname[k];
+ }
+ for (k = 0; k < applen; k++) {
+ apphash += appname[k];
+ }
+ hash_index = filehash % NAME_ENTRIES;
+ if (filename_cache[hash_index].name == NULL) {
+ filename_cache[hash_index].hash = apphash;
+ filename_cache[hash_index].name = HDstrdup(fname);
+ filename_cache[hash_index].next = NULL;
+ filename_cache[hash_index].nextCount = 1;
+ return 0;
+ }
+ else if ((apphash == filename_cache[hash_index].hash) &&
+ (strcmp(filename_cache[hash_index].name, fname) == 0)) {
+ int retval = filename_cache[hash_index].nextCount++;
+ return retval;
+ }
+ else { /* Collision */
+ hash_entry_t *nextEntry = &filename_cache[hash_index];
+ hash_entry_t *lastEntry = nextEntry;
+ while (nextEntry) {
+ if ((apphash == nextEntry->hash) && (strcmp(nextEntry->name, fname) == 0)) {
+ /* Match (increment nextCount and return) */
+ int retval = nextEntry->nextCount++;
+ return retval;
+ }
+ else {
+ /* No Match (continue search) */
+ lastEntry = nextEntry;
+ nextEntry = lastEntry->next;
+ }
+ }
+ nextEntry = (hash_entry_t *)malloc(sizeof(hash_entry_t));
+ if (nextEntry) {
+ lastEntry->next = nextEntry;
+ nextEntry->name = HDstrdup(fname);
+ nextEntry->hash = apphash;
+ nextEntry->next = NULL;
+ nextEntry->nextCount = 1;
+ }
+ }
+ return 0;
+}
+
+static void
+run_command(int argc __attribute__((unused)), char **argv, char *cmdline, const char *fname)
+{
+ char filepath[1024];
+ char *toolname = argv[0];
+ char *buf = NULL;
+ int use_stdout = 0;
+
+#ifdef H5_HAVE_WINDOWS
+ HDprintf("ERROR: %s %s: Unable to support fork/exec on WINDOWS\n", PROGRAMNAME, __func__);
+ h5dwalk_exit(EXIT_FAILURE);
+#else
+
+ /* create a copy of the 1st file passed to the application */
+ HDstrcpy(filepath, fname);
+
+ if (log_output_in_single_file || use_stdout) {
+ pid_t pid;
+ int pipefd[2];
+ buf_t * thisbuft = NULL;
+ buf_t **bufs = buf_cache;
+
+ if (bufs == NULL) {
+ bufs = (buf_t **)MFU_CALLOC(buft_max, sizeof(buf_t *));
+ assert((bufs != NULL));
+ buf_cache = bufs;
+#ifdef VERBOSE
+ if (buft_count == 0) {
+ HDprintf("[%d] Initial buf_cache allocation: buft_count=%d\n", sg_mpi_rank, buft_count);
+ }
+#endif
+ bufs[buft_count++] = thisbuft = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+ assert((thisbuft != NULL));
+ }
+ else {
+ thisbuft = bufs[buft_count - 1];
+ assert((thisbuft != NULL));
+ /* Check for remaining space in the current buffer */
+ /* If none, then create a new buffer */
+ if (thisbuft->count == 0) {
+ bufs[buft_count++] = thisbuft = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+ }
+ }
+ if ((thisbuft->buf == NULL)) {
+ thisbuft->buf = MFU_MALLOC(BUFT_SIZE);
+ assert((thisbuft->buf != NULL));
+ thisbuft->bufsize = BUFT_SIZE;
+ thisbuft->count = BUFT_SIZE;
+ thisbuft->dt = MPI_CHAR;
+ }
+ if (pipe(pipefd) == -1) {
+ perror("pipe");
+ exit(EXIT_FAILURE);
+ }
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+ if (pid == 0) {
+ close(pipefd[0]);
+ dup2(pipefd[1], fileno(stdout));
+ dup2(pipefd[1], fileno(stderr));
+ execvp(argv[0], argv);
+ }
+ else {
+ int w_status;
+ size_t nbytes;
+ size_t read_bytes = 0;
+ uint64_t remaining, offset;
+ close(pipefd[1]);
+ buf = thisbuft->buf;
+ remaining = thisbuft->count;
+ offset = thisbuft->chars;
+ nbytes = strlen(cmdline);
+ /* Record the command line for the log! */
+ if (nbytes < remaining) {
+ HDstrcpy(&buf[offset], cmdline);
+ thisbuft->chars += nbytes;
+ thisbuft->count -= nbytes;
+ remaining -= nbytes;
+ }
+ else { /* We're running out of space in the current buffer */
+ char *nextpart;
+ strncpy(&buf[offset], cmdline, remaining);
+ nextpart = &cmdline[remaining + 1];
+ thisbuft->count = 0;
+ thisbuft->chars += remaining;
+
+ /* Create a new read buffer */
+#ifdef VERBOSE
+ HDprintf("[%d] Allocate-1 a new read buffer:: buft_count=%d\n", sg_mpi_rank, buft_count);
+#endif
+ bufs[buft_count++] = thisbuft = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+ assert(thisbuft != NULL);
+ thisbuft->buf = MFU_MALLOC(BUFT_SIZE);
+ thisbuft->bufsize = BUFT_SIZE;
+ thisbuft->dt = MPI_CHAR;
+ /* Copy the remaining cmdline text into the new buffer */
+ HDstrcpy(buf, nextpart);
+ /* And update our buffer info */
+ // thisbuft->chars = strlen(nextpart) +1;
+ thisbuft->chars = strlen(nextpart);
+ thisbuft->count = BUFT_SIZE - thisbuft->chars;
+ }
+ offset = thisbuft->chars;
+
+ do {
+ waitpid(pid, &w_status, WNOHANG);
+ if ((nbytes = (size_t)read(pipefd[0], &buf[offset], remaining)) > 0) {
+ offset += nbytes;
+ read_bytes += nbytes;
+ remaining -= nbytes;
+ if (remaining == 0) {
+ /* Update the current buffer prior to allocating the new one */
+ thisbuft->count = 0;
+ thisbuft->chars += read_bytes;
+#ifdef VERBOSE
+ HDprintf("[%d] Allocate-2 a new read buffer:: buft_count=%d\n", sg_mpi_rank,
+ buft_count);
+#endif
+ bufs[buft_count++] = thisbuft = (buf_t *)MFU_CALLOC(1, sizeof(buf_t));
+ assert(thisbuft != NULL);
+ thisbuft->buf = MFU_MALLOC(BUFT_SIZE);
+ thisbuft->bufsize = BUFT_SIZE;
+ thisbuft->dt = MPI_CHAR;
+ thisbuft->chars = BUFT_SIZE;
+ offset = 0;
+ remaining = BUFT_SIZE;
+ }
+ }
+ } while (!WIFEXITED(w_status));
+ close(pipefd[0]);
+ wait(NULL);
+
+ thisbuft->count = remaining;
+ thisbuft->chars = thisbuft->bufsize - remaining;
+ }
+ }
+ else if (log_stdout_in_file) {
+ int log_instance = -1;
+ pid_t pid;
+ size_t log_len;
+ char logpath[2048];
+ char logErrors[2048];
+ char current_dir[2048];
+ char * logbase = HDstrdup(basename(filepath));
+ char * thisapp = HDstrdup(basename(toolname));
+
+ if (processing_inputfile == 0)
+ log_instance = get_copy_count(logbase, thisapp);
+
+ if (txtlog == NULL) {
+ if ((log_instance > 0) || processing_inputfile) {
+ if (processing_inputfile)
+ log_instance = current_input_index;
+ HDsnprintf(logpath, sizeof(logpath), "%s/%s_%s.log_%d",
+ HDgetcwd(current_dir, sizeof(current_dir)), logbase, thisapp, log_instance);
+ }
+ else {
+ HDsnprintf(logpath, sizeof(logpath), "%s/%s_%s.log",
+ HDgetcwd(current_dir, sizeof(current_dir)), logbase, thisapp);
+ }
+ }
+ else {
+ log_len = strlen(txtlog);
+ if ((log_instance > 0) || processing_inputfile) {
+ if (processing_inputfile)
+ log_instance = current_input_index;
+ if (txtlog[log_len - 1] == '/')
+ HDsnprintf(logpath, sizeof(logpath), "%s%s_%s.log_%d", txtlog, logbase, thisapp,
+ log_instance);
+ else
+ HDsnprintf(logpath, sizeof(logpath), "%s/%s_%s.log_%d", txtlog, logbase, thisapp,
+ log_instance);
+ }
+ else {
+ if (txtlog[log_len - 1] == '/')
+ HDsnprintf(logpath, sizeof(logpath), "%s%s_%s.log", txtlog, logbase, thisapp);
+ else
+ HDsnprintf(logpath, sizeof(logpath), "%s/%s_%s.log", txtlog, logbase, thisapp);
+ }
+ }
+
+ if (log_errors_in_file) {
+ /* We co-locate the error logs in the same directories as the regular log files.
+ * The easiest way to do this is to simply replace the .log with .err in a
+ * copy of the logpath variable.
+ */
+ log_len = strlen(logpath);
+ HDstrcpy(logErrors, logpath);
+ HDstrcpy(&logErrors[log_len - 3], "err");
+ }
+ if (mfu_debug_level == MFU_LOG_VERBOSE) {
+ HDprintf("\tCreating logfile: %s\n", logpath);
+ fflush(stdout);
+ }
+ pid = fork();
+ if (pid == 0) {
+ int efd;
+ int fd = open(logpath, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ dup2(fd, fileno(stdout));
+ if (log_errors_in_file) {
+ efd = open(logErrors, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ dup2(efd, fileno(stderr));
+ close(efd);
+ }
+ else
+ dup2(fd, fileno(stderr));
+ close(fd);
+ execvp(argv[0], argv);
+ }
+ int status;
+ pid = wait(&status);
+ if (logbase)
+ free(logbase);
+ if (thisapp)
+ free(thisapp);
+ } /* else if(log_stdout_in_file) */
+#endif /* #ifdef H5_HAVE_WINDOWS */
+}
+
+int MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg);
+int MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg);
+
+int
+MFU_PRED_EXEC(mfu_flist flist, uint64_t idx, void *arg)
+{
+ /* get file name for this item */
+ int file_substituted = 0;
+ const char *fname = mfu_flist_file_get_name(flist, idx);
+
+ char *toolname = NULL;
+ char filepath[1024];
+
+ size_t b_offset;
+
+ /* get pointer to encoded argc count and argv array */
+ int * count_ptr = arg;
+ char *buf = (char *)arg + sizeof(int);
+
+ /* get number of argv parameters */
+ int k = 0, count = *count_ptr;
+ toolname = buf;
+
+ /* Get a copy of fname */
+ HDstrcpy(filepath, fname);
+
+ /* allocate a char* for each item in the argv array,
+ * plus one more for a trailing NULL
+ * 'count' in this case is the number of args, so
+ * so we add (+1) for the toolname and another (+1)
+ * for the trailing NULL to terminate the list
+ */
+
+ char cmdline[2048];
+ char **argv = (char **)MFU_CALLOC((size_t)(count + 2), sizeof(char *));
+
+ argv[k++] = HDstrdup(toolname);
+
+ HDmemset(cmdline, 0, sizeof(cmdline));
+ buf += HDstrlen(toolname) + 1;
+ /* Reconstruct the command line that the user provided for the h5tool */
+ for (k = 1; k < count; k++) {
+ if (buf[0] == '&') {
+ const char *fname_arg = NULL;
+ mfu_flist flist_arg;
+ void * check_ptr[2] = {NULL, NULL};
+
+ HDmemcpy(check_ptr, &buf[1], sizeof(void *));
+ flist_arg = (mfu_flist)check_ptr[0];
+
+ /* +2 (see below) accounts for the '&' and the trailing zero pad */
+ buf += sizeof(mfu_flist *) + 2;
+ fname_arg = mfu_flist_file_get_name(flist_arg, idx);
+ if (fname_arg == NULL) {
+ HDprintf("[%d] Warning: Unable to resolve file_substitution %d (idx=%ld)\n", sg_mpi_rank,
+ file_substituted, idx);
+ argv[k] = HDstrdup(fname);
+ }
+ else {
+ argv[k] = HDstrdup(fname_arg);
+ file_substituted++;
+ }
+ }
+ else {
+ argv[k] = HDstrdup(buf);
+ buf += HDstrlen(argv[k]) + 1;
+ }
+ }
+
+ HDsnprintf(cmdline, sizeof(cmdline), "\n---------\nCommand:");
+ b_offset = strlen(cmdline);
+ for (k = 0; k < count; k++) {
+ HDsprintf(&cmdline[b_offset], " %s", argv[k]);
+ b_offset = strlen(cmdline);
+ }
+ HDsprintf(&cmdline[b_offset], "\n");
+ run_command(count, argv, cmdline, fname);
+
+ mfu_free(argv);
+
+ return 0;
+}
+
+int
+MFU_PRED_PRINT(mfu_flist flist, uint64_t idx, void *arg __attribute__((unused)))
+{
+ const char *name = mfu_flist_file_get_name(flist, idx);
+ HDprintf("%s\n", name);
+ return 1;
+}
+
+static void
+pred_commit(mfu_pred *p)
+{
+ mfu_pred *cur = p;
+ while (cur) {
+ if (cur->f == MFU_PRED_PRINT || cur->f == MFU_PRED_EXEC) {
+ break;
+ }
+ cur = cur->next;
+ }
+}
+
+static void
+add_executable(int argc, char **argv, char *cmdstring, int *f_index, int f_count __attribute__((unused)))
+{
+ char cmdline[2048];
+ HDsnprintf(cmdline, sizeof(cmdline), "\n---------\nCommand: %s\n", cmdstring);
+ argv[argc] = NULL;
+ run_command(argc, argv, cmdline, argv[f_index[0]]);
+ return;
+}
+
+static int
+process_input_file(char *inputname, int myrank, int size)
+{
+ int index = 0;
+ char linebuf[PATH_MAX] = {
+ '\0',
+ };
+ FILE * config = HDfopen(inputname, "r");
+ mfu_flist flist1 = NULL;
+
+ if (config == NULL)
+ return -1;
+
+ flist1 = mfu_flist_new();
+
+ /* Flag the fact that we're processing an inputfile (script)
+ * so that we can generate a meaningful logfile name...
+ */
+ processing_inputfile = 1;
+
+ while (HDfgets(linebuf, sizeof(linebuf), config) != NULL) {
+ const char *delim = " \n";
+ char * cmdline = NULL;
+ char * cmd = NULL;
+ char * arg = NULL;
+ char * argv[256];
+ int fileindex[256];
+ int filecount = 0;
+ int token = 0;
+ struct stat statbuf;
+
+ char *eol = strchr(linebuf, '\n');
+ if (eol) {
+ *eol = '\0';
+ }
+ cmdline = HDstrdup(linebuf);
+ cmd = HDstrtok(linebuf, delim);
+ if (cmd) {
+ arg = cmd;
+ while (arg != NULL) {
+ char c = arg[0];
+ if (token > 0) {
+ if ((c == '.') || (c == '/')) {
+ /* 'arg' looks to be a filepath */
+ if (stat(arg, &statbuf) == 0) {
+ mfu_flist_insert_stat(flist1, arg, O_RDONLY, &statbuf);
+ }
+ fileindex[filecount++] = token;
+ }
+ }
+ argv[token++] = arg;
+ arg = strtok(NULL, delim);
+ }
+
+ if (myrank == (index % size)) {
+ current_input_index = index;
+ add_executable(token, argv, cmdline, fileindex, filecount);
+ }
+ index++;
+ }
+ linebuf[0] = 0;
+ HDfree(cmdline);
+ }
+
+ if (output_log_file) {
+ dh5tool_flist_write_text(output_log_file, flist1);
+ }
+ HDfclose(config);
+
+ mfu_flist_free(&flist1);
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int i;
+ int rc = 0;
+
+ char *env_var = NULL;
+
+ /* initialize MPI */
+ MPI_Init(&argc, (char ***)&argv);
+ mfu_init();
+
+ /* Initialize h5tools lib */
+ h5tools_init();
+
+ h5tools_setprogname(PROGRAMNAME);
+ h5tools_setstatus(EXIT_SUCCESS);
+
+ /* get our rank and the size of comm_world */
+ int rank, ranks;
+ MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+ MPI_Comm_size(MPI_COMM_WORLD, &ranks);
+
+ /* Assign the static global mpi_rank (for debugging) */
+ sg_mpi_rank = rank;
+
+#if 0
+ env_var = HDgetenv("HDF5_H5DWALK_PRINT_CMDLINE");
+ if (env_var) {
+ int enable = HDatoi(env_var);
+ if (enable) {
+
+ }
+ }
+#endif
+ /* pointer to mfu_walk_opts */
+ mfu_walk_opts_t *walk_opts = mfu_walk_opts_new();
+
+#ifdef DAOS_SUPPORT
+ /* DAOS vars */
+ daos_args_t *daos_args = daos_args_new();
+#endif
+
+ int args_byte_length = -1;
+ int mfu_argc = argc;
+ char * args_buf = NULL;
+ char **h5tool_argv = copy_args(argc, argv, &mfu_argc, &args_byte_length);
+
+ char *inputname = NULL;
+ char *outputname = NULL;
+ char *sortfields = NULL;
+ char *distribution = NULL;
+
+ int text = 0;
+ int h5tool_argc = 0;
+
+ mfu_debug_level = MFU_LOG_WARN;
+ h5tool_argv[argc] = 0;
+
+ /* The struct option declaration can found in bits/getopt_ext.h
+ * I've reproduced it here:
+ * struct option { char * name; int has_arg; int *flag; int val};
+ */
+ int opt;
+ int tool_selected = 0;
+ int tool_args_start = -1;
+ int last_mfu_arg = 0;
+
+ mfu_pred *pred_head = NULL;
+
+ while (!tool_selected) {
+ opt = H5_get_option(argc, (const char *const *)argv, s_opts, l_opts);
+ switch ((char)opt) {
+ default:
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ break;
+ case 'i':
+ inputname = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ if (inputname)
+ tool_selected = 1;
+ break;
+ case 'o':
+ outputname = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ if (outputname) {
+ log_output_in_single_file = 1;
+ output_log_file = HDstrdup(H5_optarg);
+ text = 1; /* Format TXT, not HDF5 */
+ }
+ break;
+ case 'E':
+ log_errors_in_file = 1;
+ errlog = HDstrdup(H5_optarg);
+ last_mfu_arg = H5_optind;
+ break;
+ case 'l':
+ log_stdout_in_file = 1;
+ if (H5_optarg)
+ txtlog = HDstrdup(H5_optarg);
+ break;
+ case 'T':
+ /* We need to stop parsing user options at this point.
+ * all remaining arguments should be utilized as the
+ * arguments to the selected HDF5 tools.
+ * We also want to avoid any misinterpretations if
+ * HDF5 tool options conflict with the MFU options.
+ */
+ tool_selected = 1;
+ tool_args_start = H5_optind;
+ h5tool_argc = argc - mfu_argc;
+ last_mfu_arg = H5_optind;
+ /* Don't allow any further parsing of arguments */
+ break;
+ case 'h':
+ usage();
+ h5dwalk_exit(EXIT_SUCCESS);
+ break;
+ case '?':
+ usage();
+ h5dwalk_exit(EXIT_SUCCESS);
+ break;
+ }
+ }
+
+ if (inputname != NULL) {
+ if (tool_selected && (rank == 0)) {
+ if ((log_output_in_single_file == 0) && (log_stdout_in_file == 0))
+ puts("WARNING: When utilizing --input, the only other supported "
+ "runtime argument is --output or -l");
+ }
+ rc = process_input_file(inputname, rank, ranks);
+ mfu_finalize();
+ h5dwalk_exit(rc);
+ }
+
+ /**************************************************************/
+ /* We might consider doing a tool specific argument checking */
+ /* to prevent runtime errors. We would also like to allow */
+ /* the same command line interface for parallel invocations */
+ /* so that users don't get confused. Effectively, we should */
+ /* strip out all MFU related arguments and retain copies of */
+ /* everything else to pass into a serial instance of the tool */
+ /* */
+ /* As we move forward, we might allow the HDF5 tool to be */
+ /* queried for an acceptable set set of runtime arguments. */
+ /* This could be just a simple string to allow getopt_long */
+ /* to be invoked on the remaining command line arguments. */
+ /**************************************************************/
+
+ int *path_indices = NULL;
+ int numpaths = count_dirpaths(argc, tool_args_start, argv, &path_indices);
+
+ const char **argpaths = NULL;
+
+ /* store src and dest path strings */
+ const char *path1 = NULL;
+ const char *path2 = NULL;
+ size_t pathlen_total = 0;
+
+ if (numpaths && path_indices) {
+ argpaths = &argv[path_indices[0]];
+ }
+ /* pointer to mfu_file src and dest objects */
+ /* The dst object will only be used for tools which
+ * accept 2 (or more?) file arguments */
+ mfu_file_t *mfu_src_file = NULL;
+ mfu_file_t *mfu_dst_file = NULL;
+
+ /* first item is source and second is dest */
+ mfu_param_path *srcpath = NULL;
+ mfu_param_path *destpath = NULL;
+ mfu_param_path *paths = NULL;
+
+ mfu_flist flist1 = NULL;
+ mfu_flist flist2 = NULL;
+
+ /* allocate structure to define walk options */
+ if (use_config_file > 0) {
+ int count1 = 0, count2 = 0;
+ for (i = 0; i < use_config_file; i++) {
+ int index = config_index[i];
+ const char *config_file = argv[index];
+ if (i == 0) {
+ flist1 = mfu_flist_new();
+ count1 = fill_file_list(flist1, config_file + 1, rank, ranks);
+ }
+ else if (i == 1) {
+ flist2 = mfu_flist_new();
+ count2 = fill_file_list(flist2, config_file + 1, rank, ranks);
+ }
+ }
+ if (count1 != count2) {
+ HDprintf("config files have different file counts: (1) %d and (2) %d\n", count1, count2);
+ }
+ }
+ else if (numpaths > 0) {
+
+ /* allocate space for each path */
+ paths = (mfu_param_path *)MFU_MALLOC((size_t)numpaths * sizeof(mfu_param_path));
+ mfu_src_file = mfu_file_new();
+
+ /* process each path */
+ mfu_param_path_set_all((uint64_t)numpaths, (const char **)argpaths, paths, mfu_src_file, true);
+
+ /* don't allow user to specify input file with walk */
+ if (inputname != NULL) {
+ if (paths) {
+ mfu_free(&paths);
+ }
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ }
+ }
+ else {
+ /* if we're not walking, we must be reading,
+ * and for that we need a file */
+ if (inputname == NULL) {
+ if (rank == 0) {
+ MFU_LOG(MFU_LOG_ERR, "Either a <path> or --input is required.");
+ }
+ usage();
+ h5dwalk_exit(EXIT_FAILURE);
+ }
+ }
+
+ if (numpaths > 0) {
+ flist1 = mfu_flist_new();
+ srcpath = &paths[0];
+ path1 = srcpath->path;
+ pathlen_total += strlen(path1);
+ mfu_flist_walk_param_paths(1, srcpath, walk_opts, flist1, mfu_src_file);
+ }
+ if (numpaths > 1) {
+ flist2 = mfu_flist_new();
+ mfu_dst_file = mfu_file_new();
+ destpath = &paths[1];
+ path2 = destpath->path;
+ pathlen_total += HDstrlen(path2);
+ mfu_flist_walk_param_paths(1, destpath, walk_opts, flist2, mfu_dst_file);
+ }
+
+ if (tool_selected && (args_byte_length > 0)) {
+ pred_head = mfu_pred_new();
+ args_buf = (char *)HDmalloc((size_t)(args_byte_length + pathlen_total));
+ }
+
+ /* filter files to only include hdf5 files */
+ if (flist1) {
+ filter_hdf_files(&flist1, NULL, 0, 0);
+ }
+ if (flist2) {
+ filter_hdf_files(&flist2, NULL, 0, 0);
+ }
+
+ /* if (numpaths > 1)
+ * In a case where we requeire the list indices of files from multiple
+ * directories to match, we must utilize a mapping function.
+ * The question to answer is how does the mapping function work?
+ * The most probable is a sort function, e.g.
+ * 1) an alphabet sort?
+ * 2) sort by file size?
+ * 3) something else?
+ */
+ if (args_buf != NULL) {
+ int k = 0;
+ char *ptr = args_buf + sizeof(int);
+ *(int *)args_buf = h5tool_argc;
+ for (i = tool_args_start - 1; i < argc; i++) {
+ int copy_flist = -1;
+ if (i == config_index[k]) {
+ copy_flist = k;
+ }
+ else if (path_indices && (i == path_indices[k])) {
+ copy_flist = k;
+ }
+
+ /* Maybe copy one of the flist pointers */
+ if (copy_flist >= 0) {
+ /* The '&' indicates that what follows is a pointer */
+ *ptr++ = '&';
+ /* Select which argument list should be used */
+ if (k == 0) {
+ HDmemcpy(ptr, &flist1, sizeof(void *));
+ }
+ if (k == 1) {
+ HDmemcpy(ptr, &flist2, sizeof(void *));
+ }
+ ptr += sizeof(mfu_flist *);
+ k++;
+ }
+ else {
+ HDstrcpy(ptr, argv[i]);
+ ptr += HDstrlen(argv[i]);
+ }
+ *ptr++ = 0;
+ }
+ *ptr++ = 0;
+
+ mfu_pred_add(pred_head, MFU_PRED_EXEC, (void *)args_buf);
+ pred_commit(pred_head);
+ }
+
+ /* apply predicates to each item in list */
+ mfu_flist flist3 = mfu_flist_filter_pred(flist1, pred_head);
+
+ /* print summary statistics of flist */
+ mfu_flist_print_summary(flist1);
+
+ /* write data to cache file */
+ if (outputname != NULL) {
+ if (!text) {
+ if (rank == 0) {
+ puts("output capture needs to be a text formatted file");
+ }
+ }
+ else {
+ dh5tool_flist_write_text(outputname, flist1);
+ }
+ }
+
+#ifdef DAOS_SUPPORT
+ daos_cleanup(daos_args, mfu_file, NULL);
+#endif
+
+ /* free users, groups, and files objects */
+ mfu_flist_free(&flist1);
+ if (flist2)
+ mfu_flist_free(&flist2);
+ if (flist3)
+ mfu_flist_free(&flist3);
+
+ /* free memory allocated for options */
+ mfu_free(&distribution);
+ mfu_free(&sortfields);
+ mfu_free(&outputname);
+ mfu_free(&inputname);
+
+ /* free the path parameters */
+ mfu_param_path_free_all((uint64_t)numpaths, paths);
+
+ /* free memory allocated to hold params */
+ mfu_free(&paths);
+
+ /* free the walk options */
+ mfu_walk_opts_delete(&walk_opts);
+
+ /* delete file object */
+ mfu_file_delete(&mfu_src_file);
+
+ h5tools_close();
+ /* shut down MPI */
+ mfu_finalize();
+ MPI_Finalize();
+
+ return rc;
+}
+
+/*-------------------------------------------------------------------------
+ * Function: h5dwalk_exit
+ *
+ * Purpose: close the tools library and exit
+ *
+ * Return: none
+ *
+ * Programmer: Albert Cheng
+ * Date: Feb 6, 2005
+ *
+ * Comments:
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+H5_ATTR_NORETURN void
+h5dwalk_exit(int status)
+{
+ int require_finalize = 0;
+ h5tools_close();
+ mfu_finalize();
+
+ /* Check to see whether we need to call MPI_Finalize */
+ MPI_Initialized(&require_finalize);
+ if (require_finalize)
+ MPI_Finalize();
+
+ HDexit(status);
+}