summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2022-08-09 23:05:37 (GMT)
committerGitHub <noreply@github.com>2022-08-09 23:05:37 (GMT)
commitef33ac8bac5fd201b41d1a3084f03834f47729a2 (patch)
treead4756b872abff6d16f11d9a6c6c949e8f359cad
parentb84241e57a97309b15846da4cc74611a66d92f6d (diff)
downloadhdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.zip
hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.gz
hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.bz2
Subfiling VFD - tidying up and fixing a few new testing failures (#1977)
* Rename Subfiling IOC "thread_pool_count" field to "thread_pool_size" * Add simple HDF5 example for Subfiling VFD * Subfiling VFD - never cache app topology as it may change * Subfiling VFD - cleanup unused funtionality and tidy up some TODOs * Subfiling VFD - tidy up subfiling error handling in H5subfiling_common.c * Subfiling VFD - show number of failed I/O requests on close * Subfiling VFD - Update file cmp callback after switching to MPI I/O VFD * Amend RELEASE.txt with info about h5fuse.sh and Subfiling limitations * Subfiling VFD - switch to using H5_basename and H5_dirname
-rw-r--r--examples/CMakeLists.txt1
-rw-r--r--examples/CMakeTests.cmake2
-rw-r--r--examples/Makefile.am10
-rw-r--r--examples/ph5_subfiling.c362
-rw-r--r--release_docs/RELEASE.txt19
-rw-r--r--src/H5FDsubfiling/H5FDioc.c100
-rw-r--r--src/H5FDsubfiling/H5FDioc.h10
-rw-r--r--src/H5FDsubfiling/H5FDioc_threads.c73
-rw-r--r--src/H5FDsubfiling/H5FDsubfile_int.c53
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.c277
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.c1463
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.h27
-rw-r--r--testpar/t_vfd.c10
13 files changed, 916 insertions, 1491 deletions
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 04d33dd..58454c7 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -47,6 +47,7 @@ if (H5_HAVE_PARALLEL)
ph5example
ph5_filtered_writes
ph5_filtered_writes_no_sel
+ ph5_subfiling
)
endif ()
diff --git a/examples/CMakeTests.cmake b/examples/CMakeTests.cmake
index 878aac8..afab096 100644
--- a/examples/CMakeTests.cmake
+++ b/examples/CMakeTests.cmake
@@ -30,6 +30,8 @@ set (test_ex_CLEANFILES
group.h5
groups.h5
hard_link.h5
+ h5_subfiling_default_example.h5
+ h5_subfiling_custom_example.h5
mount1.h5
mount2.h5
one_index_file.h5
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 161f789..2a02838 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,7 +20,7 @@
include $(top_srcdir)/config/commence.am
if BUILD_PARALLEL_CONDITIONAL
- EXAMPLE_PROG_PARA = ph5example ph5_filtered_writes ph5_filtered_writes_no_sel
+ EXAMPLE_PROG_PARA = ph5example ph5_filtered_writes ph5_filtered_writes_no_sel ph5_subfiling
endif
INSTALL_SCRIPT_FILES = run-c-ex.sh
@@ -51,8 +51,9 @@ INSTALL_FILES = h5_write.c h5_read.c h5_extend_write.c h5_chunk_read.c h5_compou
h5_reference_deprec.c h5_ref_extern.c h5_ref_compat.c h5_ref2reg_deprec.c \
h5_extlink.c h5_elink_unix2win.c h5_shared_mesg.c h5_debug_trace.c \
ph5example.c ph5_filtered_writes.c ph5_filtered_writes_no_sel.c \
- h5_vds.c h5_vds-exc.c h5_vds-exclim.c h5_vds-eiger.c h5_vds-simpleIO.c \
- h5_vds-percival.c h5_vds-percival-unlim.c h5_vds-percival-unlim-maxmin.c
+ ph5_subfiling.c h5_vds.c h5_vds-exc.c h5_vds-exclim.c h5_vds-eiger.c \
+ h5_vds-simpleIO.c h5_vds-percival.c h5_vds-percival-unlim.c \
+ h5_vds-percival-unlim-maxmin.c
@@ -120,7 +121,8 @@ h5_ref2reg_deprec: $(srcdir)/h5_ref2reg_deprec.c
h5_drivers: $(srcdir)/h5_drivers.c
ph5example: $(srcdir)/ph5example.c
ph5_filtered_writes: $(srcdir)/ph5_filtered_writes.c
-ph5_filtered_writes_no_sel: $(srcdir)/ph5_filtered_writes_no_sel.c
+ph5_filtered_writes_no_sel: $(srcdir)/ph5_filtered_writes_no_sel.c
+ph5_subfiling: $(srcdir)/ph5_subfiling.c
h5_dtransform: $(srcdir)/h5_dtransform.c
h5_extlink: $(srcdir)/h5_extlink.c $(EXTLINK_DIRS)
h5_elink_unix2win: $(srcdir)/h5_elink_unix2win.c $(EXTLINK_DIRS)
diff --git a/examples/ph5_subfiling.c b/examples/ph5_subfiling.c
new file mode 100644
index 0000000..9142749
--- /dev/null
+++ b/examples/ph5_subfiling.c
@@ -0,0 +1,362 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the COPYING file, which can be found at the root of the source code *
+ * distribution tree, or in https://www.hdfgroup.org/licenses. *
+ * If you do not have access to either file, you may request a copy from *
+ * help@hdfgroup.org. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Example of using HDF5's Subfiling VFD to write to an
+ * HDF5 file that is striped across multiple sub-files
+ *
+ * If the HDF5_NOCLEANUP environment variable is set, the
+ * files that this example creates will not be removed as
+ * the example finishes.
+ *
+ * In general, the current working directory in which compiling
+ * is done, is not suitable for parallel I/O and there is no
+ * standard pathname for parallel file systems. In some cases,
+ * the parallel file name may even need some parallel file type
+ * prefix such as: "pfs:/GF/...". Therefore, this example parses
+ * the HDF5_PARAPREFIX environment variable for a prefix, if one
+ * is needed.
+ */
+
+#include <stdlib.h>
+
+#include "hdf5.h"
+
+#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_SUBFILING_VFD)
+
+#define EXAMPLE_FILE "h5_subfiling_default_example.h5"
+#define EXAMPLE_FILE2 "h5_subfiling_custom_example.h5"
+
+#define EXAMPLE_DSET_NAME "DSET"
+#define EXAMPLE_DSET_DIMS 2
+
+/* Have each MPI rank write 64MiB of data */
+#define EXAMPLE_DSET_NY 16777216
+
+/* Dataset datatype */
+#define EXAMPLE_DSET_DATATYPE H5T_NATIVE_INT
+typedef int EXAMPLE_DSET_C_DATATYPE;
+
+/* Cleanup created files */
+static void
+cleanup(char *filename, hid_t fapl_id)
+{
+ hbool_t do_cleanup = getenv(HDF5_NOCLEANUP) ? 0 : 1;
+
+ if (do_cleanup)
+ H5Fdelete(filename, fapl_id);
+}
+
+static void
+subfiling_write_default(hid_t fapl_id, int mpi_size, int mpi_rank)
+{
+ EXAMPLE_DSET_C_DATATYPE *data;
+ hsize_t dset_dims[EXAMPLE_DSET_DIMS];
+ hsize_t start[EXAMPLE_DSET_DIMS];
+ hsize_t count[EXAMPLE_DSET_DIMS];
+ hid_t file_id;
+ hid_t dset_id;
+ hid_t filespace;
+ char filename[512];
+ char *par_prefix;
+
+ /*
+ * Set Subfiling VFD on FAPL using default settings
+ * (use IOC VFD, 1 IOC per node, 32MiB stripe size)
+ *
+ * Note that all of Subfiling's configuration settings
+ * can be adjusted with environment variables as well
+ * in this case.
+ */
+ H5Pset_fapl_subfiling(fapl_id, NULL);
+
+ /*
+ * OPTIONAL: Set alignment of objects in HDF5 file to
+ * be equal to the Subfiling stripe size.
+ * Choosing a Subfiling stripe size and HDF5
+ * object alignment value that are some
+ * multiple of the disk block size can
+ * generally help performance by ensuring
+ * that I/O is well-aligned and doesn't
+ * excessively cross stripe boundaries.
+ *
+ * Note that this option can substantially
+ * increase the size of the resulting HDF5
+ * files, so it is a good idea to keep an eye
+ * on this.
+ */
+ H5Pset_alignment(fapl_id, 0, 33554432); /* Align to default 32MiB stripe size */
+
+ /* Parse any parallel prefix and create filename */
+ par_prefix = getenv("HDF5_PARAPREFIX");
+
+ snprintf(filename, sizeof(filename), "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "",
+ EXAMPLE_FILE);
+
+ /*
+ * Create a new file collectively
+ */
+ file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+
+ /*
+ * Create the dataspace for the dataset. The first
+ * dimension varies with the number of MPI ranks
+ * while the second dimension is fixed.
+ */
+ dset_dims[0] = mpi_size;
+ dset_dims[1] = EXAMPLE_DSET_NY;
+ filespace = H5Screate_simple(EXAMPLE_DSET_DIMS, dset_dims, NULL);
+
+ /*
+ * Create the dataset with default properties
+ */
+ dset_id = H5Dcreate2(file_id, EXAMPLE_DSET_NAME, EXAMPLE_DSET_DATATYPE, filespace, H5P_DEFAULT,
+ H5P_DEFAULT, H5P_DEFAULT);
+
+ /*
+ * Each MPI rank writes from a contiguous memory
+ * region to the hyperslab in the file
+ */
+ start[0] = mpi_rank;
+ start[1] = 0;
+ count[0] = 1;
+ count[1] = dset_dims[1];
+ H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, NULL, count, NULL);
+
+ /*
+ * Initialize data buffer
+ */
+ data = malloc(count[0] * count[1] * sizeof(EXAMPLE_DSET_C_DATATYPE));
+ for (size_t i = 0; i < count[0] * count[1]; i++) {
+ data[i] = mpi_rank + i;
+ }
+
+ /*
+ * Write to dataset
+ */
+ H5Dwrite(dset_id, EXAMPLE_DSET_DATATYPE, H5S_BLOCK, filespace, H5P_DEFAULT, data);
+
+ /*
+ * Close/release resources.
+ */
+
+ free(data);
+
+ H5Dclose(dset_id);
+ H5Sclose(filespace);
+ H5Fclose(file_id);
+
+ cleanup(EXAMPLE_FILE, fapl_id);
+}
+
+static void
+subfiling_write_custom(hid_t fapl_id, int mpi_size, int mpi_rank)
+{
+ EXAMPLE_DSET_C_DATATYPE *data;
+ H5FD_subfiling_config_t subf_config;
+ H5FD_ioc_config_t ioc_config;
+ hsize_t dset_dims[EXAMPLE_DSET_DIMS];
+ hsize_t start[EXAMPLE_DSET_DIMS];
+ hsize_t count[EXAMPLE_DSET_DIMS];
+ hid_t file_id;
+ hid_t ioc_fapl;
+ hid_t dset_id;
+ hid_t filespace;
+ char filename[512];
+ char *par_prefix;
+
+ /*
+ * Get a default Subfiling and IOC configuration
+ */
+ H5Pget_fapl_subfiling(fapl_id, &subf_config);
+ H5Pget_fapl_ioc(fapl_id, &ioc_config);
+
+ /*
+ * Set Subfiling configuration to use a 1MiB
+ * stripe size and the SELECT_IOC_EVERY_NTH_RANK
+ * selection method. By default, without a setting
+ * in the H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * environment variable, this will use every MPI
+ * rank as an I/O concentrator.
+ */
+ subf_config.shared_cfg.stripe_size = 1048576;
+ subf_config.shared_cfg.ioc_selection = SELECT_IOC_EVERY_NTH_RANK;
+
+ /*
+ * Set IOC configuration to use 2 worker threads
+ * per IOC instead of the default setting and
+ * update IOC configuration with new subfiling
+ * configuration.
+ */
+ ioc_config.thread_pool_size = 2;
+ ioc_config.subf_config = subf_config.shared_cfg;
+
+ /*
+ * Create a File Access Property List for
+ * the IOC VFD and set our new configuration
+ * on it. We make a copy of the original
+ * FAPL here so we get the MPI parameters
+ * set on it
+ */
+ ioc_fapl = H5Pcopy(fapl_id);
+ H5Pset_fapl_ioc(ioc_fapl, &ioc_config);
+
+ /*
+ * Close FAPLs in the default configurations
+ * we retrieved and update the subfiling
+ * configuration with our new IOC FAPL
+ */
+ H5Pclose(ioc_config.under_fapl_id);
+ H5Pclose(subf_config.ioc_fapl_id);
+ subf_config.ioc_fapl_id = ioc_fapl;
+
+ /*
+ * Finally, set our new Subfiling configuration
+ * on the original FAPL
+ */
+ H5Pset_fapl_subfiling(fapl_id, &subf_config);
+
+ /*
+ * OPTIONAL: Set alignment of objects in HDF5 file to
+ * be equal to the Subfiling stripe size.
+ * Choosing a Subfiling stripe size and HDF5
+ * object alignment value that are some
+ * multiple of the disk block size can
+ * generally help performance by ensuring
+ * that I/O is well-aligned and doesn't
+ * excessively cross stripe boundaries.
+ *
+ * Note that this option can substantially
+ * increase the size of the resulting HDF5
+ * files, so it is a good idea to keep an eye
+ * on this.
+ */
+ H5Pset_alignment(fapl_id, 0, 1048576); /* Align to custom 1MiB stripe size */
+
+ /* Parse any parallel prefix and create filename */
+ par_prefix = getenv("HDF5_PARAPREFIX");
+
+ snprintf(filename, sizeof(filename), "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "",
+ EXAMPLE_FILE2);
+
+ /*
+ * Create a new file collectively
+ */
+ file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+
+ /*
+ * Create the dataspace for the dataset. The first
+ * dimension varies with the number of MPI ranks
+ * while the second dimension is fixed.
+ */
+ dset_dims[0] = mpi_size;
+ dset_dims[1] = EXAMPLE_DSET_NY;
+ filespace = H5Screate_simple(EXAMPLE_DSET_DIMS, dset_dims, NULL);
+
+ /*
+ * Create the dataset with default properties
+ */
+ dset_id = H5Dcreate2(file_id, EXAMPLE_DSET_NAME, EXAMPLE_DSET_DATATYPE, filespace, H5P_DEFAULT,
+ H5P_DEFAULT, H5P_DEFAULT);
+
+ /*
+ * Each MPI rank writes from a contiguous memory
+ * region to the hyperslab in the file
+ */
+ start[0] = mpi_rank;
+ start[1] = 0;
+ count[0] = 1;
+ count[1] = dset_dims[1];
+ H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, NULL, count, NULL);
+
+ /*
+ * Initialize data buffer
+ */
+ data = malloc(count[0] * count[1] * sizeof(EXAMPLE_DSET_C_DATATYPE));
+ for (size_t i = 0; i < count[0] * count[1]; i++) {
+ data[i] = mpi_rank + i;
+ }
+
+ /*
+ * Write to dataset
+ */
+ H5Dwrite(dset_id, EXAMPLE_DSET_DATATYPE, H5S_BLOCK, filespace, H5P_DEFAULT, data);
+
+ /*
+ * Close/release resources.
+ */
+
+ free(data);
+
+ H5Dclose(dset_id);
+ H5Sclose(filespace);
+ H5Fclose(file_id);
+
+ cleanup(EXAMPLE_FILE2, fapl_id);
+}
+
+int
+main(int argc, char **argv)
+{
+ MPI_Comm comm = MPI_COMM_WORLD;
+ MPI_Info info = MPI_INFO_NULL;
+ hid_t fapl_id;
+ int mpi_size;
+ int mpi_rank;
+ int mpi_thread_required = MPI_THREAD_MULTIPLE;
+ int mpi_thread_provided = 0;
+
+ /* HDF5 Subfiling VFD requires MPI_Init_thread with MPI_THREAD_MULTIPLE */
+ MPI_Init_thread(&argc, &argv, mpi_thread_required, &mpi_thread_provided);
+ if (mpi_thread_provided < mpi_thread_required) {
+ printf("MPI_THREAD_MULTIPLE not supported\n");
+ MPI_Abort(comm, -1);
+ }
+
+ MPI_Comm_size(comm, &mpi_size);
+ MPI_Comm_rank(comm, &mpi_rank);
+
+ /*
+ * Set up File Access Property List with MPI
+ * parameters for the Subfiling VFD to use
+ */
+ fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+ H5Pset_mpi_params(fapl_id, comm, info);
+
+ /* Use Subfiling VFD with default settings */
+ subfiling_write_default(fapl_id, mpi_size, mpi_rank);
+
+ /* Use Subfiling VFD with custom settings */
+ subfiling_write_custom(fapl_id, mpi_size, mpi_rank);
+
+ H5Pclose(fapl_id);
+
+ if (mpi_rank == 0)
+ printf("PHDF5 example finished with no errors\n");
+
+ MPI_Finalize();
+
+ return 0;
+}
+
+#else
+
+/* dummy program since HDF5 is not parallel-enabled */
+int
+main(void)
+{
+ printf(
+ "Example program cannot run - HDF5 must be built with parallel support and Subfiling VFD support\n");
+ return 0;
+}
+
+#endif /* H5_HAVE_PARALLEL && H5_HAVE_SUBFILING_VFD */
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index ab43011..c7b41cd 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -103,8 +103,25 @@ New Features
while also minimizing the locking issues of the single shared file approach
on a parallel file system.
+ Also included with the Subfiling VFD is a new h5fuse.sh script which
+ reads a Subfiling configuration file and then combines the various
+ sub-files back into a single HDF5 file. By default, the h5fuse.sh script
+ looks in the current directory for the Subfiling configuration file,
+ but can also be pointed to the configuration file with a command-line
+ option.
+
The Subfiling VFD can be used by calling H5Pset_fapl_subfiling() on a
- File Access Property List and using that FAPL for file operations.
+ File Access Property List and using that FAPL for file operations. Note
+ that the Subfiling VFD currently has the following limitations:
+
+ * Does not currently support HDF5 collective I/O, other than collective
+ metadata writes and reads as set by H5Pset_coll_metadata_write() and
+ H5Pset_all_coll_metadata_ops()
+
+ * The Subfiling VFD should not currently be used with an HDF5 library
+ that has been built with thread-safety enabled. This can cause deadlocks
+ when failures occur due to interactions between the VFD's internal
+ threads and HDF5's global lock.
(JTH - 2022/07/22)
diff --git a/src/H5FDsubfiling/H5FDioc.c b/src/H5FDsubfiling/H5FDioc.c
index 6bfb1b7..5030055 100644
--- a/src/H5FDsubfiling/H5FDioc.c
+++ b/src/H5FDsubfiling/H5FDioc.c
@@ -61,36 +61,6 @@ typedef struct H5FD_ioc_t {
char *file_dir; /* Directory where we find files */
char *file_path; /* The user defined filename */
-
-#ifndef H5_HAVE_WIN32_API
- /* On most systems the combination of device and i-node number uniquely
- * identify a file. Note that Cygwin, MinGW and other Windows POSIX
- * environments have the stat function (which fakes inodes)
- * and will use the 'device + inodes' scheme as opposed to the
- * Windows code further below.
- */
- dev_t device; /* file device number */
-#else
- /* Files in windows are uniquely identified by the volume serial
- * number and the file index (both low and high parts).
- *
- * There are caveats where these numbers can change, especially
- * on FAT file systems. On NTFS, however, a file should keep
- * those numbers the same until renamed or deleted (though you
- * can use ReplaceFile() on NTFS to keep the numbers the same
- * while renaming).
- *
- * See the MSDN "BY_HANDLE_FILE_INFORMATION Structure" entry for
- * more information.
- *
- * http://msdn.microsoft.com/en-us/library/aa363788(v=VS.85).aspx
- */
- DWORD nFileIndexLow;
- DWORD nFileIndexHigh;
- DWORD dwVolumeSerialNumber;
-
- HANDLE hFile; /* Native windows file handle */
-#endif /* H5_HAVE_WIN32_API */
} H5FD_ioc_t;
/*
@@ -490,7 +460,7 @@ H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI I/O VFD on IOC under FAPL");
/* Specific to this I/O Concentrator */
- config_out->thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
+ config_out->thread_pool_size = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
done:
if (H5_mpi_comm_free(&comm) < 0)
@@ -536,7 +506,12 @@ H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa)
if (fa->magic != H5FD_IOC_FAPL_MAGIC)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid H5FD_ioc_config_t magic value");
- /* TODO: add extra IOC configuration validation code */
+ if (fa->under_fapl_id < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid under FAPL ID");
+
+ if (fa->subf_config.ioc_selection < SELECT_IOC_ONE_PER_NODE ||
+ fa->subf_config.ioc_selection >= ioc_selection_options)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC selection method");
done:
H5_SUBFILING_FUNC_LEAVE;
@@ -850,24 +825,15 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
}
if (NULL != (file_ptr->file_path = HDrealpath(name, NULL))) {
- char *path = NULL;
- char *directory = dirname(path);
-
- if (NULL == (path = HDstrdup(file_ptr->file_path)))
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy subfiling subfile path");
- if (NULL == (file_ptr->file_dir = HDstrdup(directory))) {
- HDfree(path);
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL,
- "can't copy subfiling subfile directory path");
+ if (H5_dirname(file_ptr->file_path, &file_ptr->file_dir) < 0) {
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't get subfile dirname");
}
-
- HDfree(path);
}
else {
if (ENOENT == errno) {
if (NULL == (file_ptr->file_path = HDstrdup(name)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy file name");
- if (NULL == (file_ptr->file_dir = HDstrdup(".")))
+ if (NULL == (file_ptr->file_dir = H5MM_strdup(".")))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "can't set subfile directory path");
}
else
@@ -983,7 +949,7 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
#ifdef H5FD_IOC_DEBUG
{
- subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->fa.context_id);
+ subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id);
if (sf_context) {
if (sf_context->topology->rank_is_ioc)
HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
@@ -1035,7 +1001,7 @@ done:
HDfree(file_ptr->file_path);
file_ptr->file_path = NULL;
- HDfree(file_ptr->file_dir);
+ H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;
/* Release the file info */
@@ -1089,8 +1055,31 @@ H5FD__ioc_cmp(const H5FD_t *_f1, const H5FD_t *_f2)
HDassert(f1);
HDassert(f2);
- ret_value = H5FD_cmp(f1->ioc_file, f2->ioc_file);
+ if (f1->ioc_file && f1->ioc_file->cls && f1->ioc_file->cls->cmp && f2->ioc_file && f2->ioc_file->cls &&
+ f2->ioc_file->cls->cmp) {
+ ret_value = H5FD_cmp(f1->ioc_file, f2->ioc_file);
+ }
+ else {
+ h5_stat_t st1;
+ h5_stat_t st2;
+
+ /*
+ * If under VFD has no compare routine, get
+ * inode of HDF5 stub file and compare them
+ *
+ * Note that the compare callback doesn't
+ * allow for failure, so we just return -1
+ * if stat fails.
+ */
+ if (HDstat(f1->file_path, &st1) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, -1, "couldn't stat file");
+ if (HDstat(f2->file_path, &st2) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, -1, "couldn't stat file");
+
+ ret_value = (st1.st_ino > st2.st_ino) - (st1.st_ino < st2.st_ino);
+ }
+done:
H5_SUBFILING_FUNC_LEAVE;
} /* end H5FD__ioc_cmp */
@@ -1607,8 +1596,6 @@ H5FD__ioc_del(const char *name, hid_t fapl)
MPI_Comm comm = MPI_COMM_NULL;
MPI_Info info = MPI_INFO_NULL;
FILE *config_file = NULL;
- char *name_copy = NULL;
- char *name_copy2 = NULL;
char *tmp_filename = NULL;
char *base_filename = NULL;
char *file_dirname = NULL;
@@ -1647,13 +1634,10 @@ H5FD__ioc_del(const char *name, hid_t fapl)
if (HDstat(name, &st) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SYSERRSTR, FAIL, "HDstat failed");
- if (NULL == (name_copy = HDstrdup(name)))
- H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
- if (NULL == (name_copy2 = HDstrdup(name)))
- H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
-
- base_filename = basename(name_copy);
- file_dirname = dirname(name_copy2);
+ if (H5_basename(name, &base_filename) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get file basename");
+ if (H5_dirname(name, &file_dirname) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get file dirname");
/* Try to open the subfiling configuration file and get the number of IOCs */
if (NULL == (tmp_filename = HDmalloc(PATH_MAX)))
@@ -1732,8 +1716,8 @@ done:
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI info object");
HDfree(tmp_filename);
- HDfree(name_copy);
- HDfree(name_copy2);
+ H5MM_free(file_dirname);
+ H5MM_free(base_filename);
H5_SUBFILING_FUNC_LEAVE;
}
diff --git a/src/H5FDsubfiling/H5FDioc.h b/src/H5FDsubfiling/H5FDioc.h
index 48102ac..7173aa9 100644
--- a/src/H5FDsubfiling/H5FDioc.h
+++ b/src/H5FDsubfiling/H5FDioc.h
@@ -108,7 +108,7 @@
* for compatibility with legacy HDF5 applications. The default driver used
* is currently the #H5FD_MPIO driver.
*
- * \var int32_t H5FD_ioc_config_t::thread_pool_count
+ * \var int32_t H5FD_ioc_config_t::thread_pool_size
* The number of I/O concentrator worker threads to use.
*
* This value can also be set or adjusted with the #H5FD_IOC_THREAD_POOL_SIZE
@@ -121,10 +121,10 @@
*
*/
typedef struct H5FD_ioc_config_t {
- uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */
- uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */
- hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */
- int32_t thread_pool_count; /* Number of I/O concentrator worker threads to use */
+ uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */
+ uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */
+ hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */
+ int32_t thread_pool_size; /* Number of I/O concentrator worker threads to use */
H5FD_subfiling_shared_config_t subf_config; /* Subfiling driver configuration */
} H5FD_ioc_config_t;
//! <!-- [H5FD_ioc_config_t_snip] -->
diff --git a/src/H5FDsubfiling/H5FDioc_threads.c b/src/H5FDsubfiling/H5FDioc_threads.c
index 4c1887f..813fb3f 100644
--- a/src/H5FDsubfiling/H5FDioc_threads.c
+++ b/src/H5FDsubfiling/H5FDioc_threads.c
@@ -112,9 +112,9 @@ static void ioc_io_queue_add_entry(ioc_data_t *ioc_data, sf_work_request_t *wk_r
int
initialize_ioc_threads(void *_sf_context)
{
- subfiling_context_t *sf_context = _sf_context;
- ioc_data_t *ioc_data = NULL;
- unsigned thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
+ subfiling_context_t *sf_context = _sf_context;
+ ioc_data_t *ioc_data = NULL;
+ unsigned thread_pool_size = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
char *env_value;
int ret_value = 0;
#ifdef H5FD_IOC_COLLECT_STATS
@@ -173,12 +173,12 @@ initialize_ioc_threads(void *_sf_context)
if ((env_value = HDgetenv(H5FD_IOC_THREAD_POOL_SIZE)) != NULL) {
int value_check = HDatoi(env_value);
if (value_check > 0) {
- thread_pool_count = (unsigned int)value_check;
+ thread_pool_size = (unsigned int)value_check;
}
}
/* Initialize a thread pool for the I/O concentrator's worker threads */
- if (hg_thread_pool_init(thread_pool_count, &ioc_data->io_thread_pool) < 0)
+ if (hg_thread_pool_init(thread_pool_size, &ioc_data->io_thread_pool) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, (-1), "can't initialize IOC worker thread pool");
/* Create the main IOC thread that will receive and dispatch I/O requests */
@@ -194,11 +194,9 @@ initialize_ioc_threads(void *_sf_context)
t_end = MPI_Wtime();
#ifdef H5FD_IOC_DEBUG
- if (sf_verbose_flag) {
- if (sf_context->topology->subfile_rank == 0) {
- HDprintf("%s: time = %lf seconds\n", __func__, (t_end - t_start));
- HDfflush(stdout);
- }
+ if (sf_context->topology->subfile_rank == 0) {
+ HDprintf("%s: time = %lf seconds\n", __func__, (t_end - t_start));
+ HDfflush(stdout);
}
#endif
@@ -242,6 +240,10 @@ finalize_ioc_threads(void *_sf_context)
hg_thread_join(ioc_data->ioc_main_thread);
}
+ if (ioc_data->io_queue.num_failed > 0)
+ H5_SUBFILING_DONE_ERROR(H5E_IO, H5E_CLOSEERROR, -1, "%" PRId32 " I/O requests failed",
+ ioc_data->io_queue.num_failed);
+
HDfree(ioc_data);
H5_SUBFILING_FUNC_LEAVE;
@@ -418,7 +420,6 @@ ioc_main(ioc_data_t *ioc_data)
wk_req.subfile_rank = subfile_rank;
wk_req.context_id = ioc_data->sf_context_id;
wk_req.start_time = queue_start_time;
- wk_req.buffer = NULL;
ioc_io_queue_add_entry(ioc_data, &wk_req);
@@ -521,8 +522,6 @@ handle_work_request(void *arg)
atomic_fetch_add(&ioc_data->sf_work_pending, 1);
- msg->in_progress = 1;
-
switch (msg->tag) {
case WRITE_INDEP:
op_ret = ioc_file_queue_write_indep(msg, msg->subfile_rank, msg->source, sf_context->sf_data_comm,
@@ -744,15 +743,10 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source,
t_start = MPI_Wtime();
t_queue_delay = t_start - msg->start_time;
-#ifdef H5FD_IOC_DEBUG
- if (sf_verbose_flag) {
- if (sf_logfile) {
- HDfprintf(sf_logfile,
- "[ioc(%d) %s]: msg from %d: datasize=%ld\toffset=%ld, "
- "queue_delay = %lf seconds\n",
- subfile_rank, __func__, source, data_size, file_offset, t_queue_delay);
- }
- }
+#ifdef H5_SUBFILING_DEBUG
+ H5_subfiling_log(file_context_id,
+ "[ioc(%d) %s]: msg from %d: datasize=%ld\toffset=%ld, queue_delay = %lf seconds\n",
+ subfile_rank, __func__, source, data_size, file_offset, t_queue_delay);
#endif
#endif
@@ -799,20 +793,16 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source,
t_start = t_end;
-#ifdef H5FD_IOC_DEBUG
- if (sf_verbose_flag) {
- if (sf_logfile) {
- HDfprintf(sf_logfile, "[ioc(%d) %s] MPI_Recv(%ld bytes, from = %d) status = %d\n", subfile_rank,
- __func__, data_size, source, mpi_code);
- }
- }
+#ifdef H5_SUBFILING_DEBUG
+ H5_subfiling_log(file_context_id, "[ioc(%d) %s] MPI_Recv(%ld bytes, from = %d) status = %d\n",
+ subfile_rank, __func__, data_size, source, mpi_code);
#endif
#endif
sf_fid = sf_context->sf_fid;
-#ifdef H5FD_IOC_DEBUG
+#ifdef H5_SUBFILING_DEBUG
if (sf_fid < 0)
H5_subfiling_log(file_context_id, "%s: WARNING: attempt to write data to closed subfile FID %d",
__func__, sf_fid);
@@ -919,13 +909,10 @@ ioc_file_queue_read_indep(sf_work_request_t *msg, int subfile_rank, int source,
t_start = MPI_Wtime();
t_queue_delay = t_start - msg->start_time;
-#ifdef H5FD_IOC_DEBUG
- if (sf_verbose_flag && (sf_logfile != NULL)) {
- HDfprintf(sf_logfile,
- "[ioc(%d) %s] msg from %d: datasize=%ld\toffset=%ld "
- "queue_delay=%lf seconds\n",
- subfile_rank, __func__, source, data_size, file_offset, t_queue_delay);
- }
+#ifdef H5_SUBFILING_DEBUG
+ H5_subfiling_log(file_context_id,
+ "[ioc(%d) %s] msg from %d: datasize=%ld\toffset=%ld queue_delay=%lf seconds\n",
+ subfile_rank, __func__, source, data_size, file_offset, t_queue_delay);
#endif
#endif
@@ -959,10 +946,9 @@ ioc_file_queue_read_indep(sf_work_request_t *msg, int subfile_rank, int source,
sf_pread_time += t_read;
sf_queue_delay_time += t_queue_delay;
-#ifdef H5FD_IOC_DEBUG
- if (sf_verbose_flag && (sf_logfile != NULL)) {
- HDfprintf(sf_logfile, "[ioc(%d)] MPI_Send to source(%d) completed\n", subfile_rank, source);
- }
+#ifdef H5_SUBFILING_DEBUG
+ H5_subfiling_log(sf_context->sf_context_id, "[ioc(%d)] MPI_Send to source(%d) completed\n", subfile_rank,
+ source);
#endif
#endif
@@ -1598,7 +1584,7 @@ ioc_io_queue_complete_entry(ioc_data_t *ioc_data, ioc_io_queue_entry_t *entry_pt
#ifdef H5FD_IOC_COLLECT_STATS
/* Compute the queued and execution time */
queued_time = entry_ptr->dispatch_time - entry_ptr->q_time;
- execution_time = H5_now_usec() = entry_ptr->dispatch_time;
+ execution_time = H5_now_usec() - entry_ptr->dispatch_time;
ioc_data->io_queue.requests_completed++;
@@ -1608,8 +1594,6 @@ ioc_io_queue_complete_entry(ioc_data_t *ioc_data, ioc_io_queue_entry_t *entry_pt
hg_thread_mutex_unlock(&ioc_data->io_queue.q_mutex);
- HDassert(entry_ptr->wk_req.buffer == NULL);
-
ioc_io_queue_free_entry(entry_ptr);
entry_ptr = NULL;
@@ -1642,7 +1626,6 @@ ioc_io_queue_free_entry(ioc_io_queue_entry_t *q_entry_ptr)
HDassert(q_entry_ptr->magic == H5FD_IOC__IO_Q_ENTRY_MAGIC);
HDassert(q_entry_ptr->next == NULL);
HDassert(q_entry_ptr->prev == NULL);
- HDassert(q_entry_ptr->wk_req.buffer == NULL);
q_entry_ptr->magic = 0;
diff --git a/src/H5FDsubfiling/H5FDsubfile_int.c b/src/H5FDsubfiling/H5FDsubfile_int.c
index af14db3..22a5bd0 100644
--- a/src/H5FDsubfiling/H5FDsubfile_int.c
+++ b/src/H5FDsubfiling/H5FDsubfile_int.c
@@ -192,6 +192,59 @@ done:
* invalid data if other ranks perform writes while this
* operation is in progress.
*
+ * SUBFILING NOTE:
+ * The EOF calculation for subfiling is somewhat different
+ * than for the more traditional HDF5 file implementations.
+ * This statement derives from the fact that unlike "normal"
+ * HDF5 files, subfiling introduces a multi-file representation
+ * of a single HDF5 file. The plurality of sub-files represents
+ * a software RAID-0 based HDF5 file. As such, each sub-file
+ * contains a designated portion of the address space of the
+ * virtual HDF5 storage. We have no notion of HDF5 datatypes,
+ * datasets, metadata, or other HDF5 structures; only BYTES.
+ *
+ * The organization of the bytes within sub-files is consistent
+ * with the RAID-0 striping, i.e. there are IO Concentrators
+ * (IOCs) which correspond to a stripe-count (in Lustre) as
+ * well as a stripe_size. The combination of these two
+ * variables determines the "address" (a combination of IOC
+ * and a file offset) of any storage operation.
+ *
+ * Having a defined storage layout, the virtual file EOF
+ * calculation should be the MAXIMUM value returned by the
+ * collection of IOCs. Every MPI rank which hosts an IOC
+ * maintains its own EOF by updating that value for each
+ * WRITE operation that completes, i.e. if a new local EOF
+ * is greater than the existing local EOF, the new EOF
+ * will replace the old. The local EOF calculation is as
+ * follows.
+ * 1. At file creation, each IOC is assigned a rank value
+ * (0 to N-1, where N is the total number of IOCs) and
+ * a 'sf_base_addr' = 'subfile_rank' * 'sf_stripe_size')
+ * we also determine the 'sf_blocksize_per_stripe' which
+ * is simply the 'sf_stripe_size' * 'n_ioc_concentrators'
+ *
+ * 2. For every write operation, the IOC receives a message
+ * containing a file_offset and the data_size.
+ *
+ * 3. The file_offset + data_size are in turn used to
+ * create a stripe_id:
+ * IOC-(ioc_rank) IOC-(ioc_rank+1)
+ * |<- sf_base_address |<- sf_base_address |
+ * ID +--------------------+--------------------+
+ * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * ~ ~ ~
+ * N:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * +--------------------+--------------------+
+ *
+ * The new 'stripe_id' is then used to calculate a
+ * potential new EOF:
+ * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr
+ * + ((file_offset + data_size) % sf_stripe_size)
+ *
+ * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof.
+ *
* Return: SUCCEED/FAIL
*
* Programmer: JRM -- 1/18/22
diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c
index 4cdded3..8fe8f77 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.c
+++ b/src/H5FDsubfiling/H5FDsubfiling.c
@@ -108,37 +108,6 @@ typedef struct H5FD_subfiling_t {
char *file_dir; /* Directory where we find files */
char *file_path; /* The user defined filename */
-#ifndef H5_HAVE_WIN32_API
- /* On most systems the combination of device and i-node number uniquely
- * identify a file. Note that Cygwin, MinGW and other Windows POSIX
- * environments have the stat function (which fakes inodes)
- * and will use the 'device + inodes' scheme as opposed to the
- * Windows code further below.
- */
- dev_t device; /* file device number */
- ino_t inode; /* file i-node number */
-#else
- /* Files in windows are uniquely identified by the volume serial
- * number and the file index (both low and high parts).
- *
- * There are caveats where these numbers can change, especially
- * on FAT file systems. On NTFS, however, a file should keep
- * those numbers the same until renamed or deleted (though you
- * can use ReplaceFile() on NTFS to keep the numbers the same
- * while renaming).
- *
- * See the MSDN "BY_HANDLE_FILE_INFORMATION Structure" entry for
- * more information.
- *
- * http://msdn.microsoft.com/en-us/library/aa363788(v=VS.85).aspx
- */
- DWORD nFileIndexLow;
- DWORD nFileIndexHigh;
- DWORD dwVolumeSerialNumber;
-
- HANDLE hFile; /* Native windows file handle */
-#endif /* H5_HAVE_WIN32_API */
-
/*
* The element layouts above this point are identical with the
* H5FD_ioc_t structure. As a result,
@@ -175,18 +144,6 @@ typedef struct H5FD_subfiling_t {
#define REGION_OVERFLOW(A, Z) \
(ADDR_OVERFLOW(A) || SIZE_OVERFLOW(Z) || HADDR_UNDEF == (A) + (Z) || (HDoff_t)((A) + (Z)) < (HDoff_t)(A))
-#define H5FD_SUBFILING_DEBUG_OP_CALLS 0 /* debugging print toggle; 0 disables */
-
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
-#define H5FD_SUBFILING_LOG_CALL(name) \
- do { \
- HDprintf("called %s()\n", (name)); \
- HDfflush(stdout); \
- } while (0)
-#else
-#define H5FD_SUBFILING_LOG_CALL(name) /* no-op */
-#endif /* H5FD_SUBFILING_DEBUG_OP_CALLS */
-
/* Prototypes */
static herr_t H5FD__subfiling_term(void);
static void *H5FD__subfiling_fapl_get(H5FD_t *_file);
@@ -393,18 +350,6 @@ H5FD__subfiling_term(void)
herr_t ret_value = SUCCEED;
if (H5FD_SUBFILING_g >= 0) {
- /* Free the subfiling application layout information */
- if (sf_app_layout) {
- HDfree(sf_app_layout->layout);
- sf_app_layout->layout = NULL;
-
- HDfree(sf_app_layout->node_ranks);
- sf_app_layout->node_ranks = NULL;
-
- HDfree(sf_app_layout);
- sf_app_layout = NULL;
- }
-
/* Unregister from HDF5 error API */
if (H5subfiling_err_class_g >= 0) {
if (H5Eunregister_class(H5subfiling_err_class_g) < 0)
@@ -646,12 +591,21 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa)
HDassert(fa != NULL);
if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION)
- H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version");
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "unknown H5FD_subfiling_config_t version");
if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid H5FD_subfiling_config_t magic value");
- /* TODO: add extra subfiling configuration validation code */
+ if (fa->ioc_fapl_id < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC FAPL ID");
+
+ if (!fa->require_ioc)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "Subfiling VFD currently always requires IOC VFD to be used");
+
+ if (fa->shared_cfg.ioc_selection < SELECT_IOC_ONE_PER_NODE ||
+ fa->shared_cfg.ioc_selection >= ioc_selection_options)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC selection method");
done:
H5_SUBFILING_FUNC_LEAVE;
@@ -724,8 +678,6 @@ H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr)
int ret_value = 0;
H5P_genplist_t *plist_ptr = NULL;
- H5FD_SUBFILING_LOG_CALL(__func__);
-
HDassert(id_out_ptr != NULL);
if (FALSE == H5P_isa_class(fapl_id, H5P_FILE_ACCESS))
@@ -917,24 +869,22 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
}
if (NULL != (file_ptr->file_path = HDrealpath(name, NULL))) {
- char *path = NULL;
- char *directory = dirname(path);
+ char *path = NULL;
- if (NULL == (path = HDstrdup(file_ptr->file_path)))
+ if (NULL == (path = H5MM_strdup(file_ptr->file_path)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy subfiling subfile path");
- if (NULL == (file_ptr->file_dir = HDstrdup(directory))) {
- HDfree(path);
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL,
- "can't copy subfiling subfile directory path");
+ if (H5_dirname(path, &file_ptr->file_dir) < 0) {
+ H5MM_free(path);
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't get subfile dirname");
}
- HDfree(path);
+ H5MM_free(path);
}
else {
if (ENOENT == errno) {
if (NULL == (file_ptr->file_path = HDstrdup(name)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy file name");
- if (NULL == (file_ptr->file_dir = HDstrdup(".")))
+ if (NULL == (file_ptr->file_dir = H5MM_strdup(".")))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "can't set subfile directory path");
}
else
@@ -1041,21 +991,6 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
HDassert(file_ptr);
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- {
- subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id);
-
- HDassert(sf_context);
- HDassert(sf_context->topology);
-
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
- }
-#endif
-
if (file_ptr->sf_file && H5FD_close(file_ptr->sf_file) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close subfile");
@@ -1081,7 +1016,7 @@ done:
HDfree(file_ptr->file_path);
file_ptr->file_path = NULL;
- HDfree(file_ptr->file_dir);
+ H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;
/* Release the file info */
@@ -1237,87 +1172,18 @@ H5FD__subfiling_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t a
* Return: End of file address, the first address past the end of the
* "file", either the filesystem file or the HDF5 file.
*
- * SUBFILING NOTE:
- * The EOF calculation for subfiling is somewhat different
- * than for the more traditional HDF5 file implementations.
- * This statement derives from the fact that unlike "normal"
- * HDF5 files, subfiling introduces a multi-file representation
- * of a single HDF5 file. The plurality of sub-files represents
- * a software RAID-0 based HDF5 file. As such, each sub-file
- * contains a designated portion of the address space of the
- * virtual HDF5 storage. We have no notion of HDF5 datatypes,
- * datasets, metadata, or other HDF5 structures; only BYTES.
- *
- * The organization of the bytes within sub-files is consistent
- * with the RAID-0 striping, i.e. there are IO Concentrators
- * (IOCs) which correspond to a stripe-count (in Lustre) as
- * well as a stripe_size. The combination of these two
- * variables determines the "address" (a combination of IOC
- * and a file offset) of any storage operation.
- *
- * Having a defined storage layout, the virtual file EOF
- * calculation should be the MAXIMUM value returned by the
- * collection of IOCs. Every MPI rank which hosts an IOC
- * maintains its own EOF by updating that value for each
- * WRITE operation that completes, i.e. if a new local EOF
- * is greater than the existing local EOF, the new EOF
- * will replace the old. The local EOF calculation is as
- * follows.
- * 1. At file creation, each IOC is assigned a rank value
- * (0 to N-1, where N is the total number of IOCs) and
- * a 'sf_base_addr' = 'subfile_rank' * 'sf_stripe_size')
- * we also determine the 'sf_blocksize_per_stripe' which
- * is simply the 'sf_stripe_size' * 'n_ioc_concentrators'
- *
- * 2. For every write operation, the IOC receives a message
- * containing a file_offset and the data_size.
- *
- * 3. The file_offset + data_size are in turn used to
- * create a stripe_id:
- * IOC-(ioc_rank) IOC-(ioc_rank+1)
- * |<- sf_base_address |<- sf_base_address |
- * ID +--------------------+--------------------+
- * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * ~ ~ ~
- * N:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * +--------------------+--------------------+
- *
- * The new 'stripe_id' is then used to calculate a
- * potential new EOF:
- * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr
- * + ((file_offset + data_size) % sf_stripe_size)
- *
- * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof.
- *
- *
- * Programmer: Richard Warren
+ * NOTE: This VFD mimics the MPI I/O VFD and so does not try
+ * to keep the EOF updated. The EOF is mostly just needed
+ * right after the file is opened so the library can determine
+ * if the file is empty, truncated or okay.
*
*-------------------------------------------------------------------------
*/
static haddr_t
H5FD__subfiling_get_eof(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
{
- const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file;
-#if 0
- int64_t logical_eof = -1;
-#endif
- haddr_t ret_value = HADDR_UNDEF;
-
-#if 0
- /*
- * TODO: this is a heavy weight implementation. We need something like this
- * for file open, and probably for file close. However, in between, something
- * similar to the current solution in the MPIIO VFD might be more appropriate.
- */
- if (H5FD__subfiling__get_real_eof(file->fa.context_id, &logical_eof) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, HADDR_UNDEF, "can't get EOF")
-
- /* Return the global max of all the subfile EOF values */
- ret_value = (haddr_t)(logical_eof);
-
-done:
-#endif
+ const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file;
+ haddr_t ret_value = HADDR_UNDEF;
ret_value = file->eof;
@@ -1390,8 +1256,7 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL,
"addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size);
- /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE)
- */
+ /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */
{
H5FD_mpio_xfer_t xfer_mode;
@@ -1419,11 +1284,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT);
}
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size);
- HDfflush(stdout);
-#endif
-
/*
* Retrieve the subfiling context object and the number
* of I/O concentrators.
@@ -1442,14 +1302,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
ioc_total = sf_context->topology->n_io_concentrators;
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
-#endif
-
if (ioc_total == 0) {
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)",
ioc_total);
@@ -1539,18 +1391,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate subfile I/O buffers vector");
- /* TODO: The following is left for future work */
- /*
- * Set ASYNC MODE
- * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file;
- * uint64_t op_code_begin = OPC_BEGIN;
- * uint64_t op_code_complete = OPC_COMPLETE;
- * const void *input = NULL;
- * void *output = NULL;
- * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output);
- * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output);
- */
-
for (int64_t i = 0; i < max_io_req_per_ioc; i++) {
uint32_t final_vec_len = vector_len;
int next_ioc = ioc_start;
@@ -1588,9 +1428,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
if (MPI_SUCCESS != MPI_Bcast(buf, (int)size, MPI_BYTE, 0, file_ptr->comm))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "can't broadcast data from rank 0");
}
-
- /* TODO: The following is left for future work */
- /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */
}
}
@@ -1658,8 +1495,7 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL,
"addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size);
- /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE)
- */
+ /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */
{
H5FD_mpio_xfer_t xfer_mode;
@@ -1684,11 +1520,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT);
}
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size);
- HDfflush(stdout);
-#endif
-
/*
* Retrieve the subfiling context object and the number
* of I/O concentrators.
@@ -1707,14 +1538,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
ioc_total = sf_context->topology->n_io_concentrators;
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
-#endif
-
if (ioc_total == 0) {
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)",
ioc_total);
@@ -1804,18 +1627,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate subfile I/O buffers vector");
- /* TODO: The following is left for future work */
- /*
- * Set ASYNC MODE
- * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file;
- * uint64_t op_code_begin = OPC_BEGIN;
- * uint64_t op_code_complete = OPC_COMPLETE;
- * const void *input = NULL;
- * void *output = NULL;
- * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output);
- * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output);
- */
-
for (int64_t i = 0; i < max_io_req_per_ioc; i++) {
uint32_t final_vec_len = vector_len;
int next_ioc = ioc_start;
@@ -1845,9 +1656,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
io_bufs) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "write to subfile failed");
}
-
- /* TODO: The following is left for future work */
- /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */
}
}
@@ -1858,15 +1666,11 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
file_ptr->pos = addr;
file_ptr->op = OP_WRITE;
-#if 1 /* Mimic the MPI I/O VFD */
+ /* Mimic the MPI I/O VFD */
file_ptr->eof = HADDR_UNDEF;
if (file_ptr->pos > file_ptr->local_eof)
file_ptr->local_eof = file_ptr->pos;
-#else
- if (file_ptr->pos > file_ptr->eof)
- file_ptr->eof = file_ptr->pos;
-#endif
done:
HDfree(io_bufs);
@@ -2235,7 +2039,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
HDassert(file);
/* Extend the file to make sure it's large enough */
-#if 1 /* Mimic the MPI I/O VFD */
if (!H5F_addr_eq(file->eoa, file->last_eoa)) {
int64_t sf_eof;
int64_t eoa;
@@ -2274,29 +2077,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
/* Update the 'last' eoa value */
file->last_eoa = file->eoa;
}
-#else
- if (!H5F_addr_eq(file->eoa, file->eof)) {
-
- /* Update the eof value */
- file->eof = file->eoa;
-
- /* Reset last file I/O information */
- file->pos = HADDR_UNDEF;
- file->op = OP_UNKNOWN;
-
- /* Update the 'last' eoa value */
- file->last_eoa = file->eoa;
- } /* end if */
-
- /* truncate sub-files */
- /* This is a hack. We should be doing the truncate of the sub-files via calls to
- * H5FD_truncate() with the IOC. However, that system is messed up at present.
- * thus the following hack.
- * JRM -- 12/18/21
- */
- if (H5FD__subfiling__truncate_sub_files(file->context_id, file->eof, file->comm) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTUPDATE, FAIL, "sub-file truncate request failed");
-#endif
done:
H5_SUBFILING_FUNC_LEAVE_API;
@@ -2325,7 +2105,6 @@ H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw)
HDassert(file);
- /* TODO: Consider lock only on IOC ranks for one IOC per subfile case */
if (file->fa.require_ioc) {
#ifdef VERBOSE
HDputs("Subfiling driver doesn't support file locking");
diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c
index b75dd81..d83d8c5 100644
--- a/src/H5FDsubfiling/H5subfiling_common.c
+++ b/src/H5FDsubfiling/H5subfiling_common.c
@@ -19,30 +19,13 @@
#include "H5subfiling_common.h"
#include "H5subfiling_err.h"
+#include "H5MMprivate.h"
+
typedef struct { /* Format of a context map entry */
void *file_handle; /* key value (linear search of the cache) */
int64_t sf_context_id; /* The return value if matching file_handle */
} file_map_to_context_t;
-typedef struct stat_record {
- int64_t op_count; /* How many ops in total */
- double min; /* minimum (time) */
- double max; /* maximum (time) */
- double total; /* average (time) */
-} stat_record_t;
-
-/* Stat (OP) Categories */
-typedef enum stat_category {
- WRITE_STAT = 0,
- WRITE_WAIT,
- READ_STAT,
- READ_WAIT,
- FOPEN_STAT,
- FCLOSE_STAT,
- QUEUE_STAT,
- TOTAL_STAT_COUNT
-} stat_category_t;
-
/* Identifiers for HDF5's error API */
hid_t H5subfiling_err_stack_g = H5I_INVALID_HID;
hid_t H5subfiling_err_class_g = H5I_INVALID_HID;
@@ -55,31 +38,10 @@ static sf_topology_t *sf_topology_cache = NULL;
static size_t sf_context_cache_limit = 16;
static size_t sf_topology_cache_limit = 4;
-app_layout_t *sf_app_layout = NULL;
-
static file_map_to_context_t *sf_open_file_map = NULL;
static int sf_file_map_size = 0;
#define DEFAULT_FILE_MAP_ENTRIES 8
-/* Definitions for recording subfiling statistics */
-static stat_record_t subfiling_stats[TOTAL_STAT_COUNT];
-#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count)
-#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total / (double)subfiling_stats[WRITE_STAT].op_count)
-#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total / (double)subfiling_stats[WRITE_WAIT].op_count)
-#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count)
-#define SF_READ_TIME (subfiling_stats[READ_STAT].total / (double)subfiling_stats[READ_STAT].op_count)
-#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count)
-#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total)
-
-int sf_verbose_flag = 0;
-
-#ifdef H5_SUBFILING_DEBUG
-char sf_logile_name[PATH_MAX];
-FILE *sf_logfile = NULL;
-
-static int sf_open_file_count = 0;
-#endif
-
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);
@@ -92,7 +54,7 @@ static herr_t init_subfiling_context(subfiling_context_t *sf_context,
sf_topology_t *app_topology, MPI_Comm file_comm);
static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags);
static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index);
-static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags);
+static herr_t ioc_open_file(int64_t file_context_id, int file_acc_flags);
static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out,
size_t filename_out_len, char **filename_basename_out,
char **subfile_dir_out);
@@ -101,8 +63,6 @@ static herr_t create_config_file(subfiling_context_t *sf_context, const char *ba
static herr_t open_config_file(subfiling_context_t *sf_context, const char *base_filename,
const char *subfile_dir, const char *mode, FILE **config_file_out);
-static void initialize_statistics(void);
-static int numDigits(int n);
static int get_next_fid_map_index(void);
static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id);
static int compare_hostid(const void *h1, const void *h2);
@@ -113,79 +73,6 @@ static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm);
static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node);
static inline void assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple);
-static void
-initialize_statistics(void)
-{
- HDmemset(subfiling_stats, 0, sizeof(subfiling_stats));
-}
-
-static int
-numDigits(int n)
-{
- if (n < 0)
- n = (n == INT_MIN) ? INT_MAX : -n;
- if (n < 10)
- return 1;
- if (n < 100)
- return 2;
- if (n < 1000)
- return 3;
- if (n < 10000)
- return 4;
- if (n < 100000)
- return 5;
- if (n < 1000000)
- return 6;
- if (n < 10000000)
- return 7;
- if (n < 100000000)
- return 8;
- if (n < 1000000000)
- return 9;
- return 10;
-}
-
-/*-------------------------------------------------------------------------
- * Function: set_verbose_flag
- *
- * Purpose: For debugging purposes, I allow a verbose setting to
- * have printing of relevant information into an IOC specific
- * file that is opened as a result of enabling the flag
- * and closed when the verbose setting is disabled.
- *
- * Return: None
- * Errors: None
- *
- * Programmer: Richard Warren
- *
- * Changes: Initial Version/None.
- *-------------------------------------------------------------------------
- */
-void
-set_verbose_flag(int subfile_rank, int new_value)
-{
-#ifdef H5_SUBFILING_DEBUG
- sf_verbose_flag = (int)(new_value & 0x0FF);
- if (sf_verbose_flag) {
- char logname[64];
- HDsnprintf(logname, sizeof(logname), "ioc_%d.log", subfile_rank);
- if (sf_open_file_count > 1)
- sf_logfile = fopen(logname, "a+");
- else
- sf_logfile = fopen(logname, "w+");
- }
- else if (sf_logfile) {
- fclose(sf_logfile);
- sf_logfile = NULL;
- }
-#else
- (void)subfile_rank;
- (void)new_value;
-#endif
-
- return;
-}
-
static int
get_next_fid_map_index(void)
{
@@ -300,8 +187,9 @@ compare_hostid(const void *h1, const void *h2)
static herr_t
get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str)
{
- char *opt_value = NULL;
- char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA);
+ char *opt_value = NULL;
+ char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA);
+ herr_t ret_value = SUCCEED;
HDassert(ioc_selection_type);
HDassert(ioc_sel_info_str);
@@ -323,31 +211,24 @@ get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_t
errno = 0;
check_value = HDstrtol(env_value, NULL, 0);
- if (errno == ERANGE) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
- " environment variable\n",
- __func__);
-#endif
-
- return FAIL;
- }
+ if (errno == ERANGE)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
+ "couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ " environment variable");
- if ((check_value < 0) || (check_value >= ioc_selection_options)) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
- " environment variable\n",
- __func__, check_value);
-#endif
-
- return FAIL;
- }
+ if ((check_value < 0) || (check_value >= ioc_selection_options))
+ H5_SUBFILING_GOTO_ERROR(
+ H5E_VFL, H5E_BADVALUE, FAIL,
+ "invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ " environment variable",
+ check_value);
*ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value;
*ioc_sel_info_str = opt_value;
}
- return SUCCEED;
+done:
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -379,6 +260,7 @@ count_nodes(sf_topology_t *info, MPI_Comm comm)
int hostid_index = -1;
int my_rank;
int mpi_code;
+ int ret_value = 0;
HDassert(info);
HDassert(info->app_layout);
@@ -386,20 +268,12 @@ count_nodes(sf_topology_t *info, MPI_Comm comm)
HDassert(info->app_layout->node_ranks);
HDassert(MPI_COMM_NULL != comm);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code);
-#endif
-
- return -1;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(-1, "MPI_Comm_rank failed", mpi_code);
app_layout = info->app_layout;
node_count = app_layout->node_count;
- if (node_count == 0)
- gather_topology_info(info, comm);
-
nextid = app_layout->layout[0].hostid;
/* Possibly record my hostid_index */
if (app_layout->layout[0].rank == my_rank) {
@@ -428,7 +302,10 @@ count_nodes(sf_topology_t *info, MPI_Comm comm)
app_layout->node_count = node_count;
- return node_count;
+ ret_value = node_count;
+
+done:
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -455,6 +332,7 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm)
long hostid;
int sf_world_size;
int sf_world_rank;
+ herr_t ret_value = SUCCEED;
HDassert(info);
HDassert(info->app_layout);
@@ -477,18 +355,14 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm)
int mpi_code;
if (MPI_SUCCESS !=
- (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Allgather failed with rc %d\n", __func__, mpi_code);
-#endif
+ (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code);
- return FAIL;
- }
-
- qsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid);
+ HDqsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid);
}
- return SUCCEED;
+done:
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -567,8 +441,10 @@ assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple)
for (int k = 0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) {
ioc_index = rank_multiple * k++;
io_concentrators[ioc_next] = (int)(app_layout->layout[ioc_index].rank);
- if (io_concentrators[ioc_next] == app_layout->world_rank)
- app_topology->rank_is_ioc = TRUE;
+ if (io_concentrators[ioc_next] == app_layout->world_rank) {
+ app_topology->subfile_rank = ioc_next;
+ app_topology->rank_is_ioc = TRUE;
+ }
}
app_topology->n_io_concentrators = ioc_count;
}
@@ -610,13 +486,6 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val)
* open at a time, then only a single subfiling context cache
* entry will be used.
*
- * Topologies are static, e.g. for any one I/O concentrator
- * allocation strategy, the results should always be the same.
- *
- * TODO: The one exception to this being the 1 IOC per N MPI
- * ranks strategy. The value of N can be changed on a per-file
- * basis, so we need to address that at some point.
- *
* Return: Pointer to underlying subfiling object if subfiling object
* ID is valid
*
@@ -636,14 +505,11 @@ H5_get_subfiling_object(int64_t object_id)
{
int64_t obj_type = (object_id >> 32) & 0x0FFFF;
int64_t obj_index = object_id & 0x0FFFF;
+ void *ret_value = NULL;
- if (obj_index < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid object index for subfiling object ID %" PRId64 "\n", __func__, object_id);
-#endif
-
- return NULL;
- }
+ if (obj_index < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL,
+ "invalid object index for subfiling object ID %" PRId64, object_id);
if (obj_type == SF_CONTEXT) {
/* Contexts provide information principally about
@@ -658,13 +524,9 @@ H5_get_subfiling_object(int64_t object_id)
/* Create subfiling context cache if it doesn't exist */
if (!sf_context_cache) {
- if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__);
-#endif
-
- return NULL;
- }
+ if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
+ "couldn't allocate space for subfiling context cache");
}
/* Make more space in context cache if needed */
@@ -677,13 +539,9 @@ H5_get_subfiling_object(int64_t object_id)
sf_context_cache_limit *= 2;
if (NULL == (tmp_realloc = HDrealloc(sf_context_cache,
- sf_context_cache_limit * sizeof(subfiling_context_t)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__);
-#endif
-
- return NULL;
- }
+ sf_context_cache_limit * sizeof(subfiling_context_t))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
+ "couldn't allocate space for subfiling context cache");
sf_context_cache = tmp_realloc;
@@ -698,13 +556,9 @@ H5_get_subfiling_object(int64_t object_id)
else if (obj_type == SF_TOPOLOGY) {
/* Create subfiling topology cache if it doesn't exist */
if (!sf_topology_cache) {
- if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfiling topology cache\n", __func__);
-#endif
-
- return NULL;
- }
+ if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
+ "couldn't allocate space for subfiling topology cache");
}
/* We will likely only cache a single topology
@@ -712,13 +566,9 @@ H5_get_subfiling_object(int64_t object_id)
* In that context, we will identify the number of
* nodes along with the number of MPI ranks on a node.
*/
- if ((size_t)obj_index >= sf_topology_cache_limit) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid object index for subfiling topology object ID\n", __func__);
-#endif
-
- return NULL;
- }
+ if ((size_t)obj_index >= sf_topology_cache_limit)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL,
+ "invalid object index for subfiling topology object ID");
/* Return direct pointer to the topology cache entry */
return (void *)&sf_topology_cache[obj_index];
@@ -728,7 +578,8 @@ H5_get_subfiling_object(int64_t object_id)
HDprintf("%s: Unknown subfiling object type for ID %" PRId64 "\n", __func__, object_id);
#endif
- return NULL;
+done:
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -746,23 +597,21 @@ H5_free_subfiling_object(int64_t object_id)
{
subfiling_context_t *sf_context = NULL;
int64_t obj_type = (object_id >> 32) & 0x0FFFF;
+ herr_t ret_value = SUCCEED;
- if (obj_type != SF_CONTEXT) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid subfiling object type for ID %" PRId64 "\n", __func__, object_id);
-#endif
-
- return FAIL;
- }
+ if (obj_type != SF_CONTEXT)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid subfiling object type for ID %" PRId64,
+ object_id);
- sf_context = H5_get_subfiling_object(object_id);
- if (!sf_context)
- return FAIL;
+ if (NULL == (sf_context = H5_get_subfiling_object(object_id)))
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
+ "couldn't get subfiling context for subfiling object ID");
if (H5_free_subfiling_object_int(sf_context) < 0)
- return FAIL;
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
- return SUCCEED;
+done:
+ H5_SUBFILING_FUNC_LEAVE;
}
static herr_t
@@ -858,15 +707,7 @@ H5_free_subfiling_topology(sf_topology_t *topology)
HDfree(topology->subfile_fd);
topology->subfile_fd = NULL;
- /*
- * The below assumes that the subfiling application layout
- * is retrieved once and used for subsequent file opens for
- * the duration that the Subfiling VFD is in use
- */
- HDassert(topology->app_layout == sf_app_layout);
-
-#if 0
- if (topology->app_layout && (topology->app_layout != sf_app_layout)) {
+ if (topology->app_layout) {
HDfree(topology->app_layout->layout);
topology->app_layout->layout = NULL;
@@ -875,7 +716,6 @@ H5_free_subfiling_topology(sf_topology_t *topology)
HDfree(topology->app_layout);
}
-#endif
topology->app_layout = NULL;
@@ -927,112 +767,55 @@ H5_open_subfiles(const char *base_filename, void *file_handle,
int mpi_code;
herr_t ret_value = SUCCEED;
- if (!base_filename) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid base filename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (!subfiling_config) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid subfiling configuration pointer\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (!context_id_out) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: context_id_out is NULL\n", __func__);
-#endif
+ if (!base_filename)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling base filename");
- ret_value = FAIL;
- goto done;
- }
+ if (!subfiling_config)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling configuration");
- initialize_statistics();
-
-#if 0 /* TODO */
- /* Maybe set the verbose flag for more debugging info */
- envValue = HDgetenv("H5_SF_VERBOSE_FLAG");
- if (envValue != NULL) {
- int check_value = atoi(envValue);
- if (check_value > 0)
- sf_verbose_flag = 1;
- }
-#endif
+ if (!context_id_out)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer");
/* Initialize new subfiling context ID based on configuration information */
- if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't initialize subfiling context\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (init_subfiling(subfiling_config, file_comm, &context_id) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context");
/* Retrieve the subfiling object for the newly-created context ID */
- if (NULL == (sf_context = H5_get_subfiling_object(context_id))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get subfiling object from context ID\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID");
/* Save some basic things in the new subfiling context */
sf_context->h5_file_handle = file_handle;
- if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't copy base HDF5 filename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context->h5_filename = HDstrdup(base_filename)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for subfiling filename");
/*
* If we're actually using the IOCs, we will
* start the service threads on the identified
* ranks as part of the subfile opening.
*/
- if (open_subfile_with_context(sf_context, file_acc_flags) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't open subfiles\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (open_subfile_with_context(sf_context, file_acc_flags) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles");
#ifdef H5_SUBFILING_DEBUG
{
struct tm *tm = NULL;
time_t cur_time;
int mpi_rank;
+ int mpi_code;
/* Open debugging logfile */
- if (MPI_SUCCESS != MPI_Comm_rank(file_comm, &mpi_rank)) {
- HDprintf("%s: couldn't get MPI rank\n", __func__);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &mpi_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
HDsnprintf(sf_context->sf_logfile_name, PATH_MAX, "%s.log.%d", sf_context->h5_filename, mpi_rank);
- if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a"))) {
- HDprintf("%s: couldn't open subfiling debug logfile\n", __func__);
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a")))
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "couldn't open subfiling debug logfile");
cur_time = time(NULL);
tm = localtime(&cur_time);
@@ -1052,38 +835,24 @@ done:
* Form consensus on whether opening subfiles was
* successful
*/
- if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("[%s %d]: MPI_Allreduce failed with rc %d\n", __func__,
- sf_context->topology->app_layout->world_rank, mpi_code);
-#endif
-
- ret_value = FAIL;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm)))
+ H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Allreduce failed", mpi_code);
if (g_errors > 0) {
-#ifdef H5_SUBFILING_DEBUG
- if (sf_context->topology->app_layout->world_rank == 0) {
- HDprintf("%s: one or more IOC ranks couldn't open subfiles\n", __func__);
- }
-#endif
-
- ret_value = FAIL;
+ H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL,
+ "one or more IOC ranks couldn't open subfiles");
}
if (ret_value < 0) {
clear_fid_map_entry(file_handle, context_id);
- if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't free subfiling object\n", __func__);
-#endif
- }
+ if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
*context_id_out = -1;
}
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*
@@ -1120,48 +889,25 @@ init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm,
HDassert(file_index >= 0);
/* Use the file's index to create a new subfiling context ID */
- if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create new subfiling context ID\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling context ID");
/* Create a new subfiling context object with the created context ID */
- if (NULL == (new_context = H5_get_subfiling_object(context_id))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create new subfiling object\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (new_context = H5_get_subfiling_object(context_id)))
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling object");
/*
* Setup the application topology information, including the computed
* number and distribution map of the set of I/O concentrators
*/
- if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't initialize application topology\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize application topology");
new_context->sf_context_id = context_id;
- if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't initialize subfiling topology object\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
+ "couldn't initialize subfiling application topology object");
new_context->sf_base_addr = 0;
if (new_context->topology->rank_is_ioc) {
@@ -1175,14 +921,11 @@ done:
if (ret_value < 0) {
HDfree(app_topology);
- if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't free subfiling object\n", __func__);
-#endif
- }
+ if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
}
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -1224,14 +967,13 @@ static herr_t
init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
sf_topology_t **app_topology_out)
{
- sf_topology_t *app_topology = NULL;
- app_layout_t *app_layout = sf_app_layout;
- char *env_value = NULL;
- char *ioc_sel_str = NULL;
- int *io_concentrators = NULL;
- long ioc_select_val = -1;
- long iocs_per_node = 1;
- int ioc_count = 0;
+ sf_topology_t *app_topology = NULL;
+ app_layout_t *app_layout = NULL;
+ char *env_value = NULL;
+ char *ioc_sel_str = NULL;
+ long ioc_select_val = -1;
+ long iocs_per_node = 1;
+ int ioc_count = 0;
int comm_rank;
int comm_size;
int mpi_code;
@@ -1241,33 +983,16 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
HDassert(app_topology_out);
HDassert(!*app_topology_out);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI communicator size; rc = %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code);
/* Check if an IOC selection type was specified by environment variable */
- if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get IOC selection type from environment\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
+ "couldn't get IOC selection type from environment");
/* Sanity checking on different IOC selection strategies */
switch (ioc_selection_type) {
@@ -1318,67 +1043,29 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
}
/* Allocate new application topology information object */
- if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create new subfiling topology object\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't create new subfiling topology object");
app_topology->subfile_rank = -1;
app_topology->selection_type = ioc_selection_type;
- if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate array of I/O concentrator ranks\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- io_concentrators = app_topology->io_concentrators;
- HDassert(io_concentrators);
+ if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate array of I/O concentrator ranks");
if (!app_layout) {
- /* TODO: this is dangerous if a new comm size is greater than what
- * was allocated. Can't reuse app layout.
- */
+ if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate application layout structure");
- if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate application layout structure\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate application layout node rank array\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate application layout array\n", __func__);
-#endif
+ if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate application layout node rank array");
- ret_value = FAIL;
- goto done;
- }
-
- /*
- * Once the application layout has been filled once, any additional
- * file open operations won't be required to gather that information.
- */
- sf_app_layout = app_layout;
+ if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate application layout array");
}
app_layout->world_size = comm_size;
@@ -1386,6 +1073,8 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
app_topology->app_layout = app_layout;
+ gather_topology_info(app_topology, comm);
+
/*
* Determine which ranks are I/O concentrator ranks, based on the
* given IOC selection strategy and MPI information.
@@ -1396,17 +1085,11 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
app_topology->selection_type = SELECT_IOC_ONE_PER_NODE;
- if ((node_count = count_nodes(app_topology, comm)) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't determine number of nodes used\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if ((node_count = count_nodes(app_topology, comm)) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
+ "couldn't determine number of nodes used");
/* Check for an IOC-per-node value set in the environment */
- /* TODO: should this env. var. be interpreted for other selection types? */
if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) {
errno = 0;
ioc_select_val = HDstrtol(env_value, NULL, 0);
@@ -1465,11 +1148,7 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
case SELECT_IOC_WITH_CONFIG:
default:
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid IOC selection strategy\n", __func__);
-#endif
- ret_value = FAIL;
- goto done;
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid IOC selection strategy");
break;
}
@@ -1480,14 +1159,9 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
* Create a vector of "potential" file descriptors
* which can be indexed by the IOC ID
*/
- if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate subfile file descriptor array\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate subfile file descriptor array");
*app_topology_out = app_topology;
@@ -1505,7 +1179,7 @@ done:
}
}
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -1576,15 +1250,9 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co
errno = 0;
stripe_size = HDstrtoll(env_value, NULL, 0);
- if (ERANGE == errno) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__,
- env_value);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (ERANGE == errno)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
+ "invalid stripe size setting for " H5FD_SUBFILING_STRIPE_SIZE);
if (stripe_size > 0) {
sf_context->sf_stripe_size = (int64_t)stripe_size;
@@ -1599,14 +1267,8 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co
/* Check for a subfile name prefix setting in the environment */
if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) {
- if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't copy subfile prefix value\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix value");
}
/*
@@ -1614,124 +1276,44 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co
* to/from IOC ranks
*/
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create sub-communicator for IOC messages; rc = %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't set MPI error handler on IOC message sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create sub-communicator for IOC data; rc = %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't set MPI error handler on IOC data sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create sub-communicator for IOC EOF; rc = %d\n", __func__, mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't set MPI error handler on IOC EOF sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create sub-communicator for barriers; rc = %d\n", __func__, mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code);
- if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't set MPI error handler on barrier sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code);
/* Create an MPI sub-communicator for IOC ranks */
if (app_topology->n_io_concentrators > 1) {
if (MPI_SUCCESS != (mpi_code = MPI_Comm_split(file_comm, app_topology->rank_is_ioc, comm_rank,
- &sf_context->sf_group_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't create sub-communicator for IOC ranks; rc = %d\n", __func__, mpi_code);
-#endif
+ &sf_context->sf_group_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_split failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS !=
- (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI rank from IOC rank sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (MPI_SUCCESS !=
- (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get MPI comm size from IOC rank sub-communicator; rc = %d\n", __func__,
- mpi_code);
-#endif
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code);
}
done:
@@ -1739,7 +1321,7 @@ done:
H5_free_subfiling_object_int(sf_context);
}
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -1779,68 +1361,35 @@ done:
static herr_t
open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
{
- double start_time;
herr_t ret_value = SUCCEED;
HDassert(sf_context);
- start_time = MPI_Wtime();
-
/*
* Save the HDF5 file ID (fid) to subfile context mapping.
* There shouldn't be any issue, but check the status and
* return if there was a problem.
*/
- if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
+ "couldn't record HDF5 file ID to subfile context mapping");
/*
* If this rank is an I/O concentrator, actually open
* the subfile belonging to this IOC rank
*/
if (sf_context->topology->rank_is_ioc) {
- sf_work_request_t msg = {{file_acc_flags, (int64_t)sf_context->h5_file_id, sf_context->sf_context_id},
- OPEN_OP,
- sf_context->topology->app_layout->world_rank,
- sf_context->topology->subfile_rank,
- sf_context->sf_context_id,
- start_time,
- NULL,
- 0,
- 0,
- 0,
- 0};
- h5_stat_t st;
+ h5_stat_t st;
/* Retrieve Inode value for HDF5 stub file */
- if (HDstat(sf_context->h5_filename, &st) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("[%s %d]: couldn't stat file %s\n", __func__,
- sf_context->topology->app_layout->world_rank, sf_context->h5_filename);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDstat(sf_context->h5_filename, &st) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't stat HDF5 stub file");
HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
sf_context->h5_file_id = (uint64_t)st.st_ino;
- if (ioc_open_file(&msg, file_acc_flags) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("[%s %d]: couldn't open subfile\n", __func__,
- sf_context->topology->app_layout->world_rank);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (ioc_open_file(sf_context->sf_context_id, file_acc_flags) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile");
}
done:
@@ -1848,7 +1397,7 @@ done:
clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id);
}
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -1889,14 +1438,8 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i
if (sf_file_map_size == 0) {
if (NULL ==
- (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate open file map\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping");
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
for (int i = 0; i < sf_file_map_size; i++) {
@@ -1925,14 +1468,9 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i
void *tmp_realloc;
if (NULL == (tmp_realloc = HDrealloc(sf_open_file_map,
- ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map))))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't reallocate open file map\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map)))))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't reallocate open file mapping");
sf_open_file_map = tmp_realloc;
sf_file_map_size *= 2;
@@ -1950,7 +1488,7 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i
}
done:
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -1992,10 +1530,9 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
+ioc_open_file(int64_t file_context_id, int file_acc_flags)
{
- subfiling_context_t *sf_context = NULL;
- int64_t file_context_id;
+ subfiling_context_t *sf_context = NULL;
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
char *filepath = NULL;
char *subfile_dir = NULL;
@@ -2003,51 +1540,24 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
int fd = -1;
herr_t ret_value = SUCCEED;
- HDassert(msg);
-
- /* Retrieve subfiling context ID from RPC message */
- file_context_id = msg->header[2];
-
- if (NULL == (sf_context = H5_get_subfiling_object(file_context_id))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get subfiling object from context ID\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context = H5_get_subfiling_object(file_context_id)))
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL,
+ "couldn't get subfiling object from context ID");
/* Only IOC ranks should be here */
HDassert(sf_context->topology);
HDassert(sf_context->topology->subfile_rank >= 0);
- if (NULL == (filepath = HDcalloc(1, PATH_MAX))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfile filename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (filepath = HDcalloc(1, PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for subfile filename");
/* Generate the name of the subfile that this IOC rank will open */
- if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, &subfile_dir) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't generate name for subfile\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, &subfile_dir) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "couldn't generate name for subfile");
- if (NULL == (sf_context->sf_filename = HDstrdup(filepath))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't copy subfile name\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (sf_context->sf_filename = HDstrdup(filepath)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile name");
/* Attempt to create/open the subfile for this IOC rank */
if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0)
@@ -2080,11 +1590,11 @@ done:
}
}
- HDfree(base);
- HDfree(subfile_dir);
+ H5MM_free(base);
+ H5MM_free(subfile_dir);
HDfree(filepath);
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*
@@ -2134,28 +1644,16 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
*/
n_io_concentrators = sf_context->topology->n_io_concentrators;
- if (NULL == (prefix = HDmalloc(PATH_MAX))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfile prefix\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (prefix = HDmalloc(PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for subfile prefix");
/* Under normal operation, we co-locate subfiles with the HDF5 file */
- HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX);
+ HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX - 1);
+ prefix[PATH_MAX - 1] = '\0';
- base = basename(prefix);
-
- if (NULL == (*filename_basename_out = HDstrdup(base))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfile basename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (H5_basename(prefix, &base) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get subfile basename");
if (sf_context->subfile_prefix) {
/* Note: Users may specify a directory name which is inaccessible
@@ -2165,28 +1663,12 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* if so, we could default to creating the subfiles in the
* current directory. (?)
*/
- if (NULL == (*subfile_dir_out = HDstrdup(sf_context->subfile_prefix))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't copy subfile prefix\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- subfile_dir = *subfile_dir_out;
+ if (NULL == (subfile_dir = H5MM_strdup(sf_context->subfile_prefix)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix");
}
else {
- subfile_dir = dirname(prefix);
-
- if (NULL == (*subfile_dir_out = HDstrdup(subfile_dir))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't copy subfile prefix\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (H5_dirname(prefix, &subfile_dir) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't get subfile prefix");
}
/*
@@ -2194,14 +1676,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* we aren't truncating the file.
*/
if (0 == (file_acc_flags & O_TRUNC)) {
- if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't open existing subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL,
+ "couldn't open existing subfiling configuration file");
}
/*
@@ -2210,14 +1687,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* in order to generate the correct subfile names.
*/
if (config_file) {
- if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't read from subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL,
+ "couldn't read from subfiling configuration file");
}
/*
@@ -2231,34 +1703,36 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* and the configuration file will be named:
* ABC.h5.subfile_<file-number>.config
*/
- num_digits = numDigits(n_io_concentrators);
+ num_digits = (int)(HDlog10(n_io_concentrators) + 1);
HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base,
sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1,
n_io_concentrators);
-done:
- if (config_file && (EOF == HDfclose(config_file))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__);
-#endif
+ *filename_basename_out = base;
+ *subfile_dir_out = subfile_dir;
- ret_value = FAIL;
- }
+done:
+ if (config_file && (EOF == HDfclose(config_file)))
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+ "couldn't close subfiling configuration file");
if (ret_value < 0) {
+ H5MM_free(subfile_dir);
+ H5MM_free(base);
+
if (*filename_basename_out) {
- HDfree(*filename_basename_out);
+ H5MM_free(*filename_basename_out);
*filename_basename_out = NULL;
}
if (*subfile_dir_out) {
- HDfree(*subfile_dir_out);
+ H5MM_free(*subfile_dir_out);
*subfile_dir_out = NULL;
}
}
HDfree(prefix);
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -2292,33 +1766,18 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
HDassert(base_filename);
HDassert(subfile_dir);
- if (sf_context->h5_file_id == UINT64_MAX) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
- if (*base_filename == '\0') {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (sf_context->h5_file_id == UINT64_MAX)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64,
+ sf_context->h5_file_id);
+ if (*base_filename == '\0')
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'",
+ base_filename);
if (*subfile_dir == '\0')
subfile_dir = ".";
- if (NULL == (config_filename = HDmalloc(PATH_MAX))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (config_filename = HDmalloc(PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for subfiling configuration filename");
HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
base_filename, sf_context->h5_file_id);
@@ -2329,14 +1788,9 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
config_file_exists = (ret == 0) || ((ret < 0) && (ENOENT != errno));
- if (config_file_exists && (ret != 0)) {
-#ifdef H5_SUBFILING_DEBUG
- HDperror("couldn't check existence of configuration file");
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (config_file_exists && (ret != 0))
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "couldn't check existence of subfiling configuration file");
/*
* If a config file doesn't exist, create one. If a
@@ -2349,100 +1803,61 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
int n_io_concentrators = sf_context->topology->n_io_concentrators;
int num_digits;
- if (NULL == (config_file = HDfopen(config_filename, "w+"))) {
-#ifdef H5_SUBFILING_DEBUG
- HDperror("couldn't open subfiling configuration file");
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (NULL == (line_buf = HDmalloc(PATH_MAX))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate buffer for writing to subfiling configuration file\n", __func__);
-#endif
+ if (NULL == (config_file = HDfopen(config_filename, "w+")))
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "couldn't open subfiling configuration file");
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (line_buf = HDmalloc(PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate buffer for writing to subfiling configuration file");
/* Write the subfiling stripe size to the configuration file */
HDsnprintf(line_buf, PATH_MAX, "stripe_size=%" PRId64 "\n", sf_context->sf_stripe_size);
- if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL,
+ "failed to write to subfiling configuration file");
/* Write the number of I/O concentrators to the configuration file */
HDsnprintf(line_buf, PATH_MAX, "aggregator_count=%d\n", n_io_concentrators);
- if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL,
+ "failed to write to subfiling configuration file");
/* Write the base HDF5 filename to the configuration file */
HDsnprintf(line_buf, PATH_MAX, "hdf5_file=%s\n", sf_context->h5_filename);
- if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL,
+ "failed to write to subfiling configuration file");
/* Write the optional subfile directory prefix to the configuration file */
HDsnprintf(line_buf, PATH_MAX, "subfile_dir=%s\n", subfile_dir);
- if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL,
+ "failed to write to subfiling configuration file");
/* Write out each subfile name to the configuration file */
- num_digits = numDigits(n_io_concentrators);
+ num_digits = (int)(HDlog10(n_io_concentrators) + 1);
for (int k = 0; k < n_io_concentrators; k++) {
HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename,
sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators);
- if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL,
+ "failed to write to subfiling configuration file");
}
}
done:
if (config_file) {
- if (EOF == HDfclose(config_file)) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- }
+ if (EOF == HDfclose(config_file))
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+ "couldn't close subfiling configuration file");
}
HDfree(line_buf);
HDfree(config_filename);
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -2477,33 +1892,18 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con
*config_file_out = NULL;
- if (sf_context->h5_file_id == UINT64_MAX) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
- if (*base_filename == '\0') {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (sf_context->h5_file_id == UINT64_MAX)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64,
+ sf_context->h5_file_id);
+ if (*base_filename == '\0')
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'",
+ base_filename);
if (*subfile_dir == '\0')
subfile_dir = ".";
- if (NULL == (config_filename = HDmalloc(PATH_MAX))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (config_filename = HDmalloc(PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for subfiling configuration filename");
HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
base_filename, sf_context->h5_file_id);
@@ -2517,40 +1917,26 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con
if (!config_file_exists)
goto done;
- if (config_file_exists && (ret != 0)) {
-#ifdef H5_SUBFILING_DEBUG
- HDperror("couldn't check existence of configuration file");
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (NULL == (config_file = HDfopen(config_filename, mode))) {
-#ifdef H5_SUBFILING_DEBUG
- HDperror("couldn't open subfiling configuration file");
-#endif
+ if (config_file_exists && (ret != 0))
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "couldn't check existence of subfiling configuration file");
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (config_file = HDfopen(config_filename, mode)))
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "couldn't open subfiling configuration file");
*config_file_out = config_file;
done:
if (ret_value < 0) {
- if (config_file && (EOF == HDfclose(config_file))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- }
+ if (config_file && (EOF == HDfclose(config_file)))
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+ "couldn't close subfiling configuration file");
}
HDfree(config_filename);
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -2575,79 +1961,42 @@ H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators)
HDassert(config_file);
HDassert(n_io_concentrators);
- if (HDfseek(config_file, 0, SEEK_END) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if ((config_file_len = HDftell(config_file)) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (HDfseek(config_file, 0, SEEK_SET) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__,
- errno);
-#endif
+ if (HDfseek(config_file, 0, SEEK_END) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL,
+ "couldn't seek to end of subfiling configuration file");
- ret_value = FAIL;
- goto done;
- }
+ if ((config_file_len = HDftell(config_file)) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL,
+ "couldn't get size of subfiling configuration file");
- if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__);
-#endif
+ if (HDfseek(config_file, 0, SEEK_SET) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL,
+ "couldn't seek to beginning of subfiling configuration file");
- ret_value = FAIL;
- goto done;
- }
+ if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "couldn't allocate space for reading from subfiling configuration file");
- if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL,
+ "couldn't read from subfiling configuration file");
config_buf[config_file_len] = '\0';
- if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n",
- __func__);
-#endif
+ if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count")))
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL,
+ "malformed subfiling configuration file - no aggregator count entry");
- ret_value = FAIL;
- goto done;
- }
+ if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs))
+ H5_SUBFILING_SYS_GOTO_ERROR(
+ H5E_FILE, H5E_CANTGET, FAIL,
+ "couldn't get number of I/O concentrators from subfiling configuration file");
- if (read_n_io_concs <= 0) {
- HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n",
- __func__, read_n_io_concs);
- ret_value = FAIL;
- goto done;
- }
+ if (read_n_io_concs <= 0)
+ H5_SUBFILING_GOTO_ERROR(
+ H5E_FILE, H5E_BADVALUE, FAIL,
+ "invalid number of I/O concentrators (%d) read from subfiling configuration file",
+ read_n_io_concs);
*n_io_concentrators = read_n_io_concs;
@@ -2703,77 +2052,44 @@ H5_close_subfiles(int64_t subfiling_context_id)
{
subfiling_context_t *sf_context = NULL;
MPI_Request barrier_req = MPI_REQUEST_NULL;
-#ifdef H5_SUBFILING_DEBUG
- double t0 = 0.0;
- double t1 = 0.0;
- double t2 = 0.0;
-#endif
- int mpi_code;
- herr_t ret_value = SUCCEED;
-
-#ifdef H5_SUBFILING_DEBUG
- t0 = MPI_Wtime();
-#endif
-
- if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get subfiling object from context ID\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ int mpi_code;
+ herr_t ret_value = SUCCEED;
- /* We make the subfile close operation collective.
- * Otherwise, there may be a race condition between
- * our closing the subfiles and the user application
- * moving ahead and possibly re-opening a file.
- *
- * If we can, we utilize an async barrier which gives
- * us the opportunity to reduce the CPU load due to
- * MPI spinning while waiting for the barrier to
- * complete. This is especially important if there
- * is heavy thread utilization due to subfiling
- * activities, i.e. the thread pool might be
- * extremely busy servicing I/O requests from all
- * HDF5 application ranks.
- */
+ if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id)))
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "couldn't get subfiling object from context ID");
+
+ /* We make the subfile close operation collective.
+ * Otherwise, there may be a race condition between
+ * our closing the subfiles and the user application
+ * moving ahead and possibly re-opening a file.
+ *
+ * If we can, we utilize an async barrier which gives
+ * us the opportunity to reduce the CPU load due to
+ * MPI spinning while waiting for the barrier to
+ * complete. This is especially important if there
+ * is heavy thread utilization due to subfiling
+ * activities, i.e. the thread pool might be
+ * extremely busy servicing I/O requests from all
+ * HDF5 application ranks.
+ */
#if MPI_VERSION > 3 || (MPI_VERSION == 3 && MPI_SUBVERSION >= 1)
{
int barrier_complete = 0;
- if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code);
while (!barrier_complete) {
useconds_t t_delay = 5;
usleep(t_delay);
- if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code);
}
}
#else
- if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
#endif
/* The map from file handle to subfiling context can now be cleared */
@@ -2784,49 +2100,11 @@ H5_close_subfiles(int64_t subfiling_context_id)
if (sf_context->topology->rank_is_ioc) {
if (sf_context->sf_fid >= 0) {
errno = 0;
- if (HDclose(sf_context->sf_fid) < 0) {
- HDperror("H5_close_subfiles - couldn't close subfile");
-
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't close subfile\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (HDclose(sf_context->sf_fid) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "couldn't close subfile");
sf_context->sf_fid = -1;
}
-
-#ifdef H5_SUBFILING_DEBUG
- /* FIXME: If we've had multiple files open, our statistics
- * will be messed up!
- */
- if (sf_verbose_flag) {
- t1 = t2;
- if (sf_logfile != NULL) {
- if (SF_WRITE_OPS > 0)
- HDfprintf(
- sf_logfile,
- "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n",
- sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME,
- (t1 - t0));
- if (SF_READ_OPS > 0)
- HDfprintf(sf_logfile,
- "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n",
- sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME,
- (t1 - t0));
-
- HDfprintf(sf_logfile, "[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank,
- SF_QUEUE_DELAYS / (double)(SF_WRITE_OPS + SF_READ_OPS));
-
- HDfflush(sf_logfile);
-
- HDfclose(sf_logfile);
- sf_logfile = NULL;
- }
- }
-#endif
}
/*
@@ -2838,50 +2116,27 @@ H5_close_subfiles(int64_t subfiling_context_id)
{
int barrier_complete = 0;
- if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code);
while (!barrier_complete) {
useconds_t t_delay = 5;
usleep(t_delay);
- if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code);
}
}
#else
- if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
+ if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
#endif
done:
- if (sf_context && H5_free_subfiling_object_int(sf_context) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't free subfiling context object\n", __func__);
-#endif
+ if (sf_context && H5_free_subfiling_object_int(sf_context) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "couldn't free subfiling context object");
- ret_value = FAIL;
- }
-
- return ret_value;
+ H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
@@ -2904,13 +2159,10 @@ done:
int64_t
H5_subfile_fhandle_to_context(void *file_handle)
{
- if (!sf_open_file_map) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: open file map is invalid\n", __func__);
-#endif
+ int64_t ret_value = -1;
- return -1;
- }
+ if (!sf_open_file_map)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL");
for (int i = 0; i < sf_file_map_size; i++) {
if (sf_open_file_map[i].file_handle == file_handle) {
@@ -2918,7 +2170,8 @@ H5_subfile_fhandle_to_context(void *file_handle)
}
}
- return -1;
+done:
+ H5_SUBFILING_FUNC_LEAVE;
} /* end H5_subfile_fhandle_to_context() */
#ifdef H5_SUBFILING_DEBUG
diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h
index 3195c9d..6e2965f 100644
--- a/src/H5FDsubfiling/H5subfiling_common.h
+++ b/src/H5FDsubfiling/H5subfiling_common.h
@@ -23,6 +23,7 @@
#include "H5Iprivate.h"
#include "H5FDsubfiling.h"
+#include "H5FDioc.h"
/*
* Some definitions for debugging the Subfiling feature
@@ -189,25 +190,15 @@ typedef struct {
*/
typedef struct {
/* {Datasize, Offset, FileID} */
- int64_t header[3]; /* The basic RPC input plus */
- int tag; /* the supplied OPCODE tag */
- int source; /* Rank of who sent the message */
- int subfile_rank; /* The IOC rank */
- int64_t context_id; /* context to be used to complete */
- double start_time; /* the request, + time of receipt */
- /* from which we calc Time(queued) */
- void *buffer; /* for writes, we keep the buffer */
- /* around for awhile... */
- volatile int in_progress; /* Not used! */
- volatile int serialize; /* worker thread needs to wait while true */
- volatile int dependents; //* If current work item has dependents */
- int depend_id; /* work queue index of the dependent */
+ int64_t header[3]; /* The basic RPC input plus */
+ int tag; /* the supplied OPCODE tag */
+ int source; /* Rank of who sent the message */
+ int subfile_rank; /* The IOC rank */
+ int64_t context_id; /* context to be used to complete */
+ double start_time; /* the request, + time of receipt */
+ /* from which we calc Time(queued) */
} sf_work_request_t;
-extern int sf_verbose_flag;
-
-extern app_layout_t *sf_app_layout;
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -225,8 +216,6 @@ H5_DLL herr_t H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_con
H5_DLL void H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...);
-void set_verbose_flag(int subfile_rank, int new_value);
-
#ifdef __cplusplus
}
#endif
diff --git a/testpar/t_vfd.c b/testpar/t_vfd.c
index f5f0267..9b47f07 100644
--- a/testpar/t_vfd.c
+++ b/testpar/t_vfd.c
@@ -342,11 +342,11 @@ setup_vfd_test_file(int file_name_id, char *file_name, int mpi_size, H5FD_mpio_x
/* shared_cfg = */ shared_conf,
};
H5FD_ioc_config_t ioc_config = {
- /* magic = */ H5FD_IOC_FAPL_MAGIC,
- /* version = */ H5FD_IOC_CURR_FAPL_VERSION,
- /* under_fapl_id = */ H5P_DEFAULT,
- /* thread_pool_count = */ H5FD_IOC_DEFAULT_THREAD_POOL_SIZE,
- /* subf_config = */ shared_conf,
+ /* magic = */ H5FD_IOC_FAPL_MAGIC,
+ /* version = */ H5FD_IOC_CURR_FAPL_VERSION,
+ /* under_fapl_id = */ H5P_DEFAULT,
+ /* thread_pool_size = */ H5FD_IOC_DEFAULT_THREAD_POOL_SIZE,
+ /* subf_config = */ shared_conf,
};
hid_t ioc_fapl = H5I_INVALID_HID;