summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfiling/H5FDsubfiling.h
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2022-08-04 17:56:48 (GMT)
committerGitHub <noreply@github.com>2022-08-04 17:56:48 (GMT)
commitbf07e0f2c9b381509abbde59fca8bea5445da261 (patch)
tree69551f0ec6658cc4e970bf1080fa4c5b256b289f /src/H5FDsubfiling/H5FDsubfiling.h
parenta71534fcc248737491adcfd770c7ab69b4adc2d4 (diff)
downloadhdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.zip
hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.gz
hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.bz2
Subfiling updates for release (#1963)
* Remove generated file h5fuse.sh * Link pthreads library when Subfiling VFD is built * Switch to MPI I/O driver for Subfiling HDF5 stub file * Rough first implementation for Subfiling file deletion * Subfiling VFD - get file dirname for file deletion * Subfiling VFD - set lock callback to NULL for now to avoid performance issues * Committing clang-format changes * Minor tidying up of Subfiling testing * Fixups for Subfiling VFD support in tools * Tidy up Subfiling public interface and add Doxygen * Respect Subfiling configuration settings from application * Add release note for Subfiling VFD * Committing clang-format changes * Committing clang-format changes * Shorten some Subfiling environment variable names Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src/H5FDsubfiling/H5FDsubfiling.h')
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.h358
1 files changed, 258 insertions, 100 deletions
diff --git a/src/H5FDsubfiling/H5FDsubfiling.h b/src/H5FDsubfiling/H5FDsubfiling.h
index 3de5155..3bc448b 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.h
+++ b/src/H5FDsubfiling/H5FDsubfiling.h
@@ -14,120 +14,240 @@
#ifndef H5FDsubfiling_H
#define H5FDsubfiling_H
-#include "H5FDioc.h"
-
#ifdef H5_HAVE_SUBFILING_VFD
+/**
+ * \def H5FD_SUBFILING
+ * Macro that returns the identifier for the #H5FD_SUBFILING driver. \hid_t{file driver}
+ */
#define H5FD_SUBFILING (H5FDperform_init(H5FD_subfiling_init))
#else
#define H5FD_SUBFILING (H5I_INVALID_HID)
#endif
+/**
+ * \def H5FD_SUBFILING_NAME
+ * The canonical name for the #H5FD_SUBFILING driver
+ */
#define H5FD_SUBFILING_NAME "subfiling"
#ifdef H5_HAVE_SUBFILING_VFD
#ifndef H5FD_SUBFILING_FAPL_MAGIC
-#define H5FD_CURR_SUBFILING_FAPL_VERSION 1
-#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
+/**
+ * \def H5FD_SUBFILING_CURR_FAPL_VERSION
+ * The version number of the H5FD_subfiling_config_t configuration
+ * structure for the #H5FD_SUBFILING driver
+ */
+#define H5FD_SUBFILING_CURR_FAPL_VERSION 1
+/**
+ * \def H5FD_SUBFILING_FAPL_MAGIC
+ * Unique number used to distinguish the #H5FD_SUBFILING driver from other HDF5 file drivers
+ */
+#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
#endif
-/****************************************************************************
- *
- * Structure: H5FD_subfiling_config_t
- *
- * Purpose:
- *
- * H5FD_subfiling_config_t is a public structure that is used to pass
- * subfiling configuration data to the appropriate subfiling VFD via
- * the FAPL. A pointer to an instance of this structure is a parameter
- * to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
- *
- * `magic` (uint32_t)
- *
- * Magic is a somewhat unique number which identifies this VFD from
- * other VFDs. Used in combination with a version number, we can
- * validate a user generated file access property list (fapl).
- * This field should be set to H5FD_SUBFILING_FAPL_MAGIC.
- *
- * `version` (uint32_t)
- *
- * Version number of the H5FD_subfiling_config_t structure. Any instance
- * passed to the above calls must have a recognized version number, or
- * an error will be flagged.
- *
- * This field should be set to H5FD_CURR_SUBFILING_FAPL_VERSION.
- *
- *** IO Concentrator Info ***
- *** These fields will be replicated in the stacked IOC VFD which
- *** provides the extended support for aggregating reads and writes
- *** and allows global file access to node-local storage containers.
- *
- * `stripe_count` (int32_t)
- *
- * The integer value which identifies the total number of
- * subfiles that have been algorithmically been selected to
- * to contain the segments of raw data which make up an HDF5
- * file. This value is used to implement the RAID-0 functionality
- * when reading or writing datasets.
+/**
+ * \def H5FD_SUBFILING_DEFAULT_STRIPE_SIZE
+ * The default stripe size (in bytes) for data stripes in sub-files
+ */
+#define H5FD_SUBFILING_DEFAULT_STRIPE_SIZE (32 * 1024 * 1024)
+
+/**
+ * \def H5FD_SUBFILING_FILENAME_TEMPLATE
+ * The basic template for a sub-file filename
+ */
+#define H5FD_SUBFILING_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d"
+
+/**
+ * \def H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE
+ * The basic template for a #H5FD_SUBFILING driver configuration filename
+ */
+#define H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config"
+
+/*
+ * Environment variables interpreted by the HDF5 Subfiling feature
+ */
+
+/**
+ * \def H5FD_SUBFILING_STRIPE_SIZE
+ * Macro for name of the environment variable that specifies the size
+ * (in bytes) for data stripes in sub-files
*
- * `stripe_depth` (int64_t)
+ * The value set for this environment variable is interpreted as a
+ * long long value and must be > 0.
+ */
+#define H5FD_SUBFILING_STRIPE_SIZE "H5FD_SUBFILING_STRIPE_SIZE"
+/**
+ * \def H5FD_SUBFILING_IOC_PER_NODE
+ * Macro for name of the environment variable that specifies the number
+ * of MPI ranks per node to use as I/O concentrators
*
- * The stripe depth defines a limit on the maximum number of contiguous
- * bytes that can be read or written in a single operation on any
- * selected subfile. Larger IO operations can exceed this limit
- * by utilizing MPI derived types to construct an IO request which
- * gathers additional data segments from memory for the IO request.
+ * The value set for this environment variable is interpreted as a
+ * long value and must be > 0.
+ */
+#define H5FD_SUBFILING_IOC_PER_NODE "H5FD_SUBFILING_IOC_PER_NODE"
+/**
+ * \def H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * Macro for name of the environment variable that provides information
+ * for selecting MPI ranks as I/O concentrators
*
- * `ioc_selection` (enum io_selection datatype)
+ * The value set for this environment variable is interpreted differently,
+ * depending on the IOC selection method chosen.
*
- * The io_selection_t defines a specific algorithm by which IO
- * concentrators (IOCs) and sub-files are identified. The available
- * algorithms are: SELECT_IOC_ONE_PER_NODE, SELECT_IOC_EVERY_NTH_RANK,
- * SELECT_IOC_WITH_CONFIG, and SELECT_IOC_TOTAL.
+ * For #SELECT_IOC_ONE_PER_NODE, this value is ignored.
*
- *** STACKING and other VFD support
- *** i.e. FAPL caching
- ***
+ * For #SELECT_IOC_EVERY_NTH_RANK, this value is interpreted as a
+ * long value and must be > 0. The value will correspond to the
+ * `N` value when selecting every `N`-th MPI rank as an I/O
+ * concentrator.
*
- * `ioc_fapl_id` (hid_t)
+ * For #SELECT_IOC_WITH_CONFIG, this value is ignored as that particular
+ * IOC selection method is not currently supported.
*
- * A valid file access property list (fapl) is cached on each
- * process and thus enables selection of an alternative provider
- * for subsequent file operations.
- * By default, Sub-filing employs an additional support VFD that
- * provides file IO proxy capabilities to all MPI ranks in a
- * distributed parallel application. This IO indirection
- * thus allows application access all sub-files even while
- * these may actually be node-local and thus not directly
- * accessible to remote ranks.
+ * For #SELECT_IOC_TOTAL, this value is interpreted as a long value
+ * and must be > 0. The value will correspond to the total number
+ * of I/O concentrators to be used.
+ */
+#define H5FD_SUBFILING_IOC_SELECTION_CRITERIA "H5FD_SUBFILING_IOC_SELECTION_CRITERIA"
+/**
+ * \def H5FD_SUBFILING_SUBFILE_PREFIX
+ * Macro for name of the environment variable that specifies a prefix
+ * to apply to the filenames generated for sub-files
*
- ****************************************************************************/
+ * The value set for this environment variable is interpreted as a
+ * pathname.
+ */
+#define H5FD_SUBFILING_SUBFILE_PREFIX "H5FD_SUBFILING_SUBFILE_PREFIX"
-/*
- * In addition to the common configuration fields, we can have
- * VFD specific fields. Here's one for the subfiling VFD.
- *
- * `require_ioc` (hbool_t)
- *
- * Require_IOC is a boolean flag with a default value of TRUE.
- * This flag indicates that the stacked H5FDioc VFD should be
- * employed for sub-filing operations. The default flag can be
- * overridden with an environment variable: H5_REQUIRE_IOC=0
- *
+/**
+ * \enum H5FD_subfiling_ioc_select_t
+ * This enum defines the various constants to allow different
+ * allocations of MPI ranks as I/O concentrators.
+ *
+ * \var SELECT_IOC_ONE_PER_NODE
+ * Default selection method. One MPI rank per node is used as an
+ * I/O concentrator. If this selection method is used, the number
+ * of I/O concentrators per node can be adjusted with the
+ * #H5FD_SUBFILING_IOC_PER_NODE environment variable.
+ *
+ * \var SELECT_IOC_EVERY_NTH_RANK
+ * Starting with MPI rank 0, a stride of 'N' is applied to the MPI
+ * rank values to determine the next I/O concentrator. The
+ * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must
+ * be set to the value desired for 'N'.
+ *
+ * \var SELECT_IOC_WITH_CONFIG
+ * Currently unsupported. Intended to use a configuration file to
+ * determine the mapping from MPI ranks to I/O concentrators, with the
+ * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable set to
+ * the path to the configuration file.
+ *
+ * \var SELECT_IOC_TOTAL
+ * Specifies that a total of 'N' I/O concentrators should be used.
+ * Starting with MPI rank 0, a stride of 'MPI comm size' / 'N' is
+ * applied to the MPI rank values to determine the next I/O
+ * concentrator. The #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * environment variable must be set to the value desired for 'N'.
+ *
+ * \var ioc_selection_options
+ * Unused. Sentinel value
*/
+typedef enum {
+ SELECT_IOC_ONE_PER_NODE = 0, /* Default: one MPI rank per node is used as an I/O concentrator */
+ SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, every N-th MPI rank is an I/O concentrator */
+ SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: read rank-to-IOC mapping from a configuration file */
+ SELECT_IOC_TOTAL, /* N IOCs in total: starting at rank 0, stride of (MPI comm size / N) */
+ ioc_selection_options /* Sentinel value; unused as a selection method */
+} H5FD_subfiling_ioc_select_t;
+
+/**
+ * \struct H5FD_subfiling_shared_config_t
+ * \brief Subfiling configuration structure that is shared between the #H5FD_SUBFILING
+ * and #H5FD_IOC drivers
+ *
+ * \var H5FD_subfiling_ioc_select_t H5FD_subfiling_shared_config_t::ioc_selection
+ * The method to use for selecting MPI ranks to be I/O concentrators. The
+ * current default is to select one MPI rank per node to be an I/O concentrator.
+ * Refer to #H5FD_subfiling_ioc_select_t for a description of the algorithms
+ * available for use.
+ *
+ * \var int64_t H5FD_subfiling_shared_config_t::stripe_size
+ * The stripe size defines the size (in bytes) of the data stripes in the
+ * sub-files for the logical HDF5 file. Data is striped across the sub-files
+ * in a round-robin wrap-around fashion in segments equal to the stripe size.
+ *
+ * For example, in an HDF5 file consisting of four sub-files with a 1MiB stripe
+ * size, the first and fifth 1MiB of data would reside in the first sub-file,
+ * the second and sixth 1MiB of data would reside in the second sub-file and so
+ * on.
+ *
+ * This value can also be set or adjusted with the #H5FD_SUBFILING_STRIPE_SIZE
+ * environment variable.
+ *
+ * \var int32_t H5FD_subfiling_shared_config_t::stripe_count
+ * The number of I/O concentrators (and, currently, the number of sub-files)
+ * to use for the logical HDF5 file. This value is used in conjunction with
+ * the IOC selection method to determine which MPI ranks will be assigned as
+ * I/O concentrators.
+ *
+ * Alternatively, the mapping between MPI ranks and I/O concentrators can be
+ * set or adjusted with a combination of the #ioc_selection field and the
+ * #H5FD_SUBFILING_IOC_PER_NODE and #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * environment variables.
+ */
+typedef struct H5FD_subfiling_shared_config_t {
+ H5FD_subfiling_ioc_select_t ioc_selection; /* Method used to select MPI ranks as I/O concentrators */
+ int64_t stripe_size; /* Size (in bytes) of the data stripes in the sub-files */
+ int32_t stripe_count; /* Number of I/O concentrators (currently also the number of sub-files) */
+} H5FD_subfiling_shared_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
/**
- * Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
+ * \struct H5FD_subfiling_config_t
+ * \brief Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
+ *
+ * \details H5FD_subfiling_config_t is a public structure that is used to pass
+ * subfiling configuration data to the #H5FD_SUBFILING driver via
+ * a File Access Property List. A pointer to an instance of this structure
+ * is a parameter to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
+ *
+ * \var uint32_t H5FD_subfiling_config_t::magic
+ * A somewhat unique number which distinguishes the #H5FD_SUBFILING driver
+ * from other drivers. Used in combination with a version number, it can
+ * help to validate a user-generated File Access Property List. This field
+ * should be set to #H5FD_SUBFILING_FAPL_MAGIC.
+ *
+ * \var uint32_t H5FD_subfiling_config_t::version
+ * Version number of the H5FD_subfiling_config_t structure. Any instance
+ * passed to H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() must have
+ * a recognized version number or an error will be raised. Currently, this
+ * field should be set to #H5FD_SUBFILING_CURR_FAPL_VERSION.
+ *
+ * \var hid_t H5FD_subfiling_config_t::ioc_fapl_id
+ * The File Access Property List which is set up with the file driver that
+ * the #H5FD_SUBFILING driver will use for servicing I/O requests to the
+ * sub-files. Currently, the File Access Property List must be set up with
+ * the #H5FD_IOC driver by calling H5Pset_fapl_ioc(), but future development
+ * may allow other file drivers to be used.
+ *
+ * \var hbool_t H5FD_subfiling_config_t::require_ioc
+ * A boolean flag which indicates whether the #H5FD_SUBFILING driver should
+ * use the #H5FD_IOC driver for its I/O operations. This field should currently
+ * always be set to TRUE.
+ *
+ * \var H5FD_subfiling_shared_config_t H5FD_subfiling_config_t::shared_cfg
+ * A structure which contains the subfiling parameters that are shared between
+ * the #H5FD_SUBFILING and #H5FD_IOC drivers. This includes the sub-file stripe
+ * size, number of I/O concentrators, IOC selection method, etc.
+ *
*/
typedef struct H5FD_subfiling_config_t {
- uint32_t magic; /* set to H5FD_SUBFILING_FAPL_MAGIC */
- uint32_t version; /* set to H5FD_CURR_SUBFILING_FAPL_VERSION */
- int32_t stripe_count; /* How many io concentrators */
- int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */
- ioc_selection_t ioc_selection; /* Method to select IO Concentrators */
- hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */
- hbool_t require_ioc;
+ uint32_t magic; /* Must be set to H5FD_SUBFILING_FAPL_MAGIC */
+ uint32_t version; /* Must be set to H5FD_SUBFILING_CURR_FAPL_VERSION */
+ hid_t ioc_fapl_id; /* The FAPL set up with the stacked VFD to use for I/O concentrators */
+ hbool_t require_ioc; /* Whether to use the IOC VFD (currently must always be TRUE) */
+ H5FD_subfiling_shared_config_t
+ shared_cfg; /* Subfiling/IOC parameters (stripe size, stripe count, etc.) */
} H5FD_subfiling_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
@@ -135,41 +255,79 @@ typedef struct H5FD_subfiling_config_t {
extern "C" {
#endif
+/**
+ * \brief Internal routine to initialize #H5FD_SUBFILING driver. Not meant to be
+ * called directly by an HDF5 application
+ */
H5_DLL hid_t H5FD_subfiling_init(void);
/**
* \ingroup FAPL
*
- * \brief Modifies the file access property list to use the #H5FD_SUBFILING driver
+ * \brief Modifies the specified File Access Property List to use the #H5FD_SUBFILING driver
*
* \fapl_id
- * \param[in] vfd_config #H5FD_SUBFILING driver specific properties. If NULL, then
- * the IO concentrator VFD will be used.
+ * \param[in] vfd_config Pointer to #H5FD_SUBFILING driver configuration structure. May be NULL.
* \returns \herr_t
*
- * \details H5Pset_fapl_core() modifies the file access property list to use the
+ * \details H5Pset_fapl_subfiling() modifies the File Access Property List to use the
* #H5FD_SUBFILING driver.
*
- * \todo Expand details!
- *
- * \since 1.14.0
+ * The #H5FD_SUBFILING driver is an MPI-based file driver that allows an
+ * HDF5 application to distribute a logical HDF5 file across a collection
+ * of "sub-files" in equal-sized data segment "stripes". I/O to the logical
+ * HDF5 file is then directed to the appropriate "sub-file" according to the
+ * #H5FD_SUBFILING configuration and a system of I/O concentrators, which
+ * are MPI ranks operating worker threads.
+ *
+ * By allowing a configurable stripe size, number of I/O concentrators and
+ * method for selecting MPI ranks as I/O concentrators, the #H5FD_SUBFILING
+ * driver aims to enable an HDF5 application to find a middle ground between
+ * the single shared file and file-per-process approaches to parallel file I/O
+ * for the particular machine the application is running on. In general, the
+ * goal is to avoid some of the complexity of the file-per-process approach
+ * while also minimizing the locking issues of the single shared file approach
+ * on a parallel file system.
+ *
+ * \note Since the #H5FD_SUBFILING driver is an MPI-based file driver, the HDF5
+ * application should ensure that H5Pset_mpi_params() is called before this
+ * routine so that the appropriate MPI communicator and info objects will be
+ * set up for use by the #H5FD_SUBFILING and #H5FD_IOC drivers.
+ *
+ * \note The current architecture of the #H5FD_SUBFILING driver requires that the
+ * HDF5 application must have been initialized with MPI_Init_thread() using
+ * a value of MPI_THREAD_MULTIPLE for the thread support level.
+ *
+ * \note The \p vfd_config parameter may be NULL. In this case, the reference
+ * implementation I/O concentrator VFD will be used with the default settings
+ * of one I/O concentrator per node and a stripe size of 32MiB. Refer to the
+ * H5FD_subfiling_config_t documentation for information about configuration
+ * for the #H5FD_SUBFILING driver.
+ *
+ * \since 1.13.2
*
*/
-H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config);
+H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config);
/**
* \ingroup FAPL
*
- * \brief Queries subfiling file driver properties
+ * \brief Queries a File Access Property List for #H5FD_SUBFILING file driver properties
*
* \fapl_id
- * \param[out] config_out The subfiling fapl data.
+ * \param[out] config_out Pointer to H5FD_subfiling_config_t structure through which the
+ * #H5FD_SUBFILING file driver properties will be returned.
*
* \returns \herr_t
*
- * \details H5Pget_fapl_subfiling() queries the #H5FD_SUBFILING driver properties as set
- * by H5Pset_fapl_subfiling(). If the #H5FD_SUBFILING driver has not been set on
- * the File Access Property List, a default configuration is returned.
+ * \details H5Pget_fapl_subfiling() queries the specified File Access Property List for
+ * #H5FD_SUBFILING driver properties as set by H5Pset_fapl_subfiling(). If the
+ * #H5FD_SUBFILING driver has not been set on the File Access Property List, a
+ * default configuration is returned. An HDF5 application may use this
+ * functionality to manually configure the #H5FD_SUBFILING driver by calling
+ * H5Pget_fapl_subfiling() on a newly-created File Access Property List, adjusting
+ * the default values and then calling H5Pset_fapl_subfiling() with the configured
+ * H5FD_subfiling_config_t structure.
*
- * \since 1.14.0
+ * \since 1.13.2
*
*/
H5_DLL herr_t H5Pget_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *config_out);