diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-08-04 17:56:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-04 17:56:48 (GMT) |
commit | bf07e0f2c9b381509abbde59fca8bea5445da261 (patch) | |
tree | 69551f0ec6658cc4e970bf1080fa4c5b256b289f /src/H5FDsubfiling/H5FDsubfiling.h | |
parent | a71534fcc248737491adcfd770c7ab69b4adc2d4 (diff) | |
download | hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.zip hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.gz hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.bz2 |
Subfiling updates for release (#1963)
* Remove generated file h5fuse.sh
* Link pthreads library when Subfiling VFD is built
* Switch to MPI I/O driver for Subfiling HDF5 stub file
* Rough first implementation for Subfiling file deletion
* Subfiling VFD - get file dirname for file deletion
* Subfiling VFD - set lock callback to NULL for now to avoid performance
issues
* Committing clang-format changes
* Minor tidying up of Subfiling testing
* Fixups for Subfiling VFD support in tools
* Tidy up Subfiling public interface and add Doxygen
* Respect Subfiling configuration settings from application
* Add release note for Subfiling VFD
* Committing clang-format changes
* Committing clang-format changes
* Shorten some Subfiling environment variable names
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src/H5FDsubfiling/H5FDsubfiling.h')
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfiling.h | 358 |
1 files changed, 258 insertions, 100 deletions
diff --git a/src/H5FDsubfiling/H5FDsubfiling.h b/src/H5FDsubfiling/H5FDsubfiling.h index 3de5155..3bc448b 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.h +++ b/src/H5FDsubfiling/H5FDsubfiling.h @@ -14,120 +14,240 @@ #ifndef H5FDsubfiling_H #define H5FDsubfiling_H -#include "H5FDioc.h" - #ifdef H5_HAVE_SUBFILING_VFD +/** + * \def H5FD_SUBFILING + * Macro that returns the identifier for the #H5FD_SUBFILING driver. \hid_t{file driver} + */ #define H5FD_SUBFILING (H5FDperform_init(H5FD_subfiling_init)) #else #define H5FD_SUBFILING (H5I_INVALID_HID) #endif +/** + * \def H5FD_SUBFILING_NAME + * The canonical name for the #H5FD_SUBFILING driver + */ #define H5FD_SUBFILING_NAME "subfiling" #ifdef H5_HAVE_SUBFILING_VFD #ifndef H5FD_SUBFILING_FAPL_MAGIC -#define H5FD_CURR_SUBFILING_FAPL_VERSION 1 -#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331 +/** + * \def H5FD_SUBFILING_CURR_FAPL_VERSION + * The version number of the H5FD_subfiling_config_t configuration + * structure for the #H5FD_SUBFILING driver + */ +#define H5FD_SUBFILING_CURR_FAPL_VERSION 1 +/** + * \def H5FD_SUBFILING_FAPL_MAGIC + * Unique number used to distinguish the #H5FD_SUBFILING driver from other HDF5 file drivers + */ +#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331 #endif -/**************************************************************************** - * - * Structure: H5FD_subfiling_config_t - * - * Purpose: - * - * H5FD_subfiling_config_t is a public structure that is used to pass - * subfiling configuration data to the appropriate subfiling VFD via - * the FAPL. A pointer to an instance of this structure is a parameter - * to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling(). - * - * `magic` (uint32_t) - * - * Magic is a somewhat unique number which identifies this VFD from - * other VFDs. Used in combination with a version number, we can - * validate a user generated file access property list (fapl). - * This field should be set to H5FD_SUBFILING_FAPL_MAGIC. 
- * - * `version` (uint32_t) - * - * Version number of the H5FD_subfiling_config_t structure. Any instance - * passed to the above calls must have a recognized version number, or - * an error will be flagged. - * - * This field should be set to H5FD_CURR_SUBFILING_FAPL_VERSION. - * - *** IO Concentrator Info *** - *** These fields will be replicated in the stacked IOC VFD which - *** provides the extended support for aggregating reads and writes - *** and allows global file access to node-local storage containers. - * - * `stripe_count` (int32_t) - * - * The integer value which identifies the total number of - * subfiles that have been algorithmically been selected to - * to contain the segments of raw data which make up an HDF5 - * file. This value is used to implement the RAID-0 functionality - * when reading or writing datasets. +/** + * \def H5FD_SUBFILING_DEFAULT_STRIPE_SIZE + * The default stripe size (in bytes) for data stripes in sub-files + */ +#define H5FD_SUBFILING_DEFAULT_STRIPE_SIZE (32 * 1024 * 1024) + +/** + * \def H5FD_SUBFILING_FILENAME_TEMPLATE + * The basic template for a sub-file filename + */ +#define H5FD_SUBFILING_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d" + +/** + * \def H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE + * The basic template for a #H5FD_SUBFILING driver configuration filename + */ +#define H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config" + +/* + * Environment variables interpreted by the HDF5 Subfiling feature + */ + +/** + * \def H5FD_SUBFILING_STRIPE_SIZE + * Macro for name of the environment variable that specifies the size + * (in bytes) for data stripes in sub-files * - * `stripe_depth` (int64_t) + * The value set for this environment variable is interpreted as a + * long long value and must be > 0. 
+ */ +#define H5FD_SUBFILING_STRIPE_SIZE "H5FD_SUBFILING_STRIPE_SIZE" +/** + * \def H5FD_SUBFILING_IOC_PER_NODE + * Macro for name of the environment variable that specifies the number + * of MPI ranks per node to use as I/O concentrators * - * The stripe depth defines a limit on the maximum number of contiguous - * bytes that can be read or written in a single operation on any - * selected subfile. Larger IO operations can exceed this limit - * by utilizing MPI derived types to construct an IO request which - * gathers additional data segments from memory for the IO request. + * The value set for this environment variable is interpreted as a + * long value and must be > 0. + */ +#define H5FD_SUBFILING_IOC_PER_NODE "H5FD_SUBFILING_IOC_PER_NODE" +/** + * \def H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * Macro for name of the environment variable that provides information + * for selecting MPI ranks as I/O concentrators * - * `ioc_selection` (enum io_selection datatype) + * The value set for this environment variable is interpreted differently, + * depending on the IOC selection method chosen. * - * The io_selection_t defines a specific algorithm by which IO - * concentrators (IOCs) and sub-files are identified. The available - * algorithms are: SELECT_IOC_ONE_PER_NODE, SELECT_IOC_EVERY_NTH_RANK, - * SELECT_IOC_WITH_CONFIG, and SELECT_IOC_TOTAL. + * For #SELECT_IOC_ONE_PER_NODE, this value is ignored. * - *** STACKING and other VFD support - *** i.e. FAPL caching - *** + * For #SELECT_IOC_EVERY_NTH_RANK, this value is interpreted as a + * long value and must be > 0. The value will correspond to the + * `N` value when selecting every `N`-th MPI rank as an I/O + * concentrator. * - * `ioc_fapl_id` (hid_t) + * For #SELECT_IOC_WITH_CONFIG, this value is ignored as that particular + * IOC selection method is not currently supported. 
* - * A valid file access property list (fapl) is cached on each - * process and thus enables selection of an alternative provider - * for subsequent file operations. - * By default, Sub-filing employs an additional support VFD that - * provides file IO proxy capabilities to all MPI ranks in a - * distributed parallel application. This IO indirection - * thus allows application access all sub-files even while - * these may actually be node-local and thus not directly - * accessible to remote ranks. + * For #SELECT_IOC_TOTAL, this value is interpreted as a long value + * and must be > 0. The value will correspond to the total number + * of I/O concentrators to be used. + */ +#define H5FD_SUBFILING_IOC_SELECTION_CRITERIA "H5FD_SUBFILING_IOC_SELECTION_CRITERIA" +/** + * \def H5FD_SUBFILING_SUBFILE_PREFIX + * Macro for name of the environment variable that specifies a prefix + * to apply to the filenames generated for sub-files * - ****************************************************************************/ + * The value set for this environment variable is interpreted as a + * pathname. + */ +#define H5FD_SUBFILING_SUBFILE_PREFIX "H5FD_SUBFILING_SUBFILE_PREFIX" -/* - * In addition to the common configuration fields, we can have - * VFD specific fields. Here's one for the subfiling VFD. - * - * `require_ioc` (hbool_t) - * - * Require_IOC is a boolean flag with a default value of TRUE. - * This flag indicates that the stacked H5FDioc VFD should be - * employed for sub-filing operations. The default flag can be - * overridden with an environment variable: H5_REQUIRE_IOC=0 - * +/** + * \enum H5FD_subfiling_ioc_select_t + * This enum defines the various constants to allow different + * allocations of MPI ranks as I/O concentrators. + * + * \var SELECT_IOC_ONE_PER_NODE + * Default selection method. One MPI rank per node is used as an + * I/O concentrator. 
If this selection method is used, the number + * of I/O concentrators per node can be adjusted with the + * #H5FD_SUBFILING_IOC_PER_NODE environment variable. + * + * \var SELECT_IOC_EVERY_NTH_RANK + * Starting with MPI rank 0, a stride of 'N' is applied to the MPI + * rank values to determine the next I/O concentrator. The + * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must + * be set to the value desired for 'N'. + * + * \var SELECT_IOC_WITH_CONFIG + * Currently unsupported. Use a configuration file to determine + * the mapping from MPI ranks to I/O concentrators. The + * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must + * be set to the path to the configuration file. + * + * \var SELECT_IOC_TOTAL + * Specifies that a total of 'N' I/O concentrators should be used. + * Starting with MPI rank 0, a stride of 'MPI comm size' / 'N' is + * applied to the MPI rank values to determine the next I/O + * concentrator. The #H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * environment variable must be set to the value desired for 'N'. + * + * \var ioc_selection_options + * Unused. Sentinel value */ +typedef enum { + SELECT_IOC_ONE_PER_NODE = 0, /* Default */ + SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */ + SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */ + SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */ + ioc_selection_options /* Sentinel value */ +} H5FD_subfiling_ioc_select_t; + +/** + * \struct H5FD_subfiling_shared_config_t + * \brief Subfiling configuration structure that is shared between the #H5FD_SUBFILING + * and #H5FD_IOC drivers + * + * \var H5FD_subfiling_ioc_select_t H5FD_subfiling_shared_config_t::ioc_selection + * The method to use for selecting MPI ranks to be I/O concentrators. The + * current default is to select one MPI rank per node to be an I/O concentrator. + * Refer to #H5FD_subfiling_ioc_select_t for a description of the algorithms + * available for use. 
+ * + * \var int64_t H5FD_subfiling_shared_config_t::stripe_size + * The stripe size defines the size (in bytes) of the data stripes in the + * sub-files for the logical HDF5 file. Data is striped across the sub-files + * in a round-robin wrap-around fashion in segments equal to the stripe size. + * + * For example, in an HDF5 file consisting of four sub-files with a 1MiB stripe + * size, the first and fifth 1MiB of data would reside in the first sub-file, + * the second and sixth 1MiB of data would reside in the second sub-file and so + * on. + * + * This value can also be set or adjusted with the #H5FD_SUBFILING_STRIPE_SIZE + * environment variable. + * + * \var int32_t H5FD_subfiling_shared_config_t::stripe_count + * The number of I/O concentrators (and, currently, the number of sub-files) + * to use for the logical HDF5 file. This value is used in conjunction with + * the IOC selection method to determine which MPI ranks will be assigned as + * I/O concentrators. + * + * Alternatively, the mapping between MPI ranks and I/O concentrators can be + * set or adjusted with a combination of the #ioc_selection field and the + * #H5FD_SUBFILING_IOC_PER_NODE and #H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * environment variables. + */ +typedef struct H5FD_subfiling_shared_config_t { + H5FD_subfiling_ioc_select_t ioc_selection; /* Method to select I/O concentrators */ + int64_t stripe_size; /* Size (in bytes) of data stripes in sub-files */ + int32_t stripe_count; /* Number of I/O concentrators to use */ +} H5FD_subfiling_shared_config_t; //! <!-- [H5FD_subfiling_config_t_snip] --> /** - * Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() + * \struct H5FD_subfiling_config_t + * \brief Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() + * + * \details H5FD_subfiling_config_t is a public structure that is used to pass + * subfiling configuration data to the #H5FD_SUBFILING driver via + * a File Access Property List. 
A pointer to an instance of this structure + * is a parameter to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling(). + * + * \var uint32_t H5FD_subfiling_config_t::magic + * A somewhat unique number which distinguishes the #H5FD_SUBFILING driver + * from other drivers. Used in combination with a version number, it can + * help to validate a user-generated File Access Property List. This field + * should be set to #H5FD_SUBFILING_FAPL_MAGIC. + * + * \var uint32_t H5FD_subfiling_config_t::version + * Version number of the H5FD_subfiling_config_t structure. Any instance + * passed to H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() must have + * a recognized version number or an error will be raised. Currently, this + * field should be set to #H5FD_SUBFILING_CURR_FAPL_VERSION. + * + * \var hid_t H5FD_subfiling_config_t::ioc_fapl_id + * The File Access Property List which is set up with the file driver that + * the #H5FD_SUBFILING driver will use for servicing I/O requests to the + * sub-files. Currently, the File Access Property List must be set up with + * the #H5FD_IOC driver by calling H5Pset_fapl_ioc(), but future development + * may allow other file drivers to be used. + * + * \var hbool_t H5FD_subfiling_config_t::require_ioc + * A boolean flag which indicates whether the #H5FD_SUBFILING driver should + * use the #H5FD_IOC driver for its I/O operations. This field should currently + * always be set to TRUE. + * + * \var H5FD_subfiling_shared_config_t H5FD_subfiling_config_t::shared_cfg + * A structure which contains the subfiling parameters that are shared between + * the #H5FD_SUBFILING and #H5FD_IOC drivers. This includes the sub-file stripe + * size, number of I/O concentrators, IOC selection method, etc. 
+ * */ typedef struct H5FD_subfiling_config_t { - uint32_t magic; /* set to H5FD_SUBFILING_FAPL_MAGIC */ - uint32_t version; /* set to H5FD_CURR_SUBFILING_FAPL_VERSION */ - int32_t stripe_count; /* How many io concentrators */ - int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */ - ioc_selection_t ioc_selection; /* Method to select IO Concentrators */ - hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */ - hbool_t require_ioc; + uint32_t magic; /* Must be set to H5FD_SUBFILING_FAPL_MAGIC */ + uint32_t version; /* Must be set to H5FD_SUBFILING_CURR_FAPL_VERSION */ + hid_t ioc_fapl_id; /* The FAPL setup with the stacked VFD to use for I/O concentrators */ + hbool_t require_ioc; /* Whether to use the IOC VFD (currently must always be TRUE) */ + H5FD_subfiling_shared_config_t + shared_cfg; /* Subfiling/IOC parameters (stripe size, stripe count, etc.) */ } H5FD_subfiling_config_t; //! <!-- [H5FD_subfiling_config_t_snip] --> @@ -135,41 +255,79 @@ typedef struct H5FD_subfiling_config_t { extern "C" { #endif +/** + * \brief Internal routine to initialize #H5FD_SUBFILING driver. Not meant to be + * called directly by an HDF5 application + */ H5_DLL hid_t H5FD_subfiling_init(void); /** * \ingroup FAPL * - * \brief Modifies the file access property list to use the #H5FD_SUBFILING driver + * \brief Modifies the specified File Access Property List to use the #H5FD_SUBFILING driver * * \fapl_id - * \param[in] vfd_config #H5FD_SUBFILING driver specific properties. If NULL, then - * the IO concentrator VFD will be used. + * \param[in] vfd_config Pointer to #H5FD_SUBFILING driver configuration structure. May be NULL. * \returns \herr_t * - * \details H5Pset_fapl_core() modifies the file access property list to use the + * \details H5Pset_fapl_subfiling() modifies the File Access Property List to use the * #H5FD_SUBFILING driver. * - * \todo Expand details! 
- * - * \since 1.14.0 + * The #H5FD_SUBFILING driver is an MPI-based file driver that allows an + * HDF5 application to distribute a logical HDF5 file across a collection + * of "sub-files" in equal-sized data segment "stripes". I/O to the logical + * HDF5 file is then directed to the appropriate "sub-file" according to the + * #H5FD_SUBFILING configuration and a system of I/O concentrators, which + * are MPI ranks operating worker threads. + * + * By allowing a configurable stripe size, number of I/O concentrators and + * method for selecting MPI ranks as I/O concentrators, the #H5FD_SUBFILING + * driver aims to enable an HDF5 application to find a middle ground between + * the single shared file and file-per-process approaches to parallel file I/O + * for the particular machine the application is running on. In general, the + * goal is to avoid some of the complexity of the file-per-process approach + * while also minimizing the locking issues of the single shared file approach + * on a parallel file system. + * + * \note Since the #H5FD_SUBFILING driver is an MPI-based file driver, the HDF5 + * application should ensure that H5Pset_mpi_params() is called before this + * routine so that the appropriate MPI communicator and info objects will be + * set up for use by the #H5FD_SUBFILING and #H5FD_IOC drivers. + * + * \note The current architecture of the #H5FD_SUBFILING driver requires that the + * HDF5 application must have been initialized with MPI_Init_thread() using + * a value of MPI_THREAD_MULTIPLE for the thread support level. + * + * \note The \p vfd_config parameter may be NULL. In this case, the reference + * implementation I/O concentrator VFD will be used with the default settings + * of one I/O concentrator per node and a stripe size of 32MiB. Refer to the + * H5FD_subfiling_config_t documentation for information about configuration + * for the #H5FD_SUBFILING driver. 
+ * + * \since 1.13.2 * */ -H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config); +H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config); /** * \ingroup FAPL * - * \brief Queries subfiling file driver properties + * \brief Queries a File Access Property List for #H5FD_SUBFILING file driver properties * * \fapl_id - * \param[out] config_out The subfiling fapl data. + * \param[out] config_out Pointer to H5FD_subfiling_config_t structure through which the + * #H5FD_SUBFILING file driver properties will be returned. * * \returns \herr_t * - * \details H5Pget_fapl_subfiling() queries the #H5FD_SUBFILING driver properties as set - * by H5Pset_fapl_subfiling(). If the #H5FD_SUBFILING driver has not been set on - * the File Access Property List, a default configuration is returned. + * \details H5Pget_fapl_subfiling() queries the specified File Access Property List for + * #H5FD_SUBFILING driver properties as set by H5Pset_fapl_subfiling(). If the + * #H5FD_SUBFILING driver has not been set on the File Access Property List, a + * default configuration is returned. An HDF5 application may use this + * functionality to manually configure the #H5FD_SUBFILING driver by calling + * H5Pget_fapl_subfiling() on a newly-created File Access Property List, adjusting + * the default values and then calling H5Pset_fapl_subfiling() with the configured + * H5FD_subfiling_config_t structure. * - * \since 1.14.0 + * \since 1.13.2 * */ H5_DLL herr_t H5Pget_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *config_out); |