summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2022-08-04 17:56:48 (GMT)
committerGitHub <noreply@github.com>2022-08-04 17:56:48 (GMT)
commitbf07e0f2c9b381509abbde59fca8bea5445da261 (patch)
tree69551f0ec6658cc4e970bf1080fa4c5b256b289f /src
parenta71534fcc248737491adcfd770c7ab69b4adc2d4 (diff)
downloadhdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.zip
hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.gz
hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.bz2
Subfiling updates for release (#1963)
* Remove generated file h5fuse.sh * Link pthreads library when Subfiling VFD is built * Switch to MPI I/O driver for Subfiling HDF5 stub file * Rough first implementation for Subfiling file deletion * Subfiling VFD - get file dirname for file deletion * Subfiling VFD - set lock callback to NULL for now to avoid performance issues * Committing clang-format changes * Minor tidying up of Subfiling testing * Fixups for Subfiling VFD support in tools * Tidy up Subfiling public interface and add Doxygen * Respect Subfiling configuration settings from application * Add release note for Subfiling VFD * Committing clang-format changes * Committing clang-format changes * Shorten some Subfiling environment variable names Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src')
-rw-r--r--src/H5FDsubfiling/H5FDioc.c277
-rw-r--r--src/H5FDsubfiling/H5FDioc.h193
-rw-r--r--src/H5FDsubfiling/H5FDioc_threads.c20
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.c186
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.h358
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.c371
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.h51
-rw-r--r--src/H5trace.c5
8 files changed, 925 insertions, 536 deletions
diff --git a/src/H5FDsubfiling/H5FDioc.c b/src/H5FDsubfiling/H5FDioc.c
index 2eb7970..f779c46 100644
--- a/src/H5FDsubfiling/H5FDioc.c
+++ b/src/H5FDsubfiling/H5FDioc.c
@@ -16,6 +16,8 @@
* another underlying VFD. Maintains two files simultaneously.
*/
+#include <libgen.h>
+
/* This source code file is part of the H5FD driver module */
#include "H5FDdrvr_module.h"
@@ -25,7 +27,7 @@
#include "H5FDprivate.h" /* File drivers */
#include "H5FDioc.h" /* IOC file driver */
#include "H5FDioc_priv.h" /* IOC file driver */
-#include "H5FDsec2.h" /* Sec2 VFD */
+#include "H5FDmpio.h" /* MPI I/O VFD */
#include "H5FLprivate.h" /* Free Lists */
#include "H5Fprivate.h" /* File access */
#include "H5Iprivate.h" /* IDs */
@@ -53,7 +55,7 @@ typedef struct H5FD_ioc_t {
int mpi_rank;
int mpi_size;
- H5FD_t *ioc_file; /* native HDF5 file pointer (sec2) */
+ H5FD_t *ioc_file; /* native HDF5 file pointer */
int64_t context_id; /* The value used to lookup a subfiling context for the file */
@@ -68,7 +70,6 @@ typedef struct H5FD_ioc_t {
* Windows code further below.
*/
dev_t device; /* file device number */
- ino_t inode; /* file i-node number */
#else
/* Files in windows are uniquely identified by the volume serial
* number and the file index (both low and high parts).
@@ -161,7 +162,7 @@ static herr_t H5FD__ioc_ctl(H5FD_t *file, uint64_t op_code, uint64_t flags,
const void *input, void **result);
*/
-static herr_t H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out);
+static herr_t H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out);
static herr_t H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa);
static int H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr);
@@ -358,13 +359,13 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
if (vfd_config == NULL) {
- if (NULL == (ioc_conf = HDcalloc(1, sizeof(*ioc_conf))))
+ if (NULL == (ioc_conf = H5FL_CALLOC(H5FD_ioc_config_t)))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate IOC VFD configuration");
- ioc_conf->ioc_fapl_id = H5I_INVALID_HID;
+ ioc_conf->under_fapl_id = H5I_INVALID_HID;
/* Get IOC VFD defaults */
- if (H5FD__ioc_get_default_config(ioc_conf) < 0)
+ if (H5FD__ioc_get_default_config(fapl_id, ioc_conf) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't get default IOC VFD configuration");
vfd_config = ioc_conf;
@@ -377,9 +378,9 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config)
done:
if (ioc_conf) {
- if (ioc_conf->ioc_fapl_id >= 0 && H5I_dec_ref(ioc_conf->ioc_fapl_id) < 0)
- H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC FAPL");
- HDfree(ioc_conf);
+ if (ioc_conf->under_fapl_id >= 0 && H5I_dec_ref(ioc_conf->under_fapl_id) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC under FAPL");
+ H5FL_FREE(H5FD_ioc_config_t, ioc_conf);
}
H5_SUBFILING_FUNC_LEAVE;
@@ -423,7 +424,7 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out)
}
if (use_default_config) {
- if (H5FD__ioc_get_default_config(config_out) < 0)
+ if (H5FD__ioc_get_default_config(fapl_id, config_out) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get default IOC VFD configuration");
}
else {
@@ -431,8 +432,8 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out)
HDmemcpy(config_out, config_ptr, sizeof(H5FD_ioc_config_t));
/* Copy the driver info value */
- if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(config_out->ioc_fapl_id)) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC FAPL");
+ if (H5FD__copy_plist(config_ptr->under_fapl_id, &(config_out->under_fapl_id)) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC under FAPL");
}
done:
@@ -451,35 +452,53 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out)
+H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out)
{
- herr_t ret_value = SUCCEED;
+ MPI_Comm comm = MPI_COMM_NULL;
+ MPI_Info info = MPI_INFO_NULL;
+ herr_t ret_value = SUCCEED;
HDassert(config_out);
HDmemset(config_out, 0, sizeof(*config_out));
config_out->magic = H5FD_IOC_FAPL_MAGIC;
- config_out->version = H5FD_CURR_IOC_FAPL_VERSION;
- config_out->ioc_fapl_id = H5I_INVALID_HID;
- config_out->stripe_count = 0;
- config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH;
- config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE;
+ config_out->version = H5FD_IOC_CURR_FAPL_VERSION;
+ config_out->under_fapl_id = H5I_INVALID_HID;
+
+ /*
+ * Use default subfiling configuration. Do NOT call
+ * H5Pget_fapl_subfiling here as that can cause issues
+ */
+ config_out->subf_config.ioc_selection = SELECT_IOC_ONE_PER_NODE;
+ config_out->subf_config.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
+ config_out->subf_config.stripe_count = 0;
/* Create a default FAPL and choose an appropriate underlying driver */
- if ((config_out->ioc_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0)
+ if ((config_out->under_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTCREATE, FAIL, "can't create default FAPL");
- /* Currently, only sec2 vfd supported */
- if (H5Pset_fapl_sec2(config_out->ioc_fapl_id) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set Sec2 VFD on IOC FAPL");
+ /* Check if any MPI parameters were set on the FAPL */
+ if (H5Pget_mpi_params(fapl_id, &comm, &info) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI Comm/Info");
+ if (comm == MPI_COMM_NULL)
+ comm = MPI_COMM_WORLD;
+
+ /* Hardwire MPI I/O VFD for now */
+ if (H5Pset_fapl_mpio(config_out->under_fapl_id, comm, info) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI I/O VFD on IOC under FAPL");
/* Specific to this I/O Concentrator */
- config_out->thread_pool_count = H5FD_IOC_THREAD_POOL_SIZE;
+ config_out->thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
done:
+ if (H5_mpi_comm_free(&comm) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Communicator");
+ if (H5_mpi_info_free(&info) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Info object");
+
if (ret_value < 0) {
- if (config_out->ioc_fapl_id >= 0 && H5Pclose(config_out->ioc_fapl_id) < 0)
+ if (config_out->under_fapl_id >= 0 && H5Pclose(config_out->under_fapl_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTCLOSEOBJ, FAIL, "can't close FAPL");
}
@@ -510,7 +529,7 @@ H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa)
HDassert(fa != NULL);
- if (fa->version != H5FD_CURR_IOC_FAPL_VERSION)
+ if (fa->version != H5FD_IOC_CURR_FAPL_VERSION)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_ioc_config_t version");
if (fa->magic != H5FD_IOC_FAPL_MAGIC)
@@ -696,8 +715,8 @@ H5FD__ioc_fapl_copy(const void *_old_fa)
HDmemcpy(new_fa_ptr, old_fa_ptr, sizeof(H5FD_ioc_config_t));
/* Copy the FAPL */
- if (H5FD__copy_plist(old_fa_ptr->ioc_fapl_id, &(new_fa_ptr->ioc_fapl_id)) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC FAPL");
+ if (H5FD__copy_plist(old_fa_ptr->under_fapl_id, &(new_fa_ptr->under_fapl_id)) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC under FAPL");
ret_value = (void *)new_fa_ptr;
@@ -728,8 +747,8 @@ H5FD__ioc_fapl_free(void *_fapl)
/* Check arguments */
HDassert(fapl);
- if (fapl->ioc_fapl_id >= 0 && H5I_dec_ref(fapl->ioc_fapl_id) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close FAPL ID");
+ if (fapl->under_fapl_id >= 0 && H5I_dec_ref(fapl->under_fapl_id) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close IOC under FAPL ID");
/* Free the property list */
fapl = H5FL_FREE(H5FD_ioc_config_t, fapl);
@@ -774,10 +793,10 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
if (NULL == (file_ptr = (H5FD_ioc_t *)H5FL_CALLOC(H5FD_ioc_t)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTALLOC, NULL, "unable to allocate file struct");
- file_ptr->comm = MPI_COMM_NULL;
- file_ptr->info = MPI_INFO_NULL;
- file_ptr->context_id = -1;
- file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID;
+ file_ptr->comm = MPI_COMM_NULL;
+ file_ptr->info = MPI_INFO_NULL;
+ file_ptr->context_id = -1;
+ file_ptr->fa.under_fapl_id = H5I_INVALID_HID;
/* Get the driver-specific file access properties */
if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(fapl_id)))
@@ -814,7 +833,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
config_ptr = H5P_peek_driver_info(plist_ptr);
if (!config_ptr || (H5P_FILE_ACCESS_DEFAULT == fapl_id)) {
- if (H5FD__ioc_get_default_config(&default_config) < 0)
+ if (H5FD__ioc_get_default_config(fapl_id, &default_config) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get default IOC VFD configuration");
config_ptr = &default_config;
}
@@ -848,11 +867,11 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
}
/* Copy the ioc FAPL. */
- if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(file_ptr->fa.ioc_fapl_id)) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy FAPL");
+ if (H5FD__copy_plist(config_ptr->under_fapl_id, &(file_ptr->fa.under_fapl_id)) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy IOC under FAPL");
/* Check the underlying driver (sec2/mpio/etc.) */
- if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->ioc_fapl_id)))
+ if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->under_fapl_id)))
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list");
if (H5P_peek(plist_ptr, H5F_ACS_FILE_DRV_NAME, &driver_prop) < 0)
@@ -861,13 +880,13 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL,
"invalid driver ID in file access property list");
- if (driver->value != H5_VFD_SEC2) {
+ if (driver->value != H5_VFD_MPIO) {
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
- "unable to open file '%s' - only Sec2 VFD is currently supported", name);
+ "unable to open file '%s' - only MPI I/O VFD is currently supported", name);
}
else {
- subfiling_context_t *sf_context = NULL;
- uint64_t inode_id = UINT64_MAX;
+ subfiling_context_t *sf_context = NULL;
+ void *file_handle = NULL;
int ioc_flags;
int l_error = 0;
int g_error = 0;
@@ -881,34 +900,12 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
if (H5F_ACC_EXCL & flags)
ioc_flags |= O_EXCL;
- file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->ioc_fapl_id, HADDR_UNDEF);
+ file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->under_fapl_id, HADDR_UNDEF);
if (file_ptr->ioc_file) {
- h5_stat_t sb;
- void *file_handle = NULL;
-
- if (file_ptr->mpi_rank == 0) {
- if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->ioc_fapl_id, &file_handle) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
-
- if (HDfstat(*(int *)file_handle, &sb) < 0)
- H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
-
- HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
- file_ptr->inode = sb.st_ino;
- inode_id = (uint64_t)sb.st_ino;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&inode_id, 1, MPI_UINT64_T, 0, file_ptr->comm)))
- H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
-
- if (file_ptr->mpi_rank != 0)
- file_ptr->inode = (ino_t)inode_id;
+ if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->under_fapl_id, &file_handle) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
}
else {
- /* The two-step file opening approach may be
- * the root cause for the sec2 open to return a NULL.
- * It is prudent then, to collectively fail (early) in this case.
- */
l_error = 1;
}
@@ -925,7 +922,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
* context ID will be returned, which is used for
* further interactions with this file's subfiles.
*/
- if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags,
+ if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.subf_config, ioc_flags,
file_ptr->comm, &file_ptr->context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'",
name);
@@ -991,9 +988,9 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
}
#endif
- if (file_ptr->fa.ioc_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.ioc_fapl_id) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close FAPL");
- file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID;
+ if (file_ptr->fa.under_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.under_fapl_id) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close IOC under FAPL");
+ file_ptr->fa.under_fapl_id = H5I_INVALID_HID;
/* Close underlying file */
if (file_ptr->ioc_file) {
@@ -1331,7 +1328,7 @@ H5FD__ioc_get_handle(H5FD_t *_file, hid_t H5_ATTR_UNUSED fapl, void **file_handl
HDassert(file->ioc_file);
HDassert(file_handle);
- if (H5FD_get_vfd_handle(file->ioc_file, file->fa.ioc_fapl_id, file_handle) < 0)
+ if (H5FD_get_vfd_handle(file->ioc_file, file->fa.under_fapl_id, file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "unable to get handle of R/W file");
done:
@@ -1597,12 +1594,138 @@ done:
static herr_t
H5FD__ioc_del(const char *name, hid_t fapl)
{
- herr_t ret_value = SUCCEED;
+ H5P_genplist_t *plist;
+ h5_stat_t st;
+ MPI_Comm comm = MPI_COMM_NULL;
+ MPI_Info info = MPI_INFO_NULL;
+ FILE *config_file = NULL;
+ char *name_copy = NULL;
+ char *name_copy2 = NULL;
+ char *tmp_filename = NULL;
+ char *base_filename = NULL;
+ char *file_dirname = NULL;
+ int mpi_rank = INT_MAX;
+ int mpi_code;
+ herr_t ret_value = SUCCEED;
+
+ /* TODO: Eventually this routine should share common code
+ * with H5_subfiling_common's routines so it doesn't get
+ * out of sync
+ */
+
+ if (NULL == (plist = H5P_object_verify(fapl, H5P_FILE_ACCESS)))
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
+ HDassert(H5FD_IOC == H5P_peek_driver(plist));
+
+ if (H5FD_mpi_self_initialized) {
+ comm = MPI_COMM_WORLD;
+ }
+ else {
+ /* Get the MPI communicator and info from the fapl */
+ if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &info) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI info object");
+ if (H5P_get(plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &comm) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI communicator");
+ }
+
+ /* Get the MPI rank of this process */
+ if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &mpi_rank)))
+ H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
+
+ if (mpi_rank == 0) {
+ int n_io_concentrators = 0;
+ int num_digits = 0;
+
+ if (HDstat(name, &st) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SYSERRSTR, FAIL, "HDstat failed");
- (void)name;
- (void)fapl;
+ if (NULL == (name_copy = HDstrdup(name)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
+ if (NULL == (name_copy2 = HDstrdup(name)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
- /* TODO: implement later */
+ base_filename = basename(name_copy);
+ file_dirname = dirname(name_copy2);
+
+ /* Try to open the subfiling configuration file and get the number of IOCs */
+ if (NULL == (tmp_filename = HDmalloc(PATH_MAX)))
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+ "can't allocate config file name buffer");
+
+ /* TODO: No support for subfile directory prefix currently */
+ HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, file_dirname,
+ base_filename, (uint64_t)st.st_ino);
+
+ if (NULL == (config_file = HDfopen(tmp_filename, "r"))) {
+ if (ENOENT == errno) {
+#ifdef H5FD_IOC_DEBUG
+ HDprintf("** WARNING: couldn't delete Subfiling configuration file '%s'\n", tmp_filename);
+#endif
+
+ H5_SUBFILING_GOTO_DONE(SUCCEED);
+ }
+ else
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
+ "can't open subfiling config file");
+ }
+
+ if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL, "can't read subfiling config file");
+
+ /* Delete the Subfiling configuration file */
+ if (EOF == HDfclose(config_file)) {
+ config_file = NULL;
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+ "can't close subfiling config file");
+ }
+
+ config_file = NULL;
+
+ if (HDremove(tmp_filename) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+ "can't delete subfiling config file");
+
+ /* Try to delete each of the subfiles */
+ num_digits = (int)(HDlog10(n_io_concentrators) + 1);
+
+ for (int i = 0; i < n_io_concentrators; i++) {
+ /* TODO: No support for subfile directory prefix currently */
+ HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, file_dirname,
+ base_filename, (uint64_t)st.st_ino, num_digits, i + 1, n_io_concentrators);
+
+ if (HDremove(tmp_filename) < 0) {
+#ifdef H5FD_IOC_DEBUG
+ HDprintf("** WARNING: couldn't delete subfile '%s'\n", tmp_filename);
+#endif
+
+ if (ENOENT != errno)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete subfile");
+ }
+ }
+
+ /* Delete the HDF5 stub file */
+ if (HDremove(name) < 0)
+ H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete HDF5 file");
+ }
+
+done:
+ if (config_file)
+ if (EOF == HDfclose(config_file))
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "can't close subfiling config file");
+
+ /* Set up a barrier (don't want processes to run ahead of the delete) */
+ if (MPI_SUCCESS != (mpi_code = MPI_Barrier(comm)))
+ H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
+
+ /* Free duplicated MPI Communicator and Info objects */
+ if (H5_mpi_comm_free(&comm) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI communicator");
+ if (H5_mpi_info_free(&info) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI info object");
+
+ HDfree(tmp_filename);
+ HDfree(name_copy);
+ HDfree(name_copy2);
H5_SUBFILING_FUNC_LEAVE;
}
diff --git a/src/H5FDsubfiling/H5FDioc.h b/src/H5FDsubfiling/H5FDioc.h
index 04850f3..48102ac 100644
--- a/src/H5FDsubfiling/H5FDioc.h
+++ b/src/H5FDsubfiling/H5FDioc.h
@@ -11,7 +11,7 @@
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
- * Purpose: The public header file for the "io concentrator" driver.
+ * Purpose: The public header file for the "I/O concentrator" driver.
* This provides a similar functionality to that of the subfiling driver
* but introduces the necessary file access functionality via a multi-
* threading MPI service
@@ -20,72 +20,191 @@
#ifndef H5FDioc_H
#define H5FDioc_H
+#include "H5FDsubfiling.h"
+
#ifdef H5_HAVE_IOC_VFD
+/**
+ * \def H5FD_IOC
+ * Macro that returns the identifier for the #H5FD_IOC driver. \hid_t{file driver}
+ */
#define H5FD_IOC (H5FDperform_init(H5FD_ioc_init))
#else
#define H5FD_IOC (H5I_INVALID_HID)
#endif
+/**
+ * \def H5FD_IOC_NAME
+ * The canonical name for the #H5FD_IOC driver
+ */
#define H5FD_IOC_NAME "ioc"
#ifdef H5_HAVE_IOC_VFD
#ifndef H5FD_IOC_FAPL_MAGIC
-#define H5FD_CURR_IOC_FAPL_VERSION 1
-#define H5FD_IOC_FAPL_MAGIC 0xFED21331
+/**
+ * \def H5FD_IOC_CURR_FAPL_VERSION
+ * The version number of the H5FD_ioc_config_t configuration
+ * structure for the #H5FD_IOC driver
+ */
+#define H5FD_IOC_CURR_FAPL_VERSION 1
+/**
+ * \def H5FD_IOC_FAPL_MAGIC
+ * Unique number used to distinguish the #H5FD_IOC driver from other HDF5 file drivers
+ */
+#define H5FD_IOC_FAPL_MAGIC 0xFED21331
#endif
-#define H5FD_IOC_THREAD_POOL_SIZE 4
+/**
+ * \def H5FD_IOC_DEFAULT_THREAD_POOL_SIZE
+ * The default number of I/O concentrator worker threads
+ */
+#define H5FD_IOC_DEFAULT_THREAD_POOL_SIZE 4
/*
* Environment variables interpreted by the IOC VFD
*/
-#define H5_IOC_THREAD_POOL_COUNT "H5_IOC_THREAD_POOL_COUNT"
-/*
- * Define the various constants to allow different allocations
- * of subfile ranks. The choices are self explanatory, starting
- * with the default of one IO Concentrator (IOC) per node and
- * lastly, defining a fixed number.
+/**
+ * \def H5FD_IOC_THREAD_POOL_SIZE
+ * Macro for name of the environment variable that controls/overrides
+ * the number of I/O concentrator worker threads
+ *
+ * The value set for this environment variable is interpreted as an
+ * int value and must be > 0.
*/
-typedef enum {
- SELECT_IOC_ONE_PER_NODE = 0, /* Default */
- SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */
- SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */
- SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */
- ioc_selection_options /* (Uses same selection as every Nth rank) */
-} ioc_selection_t;
+#define H5FD_IOC_THREAD_POOL_SIZE "H5FD_IOC_THREAD_POOL_SIZE"
-/*
- * In addition to the common configuration fields, we can have
- * VFD specific fields. Here's one for the IO Concentrator VFD.
+//! <!-- [H5FD_ioc_config_t_snip] -->
+/**
+ * \struct H5FD_ioc_config_t
+ * \brief Configuration structure for H5Pset_fapl_ioc() / H5Pget_fapl_ioc()
*
- * thread_pool_count (int32_t)
- * Indicate the number of helper threads that we want for
- * creating a thread pool
+ * \details H5FD_ioc_config_t is a public structure that is used to pass
+ * configuration data to the #H5FD_IOC driver via a File Access
+ * Property List. A pointer to an instance of this structure is
+ * a parameter to H5Pset_fapl_ioc() and H5Pget_fapl_ioc().
+ *
+ * The #H5FD_IOC driver shares much of its configuration with the
+ * #H5FD_SUBFILING driver and so its configuration structure
+ * contains an instance of a H5FD_subfiling_shared_config_t
+ * configuration structure.
+ *
+ * \var uint32_t H5FD_ioc_config_t::magic
+ * A somewhat unique number which distinguishes the #H5FD_IOC driver
+ * from other drivers. Used in combination with a version number, it
+ * can help to validate a user-generated File Access Property List.
+ * This field should be set to #H5FD_IOC_FAPL_MAGIC.
+ *
+ * \var uint32_t H5FD_ioc_config_t::version
+ * Version number of the H5FD_ioc_config_t structure. Any instance passed
+ * to H5Pset_fapl_ioc() / H5Pget_fapl_ioc() must have a recognized version
+ * number or an error will be raised. Currently, this field should be set
+ * to #H5FD_IOC_CURR_FAPL_VERSION.
+ *
+ * \var hid_t H5FD_ioc_config_t::under_fapl_id
+ * The File Access Property List which is setup with the file driver
+ * to use for I/O to the HDF5 stub file. The stub file looks like a
+ * typical HDF5 file, but currently only contains the superblock metadata
+ * for compatibility with legacy HDF5 applications. The default driver used
+ * is currently the #H5FD_MPIO driver.
+ *
+ * \var int32_t H5FD_ioc_config_t::thread_pool_count
+ * The number of I/O concentrator worker threads to use.
+ *
+ * This value can also be set or adjusted with the #H5FD_IOC_THREAD_POOL_SIZE
+ * environment variable.
+ *
+ * \var H5FD_subfiling_shared_config_t H5FD_ioc_config_t::subf_config
+ * Subfiling configuration data for the parent #H5FD_SUBFILING driver. This
+ * includes the sub-file stripe size, number of I/O concentrators, IOC
+ * selection method, etc.
*
- * ----------------------------------------------------------------------------
*/
-
typedef struct H5FD_ioc_config_t {
- uint32_t magic; /* set to H5FD_IOC_FAPL_MAGIC */
- uint32_t version; /* set to H5FD_CURR_IOC_FAPL_VERSION */
- int32_t stripe_count; /* How many io concentrators */
- int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */
- ioc_selection_t ioc_selection; /* Method to select IO Concentrators */
- hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */
- int32_t thread_pool_count;
+ uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */
+ uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */
+ hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */
+ int32_t thread_pool_count; /* Number of I/O concentrator worker threads to use */
+ H5FD_subfiling_shared_config_t subf_config; /* Subfiling driver configuration */
} H5FD_ioc_config_t;
+//! <!-- [H5FD_ioc_config_t_snip] -->
#ifdef __cplusplus
extern "C" {
#endif
-H5_DLL hid_t H5FD_ioc_init(void);
-H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr);
-H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr);
-H5_DLL void begin_thread_exclusive(void);
-H5_DLL void end_thread_exclusive(void);
+/**
+ * \brief Internal routine to initialize #H5FD_IOC driver. Not meant to be
+ * called directly by an HDF5 application
+ */
+H5_DLL hid_t H5FD_ioc_init(void);
+/**
+ * \ingroup FAPL
+ *
+ * \brief Modifies the specified File Access Property List to use the #H5FD_IOC driver
+ *
+ * \fapl_id
+ * \param[in] vfd_config Pointer to #H5FD_IOC driver configuration structure. May be NULL.
+ * \returns \herr_t
+ *
+ * \details H5Pset_fapl_ioc() modifies the File Access Property List to use the
+ * #H5FD_IOC driver.
+ *
+ * The #H5FD_IOC driver is a reference implementation of an "I/O concentrator"
+ * file driver that works in conjunction with the #H5FD_SUBFILING driver and
+ * provides the I/O backend for servicing I/O requests to sub-files.
+ *
+ * Typically, an HDF5 application won't need to call this routine directly.
+ * The #H5FD_IOC driver is usually set up as a side effect of an HDF5 application
+ * using the #H5FD_SUBFILING driver, but this routine is provided in case the
+ * application wishes to manually configure the #H5FD_IOC driver.
+ *
+ * \note The \p vfd_config parameter may be NULL. In this case, the driver will
+ * be setup with default settings. Note that in this case, it is assumed
+ * the parent #H5FD_SUBFILING driver was also setup with default settings.
+ * If the two drivers differ in configuration settings, application behavior
+ * may not be as expected.
+ *
+ * \since 1.13.2
+ *
+ */
+H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config);
+/**
+ * \ingroup FAPL
+ *
+ * \brief Queries a File Access Property List for #H5FD_IOC file driver properties
+ *
+ * \fapl_id
+ * \param[out] config_out Pointer to H5FD_ioc_config_t structure through which the
+ * #H5FD_IOC file driver properties will be returned.
+ *
+ * \returns \herr_t
+ *
+ * \details H5Pget_fapl_ioc() queries the specified File Access Property List for
+ * #H5FD_IOC driver properties as set by H5Pset_fapl_ioc(). If the #H5FD_IOC
+ * driver has not been set on the File Access Property List, a default
+ * configuration is returned. An HDF5 application may use this functionality
+ * to manually configure the #H5FD_IOC driver by calling H5Pget_fapl_ioc()
+ * on a newly-created File Access Property List, adjusting the default
+ * values and then calling H5Pset_fapl_ioc() with the configured
+ * H5FD_ioc_config_t structure.
+ *
+ * \since 1.13.2
+ *
+ */
+H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out);
+/**
+ * \brief Internal routine for managing exclusive access to critical sections
+ * by the #H5FD_IOC driver's worker threads. Not meant to be called
+ * directly by an HDF5 application
+ */
+H5_DLL void H5FD_ioc_begin_thread_exclusive(void);
+/**
+ * \brief Internal routine for managing exclusive access to critical sections
+ * by the #H5FD_IOC driver's worker threads. Not meant to be called
+ * directly by an HDF5 application
+ */
+H5_DLL void H5FD_ioc_end_thread_exclusive(void);
#ifdef __cplusplus
}
diff --git a/src/H5FDsubfiling/H5FDioc_threads.c b/src/H5FDsubfiling/H5FDioc_threads.c
index 2d50503..4c1887f 100644
--- a/src/H5FDsubfiling/H5FDioc_threads.c
+++ b/src/H5FDsubfiling/H5FDioc_threads.c
@@ -14,10 +14,6 @@
#include "H5FDsubfiling.h"
-#ifndef HG_TEST_NUM_THREADS_DEFAULT
-#define HG_TEST_NUM_THREADS_DEFAULT 4
-#endif
-
#define MIN_READ_RETRIES 10
/*
@@ -118,7 +114,7 @@ initialize_ioc_threads(void *_sf_context)
{
subfiling_context_t *sf_context = _sf_context;
ioc_data_t *ioc_data = NULL;
- unsigned thread_pool_count = HG_TEST_NUM_THREADS_DEFAULT;
+ unsigned thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
char *env_value;
int ret_value = 0;
#ifdef H5FD_IOC_COLLECT_STATS
@@ -174,7 +170,7 @@ initialize_ioc_threads(void *_sf_context)
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, (-1), "can't initialize IOC thread queue mutex");
/* Allow experimentation with the number of helper threads */
- if ((env_value = HDgetenv(H5_IOC_THREAD_POOL_COUNT)) != NULL) {
+ if ((env_value = HDgetenv(H5FD_IOC_THREAD_POOL_SIZE)) != NULL) {
int value_check = HDatoi(env_value);
if (value_check > 0) {
thread_pool_count = (unsigned int)value_check;
@@ -589,7 +585,7 @@ handle_work_request(void *arg)
}
/*-------------------------------------------------------------------------
- * Function: begin_thread_exclusive
+ * Function: H5FD_ioc_begin_thread_exclusive
*
* Purpose: Mutex lock to restrict access to code or variables.
*
@@ -603,13 +599,13 @@ handle_work_request(void *arg)
*-------------------------------------------------------------------------
*/
void
-begin_thread_exclusive(void)
+H5FD_ioc_begin_thread_exclusive(void)
{
hg_thread_mutex_lock(&ioc_thread_mutex);
}
/*-------------------------------------------------------------------------
- * Function: end_thread_exclusive
+ * Function: H5FD_ioc_end_thread_exclusive
*
* Purpose: Mutex unlock. Should only be called by the current holder
* of the locked mutex.
@@ -624,7 +620,7 @@ begin_thread_exclusive(void)
*-------------------------------------------------------------------------
*/
void
-end_thread_exclusive(void)
+H5FD_ioc_end_thread_exclusive(void)
{
hg_thread_mutex_unlock(&ioc_thread_mutex);
}
@@ -840,13 +836,13 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source,
sf_queue_delay_time += t_queue_delay;
#endif
- begin_thread_exclusive();
+ H5FD_ioc_begin_thread_exclusive();
/* Adjust EOF if necessary */
if (sf_eof > sf_context->sf_eof)
sf_context->sf_eof = sf_eof;
- end_thread_exclusive();
+ H5FD_ioc_end_thread_exclusive();
done:
if (send_nack) {
diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c
index 2b436fe..a38c020 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.c
+++ b/src/H5FDsubfiling/H5FDsubfiling.c
@@ -209,11 +209,13 @@ static herr_t H5FD__subfiling_read_vector(H5FD_t *file, hid_t dxpl_id, uint32_t
static herr_t H5FD__subfiling_write_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[],
haddr_t addrs[], size_t sizes[], const void *bufs[] /* in */);
static herr_t H5FD__subfiling_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
+#if 0
static herr_t H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw);
static herr_t H5FD__subfiling_unlock(H5FD_t *_file);
-static herr_t H5FD__subfiling_del(const char *name, hid_t fapl);
-static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input,
- void **output);
+#endif
+static herr_t H5FD__subfiling_del(const char *name, hid_t fapl);
+static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input,
+ void **output);
static herr_t H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *config_out);
static herr_t H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa);
@@ -246,46 +248,46 @@ static herr_t iovec_fill_uniform(subfiling_context_t *sf_context, int64_t iovec_
void H5FD__subfiling_mpi_finalize(void);
static const H5FD_class_t H5FD_subfiling_g = {
- H5FD_CLASS_VERSION, /* VFD interface version */
- H5_VFD_SUBFILING, /* value */
- H5FD_SUBFILING_NAME, /* name */
- MAXADDR, /* maxaddr */
- H5F_CLOSE_WEAK, /* fc_degree */
- H5FD__subfiling_term, /* terminate */
- NULL, /* sb_size */
- NULL, /* sb_encode */
- NULL, /* sb_decode */
- sizeof(H5FD_subfiling_config_t), /* fapl_size */
- H5FD__subfiling_fapl_get, /* fapl_get */
- H5FD__subfiling_fapl_copy, /* fapl_copy */
- H5FD__subfiling_fapl_free, /* fapl_free */
- 0, /* dxpl_size */
- NULL, /* dxpl_copy */
- NULL, /* dxpl_free */
- H5FD__subfiling_open, /* open */
- H5FD__subfiling_close, /* close */
- H5FD__subfiling_cmp, /* cmp */
- H5FD__subfiling_query, /* query */
- NULL, /* get_type_map */
- NULL, /* alloc */
- NULL, /* free */
- H5FD__subfiling_get_eoa, /* get_eoa */
- H5FD__subfiling_set_eoa, /* set_eoa */
- H5FD__subfiling_get_eof, /* get_eof */
- H5FD__subfiling_get_handle, /* get_handle */
- H5FD__subfiling_read, /* read */
- H5FD__subfiling_write, /* write */
- H5FD__subfiling_read_vector, /* read_vector */
- H5FD__subfiling_write_vector, /* write_vector */
- NULL, /* read_selection */
- NULL, /* write_selection */
- NULL, /* flush */
- H5FD__subfiling_truncate, /* truncate */
- H5FD__subfiling_lock, /* lock */
- H5FD__subfiling_unlock, /* unlock */
- H5FD__subfiling_del, /* del */
- H5FD__subfiling_ctl, /* ctl */
- H5FD_FLMAP_DICHOTOMY /* fl_map */
+ H5FD_CLASS_VERSION, /* VFD interface version */
+ H5_VFD_SUBFILING, /* value */
+ H5FD_SUBFILING_NAME, /* name */
+ MAXADDR, /* maxaddr */
+ H5F_CLOSE_WEAK, /* fc_degree */
+ H5FD__subfiling_term, /* terminate */
+ NULL, /* sb_size */
+ NULL, /* sb_encode */
+ NULL, /* sb_decode */
+ sizeof(H5FD_subfiling_config_t), /* fapl_size */
+ H5FD__subfiling_fapl_get, /* fapl_get */
+ H5FD__subfiling_fapl_copy, /* fapl_copy */
+ H5FD__subfiling_fapl_free, /* fapl_free */
+ 0, /* dxpl_size */
+ NULL, /* dxpl_copy */
+ NULL, /* dxpl_free */
+ H5FD__subfiling_open, /* open */
+ H5FD__subfiling_close, /* close */
+ H5FD__subfiling_cmp, /* cmp */
+ H5FD__subfiling_query, /* query */
+ NULL, /* get_type_map */
+ NULL, /* alloc */
+ NULL, /* free */
+ H5FD__subfiling_get_eoa, /* get_eoa */
+ H5FD__subfiling_set_eoa, /* set_eoa */
+ H5FD__subfiling_get_eof, /* get_eof */
+ H5FD__subfiling_get_handle, /* get_handle */
+ H5FD__subfiling_read, /* read */
+ H5FD__subfiling_write, /* write */
+ H5FD__subfiling_read_vector, /* read_vector */
+ H5FD__subfiling_write_vector, /* write_vector */
+ NULL, /* read_selection */
+ NULL, /* write_selection */
+ NULL, /* flush */
+ H5FD__subfiling_truncate, /* truncate */
+ NULL /* H5FD__subfiling_lock */, /* lock */
+ NULL /* H5FD__subfiling_unlock */, /* unlock */
+ H5FD__subfiling_del, /* del */
+ H5FD__subfiling_ctl, /* ctl */
+ H5FD_FLMAP_DICHOTOMY /* fl_map */
};
/* Declare a free list to manage the H5FD_subfiling_t struct */
@@ -457,7 +459,7 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config)
+H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config)
{
H5FD_subfiling_config_t *subfiling_conf = NULL;
H5P_genplist_t *plist = NULL;
@@ -485,7 +487,7 @@ H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config)
if (H5FD__subfiling_validate_config(vfd_config) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid subfiling VFD configuration");
- ret_value = H5P_set_driver(plist, H5FD_SUBFILING, (void *)vfd_config, NULL);
+ ret_value = H5P_set_driver(plist, H5FD_SUBFILING, vfd_config, NULL);
done:
if (subfiling_conf) {
@@ -564,13 +566,14 @@ H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *confi
HDmemset(config_out, 0, sizeof(*config_out));
- config_out->magic = H5FD_SUBFILING_FAPL_MAGIC;
- config_out->version = H5FD_CURR_SUBFILING_FAPL_VERSION;
- config_out->ioc_fapl_id = H5I_INVALID_HID;
- config_out->stripe_count = 0;
- config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH;
- config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE;
- config_out->require_ioc = TRUE;
+ config_out->magic = H5FD_SUBFILING_FAPL_MAGIC;
+ config_out->version = H5FD_SUBFILING_CURR_FAPL_VERSION;
+ config_out->ioc_fapl_id = H5I_INVALID_HID;
+ config_out->require_ioc = TRUE;
+
+ config_out->shared_cfg.ioc_selection = SELECT_IOC_ONE_PER_NODE;
+ config_out->shared_cfg.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
+ config_out->shared_cfg.stripe_count = 0;
if ((h5_require_ioc = HDgetenv("H5_REQUIRE_IOC")) != NULL) {
int value_check = HDatoi(h5_require_ioc);
@@ -644,7 +647,7 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa)
HDassert(fa != NULL);
- if (fa->version != H5FD_CURR_SUBFILING_FAPL_VERSION)
+ if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version");
if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC)
@@ -842,9 +845,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
H5FD_class_t *driver = NULL; /* VFD for file */
H5P_genplist_t *plist_ptr = NULL;
H5FD_driver_prop_t driver_prop; /* Property for driver ID & info */
- hbool_t bcasted_inode = FALSE;
- hbool_t bcasted_eof = FALSE;
- int64_t sf_eof = -1;
+ hbool_t bcasted_eof = FALSE;
+ int64_t sf_eof = -1;
+ void *file_handle = NULL;
int mpi_code; /* MPI return code */
H5FD_t *ret_value = NULL;
@@ -958,34 +961,16 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
H5E_FILE, H5E_CANTOPENFILE, NULL,
"unable to open file '%s' - only IOC and Sec2 VFDs are currently supported for subfiles", name);
- if (driver->value == H5_VFD_IOC) {
- h5_stat_t sb;
- uint64_t fid;
- void *file_handle = NULL;
-
- if (file_ptr->mpi_rank == 0) {
- if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle");
-
- if (HDfstat(*(int *)file_handle, &sb) < 0)
- H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
-
- HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
- fid = (uint64_t)sb.st_ino;
- }
-
- if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&fid, 1, MPI_UINT64_T, 0, file_ptr->comm)))
- H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
-
- bcasted_inode = TRUE;
+ if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle");
+ if (driver->value == H5_VFD_IOC) {
/* Get a copy of the context ID for later use */
- file_ptr->context_id = H5_subfile_fid_to_context(fid);
+ file_ptr->context_id = H5_subfile_fhandle_to_context(file_handle);
file_ptr->fa.require_ioc = true;
}
else if (driver->value == H5_VFD_SEC2) {
- uint64_t inode_id = (uint64_t)-1;
- int ioc_flags;
+ int ioc_flags;
/* Translate the HDF5 file open flags into standard POSIX open flags */
ioc_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY;
@@ -996,44 +981,12 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
if (H5F_ACC_EXCL & flags)
ioc_flags |= O_EXCL;
- /* Let MPI rank 0 to the file stat operation and broadcast a result */
- if (file_ptr->mpi_rank == 0) {
- if (file_ptr->sf_file) {
- h5_stat_t sb;
- void *file_handle = NULL;
-
- if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
-
- /* We create a new file descriptor for our file structure.
- * Basically, we want these separate so that sec2 can
- * deal with the opened file for additional operations
- * (especially close) without interfering with subfiling.
- */
- file_ptr->fd = HDdup(*(int *)file_handle);
-
- if (HDfstat(*(int *)file_handle, &sb) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
- inode_id = sb.st_ino;
- }
- }
-
- if (MPI_SUCCESS == MPI_Bcast(&inode_id, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm)) {
- file_ptr->inode = inode_id;
- }
-
- bcasted_inode = TRUE;
-
- /* All ranks can now detect an error and fail. */
- if (inode_id == (uint64_t)-1)
- H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file = %s\n", name);
-
/*
* Open the subfiles for this HDF5 file. A subfiling
* context ID will be returned, which is used for
* further interactions with this file's subfiles.
*/
- if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags,
+ if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.shared_cfg, ioc_flags,
file_ptr->comm, &file_ptr->context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiling files = %s\n",
name);
@@ -1062,13 +1015,6 @@ done:
if (file_ptr) {
/* Participate in possible MPI collectives on failure */
if (file_ptr->comm != MPI_COMM_NULL) {
- if (!bcasted_inode) {
- uint64_t tmp_inode = UINT64_MAX;
-
- if (MPI_SUCCESS !=
- (mpi_code = MPI_Bcast(&tmp_inode, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm)))
- H5_SUBFILING_MPI_DONE_ERROR(NULL, "MPI_Bcast failed", mpi_code);
- }
if (!bcasted_eof) {
sf_eof = -1;
@@ -2367,6 +2313,7 @@ done:
*
*-------------------------------------------------------------------------
*/
+#if 0
static herr_t
H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw)
{
@@ -2415,6 +2362,7 @@ H5FD__subfiling_unlock(H5FD_t *_file)
done:
H5_SUBFILING_FUNC_LEAVE_API;
} /* end H5FD__subfiling_unlock() */
+#endif
static herr_t
H5FD__subfiling_del(const char *name, hid_t fapl)
diff --git a/src/H5FDsubfiling/H5FDsubfiling.h b/src/H5FDsubfiling/H5FDsubfiling.h
index 3de5155..3bc448b 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.h
+++ b/src/H5FDsubfiling/H5FDsubfiling.h
@@ -14,120 +14,240 @@
#ifndef H5FDsubfiling_H
#define H5FDsubfiling_H
-#include "H5FDioc.h"
-
#ifdef H5_HAVE_SUBFILING_VFD
+/**
+ * \def H5FD_SUBFILING
+ * Macro that returns the identifier for the #H5FD_SUBFILING driver. \hid_t{file driver}
+ */
#define H5FD_SUBFILING (H5FDperform_init(H5FD_subfiling_init))
#else
#define H5FD_SUBFILING (H5I_INVALID_HID)
#endif
+/**
+ * \def H5FD_SUBFILING_NAME
+ * The canonical name for the #H5FD_SUBFILING driver
+ */
#define H5FD_SUBFILING_NAME "subfiling"
#ifdef H5_HAVE_SUBFILING_VFD
#ifndef H5FD_SUBFILING_FAPL_MAGIC
-#define H5FD_CURR_SUBFILING_FAPL_VERSION 1
-#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
+/**
+ * \def H5FD_SUBFILING_CURR_FAPL_VERSION
+ * The version number of the H5FD_subfiling_config_t configuration
+ * structure for the #H5FD_SUBFILING driver
+ */
+#define H5FD_SUBFILING_CURR_FAPL_VERSION 1
+/**
+ * \def H5FD_SUBFILING_FAPL_MAGIC
+ * Unique number used to distinguish the #H5FD_SUBFILING driver from other HDF5 file drivers
+ */
+#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
#endif
-/****************************************************************************
- *
- * Structure: H5FD_subfiling_config_t
- *
- * Purpose:
- *
- * H5FD_subfiling_config_t is a public structure that is used to pass
- * subfiling configuration data to the appropriate subfiling VFD via
- * the FAPL. A pointer to an instance of this structure is a parameter
- * to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
- *
- * `magic` (uint32_t)
- *
- * Magic is a somewhat unique number which identifies this VFD from
- * other VFDs. Used in combination with a version number, we can
- * validate a user generated file access property list (fapl).
- * This field should be set to H5FD_SUBFILING_FAPL_MAGIC.
- *
- * `version` (uint32_t)
- *
- * Version number of the H5FD_subfiling_config_t structure. Any instance
- * passed to the above calls must have a recognized version number, or
- * an error will be flagged.
- *
- * This field should be set to H5FD_CURR_SUBFILING_FAPL_VERSION.
- *
- *** IO Concentrator Info ***
- *** These fields will be replicated in the stacked IOC VFD which
- *** provides the extended support for aggregating reads and writes
- *** and allows global file access to node-local storage containers.
- *
- * `stripe_count` (int32_t)
- *
- * The integer value which identifies the total number of
- * subfiles that have been algorithmically been selected to
- * to contain the segments of raw data which make up an HDF5
- * file. This value is used to implement the RAID-0 functionality
- * when reading or writing datasets.
+/**
+ * \def H5FD_SUBFILING_DEFAULT_STRIPE_SIZE
+ * The default stripe size (in bytes) for data stripes in sub-files
+ */
+#define H5FD_SUBFILING_DEFAULT_STRIPE_SIZE (32 * 1024 * 1024)
+
+/**
+ * \def H5FD_SUBFILING_FILENAME_TEMPLATE
+ * The basic template for a sub-file filename
+ */
+#define H5FD_SUBFILING_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d"
+
+/**
+ * \def H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE
+ * The basic template for a #H5FD_SUBFILING driver configuration filename
+ */
+#define H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config"
+
+/*
+ * Environment variables interpreted by the HDF5 Subfiling feature
+ */
+
+/**
+ * \def H5FD_SUBFILING_STRIPE_SIZE
+ * Macro for name of the environment variable that specifies the size
+ * (in bytes) for data stripes in sub-files
*
- * `stripe_depth` (int64_t)
+ * The value set for this environment variable is interpreted as a
+ * long long value and must be > 0.
+ */
+#define H5FD_SUBFILING_STRIPE_SIZE "H5FD_SUBFILING_STRIPE_SIZE"
+/**
+ * \def H5FD_SUBFILING_IOC_PER_NODE
+ * Macro for name of the environment variable that specifies the number
+ * of MPI ranks per node to use as I/O concentrators
*
- * The stripe depth defines a limit on the maximum number of contiguous
- * bytes that can be read or written in a single operation on any
- * selected subfile. Larger IO operations can exceed this limit
- * by utilizing MPI derived types to construct an IO request which
- * gathers additional data segments from memory for the IO request.
+ * The value set for this environment variable is interpreted as a
+ * long value and must be > 0.
+ */
+#define H5FD_SUBFILING_IOC_PER_NODE "H5FD_SUBFILING_IOC_PER_NODE"
+/**
+ * \def H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * Macro for name of the environment variable that provides information
+ * for selection MPI ranks as I/O concentrators
*
- * `ioc_selection` (enum io_selection datatype)
+ * The value set for this environment variable is interpreted differently,
+ * depending on the IOC selection method chosen.
*
- * The io_selection_t defines a specific algorithm by which IO
- * concentrators (IOCs) and sub-files are identified. The available
- * algorithms are: SELECT_IOC_ONE_PER_NODE, SELECT_IOC_EVERY_NTH_RANK,
- * SELECT_IOC_WITH_CONFIG, and SELECT_IOC_TOTAL.
+ * For #SELECT_IOC_ONE_PER_NODE, this value is ignored.
*
- *** STACKING and other VFD support
- *** i.e. FAPL caching
- ***
+ * For #SELECT_IOC_EVERY_NTH_RANK, this value is interpreted as a
+ * long value and must be > 0. The value will correspond to the
+ * `N` value when selecting every `N`-th MPI rank as an I/O
+ * concentrator.
*
- * `ioc_fapl_id` (hid_t)
+ * For #SELECT_IOC_WITH_CONFIG, this value is ignored as that particular
+ * IOC selection method is not currently supported.
*
- * A valid file access property list (fapl) is cached on each
- * process and thus enables selection of an alternative provider
- * for subsequent file operations.
- * By default, Sub-filing employs an additional support VFD that
- * provides file IO proxy capabilities to all MPI ranks in a
- * distributed parallel application. This IO indirection
- * thus allows application access all sub-files even while
- * these may actually be node-local and thus not directly
- * accessible to remote ranks.
+ * For #SELECT_IOC_TOTAL, this value is interpreted as a long value
+ * and must be > 0. The value will correspond to the total number
+ * of I/O concentrators to be used.
+ */
+#define H5FD_SUBFILING_IOC_SELECTION_CRITERIA "H5FD_SUBFILING_IOC_SELECTION_CRITERIA"
+/**
+ * \def H5FD_SUBFILING_SUBFILE_PREFIX
+ * Macro for name of the environment variable that specifies a prefix
+ * to apply to the filenames generated for sub-files
*
- ****************************************************************************/
+ * The value set for this environment variable is interpreted as a
+ * pathname.
+ */
+#define H5FD_SUBFILING_SUBFILE_PREFIX "H5FD_SUBFILING_SUBFILE_PREFIX"
-/*
- * In addition to the common configuration fields, we can have
- * VFD specific fields. Here's one for the subfiling VFD.
- *
- * `require_ioc` (hbool_t)
- *
- * Require_IOC is a boolean flag with a default value of TRUE.
- * This flag indicates that the stacked H5FDioc VFD should be
- * employed for sub-filing operations. The default flag can be
- * overridden with an environment variable: H5_REQUIRE_IOC=0
- *
+/**
+ * \enum H5FD_subfiling_ioc_select_t
+ * This enum defines the various constants to allow different
+ * allocations of MPI ranks as I/O concentrators.
+ *
+ * \var SELECT_IOC_ONE_PER_NODE
+ * Default selection method. One MPI rank per node is used as an
+ * I/O concentrator. If this selection method is used, the number
+ * of I/O concentrators per node can be adjusted with the
+ * #H5FD_SUBFILING_IOC_PER_NODE environment variable.
+ *
+ * \var SELECT_IOC_EVERY_NTH_RANK
+ * Starting with MPI rank 0, a stride of 'N' is applied to the MPI
+ * rank values to determine the next I/O concentrator. The
+ * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must
+ * be set to the value desired for 'N'.
+ *
+ * \var SELECT_IOC_WITH_CONFIG
+ * Currently unsupported. Use a configuration file to determine
+ * the mapping from MPI ranks to I/O concentrators. The
+ * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must
+ * be set to the path to the configuration file.
+ *
+ * \var SELECT_IOC_TOTAL
+ * Specifies that a total of 'N' I/O concentrators should be used.
+ * Starting with MPI rank 0, a stride of 'MPI comm size' / 'N' is
+ * applied to the MPI rank values to determine the next I/O
+ * concentrator. The #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * environment variable must be set to the value desired for 'N'.
+ *
+ * \var ioc_selection_options
+ * Unused. Sentinel value
*/
+typedef enum {
+ SELECT_IOC_ONE_PER_NODE = 0, /* Default */
+ SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */
+ SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */
+ SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */
+ ioc_selection_options /* Sentinel value */
+} H5FD_subfiling_ioc_select_t;
+
+/**
+ * \struct H5FD_subfiling_shared_config_t
+ * \brief Subfiling configuration structure that is shared between the #H5FD_SUBFILING
+ * and #H5FD_IOC drivers
+ *
+ * \var H5FD_subfiling_ioc_select_t H5FD_subfiling_shared_config_t::ioc_selection
+ * The method to use for selecting MPI ranks to be I/O concentrators. The
+ * current default is to select one MPI rank per node to be an I/O concentrator.
+ * Refer to #H5FD_subfiling_ioc_select_t for a description of the algorithms
+ * available for use.
+ *
+ * \var int64_t H5FD_subfiling_shared_config_t::stripe_size
+ * The stripe size defines the size (in bytes) of the data stripes in the
+ * sub-files for the logical HDF5 file. Data is striped across the sub-files
+ * in a round-robin wrap-around fashion in segments equal to the stripe size.
+ *
+ * For example, in an HDF5 file consisting of four sub-files with a 1MiB stripe
+ * size, the first and fifth 1MiB of data would reside in the first sub-file,
+ * the second and sixth 1MiB of data would reside in the second sub-file and so
+ * on.
+ *
+ * This value can also be set or adjusted with the #H5FD_SUBFILING_STRIPE_SIZE
+ * environment variable.
+ *
+ * \var int32_t H5FD_subfiling_shared_config_t::stripe_count
+ * The number of I/O concentrators (and, currently, the number of sub-files)
+ * to use for the logical HDF5 file. This value is used in conjunction with
+ * the IOC selection method to determine which MPI ranks will be assigned as
+ * I/O concentrators.
+ *
+ * Alternatively, the mapping between MPI ranks and I/O concentrators can be
+ * set or adjusted with a combination of the #ioc_selection field and the
+ * #H5FD_SUBFILING_IOC_PER_NODE and #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ * environment variables.
+ */
+typedef struct H5FD_subfiling_shared_config_t {
+ H5FD_subfiling_ioc_select_t ioc_selection; /* Method to select I/O concentrators */
+ int64_t stripe_size; /* Size (in bytes) of data stripes in sub-files */
+ int32_t stripe_count; /* Number of I/O concentrators to use */
+} H5FD_subfiling_shared_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
/**
- * Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
+ * \struct H5FD_subfiling_config_t
+ * \brief Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
+ *
+ * \details H5FD_subfiling_config_t is a public structure that is used to pass
+ * subfiling configuration data to the #H5FD_SUBFILING driver via
+ * a File Access Property List. A pointer to an instance of this structure
+ * is a parameter to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
+ *
+ * \var uint32_t H5FD_subfiling_config_t::magic
+ * A somewhat unique number which distinguishes the #H5FD_SUBFILING driver
+ * from other drivers. Used in combination with a version number, it can
+ * help to validate a user-generated File Access Property List. This field
+ * should be set to #H5FD_SUBFILING_FAPL_MAGIC.
+ *
+ * \var uint32_t H5FD_subfiling_config_t::version
+ * Version number of the H5FD_subfiling_config_t structure. Any instance
+ * passed to H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() must have
+ * a recognized version number or an error will be raised. Currently, this
+ * field should be set to #H5FD_SUBFILING_CURR_FAPL_VERSION.
+ *
+ * \var hid_t H5FD_subfiling_config_t::ioc_fapl_id
+ * The File Access Property List which is setup with the file driver that
+ * the #H5FD_SUBFILING driver will use for servicing I/O requests to the
+ * sub-files. Currently, the File Access Property List must be setup with
+ * the #H5FD_IOC driver by calling H5Pset_fapl_ioc(), but future development
+ * may allow other file drivers to be used.
+ *
+ * \var hbool_t H5FD_subfiling_config_t::require_ioc
+ * A boolean flag which indicates whether the #H5FD_SUBFILING driver should
+ * use the #H5FD_IOC driver for its I/O operations. This field should currently
+ * always be set to TRUE.
+ *
+ * \var H5FD_subfiling_shared_config_t H5FD_subfiling_config_t::shared_cfg
+ * A structure which contains the subfiling parameters that are shared between
+ * the #H5FD_SUBFILING and #H5FD_IOC drivers. This includes the sub-file stripe
+ * size, number of I/O concentrators, IOC selection method, etc.
+ *
*/
typedef struct H5FD_subfiling_config_t {
- uint32_t magic; /* set to H5FD_SUBFILING_FAPL_MAGIC */
- uint32_t version; /* set to H5FD_CURR_SUBFILING_FAPL_VERSION */
- int32_t stripe_count; /* How many io concentrators */
- int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */
- ioc_selection_t ioc_selection; /* Method to select IO Concentrators */
- hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */
- hbool_t require_ioc;
+ uint32_t magic; /* Must be set to H5FD_SUBFILING_FAPL_MAGIC */
+ uint32_t version; /* Must be set to H5FD_SUBFILING_CURR_FAPL_VERSION */
+ hid_t ioc_fapl_id; /* The FAPL setup with the stacked VFD to use for I/O concentrators */
+ hbool_t require_ioc; /* Whether to use the IOC VFD (currently must always be TRUE) */
+ H5FD_subfiling_shared_config_t
+ shared_cfg; /* Subfiling/IOC parameters (stripe size, stripe count, etc.) */
} H5FD_subfiling_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
@@ -135,41 +255,79 @@ typedef struct H5FD_subfiling_config_t {
extern "C" {
#endif
+/**
+ * \brief Internal routine to initialize #H5FD_SUBFILING driver. Not meant to be
+ * called directly by an HDF5 application
+ */
H5_DLL hid_t H5FD_subfiling_init(void);
/**
* \ingroup FAPL
*
- * \brief Modifies the file access property list to use the #H5FD_SUBFILING driver
+ * \brief Modifies the specified File Access Property List to use the #H5FD_SUBFILING driver
*
* \fapl_id
- * \param[in] vfd_config #H5FD_SUBFILING driver specific properties. If NULL, then
- * the IO concentrator VFD will be used.
+ * \param[in] vfd_config Pointer to #H5FD_SUBFILING driver configuration structure. May be NULL.
* \returns \herr_t
*
- * \details H5Pset_fapl_core() modifies the file access property list to use the
+ * \details H5Pset_fapl_subfiling() modifies the File Access Property List to use the
* #H5FD_SUBFILING driver.
*
- * \todo Expand details!
- *
- * \since 1.14.0
+ * The #H5FD_SUBFILING driver is an MPI-based file driver that allows an
+ * HDF5 application to distribute a logical HDF5 file across a collection
+ * of "sub-files" in equal-sized data segment "stripes". I/O to the logical
+ * HDF5 file is then directed to the appropriate "sub-file" according to the
+ * #H5FD_SUBFILING configuration and a system of I/O concentrators, which
+ * are MPI ranks operating worker threads.
+ *
+ * By allowing a configurable stripe size, number of I/O concentrators and
+ * method for selecting MPI ranks as I/O concentrators, the #H5FD_SUBFILING
+ * driver aims to enable an HDF5 application to find a middle ground between
+ * the single shared file and file-per-process approaches to parallel file I/O
+ * for the particular machine the application is running on. In general, the
+ * goal is to avoid some of the complexity of the file-per-process approach
+ * while also minimizing the locking issues of the single shared file approach
+ * on a parallel file system.
+ *
+ * \note Since the #H5FD_SUBFILING driver is an MPI-based file driver, the HDF5
+ * application should ensure that H5Pset_mpi_params() is called before this
+ * routine so that the appropriate MPI communicator and info objects will be
+ * setup for use by the #H5FD_SUBFILING and #H5FD_IOC drivers.
+ *
+ * \note The current architecture of the #H5FD_SUBFILING driver requires that the
+ * HDF5 application must have been initialized with MPI_Init_thread() using
+ * a value of MPI_THREAD_MULTIPLE for the thread support level.
+ *
+ * \note The \p vfd_config parameter may be NULL. In this case, the reference
+ * implementation I/O concentrator VFD will be used with the default settings
+ * of one I/O concentrator per node and a stripe size of 32MiB. Refer to the
+ * H5FD_subfiling_config_t documentation for information about configuration
+ * for the #H5FD_SUBFILING driver.
+ *
+ * \since 1.13.2
*
*/
-H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config);
+H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config);
/**
* \ingroup FAPL
*
- * \brief Queries subfiling file driver properties
+ * \brief Queries a File Access Property List for #H5FD_SUBFILING file driver properties
*
* \fapl_id
- * \param[out] config_out The subfiling fapl data.
+ * \param[out] config_out Pointer to H5FD_subfiling_config_t structure through which the
+ * #H5FD_SUBFILING file driver properties will be returned.
*
* \returns \herr_t
*
- * \details H5Pget_fapl_subfiling() queries the #H5FD_SUBFILING driver properties as set
- * by H5Pset_fapl_subfiling(). If the #H5FD_SUBFILING driver has not been set on
- * the File Access Property List, a default configuration is returned.
+ * \details H5Pget_fapl_subfiling() queries the specified File Access Property List for
+ * #H5FD_SUBFILING driver properties as set by H5Pset_fapl_subfiling(). If the
+ * #H5FD_SUBFILING driver has not been set on the File Access Property List, a
+ * default configuration is returned. An HDF5 application may use this
+ * functionality to manually configure the #H5FD_SUBFILING driver by calling
+ * H5Pget_fapl_subfiling() on a newly-created File Access Property List, adjusting
+ * the default values and then calling H5Pset_fapl_subfiling() with the configured
+ * H5FD_subfiling_config_t structure.
*
- * \since 1.14.0
+ * \since 1.13.2
*
*/
H5_DLL herr_t H5Pget_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *config_out);
diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c
index 25e80fb..b75dd81 100644
--- a/src/H5FDsubfiling/H5subfiling_common.c
+++ b/src/H5FDsubfiling/H5subfiling_common.c
@@ -19,9 +19,9 @@
#include "H5subfiling_common.h"
#include "H5subfiling_err.h"
-typedef struct { /* Format of a context map entry */
- uint64_t h5_file_id; /* key value (linear search of the cache) */
- int64_t sf_context_id; /* The return value if matching h5_file_id */
+typedef struct { /* Format of a context map entry */
+ void *file_handle; /* key value (linear search of the cache) */
+ int64_t sf_context_id; /* The return value if matching file_handle */
} file_map_to_context_t;
typedef struct stat_record {
@@ -83,13 +83,15 @@ static int sf_open_file_count = 0;
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);
-static herr_t init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out);
-static herr_t init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm,
+static herr_t init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm,
+ int64_t *context_id_out);
+static herr_t init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
sf_topology_t **app_topology_out);
-static herr_t init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology,
- MPI_Comm file_comm);
+static herr_t init_subfiling_context(subfiling_context_t *sf_context,
+ H5FD_subfiling_shared_config_t *subfiling_config,
+ sf_topology_t *app_topology, MPI_Comm file_comm);
static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags);
-static herr_t record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index);
+static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index);
static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags);
static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out,
size_t filename_out_len, char **filename_basename_out,
@@ -102,10 +104,10 @@ static herr_t open_config_file(subfiling_context_t *sf_context, const char *base
static void initialize_statistics(void);
static int numDigits(int n);
static int get_next_fid_map_index(void);
-static void clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id);
+static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id);
static int compare_hostid(const void *h1, const void *h2);
-static herr_t get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type,
- char **ioc_sel_info_str);
+static herr_t get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type,
+ char **ioc_sel_info_str);
static int count_nodes(sf_topology_t *info, MPI_Comm comm);
static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm);
static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node);
@@ -192,7 +194,7 @@ get_next_fid_map_index(void)
HDassert(sf_open_file_map || (sf_file_map_size == 0));
for (int i = 0; i < sf_file_map_size; i++) {
- if (sf_open_file_map[i].h5_file_id == UINT64_MAX) {
+ if (sf_open_file_map[i].file_handle == NULL) {
index = i;
break;
}
@@ -222,14 +224,13 @@ get_next_fid_map_index(void)
*-------------------------------------------------------------------------
*/
static void
-clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id)
+clear_fid_map_entry(void *file_handle, int64_t sf_context_id)
{
if (sf_open_file_map) {
- int i;
- for (i = 0; i < sf_file_map_size; i++) {
- if ((sf_open_file_map[i].h5_file_id == sf_fid) &&
+ for (int i = 0; i < sf_file_map_size; i++) {
+ if ((sf_open_file_map[i].file_handle == file_handle) &&
(sf_open_file_map[i].sf_context_id == sf_context_id)) {
- sf_open_file_map[i].h5_file_id = UINT64_MAX;
+ sf_open_file_map[i].file_handle = NULL;
sf_open_file_map[i].sf_context_id = -1;
return;
}
@@ -287,8 +288,9 @@ compare_hostid(const void *h1, const void *h2)
Purpose: Return a character string which represents either the
default selection method: SELECT_IOC_ONE_PER_NODE; or
if the user has selected a method via the environment
- variable (H5_IOC_SELECTION_CRITERIA), we return that
- along with any optional qualifier with for that method.
+ variable (H5FD_SUBFILING_IOC_SELECTION_CRITERIA), we
+ return that along with any optional qualifier with for
+ that method.
Errors: None.
@@ -296,10 +298,10 @@ compare_hostid(const void *h1, const void *h2)
-------------------------------------------------------------------------
*/
static herr_t
-get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **ioc_sel_info_str)
+get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str)
{
char *opt_value = NULL;
- char *env_value = HDgetenv(H5_IOC_SELECTION_CRITERIA);
+ char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA);
HDassert(ioc_selection_type);
HDassert(ioc_sel_info_str);
@@ -323,7 +325,8 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
if (errno == ERANGE) {
#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't parse value from " H5_IOC_SELECTION_CRITERIA " environment variable\n",
+ HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
+ " environment variable\n",
__func__);
#endif
@@ -332,7 +335,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
if ((check_value < 0) || (check_value >= ioc_selection_options)) {
#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid IOC selection type value %ld from " H5_IOC_SELECTION_CRITERIA
+ HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
" environment variable\n",
__func__, check_value);
#endif
@@ -340,7 +343,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
return FAIL;
}
- *ioc_selection_type = (ioc_selection_t)check_value;
+ *ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value;
*ioc_sel_info_str = opt_value;
}
@@ -784,6 +787,7 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
sf_context->sf_context_id = -1;
sf_context->h5_file_id = UINT64_MAX;
+ sf_context->h5_file_handle = NULL;
sf_context->sf_fid = -1;
sf_context->sf_write_count = 0;
sf_context->sf_read_count = 0;
@@ -912,8 +916,9 @@ H5_free_subfiling_topology(sf_topology_t *topology)
*/
/* TODO: revise description */
herr_t
-H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t ioc_selection_type,
- int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out)
+H5_open_subfiles(const char *base_filename, void *file_handle,
+ H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags, MPI_Comm file_comm,
+ int64_t *context_id_out)
{
subfiling_context_t *sf_context = NULL;
int64_t context_id = -1;
@@ -931,6 +936,15 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
goto done;
}
+ if (!subfiling_config) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: invalid subfiling configuration pointer\n", __func__);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
if (!context_id_out) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: context_id_out is NULL\n", __func__);
@@ -953,7 +967,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
#endif
/* Initialize new subfiling context ID based on configuration information */
- if (init_subfiling(ioc_selection_type, file_comm, &context_id) < 0) {
+ if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize subfiling context\n", __func__);
#endif
@@ -973,7 +987,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
}
/* Save some basic things in the new subfiling context */
- sf_context->h5_file_id = h5_file_id;
+ sf_context->h5_file_handle = file_handle;
if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) {
#ifdef H5_SUBFILING_DEBUG
@@ -1058,7 +1072,7 @@ done:
}
if (ret_value < 0) {
- clear_fid_map_entry(h5_file_id, context_id);
+ clear_fid_map_entry(file_handle, context_id);
if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) {
#ifdef H5_SUBFILING_DEBUG
@@ -1092,7 +1106,7 @@ done:
-------------------------------------------------------------------------
*/
static herr_t
-init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out)
+init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, int64_t *context_id_out)
{
subfiling_context_t *new_context = NULL;
sf_topology_t *app_topology = NULL;
@@ -1129,7 +1143,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte
* Setup the application topology information, including the computed
* number and distribution map of the set of I/O concentrators
*/
- if (init_app_topology(ioc_selection_type, comm, &app_topology) < 0) {
+ if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize application topology\n", __func__);
#endif
@@ -1140,7 +1154,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte
new_context->sf_context_id = context_id;
- if (init_subfiling_context(new_context, app_topology, comm) < 0) {
+ if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize subfiling topology object\n", __func__);
#endif
@@ -1207,7 +1221,8 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out)
+init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
+ sf_topology_t **app_topology_out)
{
sf_topology_t *app_topology = NULL;
app_layout_t *app_layout = sf_app_layout;
@@ -1392,11 +1407,11 @@ init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology
/* Check for an IOC-per-node value set in the environment */
/* TODO: should this env. var. be interpreted for other selection types? */
- if ((env_value = HDgetenv(H5_IOC_COUNT_PER_NODE))) {
+ if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) {
errno = 0;
ioc_select_val = HDstrtol(env_value, NULL, 0);
if ((ERANGE == errno)) {
- HDprintf("invalid value '%s' for " H5_IOC_COUNT_PER_NODE "\n", env_value);
+ HDprintf("invalid value '%s' for " H5FD_SUBFILING_IOC_PER_NODE "\n", env_value);
ioc_select_val = 1;
}
@@ -1509,7 +1524,8 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, MPI_Comm file_comm)
+init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_config_t *subfiling_config,
+ sf_topology_t *app_topology, MPI_Comm file_comm)
{
char *env_value = NULL;
int comm_rank;
@@ -1518,6 +1534,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
HDassert(sf_context);
HDassert(sf_context->topology == NULL);
+ HDassert(subfiling_config);
HDassert(app_topology);
HDassert(app_topology->n_io_concentrators > 0);
HDassert(MPI_COMM_NULL != file_comm);
@@ -1529,10 +1546,11 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_barrier_comm = MPI_COMM_NULL;
sf_context->sf_group_comm = MPI_COMM_NULL;
sf_context->sf_intercomm = MPI_COMM_NULL;
- sf_context->sf_stripe_size = H5FD_DEFAULT_STRIPE_DEPTH;
+ sf_context->sf_stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
sf_context->sf_write_count = 0;
sf_context->sf_read_count = 0;
sf_context->sf_eof = HADDR_UNDEF;
+ sf_context->h5_file_handle = NULL;
sf_context->sf_fid = -1;
sf_context->sf_group_size = 1;
sf_context->sf_group_rank = 0;
@@ -1545,8 +1563,14 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_logfile = NULL;
#endif
- /* Check for an IOC stripe size setting in the environment */
- if ((env_value = HDgetenv(H5_IOC_STRIPE_SIZE))) {
+ /*
+ * Set IOC stripe size from subfiling configuration, then check
+ * for a setting from the environment
+ */
+ if (subfiling_config->stripe_size > 0)
+ sf_context->sf_stripe_size = subfiling_config->stripe_size;
+
+ if ((env_value = HDgetenv(H5FD_SUBFILING_STRIPE_SIZE))) {
long long stripe_size = -1;
errno = 0;
@@ -1554,7 +1578,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
stripe_size = HDstrtoll(env_value, NULL, 0);
if (ERANGE == errno) {
#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: invalid stripe size setting '%s' for " H5_IOC_STRIPE_SIZE "\n", __func__,
+ HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__,
env_value);
#endif
@@ -1574,7 +1598,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_blocksize_per_stripe = sf_context->sf_stripe_size * app_topology->n_io_concentrators;
/* Check for a subfile name prefix setting in the environment */
- if ((env_value = HDgetenv(H5_IOC_SUBFILE_PREFIX))) {
+ if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) {
if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't copy subfile prefix value\n", __func__);
@@ -1767,7 +1791,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
* There shouldn't be any issue, but check the status and
* return if there was a problem.
*/
- if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0) {
+ if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__);
#endif
@@ -1792,6 +1816,21 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
0,
0,
0};
+ h5_stat_t st;
+
+ /* Retrieve Inode value for HDF5 stub file */
+ if (HDstat(sf_context->h5_filename, &st) < 0) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("[%s %d]: couldn't stat file %s\n", __func__,
+ sf_context->topology->app_layout->world_rank, sf_context->h5_filename);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
+ sf_context->h5_file_id = (uint64_t)st.st_ino;
if (ioc_open_file(&msg, file_acc_flags) < 0) {
#ifdef H5_SUBFILING_DEBUG
@@ -1806,7 +1845,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
done:
if (ret_value < 0) {
- clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
+ clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id);
}
return ret_value;
@@ -1843,7 +1882,7 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index)
+record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index)
{
int index;
herr_t ret_value = SUCCEED;
@@ -1861,17 +1900,17 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
for (int i = 0; i < sf_file_map_size; i++) {
- sf_open_file_map[i].h5_file_id = UINT64_MAX;
+ sf_open_file_map[i].file_handle = NULL;
sf_open_file_map[i].sf_context_id = -1;
}
}
for (index = 0; index < sf_file_map_size; index++) {
- if (sf_open_file_map[index].h5_file_id == h5_file_id)
+ if (sf_open_file_map[index].file_handle == file_handle)
goto done;
- if (sf_open_file_map[index].h5_file_id == UINT64_MAX) {
- sf_open_file_map[index].h5_file_id = h5_file_id;
+ if (sf_open_file_map[index].file_handle == NULL) {
+ sf_open_file_map[index].file_handle = file_handle;
sf_open_file_map[index].sf_context_id = subfile_context_id;
if (next_index) {
@@ -1899,14 +1938,14 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next
sf_file_map_size *= 2;
for (int i = index; i < sf_file_map_size; i++) {
- sf_open_file_map[i].h5_file_id = UINT64_MAX;
+ sf_open_file_map[i].file_handle = NULL;
}
if (next_index) {
*next_index = index;
}
- sf_open_file_map[index].h5_file_id = h5_file_id;
+ sf_open_file_map[index].file_handle = file_handle;
sf_open_file_map[index++].sf_context_id = subfile_context_id;
}
@@ -1957,13 +1996,12 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
{
subfiling_context_t *sf_context = NULL;
int64_t file_context_id;
- hbool_t mutex_locked = FALSE;
- mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
- char *filepath = NULL;
- char *subfile_dir = NULL;
- char *base = NULL;
- int fd = -1;
- herr_t ret_value = SUCCEED;
+ mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+ char *filepath = NULL;
+ char *subfile_dir = NULL;
+ char *base = NULL;
+ int fd = -1;
+ herr_t ret_value = SUCCEED;
HDassert(msg);
@@ -2011,9 +2049,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
goto done;
}
- begin_thread_exclusive();
- mutex_locked = TRUE;
-
/* Attempt to create/open the subfile for this IOC rank */
if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "failed to open subfile");
@@ -2033,11 +2068,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
}
done:
- if (mutex_locked) {
- end_thread_exclusive();
- mutex_locked = FALSE;
- }
-
if (ret_value < 0) {
if (sf_context) {
HDfree(sf_context->sf_filename);
@@ -2074,7 +2104,6 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
size_t filename_out_len, char **filename_basename_out, char **subfile_dir_out)
{
FILE *config_file = NULL;
- char *config_buf = NULL;
char *subfile_dir = NULL;
char *prefix = NULL;
char *base = NULL;
@@ -2181,83 +2210,14 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* in order to generate the correct subfile names.
*/
if (config_file) {
- char *ioc_substr = NULL;
- long config_file_len = 0;
-
- if (HDfseek(config_file, 0, SEEK_END) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__,
- errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if ((config_file_len = HDftell(config_file)) < 0) {
+ if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) {
#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno);
+ HDprintf("%s: couldn't read from subfiling configuration file\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
-
- if (HDfseek(config_file, 0, SEEK_SET) < 0) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__,
- errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- config_buf[config_file_len] = '\0';
-
- if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &n_io_concentrators)) {
-#ifdef H5_SUBFILING_DEBUG
- HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n",
- __func__);
-#endif
-
- ret_value = FAIL;
- goto done;
- }
-
- if (n_io_concentrators <= 0) {
- HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n",
- __func__, n_io_concentrators);
- ret_value = FAIL;
- goto done;
- }
}
/*
@@ -2272,7 +2232,7 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* ABC.h5.subfile_<file-number>.config
*/
num_digits = numDigits(n_io_concentrators);
- HDsnprintf(filename_out, filename_out_len, "%s/%s" SF_FILENAME_TEMPLATE, subfile_dir, base,
+ HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base,
sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1,
n_io_concentrators);
@@ -2296,7 +2256,6 @@ done:
}
}
- HDfree(config_buf);
HDfree(prefix);
return ret_value;
@@ -2361,8 +2320,8 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
goto done;
}
- HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename,
- sf_context->h5_file_id);
+ HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
+ base_filename, sf_context->h5_file_id);
/* Determine whether a subfiling configuration file exists */
errno = 0;
@@ -2455,7 +2414,7 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
/* Write out each subfile name to the configuration file */
num_digits = numDigits(n_io_concentrators);
for (int k = 0; k < n_io_concentrators; k++) {
- HDsnprintf(line_buf, PATH_MAX, "%s" SF_FILENAME_TEMPLATE "\n", base_filename,
+ HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename,
sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators);
if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
@@ -2546,8 +2505,8 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con
goto done;
}
- HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename,
- sf_context->h5_file_id);
+ HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
+ base_filename, sf_context->h5_file_id);
/* Determine whether a subfiling configuration file exists */
errno = 0;
@@ -2595,6 +2554,110 @@ done:
}
/*-------------------------------------------------------------------------
+ * Function: H5_get_num_iocs_from_config_file
+ *
+ * Purpose: Reads a Subfiling configuration file to get the number of
+ * I/O concentrators used for the logical HDF5 file.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators)
+{
+ char *config_buf = NULL;
+ char *ioc_substr = NULL;
+ long config_file_len = 0;
+ int read_n_io_concs = 0;
+ herr_t ret_value = SUCCEED;
+
+ HDassert(config_file);
+ HDassert(n_io_concentrators);
+
+ if (HDfseek(config_file, 0, SEEK_END) < 0) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if ((config_file_len = HDftell(config_file)) < 0) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if (HDfseek(config_file, 0, SEEK_SET) < 0) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__,
+ errno);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ config_buf[config_file_len] = '\0';
+
+ if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) {
+#ifdef H5_SUBFILING_DEBUG
+ HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n",
+ __func__);
+#endif
+
+ ret_value = FAIL;
+ goto done;
+ }
+
+ if (read_n_io_concs <= 0) {
+ HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n",
+ __func__, read_n_io_concs);
+ ret_value = FAIL;
+ goto done;
+ }
+
+ *n_io_concentrators = read_n_io_concs;
+
+done:
+ HDfree(config_buf);
+
+ H5_SUBFILING_FUNC_LEAVE;
+}
+
+/*-------------------------------------------------------------------------
* Function: H5_close_subfiles
*
* Purpose: This is a simple wrapper function for the internal version
@@ -2713,9 +2776,9 @@ H5_close_subfiles(int64_t subfiling_context_id)
}
#endif
- /* The map from FID to subfiling context can now be cleared */
- if (sf_context->h5_file_id != UINT64_MAX) {
- clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
+ /* The map from file handle to subfiling context can now be cleared */
+ if (sf_context->h5_file_handle != NULL) {
+ clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id);
}
if (sf_context->topology->rank_is_ioc) {
@@ -2822,10 +2885,10 @@ done:
}
/*-------------------------------------------------------------------------
- * Function: H5_subfile_fid_to_context
+ * Function: H5_subfile_fhandle_to_context
*
* Purpose: This is a basic lookup function which returns the subfiling
- * context id associated with the specified file->inode.
+ * context id associated with the specified file handle.
*
* Return: Non-negative subfiling context ID if the context exists
* Negative on failure or if the subfiling context doesn't
@@ -2839,7 +2902,7 @@ done:
*-------------------------------------------------------------------------
*/
int64_t
-H5_subfile_fid_to_context(uint64_t sf_fid)
+H5_subfile_fhandle_to_context(void *file_handle)
{
if (!sf_open_file_map) {
#ifdef H5_SUBFILING_DEBUG
@@ -2850,13 +2913,13 @@ H5_subfile_fid_to_context(uint64_t sf_fid)
}
for (int i = 0; i < sf_file_map_size; i++) {
- if (sf_open_file_map[i].h5_file_id == sf_fid) {
+ if (sf_open_file_map[i].file_handle == file_handle) {
return sf_open_file_map[i].sf_context_id;
}
}
return -1;
-} /* end H5_subfile_fid_to_context() */
+} /* end H5_subfile_fhandle_to_context() */
#ifdef H5_SUBFILING_DEBUG
void
@@ -2873,7 +2936,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...)
goto done;
}
- begin_thread_exclusive();
+ H5FD_ioc_begin_thread_exclusive();
if (sf_context->sf_logfile) {
HDvfprintf(sf_context->sf_logfile, fmt, log_args);
@@ -2886,7 +2949,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...)
HDfflush(stdout);
}
- end_thread_exclusive();
+ H5FD_ioc_end_thread_exclusive();
done:
va_end(log_args);
diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h
index 19c5c0c..3195c9d 100644
--- a/src/H5FDsubfiling/H5subfiling_common.h
+++ b/src/H5FDsubfiling/H5subfiling_common.h
@@ -22,8 +22,7 @@
#include "H5private.h"
#include "H5Iprivate.h"
-/* TODO: needed for ioc_selection_t, which also needs to be public */
-#include "H5FDioc.h"
+#include "H5FDsubfiling.h"
/*
* Some definitions for debugging the Subfiling feature
@@ -31,30 +30,6 @@
/* #define H5_SUBFILING_DEBUG */
/*
- * The following is our basic template for a subfile filename.
- * Note that eventually we shouldn't use 0_of_N since we
- * intend to use the user defined HDF5 filename for a
- * zeroth subfile as well as for all metadata.
- */
-#define SF_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d"
-
-/*
- * The following is our basic template for a subfiling
- * configuration filename.
- */
-#define SF_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config"
-
-/*
- * Environment variables interpreted by the HDF5 subfiling feature
- */
-#define H5_IOC_SELECTION_CRITERIA "H5_IOC_SELECTION_CRITERIA"
-#define H5_IOC_COUNT_PER_NODE "H5_IOC_COUNT_PER_NODE"
-#define H5_IOC_STRIPE_SIZE "H5_IOC_STRIPE_SIZE"
-#define H5_IOC_SUBFILE_PREFIX "H5_IOC_SUBFILE_PREFIX"
-
-#define H5FD_DEFAULT_STRIPE_DEPTH (32 * 1024 * 1024)
-
-/*
* MPI Tags are 32 bits, we treat them as unsigned
* to allow the use of the available bits for RPC
* selections, i.e. a message from the VFD read or write functions
@@ -166,18 +141,19 @@ typedef struct app_layout_t {
/* This typedef defines things related to IOC selections */
typedef struct topology {
- app_layout_t *app_layout; /* Pointer to our layout struct */
- bool rank_is_ioc; /* Indicates that we host an IOC */
- int subfile_rank; /* Valid only if rank_is_ioc */
- int n_io_concentrators; /* Number of IO concentrators */
- int *io_concentrators; /* Vector of ranks which are IOCs */
- int *subfile_fd; /* file descriptor (if IOC) */
- ioc_selection_t selection_type; /* Cache our IOC selection criteria */
+ app_layout_t *app_layout; /* Pointer to our layout struct */
+ bool rank_is_ioc; /* Indicates that we host an IOC */
+ int subfile_rank; /* Valid only if rank_is_ioc */
+ int n_io_concentrators; /* Number of IO concentrators */
+ int *io_concentrators; /* Vector of ranks which are IOCs */
+ int *subfile_fd; /* file descriptor (if IOC) */
+ H5FD_subfiling_ioc_select_t selection_type; /* Cache our IOC selection criteria */
} sf_topology_t;
typedef struct {
int64_t sf_context_id; /* Generated context ID which embeds the cache index */
uint64_t h5_file_id; /* GUID (basically the inode value) */
+ void *h5_file_handle; /* Low-level handle for the HDF5 stub file */
int sf_fid; /* value returned by open(file,..) */
size_t sf_write_count; /* Statistics: write_count */
size_t sf_read_count; /* Statistics: read_count */
@@ -236,15 +212,16 @@ extern app_layout_t *sf_app_layout;
extern "C" {
#endif
-H5_DLL herr_t H5_open_subfiles(const char *base_filename, uint64_t h5_file_id,
- ioc_selection_t ioc_selection_type, int file_acc_flags, MPI_Comm file_comm,
- int64_t *context_id_out);
+H5_DLL herr_t H5_open_subfiles(const char *base_filename, void *h5_file_handle,
+ H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags,
+ MPI_Comm file_comm, int64_t *context_id_out);
H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id);
H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val);
H5_DLL void *H5_get_subfiling_object(int64_t object_id);
-H5_DLL int64_t H5_subfile_fid_to_context(uint64_t h5_fid);
+H5_DLL int64_t H5_subfile_fhandle_to_context(void *file_handle);
H5_DLL herr_t H5_free_subfiling_object(int64_t object_id);
+H5_DLL herr_t H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators);
H5_DLL void H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...);
diff --git a/src/H5trace.c b/src/H5trace.c
index 2b714fb..fd9da44 100644
--- a/src/H5trace.c
+++ b/src/H5trace.c
@@ -1098,6 +1098,11 @@ H5_trace_args(H5RS_str_t *rs, const char *type, va_list ap)
H5RS_acat(rs, "H5_VFD_ROS3");
break;
#endif
+#ifdef H5_HAVE_SUBFILING_VFD
+ case H5_VFD_SUBFILING:
+ H5RS_acat(rs, "H5_VFD_SUBFILING");
+ break;
+#endif
case H5_VFD_ONION:
H5RS_acat(rs, "H5_VFD_ONION");
break;