diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-08-04 17:56:48 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-04 17:56:48 (GMT) |
commit | bf07e0f2c9b381509abbde59fca8bea5445da261 (patch) | |
tree | 69551f0ec6658cc4e970bf1080fa4c5b256b289f /src | |
parent | a71534fcc248737491adcfd770c7ab69b4adc2d4 (diff) | |
download | hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.zip hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.gz hdf5-bf07e0f2c9b381509abbde59fca8bea5445da261.tar.bz2 |
Subfiling updates for release (#1963)
* Remove generated file h5fuse.sh
* Link pthreads library when Subfiling VFD is built
* Switch to MPI I/O driver for Subfiling HDF5 stub file
* Rough first implementation for Subfiling file deletion
* Subfiling VFD - get file dirname for file deletion
* Subfiling VFD - set lock callback to NULL for now to avoid performance
issues
* Committing clang-format changes
* Minor tidying up of Subfiling testing
* Fixups for Subfiling VFD support in tools
* Tidy up Subfiling public interface and add Doxygen
* Respect Subfiling configuration settings from application
* Add release note for Subfiling VFD
* Committing clang-format changes
* Committing clang-format changes
* Shorten some Subfiling environment variable names
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/H5FDsubfiling/H5FDioc.c | 277 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDioc.h | 193 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDioc_threads.c | 20 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfiling.c | 186 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfiling.h | 358 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.c | 371 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.h | 51 | ||||
-rw-r--r-- | src/H5trace.c | 5 |
8 files changed, 925 insertions, 536 deletions
diff --git a/src/H5FDsubfiling/H5FDioc.c b/src/H5FDsubfiling/H5FDioc.c index 2eb7970..f779c46 100644 --- a/src/H5FDsubfiling/H5FDioc.c +++ b/src/H5FDsubfiling/H5FDioc.c @@ -16,6 +16,8 @@ * another underlying VFD. Maintains two files simultaneously. */ +#include <libgen.h> + /* This source code file is part of the H5FD driver module */ #include "H5FDdrvr_module.h" @@ -25,7 +27,7 @@ #include "H5FDprivate.h" /* File drivers */ #include "H5FDioc.h" /* IOC file driver */ #include "H5FDioc_priv.h" /* IOC file driver */ -#include "H5FDsec2.h" /* Sec2 VFD */ +#include "H5FDmpio.h" /* MPI I/O VFD */ #include "H5FLprivate.h" /* Free Lists */ #include "H5Fprivate.h" /* File access */ #include "H5Iprivate.h" /* IDs */ @@ -53,7 +55,7 @@ typedef struct H5FD_ioc_t { int mpi_rank; int mpi_size; - H5FD_t *ioc_file; /* native HDF5 file pointer (sec2) */ + H5FD_t *ioc_file; /* native HDF5 file pointer */ int64_t context_id; /* The value used to lookup a subfiling context for the file */ @@ -68,7 +70,6 @@ typedef struct H5FD_ioc_t { * Windows code further below. */ dev_t device; /* file device number */ - ino_t inode; /* file i-node number */ #else /* Files in windows are uniquely identified by the volume serial * number and the file index (both low and high parts). @@ -161,7 +162,7 @@ static herr_t H5FD__ioc_ctl(H5FD_t *file, uint64_t op_code, uint64_t flags, const void *input, void **result); */ -static herr_t H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out); +static herr_t H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out); static herr_t H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa); static int H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr); @@ -358,13 +359,13 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list"); if (vfd_config == NULL) { - if (NULL == (ioc_conf = HDcalloc(1, sizeof(*ioc_conf)))) + if (NULL == (ioc_conf = H5FL_CALLOC(H5FD_ioc_config_t))) H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate IOC VFD configuration"); - ioc_conf->ioc_fapl_id = H5I_INVALID_HID; + ioc_conf->under_fapl_id = H5I_INVALID_HID; /* Get IOC VFD defaults */ - if (H5FD__ioc_get_default_config(ioc_conf) < 0) + if (H5FD__ioc_get_default_config(fapl_id, ioc_conf) < 0) H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't get default IOC VFD configuration"); vfd_config = ioc_conf; @@ -377,9 +378,9 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config) done: if (ioc_conf) { - if (ioc_conf->ioc_fapl_id >= 0 && H5I_dec_ref(ioc_conf->ioc_fapl_id) < 0) - H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC FAPL"); - HDfree(ioc_conf); + if (ioc_conf->under_fapl_id >= 0 && H5I_dec_ref(ioc_conf->under_fapl_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC under FAPL"); + H5FL_FREE(H5FD_ioc_config_t, ioc_conf); } H5_SUBFILING_FUNC_LEAVE; @@ -423,7 +424,7 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out) } if (use_default_config) { - if (H5FD__ioc_get_default_config(config_out) < 0) + if (H5FD__ioc_get_default_config(fapl_id, config_out) < 0) H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get default IOC VFD configuration"); } else { @@ -431,8 +432,8 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out) HDmemcpy(config_out, config_ptr, sizeof(H5FD_ioc_config_t)); /* Copy the driver info value */ - if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(config_out->ioc_fapl_id)) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC FAPL"); + if (H5FD__copy_plist(config_ptr->under_fapl_id, &(config_out->under_fapl_id)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC under FAPL"); } done: @@ -451,35 +452,53 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out) +H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out) { - herr_t ret_value = SUCCEED; + MPI_Comm comm = MPI_COMM_NULL; + MPI_Info info = MPI_INFO_NULL; + herr_t ret_value = SUCCEED; HDassert(config_out); HDmemset(config_out, 0, sizeof(*config_out)); config_out->magic = H5FD_IOC_FAPL_MAGIC; - config_out->version = H5FD_CURR_IOC_FAPL_VERSION; - config_out->ioc_fapl_id = H5I_INVALID_HID; - config_out->stripe_count = 0; - config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH; - config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE; + config_out->version = H5FD_IOC_CURR_FAPL_VERSION; + config_out->under_fapl_id = H5I_INVALID_HID; + + /* + * Use default subfiling configuration. Do NOT call + * H5Pget_fapl_subfiling here as that can cause issues + */ + config_out->subf_config.ioc_selection = SELECT_IOC_ONE_PER_NODE; + config_out->subf_config.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE; + config_out->subf_config.stripe_count = 0; /* Create a default FAPL and choose an appropriate underlying driver */ - if ((config_out->ioc_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) + if ((config_out->under_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTCREATE, FAIL, "can't create default FAPL"); - /* Currently, only sec2 vfd supported */ - if (H5Pset_fapl_sec2(config_out->ioc_fapl_id) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set Sec2 VFD on IOC FAPL"); + /* Check if any MPI parameters were set on the FAPL */ + if (H5Pget_mpi_params(fapl_id, &comm, &info) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI Comm/Info"); + if (comm == MPI_COMM_NULL) + comm = MPI_COMM_WORLD; + + /* Hardwire MPI I/O VFD for now */ + if (H5Pset_fapl_mpio(config_out->under_fapl_id, comm, info) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI I/O VFD on IOC under FAPL"); /* Specific to this I/O Concentrator */ - config_out->thread_pool_count = H5FD_IOC_THREAD_POOL_SIZE; + config_out->thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; done: + if (H5_mpi_comm_free(&comm) < 0) + H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Communicator"); + if (H5_mpi_info_free(&info) < 0) + H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Info object"); + if (ret_value < 0) { - if (config_out->ioc_fapl_id >= 0 && H5Pclose(config_out->ioc_fapl_id) < 0) + if (config_out->under_fapl_id >= 0 && H5Pclose(config_out->under_fapl_id) < 0) H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTCLOSEOBJ, FAIL, "can't close FAPL"); } @@ -510,7 +529,7 @@ H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa) HDassert(fa != NULL); - if (fa->version != H5FD_CURR_IOC_FAPL_VERSION) + if (fa->version != H5FD_IOC_CURR_FAPL_VERSION) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_ioc_config_t version"); if (fa->magic != H5FD_IOC_FAPL_MAGIC) @@ -696,8 +715,8 @@ H5FD__ioc_fapl_copy(const void *_old_fa) HDmemcpy(new_fa_ptr, old_fa_ptr, sizeof(H5FD_ioc_config_t)); /* Copy the FAPL */ - if (H5FD__copy_plist(old_fa_ptr->ioc_fapl_id, &(new_fa_ptr->ioc_fapl_id)) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC FAPL"); + if (H5FD__copy_plist(old_fa_ptr->under_fapl_id, &(new_fa_ptr->under_fapl_id)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC under FAPL"); ret_value = (void *)new_fa_ptr; @@ -728,8 +747,8 @@ H5FD__ioc_fapl_free(void *_fapl) /* Check arguments */ HDassert(fapl); - if (fapl->ioc_fapl_id >= 0 && H5I_dec_ref(fapl->ioc_fapl_id) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close FAPL ID"); + if (fapl->under_fapl_id >= 0 && H5I_dec_ref(fapl->under_fapl_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close IOC under FAPL ID"); /* Free the property list */ fapl = H5FL_FREE(H5FD_ioc_config_t, fapl); @@ -774,10 +793,10 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) if (NULL == (file_ptr = (H5FD_ioc_t *)H5FL_CALLOC(H5FD_ioc_t))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTALLOC, NULL, "unable to allocate file struct"); - file_ptr->comm = MPI_COMM_NULL; - file_ptr->info = MPI_INFO_NULL; - file_ptr->context_id = -1; - file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID; + file_ptr->comm = MPI_COMM_NULL; + file_ptr->info = MPI_INFO_NULL; + file_ptr->context_id = -1; + file_ptr->fa.under_fapl_id = H5I_INVALID_HID; /* Get the driver-specific file access properties */ if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(fapl_id))) @@ -814,7 +833,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) config_ptr = H5P_peek_driver_info(plist_ptr); if (!config_ptr || (H5P_FILE_ACCESS_DEFAULT == fapl_id)) { - if (H5FD__ioc_get_default_config(&default_config) < 0) + if (H5FD__ioc_get_default_config(fapl_id, &default_config) < 0) H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get default IOC VFD configuration"); config_ptr = &default_config; } @@ -848,11 +867,11 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) } /* Copy the ioc FAPL. */ - if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(file_ptr->fa.ioc_fapl_id)) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy FAPL"); + if (H5FD__copy_plist(config_ptr->under_fapl_id, &(file_ptr->fa.under_fapl_id)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy IOC under FAPL"); /* Check the underlying driver (sec2/mpio/etc.) */ - if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->ioc_fapl_id))) + if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->under_fapl_id))) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list"); if (H5P_peek(plist_ptr, H5F_ACS_FILE_DRV_NAME, &driver_prop) < 0) @@ -861,13 +880,13 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "invalid driver ID in file access property list"); - if (driver->value != H5_VFD_SEC2) { + if (driver->value != H5_VFD_MPIO) { H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, - "unable to open file '%s' - only Sec2 VFD is currently supported", name); + "unable to open file '%s' - only MPI I/O VFD is currently supported", name); } else { - subfiling_context_t *sf_context = NULL; - uint64_t inode_id = UINT64_MAX; + subfiling_context_t *sf_context = NULL; + void *file_handle = NULL; int ioc_flags; int l_error = 0; int g_error = 0; @@ -881,34 +900,12 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) if (H5F_ACC_EXCL & flags) ioc_flags |= O_EXCL; - file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->ioc_fapl_id, HADDR_UNDEF); + file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->under_fapl_id, HADDR_UNDEF); if (file_ptr->ioc_file) { - h5_stat_t sb; - void *file_handle = NULL; - - if (file_ptr->mpi_rank == 0) { - if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->ioc_fapl_id, &file_handle) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle"); - - if (HDfstat(*(int *)file_handle, &sb) < 0) - H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file"); - - HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t)); - file_ptr->inode = sb.st_ino; - inode_id = (uint64_t)sb.st_ino; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&inode_id, 1, MPI_UINT64_T, 0, file_ptr->comm))) - H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code); - - if (file_ptr->mpi_rank != 0) - file_ptr->inode = (ino_t)inode_id; + if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->under_fapl_id, &file_handle) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle"); } else { - /* The two-step file opening approach may be - * the root cause for the sec2 open to return a NULL. - * It is prudent then, to collectively fail (early) in this case. - */ l_error = 1; } @@ -925,7 +922,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) * context ID will be returned, which is used for * further interactions with this file's subfiles. */ - if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags, + if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.subf_config, ioc_flags, file_ptr->comm, &file_ptr->context_id) < 0) H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'", name); @@ -991,9 +988,9 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr) } #endif - if (file_ptr->fa.ioc_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.ioc_fapl_id) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close FAPL"); - file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID; + if (file_ptr->fa.under_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.under_fapl_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close IOC under FAPL"); + file_ptr->fa.under_fapl_id = H5I_INVALID_HID; /* Close underlying file */ if (file_ptr->ioc_file) { @@ -1331,7 +1328,7 @@ H5FD__ioc_get_handle(H5FD_t *_file, hid_t H5_ATTR_UNUSED fapl, void **file_handl HDassert(file->ioc_file); HDassert(file_handle); - if (H5FD_get_vfd_handle(file->ioc_file, file->fa.ioc_fapl_id, file_handle) < 0) + if (H5FD_get_vfd_handle(file->ioc_file, file->fa.under_fapl_id, file_handle) < 0) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "unable to get handle of R/W file"); done: @@ -1597,12 +1594,138 @@ done: static herr_t H5FD__ioc_del(const char *name, hid_t fapl) { - herr_t ret_value = SUCCEED; + H5P_genplist_t *plist; + h5_stat_t st; + MPI_Comm comm = MPI_COMM_NULL; + MPI_Info info = MPI_INFO_NULL; + FILE *config_file = NULL; + char *name_copy = NULL; + char *name_copy2 = NULL; + char *tmp_filename = NULL; + char *base_filename = NULL; + char *file_dirname = NULL; + int mpi_rank = INT_MAX; + int mpi_code; + herr_t ret_value = SUCCEED; + + /* TODO: Eventually this routine should share common code + * with H5_subfiling_common's routines so it doesn't get + * out of sync + */ + + if (NULL == (plist = H5P_object_verify(fapl, H5P_FILE_ACCESS))) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list"); + HDassert(H5FD_IOC == H5P_peek_driver(plist)); + + if (H5FD_mpi_self_initialized) { + comm = MPI_COMM_WORLD; + } + else { + /* Get the MPI communicator and info from the fapl */ + if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &info) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI info object"); + if (H5P_get(plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &comm) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI communicator"); + } + + /* Get the MPI rank of this process */ + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &mpi_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); + + if (mpi_rank == 0) { + int n_io_concentrators = 0; + int num_digits = 0; + + if (HDstat(name, &st) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SYSERRSTR, FAIL, "HDstat failed"); - (void)name; - (void)fapl; + if (NULL == (name_copy = HDstrdup(name))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename"); + if (NULL == (name_copy2 = HDstrdup(name))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename"); - /* TODO: implement later */ + base_filename = basename(name_copy); + file_dirname = dirname(name_copy2); + + /* Try to open the subfiling configuration file and get the number of IOCs */ + if (NULL == (tmp_filename = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "can't allocate config file name buffer"); + + /* TODO: No support for subfile directory prefix currently */ + HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, file_dirname, + base_filename, (uint64_t)st.st_ino); + + if (NULL == (config_file = HDfopen(tmp_filename, "r"))) { + if (ENOENT == errno) { +#ifdef H5FD_IOC_DEBUG + HDprintf("** WARNING: couldn't delete Subfiling configuration file '%s'\n", tmp_filename); +#endif + + H5_SUBFILING_GOTO_DONE(SUCCEED); + } + else + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "can't open subfiling config file"); + } + + if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL, "can't read subfiling config file"); + + /* Delete the Subfiling configuration file */ + if (EOF == HDfclose(config_file)) { + config_file = NULL; + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "can't close subfiling config file"); + } + + config_file = NULL; + + if (HDremove(tmp_filename) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "can't delete subfiling config file"); + + /* Try to delete each of the subfiles */ + num_digits = (int)(HDlog10(n_io_concentrators) + 1); + + for (int i = 0; i < n_io_concentrators; i++) { + /* TODO: No support for subfile directory prefix currently */ + HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, file_dirname, + base_filename, (uint64_t)st.st_ino, num_digits, i + 1, n_io_concentrators); + + if (HDremove(tmp_filename) < 0) { +#ifdef H5FD_IOC_DEBUG + HDprintf("** WARNING: couldn't delete subfile '%s'\n", tmp_filename); +#endif + + if (ENOENT != errno) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete subfile"); + } + } + + /* Delete the HDF5 stub file */ + if (HDremove(name) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete HDF5 file"); + } + +done: + if (config_file) + if (EOF == HDfclose(config_file)) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "can't close subfiling config file"); + + /* Set up a barrier (don't want processes to run ahead of the delete) */ + if (MPI_SUCCESS != (mpi_code = MPI_Barrier(comm))) + H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Barrier failed", mpi_code); + + /* Free duplicated MPI Communicator and Info objects */ + if (H5_mpi_comm_free(&comm) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI communicator"); + if (H5_mpi_info_free(&info) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI info object"); + + HDfree(tmp_filename); + HDfree(name_copy); + HDfree(name_copy2); H5_SUBFILING_FUNC_LEAVE; } diff --git a/src/H5FDsubfiling/H5FDioc.h b/src/H5FDsubfiling/H5FDioc.h index 04850f3..48102ac 100644 --- a/src/H5FDsubfiling/H5FDioc.h +++ b/src/H5FDsubfiling/H5FDioc.h @@ -11,7 +11,7 @@ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ /* - * Purpose: The public header file for the "io concentrator" driver. + * Purpose: The public header file for the "I/O concentrator" driver. * This provides a similar functionality to that of the subfiling driver * but introduces the necessary file access functionality via a multi- * threading MPI service @@ -20,72 +20,191 @@ #ifndef H5FDioc_H #define H5FDioc_H +#include "H5FDsubfiling.h" + #ifdef H5_HAVE_IOC_VFD +/** + * \def H5FD_IOC + * Macro that returns the identifier for the #H5FD_IOC driver. \hid_t{file driver} + */ #define H5FD_IOC (H5FDperform_init(H5FD_ioc_init)) #else #define H5FD_IOC (H5I_INVALID_HID) #endif +/** + * \def H5FD_IOC_NAME + * The canonical name for the #H5FD_IOC driver + */ #define H5FD_IOC_NAME "ioc" #ifdef H5_HAVE_IOC_VFD #ifndef H5FD_IOC_FAPL_MAGIC -#define H5FD_CURR_IOC_FAPL_VERSION 1 -#define H5FD_IOC_FAPL_MAGIC 0xFED21331 +/** + * \def H5FD_IOC_CURR_FAPL_VERSION + * The version number of the H5FD_ioc_config_t configuration + * structure for the #H5FD_IOC driver + */ +#define H5FD_IOC_CURR_FAPL_VERSION 1 +/** + * \def H5FD_IOC_FAPL_MAGIC + * Unique number used to distinguish the #H5FD_IOC driver from other HDF5 file drivers + */ +#define H5FD_IOC_FAPL_MAGIC 0xFED21331 #endif -#define H5FD_IOC_THREAD_POOL_SIZE 4 +/** + * \def H5FD_IOC_DEFAULT_THREAD_POOL_SIZE + * The default number of I/O concentrator worker threads + */ +#define H5FD_IOC_DEFAULT_THREAD_POOL_SIZE 4 /* * Environment variables interpreted by the IOC VFD */ -#define H5_IOC_THREAD_POOL_COUNT "H5_IOC_THREAD_POOL_COUNT" -/* - * Define the various constants to allow different allocations - * of subfile ranks. The choices are self explanatory, starting - * with the default of one IO Concentrator (IOC) per node and - * lastly, defining a fixed number. +/** + * \def H5FD_IOC_THREAD_POOL_SIZE + * Macro for name of the environment variable that controls/overrides + * the number of I/O concentrator worker threads + * + * The value set for this environment variable is interpreted as an + * int value and must be > 0. */ -typedef enum { - SELECT_IOC_ONE_PER_NODE = 0, /* Default */ - SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */ - SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */ - SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */ - ioc_selection_options /* (Uses same selection as every Nth rank) */ -} ioc_selection_t; +#define H5FD_IOC_THREAD_POOL_SIZE "H5FD_IOC_THREAD_POOL_SIZE" -/* - * In addition to the common configuration fields, we can have - * VFD specific fields. Here's one for the IO Concentrator VFD. +//! <!-- [H5FD_ioc_config_t_snip] --> +/** + * \struct H5FD_ioc_config_t + * \brief Configuration structure for H5Pset_fapl_ioc() / H5Pget_fapl_ioc() * - * thread_pool_count (int32_t) - * Indicate the number of helper threads that we want for - * creating a thread pool + * \details H5FD_ioc_config_t is a public structure that is used to pass + * configuration data to the #H5FD_IOC driver via a File Access + * Property List. A pointer to an instance of this structure is + * a parameter to H5Pset_fapl_ioc() and H5Pget_fapl_ioc(). + * + * The #H5FD_IOC driver shares much of its configuration with the + * #H5FD_SUBFILING driver and so its configuration structure + * contains an instance of a H5FD_subfiling_shared_config_t + * configuration structure. + * + * \var uint32_t H5FD_ioc_config_t::magic + * A somewhat unique number which distinguishes the #H5FD_IOC driver + * from other drivers. Used in combination with a version number, it + * can help to validate a user-generated File Access Property List. + * This field should be set to #H5FD_IOC_FAPL_MAGIC. + * + * \var uint32_t H5FD_ioc_config_t::version + * Version number of the H5FD_ioc_config_t structure. Any instance passed + * to H5Pset_fapl_ioc() / H5Pget_fapl_ioc() must have a recognized version + * number or an error will be raised. Currently, this field should be set + * to #H5FD_IOC_CURR_FAPL_VERSION. + * + * \var hid_t H5FD_ioc_config_t::under_fapl_id + * The File Access Property List which is setup with the file driver + * to use for I/O to the HDF5 stub file. The stub file looks like a + * typical HDF5 file, but currently only contains the superblock metadata + * for compatibility with legacy HDF5 applications. The default driver used + * is currently the #H5FD_MPIO driver. + * + * \var int32_t H5FD_ioc_config_t::thread_pool_count + * The number of I/O concentrator worker threads to use. + * + * This value can also be set or adjusted with the #H5FD_IOC_THREAD_POOL_SIZE + * environment variable. + * + * \var H5FD_subfiling_shared_config_t H5FD_ioc_config_t::subf_config + * Subfiling configuration data for the parent #H5FD_SUBFILING driver. This + * includes the sub-file stripe size, number of I/O concentrators, IOC + * selection method, etc. * - * ---------------------------------------------------------------------------- */ - typedef struct H5FD_ioc_config_t { - uint32_t magic; /* set to H5FD_IOC_FAPL_MAGIC */ - uint32_t version; /* set to H5FD_CURR_IOC_FAPL_VERSION */ - int32_t stripe_count; /* How many io concentrators */ - int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */ - ioc_selection_t ioc_selection; /* Method to select IO Concentrators */ - hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */ - int32_t thread_pool_count; + uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */ + uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */ + hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */ + int32_t thread_pool_count; /* Number of I/O concentrator worker threads to use */ + H5FD_subfiling_shared_config_t subf_config; /* Subfiling driver configuration */ } H5FD_ioc_config_t; +//! <!-- [H5FD_ioc_config_t_snip] --> #ifdef __cplusplus extern "C" { #endif -H5_DLL hid_t H5FD_ioc_init(void); -H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr); -H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr); -H5_DLL void begin_thread_exclusive(void); -H5_DLL void end_thread_exclusive(void); +/** + * \brief Internal routine to initialize #H5FD_IOC driver. Not meant to be + * called directly by an HDF5 application + */ +H5_DLL hid_t H5FD_ioc_init(void); +/** + * \ingroup FAPL + * + * \brief Modifies the specified File Access Property List to use the #H5FD_IOC driver + * + * \fapl_id + * \param[in] vfd_config Pointer to #H5FD_IOC driver configuration structure. May be NULL. + * \returns \herr_t + * + * \details H5Pset_fapl_ioc() modifies the File Access Property List to use the + * #H5FD_IOC driver. + * + * The #H5FD_IOC driver is a reference implementation of an "I/O concentrator" + * file driver that works in conjunction with the #H5FD_SUBFILING driver and + * provides the I/O backend for servicing I/O requests to sub-files. + * + * Typically, an HDF5 application won't need to call this routine directly. + * The #H5FD_IOC driver is usually set up as a side effect of an HDF5 application + * using the #H5FD_SUBFILING driver, but this routine is provided in case the + * application wishes to manually configure the #H5FD_IOC driver. + * + * \note The \p vfd_config parameter may be NULL. In this case, the driver will + * be setup with default settings. Note that in this case, it is assumed + * the parent #H5FD_SUBFILING driver was also setup with default settings. + * If the two drivers differ in configuration settings, application behavior + * may not be as expected. + * + * \since 1.13.2 + * + */ +H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config); +/** + * \ingroup FAPL + * + * \brief Queries a File Access Property List for #H5FD_IOC file driver properties + * + * \fapl_id + * \param[out] config_out Pointer to H5FD_ioc_config_t structure through which the + * #H5FD_IOC file driver properties will be returned. + * + * \returns \herr_t + * + * \details H5Pget_fapl_ioc() queries the specified File Access Property List for + * #H5FD_IOC driver properties as set by H5Pset_fapl_ioc(). If the #H5FD_IOC + * driver has not been set on the File Access Property List, a default + * configuration is returned. An HDF5 application may use this functionality + * to manually configure the #H5FD_IOC driver by calling H5Pget_fapl_ioc() + * on a newly-created File Access Property List, adjusting the default + * values and then calling H5Pset_fapl_ioc() with the configured + * H5FD_ioc_config_t structure. + * + * \since 1.13.2 + * + */ +H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out); +/** + * \brief Internal routine for managing exclusive access to critical sections + * by the #H5FD_IOC driver's worker threads. Not meant to be called + * directly by an HDF5 application + */ +H5_DLL void H5FD_ioc_begin_thread_exclusive(void); +/** + * \brief Internal routine for managing exclusive access to critical sections + * by the #H5FD_IOC driver's worker threads. Not meant to be called + * directly by an HDF5 application + */ +H5_DLL void H5FD_ioc_end_thread_exclusive(void); #ifdef __cplusplus } diff --git a/src/H5FDsubfiling/H5FDioc_threads.c b/src/H5FDsubfiling/H5FDioc_threads.c index 2d50503..4c1887f 100644 --- a/src/H5FDsubfiling/H5FDioc_threads.c +++ b/src/H5FDsubfiling/H5FDioc_threads.c @@ -14,10 +14,6 @@ #include "H5FDsubfiling.h" -#ifndef HG_TEST_NUM_THREADS_DEFAULT -#define HG_TEST_NUM_THREADS_DEFAULT 4 -#endif - #define MIN_READ_RETRIES 10 /* @@ -118,7 +114,7 @@ initialize_ioc_threads(void *_sf_context) { subfiling_context_t *sf_context = _sf_context; ioc_data_t *ioc_data = NULL; - unsigned thread_pool_count = HG_TEST_NUM_THREADS_DEFAULT; + unsigned thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; char *env_value; int ret_value = 0; #ifdef H5FD_IOC_COLLECT_STATS @@ -174,7 +170,7 @@ initialize_ioc_threads(void *_sf_context) H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, (-1), "can't initialize IOC thread queue mutex"); /* Allow experimentation with the number of helper threads */ - if ((env_value = HDgetenv(H5_IOC_THREAD_POOL_COUNT)) != NULL) { + if ((env_value = HDgetenv(H5FD_IOC_THREAD_POOL_SIZE)) != NULL) { int value_check = HDatoi(env_value); if (value_check > 0) { thread_pool_count = (unsigned int)value_check; @@ -589,7 +585,7 @@ handle_work_request(void *arg) } /*------------------------------------------------------------------------- - * Function: begin_thread_exclusive + * Function: H5FD_ioc_begin_thread_exclusive * * Purpose: Mutex lock to restrict access to code or variables. * @@ -603,13 +599,13 @@ handle_work_request(void *arg) *------------------------------------------------------------------------- */ void -begin_thread_exclusive(void) +H5FD_ioc_begin_thread_exclusive(void) { hg_thread_mutex_lock(&ioc_thread_mutex); } /*------------------------------------------------------------------------- - * Function: end_thread_exclusive + * Function: H5FD_ioc_end_thread_exclusive * * Purpose: Mutex unlock. Should only be called by the current holder * of the locked mutex. @@ -624,7 +620,7 @@ begin_thread_exclusive(void) *------------------------------------------------------------------------- */ void -end_thread_exclusive(void) +H5FD_ioc_end_thread_exclusive(void) { hg_thread_mutex_unlock(&ioc_thread_mutex); } @@ -840,13 +836,13 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source, sf_queue_delay_time += t_queue_delay; #endif - begin_thread_exclusive(); + H5FD_ioc_begin_thread_exclusive(); /* Adjust EOF if necessary */ if (sf_eof > sf_context->sf_eof) sf_context->sf_eof = sf_eof; - end_thread_exclusive(); + H5FD_ioc_end_thread_exclusive(); done: if (send_nack) { diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c index 2b436fe..a38c020 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.c +++ b/src/H5FDsubfiling/H5FDsubfiling.c @@ -209,11 +209,13 @@ static herr_t H5FD__subfiling_read_vector(H5FD_t *file, hid_t dxpl_id, uint32_t static herr_t H5FD__subfiling_write_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], const void *bufs[] /* in */); static herr_t H5FD__subfiling_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); +#if 0 static herr_t H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw); static herr_t H5FD__subfiling_unlock(H5FD_t *_file); -static herr_t H5FD__subfiling_del(const char *name, hid_t fapl); -static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input, - void **output); +#endif +static herr_t H5FD__subfiling_del(const char *name, hid_t fapl); +static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input, + void **output); static herr_t H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *config_out); static herr_t H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa); @@ -246,46 +248,46 @@ static herr_t iovec_fill_uniform(subfiling_context_t *sf_context, int64_t iovec_ void H5FD__subfiling_mpi_finalize(void); static const H5FD_class_t H5FD_subfiling_g = { - H5FD_CLASS_VERSION, /* VFD interface version */ - H5_VFD_SUBFILING, /* value */ - H5FD_SUBFILING_NAME, /* name */ - MAXADDR, /* maxaddr */ - H5F_CLOSE_WEAK, /* fc_degree */ - H5FD__subfiling_term, /* terminate */ - NULL, /* sb_size */ - NULL, /* sb_encode */ - NULL, /* sb_decode */ - sizeof(H5FD_subfiling_config_t), /* fapl_size */ - H5FD__subfiling_fapl_get, /* fapl_get */ - H5FD__subfiling_fapl_copy, /* fapl_copy */ - H5FD__subfiling_fapl_free, /* fapl_free */ - 0, /* dxpl_size */ - NULL, /* dxpl_copy */ - NULL, /* dxpl_free */ - H5FD__subfiling_open, /* open */ - H5FD__subfiling_close, /* close */ - H5FD__subfiling_cmp, /* cmp */ - H5FD__subfiling_query, /* query */ - NULL, /* get_type_map */ - NULL, /* alloc */ - NULL, /* free */ - H5FD__subfiling_get_eoa, /* get_eoa */ - H5FD__subfiling_set_eoa, /* set_eoa */ - H5FD__subfiling_get_eof, /* get_eof */ - H5FD__subfiling_get_handle, /* get_handle */ - H5FD__subfiling_read, /* read */ - H5FD__subfiling_write, /* write */ - H5FD__subfiling_read_vector, /* read_vector */ - H5FD__subfiling_write_vector, /* write_vector */ - NULL, /* read_selection */ - NULL, /* write_selection */ - NULL, /* flush */ - H5FD__subfiling_truncate, /* truncate */ - H5FD__subfiling_lock, /* lock */ - H5FD__subfiling_unlock, /* unlock */ - H5FD__subfiling_del, /* del */ - H5FD__subfiling_ctl, /* ctl */ - H5FD_FLMAP_DICHOTOMY /* fl_map */ + H5FD_CLASS_VERSION, /* VFD interface version */ + H5_VFD_SUBFILING, /* value */ + H5FD_SUBFILING_NAME, /* name */ + MAXADDR, /* maxaddr */ + H5F_CLOSE_WEAK, /* fc_degree */ + H5FD__subfiling_term, /* terminate */ + NULL, /* sb_size */ + NULL, /* sb_encode */ + NULL, /* sb_decode */ + sizeof(H5FD_subfiling_config_t), /* fapl_size */ + H5FD__subfiling_fapl_get, /* fapl_get */ + H5FD__subfiling_fapl_copy, /* fapl_copy */ + H5FD__subfiling_fapl_free, /* fapl_free */ + 0, /* dxpl_size */ + NULL, /* dxpl_copy */ + NULL, /* dxpl_free */ + H5FD__subfiling_open, /* open */ + H5FD__subfiling_close, /* close */ + H5FD__subfiling_cmp, /* cmp */ + H5FD__subfiling_query, /* query */ + NULL, /* get_type_map */ + NULL, /* alloc */ + NULL, /* free */ + H5FD__subfiling_get_eoa, /* get_eoa */ + H5FD__subfiling_set_eoa, /* set_eoa */ + H5FD__subfiling_get_eof, /* get_eof */ + H5FD__subfiling_get_handle, /* get_handle */ + H5FD__subfiling_read, /* read */ + H5FD__subfiling_write, /* write */ + H5FD__subfiling_read_vector, /* read_vector */ + H5FD__subfiling_write_vector, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ + NULL, /* flush */ + H5FD__subfiling_truncate, /* truncate */ + NULL /* H5FD__subfiling_lock */, /* lock */ + NULL /* H5FD__subfiling_unlock */, /* unlock */ + H5FD__subfiling_del, /* del */ + H5FD__subfiling_ctl, /* ctl */ + H5FD_FLMAP_DICHOTOMY /* fl_map */ }; /* Declare a free list to manage the H5FD_subfiling_t struct */ @@ -457,7 +459,7 @@ done: *------------------------------------------------------------------------- */ herr_t -H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config) +H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config) { H5FD_subfiling_config_t *subfiling_conf = NULL; H5P_genplist_t *plist = NULL; @@ -485,7 +487,7 @@ H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config) if (H5FD__subfiling_validate_config(vfd_config) < 0) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid subfiling VFD configuration"); - ret_value = H5P_set_driver(plist, H5FD_SUBFILING, (void *)vfd_config, NULL); + ret_value = H5P_set_driver(plist, H5FD_SUBFILING, vfd_config, NULL); done: if (subfiling_conf) { @@ -564,13 +566,14 @@ H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *confi HDmemset(config_out, 0, sizeof(*config_out)); - config_out->magic = H5FD_SUBFILING_FAPL_MAGIC; - config_out->version = H5FD_CURR_SUBFILING_FAPL_VERSION; - config_out->ioc_fapl_id = H5I_INVALID_HID; - config_out->stripe_count = 0; - config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH; - config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE; - config_out->require_ioc = TRUE; + config_out->magic = H5FD_SUBFILING_FAPL_MAGIC; + config_out->version = H5FD_SUBFILING_CURR_FAPL_VERSION; + config_out->ioc_fapl_id = H5I_INVALID_HID; + config_out->require_ioc = TRUE; + + config_out->shared_cfg.ioc_selection = SELECT_IOC_ONE_PER_NODE; + config_out->shared_cfg.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE; + config_out->shared_cfg.stripe_count = 0; if ((h5_require_ioc = HDgetenv("H5_REQUIRE_IOC")) != NULL) { int value_check = HDatoi(h5_require_ioc); @@ -644,7 +647,7 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa) HDassert(fa != NULL); - if (fa->version != H5FD_CURR_SUBFILING_FAPL_VERSION) + if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version"); if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC) @@ -842,9 +845,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma H5FD_class_t *driver = NULL; /* VFD for file */ H5P_genplist_t *plist_ptr = NULL; H5FD_driver_prop_t driver_prop; /* Property for driver ID & info */ - hbool_t bcasted_inode = FALSE; - hbool_t bcasted_eof = FALSE; - int64_t sf_eof = -1; + hbool_t bcasted_eof = FALSE; + int64_t sf_eof = -1; + void *file_handle = NULL; int mpi_code; /* MPI return code */ H5FD_t *ret_value = NULL; @@ -958,34 +961,16 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file '%s' - only IOC and Sec2 VFDs are currently supported for subfiles", name); - if (driver->value == H5_VFD_IOC) { - h5_stat_t sb; - uint64_t fid; - void *file_handle = NULL; - - if (file_ptr->mpi_rank == 0) { - if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle"); - - if (HDfstat(*(int *)file_handle, &sb) < 0) - H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file"); - - HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t)); - fid = (uint64_t)sb.st_ino; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&fid, 1, MPI_UINT64_T, 0, file_ptr->comm))) - H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code); - - bcasted_inode = TRUE; + if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle"); + if (driver->value == H5_VFD_IOC) { /* Get a copy of the context ID for later use */ - file_ptr->context_id = H5_subfile_fid_to_context(fid); + file_ptr->context_id = H5_subfile_fhandle_to_context(file_handle); file_ptr->fa.require_ioc = true; } else if (driver->value == H5_VFD_SEC2) { - uint64_t inode_id = (uint64_t)-1; - int ioc_flags; + int ioc_flags; /* Translate the HDF5 file open flags into standard POSIX open flags */ ioc_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY; @@ -996,44 +981,12 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma if (H5F_ACC_EXCL & flags) ioc_flags |= O_EXCL; - /* Let MPI rank 0 to the file stat operation and broadcast a result */ - if (file_ptr->mpi_rank == 0) { - if (file_ptr->sf_file) { - h5_stat_t sb; - void *file_handle = NULL; - - if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle"); - - /* We create a new file descriptor for our file structure. - * Basically, we want these separate so that sec2 can - * deal with the opened file for additional operations - * (especially close) without interfering with subfiling. - */ - file_ptr->fd = HDdup(*(int *)file_handle); - - if (HDfstat(*(int *)file_handle, &sb) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file"); - inode_id = sb.st_ino; - } - } - - if (MPI_SUCCESS == MPI_Bcast(&inode_id, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm)) { - file_ptr->inode = inode_id; - } - - bcasted_inode = TRUE; - - /* All ranks can now detect an error and fail. */ - if (inode_id == (uint64_t)-1) - H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file = %s\n", name); - /* * Open the subfiles for this HDF5 file. A subfiling * context ID will be returned, which is used for * further interactions with this file's subfiles. */ - if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags, + if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.shared_cfg, ioc_flags, file_ptr->comm, &file_ptr->context_id) < 0) H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiling files = %s\n", name); @@ -1062,13 +1015,6 @@ done: if (file_ptr) { /* Participate in possible MPI collectives on failure */ if (file_ptr->comm != MPI_COMM_NULL) { - if (!bcasted_inode) { - uint64_t tmp_inode = UINT64_MAX; - - if (MPI_SUCCESS != - (mpi_code = MPI_Bcast(&tmp_inode, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm))) - H5_SUBFILING_MPI_DONE_ERROR(NULL, "MPI_Bcast failed", mpi_code); - } if (!bcasted_eof) { sf_eof = -1; @@ -2367,6 +2313,7 @@ done: * *------------------------------------------------------------------------- */ +#if 0 static herr_t H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw) { @@ -2415,6 +2362,7 @@ H5FD__subfiling_unlock(H5FD_t *_file) done: H5_SUBFILING_FUNC_LEAVE_API; } /* end H5FD__subfiling_unlock() */ +#endif static herr_t H5FD__subfiling_del(const char *name, hid_t fapl) diff --git a/src/H5FDsubfiling/H5FDsubfiling.h b/src/H5FDsubfiling/H5FDsubfiling.h index 3de5155..3bc448b 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.h +++ b/src/H5FDsubfiling/H5FDsubfiling.h @@ -14,120 +14,240 @@ #ifndef H5FDsubfiling_H #define H5FDsubfiling_H -#include "H5FDioc.h" - #ifdef H5_HAVE_SUBFILING_VFD +/** + * \def H5FD_SUBFILING + * Macro that returns the identifier for the #H5FD_SUBFILING driver. \hid_t{file driver} + */ #define H5FD_SUBFILING (H5FDperform_init(H5FD_subfiling_init)) #else #define H5FD_SUBFILING (H5I_INVALID_HID) #endif +/** + * \def H5FD_SUBFILING_NAME + * The canonical name for the #H5FD_SUBFILING driver + */ #define H5FD_SUBFILING_NAME "subfiling" #ifdef H5_HAVE_SUBFILING_VFD #ifndef H5FD_SUBFILING_FAPL_MAGIC -#define H5FD_CURR_SUBFILING_FAPL_VERSION 1 -#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331 +/** + * \def H5FD_SUBFILING_CURR_FAPL_VERSION + * The version number of the H5FD_subfiling_config_t configuration + * structure for the #H5FD_SUBFILING driver + */ +#define H5FD_SUBFILING_CURR_FAPL_VERSION 1 +/** + * \def H5FD_SUBFILING_FAPL_MAGIC + * Unique number used to distinguish the #H5FD_SUBFILING driver from other HDF5 file drivers + */ +#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331 #endif -/**************************************************************************** - * - * Structure: H5FD_subfiling_config_t - * - * Purpose: - * - * H5FD_subfiling_config_t is a public structure that is used to pass - * subfiling configuration data to the appropriate subfiling VFD via - * the FAPL. A pointer to an instance of this structure is a parameter - * to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling(). - * - * `magic` (uint32_t) - * - * Magic is a somewhat unique number which identifies this VFD from - * other VFDs. Used in combination with a version number, we can - * validate a user generated file access property list (fapl). - * This field should be set to H5FD_SUBFILING_FAPL_MAGIC. - * - * `version` (uint32_t) - * - * Version number of the H5FD_subfiling_config_t structure. Any instance - * passed to the above calls must have a recognized version number, or - * an error will be flagged. - * - * This field should be set to H5FD_CURR_SUBFILING_FAPL_VERSION. - * - *** IO Concentrator Info *** - *** These fields will be replicated in the stacked IOC VFD which - *** provides the extended support for aggregating reads and writes - *** and allows global file access to node-local storage containers. - * - * `stripe_count` (int32_t) - * - * The integer value which identifies the total number of - * subfiles that have been algorithmically been selected to - * to contain the segments of raw data which make up an HDF5 - * file. This value is used to implement the RAID-0 functionality - * when reading or writing datasets. +/** + * \def H5FD_SUBFILING_DEFAULT_STRIPE_SIZE + * The default stripe size (in bytes) for data stripes in sub-files + */ +#define H5FD_SUBFILING_DEFAULT_STRIPE_SIZE (32 * 1024 * 1024) + +/** + * \def H5FD_SUBFILING_FILENAME_TEMPLATE + * The basic template for a sub-file filename + */ +#define H5FD_SUBFILING_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d" + +/** + * \def H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE + * The basic template for a #H5FD_SUBFILING driver configuration filename + */ +#define H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config" + +/* + * Environment variables interpreted by the HDF5 Subfiling feature + */ + +/** + * \def H5FD_SUBFILING_STRIPE_SIZE + * Macro for name of the environment variable that specifies the size + * (in bytes) for data stripes in sub-files * - * `stripe_depth` (int64_t) + * The value set for this environment variable is interpreted as a + * long long value and must be > 0. + */ +#define H5FD_SUBFILING_STRIPE_SIZE "H5FD_SUBFILING_STRIPE_SIZE" +/** + * \def H5FD_SUBFILING_IOC_PER_NODE + * Macro for name of the environment variable that specifies the number + * of MPI ranks per node to use as I/O concentrators * - * The stripe depth defines a limit on the maximum number of contiguous - * bytes that can be read or written in a single operation on any - * selected subfile. Larger IO operations can exceed this limit - * by utilizing MPI derived types to construct an IO request which - * gathers additional data segments from memory for the IO request. + * The value set for this environment variable is interpreted as a + * long value and must be > 0. + */ +#define H5FD_SUBFILING_IOC_PER_NODE "H5FD_SUBFILING_IOC_PER_NODE" +/** + * \def H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * Macro for name of the environment variable that provides information + * for selection MPI ranks as I/O concentrators * - * `ioc_selection` (enum io_selection datatype) + * The value set for this environment variable is interpreted differently, + * depending on the IOC selection method chosen. * - * The io_selection_t defines a specific algorithm by which IO - * concentrators (IOCs) and sub-files are identified. The available - * algorithms are: SELECT_IOC_ONE_PER_NODE, SELECT_IOC_EVERY_NTH_RANK, - * SELECT_IOC_WITH_CONFIG, and SELECT_IOC_TOTAL. + * For #SELECT_IOC_ONE_PER_NODE, this value is ignored. * - *** STACKING and other VFD support - *** i.e. FAPL caching - *** + * For #SELECT_IOC_EVERY_NTH_RANK, this value is interpreted as a + * long value and must be > 0. The value will correspond to the + * `N` value when selecting every `N`-th MPI rank as an I/O + * concentrator. * - * `ioc_fapl_id` (hid_t) + * For #SELECT_IOC_WITH_CONFIG, this value is ignored as that particular + * IOC selection method is not currently supported. * - * A valid file access property list (fapl) is cached on each - * process and thus enables selection of an alternative provider - * for subsequent file operations. - * By default, Sub-filing employs an additional support VFD that - * provides file IO proxy capabilities to all MPI ranks in a - * distributed parallel application. This IO indirection - * thus allows application access all sub-files even while - * these may actually be node-local and thus not directly - * accessible to remote ranks. + * For #SELECT_IOC_TOTAL, this value is interpreted as a long value + * and must be > 0. The value will correspond to the total number + * of I/O concentrators to be used. + */ +#define H5FD_SUBFILING_IOC_SELECTION_CRITERIA "H5FD_SUBFILING_IOC_SELECTION_CRITERIA" +/** + * \def H5FD_SUBFILING_SUBFILE_PREFIX + * Macro for name of the environment variable that specifies a prefix + * to apply to the filenames generated for sub-files * - ****************************************************************************/ + * The value set for this environment variable is interpreted as a + * pathname. + */ +#define H5FD_SUBFILING_SUBFILE_PREFIX "H5FD_SUBFILING_SUBFILE_PREFIX" -/* - * In addition to the common configuration fields, we can have - * VFD specific fields. Here's one for the subfiling VFD. - * - * `require_ioc` (hbool_t) - * - * Require_IOC is a boolean flag with a default value of TRUE. - * This flag indicates that the stacked H5FDioc VFD should be - * employed for sub-filing operations. The default flag can be - * overridden with an environment variable: H5_REQUIRE_IOC=0 - * +/** + * \enum H5FD_subfiling_ioc_select_t + * This enum defines the various constants to allow different + * allocations of MPI ranks as I/O concentrators. + * + * \var SELECT_IOC_ONE_PER_NODE + * Default selection method. One MPI rank per node is used as an + * I/O concentrator. If this selection method is used, the number + * of I/O concentrators per node can be adjusted with the + * #H5FD_SUBFILING_IOC_PER_NODE environment variable. + * + * \var SELECT_IOC_EVERY_NTH_RANK + * Starting with MPI rank 0, a stride of 'N' is applied to the MPI + * rank values to determine the next I/O concentrator. The + * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must + * be set to the value desired for 'N'. + * + * \var SELECT_IOC_WITH_CONFIG + * Currently unsupported. Use a configuration file to determine + * the mapping from MPI ranks to I/O concentrators. The + * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must + * be set to the path to the configuration file. + * + * \var SELECT_IOC_TOTAL + * Specifies that a total of 'N' I/O concentrators should be used. + * Starting with MPI rank 0, a stride of 'MPI comm size' / 'N' is + * applied to the MPI rank values to determine the next I/O + * concentrator. The #H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * environment variable must be set to the value desired for 'N'. + * + * \var ioc_selection_options + * Unused. Sentinel value */ +typedef enum { + SELECT_IOC_ONE_PER_NODE = 0, /* Default */ + SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */ + SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */ + SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */ + ioc_selection_options /* Sentinel value */ +} H5FD_subfiling_ioc_select_t; + +/** + * \struct H5FD_subfiling_shared_config_t + * \brief Subfiling configuration structure that is shared between the #H5FD_SUBFILING + * and #H5FD_IOC drivers + * + * \var H5FD_subfiling_ioc_select_t H5FD_subfiling_shared_config_t::ioc_selection + * The method to use for selecting MPI ranks to be I/O concentrators. The + * current default is to select one MPI rank per node to be an I/O concentrator. + * Refer to #H5FD_subfiling_ioc_select_t for a description of the algorithms + * available for use. + * + * \var int64_t H5FD_subfiling_shared_config_t::stripe_size + * The stripe size defines the size (in bytes) of the data stripes in the + * sub-files for the logical HDF5 file. Data is striped across the sub-files + * in a round-robin wrap-around fashion in segments equal to the stripe size. + * + * For example, in an HDF5 file consisting of four sub-files with a 1MiB stripe + * size, the first and fifth 1MiB of data would reside in the first sub-file, + * the second and sixth 1MiB of data would reside in the second sub-file and so + * on. + * + * This value can also be set or adjusted with the #H5FD_SUBFILING_STRIPE_SIZE + * environment variable. + * + * \var int32_t H5FD_subfiling_shared_config_t::stripe_count + * The number of I/O concentrators (and, currently, the number of sub-files) + * to use for the logical HDF5 file. This value is used in conjunction with + * the IOC selection method to determine which MPI ranks will be assigned as + * I/O concentrators. + * + * Alternatively, the mapping between MPI ranks and I/O concentrators can be + * set or adjusted with a combination of the #ioc_selection field and the + * #H5FD_SUBFILING_IOC_PER_NODE and #H5FD_SUBFILING_IOC_SELECTION_CRITERIA + * environment variables. + */ +typedef struct H5FD_subfiling_shared_config_t { + H5FD_subfiling_ioc_select_t ioc_selection; /* Method to select I/O concentrators */ + int64_t stripe_size; /* Size (in bytes) of data stripes in sub-files */ + int32_t stripe_count; /* Number of I/O concentrators to use */ +} H5FD_subfiling_shared_config_t; //! <!-- [H5FD_subfiling_config_t_snip] --> /** - * Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() + * \struct H5FD_subfiling_config_t + * \brief Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() + * + * \details H5FD_subfiling_config_t is a public structure that is used to pass + * subfiling configuration data to the #H5FD_SUBFILING driver via + * a File Access Property List. A pointer to an instance of this structure + * is a parameter to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling(). + * + * \var uint32_t H5FD_subfiling_config_t::magic + * A somewhat unique number which distinguishes the #H5FD_SUBFILING driver + * from other drivers. Used in combination with a version number, it can + * help to validate a user-generated File Access Property List. This field + * should be set to #H5FD_SUBFILING_FAPL_MAGIC. + * + * \var uint32_t H5FD_subfiling_config_t::version + * Version number of the H5FD_subfiling_config_t structure. Any instance + * passed to H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() must have + * a recognized version number or an error will be raised. Currently, this + * field should be set to #H5FD_SUBFILING_CURR_FAPL_VERSION. + * + * \var hid_t H5FD_subfiling_config_t::ioc_fapl_id + * The File Access Property List which is setup with the file driver that + * the #H5FD_SUBFILING driver will use for servicing I/O requests to the + * sub-files. Currently, the File Access Property List must be setup with + * the #H5FD_IOC driver by calling H5Pset_fapl_ioc(), but future development + * may allow other file drivers to be used. + * + * \var hbool_t H5FD_subfiling_config_t::require_ioc + * A boolean flag which indicates whether the #H5FD_SUBFILING driver should + * use the #H5FD_IOC driver for its I/O operations. This field should currently + * always be set to TRUE. + * + * \var H5FD_subfiling_shared_config_t H5FD_subfiling_config_t::shared_cfg + * A structure which contains the subfiling parameters that are shared between + * the #H5FD_SUBFILING and #H5FD_IOC drivers. This includes the sub-file stripe + * size, number of I/O concentrators, IOC selection method, etc. + * */ typedef struct H5FD_subfiling_config_t { - uint32_t magic; /* set to H5FD_SUBFILING_FAPL_MAGIC */ - uint32_t version; /* set to H5FD_CURR_SUBFILING_FAPL_VERSION */ - int32_t stripe_count; /* How many io concentrators */ - int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */ - ioc_selection_t ioc_selection; /* Method to select IO Concentrators */ - hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */ - hbool_t require_ioc; + uint32_t magic; /* Must be set to H5FD_SUBFILING_FAPL_MAGIC */ + uint32_t version; /* Must be set to H5FD_SUBFILING_CURR_FAPL_VERSION */ + hid_t ioc_fapl_id; /* The FAPL setup with the stacked VFD to use for I/O concentrators */ + hbool_t require_ioc; /* Whether to use the IOC VFD (currently must always be TRUE) */ + H5FD_subfiling_shared_config_t + shared_cfg; /* Subfiling/IOC parameters (stripe size, stripe count, etc.) */ } H5FD_subfiling_config_t; //! <!-- [H5FD_subfiling_config_t_snip] --> @@ -135,41 +255,79 @@ typedef struct H5FD_subfiling_config_t { extern "C" { #endif +/** + * \brief Internal routine to initialize #H5FD_SUBFILING driver. Not meant to be + * called directly by an HDF5 application + */ H5_DLL hid_t H5FD_subfiling_init(void); /** * \ingroup FAPL * - * \brief Modifies the file access property list to use the #H5FD_SUBFILING driver + * \brief Modifies the specified File Access Property List to use the #H5FD_SUBFILING driver * * \fapl_id - * \param[in] vfd_config #H5FD_SUBFILING driver specific properties. If NULL, then - * the IO concentrator VFD will be used. + * \param[in] vfd_config Pointer to #H5FD_SUBFILING driver configuration structure. May be NULL. * \returns \herr_t * - * \details H5Pset_fapl_core() modifies the file access property list to use the + * \details H5Pset_fapl_subfiling() modifies the File Access Property List to use the * #H5FD_SUBFILING driver. * - * \todo Expand details! - * - * \since 1.14.0 + * The #H5FD_SUBFILING driver is an MPI-based file driver that allows an + * HDF5 application to distribute a logical HDF5 file across a collection + * of "sub-files" in equal-sized data segment "stripes". I/O to the logical + * HDF5 file is then directed to the appropriate "sub-file" according to the + * #H5FD_SUBFILING configuration and a system of I/O concentrators, which + * are MPI ranks operating worker threads. + * + * By allowing a configurable stripe size, number of I/O concentrators and + * method for selecting MPI ranks as I/O concentrators, the #H5FD_SUBFILING + * driver aims to enable an HDF5 application to find a middle ground between + * the single shared file and file-per-process approaches to parallel file I/O + * for the particular machine the application is running on. In general, the + * goal is to avoid some of the complexity of the file-per-process approach + * while also minimizing the locking issues of the single shared file approach + * on a parallel file system. + * + * \note Since the #H5FD_SUBFILING driver is an MPI-based file driver, the HDF5 + * application should ensure that H5Pset_mpi_params() is called before this + * routine so that the appropriate MPI communicator and info objects will be + * setup for use by the #H5FD_SUBFILING and #H5FD_IOC drivers. + * + * \note The current architecture of the #H5FD_SUBFILING driver requires that the + * HDF5 application must have been initialized with MPI_Init_thread() using + * a value of MPI_THREAD_MULTIPLE for the thread support level. + * + * \note The \p vfd_config parameter may be NULL. In this case, the reference + * implementation I/O concentrator VFD will be used with the default settings + * of one I/O concentrator per node and a stripe size of 32MiB. Refer to the + * H5FD_subfiling_config_t documentation for information about configuration + * for the #H5FD_SUBFILING driver. + * + * \since 1.13.2 * */ -H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config); +H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config); /** * \ingroup FAPL * - * \brief Queries subfiling file driver properties + * \brief Queries a File Access Property List for #H5FD_SUBFILING file driver properties * * \fapl_id - * \param[out] config_out The subfiling fapl data. + * \param[out] config_out Pointer to H5FD_subfiling_config_t structure through which the + * #H5FD_SUBFILING file driver properties will be returned. * * \returns \herr_t * - * \details H5Pget_fapl_subfiling() queries the #H5FD_SUBFILING driver properties as set - * by H5Pset_fapl_subfiling(). If the #H5FD_SUBFILING driver has not been set on - * the File Access Property List, a default configuration is returned. + * \details H5Pget_fapl_subfiling() queries the specified File Access Property List for + * #H5FD_SUBFILING driver properties as set by H5Pset_fapl_subfiling(). If the + * #H5FD_SUBFILING driver has not been set on the File Access Property List, a + * default configuration is returned. An HDF5 application may use this + * functionality to manually configure the #H5FD_SUBFILING driver by calling + * H5Pget_fapl_subfiling() on a newly-created File Access Property List, adjusting + * the default values and then calling H5Pset_fapl_subfiling() with the configured + * H5FD_subfiling_config_t structure. * - * \since 1.14.0 + * \since 1.13.2 * */ H5_DLL herr_t H5Pget_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *config_out); diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c index 25e80fb..b75dd81 100644 --- a/src/H5FDsubfiling/H5subfiling_common.c +++ b/src/H5FDsubfiling/H5subfiling_common.c @@ -19,9 +19,9 @@ #include "H5subfiling_common.h" #include "H5subfiling_err.h" -typedef struct { /* Format of a context map entry */ - uint64_t h5_file_id; /* key value (linear search of the cache) */ - int64_t sf_context_id; /* The return value if matching h5_file_id */ +typedef struct { /* Format of a context map entry */ + void *file_handle; /* key value (linear search of the cache) */ + int64_t sf_context_id; /* The return value if matching file_handle */ } file_map_to_context_t; typedef struct stat_record { @@ -83,13 +83,15 @@ static int sf_open_file_count = 0; static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context); static herr_t H5_free_subfiling_topology(sf_topology_t *topology); -static herr_t init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out); -static herr_t init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, +static herr_t init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, + int64_t *context_id_out); +static herr_t init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out); -static herr_t init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, - MPI_Comm file_comm); +static herr_t init_subfiling_context(subfiling_context_t *sf_context, + H5FD_subfiling_shared_config_t *subfiling_config, + sf_topology_t *app_topology, MPI_Comm file_comm); static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags); -static herr_t record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index); +static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index); static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags); static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out, size_t filename_out_len, char **filename_basename_out, @@ -102,10 +104,10 @@ static herr_t open_config_file(subfiling_context_t *sf_context, const char *base static void initialize_statistics(void); static int numDigits(int n); static int get_next_fid_map_index(void); -static void clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id); +static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id); static int compare_hostid(const void *h1, const void *h2); -static herr_t get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, - char **ioc_sel_info_str); +static herr_t get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, + char **ioc_sel_info_str); static int count_nodes(sf_topology_t *info, MPI_Comm comm); static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm); static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node); @@ -192,7 +194,7 @@ get_next_fid_map_index(void) HDassert(sf_open_file_map || (sf_file_map_size == 0)); for (int i = 0; i < sf_file_map_size; i++) { - if (sf_open_file_map[i].h5_file_id == UINT64_MAX) { + if (sf_open_file_map[i].file_handle == NULL) { index = i; break; } @@ -222,14 +224,13 @@ get_next_fid_map_index(void) *------------------------------------------------------------------------- */ static void -clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id) +clear_fid_map_entry(void *file_handle, int64_t sf_context_id) { if (sf_open_file_map) { - int i; - for (i = 0; i < sf_file_map_size; i++) { - if ((sf_open_file_map[i].h5_file_id == sf_fid) && + for (int i = 0; i < sf_file_map_size; i++) { + if ((sf_open_file_map[i].file_handle == file_handle) && (sf_open_file_map[i].sf_context_id == sf_context_id)) { - sf_open_file_map[i].h5_file_id = UINT64_MAX; + sf_open_file_map[i].file_handle = NULL; sf_open_file_map[i].sf_context_id = -1; return; } @@ -287,8 +288,9 @@ compare_hostid(const void *h1, const void *h2) Purpose: Return a character string which represents either the default selection method: SELECT_IOC_ONE_PER_NODE; or if the user has selected a method via the environment - variable (H5_IOC_SELECTION_CRITERIA), we return that - along with any optional qualifier with for that method. + variable (H5FD_SUBFILING_IOC_SELECTION_CRITERIA), we + return that along with any optional qualifier with for + that method. Errors: None. @@ -296,10 +298,10 @@ compare_hostid(const void *h1, const void *h2) ------------------------------------------------------------------------- */ static herr_t -get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **ioc_sel_info_str) +get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str) { char *opt_value = NULL; - char *env_value = HDgetenv(H5_IOC_SELECTION_CRITERIA); + char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA); HDassert(ioc_selection_type); HDassert(ioc_sel_info_str); @@ -323,7 +325,8 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char ** if (errno == ERANGE) { #ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't parse value from " H5_IOC_SELECTION_CRITERIA " environment variable\n", + HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA + " environment variable\n", __func__); #endif @@ -332,7 +335,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char ** if ((check_value < 0) || (check_value >= ioc_selection_options)) { #ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid IOC selection type value %ld from " H5_IOC_SELECTION_CRITERIA + HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA " environment variable\n", __func__, check_value); #endif @@ -340,7 +343,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char ** return FAIL; } - *ioc_selection_type = (ioc_selection_t)check_value; + *ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value; *ioc_sel_info_str = opt_value; } @@ -784,6 +787,7 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context) sf_context->sf_context_id = -1; sf_context->h5_file_id = UINT64_MAX; + sf_context->h5_file_handle = NULL; sf_context->sf_fid = -1; sf_context->sf_write_count = 0; sf_context->sf_read_count = 0; @@ -912,8 +916,9 @@ H5_free_subfiling_topology(sf_topology_t *topology) */ /* TODO: revise description */ herr_t -H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t ioc_selection_type, - int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out) +H5_open_subfiles(const char *base_filename, void *file_handle, + H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags, MPI_Comm file_comm, + int64_t *context_id_out) { subfiling_context_t *sf_context = NULL; int64_t context_id = -1; @@ -931,6 +936,15 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t goto done; } + if (!subfiling_config) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: invalid subfiling configuration pointer\n", __func__); +#endif + + ret_value = FAIL; + goto done; + } + if (!context_id_out) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: context_id_out is NULL\n", __func__); @@ -953,7 +967,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t #endif /* Initialize new subfiling context ID based on configuration information */ - if (init_subfiling(ioc_selection_type, file_comm, &context_id) < 0) { + if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: couldn't initialize subfiling context\n", __func__); #endif @@ -973,7 +987,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t } /* Save some basic things in the new subfiling context */ - sf_context->h5_file_id = h5_file_id; + sf_context->h5_file_handle = file_handle; if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) { #ifdef H5_SUBFILING_DEBUG @@ -1058,7 +1072,7 @@ done: } if (ret_value < 0) { - clear_fid_map_entry(h5_file_id, context_id); + clear_fid_map_entry(file_handle, context_id); if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) { #ifdef H5_SUBFILING_DEBUG @@ -1092,7 +1106,7 @@ done: ------------------------------------------------------------------------- */ static herr_t -init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out) +init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, int64_t *context_id_out) { subfiling_context_t *new_context = NULL; sf_topology_t *app_topology = NULL; @@ -1129,7 +1143,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte * Setup the application topology information, including the computed * number and distribution map of the set of I/O concentrators */ - if (init_app_topology(ioc_selection_type, comm, &app_topology) < 0) { + if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: couldn't initialize application topology\n", __func__); #endif @@ -1140,7 +1154,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte new_context->sf_context_id = context_id; - if (init_subfiling_context(new_context, app_topology, comm) < 0) { + if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: couldn't initialize subfiling topology object\n", __func__); #endif @@ -1207,7 +1221,8 @@ done: *------------------------------------------------------------------------- */ static herr_t -init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out) +init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, + sf_topology_t **app_topology_out) { sf_topology_t *app_topology = NULL; app_layout_t *app_layout = sf_app_layout; @@ -1392,11 +1407,11 @@ init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology /* Check for an IOC-per-node value set in the environment */ /* TODO: should this env. var. be interpreted for other selection types? */ - if ((env_value = HDgetenv(H5_IOC_COUNT_PER_NODE))) { + if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) { errno = 0; ioc_select_val = HDstrtol(env_value, NULL, 0); if ((ERANGE == errno)) { - HDprintf("invalid value '%s' for " H5_IOC_COUNT_PER_NODE "\n", env_value); + HDprintf("invalid value '%s' for " H5FD_SUBFILING_IOC_PER_NODE "\n", env_value); ioc_select_val = 1; } @@ -1509,7 +1524,8 @@ done: *------------------------------------------------------------------------- */ static herr_t -init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, MPI_Comm file_comm) +init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_config_t *subfiling_config, + sf_topology_t *app_topology, MPI_Comm file_comm) { char *env_value = NULL; int comm_rank; @@ -1518,6 +1534,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol HDassert(sf_context); HDassert(sf_context->topology == NULL); + HDassert(subfiling_config); HDassert(app_topology); HDassert(app_topology->n_io_concentrators > 0); HDassert(MPI_COMM_NULL != file_comm); @@ -1529,10 +1546,11 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol sf_context->sf_barrier_comm = MPI_COMM_NULL; sf_context->sf_group_comm = MPI_COMM_NULL; sf_context->sf_intercomm = MPI_COMM_NULL; - sf_context->sf_stripe_size = H5FD_DEFAULT_STRIPE_DEPTH; + sf_context->sf_stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE; sf_context->sf_write_count = 0; sf_context->sf_read_count = 0; sf_context->sf_eof = HADDR_UNDEF; + sf_context->h5_file_handle = NULL; sf_context->sf_fid = -1; sf_context->sf_group_size = 1; sf_context->sf_group_rank = 0; @@ -1545,8 +1563,14 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol sf_context->sf_logfile = NULL; #endif - /* Check for an IOC stripe size setting in the environment */ - if ((env_value = HDgetenv(H5_IOC_STRIPE_SIZE))) { + /* + * Set IOC stripe size from subfiling configuration, then check + * for a setting from the environment + */ + if (subfiling_config->stripe_size > 0) + sf_context->sf_stripe_size = subfiling_config->stripe_size; + + if ((env_value = HDgetenv(H5FD_SUBFILING_STRIPE_SIZE))) { long long stripe_size = -1; errno = 0; @@ -1554,7 +1578,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol stripe_size = HDstrtoll(env_value, NULL, 0); if (ERANGE == errno) { #ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid stripe size setting '%s' for " H5_IOC_STRIPE_SIZE "\n", __func__, + HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__, env_value); #endif @@ -1574,7 +1598,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol sf_context->sf_blocksize_per_stripe = sf_context->sf_stripe_size * app_topology->n_io_concentrators; /* Check for a subfile name prefix setting in the environment */ - if ((env_value = HDgetenv(H5_IOC_SUBFILE_PREFIX))) { + if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) { if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: couldn't copy subfile prefix value\n", __func__); @@ -1767,7 +1791,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) * There shouldn't be any issue, but check the status and * return if there was a problem. */ - if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0) { + if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) { #ifdef H5_SUBFILING_DEBUG HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__); #endif @@ -1792,6 +1816,21 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) 0, 0, 0}; + h5_stat_t st; + + /* Retrieve Inode value for HDF5 stub file */ + if (HDstat(sf_context->h5_filename, &st) < 0) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("[%s %d]: couldn't stat file %s\n", __func__, + sf_context->topology->app_layout->world_rank, sf_context->h5_filename); +#endif + + ret_value = FAIL; + goto done; + } + + HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t)); + sf_context->h5_file_id = (uint64_t)st.st_ino; if (ioc_open_file(&msg, file_acc_flags) < 0) { #ifdef H5_SUBFILING_DEBUG @@ -1806,7 +1845,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) done: if (ret_value < 0) { - clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id); + clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id); } return ret_value; @@ -1843,7 +1882,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index) +record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index) { int index; herr_t ret_value = SUCCEED; @@ -1861,17 +1900,17 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES; for (int i = 0; i < sf_file_map_size; i++) { - sf_open_file_map[i].h5_file_id = UINT64_MAX; + sf_open_file_map[i].file_handle = NULL; sf_open_file_map[i].sf_context_id = -1; } } for (index = 0; index < sf_file_map_size; index++) { - if (sf_open_file_map[index].h5_file_id == h5_file_id) + if (sf_open_file_map[index].file_handle == file_handle) goto done; - if (sf_open_file_map[index].h5_file_id == UINT64_MAX) { - sf_open_file_map[index].h5_file_id = h5_file_id; + if (sf_open_file_map[index].file_handle == NULL) { + sf_open_file_map[index].file_handle = file_handle; sf_open_file_map[index].sf_context_id = subfile_context_id; if (next_index) { @@ -1899,14 +1938,14 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next sf_file_map_size *= 2; for (int i = index; i < sf_file_map_size; i++) { - sf_open_file_map[i].h5_file_id = UINT64_MAX; + sf_open_file_map[i].file_handle = NULL; } if (next_index) { *next_index = index; } - sf_open_file_map[index].h5_file_id = h5_file_id; + sf_open_file_map[index].file_handle = file_handle; sf_open_file_map[index++].sf_context_id = subfile_context_id; } @@ -1957,13 +1996,12 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags) { subfiling_context_t *sf_context = NULL; int64_t file_context_id; - hbool_t mutex_locked = FALSE; - mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; - char *filepath = NULL; - char *subfile_dir = NULL; - char *base = NULL; - int fd = -1; - herr_t ret_value = SUCCEED; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + char *filepath = NULL; + char *subfile_dir = NULL; + char *base = NULL; + int fd = -1; + herr_t ret_value = SUCCEED; HDassert(msg); @@ -2011,9 +2049,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags) goto done; } - begin_thread_exclusive(); - mutex_locked = TRUE; - /* Attempt to create/open the subfile for this IOC rank */ if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0) H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "failed to open subfile"); @@ -2033,11 +2068,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags) } done: - if (mutex_locked) { - end_thread_exclusive(); - mutex_locked = FALSE; - } - if (ret_value < 0) { if (sf_context) { HDfree(sf_context->sf_filename); @@ -2074,7 +2104,6 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char size_t filename_out_len, char **filename_basename_out, char **subfile_dir_out) { FILE *config_file = NULL; - char *config_buf = NULL; char *subfile_dir = NULL; char *prefix = NULL; char *base = NULL; @@ -2181,83 +2210,14 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * in order to generate the correct subfile names. */ if (config_file) { - char *ioc_substr = NULL; - long config_file_len = 0; - - if (HDfseek(config_file, 0, SEEK_END) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, - errno); -#endif - - ret_value = FAIL; - goto done; - } - - if ((config_file_len = HDftell(config_file)) < 0) { + if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) { #ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno); + HDprintf("%s: couldn't read from subfiling configuration file\n", __func__); #endif ret_value = FAIL; goto done; } - - if (HDfseek(config_file, 0, SEEK_SET) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, - errno); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } - - config_buf[config_file_len] = '\0'; - - if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &n_io_concentrators)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n", - __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (n_io_concentrators <= 0) { - HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n", - __func__, n_io_concentrators); - ret_value = FAIL; - goto done; - } } /* @@ -2272,7 +2232,7 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * ABC.h5.subfile_<file-number>.config */ num_digits = numDigits(n_io_concentrators); - HDsnprintf(filename_out, filename_out_len, "%s/%s" SF_FILENAME_TEMPLATE, subfile_dir, base, + HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base, sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1, n_io_concentrators); @@ -2296,7 +2256,6 @@ done: } } - HDfree(config_buf); HDfree(prefix); return ret_value; @@ -2361,8 +2320,8 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c goto done; } - HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, - sf_context->h5_file_id); + HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, + base_filename, sf_context->h5_file_id); /* Determine whether a subfiling configuration file exists */ errno = 0; @@ -2455,7 +2414,7 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c /* Write out each subfile name to the configuration file */ num_digits = numDigits(n_io_concentrators); for (int k = 0; k < n_io_concentrators; k++) { - HDsnprintf(line_buf, PATH_MAX, "%s" SF_FILENAME_TEMPLATE "\n", base_filename, + HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename, sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators); if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { @@ -2546,8 +2505,8 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con goto done; } - HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, - sf_context->h5_file_id); + HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, + base_filename, sf_context->h5_file_id); /* Determine whether a subfiling configuration file exists */ errno = 0; @@ -2595,6 +2554,110 @@ done: } /*------------------------------------------------------------------------- + * Function: H5_get_num_iocs_from_config_file + * + * Purpose: Reads a Subfiling configuration file to get the number of + * I/O concentrators used for the logical HDF5 file. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators) +{ + char *config_buf = NULL; + char *ioc_substr = NULL; + long config_file_len = 0; + int read_n_io_concs = 0; + herr_t ret_value = SUCCEED; + + HDassert(config_file); + HDassert(n_io_concentrators); + + if (HDfseek(config_file, 0, SEEK_END) < 0) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno); +#endif + + ret_value = FAIL; + goto done; + } + + if ((config_file_len = HDftell(config_file)) < 0) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno); +#endif + + ret_value = FAIL; + goto done; + } + + if (HDfseek(config_file, 0, SEEK_SET) < 0) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__, + errno); +#endif + + ret_value = FAIL; + goto done; + } + + if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__); +#endif + + ret_value = FAIL; + goto done; + } + + if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno); +#endif + + ret_value = FAIL; + goto done; + } + + config_buf[config_file_len] = '\0'; + + if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__); +#endif + + ret_value = FAIL; + goto done; + } + + if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) { +#ifdef H5_SUBFILING_DEBUG + HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n", + __func__); +#endif + + ret_value = FAIL; + goto done; + } + + if (read_n_io_concs <= 0) { + HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n", + __func__, read_n_io_concs); + ret_value = FAIL; + goto done; + } + + *n_io_concentrators = read_n_io_concs; + +done: + HDfree(config_buf); + + H5_SUBFILING_FUNC_LEAVE; +} + +/*------------------------------------------------------------------------- * Function: H5_close_subfiles * * Purpose: This is a simple wrapper function for the internal version @@ -2713,9 +2776,9 @@ H5_close_subfiles(int64_t subfiling_context_id) } #endif - /* The map from FID to subfiling context can now be cleared */ - if (sf_context->h5_file_id != UINT64_MAX) { - clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id); + /* The map from file handle to subfiling context can now be cleared */ + if (sf_context->h5_file_handle != NULL) { + clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id); } if (sf_context->topology->rank_is_ioc) { @@ -2822,10 +2885,10 @@ done: } /*------------------------------------------------------------------------- - * Function: H5_subfile_fid_to_context + * Function: H5_subfile_fhandle_to_context * * Purpose: This is a basic lookup function which returns the subfiling - * context id associated with the specified file->inode. + * context id associated with the specified file handle. * * Return: Non-negative subfiling context ID if the context exists * Negative on failure or if the subfiling context doesn't @@ -2839,7 +2902,7 @@ done: *------------------------------------------------------------------------- */ int64_t -H5_subfile_fid_to_context(uint64_t sf_fid) +H5_subfile_fhandle_to_context(void *file_handle) { if (!sf_open_file_map) { #ifdef H5_SUBFILING_DEBUG @@ -2850,13 +2913,13 @@ H5_subfile_fid_to_context(uint64_t sf_fid) } for (int i = 0; i < sf_file_map_size; i++) { - if (sf_open_file_map[i].h5_file_id == sf_fid) { + if (sf_open_file_map[i].file_handle == file_handle) { return sf_open_file_map[i].sf_context_id; } } return -1; -} /* end H5_subfile_fid_to_context() */ +} /* end H5_subfile_fhandle_to_context() */ #ifdef H5_SUBFILING_DEBUG void @@ -2873,7 +2936,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...) goto done; } - begin_thread_exclusive(); + H5FD_ioc_begin_thread_exclusive(); if (sf_context->sf_logfile) { HDvfprintf(sf_context->sf_logfile, fmt, log_args); @@ -2886,7 +2949,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...) HDfflush(stdout); } - end_thread_exclusive(); + H5FD_ioc_end_thread_exclusive(); done: va_end(log_args); diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h index 19c5c0c..3195c9d 100644 --- a/src/H5FDsubfiling/H5subfiling_common.h +++ b/src/H5FDsubfiling/H5subfiling_common.h @@ -22,8 +22,7 @@ #include "H5private.h" #include "H5Iprivate.h" -/* TODO: needed for ioc_selection_t, which also needs to be public */ -#include "H5FDioc.h" +#include "H5FDsubfiling.h" /* * Some definitions for debugging the Subfiling feature @@ -31,30 +30,6 @@ /* #define H5_SUBFILING_DEBUG */ /* - * The following is our basic template for a subfile filename. - * Note that eventually we shouldn't use 0_of_N since we - * intend to use the user defined HDF5 filename for a - * zeroth subfile as well as for all metadata. - */ -#define SF_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d" - -/* - * The following is our basic template for a subfiling - * configuration filename. - */ -#define SF_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config" - -/* - * Environment variables interpreted by the HDF5 subfiling feature - */ -#define H5_IOC_SELECTION_CRITERIA "H5_IOC_SELECTION_CRITERIA" -#define H5_IOC_COUNT_PER_NODE "H5_IOC_COUNT_PER_NODE" -#define H5_IOC_STRIPE_SIZE "H5_IOC_STRIPE_SIZE" -#define H5_IOC_SUBFILE_PREFIX "H5_IOC_SUBFILE_PREFIX" - -#define H5FD_DEFAULT_STRIPE_DEPTH (32 * 1024 * 1024) - -/* * MPI Tags are 32 bits, we treat them as unsigned * to allow the use of the available bits for RPC * selections, i.e. a message from the VFD read or write functions @@ -166,18 +141,19 @@ typedef struct app_layout_t { /* This typedef defines things related to IOC selections */ typedef struct topology { - app_layout_t *app_layout; /* Pointer to our layout struct */ - bool rank_is_ioc; /* Indicates that we host an IOC */ - int subfile_rank; /* Valid only if rank_is_ioc */ - int n_io_concentrators; /* Number of IO concentrators */ - int *io_concentrators; /* Vector of ranks which are IOCs */ - int *subfile_fd; /* file descriptor (if IOC) */ - ioc_selection_t selection_type; /* Cache our IOC selection criteria */ + app_layout_t *app_layout; /* Pointer to our layout struct */ + bool rank_is_ioc; /* Indicates that we host an IOC */ + int subfile_rank; /* Valid only if rank_is_ioc */ + int n_io_concentrators; /* Number of IO concentrators */ + int *io_concentrators; /* Vector of ranks which are IOCs */ + int *subfile_fd; /* file descriptor (if IOC) */ + H5FD_subfiling_ioc_select_t selection_type; /* Cache our IOC selection criteria */ } sf_topology_t; typedef struct { int64_t sf_context_id; /* Generated context ID which embeds the cache index */ uint64_t h5_file_id; /* GUID (basically the inode value) */ + void *h5_file_handle; /* Low-level handle for the HDF5 stub file */ int sf_fid; /* value returned by open(file,..) */ size_t sf_write_count; /* Statistics: write_count */ size_t sf_read_count; /* Statistics: read_count */ @@ -236,15 +212,16 @@ extern app_layout_t *sf_app_layout; extern "C" { #endif -H5_DLL herr_t H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, - ioc_selection_t ioc_selection_type, int file_acc_flags, MPI_Comm file_comm, - int64_t *context_id_out); +H5_DLL herr_t H5_open_subfiles(const char *base_filename, void *h5_file_handle, + H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags, + MPI_Comm file_comm, int64_t *context_id_out); H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id); H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val); H5_DLL void *H5_get_subfiling_object(int64_t object_id); -H5_DLL int64_t H5_subfile_fid_to_context(uint64_t h5_fid); +H5_DLL int64_t H5_subfile_fhandle_to_context(void *file_handle); H5_DLL herr_t H5_free_subfiling_object(int64_t object_id); +H5_DLL herr_t H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators); H5_DLL void H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...); diff --git a/src/H5trace.c b/src/H5trace.c index 2b714fb..fd9da44 100644 --- a/src/H5trace.c +++ b/src/H5trace.c @@ -1098,6 +1098,11 @@ H5_trace_args(H5RS_str_t *rs, const char *type, va_list ap) H5RS_acat(rs, "H5_VFD_ROS3"); break; #endif +#ifdef H5_HAVE_SUBFILING_VFD + case H5_VFD_SUBFILING: + H5RS_acat(rs, "H5_VFD_SUBFILING"); + break; +#endif case H5_VFD_ONION: H5RS_acat(rs, "H5_VFD_ONION"); break; |