summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfiling/H5FDsubfiling.c
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2022-08-09 23:05:37 (GMT)
committerGitHub <noreply@github.com>2022-08-09 23:05:37 (GMT)
commitef33ac8bac5fd201b41d1a3084f03834f47729a2 (patch)
treead4756b872abff6d16f11d9a6c6c949e8f359cad /src/H5FDsubfiling/H5FDsubfiling.c
parentb84241e57a97309b15846da4cc74611a66d92f6d (diff)
downloadhdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.zip
hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.gz
hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.bz2
Subfiling VFD - tidying up and fixing a few new testing failures (#1977)
* Rename Subfiling IOC "thread_pool_count" field to "thread_pool_size" * Add simple HDF5 example for Subfiling VFD * Subfiling VFD - never cache app topology as it may change * Subfiling VFD - cleanup unused funtionality and tidy up some TODOs * Subfiling VFD - tidy up subfiling error handling in H5subfiling_common.c * Subfiling VFD - show number of failed I/O requests on close * Subfiling VFD - Update file cmp callback after switching to MPI I/O VFD * Amend RELEASE.txt with info about h5fuse.sh and Subfiling limitations * Subfiling VFD - switch to using H5_basename and H5_dirname
Diffstat (limited to 'src/H5FDsubfiling/H5FDsubfiling.c')
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.c277
1 files changed, 28 insertions, 249 deletions
diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c
index 4cdded3..8fe8f77 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.c
+++ b/src/H5FDsubfiling/H5FDsubfiling.c
@@ -108,37 +108,6 @@ typedef struct H5FD_subfiling_t {
char *file_dir; /* Directory where we find files */
char *file_path; /* The user defined filename */
-#ifndef H5_HAVE_WIN32_API
- /* On most systems the combination of device and i-node number uniquely
- * identify a file. Note that Cygwin, MinGW and other Windows POSIX
- * environments have the stat function (which fakes inodes)
- * and will use the 'device + inodes' scheme as opposed to the
- * Windows code further below.
- */
- dev_t device; /* file device number */
- ino_t inode; /* file i-node number */
-#else
- /* Files in windows are uniquely identified by the volume serial
- * number and the file index (both low and high parts).
- *
- * There are caveats where these numbers can change, especially
- * on FAT file systems. On NTFS, however, a file should keep
- * those numbers the same until renamed or deleted (though you
- * can use ReplaceFile() on NTFS to keep the numbers the same
- * while renaming).
- *
- * See the MSDN "BY_HANDLE_FILE_INFORMATION Structure" entry for
- * more information.
- *
- * http://msdn.microsoft.com/en-us/library/aa363788(v=VS.85).aspx
- */
- DWORD nFileIndexLow;
- DWORD nFileIndexHigh;
- DWORD dwVolumeSerialNumber;
-
- HANDLE hFile; /* Native windows file handle */
-#endif /* H5_HAVE_WIN32_API */
-
/*
* The element layouts above this point are identical with the
* H5FD_ioc_t structure. As a result,
@@ -175,18 +144,6 @@ typedef struct H5FD_subfiling_t {
#define REGION_OVERFLOW(A, Z) \
(ADDR_OVERFLOW(A) || SIZE_OVERFLOW(Z) || HADDR_UNDEF == (A) + (Z) || (HDoff_t)((A) + (Z)) < (HDoff_t)(A))
-#define H5FD_SUBFILING_DEBUG_OP_CALLS 0 /* debugging print toggle; 0 disables */
-
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
-#define H5FD_SUBFILING_LOG_CALL(name) \
- do { \
- HDprintf("called %s()\n", (name)); \
- HDfflush(stdout); \
- } while (0)
-#else
-#define H5FD_SUBFILING_LOG_CALL(name) /* no-op */
-#endif /* H5FD_SUBFILING_DEBUG_OP_CALLS */
-
/* Prototypes */
static herr_t H5FD__subfiling_term(void);
static void *H5FD__subfiling_fapl_get(H5FD_t *_file);
@@ -393,18 +350,6 @@ H5FD__subfiling_term(void)
herr_t ret_value = SUCCEED;
if (H5FD_SUBFILING_g >= 0) {
- /* Free the subfiling application layout information */
- if (sf_app_layout) {
- HDfree(sf_app_layout->layout);
- sf_app_layout->layout = NULL;
-
- HDfree(sf_app_layout->node_ranks);
- sf_app_layout->node_ranks = NULL;
-
- HDfree(sf_app_layout);
- sf_app_layout = NULL;
- }
-
/* Unregister from HDF5 error API */
if (H5subfiling_err_class_g >= 0) {
if (H5Eunregister_class(H5subfiling_err_class_g) < 0)
@@ -646,12 +591,21 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa)
HDassert(fa != NULL);
if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION)
- H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version");
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "unknown H5FD_subfiling_config_t version");
if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid H5FD_subfiling_config_t magic value");
- /* TODO: add extra subfiling configuration validation code */
+ if (fa->ioc_fapl_id < 0)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC FAPL ID");
+
+ if (!fa->require_ioc)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "Subfiling VFD currently always requires IOC VFD to be used");
+
+ if (fa->shared_cfg.ioc_selection < SELECT_IOC_ONE_PER_NODE ||
+ fa->shared_cfg.ioc_selection >= ioc_selection_options)
+ H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC selection method");
done:
H5_SUBFILING_FUNC_LEAVE;
@@ -724,8 +678,6 @@ H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr)
int ret_value = 0;
H5P_genplist_t *plist_ptr = NULL;
- H5FD_SUBFILING_LOG_CALL(__func__);
-
HDassert(id_out_ptr != NULL);
if (FALSE == H5P_isa_class(fapl_id, H5P_FILE_ACCESS))
@@ -917,24 +869,22 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
}
if (NULL != (file_ptr->file_path = HDrealpath(name, NULL))) {
- char *path = NULL;
- char *directory = dirname(path);
+ char *path = NULL;
- if (NULL == (path = HDstrdup(file_ptr->file_path)))
+ if (NULL == (path = H5MM_strdup(file_ptr->file_path)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy subfiling subfile path");
- if (NULL == (file_ptr->file_dir = HDstrdup(directory))) {
- HDfree(path);
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL,
- "can't copy subfiling subfile directory path");
+ if (H5_dirname(path, &file_ptr->file_dir) < 0) {
+ H5MM_free(path);
+ H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't get subfile dirname");
}
- HDfree(path);
+ H5MM_free(path);
}
else {
if (ENOENT == errno) {
if (NULL == (file_ptr->file_path = HDstrdup(name)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy file name");
- if (NULL == (file_ptr->file_dir = HDstrdup(".")))
+ if (NULL == (file_ptr->file_dir = H5MM_strdup(".")))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "can't set subfile directory path");
}
else
@@ -1041,21 +991,6 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
HDassert(file_ptr);
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- {
- subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id);
-
- HDassert(sf_context);
- HDassert(sf_context->topology);
-
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
- }
-#endif
-
if (file_ptr->sf_file && H5FD_close(file_ptr->sf_file) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close subfile");
@@ -1081,7 +1016,7 @@ done:
HDfree(file_ptr->file_path);
file_ptr->file_path = NULL;
- HDfree(file_ptr->file_dir);
+ H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;
/* Release the file info */
@@ -1237,87 +1172,18 @@ H5FD__subfiling_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t a
* Return: End of file address, the first address past the end of the
* "file", either the filesystem file or the HDF5 file.
*
- * SUBFILING NOTE:
- * The EOF calculation for subfiling is somewhat different
- * than for the more traditional HDF5 file implementations.
- * This statement derives from the fact that unlike "normal"
- * HDF5 files, subfiling introduces a multi-file representation
- * of a single HDF5 file. The plurality of sub-files represents
- * a software RAID-0 based HDF5 file. As such, each sub-file
- * contains a designated portion of the address space of the
- * virtual HDF5 storage. We have no notion of HDF5 datatypes,
- * datasets, metadata, or other HDF5 structures; only BYTES.
- *
- * The organization of the bytes within sub-files is consistent
- * with the RAID-0 striping, i.e. there are IO Concentrators
- * (IOCs) which correspond to a stripe-count (in Lustre) as
- * well as a stripe_size. The combination of these two
- * variables determines the "address" (a combination of IOC
- * and a file offset) of any storage operation.
- *
- * Having a defined storage layout, the virtual file EOF
- * calculation should be the MAXIMUM value returned by the
- * collection of IOCs. Every MPI rank which hosts an IOC
- * maintains its own EOF by updating that value for each
- * WRITE operation that completes, i.e. if a new local EOF
- * is greater than the existing local EOF, the new EOF
- * will replace the old. The local EOF calculation is as
- * follows.
- * 1. At file creation, each IOC is assigned a rank value
- * (0 to N-1, where N is the total number of IOCs) and
- * a 'sf_base_addr' = 'subfile_rank' * 'sf_stripe_size')
- * we also determine the 'sf_blocksize_per_stripe' which
- * is simply the 'sf_stripe_size' * 'n_ioc_concentrators'
- *
- * 2. For every write operation, the IOC receives a message
- * containing a file_offset and the data_size.
- *
- * 3. The file_offset + data_size are in turn used to
- * create a stripe_id:
- * IOC-(ioc_rank) IOC-(ioc_rank+1)
- * |<- sf_base_address |<- sf_base_address |
- * ID +--------------------+--------------------+
- * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * ~ ~ ~
- * N:|<- sf_stripe_size ->|<- sf_stripe_size ->|
- * +--------------------+--------------------+
- *
- * The new 'stripe_id' is then used to calculate a
- * potential new EOF:
- * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr
- * + ((file_offset + data_size) % sf_stripe_size)
- *
- * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof.
- *
- *
- * Programmer: Richard Warren
+ * NOTE: This VFD mimics the MPI I/O VFD and so does not try
+ * to keep the EOF updated. The EOF is mostly just needed
+ * right after the file is opened so the library can determine
+ * if the file is empty, truncated or okay.
*
*-------------------------------------------------------------------------
*/
static haddr_t
H5FD__subfiling_get_eof(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type)
{
- const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file;
-#if 0
- int64_t logical_eof = -1;
-#endif
- haddr_t ret_value = HADDR_UNDEF;
-
-#if 0
- /*
- * TODO: this is a heavy weight implementation. We need something like this
- * for file open, and probably for file close. However, in between, something
- * similar to the current solution in the MPIIO VFD might be more appropriate.
- */
- if (H5FD__subfiling__get_real_eof(file->fa.context_id, &logical_eof) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, HADDR_UNDEF, "can't get EOF")
-
- /* Return the global max of all the subfile EOF values */
- ret_value = (haddr_t)(logical_eof);
-
-done:
-#endif
+ const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file;
+ haddr_t ret_value = HADDR_UNDEF;
ret_value = file->eof;
@@ -1390,8 +1256,7 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL,
"addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size);
- /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE)
- */
+ /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */
{
H5FD_mpio_xfer_t xfer_mode;
@@ -1419,11 +1284,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT);
}
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size);
- HDfflush(stdout);
-#endif
-
/*
* Retrieve the subfiling context object and the number
* of I/O concentrators.
@@ -1442,14 +1302,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
ioc_total = sf_context->topology->n_io_concentrators;
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
-#endif
-
if (ioc_total == 0) {
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)",
ioc_total);
@@ -1539,18 +1391,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate subfile I/O buffers vector");
- /* TODO: The following is left for future work */
- /*
- * Set ASYNC MODE
- * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file;
- * uint64_t op_code_begin = OPC_BEGIN;
- * uint64_t op_code_complete = OPC_COMPLETE;
- * const void *input = NULL;
- * void *output = NULL;
- * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output);
- * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output);
- */
-
for (int64_t i = 0; i < max_io_req_per_ioc; i++) {
uint32_t final_vec_len = vector_len;
int next_ioc = ioc_start;
@@ -1588,9 +1428,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr
if (MPI_SUCCESS != MPI_Bcast(buf, (int)size, MPI_BYTE, 0, file_ptr->comm))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "can't broadcast data from rank 0");
}
-
- /* TODO: The following is left for future work */
- /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */
}
}
@@ -1658,8 +1495,7 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL,
"addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size);
- /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE)
- */
+ /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */
{
H5FD_mpio_xfer_t xfer_mode;
@@ -1684,11 +1520,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT);
}
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size);
- HDfflush(stdout);
-#endif
-
/*
* Retrieve the subfiling context object and the number
* of I/O concentrators.
@@ -1707,14 +1538,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
ioc_total = sf_context->topology->n_io_concentrators;
-#if H5FD_SUBFILING_DEBUG_OP_CALLS
- if (sf_context->topology->rank_is_ioc)
- HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
- else
- HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank);
- HDfflush(stdout);
-#endif
-
if (ioc_total == 0) {
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)",
ioc_total);
@@ -1804,18 +1627,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate subfile I/O buffers vector");
- /* TODO: The following is left for future work */
- /*
- * Set ASYNC MODE
- * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file;
- * uint64_t op_code_begin = OPC_BEGIN;
- * uint64_t op_code_complete = OPC_COMPLETE;
- * const void *input = NULL;
- * void *output = NULL;
- * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output);
- * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output);
- */
-
for (int64_t i = 0; i < max_io_req_per_ioc; i++) {
uint32_t final_vec_len = vector_len;
int next_ioc = ioc_start;
@@ -1845,9 +1656,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
io_bufs) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "write to subfile failed");
}
-
- /* TODO: The following is left for future work */
- /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */
}
}
@@ -1858,15 +1666,11 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add
file_ptr->pos = addr;
file_ptr->op = OP_WRITE;
-#if 1 /* Mimic the MPI I/O VFD */
+ /* Mimic the MPI I/O VFD */
file_ptr->eof = HADDR_UNDEF;
if (file_ptr->pos > file_ptr->local_eof)
file_ptr->local_eof = file_ptr->pos;
-#else
- if (file_ptr->pos > file_ptr->eof)
- file_ptr->eof = file_ptr->pos;
-#endif
done:
HDfree(io_bufs);
@@ -2235,7 +2039,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
HDassert(file);
/* Extend the file to make sure it's large enough */
-#if 1 /* Mimic the MPI I/O VFD */
if (!H5F_addr_eq(file->eoa, file->last_eoa)) {
int64_t sf_eof;
int64_t eoa;
@@ -2274,29 +2077,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
/* Update the 'last' eoa value */
file->last_eoa = file->eoa;
}
-#else
- if (!H5F_addr_eq(file->eoa, file->eof)) {
-
- /* Update the eof value */
- file->eof = file->eoa;
-
- /* Reset last file I/O information */
- file->pos = HADDR_UNDEF;
- file->op = OP_UNKNOWN;
-
- /* Update the 'last' eoa value */
- file->last_eoa = file->eoa;
- } /* end if */
-
- /* truncate sub-files */
- /* This is a hack. We should be doing the truncate of the sub-files via calls to
- * H5FD_truncate() with the IOC. However, that system is messed up at present.
- * thus the following hack.
- * JRM -- 12/18/21
- */
- if (H5FD__subfiling__truncate_sub_files(file->context_id, file->eof, file->comm) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTUPDATE, FAIL, "sub-file truncate request failed");
-#endif
done:
H5_SUBFILING_FUNC_LEAVE_API;
@@ -2325,7 +2105,6 @@ H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw)
HDassert(file);
- /* TODO: Consider lock only on IOC ranks for one IOC per subfile case */
if (file->fa.require_ioc) {
#ifdef VERBOSE
HDputs("Subfiling driver doesn't support file locking");