diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-09-30 02:06:17 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-30 02:06:17 (GMT) |
commit | 79bdc6183ee96cc36bd04569d4cce48202a8ae68 (patch) | |
tree | 86f1719b0b908ff9d174501bb339da384d9df5a4 /src/H5FDsubfiling | |
parent | 1bec0ce2d785b5c0d7f4514fb50f44b524fe4f67 (diff) | |
download | hdf5-79bdc6183ee96cc36bd04569d4cce48202a8ae68.zip hdf5-79bdc6183ee96cc36bd04569d4cce48202a8ae68.tar.gz hdf5-79bdc6183ee96cc36bd04569d4cce48202a8ae68.tar.bz2 |
Subfiling testing fix and documentation (#2132)
* Fix a sporadic failure in Subfiling VFD tests
* Subfiling VFD - add note to H5Pget_fapl_subfiling documentation
Adds a note explaining that H5Pget_fapl_subfiling only returns the settings
originally set on a FAPL, and that those settings may have been overridden at
runtime by the Subfiling VFD's environment variables (such overrides are not
reflected in the returned values).
Diffstat (limited to 'src/H5FDsubfiling')
-rw-r--r-- | src/H5FDsubfiling/H5FDioc_threads.c | 9 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfile_int.c | 48 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfiling.h | 10 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.h | 1 |
4 files changed, 59 insertions, 9 deletions
diff --git a/src/H5FDsubfiling/H5FDioc_threads.c b/src/H5FDsubfiling/H5FDioc_threads.c index b3e8ebc..5bbecab 100644 --- a/src/H5FDsubfiling/H5FDioc_threads.c +++ b/src/H5FDsubfiling/H5FDioc_threads.c @@ -1157,6 +1157,7 @@ ioc_file_truncate(sf_work_request_t *msg) int64_t subfile_idx; int fd; int ioc_idx; + int mpi_code; int ret_value = 0; HDassert(msg); @@ -1181,6 +1182,14 @@ ioc_file_truncate(sf_work_request_t *msg) if (HDftruncate(fd, (off_t)length) != 0) H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, -1, "HDftruncate failed"); + /* + * Send a completion message back to the source that + * requested the truncation operation + */ + if (MPI_SUCCESS != (mpi_code = MPI_Send(msg->header, 1, H5_subfiling_rpc_msg_type, msg->source, + TRUNC_COMPLETED, sf_context->sf_eof_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Send failed", mpi_code); + #ifdef H5FD_IOC_DEBUG HDprintf("[ioc(%d) %s]: truncated subfile to %lld bytes. ret = %d\n", ioc_idx, __func__, (long long)length, errno); diff --git a/src/H5FDsubfiling/H5FDsubfile_int.c b/src/H5FDsubfiling/H5FDsubfile_int.c index c089509..be71b3d 100644 --- a/src/H5FDsubfiling/H5FDsubfile_int.c +++ b/src/H5FDsubfiling/H5FDsubfile_int.c @@ -73,7 +73,9 @@ herr_t H5FD__subfiling__truncate_sub_files(hid_t context_id, int64_t logical_file_eof, MPI_Comm comm) { subfiling_context_t *sf_context = NULL; + MPI_Request *recv_reqs = NULL; int64_t msg[3] = {0}; + int64_t *recv_msgs = NULL; int mpi_size; int mpi_code; herr_t ret_value = SUCCEED; @@ -93,13 +95,35 @@ H5FD__subfiling__truncate_sub_files(hid_t context_id, int64_t logical_file_eof, int64_t num_full_stripes; int64_t num_leftover_stripes; int64_t partial_stripe_len; + int num_subfiles_owned; num_full_stripes = logical_file_eof / sf_context->sf_blocksize_per_stripe; partial_stripe_len = logical_file_eof % sf_context->sf_blocksize_per_stripe; num_leftover_stripes = partial_stripe_len / sf_context->sf_stripe_size; + num_subfiles_owned = sf_context->sf_num_fids; + + if (NULL == 
(recv_reqs = HDmalloc((size_t)num_subfiles_owned * sizeof(*recv_reqs)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "can't allocate receive requests array"); + if (NULL == (recv_msgs = HDmalloc((size_t)num_subfiles_owned * 3 * sizeof(*recv_msgs)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate message array"); + + /* + * Post early receives for messages from the IOC main + * thread that will signal completion of the truncate + * operation + */ + for (int i = 0; i < num_subfiles_owned; i++) { + if (MPI_SUCCESS != + (mpi_code = MPI_Irecv(&recv_msgs[3 * i], 1, H5_subfiling_rpc_msg_type, + sf_context->topology->io_concentrators[sf_context->topology->ioc_idx], + TRUNC_COMPLETED, sf_context->sf_eof_comm, &recv_reqs[i]))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Irecv failed", mpi_code); + } + /* Compute the EOF for each subfile this IOC owns */ - for (int i = 0; i < sf_context->sf_num_fids; i++) { + for (int i = 0; i < num_subfiles_owned; i++) { int64_t subfile_eof = num_full_stripes * sf_context->sf_stripe_size; int64_t global_subfile_idx; @@ -125,14 +149,18 @@ H5FD__subfiling__truncate_sub_files(hid_t context_id, int64_t logical_file_eof, H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Send failed", mpi_code); } - /* sanity check -- compute the file eof using the same mechanism used to - * compute the subfile eof. Assert that the computed value and the - * actual value match. - * - * Do this only for debug builds -- probably delete this before release. - * - * JRM -- 12/15/21 - */ + /* Wait for truncate operations to complete */ + if (MPI_SUCCESS != (mpi_code = MPI_Waitall(num_subfiles_owned, recv_reqs, MPI_STATUSES_IGNORE))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Waitall", mpi_code); + + /* sanity check -- compute the file eof using the same mechanism used to + * compute the subfile eof. Assert that the computed value and the + * actual value match. 
+ * + * Do this only for debug builds -- probably delete this before release. + * + * JRM -- 12/15/21 + */ #ifndef NDEBUG { @@ -160,6 +188,8 @@ H5FD__subfiling__truncate_sub_files(hid_t context_id, int64_t logical_file_eof, H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); done: + HDfree(recv_msgs); + HDfree(recv_reqs); H5_SUBFILING_FUNC_LEAVE; } /* H5FD__subfiling__truncate_sub_files() */ diff --git a/src/H5FDsubfiling/H5FDsubfiling.h b/src/H5FDsubfiling/H5FDsubfiling.h index 93d0c3e..23dae62 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.h +++ b/src/H5FDsubfiling/H5FDsubfiling.h @@ -359,6 +359,16 @@ H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t * the default values and then calling H5Pset_fapl_subfiling() with the configured * H5FD_subfiling_config_t structure. * + * \note H5Pget_fapl_subfiling() returns the #H5FD_SUBFILING driver properties as they + * were initially set for the File Access Property List using H5Pset_fapl_subfiling(). + * Alternatively, the driver properties can be modified at runtime according to values + * set for the #H5FD_SUBFILING_STRIPE_SIZE, #H5FD_SUBFILING_IOC_PER_NODE and + * #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variables. However, driver + * properties set through environment variables will not be reflected in what is + * returned by H5Pget_fapl_subfiling(), so an application may need to check those + * environment variables to get accurate values for the #H5FD_SUBFILING driver + * properties. + * * \since 1.13.2 * */ diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h index ba6dfdc..d4eecee 100644 --- a/src/H5FDsubfiling/H5subfiling_common.h +++ b/src/H5FDsubfiling/H5subfiling_common.h @@ -107,6 +107,7 @@ #define WRITE_COLL (COLL_FUNC | WRITE_OP) #define GET_EOF_COMPLETED (COMPLETED | GET_EOF_OP) +#define TRUNC_COMPLETED (COMPLETED | TRUNC_OP) #define SET_LOGGING (LOGGING_OP) |