summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfile_int.c
diff options
context:
space:
mode:
authormainzer <mainzer#hdfgroup.org>2021-11-17 15:56:47 (GMT)
committermainzer <mainzer#hdfgroup.org>2021-11-17 15:56:47 (GMT)
commitffc254983fde7b409a633d11114bc5cdbd5d1999 (patch)
treefe34e6b60a779c69f3b64b6fea387674e26d9059 /src/H5FDsubfile_int.c
parent9975eaeb21744ea03c8d3a45a496c9c0594428d5 (diff)
downloadhdf5-ffc254983fde7b409a633d11114bc5cdbd5d1999.zip
hdf5-ffc254983fde7b409a633d11114bc5cdbd5d1999.tar.gz
hdf5-ffc254983fde7b409a633d11114bc5cdbd5d1999.tar.bz2
Interim checkin of selection_io_with_subfiling_vfd branch
Modified testpar/t_vfd.c to test the subfiling vfd with default configuration. Must update this code to run with a variety of configurations -- most particularly multiple IO concentrators, and stripe depth small enough to test the other IO concentrators. testpar/t_vfd.c exposed a large number of race conditions -- symptoms included: 1) Crashes (usually seg faults) 2) Heap corruption 3) Stack corruption 4) Double frees of heap space 5) Hangs 6) Out of order execution of I/O requests / violations of POSIX semantics 7) Swapped write requests Items 1 - 4 turned out to be primarily caused by file close issues -- specifically, the main I/O concentrator thread and its pool of worker threads were not being shut down properly on file close. Addressing this issue in combination with some other minor fixes seems to have addressed these issues. Items 5 & 6 appear to have been caused by the issuing of I/O requests to the thread pool in an order that did not maintain POSIX semantics. A rewrite of the I/O request dispatch code appears to have solved these issues. Item 7 seems to have been caused by multiple write requests from a given rank being read by the wrong worker thread. Code to issue "unique" tags for each write request via the ACK message appears to have cleaned this up. Note that the code is still in poor condition. A partial list of known defects includes: a) Race condition on file close that allows superblock writes to arrive at the I/O concentrator after it has been shut down. This defect is most evident when testpar/t_subfiling_vfd is run with 8 ranks. b) No error reporting from I/O concentrators -- must design and implement this. For now, mostly just asserts, which suggests that it should be run in debug mode. c) Much commented out and/or unused code. d) Code organization e) Build system with bits of Mercury is awkward -- think of shifting to pthreads with our own thread pool code. f) Need to add native support for vector and selection I/O to the subfiling VFD. 
g) Need to review and possibly rework configuration code. h) Need to store subfile configuration data in a superblock extension message, and add code to use this data on file open. i) Test code is inadequate -- expect more issues as it is extended. In particular, there is no unit test code for the I/O request dispatch code. While I think it is correct at present, we need test code to verify this. Similarly, we need to test with multiple I/O concentrators and much smaller stripe depth. My actual code changes were limited to: src/H5FDioc.c src/H5FDioc_threads.c src/H5FDsubfile_int.c src/H5FDsubfile_mpi.c src/H5FDsubfiling.c src/H5FDsubfiling.h src/H5FDsubfiling_priv.h testpar/t_subfiling_vfd.c testpar/t_vfd.c I'm not sure what is going on with the deletions in src/mercury/src/util. Tested parallel/debug on Charis and Jelly.
Diffstat (limited to 'src/H5FDsubfile_int.c')
-rw-r--r--src/H5FDsubfile_int.c41
1 files changed, 39 insertions, 2 deletions
diff --git a/src/H5FDsubfile_int.c b/src/H5FDsubfile_int.c
index 0d231fa..ecf706f 100644
--- a/src/H5FDsubfile_int.c
+++ b/src/H5FDsubfile_int.c
@@ -71,7 +71,7 @@ static stat_record_t subfiling_stats[TOTAL_STAT_COUNT];
#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count)
#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total)
-#define SF_ALIGNMENT 8
+#define SF_ALIGNMENT 8
static void
maybe_initialize_statistics(void)
@@ -460,7 +460,9 @@ close__subfiles(subfiling_context_t *sf_context, uint64_t fid)
HDassert((sf_context != NULL));
t0 = MPI_Wtime();
-#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1
+/* TODO: can't use comm world here -- must use communicator set in the file open */
+//#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1
+#if 0 /* JRM */ /* Just use regular barrier */
MPI_Request b_req = MPI_REQUEST_NULL;
int mpi_status = MPI_Ibarrier(MPI_COMM_WORLD, &b_req);
if (mpi_status == MPI_SUCCESS) {
@@ -502,6 +504,7 @@ close__subfiles(subfiling_context_t *sf_context, uint64_t fid)
* currently open, we can shutdown the IO concentrator
* as part of the file close.
*/
+#if 0 /* JRM */ /* delete this if all goes well */
if (file_open_count == 1) {
/* Shutdown the main IOC thread */
H5FD_ioc_set_shutdown_flag(1);
@@ -514,9 +517,37 @@ close__subfiles(subfiling_context_t *sf_context, uint64_t fid)
t1 = t2;
t_main_exit = t2 - t1;
H5FD_ioc_finalize_threads();
+
+ t2 = MPI_Wtime();
+ }
+#else /* JRM */
+ if (file_open_count == 1) {
+
+ HDassert(0 == atomic_load(&sf_shutdown_flag));
+
+ /* Shutdown the main IOC thread */
+ atomic_init(&sf_shutdown_flag, 1);
+
+ /* Allow ioc_main to exit.*/
+ do {
+
+ usleep(20);
+
+ } while (0 != atomic_load(&sf_shutdown_flag));
+
+ t1 = MPI_Wtime();
+ H5FD_ioc_wait_thread_main();
+ t2 = MPI_Wtime();
+ t1 = t2;
+ t_main_exit = t2 - t1;
+
+ H5FD_ioc_take_down_thread_pool();
+
t2 = MPI_Wtime();
}
+#endif /* JRM */
+
t_finalize_threads = t2 - t1;
if ((subfile_fid = sf_context->sf_fid) > 0) {
@@ -654,8 +685,10 @@ sf_write_data(int fd, int64_t file_offset, void *data_buffer, int64_t data_size,
ssize_t written = 0;
while (bytes_remaining) {
if ((written = pwrite(fd, this_data, (size_t)bytes_remaining, file_offset)) < 0) {
+ int saved_errno = errno;
struct stat statbuf;
perror("pwrite failed!");
+ HDprintf("\nerrno = %d (%s)\n\n", saved_errno, strerror(saved_errno));
fstat(fd, &statbuf);
HDprintf("[ioc(%d) %s] pwrite(fd, data, bytes_remaining=%ld, "
"file_offset=%ld), fd=%d, st_size=%ld\n",
@@ -1502,7 +1535,11 @@ H5FD__open_subfiles(void *_config_info, uint64_t h5_file_id, int flags)
/* Ensure that the IOC service won't exit
* as we prepare to start up..
*/
+#if 0 /* JRM */ /* delete if all goes well */
H5FD_ioc_set_shutdown_flag(0);
+#else /* JRM */
+ atomic_init(&sf_shutdown_flag, 0);
+#endif /* JRM */
/* If we're actually using the IOCs, we will
* start the service threads on the identified