diff options
author | David Young <dyoung@hdfgroup.org> | 2020-07-15 21:47:07 (GMT) |
---|---|---|
committer | David Young <dyoung@hdfgroup.org> | 2020-07-15 21:47:07 (GMT) |
commit | c66f7d5b65c3cff222b52f3bbc58f655f59205c4 (patch) | |
tree | b33a0b822aabdd7cb20fe3e9fe68b7023a6da48d | |
parent | 0e9f3038d220d4781a0718332782673f5133a3ec (diff) | |
parent | c9d8f3e4f145bfdbdf72579075999ed0b84275c7 (diff) | |
download | hdf5-c66f7d5b65c3cff222b52f3bbc58f655f59205c4.zip hdf5-c66f7d5b65c3cff222b52f3bbc58f655f59205c4.tar.gz hdf5-c66f7d5b65c3cff222b52f3bbc58f655f59205c4.tar.bz2 |
Merge branch 'feature/vfd_swmr' into may-merge-mainzer-to-mine
-rw-r--r-- | src/H5FDvfd_swmr.c | 13 | ||||
-rw-r--r-- | src/H5FDvfd_swmr_private.h | 1 | ||||
-rw-r--r-- | src/H5Fvfd_swmr.c | 82 | ||||
-rw-r--r-- | src/H5PB.c | 42 | ||||
-rw-r--r-- | src/H5PBprivate.h | 1 | ||||
-rw-r--r-- | test/testvfdswmr.sh.in | 60 |
6 files changed, 140 insertions, 59 deletions
diff --git a/src/H5FDvfd_swmr.c b/src/H5FDvfd_swmr.c index fe977a3..a7b6d33 100644 --- a/src/H5FDvfd_swmr.c +++ b/src/H5FDvfd_swmr.c @@ -1296,20 +1296,13 @@ H5FD__vfd_swmr_index_deserialize(const H5FD_vfd_swmr_t *file, HGOTO_DONE(FALSE); /* If the index magic is incorrect, then assume that is a - * temporary error such as a "torn write." Try again. + * temporary error and try again. * * XXX XXX XXX * Under the new protocol, where the index is written in * one write(2), and the header is written in a distinct - * second write(2), it is reasonable to expect that the - * index-write is complete when the index-read occurs. - * So we should not read bad magic because we read a - * "torn" write. - * - * (I am not sure I believe any recent version of UNIX or - * Linux suffers from torn writes on local filesystems! - * Linux manual pages indicate that there was an issue, but - * it was fixed.) + * second write(2), and the header and index are read in + * the reverse order, the index magic usually will be intact. 
* * It is possible under the new protocol that we read * the header on tick `t`, then an arbitrary delay diff --git a/src/H5FDvfd_swmr_private.h b/src/H5FDvfd_swmr_private.h index c6f5a97..df57f24 100644 --- a/src/H5FDvfd_swmr_private.h +++ b/src/H5FDvfd_swmr_private.h @@ -87,6 +87,7 @@ H5_DLL herr_t H5F_vfd_swmr_reader_end_of_tick(struct H5F_t *f, bool); H5_DLL herr_t H5F_vfd_swmr_remove_entry_eot(struct H5F_t *f); H5_DLL herr_t H5F_vfd_swmr_insert_entry_eot(struct H5F_t *f); +H5_DLL void H5F_vfd_swmr_update_entry_eot(eot_queue_entry_t *); H5_DLL herr_t H5F_dump_eot_queue(void); #endif /* _H5FDvfd_swmr_private_H */ diff --git a/src/H5Fvfd_swmr.c b/src/H5Fvfd_swmr.c index dfbf05a..f6cbb24 100644 --- a/src/H5Fvfd_swmr.c +++ b/src/H5Fvfd_swmr.c @@ -1401,6 +1401,59 @@ done: } /* end H5F_vfd_swmr_reader_end_of_tick() */ +static void +insert_eot_entry(eot_queue_entry_t *entry_ptr) +{ + eot_queue_entry_t *prec_ptr; /* The predecessor entry on the EOT end of tick queue */ + + /* Find the insertion point for the entry on the EOT queue */ + TAILQ_FOREACH_REVERSE(prec_ptr, &eot_queue_g, eot_queue, link) { + if (timespeccmp(&prec_ptr->end_of_tick, &entry_ptr->end_of_tick, <=)) + break; + } + + hlog_fast(eotq, "%s: entry %p after %p file %p " + "tick %" PRIu64 " ending %jd.%09ld", __func__, + (void *)entry_ptr, (void *)prec_ptr, (void *)entry_ptr->vfd_swmr_file, + entry_ptr->tick_num, (intmax_t)entry_ptr->end_of_tick.tv_sec, + entry_ptr->end_of_tick.tv_nsec); + + /* Insert the entry onto the EOT queue */ + if (prec_ptr != NULL) + TAILQ_INSERT_AFTER(&eot_queue_g, prec_ptr, entry_ptr, link); + else + TAILQ_INSERT_HEAD(&eot_queue_g, entry_ptr, link); +} + + +/* Update an entry on the EOT queue and move it to its proper place. 
+ */ +void +H5F_vfd_swmr_update_entry_eot(eot_queue_entry_t *entry) +{ + H5F_t *f = entry->vfd_swmr_file; + + /* Unlink the entry from the EOT queue so it can be re-inserted in order */ + + TAILQ_REMOVE(&eot_queue_g, entry, link); + + hlog_fast(eotq, "%s: updating entry %p file %p " + "tick %" PRIu64 " ending %jd.%09ld", __func__, + (void *)entry, (void *)entry->vfd_swmr_file, + entry->tick_num, (intmax_t)entry->end_of_tick.tv_sec, + entry->end_of_tick.tv_nsec); + + assert(entry->vfd_swmr_writer == f->shared->vfd_swmr_writer); + entry->tick_num = f->shared->tick_num; + entry->end_of_tick = f->shared->end_of_tick; + + hlog_fast(eotq, "%s: ... to tick %" PRIu64 " ending %jd.%09ld", __func__, + entry->tick_num, (intmax_t)entry->end_of_tick.tv_sec, + entry->end_of_tick.tv_nsec); + + insert_eot_entry(entry); +} + /*------------------------------------------------------------------------- * @@ -1460,7 +1513,6 @@ herr_t H5F_vfd_swmr_insert_entry_eot(H5F_t *f) { eot_queue_entry_t *entry_ptr; /* An entry on the EOT end of tick queue */ - eot_queue_entry_t *prec_ptr; /* The predecessor entry on the EOT end of tick queue */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -1475,23 +1527,7 @@ H5F_vfd_swmr_insert_entry_eot(H5F_t *f) entry_ptr->end_of_tick = f->shared->end_of_tick; entry_ptr->vfd_swmr_file = f; - /* Found the position to insert the entry on the EOT queue */ - TAILQ_FOREACH_REVERSE(prec_ptr, &eot_queue_g, eot_queue, link) { - if (timespeccmp(&prec_ptr->end_of_tick, &entry_ptr->end_of_tick, <=)) - break; - } - - hlog_fast(eotq, "%s: entry %p after %p file %p " - "tick %" PRIu64 " ending %jd.%09ld", __func__, - (void *)entry_ptr, (void *)prec_ptr, (void *)entry_ptr->vfd_swmr_file, - entry_ptr->tick_num, (intmax_t)entry_ptr->end_of_tick.tv_sec, - entry_ptr->end_of_tick.tv_nsec); - - /* Insert the entry onto the EOT queue */ - if (prec_ptr != NULL) - TAILQ_INSERT_AFTER(&eot_queue_g, prec_ptr, entry_ptr, link); - else - TAILQ_INSERT_HEAD(&eot_queue_g, 
entry_ptr, link); + insert_eot_entry(entry_ptr); done: FUNC_LEAVE_NOAPI(ret_value) @@ -2045,7 +2081,15 @@ H5F_vfd_swmr_process_eot_queue(bool entering_api) } if(timespeccmp(&now, &head->end_of_tick, <)) break; - if (f->shared->vfd_swmr_writer) { + /* If the H5F_shared_t is labeled with a later EOT time than + * the queue entry is, then we have already performed the + * H5F_shared_t's EOT processing. That can happen if + * multiple H5F_t share the H5F_shared_t. Just update the + * EOT queue entry and move to the next. + */ + if (timespeccmp(&head->end_of_tick, &f->shared->end_of_tick, <)) { + H5F_vfd_swmr_update_entry_eot(head); + } else if (f->shared->vfd_swmr_writer) { if (H5F_vfd_swmr_writer_end_of_tick(f, false) < 0) HGOTO_ERROR(H5E_FUNC, H5E_CANTSET, FAIL, "end of tick error for VFD SWMR writer"); @@ -117,6 +117,8 @@ static herr_t H5PB__write_raw(H5F_shared_t *, H5FD_mem_t, haddr_t, size_t, const void *); +static void H5PB_log_access_by_size_counts(const H5PB_t *); + /*********************/ /* Package Variables */ /*********************/ @@ -143,6 +145,7 @@ H5FL_DEFINE_STATIC(H5PB_entry_t); HLOG_OUTLET_DECL(pagebuffer); HLOG_OUTLET_SHORT_DEFN(pagebuffer, all); +HLOG_OUTLET_SHORT_DEFN(pb_access_sizes, pagebuffer); HLOG_OUTLET_SHORT_DEFN(pbflush, pagebuffer); HLOG_OUTLET_SHORT_DEFN(pbflush_entry, pbflush); HLOG_OUTLET_SHORT_DEFN(pbio, pagebuffer); @@ -744,6 +747,8 @@ H5PB_dest(H5F_shared_t *shared) pb_ptr = shared->pb_ptr; + H5PB_log_access_by_size_counts(pb_ptr); + HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC); /* the current implementation if very inefficient, and will @@ -943,6 +948,37 @@ done: } /* H5PB_page_exists */ +static void +H5PB_count_access_by_size(H5PB_t *pb, size_t size) +{ + const size_t nslots = NELMTS(pb->access_size_count); + size_t i, hi; + + for (hi = pb->page_size, i = 0; i < nslots - 1; i++, hi *= 2){ + if (size <= hi) + break; + } + pb->access_size_count[i]++; +} + +static void +H5PB_log_access_by_size_counts(const H5PB_t *pb) +{ + const 
size_t nslots = NELMTS(pb->access_size_count); + size_t i, lo, hi; + + for (lo = 0, hi = pb->page_size, i = 0; + i < nslots - 1; + i++, lo = hi + 1, hi *= 2) { + hlog_fast(pb_access_sizes, + "%p %16" PRIu64 " accesses %8zu - %8zu bytes long", + (const void *)pb, pb->access_size_count[i], lo, hi); + } + hlog_fast(pb_access_sizes, + "%p %16" PRIu64 " accesses %8zu - greater bytes long", + (const void *)pb, pb->access_size_count[i], lo); +} + /*------------------------------------------------------------------------- * @@ -1145,6 +1181,9 @@ H5PB_read(H5F_shared_t *shared, H5FD_mem_t type, haddr_t addr, size_t size, pb_ptr = shared->pb_ptr; + if (pb_ptr != NULL) + H5PB_count_access_by_size(pb_ptr, size); + if ( pb_ptr == NULL ) { bypass_pb = TRUE; /* case 1) -- page buffer is disabled */ @@ -2484,6 +2523,9 @@ H5PB_write(H5F_shared_t *shared, H5FD_mem_t type, haddr_t addr, size_t size, pb_ptr = shared->pb_ptr; + if (pb_ptr != NULL) + H5PB_count_access_by_size(pb_ptr, size); + if ( pb_ptr == NULL ) { bypass_pb = TRUE; /* case 1) -- page buffer is disabled */ diff --git a/src/H5PBprivate.h b/src/H5PBprivate.h index 32e681e..97de7ae 100644 --- a/src/H5PBprivate.h +++ b/src/H5PBprivate.h @@ -628,6 +628,7 @@ typedef struct H5PB_t { int64_t flushes[H5PB__NUM_STAT_TYPES]; int64_t evictions[H5PB__NUM_STAT_TYPES]; int64_t clears[H5PB__NUM_STAT_TYPES]; + uint64_t access_size_count[6]; int64_t max_lru_len; int64_t max_lru_size; int64_t lru_md_skips; diff --git a/test/testvfdswmr.sh.in b/test/testvfdswmr.sh.in index eeeff1d..42384c5 100644 --- a/test/testvfdswmr.sh.in +++ b/test/testvfdswmr.sh.in @@ -624,28 +624,28 @@ if [ ${do_groups:-no} = yes ]; then rm -f vfd_swmr_group_reader.*.{out,rc} fi -# -# Test many small datasets of one and two dimensions. -# -# Perform 50 iterations on 1000 extensible datasets configured with 16x16 -# chunks of 32-bit unsigned integer elements, -# expanding each dataset by a chunk in one dimension (up to 50x1 16x16 chunks) -# on each iteration. 
-# -# Perform the test again, extending each dataset -# in *two* dimensions (up to 50x50 16x16 chunks). -# -for dims in 1 2; do +for options in "-d 1" "-d 2" "-d 1 -V"; do if [ ${do_many_small:-no} = no ]; then continue fi - echo launch vfd_swmr_bigset_writer many small, $dims dimensions + # + # Test many small datasets of one and two dimensions. + # + # Perform 50 iterations on 1000 extensible datasets configured with + # 16x16 chunks of 32-bit unsigned integer elements, + # expanding each dataset by a chunk in one dimension (up to 50x1 + # 16x16 chunks) on each iteration. + # + # Perform the test again, extending each dataset + # in *two* dimensions (up to 50x50 16x16 chunks). + # + echo launch vfd_swmr_bigset_writer many small, options $options catch_out_err_and_rc vfd_swmr_bigset_writer \ - ../vfd_swmr_bigset_writer -n 50 -d $dims -s 1000 -r 16 -c 16 -q & + ../vfd_swmr_bigset_writer -n 50 $options -s 1000 -r 16 -c 16 -q & pid_writer=$! catch_out_err_and_rc vfd_swmr_bigset_reader \ - ../vfd_swmr_bigset_reader -n 50 -d $dims -s 1000 -r 16 -c 16 -q -W & + ../vfd_swmr_bigset_reader -n 50 $options -s 1000 -r 16 -c 16 -q -W & pid_reader=$! # Wait for the reader to finish before signalling the @@ -673,28 +673,28 @@ for dims in 1 2; do rm -f vfd_swmr_bigset_reader.*.{out,rc} done -# -# Test a few big datasets of one and two dimensions. -# -# Perform 50 iterations on 5 extensible datasets configured with 256x256 -# chunks of 32-bit unsigned integer elements, -# expanding each dataset by a chunk in one dimension (up to 50x1 256x256 chunks) -# on each iteration. -# -# Perform the test again, extending each dataset -# in *two* dimensions (up to 50x50 256x256 chunks). -# -for dims in 1 2; do +for options in "-d 1" "-d 2" "-d 1 -V"; do + # + # Test a few big datasets of one and two dimensions. 
+ # + # Perform 50 iterations on 5 extensible datasets configured with + # 256x256 chunks of 32-bit unsigned integer elements, + # expanding each dataset by a chunk in one dimension (up to 50x1 + # 256x256 chunks) on each iteration. + # + # Perform the test again, extending each dataset + # in *two* dimensions (up to 50x50 256x256 chunks). + # if [ ${do_few_big:-no} = no ]; then continue fi - echo launch vfd_swmr_bigset_writer few big, $dims dimensions + echo launch vfd_swmr_bigset_writer few big, options $options catch_out_err_and_rc vfd_swmr_bigset_writer \ - ../vfd_swmr_bigset_writer -n 50 -d $dims -s 5 -r 256 -c 256 -q & + ../vfd_swmr_bigset_writer -n 50 $options -s 5 -r 256 -c 256 -q & pid_writer=$! catch_out_err_and_rc vfd_swmr_bigset_reader \ - ../vfd_swmr_bigset_reader -n 50 -d $dims -s 5 -r 256 -c 256 -q -W & + ../vfd_swmr_bigset_reader -n 50 $options -s 5 -r 256 -c 256 -q -W & pid_reader=$! # Wait for the reader to finish before signalling the |