From 4f8917f9086088ac5cc7fa3c3deb400afbf33b68 Mon Sep 17 00:00:00 2001 From: mainzer Date: Thu, 3 Jan 2019 13:50:53 -0600 Subject: Interim checkin to allow Vailin to address assertion failure in the memory manager -- details shown below. Note that there are other issues as well -- this is not a working version. [mainzer@jelly test]$ ./vfd_swmr Testing Configure VFD SWMR with fapl PASSED Testing VFD SWMR configuration for the file and fapl PASSED Testing H5Fvfd_swmr_end_tick() for VFD SWMR PASSED Testing Create/Open/Flush an HDF5 file for VFD SWMR PASSED Testing Verify the metadata file for VFD SWMR writer vfd_swmr: H5MVsection.c:233: H5MV__sect_can_merge: Assertion `((sect1->sect_info.addr)!=((haddr_t)(long)(-1)) && (sect2->sect_info.addr)!=((haddr_t)(long)(-1)) && (sect1->sect_info.addr)<(sect2->sect_info.addr))' failed. Abort (core dumped) [mainzer@jelly test]$ --- src/H5AC.c | 65 +-- src/H5ACproxy_entry.c | 1 + src/H5B2cache.c | 3 + src/H5Bcache.c | 1 + src/H5C.c | 1044 +++++++++++++++++++++++++++++++++++++-------- src/H5Cepoch.c | 27 +- src/H5Cpkg.h | 211 ++++++--- src/H5Cprefetched.c | 1 + src/H5Cprivate.h | 231 ++++++++-- src/H5EAcache.c | 5 + src/H5FAcache.c | 3 + src/H5FDvfd_swmr.c | 286 ++++++++++--- src/H5FScache.c | 2 + src/H5Fint.c | 306 +++++++++++-- src/H5Fpkg.h | 10 + src/H5Fsuper_cache.c | 258 +++++++++++ src/H5Gcache.c | 1 + src/H5HFcache.c | 3 + src/H5HGcache.c | 1 + src/H5HLcache.c | 2 + src/H5Ocache.c | 2 + src/H5PB.c | 21 +- src/H5PBpkg.h | 10 +- test/cache.c | 13 +- test/cache_common.c | 31 +- test/cache_common.h | 2 +- test/vfd_swmr.c | 2 +- test/vfd_swmr_generator.c | 2 +- 28 files changed, 2099 insertions(+), 445 deletions(-) diff --git a/src/H5AC.c b/src/H5AC.c index 16bd30a..d7f5002 100644 --- a/src/H5AC.c +++ b/src/H5AC.c @@ -99,36 +99,41 @@ hbool_t H5_coll_api_sanity_check_g = false; */ static const H5AC_class_t *const H5AC_class_s[] = { - H5AC_BT, /* ( 0) B-tree nodes */ - H5AC_SNODE, /* ( 1) symbol table nodes */ - H5AC_LHEAP_PRFX, /* ( 2) local heap prefix */ - H5AC_LHEAP_DBLK, /* ( 3) local heap data block */ - H5AC_GHEAP, /* ( 4) global heap */ - H5AC_OHDR, /* ( 5) object header */ - H5AC_OHDR_CHK, /* ( 6) object header chunk */ - H5AC_BT2_HDR, /* ( 7) v2 B-tree header */ - H5AC_BT2_INT, /* ( 8) v2 B-tree internal node */ - H5AC_BT2_LEAF, /* ( 9) v2 B-tree leaf node */ - H5AC_FHEAP_HDR, /* (10) fractal heap header */ - H5AC_FHEAP_DBLOCK, /* (11) fractal heap direct block */ - H5AC_FHEAP_IBLOCK, /* (12) fractal heap indirect block */ - H5AC_FSPACE_HDR, /* (13) free space header */ - H5AC_FSPACE_SINFO, /* (14) free space sections */ - H5AC_SOHM_TABLE, /* (15) shared object header message master table */ - H5AC_SOHM_LIST, /* (16) shared message index stored as a list */ - H5AC_EARRAY_HDR, /* (17) extensible array header */ - H5AC_EARRAY_IBLOCK, /* (18) extensible array index block */ - H5AC_EARRAY_SBLOCK, /* (19) extensible array super block */ - H5AC_EARRAY_DBLOCK, /* (20) extensible array data block */ + H5AC_BT, /* ( 0) B-tree nodes */ + H5AC_SNODE, /* ( 1) symbol table nodes */ + H5AC_LHEAP_PRFX, /* ( 2) local heap prefix */ + H5AC_LHEAP_DBLK, /* ( 3) local heap data block */ + H5AC_GHEAP, /* ( 4) global heap */ + H5AC_OHDR, /* ( 5) object header */ + H5AC_OHDR_CHK, /* ( 6) object header chunk */ + H5AC_BT2_HDR, /* ( 7) v2 B-tree header */ + H5AC_BT2_INT, /* ( 8) v2 B-tree internal node */ + H5AC_BT2_LEAF, /* ( 9) v2 B-tree leaf node */ + H5AC_FHEAP_HDR, /* (10) fractal heap header */ + H5AC_FHEAP_DBLOCK, /* (11) fractal heap direct block */ + H5AC_FHEAP_IBLOCK, /* (12) fractal heap indirect block */ + H5AC_FSPACE_HDR, /* (13) free space header */ + H5AC_FSPACE_SINFO, /* (14) free space sections */ + H5AC_SOHM_TABLE, /* (15) shared object header message */ + /* master table */ + H5AC_SOHM_LIST, /* (16) shared message index stored as */ + /* a list */ + H5AC_EARRAY_HDR, /* (17) extensible array header */ + H5AC_EARRAY_IBLOCK, /* (18) extensible array index block */ + H5AC_EARRAY_SBLOCK, /* (19) extensible array super block */ + H5AC_EARRAY_DBLOCK, /* (20) extensible array data block */ H5AC_EARRAY_DBLK_PAGE, /* (21) extensible array data block page */ - H5AC_FARRAY_HDR, /* (22) fixed array header */ - H5AC_FARRAY_DBLOCK, /* (23) fixed array data block */ - H5AC_FARRAY_DBLK_PAGE, /* (24) fixed array data block page */ - H5AC_SUPERBLOCK, /* (25) file superblock */ - H5AC_DRVRINFO, /* (26) driver info block (supplements superblock) */ - H5AC_EPOCH_MARKER, /* (27) epoch marker - always internal to cache */ - H5AC_PROXY_ENTRY, /* (28) cache entry proxy */ - H5AC_PREFETCHED_ENTRY /* (29) prefetched entry - always internal to cache */ + H5AC_FARRAY_HDR, /* (22) fixed array header */ + H5AC_FARRAY_DBLOCK, /* (23) fixed array data block */ + H5AC_FARRAY_DBLK_PAGE, /* (24) fixed array data block page */ + H5AC_SUPERBLOCK, /* (25) file superblock */ + H5AC_DRVRINFO, /* (26) driver info block (supplements */ + /* superblock) */ + H5AC_EPOCH_MARKER, /* (27) epoch marker - always internal */ + /* to cache */ + H5AC_PROXY_ENTRY, /* (28) cache entry proxy */ + H5AC_PREFETCHED_ENTRY /* (29) prefetched entry - always */ + /* internal to cache */ }; @@ -1342,7 +1347,7 @@ H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t ne HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "can't log moved entry") #endif /* H5_HAVE_PARALLEL */ - if(H5C_move_entry(f->shared->cache, type, old_addr, new_addr) < 0) + if(H5C_move_entry(f, type, old_addr, new_addr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "H5C_move_entry() failed") #ifdef H5_HAVE_PARALLEL diff --git a/src/H5ACproxy_entry.c b/src/H5ACproxy_entry.c index 498d023..1302b83 100644 --- a/src/H5ACproxy_entry.c +++ b/src/H5ACproxy_entry.c @@ -82,6 +82,7 @@ const H5AC_class_t H5AC_PROXY_ENTRY[1] = {{ H5AC__proxy_entry_notify, /* 'notify' callback */ H5AC__proxy_entry_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5B2cache.c b/src/H5B2cache.c index 2e1d37b..8b89d5b 100644 --- a/src/H5B2cache.c +++ b/src/H5B2cache.c @@ -113,6 +113,7 @@ const H5AC_class_t H5AC_BT2_HDR[1] = {{ H5B2__cache_hdr_notify, /* 'notify' callback */ H5B2__cache_hdr_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5B2 inherits cache-like properties from H5AC */ @@ -131,6 +132,7 @@ const H5AC_class_t H5AC_BT2_INT[1] = {{ H5B2__cache_int_notify, /* 'notify' callback */ H5B2__cache_int_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5B2 inherits cache-like properties from H5AC */ @@ -149,6 +151,7 @@ const H5AC_class_t H5AC_BT2_LEAF[1] = {{ H5B2__cache_leaf_notify, /* 'notify' callback */ H5B2__cache_leaf_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5Bcache.c b/src/H5Bcache.c index a0a75c8..e3febe1 100644 --- a/src/H5Bcache.c +++ b/src/H5Bcache.c @@ -81,6 +81,7 @@ const H5AC_class_t H5AC_BT[1] = {{ NULL, /* 'notify' callback */ H5B__cache_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /*******************/ diff --git a/src/H5C.c b/src/H5C.c index 52c8dfa..bd2d2e2 100644 --- a/src/H5C.c +++ b/src/H5C.c @@ -322,14 +322,21 @@ H5C_create(size_t max_cache_size, cache_ptr->slist_ring_size[i] = (size_t)0; } /* end for */ - for(i = 0; i < H5C__HASH_TABLE_LEN; i++) + for(i = 0; i < H5C__HASH_TABLE_LEN; i++) { (cache_ptr->index)[i] = NULL; + } cache_ptr->il_len = 0; cache_ptr->il_size = (size_t)0; cache_ptr->il_head = NULL; cache_ptr->il_tail = NULL; + /* Fields supporting VFD SWMR */ + cache_ptr->vfd_swmr_reader = FALSE; + for(i = 0; i < H5C__PAGE_HASH_TABLE_LEN; i++) { + (cache_ptr->page_index)[i] = NULL; + } + /* Tagging Field Initializations */ cache_ptr->ignore_tags = FALSE; @@ -735,8 +742,9 @@ herr_t H5C_prep_for_file_close(H5F_t *f) { H5C_t * cache_ptr; - hbool_t image_generated = FALSE; /* Whether a cache image was generated */ - herr_t ret_value = SUCCEED; /* Return value */ + hbool_t image_generated = FALSE; /* Whether a cache image was */ + /* generated */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -914,6 +922,358 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_evict_or_refresh_all_entries_in_page + * + * Purpose: When a file is opened in VFD SWMR reader mode, we must be + * able to ensure that the metadata cache contains no stale + * entries at the end of each tick. + * + * To do this, we must identify pages that have changed in + * the last tick, and either evict, or refresh all modified + * entries in the modified pages. If an evicted entry is + * needed subsequently, it must be reloaded, almost always + * from the metadata file. + * + * This function performs this function of a given page buffer + * page. + * + * This is done by mapping the supplied page to associated + * hash bucket in the page_index, and then scanning the + * contents of the bucket for entries residing in the + * target page. + * + * For each such entry, we test to see if it is pinned. + * If it is not, we simply evict it. + * + * Pinned entries may in turn be divided into tagged and + * un-tagged entries. + * + * For pinned tagged entries, it would be best if we could + * simply tell the associated cache client to refresh it. + * However, until we have that facility, we look up its tag, + * and evict all entries associated with that on disk object. + * + * For pinned, un-tagged entries (i.e. super block, global + * heaps, etc. we must instruct the client to refresh the + * entry. Fortunately, this is only necessary for the + * super block in the initial VFD SWMR implementation. + * + * Note that there is also the possibility that while the + * page was modified, one or more metadata entries in + * that page were not. Eventually we should write code + * to detect this -- but not for the prototype. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer -- 12/16/18 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_evict_or_refresh_all_entries_in_page(H5F_t * f, uint64_t page, + uint64_t tick) +{ + int i; + size_t image_len; + size_t original_image_len; + void * image_ptr = NULL; + void * new_image_ptr = NULL; + unsigned flush_flags = (H5C__FLUSH_INVALIDATE_FLAG | + H5C__FLUSH_CLEAR_ONLY_FLAG); + haddr_t tag; + H5PB_t * pb_ptr = NULL; + H5C_t * cache_ptr = NULL; + H5C_cache_entry_t * entry_ptr; + H5C_cache_entry_t * follow_ptr = NULL; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity check */ + HDassert(f); + HDassert(f->shared); + + cache_ptr = f->shared->cache; + + HDassert(cache_ptr); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(cache_ptr->vfd_swmr_reader); + + /* since file must be opened R/O for a VFD SWMR reader, the skip + * list must be empty. Verify this. + */ + HDassert(cache_ptr->slist_len == 0); + + pb_ptr = f->shared->pb_ptr; + + HDassert(pb_ptr); + HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC); + + i = H5C__PI_HASH_FCN(page); + + entry_ptr = (cache_ptr->page_index)[i]; + + while (entry_ptr) { + + HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); + + if ( entry_ptr->page == page ) { + + HDassert(entry_ptr->addr >= (haddr_t)(page * pb_ptr->page_size)); + HDassert(entry_ptr->addr < (haddr_t)((page+1) * pb_ptr->page_size)); + + /* since end of tick occurs only on API call entry in + * the VFD SWMR reader case, the entry must not be protected. + * + * since the VFD SWMR reader must have opened the file R/O, + * the entry must be clean. + */ + HDassert(!(entry_ptr->is_protected)); + HDassert(!(entry_ptr->is_dirty)); + + /* we must evict the entry, as page has been modified, and + * thus the entry may be out of date. + * + * Note that we should eventually modify this code to be more + * intelligent, and only evict entries if they have in fact changed. + * However, no time for that in the first cut. + */ + if ( entry_ptr->is_pinned ) { + + if ( entry_ptr->tag_info ) { + + tag = entry_ptr->tag_info->tag; + + HDassert(!(entry_ptr->tag_info->corked)); + + /* passing TRUE for the match_global parameter. Look + * into this and verify that it is the right thing to + * do. + */ + if ( H5C_evict_tagged_entries(f, tag, TRUE) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, \ + "can't evict pinned and tagged entries") + + /* *entry_ptr should be evicted -- set entry_ptr to NULL */ + entry_ptr = NULL; + + } else if ( entry_ptr->type->refresh ) { + /* If the entry is pinned and not tagged, it is a global + * object such as the super block, persistant free + * space manager, or a global heap. + * + * In this case, we must either refresh the entry, + * or verify that it has not changed, and that no + * action is required. + * + * For the initial prototype, we avoid global heaps, + * and claim that we need not concern ourselves with + * free space managers, since the VFD SWMR reader opens + * the file R/O. + */ + + /* 1) Get the on disk size of the entry. Since the + * the entry is already loaded, we can use the + * size listed in the entry. + * + * This will almost always be correct, but we + * allow a second try as it is possible that the + * version of the entry may change on the writer. + */ + image_len = entry_ptr->size; + original_image_len = image_len; + + /* 2) Allocate and read the buffer. + * + * Note that this will be satisfied from the metadata + * file via the VFD SWMR reade VFD. + * + * For this reason, we don't nead to check for reads + * past the EOA. Torn reads and checksums are also + * not an issue, since pages in the metadata file + * are checksumed and re-tried if necessary in the + * VFD SWMR reader VFD. + */ + if ( NULL == (image_ptr = (uint8_t *) + H5MM_malloc(image_len + H5C_IMAGE_EXTRA_SPACE)) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for image buffer") + +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(image_ptr + image_len, H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + if ( H5F_block_read(f, entry_ptr->type->mem_type, + entry_ptr->addr, + image_len, image_ptr) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, FAIL, \ + "Can't read image (1)") + + /* 3) Call the refresh callback. If it doesn't + * request a different image size, goto 6) + */ + if ( entry_ptr->type->refresh(f, (void *)entry_ptr, + image_ptr, &image_len) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, FAIL, \ + "Can't refresh entry (1)") + + if ( image_len != original_image_len ) { + + /* 4) If image_len has changed, re-allocate and re-read + * the image. + * + * Note: Generate a log entry in this case + */ + + if ( NULL == (new_image_ptr = H5MM_realloc(image_ptr, + image_len + H5C_IMAGE_EXTRA_SPACE)) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, \ + "re-alloc of image buffer failed.") + + image_ptr = new_image_ptr; + +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(image_ptr + image_len, H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + if ( H5F_block_read(f, entry_ptr->type->mem_type, + entry_ptr->addr, + image_len, image_ptr) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, FAIL, \ + "Can't read image (2)") + + /* 5) Call the refresh callback again. Requesting + * a different buffer size again is an error. + */ + original_image_len = image_len; + if ( entry_ptr->type->refresh(f, (void *)entry_ptr, + image_ptr, + &image_len) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, FAIL, \ + "Can't refresh entry (2)") + + if ( image_len != original_image_len ) + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "2nd refresh call changed image_len.") + } + + /* 6) Mark the entry as having been looked at this + * this tick to accooadate later sanity chackes. + */ + entry_ptr->refreshed_in_tick = tick; + + /* 7) Free the old image if it exists, and replace + * it with the new image. + */ + if ( entry_ptr->image_ptr ) { + + entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr); + } + entry_ptr->image_ptr = image_ptr; + + /* 8) Since *entry_ptr has been refreshed and not + * evicted, we can leave entry_ptr defined, and + * and continue the scan of the bucket from + * that point. + */ + + } else { + + /* The entry is pinned, is not tagged, and has no + * callback. + * + * This should be un-reachable. If it is reached, we + * probably have another refresh callback to write. + */ + HDassert(FALSE); + } + } else { /* simply evict the entry */ + + /* since the entry is clean, it must not be on the + * skip list -- thus no need for the + * H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG. + */ + if ( H5C__flush_single_entry(f, entry_ptr, flush_flags) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, \ + "can't evict unpinned entry") + + /* *entry_ptr should be evicted -- set entry_ptr to NULL */ + entry_ptr = NULL; + } + + /* If entry_ptr is NULL, it was evicted, and we must continue + * the scan from follow_ptr, or start at the head of the + * bucket list it follow_ptr is NULL as well. + * + * If follow_ptr isn't NULL, set entry_ptr to follow_ptr->pi_next. + * Otherwise, set entry_ptr to point to the first item in the hash + * bucket. + */ + if ( entry_ptr ) { + + /* *entry_ptr was refreshed, not evicted. Continue the + * the scan from that point, and update follow_ptr. + */ + follow_ptr = entry_ptr; + entry_ptr = entry_ptr->pi_next; + + } else if ( follow_ptr ) { + + /* *entry_ptr was evicted. Since follow_ptr is not NULL, + * we can continue the scan from that point. + */ + entry_ptr = follow_ptr->pi_next; + + } else { + + /* follow_ptr is null as well, so we have to re-start + * the scan from the head of the page index bucket list. + */ + + entry_ptr = (cache_ptr->page_index)[i]; + } + } else { + + /* entry belongs to another page -- skip it and go on. */ + follow_ptr = entry_ptr; + entry_ptr = entry_ptr->pi_next; + } + } /* end while */ + + /* at this point, all entries residing in the target page should have + * been either evicted or refreshed -- verify this. + */ + entry_ptr = (cache_ptr->page_index)[i]; + + while (entry_ptr) { + + HDassert((entry_ptr->page != page) || + (entry_ptr->refreshed_in_tick == tick));; + + entry_ptr = entry_ptr->pi_next; + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_evict_or_refresh_all_entries_in_page() */ + + +/*------------------------------------------------------------------------- * Function: H5C_expunge_entry * * Purpose: Use this function to tell the cache to expunge an entry @@ -932,7 +1292,8 @@ H5C_expunge_entry(H5F_t *f, const H5C_class_t *type, haddr_t addr, unsigned flag { H5C_t * cache_ptr; H5C_cache_entry_t * entry_ptr = NULL; - unsigned flush_flags = (H5C__FLUSH_INVALIDATE_FLAG | H5C__FLUSH_CLEAR_ONLY_FLAG); + unsigned flush_flags = (H5C__FLUSH_INVALIDATE_FLAG | + H5C__FLUSH_CLEAR_ONLY_FLAG); herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -1246,7 +1607,8 @@ H5C_insert_entry(H5F_t * f, hbool_t insert_pinned; hbool_t flush_last; #ifdef H5_HAVE_PARALLEL - hbool_t coll_access = FALSE; /* whether access to the cache entry is done collectively */ + hbool_t coll_access = FALSE; /* whether access to the cache */ + /* entry is done collectively */ #endif /* H5_HAVE_PARALLEL */ hbool_t set_flush_marker; hbool_t write_permitted = TRUE; @@ -1265,6 +1627,14 @@ H5C_insert_entry(H5F_t * f, HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + /* if this is a VFD SWMR reader, verify that the page buffer is + * configured. + */ + HDassert( ( ! cache_ptr->vfd_swmr_reader ) || + ( ( f->shared->pb_ptr ) && + ( f->shared->pb_ptr->magic == H5PB__H5PB_T_MAGIC ) ) ); + HDassert( type ); HDassert( type->mem_type == cache_ptr->class_table_ptr[type->id]->mem_type ); HDassert( type->image_len ); @@ -1297,9 +1667,13 @@ H5C_insert_entry(H5F_t * f, if(test_entry_ptr != NULL) { if(test_entry_ptr == entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "entry already in cache") + else - HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "duplicate entry in cache") + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "duplicate entry in cache") } /* end if */ entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; @@ -1369,25 +1743,39 @@ H5C_insert_entry(H5F_t * f, #endif /* H5_HAVE_PARALLEL */ /* initialize cache image related fields */ - entry_ptr->include_in_image = FALSE; - entry_ptr->lru_rank = 0; - entry_ptr->image_dirty = FALSE; - entry_ptr->fd_parent_count = 0; - entry_ptr->fd_parent_addrs = NULL; - entry_ptr->fd_child_count = 0; - entry_ptr->fd_dirty_child_count = 0; - entry_ptr->image_fd_height = 0; - entry_ptr->prefetched = FALSE; - entry_ptr->prefetch_type_id = 0; - entry_ptr->age = 0; - entry_ptr->prefetched_dirty = FALSE; + entry_ptr->include_in_image = FALSE; + entry_ptr->lru_rank = 0; + entry_ptr->image_dirty = FALSE; + entry_ptr->fd_parent_count = 0; + entry_ptr->fd_parent_addrs = NULL; + entry_ptr->fd_child_count = 0; + entry_ptr->fd_dirty_child_count = 0; + entry_ptr->image_fd_height = 0; + entry_ptr->prefetched = FALSE; + entry_ptr->prefetch_type_id = 0; + entry_ptr->age = 0; + entry_ptr->prefetched_dirty = FALSE; #ifndef NDEBUG /* debugging field */ - entry_ptr->serialization_count = 0; + entry_ptr->serialization_count = 0; #endif /* NDEBUG */ - entry_ptr->tl_next = NULL; - entry_ptr->tl_prev = NULL; - entry_ptr->tag_info = NULL; + /* initialize tag list fields */ + entry_ptr->tl_next = NULL; + entry_ptr->tl_prev = NULL; + entry_ptr->tag_info = NULL; + + /* initialize fields supporting VFD SWMR */ + if ( cache_ptr->vfd_swmr_reader ) { + + entry_ptr->page = (addr / f->shared->pb_ptr->page_size); + + } else { + + entry_ptr->page = 0; + } + entry_ptr->refreshed_in_tick = 0; + entry_ptr->pi_next = NULL; + entry_ptr->pi_prev = NULL; /* Apply tag to newly inserted entry */ if(H5C__tag_entry(cache_ptr, entry_ptr) < 0) @@ -1396,36 +1784,60 @@ H5C_insert_entry(H5F_t * f, H5C__RESET_CACHE_ENTRY_STATS(entry_ptr) - if(cache_ptr->flash_size_increase_possible && - (entry_ptr->size > cache_ptr->flash_size_increase_threshold)) - if(H5C__flash_increase_cache_size(cache_ptr, 0, entry_ptr->size) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C__flash_increase_cache_size failed") + if ( cache_ptr->flash_size_increase_possible && + ( entry_ptr->size > cache_ptr->flash_size_increase_threshold ) ) { + + if ( H5C__flash_increase_cache_size(cache_ptr, 0, entry_ptr->size) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "H5C__flash_increase_cache_size failed") + } + + if(cache_ptr->index_size >= cache_ptr->max_cache_size) { - if(cache_ptr->index_size >= cache_ptr->max_cache_size) empty_space = 0; - else + + } else { + empty_space = cache_ptr->max_cache_size - cache_ptr->index_size; + } - if(cache_ptr->evictions_enabled && - (((cache_ptr->index_size + entry_ptr->size) > cache_ptr->max_cache_size) + if ( ( cache_ptr->evictions_enabled ) && + ( ( (cache_ptr->index_size + entry_ptr->size) > + cache_ptr->max_cache_size + ) || - (((empty_space + cache_ptr->clean_index_size) < cache_ptr->min_clean_size)))) { + ( (empty_space + cache_ptr->clean_index_size) < + cache_ptr->min_clean_size + ) + ) + ) { size_t space_needed; - if(empty_space <= entry_ptr->size) + if ( empty_space <= entry_ptr->size ) { + cache_ptr->cache_full = TRUE; + } + + if ( cache_ptr->check_write_permitted != NULL ) { + + if ( ( cache_ptr->check_write_permitted)(f, &write_permitted) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "Can't get write_permitted") + + } else { - if(cache_ptr->check_write_permitted != NULL) { - if((cache_ptr->check_write_permitted)(f, &write_permitted) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "Can't get write_permitted") - } /* end if */ - else write_permitted = cache_ptr->write_permitted; + } HDassert(entry_ptr->size <= H5C_MAX_ENTRY_SIZE); space_needed = entry_ptr->size; - if(space_needed > cache_ptr->max_cache_size) + + if ( space_needed > cache_ptr->max_cache_size ) { + space_needed = cache_ptr->max_cache_size; + } /* Note that space_needed is just the amount of space that * needed to insert the new entry without exceeding the cache @@ -1452,8 +1864,10 @@ H5C_insert_entry(H5F_t * f, * no point in worrying about the third. */ - if(H5C__make_space_in_cache(f, space_needed, write_permitted) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C__make_space_in_cache failed") + if ( H5C__make_space_in_cache(f, space_needed, write_permitted) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "H5C__make_space_in_cache failed") } /* end if */ H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL) @@ -1468,42 +1882,63 @@ H5C_insert_entry(H5F_t * f, if((H5C_validate_protected_entry_list(cache_ptr) < 0) || (H5C_validate_pinned_entry_list(cache_ptr) < 0) || (H5C_validate_lru_list(cache_ptr) < 0)) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed just before done") + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed just before done") #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ /* If the entry's type has a 'notify' callback send a 'after insertion' * notice now that the entry is fully integrated into the cache. */ - if(entry_ptr->type->notify && - (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_AFTER_INSERT, entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "can't notify client about entry inserted into cache") + if ( ( entry_ptr->type->notify ) && + ( (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_AFTER_INSERT, + entry_ptr) < 0 ) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, \ + "can't notify client about entry inserted into cache") H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr) #ifdef H5_HAVE_PARALLEL if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE); - /* If not explicitly disabled, get the cmdr setting from the API context */ - if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) + /* If not explicitly disabled, get the cmdr setting from the + * API context + */ + if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) { + coll_access = H5CX_get_coll_metadata_read(); + } } /* end if */ entry_ptr->coll_access = coll_access; + if(coll_access) { H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL) - /* Make sure the size of the collective entries in the cache remain in check */ + /* Make sure the size of the collective entries in the cache + * remain in check + */ if(H5P_USER_TRUE == f->coll_md_read) { - if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) { + + if ( cache_ptr->max_cache_size * 80 < + cache_ptr->coll_list_size * 100) { + if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries") + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "can't clear collective metadata entries") } /* end if */ } /* end if */ else { - if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) { + if ( cache_ptr->max_cache_size * 40 < + cache_ptr->coll_list_size * 100) { + if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries") + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "can't clear collective metadata entries") } /* end if */ } /* end else */ } /* end if */ @@ -1514,14 +1949,17 @@ done: if((H5C_validate_protected_entry_list(cache_ptr) < 0) || (H5C_validate_pinned_entry_list(cache_ptr) < 0) || (H5C_validate_lru_list(cache_ptr) < 0)) - HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on exit") + HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed on exit") #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ if(ret_value < 0 && entry_tagged) if(H5C__untag_entry(cache_ptr, entry_ptr) < 0) - HDONE_ERROR(H5E_CACHE, H5E_CANTREMOVE, FAIL, "can't remove entry from tag list") + HDONE_ERROR(H5E_CACHE, H5E_CANTREMOVE, FAIL, \ + "can't remove entry from tag list") FUNC_LEAVE_NOAPI(ret_value) + } /* H5C_insert_entry() */ @@ -1817,39 +2255,68 @@ done: * Programmer: John Mainzer * 6/2/04 * + * Changes: Added code to update cache entry page field required + * by VFD SWMR. To do this, we need the page size used + * by the page buffer. The simple way to do this was to + * replace the cache_ptr parameter with a pointer to the + * H5F_t, and then lookup the cache_ptr and pb_ptr as + * required. + * JRM -- 12/13/18 + * *------------------------------------------------------------------------- */ herr_t -H5C_move_entry(H5C_t * cache_ptr, - const H5C_class_t * type, - haddr_t old_addr, - haddr_t new_addr) +H5C_move_entry(H5F_t * f, + const H5C_class_t * type, + haddr_t old_addr, + haddr_t new_addr) { + H5C_t * cache_ptr = NULL; H5C_cache_entry_t * entry_ptr = NULL; H5C_cache_entry_t * test_entry_ptr = NULL; - herr_t ret_value = SUCCEED; /* Return value */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) + /* sanity checks */ + HDassert(f); + HDassert(f->shared); + + cache_ptr = f->shared->cache; + HDassert(cache_ptr); HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + + /* if this is a VFD SWMR reader, verify that the page buffer is + * configured. + */ + HDassert(( ! cache_ptr->vfd_swmr_reader ) || + ( ( f->shared->pb_ptr ) && + ( f->shared->pb_ptr->magic == H5PB__H5PB_T_MAGIC ) )); + + + HDassert(type); HDassert(H5F_addr_defined(old_addr)); HDassert(H5F_addr_defined(new_addr)); HDassert(H5F_addr_ne(old_addr, new_addr)); #if H5C_DO_EXTREME_SANITY_CHECKS - if((H5C_validate_protected_entry_list(cache_ptr) < 0) || - (H5C_validate_pinned_entry_list(cache_ptr) < 0) || - (H5C_validate_lru_list(cache_ptr) < 0)) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on entry") + if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_lru_list(cache_ptr) < 0 ) ) + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed on entry") #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ H5C__SEARCH_INDEX(cache_ptr, old_addr, entry_ptr, FAIL) - if(entry_ptr == NULL || entry_ptr->type != type) + if ( ( entry_ptr == NULL ) || ( entry_ptr->type != type ) ) { + /* the old item doesn't exist in the cache, so we are done. */ HGOTO_DONE(SUCCEED) + } HDassert(entry_ptr->addr == old_addr); HDassert(entry_ptr->type == type); @@ -1858,16 +2325,21 @@ H5C_move_entry(H5C_t * cache_ptr, /* (Moving a R/O entry would mark it dirty, which shouldn't * happen. QAK - 2016/12/02) */ - if(entry_ptr->is_read_only) + if ( entry_ptr->is_read_only ) HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "can't move R/O entry") H5C__SEARCH_INDEX(cache_ptr, new_addr, test_entry_ptr, FAIL) - if(test_entry_ptr != NULL) { /* we are hosed */ - if(test_entry_ptr->type == type) - HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "target already moved & reinserted???") + if ( test_entry_ptr != NULL ) { /* we are hosed */ + + if ( test_entry_ptr->type == type ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, \ + "target already moved & reinserted???") else - HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, "new address already in use?") + + HGOTO_ERROR(H5E_CACHE, H5E_CANTMOVE, FAIL, \ + "new address already in use?") } /* end if */ /* If we get this far we have work to do. Remove *entry_ptr from @@ -1885,10 +2357,12 @@ H5C_move_entry(H5C_t * cache_ptr, * change the addr. If the entry is only in the process of being flushed, * don't mark it as dirty either, lest we confuse the flush call back. */ - if(!entry_ptr->destroy_in_progress) { + if ( ! entry_ptr->destroy_in_progress ) { + H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr, FAIL) - if(entry_ptr->in_slist) { + if ( entry_ptr->in_slist ) { + HDassert(cache_ptr->slist_ptr); H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr, FALSE) } /* end if */ @@ -1896,8 +2370,18 @@ H5C_move_entry(H5C_t * cache_ptr, entry_ptr->addr = new_addr; - if(!entry_ptr->destroy_in_progress) { - hbool_t was_dirty; /* Whether the entry was previously dirty */ + /* update the page in which the entry resides if the file is opened + * as a VFD SWMR reader. + */ + if ( cache_ptr->vfd_swmr_reader ) { + + entry_ptr->page = (new_addr / f->shared->pb_ptr->page_size); + + } + + if ( ! entry_ptr->destroy_in_progress ) { + + hbool_t was_dirty; /* Whether the entry was previously dirty */ /* Remember previous dirty status */ was_dirty = entry_ptr->is_dirty; @@ -1906,11 +2390,17 @@ H5C_move_entry(H5C_t * cache_ptr, entry_ptr->is_dirty = TRUE; /* This shouldn't be needed, but it keeps the test code happy */ - if(entry_ptr->image_up_to_date) { + if ( entry_ptr->image_up_to_date ) { + entry_ptr->image_up_to_date = FALSE; - if(entry_ptr->flush_dep_nparents > 0) - if(H5C__mark_flush_dep_unserialized(entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "Can't propagate serialization status to fd parents") + + if ( entry_ptr->flush_dep_nparents > 0 ) { + + if ( H5C__mark_flush_dep_unserialized(entry_ptr) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, \ + "Can't propagate serialization status to fd parents") + } } /* end if */ /* Modify cache data structures */ @@ -1918,23 +2408,35 @@ H5C_move_entry(H5C_t * cache_ptr, H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL) /* Skip some actions if we're in the middle of flushing the entry */ - if(!entry_ptr->flush_in_progress) { + if ( !entry_ptr->flush_in_progress ) { + /* Update the replacement policy for the entry */ H5C__UPDATE_RP_FOR_MOVE(cache_ptr, entry_ptr, was_dirty, FAIL) /* Check for entry changing status and do notifications, etc. */ if(!was_dirty) { - /* If the entry's type has a 'notify' callback send a 'entry dirtied' - * notice now that the entry is fully integrated into the cache. + + /* If the entry's type has a 'notify' callback send a 'entry + * dirtied' notice now that the entry is fully integrated + * into the cache. */ - if(entry_ptr->type->notify && - (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_ENTRY_DIRTIED, entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "can't notify client about entry dirty flag set") + if ( ( entry_ptr->type->notify ) && + ( (entry_ptr->type->notify) + (H5C_NOTIFY_ACTION_ENTRY_DIRTIED, entry_ptr) < 0 ) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, \ + "can't notify client about entry dirty flag set") + + /* Propagate the dirty flag up the flush dependency chain + * if appropriate + */ + if ( entry_ptr->flush_dep_nparents > 0 ) { + + if ( H5C__mark_flush_dep_dirty(entry_ptr) < 0 ) - /* Propagate the dirty flag up the flush dependency chain if appropriate */ - if(entry_ptr->flush_dep_nparents > 0) - if(H5C__mark_flush_dep_dirty(entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, "Can't propagate flush dep dirty flag") + HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, \ + "Can't propagate flush dep dirty flag") + } } /* end if */ } /* end if */ } /* end if */ @@ -1942,14 +2444,18 @@ H5C_move_entry(H5C_t * cache_ptr, H5C__UPDATE_STATS_FOR_MOVE(cache_ptr, entry_ptr) done: + #if H5C_DO_EXTREME_SANITY_CHECKS - if((H5C_validate_protected_entry_list(cache_ptr) < 0) || - (H5C_validate_pinned_entry_list(cache_ptr) < 0) || - (H5C_validate_lru_list(cache_ptr) < 0)) - HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "an extreme sanity check failed on exit") + if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || + ( H5C_validate_lru_list(cache_ptr) < 0 ) ) + + HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "an extreme sanity check failed on exit") #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ FUNC_LEAVE_NOAPI(ret_value) + } /* H5C_move_entry() */ @@ -6522,17 +7028,19 @@ H5C_load_entry(H5F_t * f, haddr_t addr, void * udata) { - hbool_t dirty = FALSE; /* Flag indicating whether thing was dirtied during deserialize */ - uint8_t * image = NULL; /* Buffer for disk image */ - void * thing = NULL; /* Pointer to thing loaded */ - H5C_cache_entry_t *entry = NULL; /* Alias for thing loaded, as cache entry */ - size_t len; /* Size of image in file */ + hbool_t dirty = FALSE; /* Flag indicating whether thing */ + /* was dirtied during deserialize */ + uint8_t * image = NULL; /* Buffer for disk image */ + void * thing = NULL; /* Pointer to thing loaded */ + H5C_cache_entry_t *entry = NULL; /* Alias for thing loaded, as */ + /* cache entry */ + size_t len; /* Size of image in file */ #ifdef H5_HAVE_PARALLEL - int mpi_rank = 0; /* MPI process rank */ - MPI_Comm comm = MPI_COMM_NULL; /* File MPI Communicator */ - int mpi_code; /* MPI error code */ + int mpi_rank = 0; /* MPI process rank */ + MPI_Comm comm = MPI_COMM_NULL; /* File MPI Communicator */ + int mpi_code; /* MPI error code */ #endif /* H5_HAVE_PARALLEL */ - void * ret_value = NULL; /* Return value */ + void * ret_value = NULL; /* Return value */ FUNC_ENTER_NOAPI_NOINIT @@ -6540,13 +7048,28 @@ H5C_load_entry(H5F_t * f, HDassert(f); HDassert(f->shared); HDassert(f->shared->cache); + HDassert(f->shared->cache->magic == H5C__H5C_T_MAGIC ); + + /* if this is a VFD SWMR reader, verify that the page buffer is + * configured. + */ + HDassert( ( ! f->shared->cache->vfd_swmr_reader ) || + ( ( f->shared->pb_ptr ) && + ( f->shared->pb_ptr->magic == H5PB__H5PB_T_MAGIC ) ) ); + HDassert(type); HDassert(H5F_addr_defined(addr)); HDassert(type->get_initial_load_size); - if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) + + if ( type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG ) { + HDassert(type->get_final_load_size); - else + + } else { + HDassert(NULL == type->get_final_load_size); + } + HDassert(type->deserialize); /* Can't see how skip reads could be usefully combined with @@ -6555,41 +7078,61 @@ H5C_load_entry(H5F_t * f, HDassert(!((type->flags & H5C__CLASS_SKIP_READS) && (type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG))); - /* Call the get_initial_load_size callback, to retrieve the initial size of image */ - if(type->get_initial_load_size(udata, &len) < 0) + /* Call the get_initial_load_size callback, to retrieve the initial + * size of image + */ + if ( type->get_initial_load_size(udata, &len) < 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "can't retrieve image size") + HDassert(len > 0); /* Check for possible speculative read off the end of the file */ - if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) - if(H5C__verify_len_eoa(f, type, addr, &len, FALSE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "invalid len with respect to EOA") + if ( type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG ) { + + if ( H5C__verify_len_eoa(f, type, addr, &len, FALSE) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "invalid len with respect to EOA") + } /* Allocate the buffer for reading the on-disk entry image */ - if(NULL == (image = (uint8_t *)H5MM_malloc(len + H5C_IMAGE_EXTRA_SPACE))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "memory allocation failed for on disk image buffer") + if ( NULL == (image = (uint8_t *)H5MM_malloc(len + H5C_IMAGE_EXTRA_SPACE)) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, \ + "memory allocation failed for on disk image buffer") + #if H5C_DO_MEMORY_SANITY_CHECKS HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ #ifdef H5_HAVE_PARALLEL - if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { - if((mpi_rank = H5F_mpi_get_rank(f)) < 0) + if ( H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) ) { + + if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "Can't get MPI rank") - if((comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL) + + if ( (comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "get_comm request failed") + } /* end if */ #endif /* H5_HAVE_PARALLEL */ /* Get the on-disk entry image */ - if(0 == (type->flags & H5C__CLASS_SKIP_READS)) { - unsigned tries, max_tries; /* The # of read attempts */ - unsigned retries; /* The # of retries */ - htri_t chk_ret; /* return from verify_chksum callback */ - size_t actual_len = len; /* The actual length, after speculative reads have been resolved */ - uint64_t nanosec = 1; /* # of nanoseconds to sleep between retries */ - void *new_image; /* Pointer to image */ - hbool_t len_changed = TRUE; /* Whether to re-check speculative entries */ + if ( 0 == (type->flags & H5C__CLASS_SKIP_READS) ) { + + unsigned tries, max_tries; /* The # of read attempts */ + unsigned retries; /* The # of retries */ + htri_t chk_ret; /* return from verify_chksum callback */ + size_t actual_len = len; /* The actual length, after speculative */ + /* reads have been resolved */ + uint64_t nanosec = 1; /* # of nanoseconds to sleep between */ + /* retries */ + void *new_image; /* Pointer to image */ + hbool_t len_changed = TRUE; /* Whether to re-check speculative */ + /* entries */ /* Get the # of read attempts */ max_tries = tries = H5F_GET_READ_ATTEMPTS(f); @@ -6602,31 +7145,44 @@ H5C_load_entry(H5F_t * f, * --perform checksum verification */ do { - if(actual_len != len) { - if(NULL == (new_image = H5MM_realloc(image, len + H5C_IMAGE_EXTRA_SPACE))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "image null after H5MM_realloc()") + if ( actual_len != len ) { + + if ( NULL == (new_image = H5MM_realloc(image, + len + H5C_IMAGE_EXTRA_SPACE)) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, \ + "image null after H5MM_realloc()") + image = (uint8_t *)new_image; + #if H5C_DO_MEMORY_SANITY_CHECKS - HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); + HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ } /* end if */ #ifdef H5_HAVE_PARALLEL - if(!coll_access || 0 == mpi_rank) { + if ( !coll_access || 0 == mpi_rank ) { #endif /* H5_HAVE_PARALLEL */ - if(H5F_block_read(f, type->mem_type, addr, len, image) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*") + + if ( H5F_block_read(f, type->mem_type, addr, len, image) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, \ + "Can't read image*") #ifdef H5_HAVE_PARALLEL } /* end if */ /* if the collective metadata read optimization is turned on, * bcast the metadata read from process 0 to all ranks in the file * communicator */ - if(coll_access) { + if ( coll_access ) { + int buf_size; H5_CHECKED_ASSIGN(buf_size, int, len, size_t); - if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm))) + if ( MPI_SUCCESS != + (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) } /* end if */ #endif /* H5_HAVE_PARALLEL */ @@ -6634,46 +7190,77 @@ H5C_load_entry(H5F_t * f, /* If the entry could be read speculatively and the length is still * changing, check for updating the actual size */ - if((type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) && len_changed) { + if( ( type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG ) && + ( len_changed ) ) { + /* Retrieve the actual length */ actual_len = len; - if(type->get_final_load_size(image, len, udata, &actual_len) < 0) - continue; /* Transfer control to while() and count towards retries */ + if ( type->get_final_load_size(image, len, udata, + &actual_len) < 0 ) { + + /* Transfer control to while() and count towards retries */ + continue; + } /* Check for the length changing */ - if(actual_len != len) { - /* Verify that the length isn't past the EOA for the file */ - if(H5C__verify_len_eoa(f, type, addr, &actual_len, TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "actual_len exceeds EOA") + if ( actual_len != len ) { + + /* Verify that the length isn't past the EOA for + * the file + */ + if ( H5C__verify_len_eoa(f, type, addr, + &actual_len, TRUE) < 0) + + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "actual_len exceeds EOA") /* Expand buffer to new size */ - if(NULL == (new_image = H5MM_realloc(image, actual_len + H5C_IMAGE_EXTRA_SPACE))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "image null after H5MM_realloc()") + if ( NULL == + (new_image = H5MM_realloc(image, + actual_len + H5C_IMAGE_EXTRA_SPACE))) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, \ + "image null after H5MM_realloc()") + image = (uint8_t *)new_image; + #if H5C_DO_MEMORY_SANITY_CHECKS - HDmemcpy(image + actual_len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); + HDmemcpy(image + actual_len, H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ - if(actual_len > len) { + if ( actual_len > len ) { #ifdef H5_HAVE_PARALLEL - if(!coll_access || 0 == mpi_rank) { + if ( !coll_access || 0 == mpi_rank ) { #endif /* H5_HAVE_PARALLEL */ - /* If the thing's image needs to be bigger for a speculatively - * loaded thing, go get the on-disk image again (the extra portion). + /* If the thing's image needs to be bigger for + * a speculatively loaded thing, go get the + * on-disk image again (the extra portion). */ - if(H5F_block_read(f, type->mem_type, addr + len, actual_len - len, image + len) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "can't read image") + if ( H5F_block_read(f, type->mem_type, addr + len, + actual_len - len, image + len) < 0) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, \ + "can't read image") #ifdef H5_HAVE_PARALLEL } - /* If the collective metadata read optimization is turned on, - * Bcast the metadata read from process 0 to all ranks in the file - * communicator */ - if(coll_access) { + /* If the collective metadata read optimization is + * turned on, Bcast the metadata read from process + * 0 to all ranks in the file communicator + */ + if ( coll_access ) { + int buf_size; - H5_CHECKED_ASSIGN(buf_size, int, actual_len - len, size_t); - if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image + len, buf_size, MPI_BYTE, 0, comm))) - HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + H5_CHECKED_ASSIGN(buf_size, int, actual_len - len, \ + size_t); + + if ( MPI_SUCCESS != + (mpi_code = MPI_Bcast(image + len, buf_size, + MPI_BYTE, 0, comm)) ) + + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", \ + mpi_code) } /* end if */ #endif /* H5_HAVE_PARALLEL */ } /* end if */ @@ -6694,28 +7281,39 @@ H5C_load_entry(H5F_t * f, break; /* Verify the checksum for the metadata image */ - if((chk_ret = type->verify_chksum(image, actual_len, udata)) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "failure from verify_chksum callback") + if ( (chk_ret = type->verify_chksum(image, actual_len, udata)) < 0) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, \ + "failure from verify_chksum callback") + if(chk_ret == TRUE) break; /* Sleep for some time */ H5_nanosleep(nanosec); nanosec *= 2; /* Double the sleep time next time */ + } while(--tries); /* Check for too many tries */ if(tries == 0) - HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "incorrect metadatda checksum after all read attempts") + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, \ + "incorrect metadatda checksum after all read attempts") /* Calculate and track the # of retries */ retries = max_tries - tries; - if(retries) /* Does not track 0 retry */ - if(H5F_track_metadata_read_retries(f, (unsigned)type->mem_type, retries) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "cannot track read tries = %u ", retries) + if ( retries ) { /* Does not track 0 retry */ + + if ( H5F_track_metadata_read_retries(f, (unsigned)type->mem_type, + retries) < 0) + + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "cannot track read tries = %u ", retries) + } /* Set the final length (in case it wasn't set earlier) */ len = actual_len; + } /* end if !H5C__CLASS_SKIP_READS */ /* Deserialize the on-disk image into the native memory form */ @@ -6753,7 +7351,7 @@ H5C_load_entry(H5F_t * f, entry->image_ptr = image; entry->image_up_to_date = !dirty; entry->type = type; - entry->is_dirty = dirty; + entry->is_dirty = dirty; entry->dirtied = FALSE; entry->is_protected = FALSE; entry->is_read_only = FALSE; @@ -6813,9 +7411,23 @@ H5C_load_entry(H5F_t * f, entry->serialization_count = 0; #endif /* NDEBUG */ - entry->tl_next = NULL; - entry->tl_prev = NULL; - entry->tag_info = NULL; + /* initialize tag list fields */ + entry->tl_next = NULL; + entry->tl_prev = NULL; + entry->tag_info = NULL; + + /* initialize fields supporting VFD SWMR */ + if ( f->shared->cache->vfd_swmr_reader ) { + + entry->page = (addr / f->shared->pb_ptr->page_size); + + } else { + + entry->page = 0; + } + entry->refreshed_in_tick = 0; + entry->pi_next = NULL; + entry->pi_prev = NULL; H5C__RESET_CACHE_ENTRY_STATS(entry); @@ -8517,6 +9129,11 @@ done: * Programmer: Mohamad Chaarawi * 2/10/16 * + * Changes: Added code to update the page field in the VFD SWMR reader + * case. + * + * JRM -- 12/14/18 + * *------------------------------------------------------------------------- */ herr_t @@ -8534,6 +9151,14 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) HDassert(f); HDassert(cache_ptr); HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + + /* if this is a VFD SWMR reader, verify that the page buffer is + * configured. + */ + HDassert( ( ! cache_ptr->vfd_swmr_reader ) || + ( ( f->shared->pb_ptr ) && + ( f->shared->pb_ptr->magic == H5PB__H5PB_T_MAGIC ) ) ); + HDassert(entry_ptr); HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); HDassert(!entry_ptr->image_up_to_date); @@ -8545,16 +9170,24 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) old_addr = entry_ptr->addr; /* Call client's pre-serialize callback, if there's one */ - if(entry_ptr->type->pre_serialize && - (entry_ptr->type->pre_serialize)(f, (void *)entry_ptr, - entry_ptr->addr, entry_ptr->size, &new_addr, &new_len, &serialize_flags) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to pre-serialize entry") + if ( ( entry_ptr->type->pre_serialize ) && + ( (entry_ptr->type->pre_serialize)(f, (void *)entry_ptr, + entry_ptr->addr, entry_ptr->size, + &new_addr, &new_len, + &serialize_flags) < 0 ) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unable to pre-serialize entry") /* Check for any flags set in the pre-serialize callback */ - if(serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET) { + if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) { + /* Check for unexpected flags from serialize callback */ - if(serialize_flags & ~(H5C__SERIALIZE_RESIZED_FLAG | H5C__SERIALIZE_MOVED_FLAG)) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unknown serialize flag(s)") + if ( serialize_flags & ~(H5C__SERIALIZE_RESIZED_FLAG | + H5C__SERIALIZE_MOVED_FLAG) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unknown serialize flag(s)") #ifdef H5_HAVE_PARALLEL /* In the parallel case, resizes and moves in @@ -8583,28 +9216,40 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) * If that ceases to be the case, further * tests will be necessary. */ - if(cache_ptr->aux_ptr != NULL) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "resize/move in serialize occurred in parallel case") + if ( cache_ptr->aux_ptr != NULL ) + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "resize/move in serialize occurred in parallel case") #endif /* If required, resize the buffer and update the entry and the cache - * data structures */ - if(serialize_flags & H5C__SERIALIZE_RESIZED_FLAG) { + * data structures + */ + if ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) { + /* Sanity check */ HDassert(new_len > 0); /* Allocate a new image buffer */ - if(NULL == (entry_ptr->image_ptr = H5MM_realloc(entry_ptr->image_ptr, new_len + H5C_IMAGE_EXTRA_SPACE))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for on disk image buffer") + if ( NULL == (entry_ptr->image_ptr = + H5MM_realloc(entry_ptr->image_ptr, + new_len + H5C_IMAGE_EXTRA_SPACE)) ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for on disk image buffer") + #if H5C_DO_MEMORY_SANITY_CHECKS - HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + new_len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); + HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + new_len, \ + H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ /* Update statistics for resizing the entry */ - H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_len); + H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, \ + new_len); /* Update the hash table for the size change */ - H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, new_len, entry_ptr, !(entry_ptr->is_dirty)); + H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, \ + new_len, entry_ptr, !(entry_ptr->is_dirty)); /* The entry can't be protected since we are in the process of * flushing it. Thus we must update the replacement policy data @@ -8619,21 +9264,25 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) */ HDassert(entry_ptr->is_dirty); HDassert(entry_ptr->in_slist); - H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, new_len); + H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, \ + new_len); /* Finally, update the entry for its new size */ entry_ptr->size = new_len; + } /* end if */ /* If required, udate the entry and the cache data structures * for a move */ - if(serialize_flags & H5C__SERIALIZE_MOVED_FLAG) { + if ( serialize_flags & H5C__SERIALIZE_MOVED_FLAG ) { + /* Update stats and entries relocated counter */ H5C__UPDATE_STATS_FOR_MOVE(cache_ptr, entry_ptr) /* We must update cache data structures for the change in address */ if(entry_ptr->addr == old_addr) { + /* Delete the entry from the hash table and the slist */ H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr, FAIL); H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr, FALSE); @@ -8641,21 +9290,37 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) /* Update the entry for its new address */ entry_ptr->addr = new_addr; + /* In the VFD SWMR reader case, update the entry page field */ + if ( cache_ptr->vfd_swmr_reader ) { + + entry_ptr->page = (new_addr / f->shared->pb_ptr->page_size); + } + /* And then reinsert in the index and slist */ H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL); H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL); - } /* end if */ - else /* move is already done for us -- just do sanity checks */ + + } else { /* move is already done for us -- just do sanity checks */ + HDassert(entry_ptr->addr == new_addr); + HDassert(( ! cache_ptr->vfd_swmr_reader ) || + ( entry_ptr->page == + (entry_ptr->addr / f->shared->pb_ptr->page_size) )); + } } /* end if */ } /* end if(serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET) */ /* Serialize object into buffer */ - if(entry_ptr->type->serialize(f, entry_ptr->image_ptr, entry_ptr->size, (void *)entry_ptr) < 0) + if ( entry_ptr->type->serialize(f, entry_ptr->image_ptr, entry_ptr->size, + (void *)entry_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to serialize entry") + #if H5C_DO_MEMORY_SANITY_CHECKS - HDassert(0 == HDmemcmp(((uint8_t *)entry_ptr->image_ptr) + entry_ptr->size, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE)); + HDassert(0 == HDmemcmp(((uint8_t *)entry_ptr->image_ptr) + entry_ptr->size, + H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE)); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + entry_ptr->image_up_to_date = TRUE; /* Propagate the fact that the entry is serialized up the @@ -8665,12 +9330,19 @@ H5C__generate_image(H5F_t *f, H5C_t *cache_ptr, H5C_cache_entry_t *entry_ptr) * for flush dependency parents. */ HDassert(entry_ptr->flush_dep_nunser_children == 0); - if(entry_ptr->flush_dep_nparents > 0) - if(H5C__mark_flush_dep_serialized(entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "Can't propagate serialization status to fd parents") + + if ( entry_ptr->flush_dep_nparents > 0 ) { + + if ( H5C__mark_flush_dep_serialized(entry_ptr) < 0 ) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, \ + "Can't propagate serialization status to fd parents") + } done: + FUNC_LEAVE_NOAPI(ret_value) + } /* H5C__generate_image */ diff --git a/src/H5Cepoch.c b/src/H5Cepoch.c index 8bcab9f..386d23b 100644 --- a/src/H5Cepoch.c +++ b/src/H5Cepoch.c @@ -91,20 +91,21 @@ static herr_t H5C__epoch_marker_fsf_size(const void H5_ATTR_UNUSED * thing, const H5AC_class_t H5AC_EPOCH_MARKER[1] = {{ - /* id = */ H5AC_EPOCH_MARKER_ID, - /* name = */ "epoch marker", - /* mem_type = */ H5FD_MEM_DEFAULT, /* value doesn't matter */ - /* flags = */ H5AC__CLASS_NO_FLAGS_SET, + /* id = */ H5AC_EPOCH_MARKER_ID, + /* name = */ "epoch marker", + /* mem_type = */ H5FD_MEM_DEFAULT, /* value doesn't matter */ + /* flags = */ H5AC__CLASS_NO_FLAGS_SET, /* get_initial_load_size = */ H5C__epoch_marker_get_initial_load_size, - /* get_final_load_size = */ H5C__epoch_marker_get_final_load_size, - /* verify_chksum = */ H5C__epoch_marker_verify_chksum, - /* deserialize = */ H5C__epoch_marker_deserialize, - /* image_len = */ H5C__epoch_marker_image_len, - /* pre_serialize = */ H5C__epoch_marker_pre_serialize, - /* serialize = */ H5C__epoch_marker_serialize, - /* notify = */ H5C__epoch_marker_notify, - /* free_icr = */ H5C__epoch_marker_free_icr, - /* fsf_size = */ H5C__epoch_marker_fsf_size, + /* get_final_load_size = */ H5C__epoch_marker_get_final_load_size, + /* verify_chksum = */ H5C__epoch_marker_verify_chksum, + /* deserialize = */ H5C__epoch_marker_deserialize, + /* image_len = */ H5C__epoch_marker_image_len, + /* pre_serialize = */ H5C__epoch_marker_pre_serialize, + /* serialize = */ H5C__epoch_marker_serialize, + /* notify = */ H5C__epoch_marker_notify, + /* free_icr = */ H5C__epoch_marker_free_icr, + /* fsf_size = */ H5C__epoch_marker_fsf_size, + /* refresh = */ NULL, }}; diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 98d7a01..1fe4523 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -47,8 +47,9 @@ #define H5C__MAX_EPOCH_MARKERS 10 /* Cache configuration settings */ -#define H5C__HASH_TABLE_LEN (64 * 1024) /* must be a power of 2 */ -#define H5C__H5C_T_MAGIC 0x005CAC0E +#define H5C__HASH_TABLE_LEN (64 * 1024) /* must be a power of 2 */ +#define H5C__PAGE_HASH_TABLE_LEN ( 4 * 1024) /* must be a poser of 2 */ +#define H5C__H5C_T_MAGIC 0x005CAC0E /* Initial allocated size of the "flush_dep_parent" array */ #define H5C_FLUSH_DEP_PARENT_INIT 8 @@ -976,14 +977,31 @@ if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \ * * JRM -- 10/15/15 * + * - Updated the existing index macros to maintain a second + * hash table when cache_ptr->vfd_swrm_writer is true. This + * hash table bins entries by the page buffer page they reside + * in, thus facilitating the eviction of entries on a given page + * when that page is modified. + * + * JRM -- 12/14/18 + * ***********************************************************************/ -/* H5C__HASH_TABLE_LEN is defined in H5Cpkg.h. It mut be a power of two. */ +/* H5C__HASH_TABLE_LEN is defined in H5Cpkg.h. It must be a power of two. */ #define H5C__HASH_MASK ((size_t)(H5C__HASH_TABLE_LEN - 1) << 3) #define H5C__HASH_FCN(x) (int)((unsigned)((x) & H5C__HASH_MASK) >> 3) + +/* H5C__PAGE_HASH_TABLE_LEN is defined in H5Cpkg.h. + * It must ve a power of two. + */ +#define H5C__PI_HASH_MASK ((uint64_t)(H5C__PAGE_HASH_TABLE_LEN - 1)) + +#define H5C__PI_HASH_FCN(x) (int)(((uint64_t)(x)) & H5C__PI_HASH_MASK) + + #if H5C_DO_SANITY_CHECKS #define H5C__PRE_HT_INSERT_SC(cache_ptr, entry_ptr, fail_val) \ @@ -993,6 +1011,8 @@ if ( ( (cache_ptr) == NULL ) || \ ( ! H5F_addr_defined((entry_ptr)->addr) ) || \ ( (entry_ptr)->ht_next != NULL ) || \ ( (entry_ptr)->ht_prev != NULL ) || \ + ( (entry_ptr)->pi_next != NULL ) || \ + ( (entry_ptr)->pi_prev != NULL ) || \ ( (entry_ptr)->size <= 0 ) || \ ( H5C__HASH_FCN((entry_ptr)->addr) < 0 ) || \ ( H5C__HASH_FCN((entry_ptr)->addr) >= H5C__HASH_TABLE_LEN ) || \ @@ -1038,45 +1058,52 @@ if ( ( (cache_ptr) == NULL ) || \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "post HT insert SC failed") \ } -#define H5C__PRE_HT_REMOVE_SC(cache_ptr, entry_ptr) \ -if ( ( (cache_ptr) == NULL ) || \ - ( (cache_ptr)->magic != H5C__H5C_T_MAGIC ) || \ - ( (cache_ptr)->index_len < 1 ) || \ - ( (entry_ptr) == NULL ) || \ - ( (cache_ptr)->index_size < (entry_ptr)->size ) || \ - ( ! H5F_addr_defined((entry_ptr)->addr) ) || \ - ( (entry_ptr)->size <= 0 ) || \ - ( H5C__HASH_FCN((entry_ptr)->addr) < 0 ) || \ - ( H5C__HASH_FCN((entry_ptr)->addr) >= H5C__HASH_TABLE_LEN ) || \ - ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] \ - == NULL ) || \ - ( ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] \ - != (entry_ptr) ) && \ - ( (entry_ptr)->ht_prev == NULL ) ) || \ - ( ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] == \ - (entry_ptr) ) && \ - ( (entry_ptr)->ht_prev != NULL ) ) || \ - ( (cache_ptr)->index_size != \ - ((cache_ptr)->clean_index_size + \ - (cache_ptr)->dirty_index_size) ) || \ - ( (cache_ptr)->index_size < ((cache_ptr)->clean_index_size) ) || \ - ( (cache_ptr)->index_size < ((cache_ptr)->dirty_index_size) ) || \ - ( (entry_ptr)->ring <= H5C_RING_UNDEFINED ) || \ - ( (entry_ptr)->ring >= H5C_RING_NTYPES ) || \ - ( (cache_ptr)->index_ring_len[(entry_ptr)->ring] <= 0 ) || \ - ( (cache_ptr)->index_ring_len[(entry_ptr)->ring] > \ - (cache_ptr)->index_len ) || \ - ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] < \ - (entry_ptr)->size ) || \ - ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] > \ - (cache_ptr)->index_size ) || \ - ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] != \ - ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ - (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) || \ - ( (cache_ptr)->index_len != (cache_ptr)->il_len ) || \ - ( (cache_ptr)->index_size != (cache_ptr)->il_size ) ) { \ - HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT remove SC failed") \ +#define H5C__PRE_HT_REMOVE_SC(cache_ptr, entry_ptr) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->magic != H5C__H5C_T_MAGIC ) || \ + ( (cache_ptr)->index_len < 1 ) || \ + ( (entry_ptr) == NULL ) || \ + ( (cache_ptr)->index_size < (entry_ptr)->size ) || \ + ( ! H5F_addr_defined((entry_ptr)->addr) ) || \ + ( (entry_ptr)->size <= 0 ) || \ + ( H5C__HASH_FCN((entry_ptr)->addr) < 0 ) || \ + ( H5C__HASH_FCN((entry_ptr)->addr) >= H5C__HASH_TABLE_LEN ) || \ + ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] \ + == NULL ) || \ + ( ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] \ + != (entry_ptr) ) && \ + ( (entry_ptr)->ht_prev == NULL ) ) || \ + ( ( ((cache_ptr)->index)[(H5C__HASH_FCN((entry_ptr)->addr))] == \ + (entry_ptr) ) && \ + ( (entry_ptr)->ht_prev != NULL ) ) || \ + ( (cache_ptr)->index_size != \ + ((cache_ptr)->clean_index_size + \ + (cache_ptr)->dirty_index_size) ) || \ + ( ( (cache_ptr)->vfd_swmr_reader ) && \ + ( ( ( (cache_ptr)->page_index[(H5C__PI_HASH_FCN((entry_ptr)->page))] \ + != (entry_ptr) ) && \ + ( (entry_ptr)->pi_prev == NULL ) ) || \ + ( ( (cache_ptr)->page_index[(H5C__PI_HASH_FCN((entry_ptr)->page))] \ + == (entry_ptr) ) && \ + ( (entry_ptr)->pi_prev != NULL ) ) ) ) || \ + ( (cache_ptr)->index_size < ((cache_ptr)->clean_index_size) ) || \ + ( (cache_ptr)->index_size < ((cache_ptr)->dirty_index_size) ) || \ + ( (entry_ptr)->ring <= H5C_RING_UNDEFINED ) || \ + ( (entry_ptr)->ring >= H5C_RING_NTYPES ) || \ + ( (cache_ptr)->index_ring_len[(entry_ptr)->ring] <= 0 ) || \ + ( (cache_ptr)->index_ring_len[(entry_ptr)->ring] > \ + (cache_ptr)->index_len ) || \ + ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] < \ + (entry_ptr)->size ) || \ + ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] > \ + (cache_ptr)->index_size ) || \ + ( (cache_ptr)->index_ring_size[(entry_ptr)->ring] != \ + ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ + (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) || \ + ( (cache_ptr)->index_len != (cache_ptr)->il_len ) || \ + ( (cache_ptr)->index_size != (cache_ptr)->il_size ) ) { \ + HDassert(FALSE); \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT remove SC failed") \ } #define H5C__POST_HT_REMOVE_SC(cache_ptr, entry_ptr) \ @@ -1087,6 +1114,8 @@ if ( ( (cache_ptr) == NULL ) || \ ( (entry_ptr)->size <= 0 ) || \ ( (entry_ptr)->ht_prev != NULL ) || \ ( (entry_ptr)->ht_prev != NULL ) || \ + ( (entry_ptr)->pi_prev != NULL ) || \ + ( (entry_ptr)->pi_prev != NULL ) || \ ( (cache_ptr)->index_size != \ ((cache_ptr)->clean_index_size + \ (cache_ptr)->dirty_index_size) ) || \ @@ -1117,7 +1146,9 @@ if ( ( (cache_ptr) == NULL ) || \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "pre HT search SC failed") \ } -/* (Keep in sync w/H5C_TEST__POST_SUC_HT_SEARCH_SC macro in test/cache_common.h -QAK) */ +/* (Keep in sync w/H5C_TEST__POST_SUC_HT_SEARCH_SC macro in + * test/cache_common.h -QAK) + */ #define H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, k, fail_val) \ if ( ( (cache_ptr) == NULL ) || \ ( (cache_ptr)->magic != H5C__H5C_T_MAGIC ) || \ @@ -1136,15 +1167,19 @@ if ( ( (cache_ptr) == NULL ) || \ ( (entry_ptr)->ht_prev->ht_next != (entry_ptr) ) ) || \ ( ( (entry_ptr)->ht_next != NULL ) && \ ( (entry_ptr)->ht_next->ht_prev != (entry_ptr) ) ) ) { \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "post successful HT search SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ + "post successful HT search SC failed") \ } -/* (Keep in sync w/H5C_TEST__POST_HT_SHIFT_TO_FRONT macro in test/cache_common.h -QAK) */ +/* (Keep in sync w/H5C_TEST__POST_HT_SHIFT_TO_FRONT macro in + * test/cache_common.h -QAK) + */ #define H5C__POST_HT_SHIFT_TO_FRONT(cache_ptr, entry_ptr, k, fail_val) \ if ( ( (cache_ptr) == NULL ) || \ ( ((cache_ptr)->index)[k] != (entry_ptr) ) || \ ( (entry_ptr)->ht_prev != NULL ) ) { \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "post HT shift to front SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ + "post HT shift to front SC failed") \ } #define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ @@ -1179,7 +1214,8 @@ if ( ( (cache_ptr) == NULL ) || \ ( (cache_ptr)->index_len != (cache_ptr)->il_len ) || \ ( (cache_ptr)->index_size != (cache_ptr)->il_size ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT entry size change SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "pre HT entry size change SC failed") \ } #define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ @@ -1209,7 +1245,8 @@ if ( ( (cache_ptr) == NULL ) || \ ( (cache_ptr)->index_len != (cache_ptr)->il_len ) || \ ( (cache_ptr)->index_size != (cache_ptr)->il_size ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "post HT entry size change SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "post HT entry size change SC failed") \ } #define H5C__PRE_HT_UPDATE_FOR_ENTRY_CLEAN_SC(cache_ptr, entry_ptr) \ @@ -1236,7 +1273,8 @@ if ( \ ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT update for entry clean SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "pre HT update for entry clean SC failed") \ } #define H5C__PRE_HT_UPDATE_FOR_ENTRY_DIRTY_SC(cache_ptr, entry_ptr) \ @@ -1263,7 +1301,8 @@ if ( \ ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT update for entry dirty SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "pre HT update for entry dirty SC failed") \ } #define H5C__POST_HT_UPDATE_FOR_ENTRY_CLEAN_SC(cache_ptr, entry_ptr) \ @@ -1279,7 +1318,8 @@ if ( ( (cache_ptr)->index_size != \ ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "post HT update for entry clean SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "post HT update for entry clean SC failed") \ } #define H5C__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(cache_ptr, entry_ptr) \ @@ -1295,7 +1335,8 @@ if ( ( (cache_ptr)->index_size != \ ((cache_ptr)->clean_index_ring_size[(entry_ptr)->ring] + \ (cache_ptr)->dirty_index_ring_size[(entry_ptr)->ring]) ) ) { \ HDassert(FALSE); \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "post HT update for entry dirty SC failed") \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "post HT update for entry dirty SC failed") \ } #else /* H5C_DO_SANITY_CHECKS */ @@ -1323,6 +1364,14 @@ if ( ( (cache_ptr)->index_size != \ { \ int k; \ H5C__PRE_HT_INSERT_SC(cache_ptr, entry_ptr, fail_val) \ + if ( cache_ptr->vfd_swmr_reader ) { \ + k = H5C__PI_HASH_FCN((entry_ptr)->page); \ + if ( ( (cache_ptr)->page_index)[k] != NULL ) { \ + (entry_ptr)->pi_next = ((cache_ptr)->page_index)[k]; \ + (entry_ptr)->pi_next->pi_prev = (entry_ptr); \ + } \ + ((cache_ptr)->page_index)[k] = (entry_ptr); \ + } \ k = H5C__HASH_FCN((entry_ptr)->addr); \ if(((cache_ptr)->index)[k] != NULL) { \ (entry_ptr)->ht_next = ((cache_ptr)->index)[k]; \ @@ -1358,13 +1407,30 @@ if ( ( (cache_ptr)->index_size != \ { \ int k; \ H5C__PRE_HT_REMOVE_SC(cache_ptr, entry_ptr) \ + if ( cache_ptr->vfd_swmr_reader ) { \ + k = H5C__PI_HASH_FCN((entry_ptr)->page); \ + if ( (entry_ptr)->ht_next ) { \ + (entry_ptr)->pi_next->pi_prev = (entry_ptr)->pi_prev; \ + } \ + if ( (entry_ptr)->ht_prev ) { \ + (entry_ptr)->pi_prev->pi_next = (entry_ptr)->pi_next; \ + } \ + if ( ( (cache_ptr)->page_index)[k] == (entry_ptr) ) { \ + ((cache_ptr)->page_index)[k] = (entry_ptr)->pi_next; \ + } \ + (entry_ptr)->pi_next = NULL; \ + (entry_ptr)->pi_prev = NULL; \ + } \ k = H5C__HASH_FCN((entry_ptr)->addr); \ - if((entry_ptr)->ht_next) \ + if ( (entry_ptr)->ht_next ) { \ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \ - if((entry_ptr)->ht_prev) \ + } \ + if ( (entry_ptr)->ht_prev ) { \ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \ - if(((cache_ptr)->index)[k] == (entry_ptr)) \ + } \ + if ( ( (cache_ptr)->index)[k] == (entry_ptr) ) { \ ((cache_ptr)->index)[k] = (entry_ptr)->ht_next; \ + } \ (entry_ptr)->ht_next = NULL; \ (entry_ptr)->ht_prev = NULL; \ (cache_ptr)->index_len--; \ @@ -1372,7 +1438,7 @@ if ( ( (cache_ptr)->index_size != \ ((cache_ptr)->index_ring_len[entry_ptr->ring])--; \ ((cache_ptr)->index_ring_size[entry_ptr->ring]) \ -= (entry_ptr)->size; \ - if((entry_ptr)->is_dirty) { \ + if ( (entry_ptr)->is_dirty ) { \ (cache_ptr)->dirty_index_size -= (entry_ptr)->size; \ ((cache_ptr)->dirty_index_ring_size[entry_ptr->ring]) \ -= (entry_ptr)->size; \ @@ -1381,7 +1447,7 @@ if ( ( (cache_ptr)->index_size != \ ((cache_ptr)->clean_index_ring_size[entry_ptr->ring]) \ -= (entry_ptr)->size; \ } \ - if((entry_ptr)->flush_me_last) { \ + if ( (entry_ptr)->flush_me_last ) { \ (cache_ptr)->num_last_entries--; \ HDassert((cache_ptr)->num_last_entries <= 1); \ } \ @@ -3721,6 +3787,33 @@ typedef struct H5C_tag_info_t { * * This field is NULL if the index is empty. * + * Page Index: + * + * For the VFD SWMR reader, it is necessary to map modified pages to + * entries contained in that page so that they can be invalidated. The + * page index is a hash table that provides this service. Note that it + * is only maintained for files that are opened in VFD SWMR reader mode. + * + * Structurally, the page index is identical to the index in the page + * buffer. Specifically, it is a hash table with chaining. The hash + * table size must be a power of two, not the usual prime number. The + * hash function simply clips the high order bits off the page offset + * of the entry's base address. + * + * The page index is maintained by the same macros that maintain the + * regular index. As such, it does not require separate length and + * size fields, as it shares them with the regular index. Instead, + * the only ancilary field needed is the vfd_swrm_reader boolean, which + * indicates whether the page index must be maintained. + * + * vfd_swmr_reader: Boolean flag that is TRUE iff the file has been + * opened as a VFD SWMR reader. The remaining fields in + * the page index section are valid iff this field is TRUE. + * + * page_index Array of pointer to H5C_cache_entry_t of size + * H5C__PAGE_HASH_TABLE_LEN. This size must be a power of + * two, not the usual prime number. + * * * With the addition of the take ownership flag, it is possible that * an entry may be removed from the cache as the result of the flush of @@ -4706,6 +4799,10 @@ struct H5C_t { H5C_cache_entry_t * il_head; H5C_cache_entry_t * il_tail; + /* Fields supporting VFD SWMR */ + hbool_t vfd_swmr_reader; + H5C_cache_entry_t * page_index[H5C__PAGE_HASH_TABLE_LEN]; + /* Fields to detect entries removed during scans */ int64_t entries_removed_counter; H5C_cache_entry_t * last_entry_removed_ptr; diff --git a/src/H5Cprefetched.c b/src/H5Cprefetched.c index 0befdf9..1f3c8a5 100644 --- a/src/H5Cprefetched.c +++ b/src/H5Cprefetched.c @@ -106,6 +106,7 @@ const H5AC_class_t H5AC_PREFETCHED_ENTRY[1] = {{ /* notify = */ H5C__prefetched_entry_notify, /* free_icr = */ H5C__prefetched_entry_free_icr, /* fsf_size = */ H5C__prefetched_entry_fsf_size, + /* refresh = */ NULL, }}; diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index a3e46db..e44475a 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -268,7 +268,7 @@ /* Typedef for the main structure for the cache (defined in H5Cpkg.h) */ typedef struct H5C_t H5C_t; -/* +/*************************************************************************** * * Struct H5C_class_t * @@ -384,10 +384,11 @@ typedef struct H5C_t H5C_t; * * The typedef for the get_load_size callback is as follows: * - * typedef herr_t (*H5C_get_final_load_size_func_t)(const void *image_ptr, - * size_t image_len, - * void *udata_ptr, - * size_t *actual_len_ptr); + * typedef + * herr_t (*H5C_get_final_load_size_func_t)(const void *image_ptr, + * size_t image_len, + * void *udata_ptr, + * size_t *actual_len_ptr); * * The parameters of the get_load_size callback are as follows: * @@ -404,7 +405,8 @@ typedef struct H5C_t H5C_t; * actual_len_ptr: Pointer to the location containing the actual length * of the metadata entry on disk. * - * Processing in the get_final_load_size function should proceed as follows: + * Processing in the get_final_load_size function should proceed as + * follows: * * If successful, the function will place the length in the *actual_len_ptr * associated with supplied image and/or user data and then return SUCCEED. @@ -843,6 +845,103 @@ typedef struct H5C_t H5C_t; * push error information on the error stack with the error API * routines. * + * REFRESH_ENTRY: Pointer to the refresh entry callback. + * + * This callback exists to support VFD SWMR readers, and should not + * be used outside this context. + * + * At the end of each tick, the VFD SWMR reader is informed of pages + * in the page buffer that have been modified since the last tick. + * + * To avoid message from the past bugs, it is necessary to either + * evict or refresh entries that have been modified in the past tick, + * and thus reside in such modified pages. + * + * To this end, the metadata cache is informed of all such pages, + * and must either evict, or update all entries contained in these + * pages, or determine that the entry in question has not been modified, + * and thus that no action is required. + * + * If the entry is unpinned, it is possible to simply evict it, and + * this is probably the most efficient way to address the issue. + * + * If the entry is pinned and tagged, it is possible to evict the + * entire on disk data structure of which it is part via the evict + * tagged entry facility. This is inefficient, but it is simple and + * uses existing code -- hence this is plan A for the initial + * implementation of VFD SWMR. + * + * However, there remains the case of the pinned entry that is not + * tagged, and thus not subject to eviction via the evict tagged + * entries call -- the most important example of this is the super + * block which is pinned and may not be evicted until file close. + * + * Another example is free space manager headers -- however, these + * are a non-issue in the context of VFD SWMR readers as such files + * must only be opened R/O and thus will not have active free space + * managers. + * + * The refresh entry callback exists to address this issue. As + * indicated above, it is essential for the superblock, and desireable + * whenever it is not possible to simply evict an entry that resides + * in a modified page cache page. + * + * Functionally, the call is similar to the deserialize call, the + * primary difference being that the client receives both a pointer + * to the existing entry, and a buffer containing its image. The + * client must deserialize this image an update itself as appropriate. + * + * The typedef for the VFD SWMR refresh callback is as follows: + * + * typedef void *(*H5C_vfd_swmr_refresh_func_t)(H5F_t * f, + * void * entry_ptr, + * const void * image_ptr, + * size_t * len_ptr); + * + * The parameters of the deserialize callback are as follows: + * + * f: Pointer to the containing instance of H5F_t. + * + * entry_ptr: Pointer to the metadata cache entry that is being + * refreshed. This entry is place on the protected list + * for the duration of the refresh callback as the client + * will typically modify it during the refresh operation. + * + * image_ptr: Pointer to a buffer of length *len_ptr containing the + * most recent version of the entry's on disk image from + * the VFD SWMR metadata file. The length of the buffer + * is specified in the len parameter below. + * + * len_ptr: Pointer to size_t containing the length in + * bytes of the buffer pointed to by *image_ptr. + * + * If the supplied buffer is too small, the callback must + * place the correct value in *len_ptr and return success. + * The metadata cache will read the larger image, and call + * the refresh function again. + * + * Processing in the refresh function should proceed as follows: + * + * The target entry will be protected for the duration of the + * refresh call. This allows entry resizes if necessary, and + * prevents re-entrant refresh calls. + * + * If the supplied image contains valid data, and is of the correct + * length, the refresh function must parse it, and apply updates to + * the in core representatin of the metadata cache entry as required. + * Note that since the file is opened R/O, any updates must not + * cause the entry to be marked dirty. + * + * If the image contains valid data, but is too small, the refresh + * callback must copy the correct image length to *len_ptr, and + * return success. The metadata cache will make a second call with + * the correct image length. If the entry must change size, the + * refresh callback must call H5C_resize_entry(). + * + * If the image contains invalid data, or if, for whatever reason, + * the refresh function cannot apply its contents, the refresh + * function must return failure. + * ***************************************************************************/ /* Actions that can be reported to 'notify' client callback */ @@ -861,44 +960,59 @@ typedef enum H5C_notify_action_t { */ H5C_NOTIFY_ACTION_ENTRY_DIRTIED, /* Entry has been marked dirty. */ H5C_NOTIFY_ACTION_ENTRY_CLEANED, /* Entry has been marked clean. */ - H5C_NOTIFY_ACTION_CHILD_DIRTIED, /* Dependent child has been marked dirty. */ - H5C_NOTIFY_ACTION_CHILD_CLEANED, /* Dependent child has been marked clean. */ - H5C_NOTIFY_ACTION_CHILD_UNSERIALIZED, /* Dependent child has been marked unserialized. */ - H5C_NOTIFY_ACTION_CHILD_SERIALIZED /* Dependent child has been marked serialized. */ + H5C_NOTIFY_ACTION_CHILD_DIRTIED, /* Dependent child has been marked + * dirty. + */ + H5C_NOTIFY_ACTION_CHILD_CLEANED, /* Dependent child has been marked + * clean. + */ + H5C_NOTIFY_ACTION_CHILD_UNSERIALIZED, /* Dependent child has been marked + * unserialized. + */ + H5C_NOTIFY_ACTION_CHILD_SERIALIZED /* Dependent child has been marked + * serialized. + */ } H5C_notify_action_t; /* Cache client callback function pointers */ -typedef herr_t (*H5C_get_initial_load_size_func_t)(void *udata_ptr, size_t *image_len_ptr); +typedef herr_t (*H5C_get_initial_load_size_func_t)(void *udata_ptr, + size_t *image_len_ptr); typedef herr_t (*H5C_get_final_load_size_func_t)(const void *image_ptr, size_t image_len, void *udata_ptr, size_t *actual_len_ptr); -typedef htri_t (*H5C_verify_chksum_func_t)(const void *image_ptr, size_t len, void *udata_ptr); +typedef htri_t (*H5C_verify_chksum_func_t)(const void *image_ptr, size_t len, + void *udata_ptr); typedef void *(*H5C_deserialize_func_t)(const void *image_ptr, size_t len, void *udata_ptr, hbool_t *dirty_ptr); -typedef herr_t (*H5C_image_len_func_t)(const void *thing, size_t *image_len_ptr); +typedef herr_t (*H5C_image_len_func_t)(const void *thing, + size_t *image_len_ptr); typedef herr_t (*H5C_pre_serialize_func_t)(H5F_t *f, void *thing, haddr_t addr, - size_t len, haddr_t *new_addr_ptr, size_t *new_len_ptr, unsigned *flags_ptr); + size_t len, haddr_t *new_addr_ptr, size_t *new_len_ptr, + unsigned *flags_ptr); typedef herr_t (*H5C_serialize_func_t)(const H5F_t *f, void *image_ptr, size_t len, void *thing); typedef herr_t (*H5C_notify_func_t)(H5C_notify_action_t action, void *thing); typedef herr_t (*H5C_free_icr_func_t)(void *thing); typedef herr_t (*H5C_get_fsf_size_t)(const void * thing, hsize_t *fsf_size_ptr); +typedef herr_t (*H5C_vfd_swmr_refresh_func_t)(H5F_t * f, void * entry_ptr, + const void * image_ptr, size_t *len_ptr); /* Metadata cache client class definition */ typedef struct H5C_class_t { - int id; - const char * name; - H5FD_mem_t mem_type; - unsigned flags; + int id; + const char * name; + H5FD_mem_t mem_type; + unsigned flags; H5C_get_initial_load_size_func_t get_initial_load_size; H5C_get_final_load_size_func_t get_final_load_size; - H5C_verify_chksum_func_t verify_chksum; - H5C_deserialize_func_t deserialize; - H5C_image_len_func_t image_len; - H5C_pre_serialize_func_t pre_serialize; - H5C_serialize_func_t serialize; - H5C_notify_func_t notify; - H5C_free_icr_func_t free_icr; - H5C_get_fsf_size_t fsf_size; + H5C_verify_chksum_func_t verify_chksum; + H5C_deserialize_func_t deserialize; + H5C_image_len_func_t image_len; + H5C_pre_serialize_func_t pre_serialize; + H5C_serialize_func_t serialize; + H5C_notify_func_t notify; + H5C_free_icr_func_t free_icr; + H5C_get_fsf_size_t fsf_size; + H5C_vfd_swmr_refresh_func_t refresh; } H5C_class_t; /* Type definitions of callback functions used by the cache as a whole */ @@ -1574,6 +1688,35 @@ typedef int H5C_ring_t; * tag_info: Pointer to the common tag state for all entries belonging to * an object. NULL for untagged entries. * + * Fields supporting VFD SWMR + * + * The following fields exist to support the page index. These fields are + * only defined when the vfd_swmr_reader field in the associated instance of + * H5C_t is set to TRUE. + * + * page: Page offset of the page containing the base address of the + * metadata cache entry. + * + * refreshed_in_tick: When an entry is refreshed as part of the VFD SWMR + * reader end of tick processing, this field is used to + * record the tick in which this occured. The field is + * used primarily for sanity checking. + * + * pi_next: Next pointer used by the page index hash table that maps + * page buffer pages to any metadata cache entries that + * reside in the target page. + * + * This field points to the next entry in the doubly linked + * list of entries in the hash bin, or NULL if there is no + * next entry. + * + * pi_prev: Prev pointer used by the page index hash table that maps + * page buffer pages to any metadata cache entries that + * reside in the target page. + * + * This field points to the next entry in the doubly linked + * list of entries in the hash bin, or NULL if there is no + * next entry * * Cache entry stats collection fields: * @@ -1673,6 +1816,12 @@ typedef struct H5C_cache_entry_t { struct H5C_cache_entry_t *tl_prev; struct H5C_tag_info_t *tag_info; + /* fields supporting VFD SWMR */ + uint64_t page; + uint64_t refreshed_in_tick; + struct H5C_cache_entry_t *pi_next; + struct H5C_cache_entry_t *pi_prev; + #if H5C_COLLECT_CACHE_ENTRY_STATS /* cache entry stats fields */ int32_t accesses; @@ -2240,6 +2389,8 @@ H5_DLL void H5C_def_auto_resize_rpt_fcn(H5C_t *cache_ptr, int32_t version, size_t old_min_clean_size, size_t new_min_clean_size); H5_DLL herr_t H5C_dest(H5F_t *f); H5_DLL herr_t H5C_evict(H5F_t *f); +H5_DLL herr_t H5C_evict_or_refresh_all_entries_in_page(H5F_t * f, uint64_t page, + uint64_t tick); H5_DLL herr_t H5C_expunge_entry(H5F_t *f, const H5C_class_t *type, haddr_t addr, unsigned flags); H5_DLL herr_t H5C_flush_cache(H5F_t *f, unsigned flags); @@ -2265,24 +2416,26 @@ H5_DLL herr_t H5C_get_entry_status(const H5F_t *f, haddr_t addr, hbool_t *is_protected_ptr, hbool_t *is_pinned_ptr, hbool_t *is_corked_ptr, hbool_t *is_flush_dep_parent_ptr, hbool_t *is_flush_dep_child_ptr, hbool_t *image_up_to_date_ptr); -H5_DLL herr_t H5C_get_evictions_enabled(const H5C_t *cache_ptr, hbool_t *evictions_enabled_ptr); +H5_DLL herr_t H5C_get_evictions_enabled(const H5C_t *cache_ptr, + hbool_t *evictions_enabled_ptr); H5_DLL void * H5C_get_aux_ptr(const H5C_t *cache_ptr); H5_DLL FILE *H5C_get_trace_file_ptr(const H5C_t *cache_ptr); H5_DLL FILE *H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr); H5_DLL herr_t H5C_image_stats(H5C_t * cache_ptr, hbool_t print_header); H5_DLL herr_t H5C_insert_entry(H5F_t *f, const H5C_class_t *type, haddr_t addr, - void *thing, unsigned int flags); + void *thing, unsigned int flags); H5_DLL herr_t H5C_load_cache_image_on_next_protect(H5F_t *f, haddr_t addr, hsize_t len, hbool_t rw); H5_DLL herr_t H5C_mark_entry_dirty(void *thing); H5_DLL herr_t H5C_mark_entry_clean(void *thing); H5_DLL herr_t H5C_mark_entry_unserialized(void *thing); H5_DLL herr_t H5C_mark_entry_serialized(void *thing); -H5_DLL herr_t H5C_move_entry(H5C_t *cache_ptr, const H5C_class_t *type, +H5_DLL herr_t H5C_move_entry(H5F_t *f, const H5C_class_t *type, haddr_t old_addr, haddr_t new_addr); H5_DLL herr_t H5C_pin_protected_entry(void *thing); H5_DLL herr_t H5C_prep_for_file_close(H5F_t *f); -H5_DLL herr_t H5C_create_flush_dependency(void *parent_thing, void *child_thing); +H5_DLL herr_t H5C_create_flush_dependency(void *parent_thing, + void *child_thing); H5_DLL void * H5C_protect(H5F_t *f, const H5C_class_t *type, haddr_t addr, void *udata, unsigned flags); H5_DLL herr_t H5C_reset_cache_hit_rate_stats(H5C_t *cache_ptr); @@ -2290,14 +2443,16 @@ H5_DLL herr_t H5C_resize_entry(void *thing, size_t new_size); H5_DLL herr_t H5C_set_cache_auto_resize_config(H5C_t *cache_ptr, H5C_auto_size_ctl_t *config_ptr); H5_DLL herr_t H5C_set_cache_image_config(const H5F_t *f, H5C_t *cache_ptr, H5C_cache_image_ctl_t *config_ptr); -H5_DLL herr_t H5C_set_evictions_enabled(H5C_t *cache_ptr, hbool_t evictions_enabled); +H5_DLL herr_t H5C_set_evictions_enabled(H5C_t *cache_ptr, + hbool_t evictions_enabled); H5_DLL herr_t H5C_set_prefix(H5C_t *cache_ptr, char *prefix); H5_DLL herr_t H5C_set_trace_file_ptr(H5C_t *cache_ptr, FILE *trace_file_ptr); H5_DLL herr_t H5C_stats(H5C_t *cache_ptr, const char *cache_name, hbool_t display_detailed_stats); H5_DLL void H5C_stats__reset(H5C_t *cache_ptr); H5_DLL herr_t H5C_unpin_entry(void *thing); -H5_DLL herr_t H5C_destroy_flush_dependency(void *parent_thing, void *child_thing); +H5_DLL herr_t H5C_destroy_flush_dependency(void *parent_thing, + void *child_thing); H5_DLL herr_t H5C_unprotect(H5F_t *f, haddr_t addr, void *thing, unsigned int flags); H5_DLL herr_t H5C_validate_cache_image_config(H5C_cache_image_ctl_t * ctl_ptr); @@ -2305,16 +2460,20 @@ H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t *config_ptr, unsigned int tests); H5_DLL herr_t H5C_ignore_tags(H5C_t *cache_ptr); H5_DLL hbool_t H5C_get_ignore_tags(const H5C_t *cache_ptr); -H5_DLL herr_t H5C_retag_entries(H5C_t * cache_ptr, haddr_t src_tag, haddr_t dest_tag); -H5_DLL herr_t H5C_cork(H5C_t *cache_ptr, haddr_t obj_addr, unsigned action, hbool_t *corked); -H5_DLL herr_t H5C_get_entry_ring(const H5F_t *f, haddr_t addr, H5C_ring_t *ring); +H5_DLL herr_t H5C_retag_entries(H5C_t * cache_ptr, haddr_t src_tag, + haddr_t dest_tag); +H5_DLL herr_t H5C_cork(H5C_t *cache_ptr, haddr_t obj_addr, unsigned action, + hbool_t *corked); +H5_DLL herr_t H5C_get_entry_ring(const H5F_t *f, haddr_t addr, + H5C_ring_t *ring); H5_DLL herr_t H5C_unsettle_entry_ring(void *thing); H5_DLL herr_t H5C_unsettle_ring(H5F_t * f, H5C_ring_t ring); H5_DLL herr_t H5C_remove_entry(void *thing); H5_DLL herr_t H5C_cache_image_status(H5F_t * f, hbool_t *load_ci_ptr, hbool_t *write_ci_ptr); H5_DLL hbool_t H5C_cache_image_pending(const H5C_t *cache_ptr); -H5_DLL herr_t H5C_get_mdc_image_info(H5C_t *cache_ptr, haddr_t *image_addr, hsize_t *image_len); +H5_DLL herr_t H5C_get_mdc_image_info(H5C_t *cache_ptr, haddr_t *image_addr, + hsize_t *image_len); #ifdef H5_HAVE_PARALLEL H5_DLL herr_t H5C_apply_candidate_list(H5F_t *f, H5C_t *cache_ptr, diff --git a/src/H5EAcache.c b/src/H5EAcache.c index 8138991..d81d1b3 100644 --- a/src/H5EAcache.c +++ b/src/H5EAcache.c @@ -144,6 +144,7 @@ const H5AC_class_t H5AC_EARRAY_HDR[1] = {{ H5EA__cache_hdr_notify, /* 'notify' callback */ H5EA__cache_hdr_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5EA index block inherits cache-like properties from H5AC */ @@ -162,6 +163,7 @@ const H5AC_class_t H5AC_EARRAY_IBLOCK[1] = {{ H5EA__cache_iblock_notify, /* 'notify' callback */ H5EA__cache_iblock_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5EA super block inherits cache-like properties from H5AC */ @@ -180,6 +182,7 @@ const H5AC_class_t H5AC_EARRAY_SBLOCK[1] = {{ H5EA__cache_sblock_notify, /* 'notify' callback */ H5EA__cache_sblock_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5EA data block inherits cache-like properties from H5AC */ @@ -198,6 +201,7 @@ const H5AC_class_t H5AC_EARRAY_DBLOCK[1] = {{ H5EA__cache_dblock_notify, /* 'notify' callback */ H5EA__cache_dblock_free_icr, /* 'free_icr' callback */ H5EA__cache_dblock_fsf_size, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5EA data block page inherits cache-like properties from H5AC */ @@ -216,6 +220,7 @@ const H5AC_class_t H5AC_EARRAY_DBLK_PAGE[1] = {{ H5EA__cache_dblk_page_notify, /* 'notify' callback */ H5EA__cache_dblk_page_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5FAcache.c b/src/H5FAcache.c index 1f199e9..c0ecca2 100644 --- a/src/H5FAcache.c +++ b/src/H5FAcache.c @@ -121,6 +121,7 @@ const H5AC_class_t H5AC_FARRAY_HDR[1] = {{ H5FA__cache_hdr_notify, /* 'notify' callback */ H5FA__cache_hdr_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5FA data block inherits cache-like properties from H5AC */ @@ -139,6 +140,7 @@ const H5AC_class_t H5AC_FARRAY_DBLOCK[1] = {{ H5FA__cache_dblock_notify, /* 'notify' callback */ H5FA__cache_dblock_free_icr, /* 'free_icr' callback */ H5FA__cache_dblock_fsf_size, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5FA data block page inherits cache-like properties from H5AC */ @@ -157,6 +159,7 @@ const H5AC_class_t H5AC_FARRAY_DBLK_PAGE[1] = {{ H5FA__cache_dblk_page_notify, /* 'notify' callback */ H5FA__cache_dblk_page_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5FDvfd_swmr.c b/src/H5FDvfd_swmr.c index 4846f5a..6bf0252 100644 --- a/src/H5FDvfd_swmr.c +++ b/src/H5FDvfd_swmr.c @@ -769,92 +769,149 @@ done: * Function: H5FD__vfd_swmr_load_hdr_and_idx() * * Purpose: Load and decode the header and index in the metadata file + * * Try to load and decode the header: + * * --If fail, RETRY + * * --If succeed: - * --If the size of header and index does not fit within md_pages_reserved, return error + * + * --If the size of header and index does not fit within + * md_pages_reserved, return error + * * --If NOT an initial open call: - * --If tick_num just read is the same as the VFD's local copy, just return - * --If tick_num just read is less than the VFD's local copy, return error - * --If tick_num just read is greater than the VFD's local copy or an initial open call: + * + * --If tick_num just read is the same as the VFD's + * local copy, just return + * + * --If tick_num just read is less than the VFD's + * local copy, return error + * + * --If tick_num just read is greater than the VFD's + * local copy or an initial open call: + * * --Try to load and decode the index: + * * --If fail, RETRY + * * --If succeed: - * --If tick_num in header matches that in index, replace the VFD's - * local copy with the header and index just read - * --If tick_num in header is 1 greater than that in index, RETRY + * + * --If tick_num in header matches that in + * index, replace the VFD's local copy with + * the header and index just read + * + * --If tick_num in header is 1 greater than + * that in index, RETRY + * * --Otherwise, return error * * Return: Success: SUCCEED * Failure: FAIL * + * Programmer: Vailin Choi + * *------------------------------------------------------------------------- */ static herr_t H5FD__vfd_swmr_load_hdr_and_idx(H5FD_t *_file, hbool_t open) { - H5FD_vfd_swmr_t *file = (H5FD_vfd_swmr_t *)_file; /* VFD SWMR file struct */ - unsigned load_retries = H5FD_VFD_SWMR_MD_LOAD_RETRY_MAX; /* Retries for loading header and index */ - uint64_t nanosec = 1; /* # of nanoseconds to sleep between retries */ - H5FD_vfd_swmr_md_header md_header; /* Metadata file header */ - H5FD_vfd_swmr_md_index md_index; /* Metadata file index */ - herr_t ret_value = SUCCEED; /* Return value */ + H5FD_vfd_swmr_t *file = /* VFD SWMR file struct */ + (H5FD_vfd_swmr_t *)_file; + unsigned load_retries = /* Retries for loading header */ + H5FD_VFD_SWMR_MD_LOAD_RETRY_MAX; /* and index */ + uint64_t nanosec = 1; /* # of nanoseconds to sleep */ + /* between retries */ + H5FD_vfd_swmr_md_header md_header; /* Metadata file header */ + H5FD_vfd_swmr_md_index md_index; /* Metadata file index */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC + do { HDmemset(&md_header, 0, sizeof(H5FD_vfd_swmr_md_header)); HDmemset(&md_index, 0, sizeof(H5FD_vfd_swmr_md_index)); /* Load and decode the header */ + if(H5FD__vfd_swmr_header_deserialize(_file, &md_header) >= 0) { /* Error if header + index does not fit within md_pages_reserved */ if((H5FD_MD_HEADER_SIZE + md_header.index_length) > - (uint64_t)((hsize_t)file->md_pages_reserved * md_header.fs_page_size)) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "header + index does not fit within md_pages_reserved") + (uint64_t)((hsize_t)file->md_pages_reserved * + md_header.fs_page_size)) + + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, \ + "header + index does not fit within md_pages_reserved") if(!open) { - if(md_header.tick_num == file->md_header.tick_num) + + if(md_header.tick_num == file->md_header.tick_num) { + break; - else if(md_header.tick_num < file->md_header.tick_num) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "tick number read is less than local copy") + + } else if(md_header.tick_num < file->md_header.tick_num) + + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, \ + "tick number read is less than local copy") } HDassert(md_header.tick_num > file->md_header.tick_num || open); /* Load and decode the index */ - if(H5FD__vfd_swmr_index_deserialize(_file, &md_index, &md_header) >= 0) { + if(H5FD__vfd_swmr_index_deserialize(_file, &md_index, + &md_header) >= 0) { /* tick_num is the same in both header and index */ if(md_header.tick_num == md_index.tick_num) { + /* Copy header to VFD local copy */ - HDmemcpy(&file->md_header, &md_header, sizeof(H5FD_vfd_swmr_md_header)); + HDmemcpy(&file->md_header, &md_header, + sizeof(H5FD_vfd_swmr_md_header)); /* Free VFD local entries */ if(file->md_index.entries) { + HDassert(file->md_index.num_entries); - file->md_index.entries = (H5FD_vfd_swmr_idx_entry_t *)H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, file->md_index.entries); + + file->md_index.entries = (H5FD_vfd_swmr_idx_entry_t *) + H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, + file->md_index.entries); } /* Copy index info to VFD local copy */ file->md_index.tick_num = md_index.tick_num; file->md_index.num_entries = md_index.num_entries; + /* Allocate and copy index entries */ if(md_index.num_entries) { - if(NULL == (file->md_index.entries = H5FL_SEQ_MALLOC(H5FD_vfd_swmr_idx_entry_t, md_index.num_entries))) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "memory allocation failed for index entries") - HDmemcpy(file->md_index.entries, md_index.entries, md_index.num_entries * sizeof(H5FD_vfd_swmr_idx_entry_t)); + if(NULL == (file->md_index.entries = + H5FL_SEQ_MALLOC(H5FD_vfd_swmr_idx_entry_t, + md_index.num_entries))) + + HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for index entries") + + HDmemcpy(file->md_index.entries, md_index.entries, + md_index.num_entries * + sizeof(H5FD_vfd_swmr_idx_entry_t)); } break; } - /* Error when tick_num in header is more than one greater that in the index */ - else if(md_header.tick_num > (md_index.tick_num + 1)) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "tick number mis-match in header and index") + /* Error when tick_num in header is more than one greater + * that in the index + */ + else if (md_header.tick_num > (md_index.tick_num + 1)) + + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, \ + "tick number mis-match in header and index") if(md_index.entries) { + HDassert(md_index.num_entries); - md_index.entries = (H5FD_vfd_swmr_idx_entry_t *)H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, md_index.entries); + md_index.entries = (H5FD_vfd_swmr_idx_entry_t *) + H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, + md_index.entries); } } /* end if index ok */ @@ -866,18 +923,27 @@ H5FD__vfd_swmr_load_hdr_and_idx(H5FD_t *_file, hbool_t open) } while(--load_retries); - /* Exhaust all retries for loading and decoding the md file header and index */ + /* Exhaust all retries for loading and decoding the md file header + * and index + */ if(load_retries == 0) - HGOTO_ERROR(H5E_VFL, H5E_CANTLOAD, FAIL, "error in loading/decoding the metadata file header and index") + + HGOTO_ERROR(H5E_VFL, H5E_CANTLOAD, FAIL, \ + "error in loading/decoding the metadata file header and index") done: + /* Free index entries obtained from H5FD__vfd_swmr_index_deserialize() */ if(md_index.entries) { + HDassert(md_index.num_entries); - md_index.entries = (H5FD_vfd_swmr_idx_entry_t *)H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, md_index.entries); + md_index.entries = (H5FD_vfd_swmr_idx_entry_t *) + H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, + md_index.entries); } FUNC_LEAVE_NOAPI(ret_value) + } /* H5FD__vfd_swmr_load_hdr_and_idx() */ @@ -892,19 +958,27 @@ done: * Return: Success: SUCCEED * Failure: FAIL * + * Programmer: Vailin Choi + * *------------------------------------------------------------------------- */ static herr_t -H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_header *md_header) +H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, + H5FD_vfd_swmr_md_header *md_header) { - H5FD_vfd_swmr_t *file = (H5FD_vfd_swmr_t *)_file; /* VFD SWMR file struct */ + H5FD_vfd_swmr_t *file = /* VFD SWMR file struct */ + (H5FD_vfd_swmr_t *)_file; struct stat stat_buf; /* Buffer for stat info */ uint8_t image[H5FD_MD_HEADER_SIZE]; /* Buffer for element data */ uint32_t stored_chksum; /* Stored metadata checksum value */ - uint32_t computed_chksum; /* Computed metadata checksum value */ - uint64_t nanosec = 1; /* # of nanoseconds to sleep between retries */ - unsigned file_retries = H5FD_VFD_SWMR_MD_FILE_RETRY_MAX; /* Retries for 'stat' the file */ - unsigned header_retries = H5FD_VFD_SWMR_MD_HEADER_RETRY_MAX; /* Retries for loading header */ + uint32_t computed_chksum; /* Computed metadata checksum */ + /* value */ + uint64_t nanosec = 1; /* # of nanoseconds to sleep */ + /* between retries */ + unsigned file_retries = /* Retries for 'stat' the file */ + H5FD_VFD_SWMR_MD_FILE_RETRY_MAX; + unsigned header_retries = /* Retries for loading header */ + H5FD_VFD_SWMR_MD_HEADER_RETRY_MAX; uint8_t *p = NULL; /* Pointer to buffer */ herr_t ret_value = SUCCEED; /* Return value */ @@ -914,7 +988,9 @@ H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_header *md_hea do { /* Retrieve the metadata file size */ if(HDfstat(file->md_fd, &stat_buf)) - HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "unable to fstat the md file") + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \ + "unable to fstat the md file") /* Verify file size is at least header size */ if(stat_buf.st_size >= H5FD_MD_HEADER_SIZE) @@ -927,23 +1003,33 @@ H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_header *md_hea /* Exhaust all retries for "stat" the md file */ if(file_retries == 0) - HGOTO_ERROR(H5E_VFL, H5E_OPENERROR, FAIL, "unable to the metadata file after all retry attempts") + + HGOTO_ERROR(H5E_VFL, H5E_OPENERROR, FAIL, \ + "unable to the metadata file after all retry attempts") /* Try to get valid magic and checksum for header */ p = image; do { /* Set file pointer to the beginning the file */ if(HDlseek(file->md_fd, (HDoff_t)H5FD_MD_HEADER_OFF, SEEK_SET) < 0) - HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, "unable to seek in metadata file") + + HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, \ + "unable to seek in metadata file") + /* Read the header */ - if(HDread(file->md_fd, image, H5FD_MD_HEADER_SIZE) < H5FD_MD_HEADER_SIZE) - HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "error in reading the header in metadata file") + if(HDread(file->md_fd, image, H5FD_MD_HEADER_SIZE) < + H5FD_MD_HEADER_SIZE) + + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, \ + "error in reading the header in metadata file") /* Verify magic number */ if(HDmemcmp(p, H5FD_MD_HEADER_MAGIC, (size_t)H5_SIZEOF_MAGIC) == 0) { /* Verify stored and computed checksums are equal */ - H5F_get_checksums(image, H5FD_MD_HEADER_SIZE, &stored_chksum, &computed_chksum); + H5F_get_checksums(image, H5FD_MD_HEADER_SIZE, &stored_chksum, + &computed_chksum); + if(stored_chksum == computed_chksum) break; } @@ -954,7 +1040,9 @@ H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_header *md_hea /* Exhaust all retries for loading the header */ if(header_retries == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "incorrect checksum after after all read attempts") + + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, \ + "incorrect checksum after after all read attempts") /* Header magic is already valid */ p += H5_SIZEOF_MAGIC; @@ -972,7 +1060,9 @@ H5FD__vfd_swmr_header_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_header *md_hea HDassert((size_t)(p - (const uint8_t *)&image[0]) <= H5FD_MD_HEADER_SIZE); done: + FUNC_LEAVE_NOAPI(ret_value) + } /* H5FD__vfd_swmr_header_deserialize() */ @@ -991,10 +1081,13 @@ done: * Return: Success: SUCCEED * Failure: FAIL * + * Programmer: Vailin Choi + * *------------------------------------------------------------------------- */ static herr_t -H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index, H5FD_vfd_swmr_md_header *md_header) +H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, + H5FD_vfd_swmr_md_index *md_index, H5FD_vfd_swmr_md_header *md_header) { H5FD_vfd_swmr_t *file = (H5FD_vfd_swmr_t *)_file; /* VFD SWMR file struct */ uint8_t *image; /* Buffer */ @@ -1002,10 +1095,13 @@ H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index struct stat stat_buf; /* Buffer for stat info */ uint32_t stored_chksum; /* Stored metadata checksum value */ uint32_t computed_chksum; /* Computed metadata checksum value */ - uint64_t nanosec = 1; /* # of nanoseconds to sleep between retries */ + uint64_t nanosec = 1; /* # of nanoseconds to sleep between */ + /* retries */ unsigned i; /* Local index variable */ - unsigned file_retries = H5FD_VFD_SWMR_MD_FILE_RETRY_MAX; /* Retries for 'stat' the file */ - unsigned index_retries = H5FD_VFD_SWMR_MD_INDEX_RETRY_MAX; /* Retries for loading the index */ + unsigned file_retries = /* Retries for 'stat' the file */ + H5FD_VFD_SWMR_MD_FILE_RETRY_MAX; + unsigned index_retries = /* Retries for loading the index */ + H5FD_VFD_SWMR_MD_INDEX_RETRY_MAX; herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC @@ -1014,10 +1110,13 @@ H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index do { /* Retrieve the metadata file size */ if(HDfstat(file->md_fd, &stat_buf)) - HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "unable to fstat the md file") + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \ + "unable to fstat the md file") /* Verify file size is at least header size */ - if((uint64_t)stat_buf.st_size >= (H5FD_MD_HEADER_SIZE + md_header->index_length)) + if((uint64_t)stat_buf.st_size >= + (H5FD_MD_HEADER_SIZE + md_header->index_length)) break; /* Sleep and double the sleep time next time */ @@ -1027,21 +1126,31 @@ H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index /* Allocate buffer for reading index */ if(NULL == (image = (uint8_t *)H5MM_malloc(md_header->index_length))) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "memory allocation failed for index's on disk image buffer") + + HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for index's on disk image buffer") /* Verify magic and checksum for index */ p = image; do { if(HDlseek(file->md_fd, (HDoff_t)md_header->index_offset, SEEK_SET) < 0) - HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, "unable to seek in metadata file") - if(HDread(file->md_fd, image, md_header->index_length) < (int64_t)md_header->index_length) - HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "error in reading the header in metadata file") + + HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, \ + "unable to seek in metadata file") + + if(HDread(file->md_fd, image, md_header->index_length) < + (int64_t)md_header->index_length) + + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, \ + "error in reading the header in metadata file") /* Verify valid magic for index */ if(HDmemcmp(p, H5FD_MD_INDEX_MAGIC, (size_t)H5_SIZEOF_MAGIC) == 0) { /* Verify stored and computed checksums are equal */ - H5F_get_checksums(image, md_header->index_length, &stored_chksum, &computed_chksum); + H5F_get_checksums(image, md_header->index_length, &stored_chksum, + &computed_chksum); + if(stored_chksum == computed_chksum) break; } @@ -1052,20 +1161,29 @@ H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index /* Exhaust all retries for loading the index */ if(index_retries == 0) - HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "incorrect checksum after after all read attempts") + + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, \ + "incorrect checksum after after all read attempts") /* Magic is already valid */ p += H5_SIZEOF_MAGIC; - /* Deserialize the index info: tick number, number of entries, entries, checksum */ + /* Deserialize the index info: tick number, number of entries, entries, + * checksum + */ UINT64DECODE(p, md_index->tick_num); UINT32DECODE(p, md_index->num_entries); /* Read index entries */ if(md_index->num_entries) { + /* Allocate memory for index entries */ - if(NULL == (md_index->entries = H5FL_SEQ_MALLOC(H5FD_vfd_swmr_idx_entry_t, md_index->num_entries))) - HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, "memory allocation failed for index entries") + if(NULL == (md_index->entries = + H5FL_SEQ_MALLOC(H5FD_vfd_swmr_idx_entry_t, + md_index->num_entries))) + + HGOTO_ERROR(H5E_VFL, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for index entries") /* Decode index entries */ for(i = 0; i < md_index->num_entries; i++) { @@ -1084,47 +1202,66 @@ H5FD__vfd_swmr_index_deserialize(H5FD_t *_file, H5FD_vfd_swmr_md_index *md_index HDassert((size_t)(p - image) <= md_header->index_length); done: - if(image) + if(image) { + image = (uint8_t *)H5MM_xfree(image); + } + if(ret_value < 0) { + if(md_index->entries) { + HDassert(md_index->num_entries); - md_index->entries = (H5FD_vfd_swmr_idx_entry_t *)H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, md_index->entries); + + md_index->entries = (H5FD_vfd_swmr_idx_entry_t *) + H5FL_SEQ_FREE(H5FD_vfd_swmr_idx_entry_t, md_index->entries); } } FUNC_LEAVE_NOAPI(ret_value) + } /* H5FD__vfd_swmr_index_deserialize() */ /*------------------------------------------------------------------------- * Function: H5FD_vfd_swmr_get_tick_and_idx() * - * Purpose: Retrieve tick_num, num_entries and index from the metadata file - * --If the parameter "reload_hdr_and_index" is true, load and decode - * the header and index via H5FD__vfd_swmr_load_hdr_and_idx(), which - * may replace the VFD's local copies of header and index with the + * Purpose: Retrieve tick_num, num_entries and index from the metadata + * file + * + * --If the parameter "reload_hdr_and_index" is true, load and + * decode the header and index via + * H5FD__vfd_swmr_load_hdr_and_idx(), which may replace the + * VFD's local copies of header and index with the * latest info read. - * --Return tick_num, num_entries and index from the VFD's local copies. + * + * --Return tick_num, num_entries and index from the VFD's + * local copies. * * Return: Success: SUCCEED * Failure: FAIL * + * Programmer: Vailin Choi + * *------------------------------------------------------------------------- */ herr_t H5FD_vfd_swmr_get_tick_and_idx(H5FD_t *_file, hbool_t reload_hdr_and_index, - uint64_t *tick_ptr, uint32_t *num_entries_ptr, H5FD_vfd_swmr_idx_entry_t index[]) + uint64_t *tick_ptr, uint32_t *num_entries_ptr, + H5FD_vfd_swmr_idx_entry_t index[]) { - H5FD_vfd_swmr_t *file = (H5FD_vfd_swmr_t *)_file; /* VFD SWMR file struct */ - herr_t ret_value = SUCCEED; /* Return value */ + H5FD_vfd_swmr_t *file = (H5FD_vfd_swmr_t *)_file; /* VFD SWMR file struct */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) /* Load and decode the header and index as indicated */ if(reload_hdr_and_index) { + if(H5FD__vfd_swmr_load_hdr_and_idx(_file, FALSE) < 0) - HGOTO_ERROR(H5E_VFL, H5E_CANTLOAD, FAIL, "unable to load/decode md header and index") + + HGOTO_ERROR(H5E_VFL, H5E_CANTLOAD, FAIL, \ + "unable to load/decode md header and index") } /* Return tick_num */ @@ -1132,16 +1269,23 @@ H5FD_vfd_swmr_get_tick_and_idx(H5FD_t *_file, hbool_t reload_hdr_and_index, *tick_ptr = file->md_header.tick_num; if(num_entries_ptr != NULL) { + if(*num_entries_ptr >= file->md_index.num_entries && index != NULL) { + HDassert(*num_entries_ptr); - HDmemcpy(index, file->md_index.entries, (file->md_index.num_entries * sizeof(H5FD_vfd_swmr_idx_entry_t))); + + HDmemcpy(index, file->md_index.entries, + (file->md_index.num_entries * + sizeof(H5FD_vfd_swmr_idx_entry_t))); } *num_entries_ptr = file->md_index.num_entries; } done: + FUNC_LEAVE_NOAPI(ret_value) + } /* H5FD_vfd_swmr_get_tick_and_idx() */ diff --git a/src/H5FScache.c b/src/H5FScache.c index ac0874e..b40ac79 100644 --- a/src/H5FScache.c +++ b/src/H5FScache.c @@ -122,6 +122,7 @@ const H5AC_class_t H5AC_FSPACE_HDR[1] = {{ H5FS__cache_hdr_notify, /* 'notify' callback */ H5FS__cache_hdr_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5FS section info inherits cache-like properties from H5AC */ @@ -140,6 +141,7 @@ const H5AC_class_t H5AC_FSPACE_SINFO[1] = {{ H5FS__cache_sinfo_notify, /* 'notify' callback */ H5FS__cache_sinfo_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5Fint.c b/src/H5Fint.c index 5b5dd16..d56406e 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -1107,18 +1107,21 @@ H5F__new(H5F_file_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get VFD SWMR config info") /* Initialization for VFD SWMR */ - f->shared->vfd_swmr = FALSE; - f->shared->vfd_swmr_writer = FALSE; - f->shared->tick_num = 0; - f->shared->mdf_idx = NULL; - f->shared->mdf_idx_len = 0; - f->shared->mdf_idx_entries_used = 0; - - f->shared->vfd_swmr_md_fd = -1; - f->shared->fs_man_md = NULL; - f->shared->dl_head_ptr = NULL; - f->shared->dl_tail_ptr = NULL; - f->shared->dl_len = 0; + f->shared->vfd_swmr = FALSE; + f->shared->vfd_swmr_writer = FALSE; + f->shared->tick_num = 0; + f->shared->mdf_idx = NULL; + f->shared->mdf_idx_len = 0; + f->shared->mdf_idx_entries_used = 0; + f->shared->old_mdf_idx = NULL; + f->shared->old_mdf_idx_len = 0; + f->shared->old_mdf_idx_entries_used = 0; + + f->shared->vfd_swmr_md_fd = -1; + f->shared->fs_man_md = NULL; + f->shared->dl_head_ptr = NULL; + f->shared->dl_tail_ptr = NULL; + f->shared->dl_len = 0; /* Create a metadata cache with the specified number of elements. * The cache might be created with a different number of elements and @@ -4323,6 +4326,7 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries, /* Allocate space for the entry in the metadata file */ if((md_addr = H5MV_alloc(f, index[i].length)) == HADDR_UNDEF) + HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \ "error in allocating space from the metadata file") @@ -4771,6 +4775,9 @@ H5F_vfd_swmr_writer_end_of_tick(void) if ( ( f->shared->tick_num == 1 ) && ( H5F__vfd_swmr_writer__create_index(f) < 0 ) ) + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, \ + "unable to allocate metadata file index") + /* 4) Scan the page buffer tick list, and use it to update * the metadata file index, adding or modifying entries as @@ -4940,36 +4947,283 @@ done: /*------------------------------------------------------------------------- * Function: H5F_vfd_swmr_reader_end_of_tick * - * Purpose: Dummy right now + * Purpose: Main routine for VFD SWMR reader end of tick operations. + * The following operations must be performed: + * + * 1) Direct the VFD SWMR reader VFD to load the current header + * from the metadata file, and report the current tick. + * + * If the tick reported has not increased since the last + * call, do nothing and exit. + * + * 2) If the tick has increased, obtain a copy of the new + * index from the VFD SWMR reader VFD, and compare it with + * the old index to identify all pages that have been updated + * in the previous tick. + * + * If any such pages or multi-page metadata entries are found: + * + * a) direct the page buffer to evict any such superceeded + * pages, and + * + * b) direct the metadata cache to either evict or refresh + * any entries residing in the superceeded pages. + * + * Note that this operation MUST be performed in this order, + * as the metadata cache will refer to the page buffer + * when refreshing entries. + * + * 9) Increment the tick, and update the end of tick. * * Return: SUCCEED/FAIL * + * Programmer: John Mainzer 12/29/18 + * + * Changes: None. + * *------------------------------------------------------------------------- */ herr_t H5F_vfd_swmr_reader_end_of_tick(void) { + int pass = 0; uint64_t tmp_tick_num = 0; + H5F_t * f; + H5FD_vfd_swmr_idx_entry_t * tmp_mdf_idx; + int32_t tmp_mdf_idx_len; + int32_t tmp_mdf_idx_entries_used; + uint32_t mdf_idx_entries_used; + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) - /* construct */ - if(vfd_swmr_file_g) { - HDassert(vfd_swmr_file_g->shared); - HDassert(vfd_swmr_file_g->shared->lf); + f = vfd_swmr_file_g; + + HDassert(f); + HDassert(f->shared); + HDassert(f->shared->pb_ptr); + HDassert(f->shared->vfd_swmr); + HDassert(!f->shared->vfd_swmr_writer); + HDassert(f->shared->lf); + + /* 1) Direct the VFD SWMR reader VFD to load the current header + * from the metadata file, and report the current tick. + * + * If the tick reported has not increased since the last + * call, do nothing and exit. + */ + if ( H5FD_vfd_swmr_get_tick_and_idx(f->shared->lf, TRUE, &tmp_tick_num, + NULL, NULL) < 0 ) - if(H5FD_vfd_swmr_get_tick_and_idx(vfd_swmr_file_g->shared->lf, TRUE, &tmp_tick_num, NULL, NULL) < 0) - HGOTO_ERROR(H5E_ARGS, H5E_CANTGET, FAIL, "error in retrieving tick_num from driver") - if(tmp_tick_num != tick_num_g) { - vfd_swmr_file_g->shared->tick_num = tick_num_g = tmp_tick_num; + HGOTO_ERROR(H5E_ARGS, H5E_CANTGET, FAIL, \ + "error in retrieving tick_num from driver") - /* Update end_of_tick */ - if(H5F__vfd_swmr_update_end_of_tick_and_tick_num(vfd_swmr_file_g, FALSE) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "unable to update end of tick") - } - } + if ( tmp_tick_num != tick_num_g ) { + + /* swap the old and new metadata file indexes */ + + tmp_mdf_idx = f->shared->old_mdf_idx; + tmp_mdf_idx_len = f->shared->old_mdf_idx_len; + tmp_mdf_idx_entries_used = f->shared->old_mdf_idx_entries_used; + + f->shared->old_mdf_idx = f->shared->mdf_idx; + f->shared->old_mdf_idx_len = f->shared->mdf_idx_len; + f->shared->old_mdf_idx_entries_used = f->shared->mdf_idx_entries_used; + + f->shared->mdf_idx = tmp_mdf_idx; + f->shared->mdf_idx_len = tmp_mdf_idx_len; + f->shared->mdf_idx_entries_used = tmp_mdf_idx_entries_used; + + /* if f->shared->mdf_idx is NULL, allocate an index */ + if ( H5F__vfd_swmr_writer__create_index(f) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, \ + "unable to allocate metadata file index") + + + mdf_idx_entries_used = (uint32_t)(f->shared->mdf_idx_len); + if ( H5FD_vfd_swmr_get_tick_and_idx(f->shared->lf, FALSE, NULL, + &mdf_idx_entries_used, + f->shared->mdf_idx) < 0 ) + + HGOTO_ERROR(H5E_ARGS, H5E_CANTGET, FAIL, \ + "error in retrieving tick_num from driver") + + HDassert(tmp_mdf_idx_entries_used <= f->shared->mdf_idx_len); + + f->shared->mdf_idx_entries_used = tmp_mdf_idx_entries_used; + + /* if an old metadata file index exists, compare it with the + * new index and evict any modified, new, or deleted pages + * and any associated metadata cache entries. + * + * Note that we must do this in two passes -- page buffer first, + * and then metadata cache. This is necessary as the metadata + * cache may attempt to refresh entries rather than evict them, + * in which case it may access an entry in the page buffer. + */ + pass = 0; + while ( pass <= 1 ) { + + haddr_t page_addr; + int32_t i = 0; + int32_t j = 0; + H5FD_vfd_swmr_idx_entry_t * new_mdf_idx; + H5FD_vfd_swmr_idx_entry_t * old_mdf_idx; + int32_t new_mdf_idx_entries_used; + int32_t old_mdf_idx_entries_used; + + new_mdf_idx = f->shared->mdf_idx; + new_mdf_idx_entries_used = f->shared->mdf_idx_entries_used; + + old_mdf_idx = f->shared->old_mdf_idx; + old_mdf_idx_entries_used = f->shared->old_mdf_idx_entries_used; + + while ( ( i < old_mdf_idx_entries_used ) && + ( j < new_mdf_idx_entries_used ) ) { + + if ( old_mdf_idx[i].hdf5_page_offset == + new_mdf_idx[j].hdf5_page_offset ) { + + if ( old_mdf_idx[i].md_file_page_offset != + new_mdf_idx[j].md_file_page_offset ) { + + /* the page has been altered -- evict it and + * any contained metadata cache entries. + */ + if ( pass == 0 ) { + + page_addr = (haddr_t) + (new_mdf_idx[j].hdf5_page_offset * + f->shared->pb_ptr->page_size); + + if ( H5PB_remove_entry(f, page_addr) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "remove page buffer entry failed") + } else { + + if ( H5C_evict_or_refresh_all_entries_in_page(f, + new_mdf_idx[j].hdf5_page_offset, + tmp_tick_num) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "evict or refresh stale MDC entries failed") + } + } + i++; + j++; + + } else if ( old_mdf_idx[i].hdf5_page_offset < + new_mdf_idx[j].hdf5_page_offset ) { + + /* the page has been removed from the new version + * of the index. Evict it and any contained metadata + * cache entries. + * + * If we are careful about removing entries from the + * the index so as to ensure that they haven't changed + * for several ticks, we can probably omit this. However, + * lets not worry about this for the first cut. + */ + if ( pass == 0 ) { + + page_addr = (haddr_t)(old_mdf_idx[i].hdf5_page_offset * + f->shared->pb_ptr->page_size); + + if ( H5PB_remove_entry(f, page_addr) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "remove page buffer entry failed") + } else { + + if ( H5C_evict_or_refresh_all_entries_in_page(f, + old_mdf_idx[i].hdf5_page_offset, + tmp_tick_num) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "evict or refresh stale MDC entries failed") + } + + i++; + + } else { /* ( old_mdf_idx[i].hdf5_page_offset > */ + /* new_mdf_idx[j].hdf5_page_offset ) */ + + /* the page has been added to the index. No action + * is required. + */ + j++; + + } + + /* sanity checks to verify that the old and new indicies + * are sorted as expected. + */ + HDassert( ( i == 0 ) || + ( i >= old_mdf_idx_entries_used ) || + ( old_mdf_idx[i - 1].hdf5_page_offset < + old_mdf_idx[i].hdf5_page_offset ) ); + + HDassert( ( j == 0 ) || + ( j >= new_mdf_idx_entries_used ) || + ( new_mdf_idx[j - 1].hdf5_page_offset < + new_mdf_idx[j].hdf5_page_offset ) ); + + } + + /* cleanup any left overs in the old index */ + while ( i < old_mdf_idx_entries_used ) { + + /* the page has been removed from the new version of the + * index. Evict it from the page buffer and also evict any + * contained metadata cache entries + */ + if ( pass == 0 ) { + + page_addr = (haddr_t)(old_mdf_idx[i].hdf5_page_offset * + f->shared->pb_ptr->page_size); + + if ( H5PB_remove_entry(f, page_addr) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "remove page buffer entry failed") + } else { + + if ( H5C_evict_or_refresh_all_entries_in_page(f, + old_mdf_idx[i].hdf5_page_offset, + tmp_tick_num) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTFLUSH, FAIL, \ + "evict or refresh stale MDC entries failed") + } + + i++; + } + + pass++; + + } /* while ( pass <= 1 ) */ + + /* At this point, we should have evicted or refreshed all stale + * page buffer and metadata cache entries. + * + * Start the next tick. + */ + vfd_swmr_file_g->shared->tick_num = tick_num_g = tmp_tick_num; + + /* Update end_of_tick */ + if ( H5F__vfd_swmr_update_end_of_tick_and_tick_num(vfd_swmr_file_g, + FALSE) < 0 ) + + HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \ + "unable to update end of tick") + + } /* if ( tmp_tick_num != tick_num_g ) */ done: + FUNC_LEAVE_NOAPI(ret_value) + } /* end H5F_vfd_swmr_reader_end_of_tick() */ diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index 55aae9e..357ac07 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -430,6 +430,16 @@ struct H5F_file_t { * through mdf_idx_entries_used - 1. */ + /* Old VFD SWMMR metadata file index. These fields are used only + * by the VFD SWMR reader to store the previous version of the + * metadata file index so that it can be compared with the current + * versoin to identify page buffer and metadata cache entries that + * must be evicted or refreshed to avoid message from the past bugs. + */ + H5FD_vfd_swmr_idx_entry_t * old_mdf_idx; + int32_t old_mdf_idx_len; + int32_t old_mdf_idx_entries_used; + /* Metadata file for VFD SWMR writer */ int vfd_swmr_md_fd; /* POSIX: file descriptor for the * metadata file diff --git a/src/H5Fsuper_cache.c b/src/H5Fsuper_cache.c index 361f8a1..65cef15 100644 --- a/src/H5Fsuper_cache.c +++ b/src/H5Fsuper_cache.c @@ -84,6 +84,8 @@ static herr_t H5F__cache_drvrinfo_image_len(const void *thing, size_t *image_len static herr_t H5F__cache_drvrinfo_serialize(const H5F_t *f, void *image, size_t len, void *thing); static herr_t H5F__cache_drvrinfo_free_icr(void *thing); +static herr_t H5F__cache_superblock_refresh(H5F_t *f, void * _thing, const void * _image, + size_t * len_ptr); /* Local encode/decode routines */ static herr_t H5F__superblock_prefix_decode(H5F_super_t *sblock, @@ -114,6 +116,7 @@ const H5AC_class_t H5AC_SUPERBLOCK[1] = {{ NULL, /* 'notify' callback */ H5F__cache_superblock_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + H5F__cache_superblock_refresh, /* VFD SWMR 'refresh' callback */ }}; /* H5F driver info block inherits cache-like properties from H5AC */ @@ -132,6 +135,7 @@ const H5AC_class_t H5AC_DRVRINFO[1] = {{ NULL, /* 'notify' callback */ H5F__cache_drvrinfo_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; @@ -1087,3 +1091,257 @@ H5F__cache_drvrinfo_free_icr(void *_thing) FUNC_LEAVE_NOAPI(SUCCEED) } /* H5F__cache_drvrinfo_free_icr() */ + +/*------------------------------------------------------------------------- + * Function: H5F__cache_superblock_refresh + * + * Purpose: Examine the supplied image buffer, and update the + * superblock accordingly. + * + * This function is only called when the file is opened in + * VFD SWMR reader mode -- which implies that the file has + * been opened R/O. Thus the internal representation of + * the superblock must be clean, and may be modified without + * concern for local changes. + * + * Further, most of the superblock is fixed once the file + * is created, for the most part, this function simply + * verifies the expected values. + * + * Return: Success: Pointer to new object + * Failure: NULL + * + * Programmer: John Mainzer + * 12/21/19 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5F__cache_superblock_refresh(H5F_t *f, void * _thing, const void * _image, + size_t * len_ptr) +{ + H5F_super_t *sblock = (H5F_super_t *)_thing; + const uint8_t *image = (const uint8_t *)_image; + size_t expected_image_len; + unsigned super_vers; /* Superblock version */ + uint8_t sizeof_addr; /* Size of addresses in file */ + uint8_t sizeof_size; /* Size of offsets in file */ + uint32_t status_flags; /* File status flags */ + unsigned sym_leaf_k; /* Size of leaves in symbol tables */ + haddr_t base_addr; /* Absolute base address for rel.addrs. */ + /* (superblock for file is at this offset) */ + haddr_t stored_eof; + haddr_t ext_addr; /* Relative address of superblock extension */ + haddr_t driver_addr; /* File driver information block address */ + haddr_t root_addr; /* Root group address */ + H5G_entry_t root_ent; /* Root group symbol table entry */ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + /* santity checks */ + HDassert(f); + HDassert(sblock); + HDassert(sblock == f->shared->sblock); + HDassert(image); + HDassert(len_ptr); + HDassert(*len_ptr >= H5F_SUPERBLOCK_FIXED_SIZE + 6); + + /* skip the signature */ + image += H5F_SIGNATURE_LEN; + + /* get the superblock version */ + super_vers = *image++; + + if ( sblock->super_vers != super_vers ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected superblock vers") + + /* verify sizes of addresses and offsets */ + if(super_vers < HDF5_SUPERBLOCK_VERSION_2) { + sizeof_addr = image[4]; + sizeof_size = image[5]; + } /* end if */ + else { + sizeof_addr = image[0]; + sizeof_size = image[1]; + } /* end else */ + + if ( sblock->sizeof_addr != sizeof_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected sizeof_addr") + + if ( sblock->sizeof_size != sizeof_size ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected sizeof_size") + + /* compute expected image len */ + expected_image_len = H5F_SUPERBLOCK_FIXED_SIZE + + (size_t)H5F_SUPERBLOCK_VARLEN_SIZE(super_vers, sizeof_addr, sizeof_size); + + if ( expected_image_len != *len_ptr ) { + + *len_ptr = expected_image_len; + HGOTO_DONE(SUCCEED) + } + + /* at this point, we know that the supplied image is of + * the correct length. + */ + + /* validate the older version of the superblock */ + if(sblock->super_vers < HDF5_SUPERBLOCK_VERSION_2) { + + unsigned snode_btree_k; /* B-tree symbol table internal node 'K' value */ + unsigned chunk_btree_k; /* B-tree chunk internal node 'K' value */ + + /* Freespace version (hard-wired) */ + if(HDF5_FREESPACE_VERSION != *image++) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "bad free space version number") + + /* Root group version number (hard-wired) */ + if(HDF5_OBJECTDIR_VERSION != *image++) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "bad object directory version number") + + /* Skip over reserved byte */ + image++; + + /* Shared header version number (hard-wired) */ + if(HDF5_SHAREDHEADER_VERSION != *image++) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "bad shared-header format version number") + + /* Skip over size of file addresses (already decoded and checked) */ + image++; + + /* Skip over size of file sizes (already decoded and checked) */ + image++; + + /* Skip over reserved byte */ + image++; + + /* Various B-tree sizes */ + UINT16DECODE(image, sym_leaf_k); + if ( sym_leaf_k != sblock->sym_leaf_k ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected sym_leaf_k") + + /* Need 'get' call to set other array values */ + UINT16DECODE(image, snode_btree_k); + if ( snode_btree_k != sblock->btree_k[H5B_SNODE_ID] ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected snode_btree_k") + + /* File status flags (not really used yet) */ + UINT32DECODE(image, status_flags); + if ( status_flags != sblock->status_flags ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected status_flags") + + /* + * If the superblock version # is greater than 0, read in the indexed + * storage B-tree internal 'K' value + */ + if(sblock->super_vers > HDF5_SUPERBLOCK_VERSION_DEF) { + UINT16DECODE(image, chunk_btree_k); + + if ( chunk_btree_k != sblock->btree_k[H5B_CHUNK_ID] ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected chunk_btree_k") + + /* Reserved bytes are present only in version 1 */ + if(sblock->super_vers == HDF5_SUPERBLOCK_VERSION_1) + image += 2; /* reserved */ + } /* end if */ + + /* Remainder of "variable-sized" portion of superblock */ + H5F_addr_decode(f, (const uint8_t **)&image, &base_addr/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &ext_addr/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &stored_eof/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &driver_addr/*out*/); + + if ( base_addr != sblock->base_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected base_addr") + + if ( ext_addr != sblock->ext_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected ext_addr") + + /* use stored_eof to update EOA below */ + + if ( driver_addr != sblock->driver_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected driver_addr") + + /* decode the root group symbol table entry */ + if(H5G_ent_decode(f, (const uint8_t **)&image, &root_ent) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTDECODE, FAIL, "can't decode root group symbol table entry") + + /* Set the root group address to the correct value */ + root_addr = root_ent.header; + + if ( root_addr != sblock->root_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected root_addr") + + HDassert(root_ent.type == H5G_CACHED_STAB); + + if ( ( root_ent.type != sblock->root_ent->type ) || + ( root_ent.cache.stab.btree_addr != + sblock->root_ent->cache.stab.btree_addr ) || + ( root_ent.cache.stab.heap_addr != + sblock->root_ent->cache.stab.heap_addr ) ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected root_ent data") + + + /* NOTE: Driver info block is decoded separately, later */ + + } /* end if */ + else { + uint32_t read_chksum; + uint32_t computed_chksum; + + /* Skip over size of file addresses (already decoded and checked) */ + image++; + + /* Skip over size of file sizes (already decoded and checked) */ + image++; + + /* File status flags (not really used yet) */ + status_flags = *image++; + if ( status_flags != sblock->status_flags ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected status_flags") + + /* Base, superblock extension, end of file & root group object header addresses */ + H5F_addr_decode(f, (const uint8_t **)&image, &base_addr/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &ext_addr/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &stored_eof/*out*/); + H5F_addr_decode(f, (const uint8_t **)&image, &driver_addr/*out*/); + + if ( base_addr != sblock->base_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected base_addr") + + if ( ext_addr != sblock->ext_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected ext_addr") + + /* use stored_eof to update EOA below */ + + if ( driver_addr != sblock->driver_addr ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected driver_addr") + + /* Decode checksum */ + UINT32DECODE(image, read_chksum); + + if ( H5F_get_checksums((const uint8_t *)_image, + (size_t)(image - (const uint8_t *)_image), + NULL, &computed_chksum) < 0 ) + HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, "can't compute chksum") + + if ( read_chksum != computed_chksum ) + HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "unexpected checksum") + + } /* end else */ + + /* Sanity check */ + HDassert((size_t)(image - (const uint8_t *)_image) <= *len_ptr); + + /* update the EOA */ + if(H5F__set_eoa(f, H5FD_MEM_DEFAULT, stored_eof - base_addr) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "unable to update EOA") + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5F__cache_superblock_refresh() */ diff --git a/src/H5Gcache.c b/src/H5Gcache.c index b447cad..6641034 100644 --- a/src/H5Gcache.c +++ b/src/H5Gcache.c @@ -100,6 +100,7 @@ const H5AC_class_t H5AC_SNODE[1] = {{ NULL, /* 'notify' callback */ H5G__cache_node_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5HFcache.c b/src/H5HFcache.c index 0c5d3aa..19d2282 100644 --- a/src/H5HFcache.c +++ b/src/H5HFcache.c @@ -146,6 +146,7 @@ const H5AC_class_t H5AC_FHEAP_HDR[1] = {{ NULL, /* 'notify' callback */ H5HF__cache_hdr_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5HF indirect block inherits cache-like properties from H5AC */ @@ -164,6 +165,7 @@ const H5AC_class_t H5AC_FHEAP_IBLOCK[1] = {{ H5HF__cache_iblock_notify, /* 'notify' callback */ H5HF__cache_iblock_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5HF direct block inherits cache-like properties from H5AC */ @@ -182,6 +184,7 @@ const H5AC_class_t H5AC_FHEAP_DBLOCK[1] = {{ H5HF__cache_dblock_notify, /* 'notify' callback */ H5HF__cache_dblock_free_icr, /* 'free_icr' callback */ H5HF__cache_dblock_fsf_size, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5HGcache.c b/src/H5HGcache.c index beaea7b..658c122 100644 --- a/src/H5HGcache.c +++ b/src/H5HGcache.c @@ -95,6 +95,7 @@ const H5AC_class_t H5AC_GHEAP[1] = {{ NULL, /* 'notify' callback */ H5HG__cache_heap_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5HLcache.c b/src/H5HLcache.c index 926f787..69eada4 100644 --- a/src/H5HLcache.c +++ b/src/H5HLcache.c @@ -117,6 +117,7 @@ const H5AC_class_t H5AC_LHEAP_PRFX[1] = {{ NULL, /* 'notify' callback */ H5HL__cache_prefix_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; const H5AC_class_t H5AC_LHEAP_DBLK[1] = {{ @@ -134,6 +135,7 @@ const H5AC_class_t H5AC_LHEAP_DBLK[1] = {{ H5HL__cache_datablock_notify, /* 'notify' callback */ H5HL__cache_datablock_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; diff --git a/src/H5Ocache.c b/src/H5Ocache.c index e7cad83..7e1dd82 100644 --- a/src/H5Ocache.c +++ b/src/H5Ocache.c @@ -116,6 +116,7 @@ const H5AC_class_t H5AC_OHDR[1] = {{ H5O__cache_notify, /* 'notify' callback */ H5O__cache_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* H5O object header chunk inherits cache-like properties from H5AC */ @@ -134,6 +135,7 @@ const H5AC_class_t H5AC_OHDR_CHK[1] = {{ H5O__cache_chk_notify, /* 'notify' callback */ H5O__cache_chk_free_icr, /* 'free_icr' callback */ NULL, /* 'fsf_size' callback */ + NULL, /* VFD SWMR 'refresh' callback */ }}; /* Declare external the free list for H5O_unknown_t's */ diff --git a/src/H5PB.c b/src/H5PB.c index 089c75d..cb92e7b 100644 --- a/src/H5PB.c +++ b/src/H5PB.c @@ -1132,6 +1132,14 @@ done: * * JRM -- 10/23/18 * + * We also need to evict modified pages from the page + * buffer in the VFD SWMR reader case to avoid message from + * the past bugs. This function will serve for this for + * now, but for efficiency, we may want a version that takes + * a list of pages instead. + * + * JRM -- 12/30/18 + * *------------------------------------------------------------------------- */ herr_t @@ -1725,6 +1733,7 @@ H5PB_vfd_swmr__update_index(H5F_t * f, HDfprintf(stderr, "\n\nmax mdf index len (%d)exceeded.\n\n", f->shared->mdf_idx_len); + HDfprintf(stderr, "tick = %lld.\n", f->shared->tick_num); exit(1); } @@ -2405,7 +2414,9 @@ H5PB__flush_entry(H5F_t *f, H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr) hbool_t skip_write = FALSE; size_t write_size; haddr_t eoa; /* Current EOA for the file */ +#if VFD_IO /* JRM */ H5FD_t *file; /* file driver */ +#endif /* VFD_IO */ /* JRM */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -2529,7 +2540,9 @@ H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type, haddr_t eof = HADDR_UNDEF; H5PB_entry_t *entry_ptr = NULL; void *image_ptr = NULL; +#if VFD_IO /* JRM */ H5FD_t *file; /* File driver pointer */ +#endif /* VFD_IO */ /* JRM */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -2538,9 +2551,6 @@ H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type, HDassert(f); HDassert(f->shared); HDassert(f->shared->lf); - - file = f->shared->lf; - HDassert(pb_ptr); HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC); HDassert((entry_ptr_ptr == NULL) || (*entry_ptr_ptr == NULL)); @@ -2598,6 +2608,7 @@ H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type, * image buffer associated with the new entry. */ #if VFD_IO /* JRM */ + file = f->shared->lf; if ( ( ! skip_read ) && ( H5FD_read(file, type, addr, entry_ptr->size, image_ptr) < 0 ) ) #else /* VFD_IO */ /* JRM */ @@ -3106,7 +3117,9 @@ H5PB__read_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, { H5PB_t *pb_ptr; /* Page buffer for this file */ H5PB_entry_t *entry_ptr; /* Pointer to page buffer entry */ +#if VFD_IO /* JRM */ H5FD_t *file; /* File driver pointer */ +#endif /* VFD_IO */ /* JRM */ uint64_t page; /* page offset of addr */ haddr_t page_addr; /* page containg addr */ static haddr_t prev_addr = HADDR_UNDEF; /* addr of last call */ @@ -3127,7 +3140,9 @@ H5PB__read_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, HDassert(pb_ptr->min_rd_pages < pb_ptr->max_pages); HDassert(f->shared->lf); +#if VFD_IO /* JRM */ file = f->shared->lf; +#endif /* VFD_IO */ /* JRM */ HDassert(H5FD_MEM_DRAW != type); HDassert(buf); diff --git a/src/H5PBpkg.h b/src/H5PBpkg.h index 4af81e6..fd4b21c 100644 --- a/src/H5PBpkg.h +++ b/src/H5PBpkg.h @@ -694,19 +694,19 @@ if ( ( (entry_ptr) == NULL ) || \ #define H5PB__UPDATE_STATS_FOR_BYPASS(pb_ptr, type, size) \ { \ - int i; \ + int ii; \ \ HDassert(pb_ptr); \ HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \ \ if ( H5FD_MEM_DRAW == (type) ) { \ - i = H5PB__STATS_RD; \ + ii = H5PB__STATS_RD; \ } else if ( (size) > (pb_ptr)->page_size ) { \ - i = H5PB__STATS_MPMDE; \ + ii = H5PB__STATS_MPMDE; \ } else { \ - i = H5PB__STATS_MD; \ + ii = H5PB__STATS_MD; \ } \ - ((pb_ptr)->bypasses[i])++; \ + ((pb_ptr)->bypasses[ii])++; \ } /* H5PB__UPDATE_STATS_FOR_BYPASS */ diff --git a/test/cache.c b/test/cache.c index d5e3c6c..3883ac3 100644 --- a/test/cache.c +++ b/test/cache.c @@ -13976,7 +13976,7 @@ check_move_entry__run_test(H5F_t * file_ptr, if(!spec_ptr->is_protected) unprotect_entry(file_ptr, spec_ptr->entry_type, spec_ptr->entry_index, flags); - move_entry(cache_ptr, spec_ptr->entry_type, spec_ptr->entry_index, FALSE); + move_entry(file_ptr, spec_ptr->entry_type, spec_ptr->entry_index, FALSE); } @@ -14074,7 +14074,7 @@ check_move_entry__run_test(H5F_t * file_ptr, } /* put the entry back where it started from */ - move_entry(cache_ptr, spec_ptr->entry_type, spec_ptr->entry_index, TRUE); + move_entry(file_ptr, spec_ptr->entry_type, spec_ptr->entry_index, TRUE); return; @@ -16960,7 +16960,8 @@ check_move_entry_errs(unsigned paged) } /* end if */ if(pass) { - result = H5C_move_entry(cache_ptr, types[0], entry_0_0_ptr->addr, entry_0_1_ptr->addr); + result = H5C_move_entry(file_ptr, types[0], entry_0_0_ptr->addr, + entry_0_1_ptr->addr); if(result >= 0) { pass = FALSE; @@ -16969,7 +16970,8 @@ check_move_entry_errs(unsigned paged) } /* end if */ if(pass) { - result = H5C_move_entry(cache_ptr, types[0], entry_0_0_ptr->addr, entry_1_0_ptr->addr); + result = H5C_move_entry(file_ptr, types[0], entry_0_0_ptr->addr, + entry_1_0_ptr->addr); if(result >= 0) { pass = FALSE; @@ -17000,7 +17002,8 @@ check_move_entry_errs(unsigned paged) } /* end if */ if(pass) { - result = H5C_move_entry(cache_ptr, types[0], entry_ptr->header.addr, entry_ptr->header.addr + 10); + result = H5C_move_entry(file_ptr, types[0], entry_ptr->header.addr, + entry_ptr->header.addr + 10); if(result >= 0) { pass = FALSE; diff --git a/test/cache_common.c b/test/cache_common.c index b078964..3041dfd 100644 --- a/test/cache_common.c +++ b/test/cache_common.c @@ -308,6 +308,7 @@ static const H5C_class_t pico_class[1] = {{ NULL, pico_free_icr, NULL, + NULL, }}; static const H5C_class_t nano_class[1] = {{ @@ -325,6 +326,7 @@ static const H5C_class_t nano_class[1] = {{ NULL, nano_free_icr, NULL, + NULL, }}; static const H5C_class_t micro_class[1] = {{ @@ -342,6 +344,7 @@ static const H5C_class_t micro_class[1] = {{ NULL, micro_free_icr, NULL, + NULL, }}; static const H5C_class_t tiny_class[1] = {{ @@ -359,6 +362,7 @@ static const H5C_class_t tiny_class[1] = {{ NULL, tiny_free_icr, NULL, + NULL, }}; static const H5C_class_t small_class[1] = {{ @@ -376,6 +380,7 @@ static const H5C_class_t small_class[1] = {{ NULL, small_free_icr, NULL, + NULL, }}; static const H5C_class_t medium_class[1] = {{ @@ -393,6 +398,7 @@ static const H5C_class_t medium_class[1] = {{ NULL, medium_free_icr, NULL, + NULL, }}; static const H5C_class_t large_class[1] = {{ @@ -410,6 +416,7 @@ static const H5C_class_t large_class[1] = {{ NULL, large_free_icr, NULL, + NULL, }}; static const H5C_class_t huge_class[1] = {{ @@ -427,6 +434,7 @@ static const H5C_class_t huge_class[1] = {{ NULL, huge_free_icr, NULL, + NULL, }}; static const H5C_class_t monster_class[1] = {{ @@ -444,6 +452,7 @@ static const H5C_class_t monster_class[1] = {{ NULL, monster_free_icr, NULL, + NULL, }}; static const H5C_class_t variable_class[1] = {{ @@ -461,6 +470,7 @@ static const H5C_class_t variable_class[1] = {{ NULL, variable_free_icr, NULL, + NULL, }}; static const H5C_class_t notify_class[1] = {{ @@ -478,6 +488,7 @@ static const H5C_class_t notify_class[1] = {{ notify_notify, notify_free_icr, NULL, + NULL, }}; /* callback table declaration */ @@ -2088,7 +2099,7 @@ execute_flush_op(H5F_t * file_ptr, } /* end else */ } /* end if */ else - move_entry(cache_ptr, op_ptr->type, op_ptr->idx, op_ptr->flag); + move_entry(file_ptr, op_ptr->type, op_ptr->idx, op_ptr->flag); break; case FLUSH_OP__ORDER: @@ -3928,10 +3939,10 @@ mark_entry_dirty(int32_t type, */ void -move_entry(H5C_t * cache_ptr, - int32_t type, - int32_t idx, - hbool_t main_addr) +move_entry(H5F_t * file_ptr, + int32_t type, + int32_t idx, + hbool_t main_addr) { herr_t result; hbool_t done = TRUE; /* will set to FALSE if we have work to do */ @@ -3942,7 +3953,7 @@ move_entry(H5C_t * cache_ptr, if ( pass ) { - HDassert( cache_ptr ); + HDassert( file_ptr ); HDassert( ( 0 <= type ) && ( type < NUMBER_OF_ENTRY_TYPES ) ); HDassert( ( 0 <= idx ) && ( idx <= max_indices[type] ) ); @@ -3952,7 +3963,7 @@ move_entry(H5C_t * cache_ptr, HDassert( entry_ptr->index == idx ); HDassert( entry_ptr->type == type ); HDassert( entry_ptr == entry_ptr->self ); - HDassert( entry_ptr->cache_ptr == cache_ptr ); + HDassert( entry_ptr->cache_ptr == file_ptr->shared->cache ); HDassert( !entry_ptr->is_read_only ); HDassert( !entry_ptr->header.is_read_only ); @@ -3987,7 +3998,7 @@ move_entry(H5C_t * cache_ptr, mark_flush_dep_dirty(entry_ptr); entry_ptr->action = TEST_ENTRY_ACTION_MOVE; - result = H5C_move_entry(cache_ptr, types[type], old_addr, new_addr); + result = H5C_move_entry(file_ptr, types[type], old_addr, new_addr); entry_ptr->action = TEST_ENTRY_ACTION_NUL; } @@ -4544,7 +4555,7 @@ row_major_scan_forward(H5F_t * file_ptr, if(verbose) HDfprintf(stdout, "4(r, %d, %d, %d) ", type, tmp_idx, (int)move_to_main_addr); - move_entry(cache_ptr, type, tmp_idx, move_to_main_addr); + move_entry(file_ptr, type, tmp_idx, move_to_main_addr); HDassert(cache_ptr->slist_size == cache_ptr->dirty_index_size); } /* end if */ @@ -4964,7 +4975,7 @@ row_major_scan_backward(H5F_t * file_ptr, HDfprintf(stdout, "(r, %d, %d, %d) ", type, tmp_idx, (int)move_to_main_addr); - move_entry(cache_ptr, type, tmp_idx, move_to_main_addr); + move_entry(file_ptr, type, tmp_idx, move_to_main_addr); } tmp_idx++; diff --git a/test/cache_common.h b/test/cache_common.h index 9c66357..ffef413 100644 --- a/test/cache_common.h +++ b/test/cache_common.h @@ -599,7 +599,7 @@ H5TEST_DLL void insert_entry(H5F_t * file_ptr, H5TEST_DLL void mark_entry_dirty(int32_t type, int32_t idx); -H5TEST_DLL void move_entry(H5C_t * cache_ptr, +H5TEST_DLL void move_entry(H5F_t * file_ptr, int32_t type, int32_t idx, hbool_t main_addr); diff --git a/test/vfd_swmr.c b/test/vfd_swmr.c index ccdc027..2427eb1 100644 --- a/test/vfd_swmr.c +++ b/test/vfd_swmr.c @@ -793,7 +793,7 @@ test_writer_md(void) my_config->tick_len = 1; my_config->max_lag = 3; my_config->vfd_swmr_writer = TRUE; - my_config->md_pages_reserved = 2; + my_config->md_pages_reserved = 256; HDstrcpy(my_config->md_file_path, MD_FILENAME); /* Set the VFD SWMR configuration in fapl */ diff --git a/test/vfd_swmr_generator.c b/test/vfd_swmr_generator.c index f569935..cfdcdf7 100644 --- a/test/vfd_swmr_generator.c +++ b/test/vfd_swmr_generator.c @@ -179,7 +179,7 @@ gen_skeleton(const char *filename, hbool_t verbose, hbool_t vfd_swmr_write, config->tick_len = 4; config->max_lag = 6; config->vfd_swmr_writer = TRUE; - config->md_pages_reserved = 200; + config->md_pages_reserved = 512; HDstrcpy(config->md_file_path, "my_md_file"); /* Enable VFD SWMR configuration in fapl */ -- cgit v0.12