diff options
Diffstat (limited to 'src/H5B.c')
-rw-r--r-- | src/H5B.c | 722 |
1 files changed, 538 insertions, 184 deletions
@@ -111,7 +111,6 @@ #include "H5MFprivate.h" /* File memory management */ #include "H5Pprivate.h" /* Property lists */ - /****************/ /* Local Macros */ /****************/ @@ -154,12 +153,15 @@ static H5B_ins_t H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, hbool_t *rt_key_changed, H5B_ins_ud_t *split_bt_ud/*out*/); static herr_t H5B_insert_child(H5B_t *bt, unsigned *bt_flags, - unsigned idx, haddr_t child, + unsigned *idx, haddr_t child, H5B_ins_t anchor, const void *md_key); static herr_t H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx, void *udata, H5B_ins_ud_t *split_bt_ud/*out*/); static H5B_t * H5B_copy(const H5B_t *old_bt); +static herr_t H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, + haddr_t addr, void *udata, void *parent, + H5B_t **node); /*********************/ @@ -216,7 +218,7 @@ H5FL_SEQ_DEFINE_STATIC(size_t); */ herr_t H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata, - haddr_t *addr_p/*out*/) + void *parent, haddr_t *addr_p/*out*/) { H5B_t *bt = NULL; H5B_shared_t *shared=NULL; /* Pointer to shared B-tree info */ @@ -249,6 +251,7 @@ H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata, if(NULL == (bt->native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys)) || NULL == (bt->child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k))) HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "memory allocation failed for B-tree root node") + bt->parent = parent; if(HADDR_UNDEF == (*addr_p = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode))) HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "file allocation failed for B-tree root node") @@ -301,7 +304,8 @@ done: *------------------------------------------------------------------------- */ htri_t -H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata) +H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent) { H5B_t *bt = NULL; H5RC_t *rc_shared; /* Ref-counted shared info */ @@ -335,6 +339,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node") @@ -358,7 +363,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u HDassert(idx < bt->nchildren); if(bt->level > 0) { - if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata)) < 0) + if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata, bt)) < 0) HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree") } /* end if */ else { @@ -375,6 +380,109 @@ done: /*------------------------------------------------------------------------- + * Function: H5B_find_node + * + * Purpose: Locate the B-tree node containing the item specified in + * UDATA, if present. If found, the B-tree node will be + * pinned on return and must be unpinned after the caller is + * done using it. If not found, *node will be set to NULL. + * + * Return: Non-negative on success. Negative on failure. + * + * Programmer: Neil Fortner + * Aug 18 2010 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent, H5B_t **node) +{ + H5B_t *bt = NULL; + H5RC_t *rc_shared; /* Ref-counted shared info */ + H5B_shared_t *shared; /* Pointer to shared B-tree info */ + H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */ + unsigned idx = 0, lt = 0, rt; /* Final, left & right key indices */ + int cmp = 1; /* Key comparison value */ + htri_t found; /* Whether the correct node has been found */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5B_find_node, FAIL) + + /* + * Check arguments. + */ + HDassert(f); + HDassert(type); + HDassert(type->decode); + HDassert(type->cmp3); + HDassert(H5F_addr_defined(addr)); + + /* Get shared info for B-tree */ + if(NULL == (rc_shared = (type->get_shared)(f, udata))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object") + shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared); + HDassert(shared); + + /* + * Perform a binary search to locate the child which contains + * the thing for which we're searching. + */ + cache_udata.f = f; + cache_udata.type = type; + cache_udata.parent = parent; + cache_udata.rc_shared = rc_shared; + if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node") + + rt = bt->nchildren; + while(lt < rt && cmp) { + idx = (lt + rt) / 2; + /* compare */ + if((cmp = (type->cmp3)(H5B_NKEY(bt, shared, idx), udata, H5B_NKEY(bt, shared, (idx + 1)))) < 0) + rt = idx; + else + lt = idx + 1; + } /* end while */ + /* Check if not found */ + if(cmp) + *node = NULL; + else { + /* + * Follow the link to the subtree, or return the leaf node. + */ + HDassert(idx < bt->nchildren); + + if(bt->level > 0) { + if(H5B_find_node(f, dxpl_id, type, bt->child[idx], udata, bt, node) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree") + } /* end if */ + else { + /* Check if this is really the correct child */ + if((found = (type->found)(f, dxpl_id, bt->child[idx], H5B_NKEY(bt, shared, idx), udata)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in leaf node") + + if(found) { + /* Return this leaf node, pinned */ + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__PIN_ENTRY_FLAG) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node") + *node = bt; + bt = NULL; + } /* end if */ + else + *node = NULL; + } /* end else */ + } /* end else */ + +done: + if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5B_find_node() */ + + +/*------------------------------------------------------------------------- * Function: H5B_split * * Purpose: Split a single node into two nodes. The old node will @@ -405,6 +513,7 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx, H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */ unsigned nleft, nright; /* Number of keys in left & right halves */ double split_ratios[3]; /* B-tree split ratios */ + hbool_t bt_pinned = FALSE; herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5B_split) @@ -480,10 +589,11 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx, /* * Create the new B-tree node. */ - if(H5B_create(f, dxpl_id, shared->type, udata, &split_bt_ud->addr/*out*/) < 0) + if(H5B_create(f, dxpl_id, shared->type, udata, bt_ud->bt->parent, &split_bt_ud->addr/*out*/) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to create B-tree") cache_udata.f = f; cache_udata.type = shared->type; + cache_udata.parent = bt_ud->bt->parent; cache_udata.rc_shared = bt_ud->bt->rc_shared; if(NULL == (split_bt_ud->bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->addr, &cache_udata, H5AC_WRITE))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree") @@ -509,29 +619,117 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx, bt_ud->cache_flags |= H5AC__DIRTIED_FLAG; bt_ud->bt->nchildren = nleft; + /* Actions to take if swmr writes are on */ + if(shared->swmr_write) { + haddr_t new_bt_addr = HADDR_UNDEF; + unsigned i; + + /* + * We must clone the old btree so readers with an out-of-date version + * of the parent can still see all its children, via the shadowed + * non-split bt. Remove it from cache but do not mark it free on disk. + */ + /* Allocate space for the cloned child */ + H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t); + if(HADDR_UNDEF == (new_bt_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move b-tree") + + /* Pin old entry so it is not flushed when we unprotect */ + if(H5AC_pin_protected_entry(bt_ud->bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPIN, FAIL, "unable to pin old b-tree node") + bt_pinned = TRUE; + + /* Unprotect bt so we can move it. Also, note that it will be marked + * dirty so it will be written to the new location. */ + HDassert(bt_ud->cache_flags & H5AC__DIRTIED_FLAG); + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->addr, bt_ud->bt, bt_ud->cache_flags) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old b-tree") + bt_ud->cache_flags = H5AC__NO_FLAGS_SET; + + /* Move the location of the old child on the disk */ + if(H5AC_move_entry(f, H5AC_BT, bt_ud->addr, new_bt_addr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node") + bt_ud->addr = new_bt_addr; + + /* Re-protect bt at new address */ + if(bt_ud->bt != H5AC_protect(f, dxpl_id, H5AC_BT, new_bt_addr, &cache_udata, H5AC_WRITE)) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node") + + /* + * Update flush dependencies for children that moved to the new node + */ + if(bt_ud->bt->level > 0) { + H5B_t *child; + + for(i=0; i<nright; i++) { + /* Protect child b-tree node */ + cache_udata.parent = split_bt_ud->bt; + if(NULL == (child = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[i], &cache_udata, H5AC_WRITE))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node") + + /* Update the flush dependency, if necessary */ + HDassert(child->parent); + if(child->parent == bt_ud->bt) { + child->parent = split_bt_ud->bt; + if(H5AC_destroy_flush_dependency(bt_ud->bt, child) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency") + if(H5AC_create_flush_dependency(child->parent, child) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency") + } /* end if */ + else + HDassert(child->parent == split_bt_ud->bt); + + /* Unprotect the child */ + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[idx], child, H5AC__NO_FLAGS_SET) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node") + } /* end for */ + } /* end if */ + else { + /* At leaf node, delegate to client */ + HDassert(shared->type->update_flush_dep); + for(i=0; i<nright; i++) { + if((shared->type->update_flush_dep)(H5B_NKEY(split_bt_ud->bt, shared, i), udata, bt_ud->bt, split_bt_ud->bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to update flush dependency") + } /* end for */ + } /* end else */ + + /* + * Update left sibling to point to new bt. Only necessary when doing + * swmr writes as otherwise the address of bt doesn't change. + */ + if(H5F_addr_defined(bt_ud->bt->left)) { + H5B_t *tmp_bt; + + if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, &cache_udata, H5AC_WRITE))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling") + + tmp_bt->right = bt_ud->addr; + + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, tmp_bt, H5AC__DIRTIED_FLAG) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node") + } /* end if */ + } /* end if */ + /* - * Update sibling pointers. + * Update other sibling pointers. */ split_bt_ud->bt->left = bt_ud->addr; split_bt_ud->bt->right = bt_ud->bt->right; if(H5F_addr_defined(bt_ud->bt->right)) { - H5B_t *tmp_bt; - H5B_cache_ud_t cache_udata2; /* User-data for metadata cache callback */ + H5B_t *tmp_bt; - cache_udata2.f = f; - cache_udata2.type = shared->type; - cache_udata2.rc_shared = bt_ud->bt->rc_shared; - if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata2, H5AC_WRITE))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling") + if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata, H5AC_WRITE))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling") - tmp_bt->left = split_bt_ud->addr; + tmp_bt->left = split_bt_ud->addr; if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, tmp_bt, H5AC__DIRTIED_FLAG) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node") } /* end if */ bt_ud->bt->right = split_bt_ud->addr; + bt_ud->cache_flags |= H5AC__DIRTIED_FLAG; done: if(ret_value < 0) { @@ -539,6 +737,13 @@ done: HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node") split_bt_ud->bt = NULL; split_bt_ud->addr = HADDR_UNDEF; + split_bt_ud->cache_flags = H5AC__NO_FLAGS_SET; + } /* end if */ + + if(bt_pinned) { + HDassert(shared->swmr_write); + if(H5AC_unpin_entry(bt_ud->bt) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin old root") } /* end if */ FUNC_LEAVE_NOAPI(ret_value) @@ -560,7 +765,7 @@ done: */ herr_t H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, - void *udata) + void *udata, void *parent) { /* * These are defined this way to satisfy alignment constraints. @@ -580,6 +785,8 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, H5B_shared_t *shared; /* Pointer to shared B-tree info */ H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */ H5B_ins_t my_ins = H5B_INS_ERROR; + hbool_t bt_protected = FALSE; + hbool_t nrbt_pinned = FALSE; /* TRUE if new_root_bt is pinned */ herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(H5B_insert, FAIL) @@ -599,20 +806,27 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, /* Protect the root node */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; bt_ud.addr = addr; if(NULL == (bt_ud.bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to locate root of B-tree") + bt_protected = TRUE; /* Insert the object */ if((int)(my_ins = H5B_insert_helper(f, dxpl_id, &bt_ud, type, lt_key, <_key_changed, md_key, udata, rt_key, &rt_key_changed, &split_bt_ud/*out*/)) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to insert key") + + /* Check if the root node split */ if(H5B_INS_NOOP == my_ins) { + /* The root node did not split - just update the flush dependency (if + * necessary) and exit */ HDassert(!split_bt_ud.bt); HGOTO_DONE(SUCCEED) } /* end if */ + HDassert(H5B_INS_RIGHT == my_ins); HDassert(split_bt_ud.bt); HDassert(H5F_addr_defined(split_bt_ud.addr)); @@ -631,33 +845,33 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, * at the old root's previous address. This prevents the B-tree from * "moving". */ - H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t); - if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root") - - /* - * Move the node to the new location - */ + /* Note that this is not necessary if swmr writes are on, as H5B_split + * already moved the node in this case */ + if(!shared->swmr_write) { + H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t); + if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root") + + /* Unprotect the old root so we can move it. Also force it to be marked + * dirty so it is written to the new location. */ + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root") + bt_protected = FALSE; + + /* Move the location of the old root on the disk */ + if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node") + bt_ud.addr = old_root_addr; + + /* Update the split b-tree's left pointer to point to the new location */ + split_bt_ud.bt->left = bt_ud.addr; + split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG; + } /* end else */ /* Make a copy of the old root information */ if(NULL == (new_root_bt = H5B_copy(bt_ud.bt))) HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to copy old root"); - /* Unprotect the old root so we can move it. Also force it to be marked - * dirty so it is written to the new location. */ - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root") - bt_ud.bt = NULL; /* Make certain future references will be caught */ - - /* Move the location of the old root on the disk */ - if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node") - bt_ud.addr = old_root_addr; - - /* Update the split b-tree's left pointer to point to the new location */ - split_bt_ud.bt->left = bt_ud.addr; - split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG; - /* clear the old root info at the old address (we already copied it) */ new_root_bt->left = HADDR_UNDEF; new_root_bt->right = HADDR_UNDEF; @@ -673,23 +887,61 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, HDmemcpy(H5B_NKEY(new_root_bt, shared, 1), md_key, shared->type->sizeof_nkey); HDmemcpy(H5B_NKEY(new_root_bt, shared, 2), rt_key, shared->type->sizeof_nkey); - /* Insert the modified copy of the old root into the file again */ - if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add old B-tree root node to cache") + /* Insert the modified copy of the old root into the file again, and pin if + * doing swmr writes */ + if(shared->swmr_write) { + if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__PIN_ENTRY_FLAG) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache") + nrbt_pinned = TRUE; + + /* Set up flush dependencies */ + HDassert(parent); + HDassert(bt_ud.bt->parent == parent); + if(H5AC_destroy_flush_dependency(parent, bt_ud.bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency") + if(H5AC_create_flush_dependency(new_root_bt, bt_ud.bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency") + bt_ud.bt->parent = new_root_bt; + + HDassert(split_bt_ud.bt->parent == parent); + if(H5AC_destroy_flush_dependency(parent, split_bt_ud.bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency") + if(H5AC_create_flush_dependency(new_root_bt, split_bt_ud.bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency") + split_bt_ud.bt->parent = new_root_bt; + + HDassert(new_root_bt->parent == parent); + } /* end if */ + else { + if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache") -done: - if(ret_value < 0) - if(new_root_bt && H5B_node_dest(new_root_bt) < 0) - HDONE_ERROR(H5E_BTREE, H5E_CANTRELEASE, FAIL, "unable to free B-tree root node"); + /* Mark new_root_bt as NULL, as it is not pinned or protected and does + * not need to be freed as it is now in the cache. */ + new_root_bt = NULL; + } /* end else */ - if(bt_ud.bt) - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, bt_ud.cache_flags) < 0) +done: + if(bt_protected) + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, + bt_ud.cache_flags) < 0) HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect old root") if(split_bt_ud.bt) if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud.addr, split_bt_ud.bt, split_bt_ud.cache_flags) < 0) HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect new child") + if(nrbt_pinned) { + HDassert(shared->swmr_write); + if(H5AC_unpin_entry(new_root_bt) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin new root") + } /* end if */ + else if(new_root_bt) { + HDassert(ret_value < 0); + if(H5B_node_dest(new_root_bt) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTFREE, FAIL, "unable to free B-tree root node"); + } /* end if */ + #ifdef H5B_DEBUG if(ret_value >= 0) H5B_assert(f, dxpl_id, addr, type, udata); @@ -715,7 +967,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx, +H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned *idx, haddr_t child, H5B_ins_t anchor, const void *md_key) { H5B_shared_t *shared; /* Pointer to shared B-tree info */ @@ -733,8 +985,8 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx, /* Check for inserting right-most key into node (common when just appending * records to an unlimited dimension chunked dataset) */ - base = H5B_NKEY(bt, shared, (idx + 1)); - if((idx + 1) == bt->nchildren) { + base = H5B_NKEY(bt, shared, (*idx + 1)); + if((*idx + 1) == bt->nchildren) { /* Make room for the new key */ HDmemcpy(base + shared->type->sizeof_nkey, base, shared->type->sizeof_nkey); /* No overlap possible - memcpy() OK */ @@ -742,34 +994,34 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx, /* The MD_KEY is the left key of the new node */ if(H5B_INS_RIGHT == anchor) - idx++; /* Don't have to memmove() child addresses down, just add new child */ + (*idx)++; /* Don't have to memmove() child addresses down, just add new child */ else /* Make room for the new child address */ - bt->child[idx + 1] = bt->child[idx]; + bt->child[*idx + 1] = bt->child[*idx]; } /* end if */ else { /* Make room for the new key */ HDmemmove(base + shared->type->sizeof_nkey, base, - (bt->nchildren - idx) * shared->type->sizeof_nkey); + (bt->nchildren - *idx) * shared->type->sizeof_nkey); HDmemcpy(base, md_key, shared->type->sizeof_nkey); /* The MD_KEY is the left key of the new node */ if(H5B_INS_RIGHT == anchor) - idx++; + (*idx)++; /* Make room for the new child address */ - HDmemmove(bt->child + idx + 1, bt->child + idx, - (bt->nchildren - idx) * sizeof(haddr_t)); + HDmemmove(bt->child + *idx + 1, bt->child + *idx, + (bt->nchildren - *idx) * sizeof(haddr_t)); } /* end if */ - bt->child[idx] = child; + bt->child[*idx] = child; bt->nchildren += 1; /* Mark node as dirty */ *bt_flags |= H5AC__DIRTIED_FLAG; FUNC_LEAVE_NOAPI(SUCCEED) -} +} /* end H5B_insert_child() */ /*------------------------------------------------------------------------- @@ -873,6 +1125,7 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, /* Set up user data for cache callbacks */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = bt; cache_udata.rc_shared = rc_shared; if(0 == bt->nchildren) { @@ -1046,18 +1299,30 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, else HDmemcpy(rt_key, H5B_NKEY(bt, shared, idx + 1), type->sizeof_nkey); } /* end if */ - if(H5B_INS_CHANGE == my_ins) { - /* - * The insertion simply changed the address for the child. - */ - HDassert(!child_bt_ud.bt); - HDassert(bt->level == 0); - bt->child[idx] = new_child_bt_ud.addr; - bt_ud->cache_flags |= H5AC__DIRTIED_FLAG; - } else if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) { + + /* + * Handle changes/additions to children + */ + HDassert(!(bt->level == 0) != !(child_bt_ud.bt)); + if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) { hbool_t *tmp_bt_flags_ptr = NULL; H5B_t *tmp_bt; + /* Update child pointer to (old) child if swmr writes are on and level > + * 0, as it has been moved by H5B_split (one level down) */ + if(shared->swmr_write && bt->level > 0) { + HDassert(child_bt_ud.bt); + HDassert(bt_ud->bt->child[idx] != child_bt_ud.addr); + + bt_ud->bt->child[idx] = child_bt_ud.addr; + } /* end if */ +#ifndef NDEBUG + if(!(shared->swmr_write) && bt->level > 0) { + HDassert(child_bt_ud.bt); + HDassert(bt_ud->bt->child[idx] == child_bt_ud.addr); + } /* end if */ +#endif /* NDEBUG */ + /* * If this node is full then split it before inserting the new child. */ @@ -1079,9 +1344,34 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, } /* end else */ /* Insert the child */ - if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, idx, new_child_bt_ud.addr, my_ins, md_key) < 0) + if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, &idx, new_child_bt_ud.addr, my_ins, md_key) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, H5B_INS_ERROR, "can't insert child") - } + + /* Set up flush dependency on child client object, if appropriate */ + if(shared->swmr_write && bt->level == 0) { + HDassert(!child_bt_ud.bt); + HDassert(shared->type->create_flush_dep); + if((shared->type->create_flush_dep)(H5B_NKEY(tmp_bt, shared, idx), udata, tmp_bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency") + } /* end if */ + } else { + if(H5B_INS_CHANGE == my_ins) { + /* + * The insertion simply changed the address for the child. + */ + HDassert(!child_bt_ud.bt); + HDassert(bt->level == 0); + bt->child[idx] = new_child_bt_ud.addr; + bt_ud->cache_flags |= H5AC__DIRTIED_FLAG; + } /* end if */ + /*Set up flush dependency on child client object, if appropriate */ + if(shared->swmr_write && bt->level == 0) { + HDassert(!child_bt_ud.bt); + HDassert(shared->type->create_flush_dep); + if((shared->type->create_flush_dep)(H5B_NKEY(bt, shared, idx), udata, bt) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency") + } /* end if */ + } /* end if */ /* * If this node split, return the mid key (the one that is shared @@ -1105,7 +1395,8 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, done: if(child_bt_ud.bt) - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt, child_bt_ud.cache_flags) < 0) + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt, + child_bt_ud.cache_flags) < 0) HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect child") if(new_child_bt_ud.bt) @@ -1113,7 +1404,7 @@ done: HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect new child") FUNC_LEAVE_NOAPI(ret_value) -} +} /* end H5B_insert_helper() */ /*------------------------------------------------------------------------- @@ -1128,19 +1419,24 @@ done: * matzke@llnl.gov * Jun 23 1997 * + * Modifications: Neil Fortner + * Jun 23 2011 + * Replaced original function with new algorithm that doesn't + * use sibling pointers (for SWMR consistency) or unprotect + * nodes during recursion (for performance and safety). + * *------------------------------------------------------------------------- */ static herr_t H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, - H5B_operator_t op, void *udata) + H5B_operator_t op, void *udata, void *parent) { - H5B_t *bt = NULL; /* Pointer to current B-tree node */ - H5RC_t *rc_shared; /* Ref-counted shared info */ + H5B_t *bt = NULL; /* Pointer to current B-tree node */ + H5RC_t *rc_shared; /* Ref-counted shared info */ H5B_shared_t *shared; /* Pointer to shared B-tree info */ H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */ - uint8_t *native = NULL; /* Array of keys in native format */ - haddr_t *child = NULL; /* Array of child pointers */ - herr_t ret_value; /* Return value */ + unsigned i; /* Index */ + herr_t ret_value = H5_ITER_CONT; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5B_iterate_helper) @@ -1155,108 +1451,33 @@ H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t add /* Get shared info for B-tree */ if(NULL == (rc_shared = (type->get_shared)(f, udata))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object") + HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object") shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared); HDassert(shared); /* Protect the initial/current node */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node") - - if(bt->level > 0) { - haddr_t left_child = bt->child[0]; /* Address of left-most child in node */ - - /* Release current node */ - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node") - bt = NULL; + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node") - /* Keep following the left-most child until we reach a leaf node. */ - if((ret_value = H5B_iterate_helper(f, dxpl_id, type, left_child, op, udata)) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node") - } /* end if */ - else { - unsigned nchildren; /* Number of child pointers */ - haddr_t next_addr; /* Address of next node to the right */ - - /* Allocate space for a copy of the native records & child pointers */ - if(NULL == (native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree native records") - if(NULL == (child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree child addresses") - - /* Cache information from this node */ - nchildren = bt->nchildren; - next_addr = bt->right; - - /* Copy the native keys & child pointers into local arrays */ - HDmemcpy(native, bt->native, shared->sizeof_keys); - HDmemcpy(child, bt->child, (nchildren * sizeof(haddr_t))); - - /* Release current node */ - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node") - bt = NULL; - - /* - * We've reached the left-most leaf. Now follow the right-sibling - * pointer from leaf to leaf until we've processed all leaves. - */ - ret_value = H5_ITER_CONT; - while(ret_value == H5_ITER_CONT) { - haddr_t *curr_child; /* Pointer to node's child addresses */ - uint8_t *curr_native; /* Pointer to node's native keys */ - unsigned u; /* Local index variable */ - - /* - * Perform the iteration operator, which might invoke an - * application callback. - */ - for(u = 0, curr_child = child, curr_native = native; u < nchildren && ret_value == H5_ITER_CONT; u++, curr_child++, curr_native += type->sizeof_nkey) { - ret_value = (*op)(f, dxpl_id, curr_native, *curr_child, curr_native + type->sizeof_nkey, udata); - if(ret_value < 0) - HERROR(H5E_BTREE, H5E_CANTLIST, "iterator function failed"); - } /* end for */ - - /* Check for continuing iteration */ - if(ret_value == H5_ITER_CONT) { - /* Check for another node */ - if(H5F_addr_defined(next_addr)) { - /* Protect the next node to the right */ - addr = next_addr; - if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) - HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "B-tree node") - - /* Cache information from this node */ - nchildren = bt->nchildren; - next_addr = bt->right; - - /* Copy the native keys & child pointers into local arrays */ - HDmemcpy(native, bt->native, shared->sizeof_keys); - HDmemcpy(child, bt->child, nchildren * sizeof(haddr_t)); - - /* Unprotect node */ - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node") - bt = NULL; - } /* end if */ - else - /* Exit loop */ - break; - } /* end if */ - } /* end while */ - } /* end else */ + /* Iterate over children */ + for(i=0; i<bt->nchildren && ret_value == H5_ITER_CONT; i++) { + if(bt->level > 0) { + /* Keep following the left-most child until we reach a leaf node. */ + if((ret_value = H5B_iterate_helper(f, dxpl_id, type, bt->child[i], op, udata, bt)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node") + } /* end if */ + else + if((ret_value = (*op)(f, dxpl_id, H5B_NKEY(bt, shared, i), bt->child[i], H5B_NKEY(bt, shared, i + 1), udata)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "iterator function failed") + } /* end for */ done: if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0) HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node") - if(native) - native = H5FL_BLK_FREE(native_block, native); - if(child) - child = H5FL_SEQ_FREE(haddr_t, child); FUNC_LEAVE_NOAPI(ret_value) } /* end H5B_iterate_helper() */ @@ -1278,7 +1499,7 @@ done: */ herr_t H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, - H5B_operator_t op, void *udata) + H5B_operator_t op, void *udata, void *parent) { herr_t ret_value; /* Return value */ @@ -1294,7 +1515,7 @@ H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, HDassert(udata); /* Iterate over the B-tree records */ - if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0) + if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0) HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed"); FUNC_LEAVE_NOAPI(ret_value) @@ -1329,7 +1550,8 @@ static H5B_ins_t H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type, int level, uint8_t *lt_key/*out*/, hbool_t *lt_key_changed/*out*/, void *udata, - uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/) + uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/, + void *parent) { H5B_t *bt = NULL, *sibling = NULL; unsigned bt_flags = H5AC__NO_FLAGS_SET; @@ -1343,7 +1565,6 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type FUNC_ENTER_NOAPI(H5B_remove_helper, H5B_INS_ERROR) HDassert(f); - HDassert(H5F_addr_defined(addr)); HDassert(type); HDassert(type->decode); HDassert(type->cmp3); @@ -1364,6 +1585,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type cache_udata.f = f; cache_udata.type = type; cache_udata.rc_shared = rc_shared; + cache_udata.parent = parent; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5B_INS_ERROR, "unable to load B-tree node") @@ -1388,7 +1610,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type if((int)(ret_value = H5B_remove_helper(f, dxpl_id, bt->child[idx], type, level + 1, H5B_NKEY(bt, shared, idx)/*out*/, lt_key_changed/*out*/, udata, H5B_NKEY(bt, shared, idx + 1)/*out*/, - rt_key_changed/*out*/)) < 0) + rt_key_changed/*out*/, bt)) < 0) HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, H5B_INS_ERROR, "key not found in subtree") } else if(type->remove) { /* @@ -1504,9 +1726,10 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type bt->nchildren = 0; /* Delete the node from disk (via the metadata cache) */ - bt_flags |= H5AC__DIRTIED_FLAG; + if(!shared->swmr_write) + bt_flags |= H5AC__DIRTIED_FLAG | H5AC__FREE_FILE_SPACE_FLAG; H5_CHECK_OVERFLOW(shared->sizeof_rnode, size_t, hsize_t); - if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0) { + if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG) < 0) { bt = NULL; bt_flags = H5AC__NO_FLAGS_SET; HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to free B-tree node") @@ -1652,7 +1875,8 @@ done: *------------------------------------------------------------------------- */ herr_t -H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata) +H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent) { /* These are defined this way to satisfy alignment constraints */ uint64_t _lt_key[128], _rt_key[128]; @@ -1672,7 +1896,8 @@ H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void /* The actual removal */ if(H5B_remove_helper(f, dxpl_id, addr, type, 0, lt_key, <_key_changed, - udata, rt_key, &rt_key_changed) == H5B_INS_ERROR) + udata, rt_key, &rt_key_changed, parent) + == H5B_INS_ERROR) HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to remove entry from B-tree") #ifdef H5B_DEBUG @@ -1697,7 +1922,8 @@ done: *------------------------------------------------------------------------- */ herr_t -H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata) +H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent) { H5B_t *bt = NULL; /* B-tree node being operated on */ H5RC_t *rc_shared; /* Ref-counted shared info */ @@ -1722,6 +1948,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void /* Lock this B-tree node into memory for now */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node") @@ -1730,7 +1957,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void if(bt->level > 0) { /* Iterate over all children in node, deleting them */ for(u = 0; u < bt->nchildren; u++) - if(H5B_delete(f, dxpl_id, type, bt->child[u], udata) < 0) + if(H5B_delete(f, dxpl_id, type, bt->child[u], udata, bt) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to delete B-tree node") } /* end if */ @@ -1751,7 +1978,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void } /* end else */ done: - if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0) + if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | (shared->swmr_write ? 0 : H5AC__FREE_FILE_SPACE_FLAG)) < 0) HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node in cache") FUNC_LEAVE_NOAPI(ret_value) @@ -1816,6 +2043,11 @@ HDmemset(shared->page, 0, shared->sizeof_rnode); for(u = 0; u < (shared->two_k + 1); u++) shared->nkey[u] = u * type->sizeof_nkey; + /* Determine if we are doing SWMR writes. Only enable for chunks for now. + */ + shared->swmr_write = (H5F_INTENT(f) & H5F_ACC_SWMR_WRITE) > 0 + && type->id == H5B_CHUNK_ID; + /* Set return value */ ret_value = shared; @@ -1948,7 +2180,7 @@ done: */ static herr_t H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, - const H5B_info_ud_t *info_udata) + const H5B_info_ud_t *info_udata, void *parent) { H5B_t *bt = NULL; /* Pointer to current B-tree node */ H5RC_t *rc_shared; /* Ref-counted shared info */ @@ -1984,6 +2216,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad /* Protect the initial/current node */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node") @@ -2028,7 +2261,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad /* Check for another "row" of B-tree nodes to iterate over */ if(level > 0) { /* Keep following the left-most child until we reach a leaf node. */ - if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata) < 0) + if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata, bt) < 0) HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to list B-tree node") } /* end if */ @@ -2054,7 +2287,7 @@ done: */ herr_t H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, - H5B_info_t *bt_info, H5B_operator_t op, void *udata) + H5B_info_t *bt_info, H5B_operator_t op, void *udata, void *parent) { H5B_info_ud_t info_udata; /* User-data for B-tree size iteration */ herr_t ret_value = SUCCEED; /* Return value */ @@ -2078,13 +2311,13 @@ H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, info_udata.udata = udata; /* Iterate over the B-tree nodes */ - if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata) < 0) + if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata, parent) < 0) HGOTO_ERROR(H5E_BTREE, H5E_BADITER, FAIL, "B-tree iteration failed") /* Iterate over the B-tree records, making any "leaf" callbacks */ /* (Only if operator defined) */ if(op) - if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0) + if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0) HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed"); done: @@ -2105,7 +2338,8 @@ done: *------------------------------------------------------------------------- */ htri_t -H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr) +H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *parent) { H5B_t *bt = NULL; /* The B-tree */ H5RC_t *rc_shared; /* Ref-counted shared info */ @@ -2135,6 +2369,7 @@ H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr) */ cache_udata.f = f; cache_udata.type = type; + cache_udata.parent = parent; cache_udata.rc_shared = rc_shared; if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ))) HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node") @@ -2179,3 +2414,122 @@ H5B_node_dest(H5B_t *bt) FUNC_LEAVE_NOAPI(SUCCEED) } /* end H5B_node_dest() */ + +/*------------------------------------------------------------------------- + * Function: H5B_support + * + * Purpose: Add a flush dependency between the b-tree child object (as + * the flush dependency child) and the b-tree node containing + * the address of that child. If the child has not yet been + * inserted, does nothing. + * + * Return: TRUE if flush dependency created + * FALSE if child is not in b-tree + * Negative on failure + * + * Programmer: Neil Fortner + * Aug 18 2010 + * + *------------------------------------------------------------------------- + */ +htri_t +H5B_support(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent, void *child) +{ + H5B_t *node = NULL; /* Node containing direct link to child */ + herr_t ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5B_support, FAIL) + + /* + * Check arguments. + */ + HDassert(f); + HDassert(type); + HDassert(type->decode); + HDassert(type->cmp3); + HDassert(type->found); + HDassert(H5F_addr_defined(addr)); + HDassert(udata); + HDassert(child); + + /* Lookup the node which points to the requested child */ + if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree") + + /* Set the return value, and add the flush dependency if the ndoe was found + */ + if(node) { + if(H5AC_create_flush_dependency(node, child) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency") + ret_value = TRUE; + } /* end if */ + else + ret_value = FALSE; + +done: + if(node && H5AC_unpin_entry(node) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5B_support() */ + + +/*------------------------------------------------------------------------- + * Function: H5B_unsupport + * + * Purpose: Destroy a flush dependency between the b-tree child object + * and the b-tree node containing the address of that child. + * This dependency *must* have been created earlier either by + * a call to H5B_support() or by the client (presumably via + * the create_flush_dep() callback on insertion). + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * Aug 18 2010 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B_unsupport(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, + void *udata, void *parent, void *child) +{ + H5B_t *node = NULL; /* Node containing direct link to child */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5B_unsupport, FAIL) + + /* + * Check arguments. + */ + HDassert(f); + HDassert(type); + HDassert(type->decode); + HDassert(type->cmp3); + HDassert(type->found); + HDassert(H5F_addr_defined(addr)); + HDassert(udata); + HDassert(child); + + /* Lookup the node which points to the requested child */ + if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree") + + /* It is an error if the node does not exist yet - the client should only + * call this function if support() succeeded or if the client's + * create_flush_dep() callback has been called. */ + if(!node) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "node not found in B-tree") + + /* Add the flush dependency */ + if(H5AC_destroy_flush_dependency(node, child) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency") + +done: + if(node && H5AC_unpin_entry(node) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5B_unsupport() */ + |