summaryrefslogtreecommitdiffstats
path: root/src/H5B.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5B.c')
-rw-r--r--src/H5B.c722
1 files changed, 538 insertions, 184 deletions
diff --git a/src/H5B.c b/src/H5B.c
index 40b221d..61c5856 100644
--- a/src/H5B.c
+++ b/src/H5B.c
@@ -111,7 +111,6 @@
#include "H5MFprivate.h" /* File memory management */
#include "H5Pprivate.h" /* Property lists */
-
/****************/
/* Local Macros */
/****************/
@@ -154,12 +153,15 @@ static H5B_ins_t H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
hbool_t *rt_key_changed,
H5B_ins_ud_t *split_bt_ud/*out*/);
static herr_t H5B_insert_child(H5B_t *bt, unsigned *bt_flags,
- unsigned idx, haddr_t child,
+ unsigned *idx, haddr_t child,
H5B_ins_t anchor, const void *md_key);
static herr_t H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
unsigned idx, void *udata,
H5B_ins_ud_t *split_bt_ud/*out*/);
static H5B_t * H5B_copy(const H5B_t *old_bt);
+static herr_t H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type,
+ haddr_t addr, void *udata, void *parent,
+ H5B_t **node);
/*********************/
@@ -216,7 +218,7 @@ H5FL_SEQ_DEFINE_STATIC(size_t);
*/
herr_t
H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata,
- haddr_t *addr_p/*out*/)
+ void *parent, haddr_t *addr_p/*out*/)
{
H5B_t *bt = NULL;
H5B_shared_t *shared=NULL; /* Pointer to shared B-tree info */
@@ -249,6 +251,7 @@ H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata,
if(NULL == (bt->native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys)) ||
NULL == (bt->child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "memory allocation failed for B-tree root node")
+ bt->parent = parent;
if(HADDR_UNDEF == (*addr_p = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "file allocation failed for B-tree root node")
@@ -301,7 +304,8 @@ done:
*-------------------------------------------------------------------------
*/
htri_t
-H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
H5B_t *bt = NULL;
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -335,6 +339,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u
*/
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -358,7 +363,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u
HDassert(idx < bt->nchildren);
if(bt->level > 0) {
- if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata)) < 0)
+ if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata, bt)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree")
} /* end if */
else {
@@ -375,6 +380,109 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5B_find_node
+ *
+ * Purpose: Locate the B-tree node containing the item specified in
+ * UDATA, if present. If found, the B-tree node will be
+ * pinned on return and must be unpinned after the caller is
+ * done using it. If not found, *node will be set to NULL.
+ *
+ * Return: Non-negative on success. Negative on failure.
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, H5B_t **node)
+{
+ H5B_t *bt = NULL;
+ H5RC_t *rc_shared; /* Ref-counted shared info */
+ H5B_shared_t *shared; /* Pointer to shared B-tree info */
+ H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
+ unsigned idx = 0, lt = 0, rt; /* Final, left & right key indices */
+ int cmp = 1; /* Key comparison value */
+ htri_t found; /* Whether the correct node has been found */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_find_node, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(H5F_addr_defined(addr));
+
+ /* Get shared info for B-tree */
+ if(NULL == (rc_shared = (type->get_shared)(f, udata)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
+ shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared);
+ HDassert(shared);
+
+ /*
+ * Perform a binary search to locate the child which contains
+ * the thing for which we're searching.
+ */
+ cache_udata.f = f;
+ cache_udata.type = type;
+ cache_udata.parent = parent;
+ cache_udata.rc_shared = rc_shared;
+ if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
+
+ rt = bt->nchildren;
+ while(lt < rt && cmp) {
+ idx = (lt + rt) / 2;
+ /* compare */
+ if((cmp = (type->cmp3)(H5B_NKEY(bt, shared, idx), udata, H5B_NKEY(bt, shared, (idx + 1)))) < 0)
+ rt = idx;
+ else
+ lt = idx + 1;
+ } /* end while */
+ /* Check if not found */
+ if(cmp)
+ *node = NULL;
+ else {
+ /*
+ * Follow the link to the subtree, or return the leaf node.
+ */
+ HDassert(idx < bt->nchildren);
+
+ if(bt->level > 0) {
+ if(H5B_find_node(f, dxpl_id, type, bt->child[idx], udata, bt, node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree")
+ } /* end if */
+ else {
+ /* Check if this is really the correct child */
+ if((found = (type->found)(f, dxpl_id, bt->child[idx], H5B_NKEY(bt, shared, idx), udata)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in leaf node")
+
+ if(found) {
+ /* Return this leaf node, pinned */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__PIN_ENTRY_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node")
+ *node = bt;
+ bt = NULL;
+ } /* end if */
+ else
+ *node = NULL;
+ } /* end else */
+ } /* end else */
+
+done:
+ if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_find_node() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5B_split
*
* Purpose: Split a single node into two nodes. The old node will
@@ -405,6 +513,7 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
unsigned nleft, nright; /* Number of keys in left & right halves */
double split_ratios[3]; /* B-tree split ratios */
+ hbool_t bt_pinned = FALSE;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5B_split)
@@ -480,10 +589,11 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
/*
* Create the new B-tree node.
*/
- if(H5B_create(f, dxpl_id, shared->type, udata, &split_bt_ud->addr/*out*/) < 0)
+ if(H5B_create(f, dxpl_id, shared->type, udata, bt_ud->bt->parent, &split_bt_ud->addr/*out*/) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to create B-tree")
cache_udata.f = f;
cache_udata.type = shared->type;
+ cache_udata.parent = bt_ud->bt->parent;
cache_udata.rc_shared = bt_ud->bt->rc_shared;
if(NULL == (split_bt_ud->bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree")
@@ -509,29 +619,117 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
bt_ud->bt->nchildren = nleft;
+ /* Actions to take if swmr writes are on */
+ if(shared->swmr_write) {
+ haddr_t new_bt_addr = HADDR_UNDEF;
+ unsigned i;
+
+ /*
+ * We must clone the old btree so readers with an out-of-date version
+ * of the parent can still see all its children, via the shadowed
+ * non-split bt. Remove it from cache but do not mark it free on disk.
+ */
+ /* Allocate space for the cloned child */
+ H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
+ if(HADDR_UNDEF == (new_bt_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move b-tree")
+
+ /* Pin old entry so it is not flushed when we unprotect */
+ if(H5AC_pin_protected_entry(bt_ud->bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPIN, FAIL, "unable to pin old b-tree node")
+ bt_pinned = TRUE;
+
+ /* Unprotect bt so we can move it. Also, note that it will be marked
+ * dirty so it will be written to the new location. */
+ HDassert(bt_ud->cache_flags & H5AC__DIRTIED_FLAG);
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->addr, bt_ud->bt, bt_ud->cache_flags) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old b-tree")
+ bt_ud->cache_flags = H5AC__NO_FLAGS_SET;
+
+ /* Move the location of the old child on the disk */
+ if(H5AC_move_entry(f, H5AC_BT, bt_ud->addr, new_bt_addr) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
+ bt_ud->addr = new_bt_addr;
+
+ /* Re-protect bt at new address */
+ if(bt_ud->bt != H5AC_protect(f, dxpl_id, H5AC_BT, new_bt_addr, &cache_udata, H5AC_WRITE))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node")
+
+ /*
+ * Update flush dependencies for children that moved to the new node
+ */
+ if(bt_ud->bt->level > 0) {
+ H5B_t *child;
+
+ for(i=0; i<nright; i++) {
+ /* Protect child b-tree node */
+ cache_udata.parent = split_bt_ud->bt;
+ if(NULL == (child = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[i], &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
+
+ /* Update the flush dependency, if necessary */
+ HDassert(child->parent);
+ if(child->parent == bt_ud->bt) {
+ child->parent = split_bt_ud->bt;
+ if(H5AC_destroy_flush_dependency(bt_ud->bt, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(child->parent, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ } /* end if */
+ else
+ HDassert(child->parent == split_bt_ud->bt);
+
+ /* Unprotect the child */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[idx], child, H5AC__NO_FLAGS_SET) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
+ } /* end for */
+ } /* end if */
+ else {
+ /* At leaf node, delegate to client */
+ HDassert(shared->type->update_flush_dep);
+ for(i=0; i<nright; i++) {
+ if((shared->type->update_flush_dep)(H5B_NKEY(split_bt_ud->bt, shared, i), udata, bt_ud->bt, split_bt_ud->bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to update flush dependency")
+ } /* end for */
+ } /* end else */
+
+ /*
+ * Update left sibling to point to new bt. Only necessary when doing
+ * swmr writes as otherwise the address of bt doesn't change.
+ */
+ if(H5F_addr_defined(bt_ud->bt->left)) {
+ H5B_t *tmp_bt;
+
+ if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
+
+ tmp_bt->right = bt_ud->addr;
+
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, tmp_bt, H5AC__DIRTIED_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
+ } /* end if */
+ } /* end if */
+
/*
- * Update sibling pointers.
+ * Update other sibling pointers.
*/
split_bt_ud->bt->left = bt_ud->addr;
split_bt_ud->bt->right = bt_ud->bt->right;
if(H5F_addr_defined(bt_ud->bt->right)) {
- H5B_t *tmp_bt;
- H5B_cache_ud_t cache_udata2; /* User-data for metadata cache callback */
+ H5B_t *tmp_bt;
- cache_udata2.f = f;
- cache_udata2.type = shared->type;
- cache_udata2.rc_shared = bt_ud->bt->rc_shared;
- if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata2, H5AC_WRITE)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
+ if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
- tmp_bt->left = split_bt_ud->addr;
+ tmp_bt->left = split_bt_ud->addr;
if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, tmp_bt, H5AC__DIRTIED_FLAG) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
} /* end if */
bt_ud->bt->right = split_bt_ud->addr;
+ bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
done:
if(ret_value < 0) {
@@ -539,6 +737,13 @@ done:
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
split_bt_ud->bt = NULL;
split_bt_ud->addr = HADDR_UNDEF;
+ split_bt_ud->cache_flags = H5AC__NO_FLAGS_SET;
+ } /* end if */
+
+ if(bt_pinned) {
+ HDassert(shared->swmr_write);
+ if(H5AC_unpin_entry(bt_ud->bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin old root")
} /* end if */
FUNC_LEAVE_NOAPI(ret_value)
@@ -560,7 +765,7 @@ done:
*/
herr_t
H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- void *udata)
+ void *udata, void *parent)
{
/*
* These are defined this way to satisfy alignment constraints.
@@ -580,6 +785,8 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
H5B_shared_t *shared; /* Pointer to shared B-tree info */
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
H5B_ins_t my_ins = H5B_INS_ERROR;
+ hbool_t bt_protected = FALSE;
+ hbool_t nrbt_pinned = FALSE; /* TRUE if new_root_bt is pinned */
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5B_insert, FAIL)
@@ -599,20 +806,27 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
/* Protect the root node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
bt_ud.addr = addr;
if(NULL == (bt_ud.bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to locate root of B-tree")
+ bt_protected = TRUE;
/* Insert the object */
if((int)(my_ins = H5B_insert_helper(f, dxpl_id, &bt_ud, type, lt_key,
&lt_key_changed, md_key, udata, rt_key, &rt_key_changed,
&split_bt_ud/*out*/)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to insert key")
+
+ /* Check if the root node split */
if(H5B_INS_NOOP == my_ins) {
+ /* The root node did not split - just update the flush dependency (if
+ * necessary) and exit */
HDassert(!split_bt_ud.bt);
HGOTO_DONE(SUCCEED)
} /* end if */
+
HDassert(H5B_INS_RIGHT == my_ins);
HDassert(split_bt_ud.bt);
HDassert(H5F_addr_defined(split_bt_ud.addr));
@@ -631,33 +845,33 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
* at the old root's previous address. This prevents the B-tree from
* "moving".
*/
- H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
- if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root")
-
- /*
- * Move the node to the new location
- */
+ /* Note that this is not necessary if swmr writes are on, as H5B_split
+ * already moved the node in this case */
+ if(!shared->swmr_write) {
+ H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
+ if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root")
+
+ /* Unprotect the old root so we can move it. Also force it to be marked
+ * dirty so it is written to the new location. */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root")
+ bt_protected = FALSE;
+
+ /* Move the location of the old root on the disk */
+ if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
+ bt_ud.addr = old_root_addr;
+
+ /* Update the split b-tree's left pointer to point to the new location */
+ split_bt_ud.bt->left = bt_ud.addr;
+ split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG;
+ } /* end else */
/* Make a copy of the old root information */
if(NULL == (new_root_bt = H5B_copy(bt_ud.bt)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to copy old root");
- /* Unprotect the old root so we can move it. Also force it to be marked
- * dirty so it is written to the new location. */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root")
- bt_ud.bt = NULL; /* Make certain future references will be caught */
-
- /* Move the location of the old root on the disk */
- if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
- bt_ud.addr = old_root_addr;
-
- /* Update the split b-tree's left pointer to point to the new location */
- split_bt_ud.bt->left = bt_ud.addr;
- split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG;
-
/* clear the old root info at the old address (we already copied it) */
new_root_bt->left = HADDR_UNDEF;
new_root_bt->right = HADDR_UNDEF;
@@ -673,23 +887,61 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
HDmemcpy(H5B_NKEY(new_root_bt, shared, 1), md_key, shared->type->sizeof_nkey);
HDmemcpy(H5B_NKEY(new_root_bt, shared, 2), rt_key, shared->type->sizeof_nkey);
- /* Insert the modified copy of the old root into the file again */
- if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add old B-tree root node to cache")
+ /* Insert the modified copy of the old root into the file again, and pin if
+ * doing swmr writes */
+ if(shared->swmr_write) {
+ if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__PIN_ENTRY_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache")
+ nrbt_pinned = TRUE;
+
+ /* Set up flush dependencies */
+ HDassert(parent);
+ HDassert(bt_ud.bt->parent == parent);
+ if(H5AC_destroy_flush_dependency(parent, bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(new_root_bt, bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ bt_ud.bt->parent = new_root_bt;
+
+ HDassert(split_bt_ud.bt->parent == parent);
+ if(H5AC_destroy_flush_dependency(parent, split_bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(new_root_bt, split_bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ split_bt_ud.bt->parent = new_root_bt;
+
+ HDassert(new_root_bt->parent == parent);
+ } /* end if */
+ else {
+ if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache")
-done:
- if(ret_value < 0)
- if(new_root_bt && H5B_node_dest(new_root_bt) < 0)
- HDONE_ERROR(H5E_BTREE, H5E_CANTRELEASE, FAIL, "unable to free B-tree root node");
+ /* Mark new_root_bt as NULL, as it is not pinned or protected and does
+ * not need to be freed as it is now in the cache. */
+ new_root_bt = NULL;
+ } /* end else */
- if(bt_ud.bt)
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, bt_ud.cache_flags) < 0)
+done:
+ if(bt_protected)
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt,
+ bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect old root")
if(split_bt_ud.bt)
if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud.addr, split_bt_ud.bt, split_bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect new child")
+ if(nrbt_pinned) {
+ HDassert(shared->swmr_write);
+ if(H5AC_unpin_entry(new_root_bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin new root")
+ } /* end if */
+ else if(new_root_bt) {
+ HDassert(ret_value < 0);
+ if(H5B_node_dest(new_root_bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTFREE, FAIL, "unable to free B-tree root node");
+ } /* end if */
+
#ifdef H5B_DEBUG
if(ret_value >= 0)
H5B_assert(f, dxpl_id, addr, type, udata);
@@ -715,7 +967,7 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
+H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned *idx,
haddr_t child, H5B_ins_t anchor, const void *md_key)
{
H5B_shared_t *shared; /* Pointer to shared B-tree info */
@@ -733,8 +985,8 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
/* Check for inserting right-most key into node (common when just appending
* records to an unlimited dimension chunked dataset)
*/
- base = H5B_NKEY(bt, shared, (idx + 1));
- if((idx + 1) == bt->nchildren) {
+ base = H5B_NKEY(bt, shared, (*idx + 1));
+ if((*idx + 1) == bt->nchildren) {
/* Make room for the new key */
HDmemcpy(base + shared->type->sizeof_nkey, base,
shared->type->sizeof_nkey); /* No overlap possible - memcpy() OK */
@@ -742,34 +994,34 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
/* The MD_KEY is the left key of the new node */
if(H5B_INS_RIGHT == anchor)
- idx++; /* Don't have to memmove() child addresses down, just add new child */
+ (*idx)++; /* Don't have to memmove() child addresses down, just add new child */
else
/* Make room for the new child address */
- bt->child[idx + 1] = bt->child[idx];
+ bt->child[*idx + 1] = bt->child[*idx];
} /* end if */
else {
/* Make room for the new key */
HDmemmove(base + shared->type->sizeof_nkey, base,
- (bt->nchildren - idx) * shared->type->sizeof_nkey);
+ (bt->nchildren - *idx) * shared->type->sizeof_nkey);
HDmemcpy(base, md_key, shared->type->sizeof_nkey);
/* The MD_KEY is the left key of the new node */
if(H5B_INS_RIGHT == anchor)
- idx++;
+ (*idx)++;
/* Make room for the new child address */
- HDmemmove(bt->child + idx + 1, bt->child + idx,
- (bt->nchildren - idx) * sizeof(haddr_t));
+ HDmemmove(bt->child + *idx + 1, bt->child + *idx,
+ (bt->nchildren - *idx) * sizeof(haddr_t));
} /* end if */
- bt->child[idx] = child;
+ bt->child[*idx] = child;
bt->nchildren += 1;
/* Mark node as dirty */
*bt_flags |= H5AC__DIRTIED_FLAG;
FUNC_LEAVE_NOAPI(SUCCEED)
-}
+} /* end H5B_insert_child() */
/*-------------------------------------------------------------------------
@@ -873,6 +1125,7 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
/* Set up user data for cache callbacks */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = bt;
cache_udata.rc_shared = rc_shared;
if(0 == bt->nchildren) {
@@ -1046,18 +1299,30 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
else
HDmemcpy(rt_key, H5B_NKEY(bt, shared, idx + 1), type->sizeof_nkey);
} /* end if */
- if(H5B_INS_CHANGE == my_ins) {
- /*
- * The insertion simply changed the address for the child.
- */
- HDassert(!child_bt_ud.bt);
- HDassert(bt->level == 0);
- bt->child[idx] = new_child_bt_ud.addr;
- bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
- } else if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) {
+
+ /*
+ * Handle changes/additions to children
+ */
+ HDassert(!(bt->level == 0) != !(child_bt_ud.bt));
+ if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) {
hbool_t *tmp_bt_flags_ptr = NULL;
H5B_t *tmp_bt;
+ /* Update child pointer to (old) child if swmr writes are on and level >
+ * 0, as it has been moved by H5B_split (one level down) */
+ if(shared->swmr_write && bt->level > 0) {
+ HDassert(child_bt_ud.bt);
+ HDassert(bt_ud->bt->child[idx] != child_bt_ud.addr);
+
+ bt_ud->bt->child[idx] = child_bt_ud.addr;
+ } /* end if */
+#ifndef NDEBUG
+ if(!(shared->swmr_write) && bt->level > 0) {
+ HDassert(child_bt_ud.bt);
+ HDassert(bt_ud->bt->child[idx] == child_bt_ud.addr);
+ } /* end if */
+#endif /* NDEBUG */
+
/*
* If this node is full then split it before inserting the new child.
*/
@@ -1079,9 +1344,34 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
} /* end else */
/* Insert the child */
- if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, idx, new_child_bt_ud.addr, my_ins, md_key) < 0)
+ if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, &idx, new_child_bt_ud.addr, my_ins, md_key) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, H5B_INS_ERROR, "can't insert child")
- }
+
+ /* Set up flush dependency on child client object, if appropriate */
+ if(shared->swmr_write && bt->level == 0) {
+ HDassert(!child_bt_ud.bt);
+ HDassert(shared->type->create_flush_dep);
+ if((shared->type->create_flush_dep)(H5B_NKEY(tmp_bt, shared, idx), udata, tmp_bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency")
+ } /* end if */
+ } else {
+ if(H5B_INS_CHANGE == my_ins) {
+ /*
+ * The insertion simply changed the address for the child.
+ */
+ HDassert(!child_bt_ud.bt);
+ HDassert(bt->level == 0);
+ bt->child[idx] = new_child_bt_ud.addr;
+ bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
+ } /* end if */
+ /*Set up flush dependency on child client object, if appropriate */
+ if(shared->swmr_write && bt->level == 0) {
+ HDassert(!child_bt_ud.bt);
+ HDassert(shared->type->create_flush_dep);
+ if((shared->type->create_flush_dep)(H5B_NKEY(bt, shared, idx), udata, bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency")
+ } /* end if */
+ } /* end if */
/*
* If this node split, return the mid key (the one that is shared
@@ -1105,7 +1395,8 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
done:
if(child_bt_ud.bt)
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt, child_bt_ud.cache_flags) < 0)
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt,
+ child_bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect child")
if(new_child_bt_ud.bt)
@@ -1113,7 +1404,7 @@ done:
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect new child")
FUNC_LEAVE_NOAPI(ret_value)
-}
+} /* end H5B_insert_helper() */
/*-------------------------------------------------------------------------
@@ -1128,19 +1419,24 @@ done:
* matzke@llnl.gov
* Jun 23 1997
*
+ * Modifications: Neil Fortner
+ * Jun 23 2011
+ * Replaced original function with new algorithm that doesn't
+ * use sibling pointers (for SWMR consistency) or unprotect
+ * nodes during recursion (for performance and safety).
+ *
*-------------------------------------------------------------------------
*/
static herr_t
H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_operator_t op, void *udata)
+ H5B_operator_t op, void *udata, void *parent)
{
- H5B_t *bt = NULL; /* Pointer to current B-tree node */
- H5RC_t *rc_shared; /* Ref-counted shared info */
+ H5B_t *bt = NULL; /* Pointer to current B-tree node */
+ H5RC_t *rc_shared; /* Ref-counted shared info */
H5B_shared_t *shared; /* Pointer to shared B-tree info */
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
- uint8_t *native = NULL; /* Array of keys in native format */
- haddr_t *child = NULL; /* Array of child pointers */
- herr_t ret_value; /* Return value */
+ unsigned i; /* Index */
+ herr_t ret_value = H5_ITER_CONT; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5B_iterate_helper)
@@ -1155,108 +1451,33 @@ H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t add
/* Get shared info for B-tree */
if(NULL == (rc_shared = (type->get_shared)(f, udata)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared);
HDassert(shared);
/* Protect the initial/current node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node")
-
- if(bt->level > 0) {
- haddr_t left_child = bt->child[0]; /* Address of left-most child in node */
-
- /* Release current node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node")
- /* Keep following the left-most child until we reach a leaf node. */
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, left_child, op, udata)) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node")
- } /* end if */
- else {
- unsigned nchildren; /* Number of child pointers */
- haddr_t next_addr; /* Address of next node to the right */
-
- /* Allocate space for a copy of the native records & child pointers */
- if(NULL == (native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree native records")
- if(NULL == (child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree child addresses")
-
- /* Cache information from this node */
- nchildren = bt->nchildren;
- next_addr = bt->right;
-
- /* Copy the native keys & child pointers into local arrays */
- HDmemcpy(native, bt->native, shared->sizeof_keys);
- HDmemcpy(child, bt->child, (nchildren * sizeof(haddr_t)));
-
- /* Release current node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
-
- /*
- * We've reached the left-most leaf. Now follow the right-sibling
- * pointer from leaf to leaf until we've processed all leaves.
- */
- ret_value = H5_ITER_CONT;
- while(ret_value == H5_ITER_CONT) {
- haddr_t *curr_child; /* Pointer to node's child addresses */
- uint8_t *curr_native; /* Pointer to node's native keys */
- unsigned u; /* Local index variable */
-
- /*
- * Perform the iteration operator, which might invoke an
- * application callback.
- */
- for(u = 0, curr_child = child, curr_native = native; u < nchildren && ret_value == H5_ITER_CONT; u++, curr_child++, curr_native += type->sizeof_nkey) {
- ret_value = (*op)(f, dxpl_id, curr_native, *curr_child, curr_native + type->sizeof_nkey, udata);
- if(ret_value < 0)
- HERROR(H5E_BTREE, H5E_CANTLIST, "iterator function failed");
- } /* end for */
-
- /* Check for continuing iteration */
- if(ret_value == H5_ITER_CONT) {
- /* Check for another node */
- if(H5F_addr_defined(next_addr)) {
- /* Protect the next node to the right */
- addr = next_addr;
- if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "B-tree node")
-
- /* Cache information from this node */
- nchildren = bt->nchildren;
- next_addr = bt->right;
-
- /* Copy the native keys & child pointers into local arrays */
- HDmemcpy(native, bt->native, shared->sizeof_keys);
- HDmemcpy(child, bt->child, nchildren * sizeof(haddr_t));
-
- /* Unprotect node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
- } /* end if */
- else
- /* Exit loop */
- break;
- } /* end if */
- } /* end while */
- } /* end else */
+ /* Iterate over children */
+ for(i=0; i<bt->nchildren && ret_value == H5_ITER_CONT; i++) {
+ if(bt->level > 0) {
+ /* Keep following the left-most child until we reach a leaf node. */
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, bt->child[i], op, udata, bt)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node")
+ } /* end if */
+ else
+ if((ret_value = (*op)(f, dxpl_id, H5B_NKEY(bt, shared, i), bt->child[i], H5B_NKEY(bt, shared, i + 1), udata)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "iterator function failed")
+ } /* end for */
done:
if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- if(native)
- native = H5FL_BLK_FREE(native_block, native);
- if(child)
- child = H5FL_SEQ_FREE(haddr_t, child);
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5B_iterate_helper() */
@@ -1278,7 +1499,7 @@ done:
*/
herr_t
H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_operator_t op, void *udata)
+ H5B_operator_t op, void *udata, void *parent)
{
herr_t ret_value; /* Return value */
@@ -1294,7 +1515,7 @@ H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
HDassert(udata);
/* Iterate over the B-tree records */
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0)
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0)
HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed");
FUNC_LEAVE_NOAPI(ret_value)
@@ -1329,7 +1550,8 @@ static H5B_ins_t
H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type,
int level, uint8_t *lt_key/*out*/,
hbool_t *lt_key_changed/*out*/, void *udata,
- uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/)
+ uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/,
+ void *parent)
{
H5B_t *bt = NULL, *sibling = NULL;
unsigned bt_flags = H5AC__NO_FLAGS_SET;
@@ -1343,7 +1565,6 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
FUNC_ENTER_NOAPI(H5B_remove_helper, H5B_INS_ERROR)
HDassert(f);
- HDassert(H5F_addr_defined(addr));
HDassert(type);
HDassert(type->decode);
HDassert(type->cmp3);
@@ -1364,6 +1585,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
cache_udata.f = f;
cache_udata.type = type;
cache_udata.rc_shared = rc_shared;
+ cache_udata.parent = parent;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5B_INS_ERROR, "unable to load B-tree node")
@@ -1388,7 +1610,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
if((int)(ret_value = H5B_remove_helper(f, dxpl_id,
bt->child[idx], type, level + 1, H5B_NKEY(bt, shared, idx)/*out*/,
lt_key_changed/*out*/, udata, H5B_NKEY(bt, shared, idx + 1)/*out*/,
- rt_key_changed/*out*/)) < 0)
+ rt_key_changed/*out*/, bt)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, H5B_INS_ERROR, "key not found in subtree")
} else if(type->remove) {
/*
@@ -1504,9 +1726,10 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
bt->nchildren = 0;
/* Delete the node from disk (via the metadata cache) */
- bt_flags |= H5AC__DIRTIED_FLAG;
+ if(!shared->swmr_write)
+ bt_flags |= H5AC__DIRTIED_FLAG | H5AC__FREE_FILE_SPACE_FLAG;
H5_CHECK_OVERFLOW(shared->sizeof_rnode, size_t, hsize_t);
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0) {
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG) < 0) {
bt = NULL;
bt_flags = H5AC__NO_FLAGS_SET;
HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to free B-tree node")
@@ -1652,7 +1875,8 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
/* These are defined this way to satisfy alignment constraints */
uint64_t _lt_key[128], _rt_key[128];
@@ -1672,7 +1896,8 @@ H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
/* The actual removal */
if(H5B_remove_helper(f, dxpl_id, addr, type, 0, lt_key, &lt_key_changed,
- udata, rt_key, &rt_key_changed) == H5B_INS_ERROR)
+ udata, rt_key, &rt_key_changed, parent)
+ == H5B_INS_ERROR)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to remove entry from B-tree")
#ifdef H5B_DEBUG
@@ -1697,7 +1922,8 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
H5B_t *bt = NULL; /* B-tree node being operated on */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -1722,6 +1948,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
/* Lock this B-tree node into memory for now */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -1730,7 +1957,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
if(bt->level > 0) {
/* Iterate over all children in node, deleting them */
for(u = 0; u < bt->nchildren; u++)
- if(H5B_delete(f, dxpl_id, type, bt->child[u], udata) < 0)
+ if(H5B_delete(f, dxpl_id, type, bt->child[u], udata, bt) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to delete B-tree node")
} /* end if */
@@ -1751,7 +1978,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
} /* end else */
done:
- if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0)
+ if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | (shared->swmr_write ? 0 : H5AC__FREE_FILE_SPACE_FLAG)) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node in cache")
FUNC_LEAVE_NOAPI(ret_value)
@@ -1816,6 +2043,11 @@ HDmemset(shared->page, 0, shared->sizeof_rnode);
for(u = 0; u < (shared->two_k + 1); u++)
shared->nkey[u] = u * type->sizeof_nkey;
+ /* Determine if we are doing SWMR writes. Only enable for chunks for now.
+ */
+ shared->swmr_write = (H5F_INTENT(f) & H5F_ACC_SWMR_WRITE) > 0
+ && type->id == H5B_CHUNK_ID;
+
/* Set return value */
ret_value = shared;
@@ -1948,7 +2180,7 @@ done:
*/
static herr_t
H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- const H5B_info_ud_t *info_udata)
+ const H5B_info_ud_t *info_udata, void *parent)
{
H5B_t *bt = NULL; /* Pointer to current B-tree node */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -1984,6 +2216,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad
/* Protect the initial/current node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -2028,7 +2261,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad
/* Check for another "row" of B-tree nodes to iterate over */
if(level > 0) {
/* Keep following the left-most child until we reach a leaf node. */
- if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata) < 0)
+ if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata, bt) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to list B-tree node")
} /* end if */
@@ -2054,7 +2287,7 @@ done:
*/
herr_t
H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_info_t *bt_info, H5B_operator_t op, void *udata)
+ H5B_info_t *bt_info, H5B_operator_t op, void *udata, void *parent)
{
H5B_info_ud_t info_udata; /* User-data for B-tree size iteration */
herr_t ret_value = SUCCEED; /* Return value */
@@ -2078,13 +2311,13 @@ H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
info_udata.udata = udata;
/* Iterate over the B-tree nodes */
- if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata) < 0)
+ if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata, parent) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_BADITER, FAIL, "B-tree iteration failed")
/* Iterate over the B-tree records, making any "leaf" callbacks */
/* (Only if operator defined) */
if(op)
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0)
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0)
HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed");
done:
@@ -2105,7 +2338,8 @@ done:
*-------------------------------------------------------------------------
*/
htri_t
-H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr)
+H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *parent)
{
H5B_t *bt = NULL; /* The B-tree */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -2135,6 +2369,7 @@ H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr)
*/
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node")
@@ -2179,3 +2414,122 @@ H5B_node_dest(H5B_t *bt)
FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5B_node_dest() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5B_support
+ *
+ * Purpose: Add a flush dependency between the b-tree child object (as
+ * the flush dependency child) and the b-tree node containing
+ * the address of that child. If the child has not yet been
+ * inserted, does nothing.
+ *
+ * Return: TRUE if flush dependency created
+ * FALSE if child is not in b-tree
+ * Negative on failure
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+htri_t
+H5B_support(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, void *child)
+{
+ H5B_t *node = NULL; /* Node containing direct link to child */
+ herr_t ret_value; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_support, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(type->found);
+ HDassert(H5F_addr_defined(addr));
+ HDassert(udata);
+ HDassert(child);
+
+ /* Lookup the node which points to the requested child */
+ if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree")
+
+ /* Set the return value, and add the flush dependency if the ndoe was found
+ */
+ if(node) {
+ if(H5AC_create_flush_dependency(node, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ ret_value = TRUE;
+ } /* end if */
+ else
+ ret_value = FALSE;
+
+done:
+ if(node && H5AC_unpin_entry(node) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_support() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5B_unsupport
+ *
+ * Purpose: Destroy a flush dependency between the b-tree child object
+ * and the b-tree node containing the address of that child.
+ * This dependency *must* have been created earlier either by
+ * a call to H5B_support() or by the client (presumably via
+ * the create_flush_dep() callback on insertion).
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5B_unsupport(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, void *child)
+{
+ H5B_t *node = NULL; /* Node containing direct link to child */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_unsupport, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(type->found);
+ HDassert(H5F_addr_defined(addr));
+ HDassert(udata);
+ HDassert(child);
+
+ /* Lookup the node which points to the requested child */
+ if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree")
+
+ /* It is an error if the node does not exist yet - the client should only
+ * call this function if support() succeeded or if the client's
+ * create_flush_dep() callback has been called. */
+ if(!node)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "node not found in B-tree")
+
+ /* Add the flush dependency */
+ if(H5AC_destroy_flush_dependency(node, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency")
+
+done:
+ if(node && H5AC_unpin_entry(node) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_unsupport() */
+