summaryrefslogtreecommitdiffstats
path: root/src/H5B.c
diff options
context:
space:
mode:
authorNeil Fortner <nfortne2@hdfgroup.org>2012-02-27 16:34:55 (GMT)
committerNeil Fortner <nfortne2@hdfgroup.org>2012-02-27 16:34:55 (GMT)
commit73c139e29b45941dfc4e558d9096a0869a184260 (patch)
tree422cc12e6675238cd92edadc078fa427a4f6bcd0 /src/H5B.c
parente2cd00055204f6b264002e70b21a6e2ca4c98c77 (diff)
downloadhdf5-73c139e29b45941dfc4e558d9096a0869a184260.zip
hdf5-73c139e29b45941dfc4e558d9096a0869a184260.tar.gz
hdf5-73c139e29b45941dfc4e558d9096a0869a184260.tar.bz2
[svn-r21989] Purpose: Add SWMR capability to v1 b-tree
Description: Adds SWMR capability to v1 b-trees, and the chunk index using v1 b-trees. With this implementation, flush dependencies are always on when in the cache. This will allow attritbutes to be used for "checkpointing" data when object header dependencies are fixed - i.e. if a writer writes data before an attribute in that dataset's object header, then if a reader sees the updated attribute the written data is guaranteed to be visible, as long as that dataset's b-tree nodes are evicted from the reader's cache. Also adds support for compression with SWMR. Also fixes earray implementation to not free (reuse) the file space for deleted chunks and outdated versions of compressed chunks when doing SWMR writes. These should eventually be added to a timeout list. Adds testing for these cases. Tested: durandal
Diffstat (limited to 'src/H5B.c')
-rw-r--r--src/H5B.c722
1 files changed, 538 insertions, 184 deletions
diff --git a/src/H5B.c b/src/H5B.c
index 40b221d..61c5856 100644
--- a/src/H5B.c
+++ b/src/H5B.c
@@ -111,7 +111,6 @@
#include "H5MFprivate.h" /* File memory management */
#include "H5Pprivate.h" /* Property lists */
-
/****************/
/* Local Macros */
/****************/
@@ -154,12 +153,15 @@ static H5B_ins_t H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
hbool_t *rt_key_changed,
H5B_ins_ud_t *split_bt_ud/*out*/);
static herr_t H5B_insert_child(H5B_t *bt, unsigned *bt_flags,
- unsigned idx, haddr_t child,
+ unsigned *idx, haddr_t child,
H5B_ins_t anchor, const void *md_key);
static herr_t H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
unsigned idx, void *udata,
H5B_ins_ud_t *split_bt_ud/*out*/);
static H5B_t * H5B_copy(const H5B_t *old_bt);
+static herr_t H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type,
+ haddr_t addr, void *udata, void *parent,
+ H5B_t **node);
/*********************/
@@ -216,7 +218,7 @@ H5FL_SEQ_DEFINE_STATIC(size_t);
*/
herr_t
H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata,
- haddr_t *addr_p/*out*/)
+ void *parent, haddr_t *addr_p/*out*/)
{
H5B_t *bt = NULL;
H5B_shared_t *shared=NULL; /* Pointer to shared B-tree info */
@@ -249,6 +251,7 @@ H5B_create(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, void *udata,
if(NULL == (bt->native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys)) ||
NULL == (bt->child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "memory allocation failed for B-tree root node")
+ bt->parent = parent;
if(HADDR_UNDEF == (*addr_p = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "file allocation failed for B-tree root node")
@@ -301,7 +304,8 @@ done:
*-------------------------------------------------------------------------
*/
htri_t
-H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
H5B_t *bt = NULL;
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -335,6 +339,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u
*/
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -358,7 +363,7 @@ H5B_find(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *u
HDassert(idx < bt->nchildren);
if(bt->level > 0) {
- if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata)) < 0)
+ if((ret_value = H5B_find(f, dxpl_id, type, bt->child[idx], udata, bt)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree")
} /* end if */
else {
@@ -375,6 +380,109 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5B_find_node
+ *
+ * Purpose: Locate the B-tree node containing the item specified in
+ * UDATA, if present. If found, the B-tree node will be
+ * pinned on return and must be unpinned after the caller is
+ * done using it. If not found, *node will be set to NULL.
+ *
+ * Return: Non-negative on success. Negative on failure.
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5B_find_node(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, H5B_t **node)
+{
+ H5B_t *bt = NULL;
+ H5RC_t *rc_shared; /* Ref-counted shared info */
+ H5B_shared_t *shared; /* Pointer to shared B-tree info */
+ H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
+ unsigned idx = 0, lt = 0, rt; /* Final, left & right key indices */
+ int cmp = 1; /* Key comparison value */
+ htri_t found; /* Whether the correct node has been found */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_find_node, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(H5F_addr_defined(addr));
+
+ /* Get shared info for B-tree */
+ if(NULL == (rc_shared = (type->get_shared)(f, udata)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
+ shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared);
+ HDassert(shared);
+
+ /*
+ * Perform a binary search to locate the child which contains
+ * the thing for which we're searching.
+ */
+ cache_udata.f = f;
+ cache_udata.type = type;
+ cache_udata.parent = parent;
+ cache_udata.rc_shared = rc_shared;
+ if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
+
+ rt = bt->nchildren;
+ while(lt < rt && cmp) {
+ idx = (lt + rt) / 2;
+ /* compare */
+ if((cmp = (type->cmp3)(H5B_NKEY(bt, shared, idx), udata, H5B_NKEY(bt, shared, (idx + 1)))) < 0)
+ rt = idx;
+ else
+ lt = idx + 1;
+ } /* end while */
+ /* Check if not found */
+ if(cmp)
+ *node = NULL;
+ else {
+ /*
+ * Follow the link to the subtree, or return the leaf node.
+ */
+ HDassert(idx < bt->nchildren);
+
+ if(bt->level > 0) {
+ if(H5B_find_node(f, dxpl_id, type, bt->child[idx], udata, bt, node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in subtree")
+ } /* end if */
+ else {
+ /* Check if this is really the correct child */
+ if((found = (type->found)(f, dxpl_id, bt->child[idx], H5B_NKEY(bt, shared, idx), udata)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in leaf node")
+
+ if(found) {
+ /* Return this leaf node, pinned */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__PIN_ENTRY_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node")
+ *node = bt;
+ bt = NULL;
+ } /* end if */
+ else
+ *node = NULL;
+ } /* end else */
+ } /* end else */
+
+done:
+ if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_find_node() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5B_split
*
* Purpose: Split a single node into two nodes. The old node will
@@ -405,6 +513,7 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
unsigned nleft, nright; /* Number of keys in left & right halves */
double split_ratios[3]; /* B-tree split ratios */
+ hbool_t bt_pinned = FALSE;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5B_split)
@@ -480,10 +589,11 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
/*
* Create the new B-tree node.
*/
- if(H5B_create(f, dxpl_id, shared->type, udata, &split_bt_ud->addr/*out*/) < 0)
+ if(H5B_create(f, dxpl_id, shared->type, udata, bt_ud->bt->parent, &split_bt_ud->addr/*out*/) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to create B-tree")
cache_udata.f = f;
cache_udata.type = shared->type;
+ cache_udata.parent = bt_ud->bt->parent;
cache_udata.rc_shared = bt_ud->bt->rc_shared;
if(NULL == (split_bt_ud->bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree")
@@ -509,29 +619,117 @@ H5B_split(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud, unsigned idx,
bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
bt_ud->bt->nchildren = nleft;
+ /* Actions to take if swmr writes are on */
+ if(shared->swmr_write) {
+ haddr_t new_bt_addr = HADDR_UNDEF;
+ unsigned i;
+
+ /*
+ * We must clone the old btree so readers with an out-of-date version
+ * of the parent can still see all its children, via the shadowed
+ * non-split bt. Remove it from cache but do not mark it free on disk.
+ */
+ /* Allocate space for the cloned child */
+ H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
+ if(HADDR_UNDEF == (new_bt_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move b-tree")
+
+ /* Pin old entry so it is not flushed when we unprotect */
+ if(H5AC_pin_protected_entry(bt_ud->bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPIN, FAIL, "unable to pin old b-tree node")
+ bt_pinned = TRUE;
+
+ /* Unprotect bt so we can move it. Also, note that it will be marked
+ * dirty so it will be written to the new location. */
+ HDassert(bt_ud->cache_flags & H5AC__DIRTIED_FLAG);
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->addr, bt_ud->bt, bt_ud->cache_flags) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old b-tree")
+ bt_ud->cache_flags = H5AC__NO_FLAGS_SET;
+
+ /* Move the location of the old child on the disk */
+ if(H5AC_move_entry(f, H5AC_BT, bt_ud->addr, new_bt_addr) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
+ bt_ud->addr = new_bt_addr;
+
+ /* Re-protect bt at new address */
+ if(bt_ud->bt != H5AC_protect(f, dxpl_id, H5AC_BT, new_bt_addr, &cache_udata, H5AC_WRITE))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node")
+
+ /*
+ * Update flush dependencies for children that moved to the new node
+ */
+ if(bt_ud->bt->level > 0) {
+ H5B_t *child;
+
+ for(i=0; i<nright; i++) {
+ /* Protect child b-tree node */
+ cache_udata.parent = split_bt_ud->bt;
+ if(NULL == (child = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[i], &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
+
+ /* Update the flush dependency, if necessary */
+ HDassert(child->parent);
+ if(child->parent == bt_ud->bt) {
+ child->parent = split_bt_ud->bt;
+ if(H5AC_destroy_flush_dependency(bt_ud->bt, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(child->parent, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ } /* end if */
+ else
+ HDassert(child->parent == split_bt_ud->bt);
+
+ /* Unprotect the child */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud->bt->child[idx], child, H5AC__NO_FLAGS_SET) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
+ } /* end for */
+ } /* end if */
+ else {
+ /* At leaf node, delegate to client */
+ HDassert(shared->type->update_flush_dep);
+ for(i=0; i<nright; i++) {
+ if((shared->type->update_flush_dep)(H5B_NKEY(split_bt_ud->bt, shared, i), udata, bt_ud->bt, split_bt_ud->bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to update flush dependency")
+ } /* end for */
+ } /* end else */
+
+ /*
+ * Update left sibling to point to new bt. Only necessary when doing
+ * swmr writes as otherwise the address of bt doesn't change.
+ */
+ if(H5F_addr_defined(bt_ud->bt->left)) {
+ H5B_t *tmp_bt;
+
+ if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
+
+ tmp_bt->right = bt_ud->addr;
+
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->left, tmp_bt, H5AC__DIRTIED_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
+ } /* end if */
+ } /* end if */
+
/*
- * Update sibling pointers.
+ * Update other sibling pointers.
*/
split_bt_ud->bt->left = bt_ud->addr;
split_bt_ud->bt->right = bt_ud->bt->right;
if(H5F_addr_defined(bt_ud->bt->right)) {
- H5B_t *tmp_bt;
- H5B_cache_ud_t cache_udata2; /* User-data for metadata cache callback */
+ H5B_t *tmp_bt;
- cache_udata2.f = f;
- cache_udata2.type = shared->type;
- cache_udata2.rc_shared = bt_ud->bt->rc_shared;
- if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata2, H5AC_WRITE)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
+ if(NULL == (tmp_bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, &cache_udata, H5AC_WRITE)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load right sibling")
- tmp_bt->left = split_bt_ud->addr;
+ tmp_bt->left = split_bt_ud->addr;
if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud->bt->right, tmp_bt, H5AC__DIRTIED_FLAG) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
} /* end if */
bt_ud->bt->right = split_bt_ud->addr;
+ bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
done:
if(ret_value < 0) {
@@ -539,6 +737,13 @@ done:
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node")
split_bt_ud->bt = NULL;
split_bt_ud->addr = HADDR_UNDEF;
+ split_bt_ud->cache_flags = H5AC__NO_FLAGS_SET;
+ } /* end if */
+
+ if(bt_pinned) {
+ HDassert(shared->swmr_write);
+ if(H5AC_unpin_entry(bt_ud->bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin old root")
} /* end if */
FUNC_LEAVE_NOAPI(ret_value)
@@ -560,7 +765,7 @@ done:
*/
herr_t
H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- void *udata)
+ void *udata, void *parent)
{
/*
* These are defined this way to satisfy alignment constraints.
@@ -580,6 +785,8 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
H5B_shared_t *shared; /* Pointer to shared B-tree info */
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
H5B_ins_t my_ins = H5B_INS_ERROR;
+ hbool_t bt_protected = FALSE;
+ hbool_t nrbt_pinned = FALSE; /* TRUE if new_root_bt is pinned */
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5B_insert, FAIL)
@@ -599,20 +806,27 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
/* Protect the root node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
bt_ud.addr = addr;
if(NULL == (bt_ud.bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to locate root of B-tree")
+ bt_protected = TRUE;
/* Insert the object */
if((int)(my_ins = H5B_insert_helper(f, dxpl_id, &bt_ud, type, lt_key,
&lt_key_changed, md_key, udata, rt_key, &rt_key_changed,
&split_bt_ud/*out*/)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to insert key")
+
+ /* Check if the root node split */
if(H5B_INS_NOOP == my_ins) {
+ /* The root node did not split - just update the flush dependency (if
+ * necessary) and exit */
HDassert(!split_bt_ud.bt);
HGOTO_DONE(SUCCEED)
} /* end if */
+
HDassert(H5B_INS_RIGHT == my_ins);
HDassert(split_bt_ud.bt);
HDassert(H5F_addr_defined(split_bt_ud.addr));
@@ -631,33 +845,33 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
* at the old root's previous address. This prevents the B-tree from
* "moving".
*/
- H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
- if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root")
-
- /*
- * Move the node to the new location
- */
+ /* Note that this is not necessary if swmr writes are on, as H5B_split
+ * already moved the node in this case */
+ if(!shared->swmr_write) {
+ H5_CHECK_OVERFLOW(shared->sizeof_rnode,size_t,hsize_t);
+ if(HADDR_UNDEF == (old_root_addr = H5MF_alloc(f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)shared->sizeof_rnode)))
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move root")
+
+ /* Unprotect the old root so we can move it. Also force it to be marked
+ * dirty so it is written to the new location. */
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root")
+ bt_protected = FALSE;
+
+ /* Move the location of the old root on the disk */
+ if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
+ bt_ud.addr = old_root_addr;
+
+ /* Update the split b-tree's left pointer to point to the new location */
+ split_bt_ud.bt->left = bt_ud.addr;
+ split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG;
+ } /* end else */
/* Make a copy of the old root information */
if(NULL == (new_root_bt = H5B_copy(bt_ud.bt)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to copy old root");
- /* Unprotect the old root so we can move it. Also force it to be marked
- * dirty so it is written to the new location. */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, H5AC__DIRTIED_FLAG) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release old root")
- bt_ud.bt = NULL; /* Make certain future references will be caught */
-
- /* Move the location of the old root on the disk */
- if(H5AC_move_entry(f, H5AC_BT, bt_ud.addr, old_root_addr) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to move B-tree root node")
- bt_ud.addr = old_root_addr;
-
- /* Update the split b-tree's left pointer to point to the new location */
- split_bt_ud.bt->left = bt_ud.addr;
- split_bt_ud.cache_flags |= H5AC__DIRTIED_FLAG;
-
/* clear the old root info at the old address (we already copied it) */
new_root_bt->left = HADDR_UNDEF;
new_root_bt->right = HADDR_UNDEF;
@@ -673,23 +887,61 @@ H5B_insert(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
HDmemcpy(H5B_NKEY(new_root_bt, shared, 1), md_key, shared->type->sizeof_nkey);
HDmemcpy(H5B_NKEY(new_root_bt, shared, 2), rt_key, shared->type->sizeof_nkey);
- /* Insert the modified copy of the old root into the file again */
- if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add old B-tree root node to cache")
+ /* Insert the modified copy of the old root into the file again, and pin if
+ * doing swmr writes */
+ if(shared->swmr_write) {
+ if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__PIN_ENTRY_FLAG) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache")
+ nrbt_pinned = TRUE;
+
+ /* Set up flush dependencies */
+ HDassert(parent);
+ HDassert(bt_ud.bt->parent == parent);
+ if(H5AC_destroy_flush_dependency(parent, bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(new_root_bt, bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ bt_ud.bt->parent = new_root_bt;
+
+ HDassert(split_bt_ud.bt->parent == parent);
+ if(H5AC_destroy_flush_dependency(parent, split_bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to destroy flush dependency")
+ if(H5AC_create_flush_dependency(new_root_bt, split_bt_ud.bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ split_bt_ud.bt->parent = new_root_bt;
+
+ HDassert(new_root_bt->parent == parent);
+ } /* end if */
+ else {
+ if(H5AC_insert_entry(f, dxpl_id, H5AC_BT, addr, new_root_bt, H5AC__NO_FLAGS_SET) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL, "unable to add new B-tree root node to cache")
-done:
- if(ret_value < 0)
- if(new_root_bt && H5B_node_dest(new_root_bt) < 0)
- HDONE_ERROR(H5E_BTREE, H5E_CANTRELEASE, FAIL, "unable to free B-tree root node");
+ /* Mark new_root_bt as NULL, as it is not pinned or protected and does
+ * not need to be freed as it is now in the cache. */
+ new_root_bt = NULL;
+ } /* end else */
- if(bt_ud.bt)
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt, bt_ud.cache_flags) < 0)
+done:
+ if(bt_protected)
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, bt_ud.addr, bt_ud.bt,
+ bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect old root")
if(split_bt_ud.bt)
if(H5AC_unprotect(f, dxpl_id, H5AC_BT, split_bt_ud.addr, split_bt_ud.bt, split_bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to unprotect new child")
+ if(nrbt_pinned) {
+ HDassert(shared->swmr_write);
+ if(H5AC_unpin_entry(new_root_bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin new root")
+ } /* end if */
+ else if(new_root_bt) {
+ HDassert(ret_value < 0);
+ if(H5B_node_dest(new_root_bt) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTFREE, FAIL, "unable to free B-tree root node");
+ } /* end if */
+
#ifdef H5B_DEBUG
if(ret_value >= 0)
H5B_assert(f, dxpl_id, addr, type, udata);
@@ -715,7 +967,7 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
+H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned *idx,
haddr_t child, H5B_ins_t anchor, const void *md_key)
{
H5B_shared_t *shared; /* Pointer to shared B-tree info */
@@ -733,8 +985,8 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
/* Check for inserting right-most key into node (common when just appending
* records to an unlimited dimension chunked dataset)
*/
- base = H5B_NKEY(bt, shared, (idx + 1));
- if((idx + 1) == bt->nchildren) {
+ base = H5B_NKEY(bt, shared, (*idx + 1));
+ if((*idx + 1) == bt->nchildren) {
/* Make room for the new key */
HDmemcpy(base + shared->type->sizeof_nkey, base,
shared->type->sizeof_nkey); /* No overlap possible - memcpy() OK */
@@ -742,34 +994,34 @@ H5B_insert_child(H5B_t *bt, unsigned *bt_flags, unsigned idx,
/* The MD_KEY is the left key of the new node */
if(H5B_INS_RIGHT == anchor)
- idx++; /* Don't have to memmove() child addresses down, just add new child */
+ (*idx)++; /* Don't have to memmove() child addresses down, just add new child */
else
/* Make room for the new child address */
- bt->child[idx + 1] = bt->child[idx];
+ bt->child[*idx + 1] = bt->child[*idx];
} /* end if */
else {
/* Make room for the new key */
HDmemmove(base + shared->type->sizeof_nkey, base,
- (bt->nchildren - idx) * shared->type->sizeof_nkey);
+ (bt->nchildren - *idx) * shared->type->sizeof_nkey);
HDmemcpy(base, md_key, shared->type->sizeof_nkey);
/* The MD_KEY is the left key of the new node */
if(H5B_INS_RIGHT == anchor)
- idx++;
+ (*idx)++;
/* Make room for the new child address */
- HDmemmove(bt->child + idx + 1, bt->child + idx,
- (bt->nchildren - idx) * sizeof(haddr_t));
+ HDmemmove(bt->child + *idx + 1, bt->child + *idx,
+ (bt->nchildren - *idx) * sizeof(haddr_t));
} /* end if */
- bt->child[idx] = child;
+ bt->child[*idx] = child;
bt->nchildren += 1;
/* Mark node as dirty */
*bt_flags |= H5AC__DIRTIED_FLAG;
FUNC_LEAVE_NOAPI(SUCCEED)
-}
+} /* end H5B_insert_child() */
/*-------------------------------------------------------------------------
@@ -873,6 +1125,7 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
/* Set up user data for cache callbacks */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = bt;
cache_udata.rc_shared = rc_shared;
if(0 == bt->nchildren) {
@@ -1046,18 +1299,30 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
else
HDmemcpy(rt_key, H5B_NKEY(bt, shared, idx + 1), type->sizeof_nkey);
} /* end if */
- if(H5B_INS_CHANGE == my_ins) {
- /*
- * The insertion simply changed the address for the child.
- */
- HDassert(!child_bt_ud.bt);
- HDassert(bt->level == 0);
- bt->child[idx] = new_child_bt_ud.addr;
- bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
- } else if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) {
+
+ /*
+ * Handle changes/additions to children
+ */
+ HDassert(!(bt->level == 0) != !(child_bt_ud.bt));
+ if(H5B_INS_LEFT == my_ins || H5B_INS_RIGHT == my_ins) {
hbool_t *tmp_bt_flags_ptr = NULL;
H5B_t *tmp_bt;
+ /* Update child pointer to (old) child if swmr writes are on and level >
+ * 0, as it has been moved by H5B_split (one level down) */
+ if(shared->swmr_write && bt->level > 0) {
+ HDassert(child_bt_ud.bt);
+ HDassert(bt_ud->bt->child[idx] != child_bt_ud.addr);
+
+ bt_ud->bt->child[idx] = child_bt_ud.addr;
+ } /* end if */
+#ifndef NDEBUG
+ if(!(shared->swmr_write) && bt->level > 0) {
+ HDassert(child_bt_ud.bt);
+ HDassert(bt_ud->bt->child[idx] == child_bt_ud.addr);
+ } /* end if */
+#endif /* NDEBUG */
+
/*
* If this node is full then split it before inserting the new child.
*/
@@ -1079,9 +1344,34 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
} /* end else */
/* Insert the child */
- if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, idx, new_child_bt_ud.addr, my_ins, md_key) < 0)
+ if(H5B_insert_child(tmp_bt, tmp_bt_flags_ptr, &idx, new_child_bt_ud.addr, my_ins, md_key) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, H5B_INS_ERROR, "can't insert child")
- }
+
+ /* Set up flush dependency on child client object, if appropriate */
+ if(shared->swmr_write && bt->level == 0) {
+ HDassert(!child_bt_ud.bt);
+ HDassert(shared->type->create_flush_dep);
+ if((shared->type->create_flush_dep)(H5B_NKEY(tmp_bt, shared, idx), udata, tmp_bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency")
+ } /* end if */
+ } else {
+ if(H5B_INS_CHANGE == my_ins) {
+ /*
+ * The insertion simply changed the address for the child.
+ */
+ HDassert(!child_bt_ud.bt);
+ HDassert(bt->level == 0);
+ bt->child[idx] = new_child_bt_ud.addr;
+ bt_ud->cache_flags |= H5AC__DIRTIED_FLAG;
+ } /* end if */
+ /*Set up flush dependency on child client object, if appropriate */
+ if(shared->swmr_write && bt->level == 0) {
+ HDassert(!child_bt_ud.bt);
+ HDassert(shared->type->create_flush_dep);
+ if((shared->type->create_flush_dep)(H5B_NKEY(bt, shared, idx), udata, bt) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, H5B_INS_ERROR, "unable to create flush dependency")
+ } /* end if */
+ } /* end if */
/*
* If this node split, return the mid key (the one that is shared
@@ -1105,7 +1395,8 @@ H5B_insert_helper(H5F_t *f, hid_t dxpl_id, H5B_ins_ud_t *bt_ud,
done:
if(child_bt_ud.bt)
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt, child_bt_ud.cache_flags) < 0)
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, child_bt_ud.addr, child_bt_ud.bt,
+ child_bt_ud.cache_flags) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect child")
if(new_child_bt_ud.bt)
@@ -1113,7 +1404,7 @@ done:
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to unprotect new child")
FUNC_LEAVE_NOAPI(ret_value)
-}
+} /* end H5B_insert_helper() */
/*-------------------------------------------------------------------------
@@ -1128,19 +1419,24 @@ done:
* matzke@llnl.gov
* Jun 23 1997
*
+ * Modifications: Neil Fortner
+ * Jun 23 2011
+ * Replaced original function with new algorithm that doesn't
+ * use sibling pointers (for SWMR consistency) or unprotect
+ * nodes during recursion (for performance and safety).
+ *
*-------------------------------------------------------------------------
*/
static herr_t
H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_operator_t op, void *udata)
+ H5B_operator_t op, void *udata, void *parent)
{
- H5B_t *bt = NULL; /* Pointer to current B-tree node */
- H5RC_t *rc_shared; /* Ref-counted shared info */
+ H5B_t *bt = NULL; /* Pointer to current B-tree node */
+ H5RC_t *rc_shared; /* Ref-counted shared info */
H5B_shared_t *shared; /* Pointer to shared B-tree info */
H5B_cache_ud_t cache_udata; /* User-data for metadata cache callback */
- uint8_t *native = NULL; /* Array of keys in native format */
- haddr_t *child = NULL; /* Array of child pointers */
- herr_t ret_value; /* Return value */
+ unsigned i; /* Index */
+ herr_t ret_value = H5_ITER_CONT; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5B_iterate_helper)
@@ -1155,108 +1451,33 @@ H5B_iterate_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t add
/* Get shared info for B-tree */
if(NULL == (rc_shared = (type->get_shared)(f, udata)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTGET, FAIL, "can't retrieve B-tree's shared ref. count object")
shared = (H5B_shared_t *)H5RC_GET_OBJ(rc_shared);
HDassert(shared);
/* Protect the initial/current node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node")
-
- if(bt->level > 0) {
- haddr_t left_child = bt->child[0]; /* Address of left-most child in node */
-
- /* Release current node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "unable to load B-tree node")
- /* Keep following the left-most child until we reach a leaf node. */
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, left_child, op, udata)) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node")
- } /* end if */
- else {
- unsigned nchildren; /* Number of child pointers */
- haddr_t next_addr; /* Address of next node to the right */
-
- /* Allocate space for a copy of the native records & child pointers */
- if(NULL == (native = H5FL_BLK_MALLOC(native_block, shared->sizeof_keys)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree native records")
- if(NULL == (child = H5FL_SEQ_MALLOC(haddr_t, (size_t)shared->two_k)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, H5_ITER_ERROR, "memory allocation failed for shared B-tree child addresses")
-
- /* Cache information from this node */
- nchildren = bt->nchildren;
- next_addr = bt->right;
-
- /* Copy the native keys & child pointers into local arrays */
- HDmemcpy(native, bt->native, shared->sizeof_keys);
- HDmemcpy(child, bt->child, (nchildren * sizeof(haddr_t)));
-
- /* Release current node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
-
- /*
- * We've reached the left-most leaf. Now follow the right-sibling
- * pointer from leaf to leaf until we've processed all leaves.
- */
- ret_value = H5_ITER_CONT;
- while(ret_value == H5_ITER_CONT) {
- haddr_t *curr_child; /* Pointer to node's child addresses */
- uint8_t *curr_native; /* Pointer to node's native keys */
- unsigned u; /* Local index variable */
-
- /*
- * Perform the iteration operator, which might invoke an
- * application callback.
- */
- for(u = 0, curr_child = child, curr_native = native; u < nchildren && ret_value == H5_ITER_CONT; u++, curr_child++, curr_native += type->sizeof_nkey) {
- ret_value = (*op)(f, dxpl_id, curr_native, *curr_child, curr_native + type->sizeof_nkey, udata);
- if(ret_value < 0)
- HERROR(H5E_BTREE, H5E_CANTLIST, "iterator function failed");
- } /* end for */
-
- /* Check for continuing iteration */
- if(ret_value == H5_ITER_CONT) {
- /* Check for another node */
- if(H5F_addr_defined(next_addr)) {
- /* Protect the next node to the right */
- addr = next_addr;
- if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
- HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5_ITER_ERROR, "B-tree node")
-
- /* Cache information from this node */
- nchildren = bt->nchildren;
- next_addr = bt->right;
-
- /* Copy the native keys & child pointers into local arrays */
- HDmemcpy(native, bt->native, shared->sizeof_keys);
- HDmemcpy(child, bt->child, nchildren * sizeof(haddr_t));
-
- /* Unprotect node */
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- bt = NULL;
- } /* end if */
- else
- /* Exit loop */
- break;
- } /* end if */
- } /* end while */
- } /* end else */
+ /* Iterate over children */
+ for(i=0; i<bt->nchildren && ret_value == H5_ITER_CONT; i++) {
+ if(bt->level > 0) {
+ /* Keep following the left-most child until we reach a leaf node. */
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, bt->child[i], op, udata, bt)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "unable to list B-tree node")
+ } /* end if */
+ else
+ if((ret_value = (*op)(f, dxpl_id, H5B_NKEY(bt, shared, i), bt->child[i], H5B_NKEY(bt, shared, i + 1), udata)) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, H5_ITER_ERROR, "iterator function failed")
+ } /* end for */
done:
if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__NO_FLAGS_SET) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5_ITER_ERROR, "unable to release B-tree node")
- if(native)
- native = H5FL_BLK_FREE(native_block, native);
- if(child)
- child = H5FL_SEQ_FREE(haddr_t, child);
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5B_iterate_helper() */
@@ -1278,7 +1499,7 @@ done:
*/
herr_t
H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_operator_t op, void *udata)
+ H5B_operator_t op, void *udata, void *parent)
{
herr_t ret_value; /* Return value */
@@ -1294,7 +1515,7 @@ H5B_iterate(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
HDassert(udata);
/* Iterate over the B-tree records */
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0)
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0)
HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed");
FUNC_LEAVE_NOAPI(ret_value)
@@ -1329,7 +1550,8 @@ static H5B_ins_t
H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type,
int level, uint8_t *lt_key/*out*/,
hbool_t *lt_key_changed/*out*/, void *udata,
- uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/)
+ uint8_t *rt_key/*out*/, hbool_t *rt_key_changed/*out*/,
+ void *parent)
{
H5B_t *bt = NULL, *sibling = NULL;
unsigned bt_flags = H5AC__NO_FLAGS_SET;
@@ -1343,7 +1565,6 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
FUNC_ENTER_NOAPI(H5B_remove_helper, H5B_INS_ERROR)
HDassert(f);
- HDassert(H5F_addr_defined(addr));
HDassert(type);
HDassert(type->decode);
HDassert(type->cmp3);
@@ -1364,6 +1585,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
cache_udata.f = f;
cache_udata.type = type;
cache_udata.rc_shared = rc_shared;
+ cache_udata.parent = parent;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, H5B_INS_ERROR, "unable to load B-tree node")
@@ -1388,7 +1610,7 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
if((int)(ret_value = H5B_remove_helper(f, dxpl_id,
bt->child[idx], type, level + 1, H5B_NKEY(bt, shared, idx)/*out*/,
lt_key_changed/*out*/, udata, H5B_NKEY(bt, shared, idx + 1)/*out*/,
- rt_key_changed/*out*/)) < 0)
+ rt_key_changed/*out*/, bt)) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, H5B_INS_ERROR, "key not found in subtree")
} else if(type->remove) {
/*
@@ -1504,9 +1726,10 @@ H5B_remove_helper(H5F_t *f, hid_t dxpl_id, haddr_t addr, const H5B_class_t *type
bt->nchildren = 0;
/* Delete the node from disk (via the metadata cache) */
- bt_flags |= H5AC__DIRTIED_FLAG;
+ if(!shared->swmr_write)
+ bt_flags |= H5AC__DIRTIED_FLAG | H5AC__FREE_FILE_SPACE_FLAG;
H5_CHECK_OVERFLOW(shared->sizeof_rnode, size_t, hsize_t);
- if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0) {
+ if(H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, bt_flags | H5AC__DELETED_FLAG) < 0) {
bt = NULL;
bt_flags = H5AC__NO_FLAGS_SET;
HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, H5B_INS_ERROR, "unable to free B-tree node")
@@ -1652,7 +1875,8 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
/* These are defined this way to satisfy alignment constraints */
uint64_t _lt_key[128], _rt_key[128];
@@ -1672,7 +1896,8 @@ H5B_remove(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
/* The actual removal */
if(H5B_remove_helper(f, dxpl_id, addr, type, 0, lt_key, &lt_key_changed,
- udata, rt_key, &rt_key_changed) == H5B_INS_ERROR)
+ udata, rt_key, &rt_key_changed, parent)
+ == H5B_INS_ERROR)
HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "unable to remove entry from B-tree")
#ifdef H5B_DEBUG
@@ -1697,7 +1922,8 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void *udata)
+H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent)
{
H5B_t *bt = NULL; /* B-tree node being operated on */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -1722,6 +1948,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
/* Lock this B-tree node into memory for now */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_WRITE)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -1730,7 +1957,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
if(bt->level > 0) {
/* Iterate over all children in node, deleting them */
for(u = 0; u < bt->nchildren; u++)
- if(H5B_delete(f, dxpl_id, type, bt->child[u], udata) < 0)
+ if(H5B_delete(f, dxpl_id, type, bt->child[u], udata, bt) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to delete B-tree node")
} /* end if */
@@ -1751,7 +1978,7 @@ H5B_delete(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr, void
} /* end else */
done:
- if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | H5AC__FREE_FILE_SPACE_FLAG) < 0)
+ if(bt && H5AC_unprotect(f, dxpl_id, H5AC_BT, addr, bt, H5AC__DELETED_FLAG | (shared->swmr_write ? 0 : H5AC__FREE_FILE_SPACE_FLAG)) < 0)
HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release B-tree node in cache")
FUNC_LEAVE_NOAPI(ret_value)
@@ -1816,6 +2043,11 @@ HDmemset(shared->page, 0, shared->sizeof_rnode);
for(u = 0; u < (shared->two_k + 1); u++)
shared->nkey[u] = u * type->sizeof_nkey;
+ /* Determine if we are doing SWMR writes. Only enable for chunks for now.
+ */
+ shared->swmr_write = (H5F_INTENT(f) & H5F_ACC_SWMR_WRITE) > 0
+ && type->id == H5B_CHUNK_ID;
+
/* Set return value */
ret_value = shared;
@@ -1948,7 +2180,7 @@ done:
*/
static herr_t
H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- const H5B_info_ud_t *info_udata)
+ const H5B_info_ud_t *info_udata, void *parent)
{
H5B_t *bt = NULL; /* Pointer to current B-tree node */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -1984,6 +2216,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad
/* Protect the initial/current node */
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to load B-tree node")
@@ -2028,7 +2261,7 @@ H5B_get_info_helper(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t ad
/* Check for another "row" of B-tree nodes to iterate over */
if(level > 0) {
/* Keep following the left-most child until we reach a leaf node. */
- if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata) < 0)
+ if(H5B_get_info_helper(f, dxpl_id, type, left_child, info_udata, bt) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_CANTLIST, FAIL, "unable to list B-tree node")
} /* end if */
@@ -2054,7 +2287,7 @@ done:
*/
herr_t
H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
- H5B_info_t *bt_info, H5B_operator_t op, void *udata)
+ H5B_info_t *bt_info, H5B_operator_t op, void *udata, void *parent)
{
H5B_info_ud_t info_udata; /* User-data for B-tree size iteration */
herr_t ret_value = SUCCEED; /* Return value */
@@ -2078,13 +2311,13 @@ H5B_get_info(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
info_udata.udata = udata;
/* Iterate over the B-tree nodes */
- if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata) < 0)
+ if(H5B_get_info_helper(f, dxpl_id, type, addr, &info_udata, parent) < 0)
HGOTO_ERROR(H5E_BTREE, H5E_BADITER, FAIL, "B-tree iteration failed")
/* Iterate over the B-tree records, making any "leaf" callbacks */
/* (Only if operator defined) */
if(op)
- if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata)) < 0)
+ if((ret_value = H5B_iterate_helper(f, dxpl_id, type, addr, op, udata, parent)) < 0)
HERROR(H5E_BTREE, H5E_BADITER, "B-tree iteration failed");
done:
@@ -2105,7 +2338,8 @@ done:
*-------------------------------------------------------------------------
*/
htri_t
-H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr)
+H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *parent)
{
H5B_t *bt = NULL; /* The B-tree */
H5RC_t *rc_shared; /* Ref-counted shared info */
@@ -2135,6 +2369,7 @@ H5B_valid(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr)
*/
cache_udata.f = f;
cache_udata.type = type;
+ cache_udata.parent = parent;
cache_udata.rc_shared = rc_shared;
if(NULL == (bt = (H5B_t *)H5AC_protect(f, dxpl_id, H5AC_BT, addr, &cache_udata, H5AC_READ)))
HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree node")
@@ -2179,3 +2414,122 @@ H5B_node_dest(H5B_t *bt)
FUNC_LEAVE_NOAPI(SUCCEED)
} /* end H5B_node_dest() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5B_support
+ *
+ * Purpose: Add a flush dependency between the b-tree child object (as
+ * the flush dependency child) and the b-tree node containing
+ * the address of that child. If the child has not yet been
+ * inserted, does nothing.
+ *
+ * Return: TRUE if flush dependency created
+ * FALSE if child is not in b-tree
+ * Negative on failure
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+htri_t
+H5B_support(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, void *child)
+{
+ H5B_t *node = NULL; /* Node containing direct link to child */
+ herr_t ret_value; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_support, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(type->found);
+ HDassert(H5F_addr_defined(addr));
+ HDassert(udata);
+ HDassert(child);
+
+ /* Lookup the node which points to the requested child */
+ if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree")
+
+ /* Set the return value, and add the flush dependency if the ndoe was found
+ */
+ if(node) {
+ if(H5AC_create_flush_dependency(node, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTDEPEND, FAIL, "unable to create flush dependency")
+ ret_value = TRUE;
+ } /* end if */
+ else
+ ret_value = FALSE;
+
+done:
+ if(node && H5AC_unpin_entry(node) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_support() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5B_unsupport
+ *
+ * Purpose: Destroy a flush dependency between the b-tree child object
+ * and the b-tree node containing the address of that child.
+ * This dependency *must* have been created earlier either by
+ * a call to H5B_support() or by the client (presumably via
+ * the create_flush_dep() callback on insertion).
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Neil Fortner
+ * Aug 18 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5B_unsupport(H5F_t *f, hid_t dxpl_id, const H5B_class_t *type, haddr_t addr,
+ void *udata, void *parent, void *child)
+{
+ H5B_t *node = NULL; /* Node containing direct link to child */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5B_unsupport, FAIL)
+
+ /*
+ * Check arguments.
+ */
+ HDassert(f);
+ HDassert(type);
+ HDassert(type->decode);
+ HDassert(type->cmp3);
+ HDassert(type->found);
+ HDassert(H5F_addr_defined(addr));
+ HDassert(udata);
+ HDassert(child);
+
+ /* Lookup the node which points to the requested child */
+ if(H5B_find_node(f, dxpl_id, type, addr, udata, parent, &node) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "can't lookup key in B-tree")
+
+ /* It is an error if the node does not exist yet - the client should only
+ * call this function if support() succeeded or if the client's
+ * create_flush_dep() callback has been called. */
+ if(!node)
+ HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "node not found in B-tree")
+
+ /* Add the flush dependency */
+ if(H5AC_destroy_flush_dependency(node, child) < 0)
+ HGOTO_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, FAIL, "unable to destroy flush dependency")
+
+done:
+ if(node && H5AC_unpin_entry(node) < 0)
+ HDONE_ERROR(H5E_BTREE, H5E_CANTUNPIN, FAIL, "unable to unpin node")
+
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5B_unsupport() */
+