Diffstat (limited to 'src/H5B2internal.c')
-rw-r--r-- | src/H5B2internal.c | 1443 |
1 files changed, 1443 insertions, 0 deletions
diff --git a/src/H5B2internal.c b/src/H5B2internal.c new file mode 100644 index 0000000..e74ae59 --- /dev/null +++ b/src/H5B2internal.c @@ -0,0 +1,1443 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/*------------------------------------------------------------------------- + * + * Created: H5B2internal.c + * Dec 01 2016 + * Quincey Koziol <koziol@lbl.gov> + * + * Purpose: Routines for managing v2 B-tree internal nodes. + * + *------------------------------------------------------------------------- + */ + +/****************/ +/* Module Setup */ +/****************/ + +#include "H5B2module.h" /* This source code file is part of the H5B2 module */ + + +/***********/ +/* Headers */ +/***********/ +#include "H5private.h" /* Generic Functions */ +#include "H5B2pkg.h" /* v2 B-trees */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5MFprivate.h" /* File memory management */ + + +/****************/ +/* Local Macros */ +/****************/ + + +/******************/ +/* Local Typedefs */ +/******************/ + + +/********************/ +/* Package Typedefs */ +/********************/ + + +/********************/ +/* Local Prototypes */ +/********************/ +static herr_t H5B2__shadow_internal(H5B2_internal_t *internal, hid_t dxpl_id, + H5B2_node_ptr_t *curr_node_ptr); + + +/*********************/ +/* Package Variables */ +/*********************/ + +/* Declare a free list to manage the H5B2_internal_t struct */ +H5FL_DEFINE(H5B2_internal_t); + + +/*****************************/ +/* Library Private Variables */ +/*****************************/ + + +/*******************/ +/* Local Variables */ +/*******************/ + + + +/*------------------------------------------------------------------------- + * Function: H5B2__create_internal + * + * Purpose: Creates an empty internal node of a B-tree and updates the node pointer + * to point to it. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Feb 3 2005 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__create_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, void *parent, + H5B2_node_ptr_t *node_ptr, uint16_t depth) +{ + H5B2_internal_t *internal = NULL; /* Pointer to new internal node created */ + hbool_t inserted = FALSE; /* Whether the internal node was inserted into cache */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. 
*/ + HDassert(hdr); + HDassert(node_ptr); + HDassert(depth > 0); + + /* Allocate memory for internal node information */ + if(NULL == (internal = H5FL_CALLOC(H5B2_internal_t))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for B-tree internal info") + + /* Increment ref. count on B-tree header */ + if(H5B2__hdr_incr(hdr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTINC, FAIL, "can't increment ref. count on B-tree header") + + /* Share B-tree header information */ + internal->hdr = hdr; + + /* Allocate space for the native keys in memory */ + if(NULL == (internal->int_native = (uint8_t *)H5FL_FAC_MALLOC(hdr->node_info[depth].nat_rec_fac))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for B-tree internal native keys") + HDmemset(internal->int_native, 0, hdr->cls->nrec_size * hdr->node_info[depth].max_nrec); + + /* Allocate space for the node pointers in memory */ + if(NULL == (internal->node_ptrs = (H5B2_node_ptr_t *)H5FL_FAC_MALLOC(hdr->node_info[depth].node_ptr_fac))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for B-tree internal node pointers") + HDmemset(internal->node_ptrs, 0, sizeof(H5B2_node_ptr_t) * (hdr->node_info[depth].max_nrec + 1)); + + /* Set depth of the node */ + internal->depth = depth; + + /* Set parent */ + internal->parent = parent; + + /* Set shadowed epoch to header's epoch */ + internal->shadow_epoch = hdr->shadow_epoch; + + /* Allocate space on disk for the internal node */ + if(HADDR_UNDEF == (node_ptr->addr = H5MF_alloc(hdr->f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)hdr->node_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for B-tree internal node") + + /* Cache the new B-tree node */ + if(H5AC_insert_entry(hdr->f, dxpl_id, H5AC_BT2_INT, node_ptr->addr, internal, H5AC__NO_FLAGS_SET) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTINIT, FAIL, "can't add B-tree internal node to cache") + inserted = TRUE; + + /* Add internal node as child of 'top' proxy */ + if(hdr->top_proxy) { + if(H5AC_proxy_entry_add_child(hdr->top_proxy, hdr->f, dxpl_id, internal) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSET, FAIL, "unable to add v2 B-tree node as child of proxy") + internal->top_proxy = hdr->top_proxy; + } /* end if */ + +done: + if(ret_value < 0) { + if(internal) { + /* Remove from cache, if inserted */ + if(inserted) + if(H5AC_remove_entry(internal) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTREMOVE, FAIL, "unable to remove v2 B-tree internal node from cache") + + /* Release internal node's disk space */ + if(H5F_addr_defined(node_ptr->addr) && H5MF_xfree(hdr->f, H5FD_MEM_BTREE, dxpl_id, node_ptr->addr, (hsize_t)hdr->node_size) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTFREE, FAIL, "unable to release file space for v2 B-tree internal node") + + /* Destroy internal node */ + if(H5B2__internal_free(internal) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTFREE, FAIL, "unable to release v2 B-tree internal node") + } /* end if */ + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__create_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__protect_internal + * + * Purpose: "Protect" an internal node in the metadata cache + * + * Return: Pointer to internal node on success/NULL on failure + * + * Programmer: Quincey Koziol + * koziol@hdfgroup.org + * Aug 25 2006 + * + *------------------------------------------------------------------------- + */ +H5B2_internal_t * +H5B2__protect_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, void *parent, + 
H5B2_node_ptr_t *node_ptr, uint16_t depth, hbool_t shadow, unsigned flags) +{ + H5B2_internal_cache_ud_t udata; /* User data to pass through to cache 'deserialize' callback */ + H5B2_internal_t *internal = NULL; /* v2 B-tree internal node */ + H5B2_internal_t *ret_value = NULL; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. */ + HDassert(hdr); + HDassert(node_ptr); + HDassert(H5F_addr_defined(node_ptr->addr)); + HDassert(depth > 0); + + /* only H5AC__READ_ONLY_FLAG may appear in flags */ + HDassert((flags & (unsigned)(~H5AC__READ_ONLY_FLAG)) == 0); + + /* Set up user data for callback */ + udata.f = hdr->f; + udata.hdr = hdr; + udata.parent = parent; + udata.nrec = node_ptr->node_nrec; + udata.depth = depth; + + /* Protect the internal node */ + if(NULL == (internal = (H5B2_internal_t *)H5AC_protect(hdr->f, dxpl_id, H5AC_BT2_INT, node_ptr->addr, &udata, flags))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, NULL, "unable to protect B-tree internal node") + + /* Create top proxy, if it doesn't exist */ + if(hdr->top_proxy && NULL == internal->top_proxy) { + /* Add internal node as child of 'top' proxy */ + if(H5AC_proxy_entry_add_child(hdr->top_proxy, hdr->f, dxpl_id, internal) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSET, NULL, "unable to add v2 B-tree internal node as child of proxy") + internal->top_proxy = hdr->top_proxy; + } /* end if */ + + /* Shadow the node, if requested */ + if(shadow) + if(H5B2__shadow_internal(internal, dxpl_id, node_ptr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, NULL, "unable to shadow internal node") + + /* Set return value */ + ret_value = internal; + +done: + /* Clean up on error */ + if(!ret_value) { + /* Release the internal node, if it was protected */ + if(internal) { + /* Remove from v2 B-tree's proxy, if added */ + if(internal->top_proxy) { + if(H5AC_proxy_entry_remove_child(internal->top_proxy, internal) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNDEPEND, NULL, "unable to destroy flush dependency between internal node and v2 B-tree 'top' proxy") + internal->top_proxy = NULL; + } /* end if */ + + /* Unprotect internal node */ + if(H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, node_ptr->addr, internal, H5AC__NO_FLAGS_SET) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, NULL, "unable to unprotect v2 B-tree internal node, address = %llu", (unsigned long long)node_ptr->addr) + } /* end if */ + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__protect_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__neighbor_internal + * + * Purpose: Locate a record relative to the specified information in a + * B-tree internal node and return that information by filling in + * fields of the + * caller-supplied UDATA pointer depending on the type of leaf node + * requested. The UDATA can point to additional data passed + * to the key comparison function. + * + * The 'OP' routine is called with the record found and the + * OP_DATA pointer, to allow caller to return information about + * the record. + * + * The RANGE indicates whether to search for records less than or + * equal to, or greater than or equal to the information passed + * in with UDATA. + * + * Return: Non-negative on success, negative on failure. 
+ * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Mar 9 2005 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__neighbor_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, uint16_t depth, + H5B2_node_ptr_t *curr_node_ptr, void *neighbor_loc, H5B2_compare_t comp, + void *parent, void *udata, H5B2_found_t op, void *op_data) +{ + H5B2_internal_t *internal; /* Pointer to internal node */ + unsigned idx = 0; /* Location of record which matches key */ + int cmp = 0; /* Comparison value of records */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. */ + HDassert(hdr); + HDassert(depth > 0); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + HDassert(op); + + /* Lock current B-tree node */ + if(NULL == (internal = H5B2__protect_internal(hdr, dxpl_id, parent, curr_node_ptr, depth, FALSE, H5AC__READ_ONLY_FLAG))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree internal node") + + /* Locate node pointer for child */ + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + if(cmp > 0) + idx++; + + /* Set the neighbor location, if appropriate */ + if(comp == H5B2_COMPARE_LESS) { + if(idx > 0) + neighbor_loc = H5B2_INT_NREC(internal, hdr, idx - 1); + } /* end if */ + else { + HDassert(comp == H5B2_COMPARE_GREATER); + + if(idx < internal->nrec) + neighbor_loc = H5B2_INT_NREC(internal, hdr, idx); + } /* end else */ + + /* Attempt to find neighboring record */ + if(depth > 1) { + if(H5B2__neighbor_internal(hdr, dxpl_id, (uint16_t)(depth - 1), &internal->node_ptrs[idx], neighbor_loc, comp, internal, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "unable to find neighbor record in B-tree internal node") + } /* end if */ + else { + if(H5B2__neighbor_leaf(hdr, dxpl_id, &internal->node_ptrs[idx], neighbor_loc, comp, internal, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_NOTFOUND, FAIL, "unable to find neighbor record in B-tree leaf node") + } /* end else */ + +done: + /* Release the B-tree internal node */ + if(internal && H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, curr_node_ptr->addr, internal, H5AC__NO_FLAGS_SET) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__neighbor_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__insert_internal + * + * Purpose: Adds a new record to a B-tree node. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Mar 2 2005 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__insert_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, uint16_t depth, + unsigned *parent_cache_info_flags_ptr, H5B2_node_ptr_t *curr_node_ptr, + H5B2_nodepos_t curr_pos, void *parent, void *udata) +{ + H5B2_internal_t *internal = NULL; /* Pointer to internal node */ + unsigned internal_flags = H5AC__NO_FLAGS_SET; + unsigned idx = 0; /* Location of record which matches key */ + H5B2_nodepos_t next_pos = H5B2_POS_MIDDLE; /* Position of node */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. 
*/ + HDassert(hdr); + HDassert(depth > 0); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + + /* Lock current B-tree node */ + if(NULL == (internal = H5B2__protect_internal(hdr, dxpl_id, parent, curr_node_ptr, depth, FALSE, H5AC__NO_FLAGS_SET))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree internal node") + + /* Sanity check number of records */ + HDassert(internal->nrec == curr_node_ptr->node_nrec); + + /* Split or redistribute child node pointers, if necessary */ + { + int cmp; /* Comparison value of records */ + unsigned retries; /* Number of times to attempt redistribution */ + size_t split_nrec; /* Number of records to split node at */ + + /* Locate node pointer for child */ + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, + udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + if(cmp == 0) + HGOTO_ERROR(H5E_BTREE, H5E_EXISTS, FAIL, "record is already in B-tree") + if(cmp > 0) + idx++; + + /* Set the number of redistribution retries */ + /* This takes care of the case where a B-tree node needs to be + * redistributed, but redistributing the node causes the index + * for insertion to move to another node, which also needs to be + * redistributed. Now, we loop trying to redistribute and then + * eventually force a split */ + retries = 2; + + /* Determine the correct number of records to split child node at */ + split_nrec = hdr->node_info[depth - 1].split_nrec; + + /* Preemptively split/redistribute a node we will enter */ + while(internal->node_ptrs[idx].node_nrec == split_nrec) { + /* Attempt to redistribute records among children */ + if(idx == 0) { /* Left-most child */ + if(retries > 0 && (internal->node_ptrs[idx + 1].node_nrec < split_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__split1(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to split child node") + } /* end else */ + } /* end if */ + else if(idx == internal->nrec) { /* Right-most child */ + if(retries > 0 && (internal->node_ptrs[idx - 1].node_nrec < split_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, (idx - 1)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__split1(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to split child node") + } /* end else */ + } /* end if */ + else { /* Middle child */ + if(retries > 0 && ((internal->node_ptrs[idx + 1].node_nrec < split_nrec) || + (internal->node_ptrs[idx - 1].node_nrec < split_nrec))) { + if(H5B2__redistribute3(hdr, dxpl_id, depth, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__split1(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to split child node") + } /* end else */ + } /* end else */ + + /* Locate node pointer for child (after split/redistribute) */ + /* Actually, this can be easily updated (for 2-node 
redistrib.) and shouldn't require re-searching */ + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, + udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + if(cmp == 0) + HGOTO_ERROR(H5E_BTREE, H5E_EXISTS, FAIL, "record is already in B-tree") + if(cmp > 0) + idx++; + + /* Decrement the number of redistribution retries left */ + retries--; + } /* end while */ + } /* end block */ + + /* Check if this node is left/right-most */ + if(H5B2_POS_MIDDLE != curr_pos) { + if(idx == 0) { + if(H5B2_POS_LEFT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_LEFT; + } /* end if */ + else if(idx == internal->nrec) { + if(H5B2_POS_RIGHT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_RIGHT; + } /* end else */ + } /* end if */ + + /* Attempt to insert node */ + if(depth > 1) { + if(H5B2__insert_internal(hdr, dxpl_id, (uint16_t)(depth - 1), &internal_flags, &internal->node_ptrs[idx], next_pos, internal, udata) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, FAIL, "unable to insert record into B-tree internal node") + } /* end if */ + else { + if(H5B2__insert_leaf(hdr, dxpl_id, &internal->node_ptrs[idx], next_pos, internal, udata) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, FAIL, "unable to insert record into B-tree leaf node") + } /* end else */ + + /* Update record count for node pointer to current node */ + curr_node_ptr->all_nrec++; + + /* Mark node as dirty */ + internal_flags |= H5AC__DIRTIED_FLAG; + +done: + /* Release the B-tree internal node */ + if(internal) { + /* Shadow the node if doing SWMR writes */ + if(hdr->swmr_write && (internal_flags & H5AC__DIRTIED_FLAG)) + if(H5B2__shadow_internal(internal, dxpl_id, curr_node_ptr) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to shadow internal B-tree node") + + /* Unprotect node */ + if(H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, curr_node_ptr->addr, internal, internal_flags) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__insert_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__update_internal + * + * Purpose: Insert or modify a record in a B-tree internal node. + * If the record exists already, it is modified as if H5B2_modify + * was called). If it doesn't exist, it is inserted as if + * H5B2_insert was called. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@hdfgroup.org + * Dec 24 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__update_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, uint16_t depth, + unsigned *parent_cache_info_flags_ptr, H5B2_node_ptr_t *curr_node_ptr, + H5B2_update_status_t *status, H5B2_nodepos_t curr_pos, void *parent, + void *udata, H5B2_modify_t op, void *op_data) +{ + H5B2_internal_t *internal = NULL; /* Pointer to internal node */ + unsigned internal_flags = H5AC__NO_FLAGS_SET; + int cmp; /* Comparison value of records */ + unsigned idx = 0; /* Location of record which matches key */ + H5B2_nodepos_t next_pos = H5B2_POS_MIDDLE; /* Position of node */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. 
*/ + HDassert(hdr); + HDassert(depth > 0); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + + /* Lock current B-tree node */ + if(NULL == (internal = H5B2__protect_internal(hdr, dxpl_id, parent, curr_node_ptr, depth, FALSE, H5AC__NO_FLAGS_SET))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree internal node") + + /* Sanity check number of records */ + HDassert(internal->nrec == curr_node_ptr->node_nrec); + + /* Locate node pointer for child */ + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + + /* Check for modifying existing record */ + if(0 == cmp) { + hbool_t changed = FALSE; /* Whether the 'modify' callback changed the record */ + + /* Make callback for current record */ + if((op)(H5B2_INT_NREC(internal, hdr, idx), op_data, &changed) < 0) { + /* Make certain that the callback didn't modify the value if it failed */ + HDassert(changed == FALSE); + + HGOTO_ERROR(H5E_BTREE, H5E_CANTMODIFY, FAIL, "'modify' callback failed for B-tree update operation") + } /* end if */ + + /* Mark the node as dirty if it changed */ + internal_flags |= (changed ? H5AC__DIRTIED_FLAG : 0); + + /* Indicate that the record was modified */ + *status = H5B2_UPDATE_MODIFY_DONE; + } /* end if */ + else { + /* Adjust index to leave room for node to insert */ + if(cmp > 0) + idx++; + + /* Check if this node is left/right-most */ + if(H5B2_POS_MIDDLE != curr_pos) { + if(idx == 0) { + if(H5B2_POS_LEFT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_LEFT; + } /* end if */ + else if(idx == internal->nrec) { + if(H5B2_POS_RIGHT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_RIGHT; + } /* end else */ + } /* end if */ + + /* Attempt to update record in child */ + if(depth > 1) { + if(H5B2__update_internal(hdr, dxpl_id, (uint16_t)(depth - 1), &internal_flags, &internal->node_ptrs[idx], status, next_pos, internal, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUPDATE, FAIL, "unable to update record in internal B-tree node") + } /* end if */ + else { + if(H5B2__update_leaf(hdr, dxpl_id, &internal->node_ptrs[idx], status, next_pos, internal, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUPDATE, FAIL, "unable to update record in leaf B-tree node") + } /* end else */ + + /* Take actions based on child's status report */ + switch(*status) { + case H5B2_UPDATE_MODIFY_DONE: + /* No action */ + break; + + case H5B2_UPDATE_SHADOW_DONE: + /* If child node was shadowed (if SWMR is enabled), mark this node dirty */ + if(hdr->swmr_write) + internal_flags |= H5AC__DIRTIED_FLAG; + + /* No further modifications up the tree are necessary though, so downgrade to merely "modified" */ + *status = H5B2_UPDATE_MODIFY_DONE; + break; + + case H5B2_UPDATE_INSERT_DONE: + /* Mark node as dirty */ + internal_flags |= H5AC__DIRTIED_FLAG; + + /* Update total record count for node pointer to current node */ + curr_node_ptr->all_nrec++; + break; + + case H5B2_UPDATE_INSERT_CHILD_FULL: + /* Split/redistribute this node */ + if(internal->nrec == hdr->node_info[depth].split_nrec) { + hbool_t could_split = FALSE; /* Whether the child node could split */ + + if(idx == 0) { /* Left-most child */ + /* Check for left-most child and its neighbor being close to full */ + if((internal->node_ptrs[idx].node_nrec + internal->node_ptrs[idx + 1].node_nrec) + >= ((hdr->node_info[depth - 1].split_nrec * 2) - 1)) + could_split 
= TRUE; + } /* end if */ + else if(idx == internal->nrec) { /* Right-most child */ + /* Check for right-most child and its neighbor being close to full */ + if((internal->node_ptrs[idx - 1].node_nrec + internal->node_ptrs[idx].node_nrec) + >= ((hdr->node_info[depth - 1].split_nrec * 2) - 1)) + could_split = TRUE; + } /* end else-if */ + else { /* Middle child */ + /* Check for middle child and its left neighbor being close to full */ + if((internal->node_ptrs[idx - 1].node_nrec + internal->node_ptrs[idx].node_nrec) + >= ((hdr->node_info[depth - 1].split_nrec * 2) - 1)) + could_split = TRUE; + /* Check for middle child and its right neighbor being close to full */ + else if((internal->node_ptrs[idx].node_nrec + internal->node_ptrs[idx + 1].node_nrec) + >= ((hdr->node_info[depth - 1].split_nrec * 2) - 1)) + could_split = TRUE; + } /* end if */ + + /* If this node is full and the child node insertion could + * cause a split, punt back up to caller, leaving the + * "insert child full" status. + */ + if(could_split) { + /* Release the internal B-tree node */ + if(H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, curr_node_ptr->addr, internal, internal_flags) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + internal = NULL; + + /* Punt back to caller */ + HGOTO_DONE(SUCCEED); + } /* end if */ + } /* end if */ + + /* Release the internal B-tree node */ + if(H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, curr_node_ptr->addr, internal, internal_flags) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + internal = NULL; + + /* Indicate that the record was inserted */ + *status = H5B2_UPDATE_INSERT_DONE; + + /* Dodge sideways into inserting a record into this node */ + if(H5B2__insert_internal(hdr, dxpl_id, depth, parent_cache_info_flags_ptr, curr_node_ptr, curr_pos, parent, udata) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTINSERT, FAIL, "unable to insert record into internal B-tree node") + break; + + case H5B2_UPDATE_UNKNOWN: + default: + HDassert(0 && "Invalid update status"); + HGOTO_ERROR(H5E_BTREE, H5E_CANTUPDATE, FAIL, "invalid update status") + } /* end switch */ + } /* end else */ + +done: + /* Release the internal B-tree node */ + if(internal) { + /* Check if we should shadow this node */ + if(hdr->swmr_write && (internal_flags & H5AC__DIRTIED_FLAG)) { + /* Attempt to shadow the node if doing SWMR writes */ + if(H5B2__shadow_internal(internal, dxpl_id, curr_node_ptr) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to shadow internal B-tree node") + + /* Change the state to "shadowed" if only modified currently */ + /* (Triggers parent to be marked dirty) */ + if(*status == H5B2_UPDATE_MODIFY_DONE) + *status = H5B2_UPDATE_SHADOW_DONE; + } /* end if */ + + /* Unprotect node */ + if(H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, curr_node_ptr->addr, internal, internal_flags) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__update_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__shadow_internal + * + * Purpose: "Shadow" an internal node - copy it to a new location, + * leaving the data in the old location intact (for now). + * This is done when writing in SWMR mode to ensure that + * readers do not see nodes that are out of date with + * respect to each other and thereby inconsistent. 
+ * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * Apr 27 2012 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5B2__shadow_internal(H5B2_internal_t *internal, hid_t dxpl_id, + H5B2_node_ptr_t *curr_node_ptr) +{ + H5B2_hdr_t *hdr; /* B-tree header */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* + * Check arguments. + */ + HDassert(internal); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + hdr = internal->hdr; + HDassert(hdr); + HDassert(hdr->swmr_write); + + /* We only need to shadow the node if it has not been shadowed since the + * last time the header was flushed, as otherwise it will be unreachable by + * the readers so there will be no need to shadow. To check if it has been + * shadowed, compare the epoch of this node and the header. If this node's + * epoch is <= to the header's, it hasn't been shadowed yet. */ + if(internal->shadow_epoch <= hdr->shadow_epoch) { + haddr_t new_node_addr; /* Address to move node to */ + + /* + * We must clone the old node so readers with an out-of-date version of + * the parent can still see the correct number of children, via the + * shadowed node. Remove it from cache but do not mark it free on disk. + */ + /* Allocate space for the cloned node */ + if(HADDR_UNDEF == (new_node_addr = H5MF_alloc(hdr->f, H5FD_MEM_BTREE, dxpl_id, (hsize_t)hdr->node_size))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTALLOC, FAIL, "unable to allocate file space to move B-tree node") + + /* Move the location of the node on the disk */ + if(H5AC_move_entry(hdr->f, H5AC_BT2_INT, curr_node_ptr->addr, new_node_addr, dxpl_id) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTMOVE, FAIL, "unable to move B-tree node") + curr_node_ptr->addr = new_node_addr; + + /* Should free the space in the file, but this is not supported by + * SWMR_WRITE code yet - QAK, 2016/12/01 + */ + + /* Set shadow epoch for node ahead of header */ + internal->shadow_epoch = hdr->shadow_epoch + 1; + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5B2__shadow_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__remove_internal + * + * Purpose: Removes a record from a B-tree node. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Mar 3 2005 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__remove_internal(H5B2_hdr_t *hdr, hid_t dxpl_id, hbool_t *depth_decreased, + void *swap_loc, void *swap_parent, uint16_t depth, H5AC_info_t *parent_cache_info, + unsigned *parent_cache_info_flags_ptr, H5B2_nodepos_t curr_pos, + H5B2_node_ptr_t *curr_node_ptr, void *udata, H5B2_remove_t op, void *op_data) +{ + H5AC_info_t *new_cache_info; /* Pointer to new cache info */ + unsigned *new_cache_info_flags_ptr = NULL; + H5B2_node_ptr_t *new_node_ptr; /* Pointer to new node pointer */ + H5B2_internal_t *internal; /* Pointer to internal node */ + H5B2_nodepos_t next_pos = H5B2_POS_MIDDLE; /* Position of next node */ + unsigned internal_flags = H5AC__NO_FLAGS_SET; + haddr_t internal_addr = HADDR_UNDEF; /* Address of internal node */ + size_t merge_nrec; /* Number of records to merge node at */ + hbool_t collapsed_root = FALSE; /* Whether the root was collapsed */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. 
*/ + HDassert(hdr); + HDassert(depth > 0); + HDassert(parent_cache_info); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + + /* Lock current B-tree node */ + if(NULL == (internal = H5B2__protect_internal(hdr, dxpl_id, parent_cache_info, curr_node_ptr, depth, FALSE, H5AC__NO_FLAGS_SET))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree internal node") + internal_addr = curr_node_ptr->addr; + + /* Determine the correct number of records to merge at */ + merge_nrec = hdr->node_info[depth - 1].merge_nrec; + + /* Check for needing to collapse the root node */ + /* (The root node is the only internal node allowed to have 1 record) */ + if(internal->nrec == 1 && + ((internal->node_ptrs[0].node_nrec + internal->node_ptrs[1].node_nrec) <= ((merge_nrec * 2) + 1))) { + + /* Merge children of root node */ + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, 0) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + + /* Let the cache know that the object is deleted */ + internal_flags |= H5AC__DELETED_FLAG; + if(!hdr->swmr_write) + internal_flags |= H5AC__FREE_FILE_SPACE_FLAG; + + /* Reset information in header's root node pointer */ + curr_node_ptr->addr = internal->node_ptrs[0].addr; + curr_node_ptr->node_nrec = internal->node_ptrs[0].node_nrec; + + /* Update flush dependency for child, if using SWMR */ + if(hdr->swmr_write) + if(H5B2__update_flush_depend(hdr, dxpl_id, depth, curr_node_ptr, internal, hdr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUPDATE, FAIL, "unable to update child node to new parent") + + /* Indicate that the level of the B-tree decreased */ + *depth_decreased = TRUE; + + /* Set pointers for advancing to child node */ + new_cache_info = parent_cache_info; + new_cache_info_flags_ptr = parent_cache_info_flags_ptr; + new_node_ptr = curr_node_ptr; + + /* Set flag to indicate root was collapsed */ + collapsed_root = TRUE; + + /* Indicate position of next node */ + next_pos = H5B2_POS_ROOT; + } /* end if */ + /* Merge or redistribute child node pointers, if necessary */ + else { + unsigned idx = 0; /* Location of record which matches key */ + int cmp = 0; /* Comparison value of records */ + unsigned retries; /* Number of times to attempt redistribution */ + + /* Shadow the node if doing SWMR writes */ + if(hdr->swmr_write) { + if(H5B2__shadow_internal(internal, dxpl_id, curr_node_ptr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to shadow internal node") + internal_addr = curr_node_ptr->addr; + } /* end if */ + + /* Locate node pointer for child */ + if(swap_loc) + idx = 0; + else { + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + if(cmp >= 0) + idx++; + } /* end else */ + + /* Set the number of redistribution retries */ + /* This takes care of the case where a B-tree node needs to be + * redistributed, but redistributing the node causes the index + * for removal to move to another node, which also needs to be + * redistributed. 
Now, we loop trying to redistribute and then + * eventually force a merge */ + retries = 2; + + /* Preemptively merge/redistribute a node we will enter */ + while(internal->node_ptrs[idx].node_nrec == merge_nrec) { + /* Attempt to redistribute records among children */ + /* (NOTE: These 2-node redistributions should actually get the + * record to promote from the node with more records. - QAK) + */ + /* (NOTE: This code is the same in both H5B2__remove_internal() and + * H5B2__remove_internal_by_idx(), fix bugs in both places! - QAK) + */ + if(idx == 0) { /* Left-most child */ + if(retries > 0 && (internal->node_ptrs[idx + 1].node_nrec > merge_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end if */ + else if(idx == internal->nrec) { /* Right-most child */ + if(retries > 0 && (internal->node_ptrs[idx - 1].node_nrec > merge_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, (idx - 1)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, (idx - 1)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end if */ + else { /* Middle child */ + if(retries > 0 && ((internal->node_ptrs[idx + 1].node_nrec > merge_nrec) || + (internal->node_ptrs[idx - 1].node_nrec > merge_nrec))) { + if(H5B2__redistribute3(hdr, dxpl_id, depth, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge3(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end else */ + + /* Locate node pointer for child (after merge/redistribute) */ + if(swap_loc) + idx = 0; + else { +/* Actually, this can be easily updated (for 2-node redistrib.) 
and shouldn't require re-searching */ + if(H5B2__locate_record(hdr->cls, internal->nrec, hdr->nat_off, internal->int_native, udata, &idx, &cmp) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOMPARE, FAIL, "can't compare btree2 records") + if(cmp >= 0) + idx++; + } /* end else */ + + /* Decrement the number of redistribution retries left */ + retries--; + } /* end while */ + + /* Handle deleting a record from an internal node */ + if(!swap_loc && cmp == 0) { + swap_loc = H5B2_INT_NREC(internal, hdr, idx - 1); + swap_parent = internal; + } /* end if */ + + /* Swap record to delete with record from leaf, if we are the last internal node */ + if(swap_loc && depth == 1) + if(H5B2__swap_leaf(hdr, dxpl_id, depth, internal, &internal_flags, idx, swap_loc) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSWAP, FAIL, "Can't swap records in B-tree") + + /* Set pointers for advancing to child node */ + new_cache_info_flags_ptr = &internal_flags; + new_cache_info = &internal->cache_info; + new_node_ptr = &internal->node_ptrs[idx]; + + /* Indicate position of next node */ + if(H5B2_POS_MIDDLE != curr_pos) { + if(idx == 0) { + if(H5B2_POS_LEFT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_LEFT; + } /* end if */ + else if(idx == internal->nrec) { + if(H5B2_POS_RIGHT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_RIGHT; + } /* end if */ + } /* end if */ + } /* end else */ + + /* Attempt to remove record from child node */ + if(depth > 1) { + if(H5B2__remove_internal(hdr, dxpl_id, depth_decreased, swap_loc, swap_parent, (uint16_t)(depth - 1), + new_cache_info, new_cache_info_flags_ptr, next_pos, new_node_ptr, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to remove record from B-tree internal node") + } /* end if */ + else { + if(H5B2__remove_leaf(hdr, dxpl_id, new_node_ptr, next_pos, new_cache_info, udata, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to remove record from B-tree leaf node") + } /* end else */ + + /* Update record count for node pointer to current node */ + if(!collapsed_root) + new_node_ptr->all_nrec--; + + /* Mark node as dirty */ + if(!(hdr->swmr_write && collapsed_root)) + internal_flags |= H5AC__DIRTIED_FLAG; + +#ifdef H5B2_DEBUG + H5B2__assert_internal((!collapsed_root ? 
(curr_node_ptr->all_nrec - 1) : new_node_ptr->all_nrec), hdr, internal); +#endif /* H5B2_DEBUG */ + +done: + /* Release the B-tree internal node */ + if(internal && H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, internal_addr, internal, internal_flags) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__remove_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__remove_internal_by_idx + * + * Purpose: Removes a record from a B-tree node, according to the offset + * in the B-tree records + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@hdfgroup.org + * Nov 14 2006 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__remove_internal_by_idx(H5B2_hdr_t *hdr, hid_t dxpl_id, + hbool_t *depth_decreased, void *swap_loc, void *swap_parent, uint16_t depth, + H5AC_info_t *parent_cache_info, unsigned *parent_cache_info_flags_ptr, + H5B2_node_ptr_t *curr_node_ptr, H5B2_nodepos_t curr_pos, hsize_t n, + H5B2_remove_t op, void *op_data) +{ + H5AC_info_t *new_cache_info; /* Pointer to new cache info */ + unsigned *new_cache_info_flags_ptr = NULL; + H5B2_node_ptr_t *new_node_ptr; /* Pointer to new node pointer */ + H5B2_internal_t *internal; /* Pointer to internal node */ + H5B2_nodepos_t next_pos = H5B2_POS_MIDDLE; /* Position of next node */ + unsigned internal_flags = H5AC__NO_FLAGS_SET; + haddr_t internal_addr = HADDR_UNDEF; /* Address of internal node */ + size_t merge_nrec; /* Number of records to merge node at */ + hbool_t collapsed_root = FALSE; /* Whether the root was collapsed */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* Check arguments. 
*/ + HDassert(hdr); + HDassert(depth > 0); + HDassert(parent_cache_info); + HDassert(curr_node_ptr); + HDassert(H5F_addr_defined(curr_node_ptr->addr)); + + /* Lock current B-tree node */ + if(NULL == (internal = H5B2__protect_internal(hdr, dxpl_id, parent_cache_info, curr_node_ptr, depth, FALSE, H5AC__NO_FLAGS_SET))) + HGOTO_ERROR(H5E_BTREE, H5E_CANTPROTECT, FAIL, "unable to protect B-tree internal node") + internal_addr = curr_node_ptr->addr; + HDassert(internal->nrec == curr_node_ptr->node_nrec); + HDassert(depth == hdr->depth || internal->nrec > 1); + + /* Determine the correct number of records to merge at */ + merge_nrec = hdr->node_info[depth - 1].merge_nrec; + + /* Check for needing to collapse the root node */ + /* (The root node is the only internal node allowed to have 1 record) */ + if(internal->nrec == 1 && + ((internal->node_ptrs[0].node_nrec + internal->node_ptrs[1].node_nrec) <= ((merge_nrec * 2) + 1))) { + HDassert(depth == hdr->depth); + + /* Merge children of root node */ + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, 0) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + + /* Let the cache know that the object is deleted */ + internal_flags |= H5AC__DELETED_FLAG; + if(!hdr->swmr_write) + internal_flags |= H5AC__FREE_FILE_SPACE_FLAG; + + /* Reset information in header's root node pointer */ + curr_node_ptr->addr = internal->node_ptrs[0].addr; + curr_node_ptr->node_nrec = internal->node_ptrs[0].node_nrec; + + /* Update flush dependency for child, if using SWMR */ + if(hdr->swmr_write) + if(H5B2__update_flush_depend(hdr, dxpl_id, depth, curr_node_ptr, internal, hdr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTUPDATE, FAIL, "unable to update child node to new parent") + + /* Indicate that the level of the B-tree decreased */ + *depth_decreased = TRUE; + + /* Set pointers for advancing to child node */ + new_cache_info = parent_cache_info; + new_cache_info_flags_ptr = parent_cache_info_flags_ptr; + new_node_ptr = curr_node_ptr; + + /* Set flag to indicate root was collapsed */ + collapsed_root = TRUE; + + /* Indicate position of next node */ + next_pos = H5B2_POS_ROOT; + } /* end if */ + /* Merge or redistribute child node pointers, if necessary */ + else { + hsize_t orig_n = n; /* Original index looked for */ + unsigned idx; /* Location of record which matches key */ + hbool_t found = FALSE; /* Comparison value of records */ + unsigned retries; /* Number of times to attempt redistribution */ + + /* Shadow the node if doing SWMR writes */ + if(hdr->swmr_write) { + if(H5B2__shadow_internal(internal, dxpl_id, curr_node_ptr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTCOPY, FAIL, "unable to shadow internal node") + internal_addr = curr_node_ptr->addr; + } /* end if */ + + /* Locate node pointer for child */ + if(swap_loc) + idx = 0; + else { + /* Search for record with correct index */ + for(idx = 0; idx < internal->nrec; idx++) { + /* Check which child node contains indexed record */ + if(internal->node_ptrs[idx].all_nrec >= n) { + /* Check if record is in this node */ + if(internal->node_ptrs[idx].all_nrec == n) { + /* Indicate the record was found and that the index + * in child nodes is zero from now on + */ + found = TRUE; + n = 0; + + /* Increment to next record */ + idx++; + } /* end if */ + + /* Break out of loop early */ + break; + } /* end if */ + + /* Decrement index we are looking for to account for the node we + * just advanced past. 
+ */ + n -= (internal->node_ptrs[idx].all_nrec + 1); + } /* end for */ + } /* end else */ + + /* Set the number of redistribution retries */ + /* This takes care of the case where a B-tree node needs to be + * redistributed, but redistributing the node causes the index + * for removal to move to another node, which also needs to be + * redistributed. Now, we loop trying to redistribute and then + * eventually force a merge */ + retries = 2; + + /* Preemptively merge/redistribute a node we will enter */ + while(internal->node_ptrs[idx].node_nrec == merge_nrec) { + /* Attempt to redistribute records among children */ + /* (NOTE: These 2-node redistributions should actually get the + * record to promote from the node with more records. - QAK) + */ + /* (NOTE: This code is the same in both H5B2__remove_internal() and + * H5B2__remove_internal_by_idx(), fix bugs in both places! - QAK) + */ + if(idx == 0) { /* Left-most child */ + if(retries > 0 && (internal->node_ptrs[idx + 1].node_nrec > merge_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end if */ + else if(idx == internal->nrec) { /* Right-most child */ + if(retries > 0 && (internal->node_ptrs[idx - 1].node_nrec > merge_nrec)) { + if(H5B2__redistribute2(hdr, dxpl_id, depth, internal, (idx - 1)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge2(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, (idx - 1)) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end if */ + else { /* Middle child */ + if(retries > 0 && ((internal->node_ptrs[idx + 1].node_nrec > merge_nrec) || + (internal->node_ptrs[idx - 1].node_nrec > merge_nrec))) { + if(H5B2__redistribute3(hdr, dxpl_id, depth, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTREDISTRIBUTE, FAIL, "unable to redistribute child node records") + } /* end if */ + else { + if(H5B2__merge3(hdr, dxpl_id, depth, curr_node_ptr, + parent_cache_info_flags_ptr, internal, &internal_flags, idx) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSPLIT, FAIL, "unable to merge child node") + } /* end else */ + } /* end else */ + + /* Locate node pointer for child (after merge/redistribute) */ + if(swap_loc) + idx = 0; + else { + /* Count from the orginal index value again */ + n = orig_n; + + /* Reset "found" flag - the record may have shifted during the + * redistribute/merge + */ + found = FALSE; + + /* Search for record with correct index */ + for(idx = 0; idx < internal->nrec; idx++) { + /* Check which child node contains indexed record */ + if(internal->node_ptrs[idx].all_nrec >= n) { + /* Check if record is in this node */ + if(internal->node_ptrs[idx].all_nrec == n) { + /* Indicate the record was found and that the index + * in child nodes is zero from now on + */ + found = TRUE; + n = 0; + + /* Increment to next record */ + idx++; + } /* end if */ + + /* Break out of loop early */ + break; + } /* end if */ + + /* Decrement index we are looking for to account for the node we + * just advanced past. 
+ */ + n -= (internal->node_ptrs[idx].all_nrec + 1); + } /* end for */ + } /* end else */ + + /* Decrement the number of redistribution retries left */ + retries--; + } /* end while */ + + /* Handle deleting a record from an internal node */ + if(!swap_loc && found) { + swap_loc = H5B2_INT_NREC(internal, hdr, idx - 1); + swap_parent = internal; + } /* end if */ + + /* Swap record to delete with record from leaf, if we are the last internal node */ + if(swap_loc && depth == 1) + if(H5B2__swap_leaf(hdr, dxpl_id, depth, internal, &internal_flags, idx, swap_loc) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTSWAP, FAIL, "can't swap records in B-tree") + + /* Set pointers for advancing to child node */ + new_cache_info_flags_ptr = &internal_flags; + new_cache_info = &internal->cache_info; + new_node_ptr = &internal->node_ptrs[idx]; + + /* Indicate position of next node */ + if(H5B2_POS_MIDDLE != curr_pos) { + if(idx == 0) { + if(H5B2_POS_LEFT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_LEFT; + } /* end if */ + else if(idx == internal->nrec) { + if(H5B2_POS_RIGHT == curr_pos || H5B2_POS_ROOT == curr_pos) + next_pos = H5B2_POS_RIGHT; + } /* end if */ + } /* end if */ + } /* end else */ + + /* Attempt to remove record from child node */ + if(depth > 1) { + if(H5B2__remove_internal_by_idx(hdr, dxpl_id, depth_decreased, swap_loc, swap_parent, (uint16_t)(depth - 1), + new_cache_info, new_cache_info_flags_ptr, new_node_ptr, next_pos, n, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to remove record from B-tree internal node") + } /* end if */ + else { + if(H5B2__remove_leaf_by_idx(hdr, dxpl_id, new_node_ptr, next_pos, new_cache_info, (unsigned)n, op, op_data) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDELETE, FAIL, "unable to remove record from B-tree leaf node") + } /* end else */ + + /* Update record count for node pointer to child node */ + if(!collapsed_root) + new_node_ptr->all_nrec--; + + /* Mark node as dirty */ + if(!(hdr->swmr_write && collapsed_root)) + internal_flags |= H5AC__DIRTIED_FLAG; + +#ifdef H5B2_DEBUG + H5B2__assert_internal((!collapsed_root ? (curr_node_ptr->all_nrec - 1) : new_node_ptr->all_nrec), hdr, internal); +#endif /* H5B2_DEBUG */ + +done: + /* Release the B-tree internal node */ + if(internal && H5AC_unprotect(hdr->f, dxpl_id, H5AC_BT2_INT, internal_addr, internal, internal_flags) < 0) + HDONE_ERROR(H5E_BTREE, H5E_CANTUNPROTECT, FAIL, "unable to release internal B-tree node") + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5B2__remove_internal_by_idx() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__internal_free + * + * Purpose: Destroys a B-tree internal node in memory. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Feb 2 2005 + * + *------------------------------------------------------------------------- + */ +herr_t +H5B2__internal_free(H5B2_internal_t *internal) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE + + /* + * Check arguments. 
+ */ + HDassert(internal); + + /* Release internal node's native key buffer */ + if(internal->int_native) + internal->int_native = (uint8_t *)H5FL_FAC_FREE(internal->hdr->node_info[internal->depth].nat_rec_fac, internal->int_native); + + /* Release internal node's node pointer buffer */ + if(internal->node_ptrs) + internal->node_ptrs = (H5B2_node_ptr_t *)H5FL_FAC_FREE(internal->hdr->node_info[internal->depth].node_ptr_fac, internal->node_ptrs); + + /* Decrement ref. count on B-tree header */ + if(H5B2__hdr_decr(internal->hdr) < 0) + HGOTO_ERROR(H5E_BTREE, H5E_CANTDEC, FAIL, "can't decrement ref. count on B-tree header") + + /* Sanity check */ + HDassert(NULL == internal->top_proxy); + + /* Free B-tree internal node info */ + internal = H5FL_FREE(H5B2_internal_t, internal); + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5B2__internal_free() */ + +#ifdef H5B2_DEBUG + +/*------------------------------------------------------------------------- + * Function: H5B2__assert_internal + * + * Purpose: Verify that an internal node is mostly sane + * + * Return: Non-negative on success, negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Feb 19 2005 + * + *------------------------------------------------------------------------- + */ +H5_ATTR_PURE herr_t +H5B2__assert_internal(hsize_t parent_all_nrec, const H5B2_hdr_t *hdr, const H5B2_internal_t *internal) +{ + hsize_t tot_all_nrec; /* Total number of records at or below this node */ + uint16_t u, v; /* Local index variables */ + + /* General sanity checking on node */ + HDassert(internal->nrec <= hdr->node_info->split_nrec); + + /* Sanity checking on node pointers */ + tot_all_nrec = internal->nrec; + for(u = 0; u < internal->nrec + 1; u++) { + tot_all_nrec += internal->node_ptrs[u].all_nrec; + + HDassert(H5F_addr_defined(internal->node_ptrs[u].addr)); + HDassert(internal->node_ptrs[u].addr > 0); + for(v = 0; v < u; v++) + HDassert(internal->node_ptrs[u].addr != internal->node_ptrs[v].addr); + } /* end for */ + + /* Sanity check all_nrec total in parent */ + if(parent_all_nrec > 0) + HDassert(tot_all_nrec == parent_all_nrec); + + return(0); +} /* end H5B2__assert_internal() */ + + +/*------------------------------------------------------------------------- + * Function: H5B2__assert_internal2 + * + * Purpose: Verify that internal nodes are mostly sane + * + * Return: Non-negative on success, negative on failure + * + * Programmer: Quincey Koziol + * koziol@ncsa.uiuc.edu + * Feb 19 2005 + * + *------------------------------------------------------------------------- + */ +H5_ATTR_PURE herr_t +H5B2__assert_internal2(hsize_t parent_all_nrec, const H5B2_hdr_t *hdr, const H5B2_internal_t *internal, const H5B2_internal_t *internal2) +{ + hsize_t tot_all_nrec; /* Total number of records at or below this node */ + uint16_t u, v; /* Local index variables */ + + /* General sanity checking on node */ + HDassert(internal->nrec <= hdr->node_info->split_nrec); + + /* Sanity checking on node pointers */ + tot_all_nrec = internal->nrec; + for(u = 0; u < internal->nrec + 1; u++) { + tot_all_nrec += internal->node_ptrs[u].all_nrec; + + HDassert(H5F_addr_defined(internal->node_ptrs[u].addr)); + HDassert(internal->node_ptrs[u].addr > 0); + for(v = 0; v < u; v++) + HDassert(internal->node_ptrs[u].addr != internal->node_ptrs[v].addr); + for(v = 0; v < internal2->nrec + 1; v++) + HDassert(internal->node_ptrs[u].addr != internal2->node_ptrs[v].addr); + } /* end for */ + + /* Sanity check all_nrec total in parent */ + if(parent_all_nrec > 0) 
+ HDassert(tot_all_nrec == parent_all_nrec); + + return(0); +} /* end H5B2__assert_internal2() */ +#endif /* H5B2_DEBUG */ +
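A note on the shadowing scheme used by H5B2__shadow_internal() above: a dirtied internal node is relocated ("shadowed") at most once per header flush cycle, decided by comparing the node's shadow epoch with the header's. The stand-alone C sketch below illustrates only that epoch bookkeeping; the type and function names (hdr_epoch, node_epoch, maybe_shadow) are hypothetical simplifications, not HDF5's actual structures or API, and the file-space allocation plus cache move are reduced to a placeholder comment.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the shadow-epoch fields carried by
 * H5B2_hdr_t and H5B2_internal_t (illustration only, not HDF5's real types) */
struct hdr_epoch  { unsigned shadow_epoch; };   /* bumped when the header is flushed */
struct node_epoch { unsigned shadow_epoch; };   /* stamped when the node is shadowed */

/* Shadow (relocate) a node only if it has not been shadowed since the last
 * header flush; then stamp it one epoch ahead so further dirtying within the
 * same flush cycle skips the copy */
static bool maybe_shadow(struct node_epoch *node, const struct hdr_epoch *hdr)
{
    if(node->shadow_epoch <= hdr->shadow_epoch) {
        /* ...allocate new file space and move the cache entry here... */
        node->shadow_epoch = hdr->shadow_epoch + 1;
        return true;            /* node was relocated */
    } /* end if */
    return false;               /* already shadowed in this flush cycle */
}

int main(void)
{
    struct hdr_epoch  hdr  = { 1 };
    struct node_epoch node = { 1 };

    printf("%d\n", maybe_shadow(&node, &hdr));  /* 1: first dirty -> shadowed */
    printf("%d\n", maybe_shadow(&node, &hdr));  /* 0: same cycle -> skipped */
    hdr.shadow_epoch++;                         /* header flushed: new cycle */
    printf("%d\n", maybe_shadow(&node, &hdr));  /* 1: shadowed once more */
    return 0;
}

With this check, repeated dirtying between two header flushes touches the file layout only once, while each header flush (epoch bump) makes every node eligible for one more relocation.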