summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Neil Fortner <nfortne2@hdfgroup.org> 2010-05-20 21:59:01 (GMT)
committer Neil Fortner <nfortne2@hdfgroup.org> 2010-05-20 21:59:01 (GMT)
commit 8bd419e0885f859e34c5809cdde3117366f3c7b1 (patch)
tree fbabcbafb151b3da100d73de7a5d81dd9ef59f40
parent 83f50e5a722f7a58562a0885fef695f74bce7285 (diff)
download hdf5-8bd419e0885f859e34c5809cdde3117366f3c7b1.zip
hdf5-8bd419e0885f859e34c5809cdde3117366f3c7b1.tar.gz
hdf5-8bd419e0885f859e34c5809cdde3117366f3c7b1.tar.bz2
[svn-r18869] Purpose: Fix bug in dataset shrinking algorithm
Description: Previously, it was possible for a chunk to be flushed due to chunk operations in the callback from H5B_iterate in H5D_chunk_prune_by_extent. Because flushing the chunk can force it to be reallocated if it is filtered, this can change the contents of the chunk b-tree in the middle of H5B_iterate. Because H5B_iterate uses a locally cached copy of the b-tree, this causes subsequent operations to be passed incorrect data. Rewrote H5D_chunk_prune_by_extent to avoid H5B_iterate entirely. Also fixed a bug in the dataset expand algorithm that could cause extra chunks to be created.
Tested: jam, linew, smirom (h5committest)
-rw-r--r-- src/H5Dchunk.c 503
-rw-r--r-- src/H5Dpkg.h 3
-rw-r--r-- test/set_extent.c 138
3 files changed, 382 insertions, 262 deletions
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 9b9298e..b0cb0b1 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -99,20 +99,13 @@
/* Local Typedefs */
/******************/
-/* Stack of chunks to remove during a "prune" iteration */
-typedef struct H5D_chunk_prune_stack_t {
- H5D_chunk_rec_t rec; /* Chunk record */
- struct H5D_chunk_prune_stack_t *next; /* Next chunk in stack */
-} H5D_chunk_prune_stack_t;
-
/* Callback info for iteration to prune chunks */
typedef struct H5D_chunk_it_ud1_t {
H5D_chunk_common_ud_t common; /* Common info for B-tree user data (must be first) */
const H5D_chk_idx_info_t *idx_info; /* Chunked index info */
const H5D_io_info_t *io_info; /* I/O info for dataset operation */
- const hsize_t *dims; /* New dataset dimensions */
- const hbool_t *shrunk_dims; /* Dimensions which have been shrunk */
- H5D_chunk_prune_stack_t *rm_stack; /* Stack of chunks outside the new dimensions */
+ const hsize_t *space_dim; /* New dataset dimensions */
+ const hbool_t *shrunk_dim; /* Dimensions which have been shrunk */
H5S_t *chunk_space; /* Dataspace for a chunk */
uint32_t elmts_per_chunk;/* Elements in chunk */
hsize_t *hyper_start; /* Starting location of hyperslab */
@@ -270,9 +263,6 @@ H5FL_DEFINE(H5D_chunk_info_t);
/* Declare a free list to manage the chunk sequence information */
H5FL_BLK_DEFINE_STATIC(chunk);
-/* Declare a free list to manage H5D_chunk_sl_ck_t objects */
-H5FL_DEFINE_STATIC(H5D_chunk_prune_stack_t);
-
/*-------------------------------------------------------------------------
@@ -2326,7 +2316,7 @@ H5D_chunk_flush_entry(const H5D_t *dset, hid_t dxpl_id, const H5D_dxpl_cache_t *
HDassert(!ent->locked);
buf = ent->chunk;
- if(ent->dirty) {
+ if(ent->dirty && !ent->deleted) {
H5D_chunk_ud_t udata; /* pass through B-tree */
hbool_t must_insert = FALSE; /* Whether the chunk must go through the "insert" method */
@@ -2710,6 +2700,9 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata,
*/
rdcc->stats.nhits++;
+ /* Still save the chunk address so the cache stays consistent */
+ chunk_addr = udata->addr;
+
if(NULL == (chunk = H5D_chunk_alloc(chunk_size, pline)))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed for raw data chunk")
@@ -2809,6 +2802,7 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata,
ent->locked = 0;
ent->dirty = FALSE;
+ ent->deleted = FALSE;
ent->chunk_addr = chunk_addr;
for(u = 0; u < layout->u.chunk.ndims; u++)
ent->offset[u] = io_info->store->chunk.offset[u];
@@ -3258,7 +3252,7 @@ H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id, hbool_t full_overwrite,
/* Check if allocation along this dimension is really necessary */
if(min_unalloc[op_dim] > max_unalloc[op_dim])
- carry = TRUE;
+ continue;
else {
/* Reset the chunk offset indices */
HDmemset(chunk_offset, 0, (layout->u.chunk.ndims * sizeof(chunk_offset[0])));
@@ -3275,6 +3269,18 @@ H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id, hbool_t full_overwrite,
if(H5D_chunk_get_info(dset, dxpl_id, chunk_offset, &udata) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
HDassert(!H5F_addr_defined(udata.addr));
+
+ /* Make sure the chunk is really in the dataset and outside the
+ * original dimensions */
+ {
+ hbool_t outside_orig = FALSE;
+ for(i=0; i<space_ndims; i++) {
+ HDassert(chunk_offset[i] < space_dim[i]);
+ if(chunk_offset[i] >= old_dim[i])
+ outside_orig = TRUE;
+ } /* end for */
+ HDassert(outside_orig);
+ } /* end block */
#endif /* NDEBUG */
/* Check for VL datatype & non-default fill value */
@@ -3418,12 +3424,13 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata)
+H5D_chunk_prune_fill(H5D_chunk_it_ud1_t *udata)
{
const H5D_io_info_t *io_info = udata->io_info; /* Local pointer to I/O info */
H5D_t *dset = io_info->dset; /* Local pointer to the dataset info */
const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset's layout */
unsigned rank = udata->common.layout->ndims - 1; /* Dataset rank */
+ const hsize_t *chunk_offset = io_info->store->chunk.offset; /* Chunk offset */
H5S_sel_iter_t chunk_iter; /* Memory selection iteration info */
hssize_t sel_nelmts; /* Number of elements in selection */
hsize_t count[H5O_LAYOUT_NDIMS]; /* Element count of hyperslab */
@@ -3436,6 +3443,17 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_prune_fill)
+ /* Get the info for the chunk in the file */
+ if(H5D_chunk_get_info(dset, io_info->dxpl_id, chunk_offset, &chk_udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
+
+ /* If this chunk does not exist in cache or on disk, no need to do anything
+ */
+ if(!H5F_addr_defined(chk_udata.addr)
+ && !H5D_chunk_in_cache(dset, chunk_offset,
+ io_info->store->chunk.index))
+ HGOTO_DONE(SUCCEED)
+
/* Initialize the fill value buffer, if necessary */
if(!udata->fb_info_init) {
H5_CHECK_OVERFLOW(udata->elmts_per_chunk, uint32_t, size_t);
@@ -3449,7 +3467,8 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata
/* Compute the # of elements to leave with existing value, in each dimension */
for(u = 0; u < rank; u++) {
- count[u] = MIN(layout->u.chunk.dim[u], (udata->dims[u] - chunk_rec->offset[u]));
+ count[u] = MIN(layout->u.chunk.dim[u], (udata->space_dim[u]
+ - chunk_offset[u]));
HDassert(count[u] > 0);
} /* end for */
@@ -3461,20 +3480,9 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata
if(H5S_select_hyperslab(udata->chunk_space, H5S_SELECT_NOTB, udata->hyper_start, NULL, count, NULL) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTSELECT, FAIL, "unable to select hyperslab")
- /* Calculate the index of this chunk */
- if(H5V_chunk_index(rank, chunk_rec->offset, layout->u.chunk.dim, layout->u.chunk.down_chunks, &io_info->store->chunk.index) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't get chunk index")
-
/* Lock the chunk into the cache, to get a pointer to the chunk buffer */
- /* (Casting away const OK -QAK) */
- io_info->store->chunk.offset = (hsize_t *)chunk_rec->offset;
- chk_udata.common.layout = &layout->u.chunk;
- chk_udata.common.storage = &layout->storage.u.chunk;
- chk_udata.common.offset = chunk_rec->offset;
- chk_udata.nbytes = chunk_rec->nbytes;
- chk_udata.filter_mask = chunk_rec->filter_mask;
- chk_udata.addr = chunk_rec->chunk_addr;
- if(NULL == (chunk = (void *)H5D_chunk_lock(udata->io_info, &chk_udata, FALSE, &idx_hint)))
+ if(NULL == (chunk = (void *)H5D_chunk_lock(io_info, &chk_udata, FALSE,
+ &idx_hint)))
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "unable to lock raw data chunk")
@@ -3524,82 +3532,6 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5D_chunk_prune_cb
- *
- * Purpose: Search for chunks that are no longer inside the pruned
- * dataset's extent
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu
- * March 26, 2002
- *
- *-------------------------------------------------------------------------
- */
-/* ARGSUSED */
-static int
-H5D_chunk_prune_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata)
-{
- H5D_chunk_it_ud1_t *udata = (H5D_chunk_it_ud1_t *)_udata; /* User data */
- H5D_chunk_prune_stack_t *stack_node = NULL; /* Stack node for chunk to remove */
- unsigned rank; /* Current # of dimensions */
- hbool_t should_delete = FALSE; /* Whether the chunk should be deleted */
- hbool_t needs_fill = FALSE; /* Whether the chunk overlaps the new extent and needs fill valiues */
- unsigned u; /* Local index variable */
- int ret_value = H5_ITER_CONT; /* Return value */
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_prune_cb)
-
- /* Figure out what chunks are no longer in use for the specified extent and release them */
- rank = udata->common.layout->ndims - 1;
- for(u = 0; u < rank; u++)
- /* The chunk record points to a chunk of storage that contains the
- * beginning of the logical address space represented by UDATA.
- */
- if(udata->shrunk_dims[u]) {
- if(chunk_rec->offset[u] >= udata->dims[u]) {
- /* Indicate that the chunk will be deleted */
- should_delete = TRUE;
-
- /* Break out of loop, we know the chunk is outside the current dimensions */
- break;
- } /* end if */
- /* Check for chunk that overlaps new extent and will need fill values */
- else if((chunk_rec->offset[u] + udata->common.layout->dim[u]) > udata->dims[u])
- /* Indicate that the chunk needs filling */
- /* (but continue in loop, since it could be outside the extent in
- * another dimension -QAK)
- */
- needs_fill = TRUE;
- } /* end if */
-
- /* Check for chunk to delete */
- if(should_delete) {
- /* Allocate space for the removal stack node */
- if(NULL == (stack_node = H5FL_MALLOC(H5D_chunk_prune_stack_t)))
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, H5_ITER_ERROR, "memory allocation failed for removal stack node")
-
- /* Store the record for the chunk */
- stack_node->rec = *chunk_rec;
-
- /* Push the chunk description onto the stack */
- stack_node->next = udata->rm_stack;
- udata->rm_stack = stack_node;
- } /* end if */
- /* Check for chunk that overlaps the new dataset dimensions and needs filling */
- else if(needs_fill)
- /* Write the fill value */
- if(H5D_chunk_prune_fill(chunk_rec, udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, H5_ITER_ERROR, "unable to write fill value")
-
-done:
- /* It is currently impossible to fail after the stack node has been
- * malloc'ed. No need to free it here on failure. */
- FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D_chunk_prune_cb() */
-
-
-/*-------------------------------------------------------------------------
* Function: H5D_chunk_prune_by_extent
*
* Purpose: This function searches for chunks that are no longer necessary
@@ -3696,11 +3628,26 @@ done:
* To release the chunks, we traverse the B-tree to obtain a list of unused
* allocated chunks, and then call H5B_remove() for each chunk.
*
+ * Modifications: Neil Fortner
+ * 4 May 2010
+ * Rewrote algorithm to work in a way similar to
+ * H5D_chunk_allocate: it now iterates over all chunks that need
+ * to be filled or removed, and does so as appropriate. This
+ * avoids various issues with coherency of locally cached data
+ * which could occur with the previous implementation.
+ *
*-------------------------------------------------------------------------
*/
herr_t
-H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims)
+H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dim)
{
+ hsize_t min_mod_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of first chunk to modify in each dimension */
+ hsize_t max_mod_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of last chunk to modify in each dimension */
+ hssize_t max_fill_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of last chunk that might be filled in each dimension */
+ hbool_t fill_dim[H5O_LAYOUT_NDIMS]; /* Whether the plane of edge chunks in this dimension needs to be filled */
+ hbool_t dims_outside_fill[H5O_LAYOUT_NDIMS]; /* Dimensions in chunk offset outside fill dimensions */
+ int ndims_outside_fill = 0; /* Number of dimensions in chunk offset outside fill dimensions */
+ hbool_t has_fill = FALSE; /* Whether there are chunks that must be filled */
H5D_chk_idx_info_t idx_info; /* Chunked index info */
H5D_io_info_t chk_io_info; /* Chunked I/O info object */
H5D_storage_t chk_store; /* Chunk storage information */
@@ -3708,21 +3655,24 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims)
H5D_dxpl_cache_t *dxpl_cache = &_dxpl_cache; /* Data transfer property cache */
const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset's layout */
const H5D_rdcc_t *rdcc = &(dset->shared->cache.chunk); /*raw data chunk cache */
- H5D_rdcc_ent_t *ent = NULL, *next = NULL; /* Cache entries */
- hsize_t curr_dims[H5O_LAYOUT_NDIMS]; /* Current dataspace dimensions */
- hbool_t shrunk_dims[H5O_LAYOUT_NDIMS]; /* Dimensions which have shrunk */
+ H5D_rdcc_ent_t *ent = NULL; /* Cache entry */
+ unsigned idx = 0; /* Hash index number */
+ int space_ndims; /* Dataset's space rank */
+ hsize_t space_dim[H5O_LAYOUT_NDIMS]; /* Current dataspace dimensions */
+ int op_dim; /* Current operationg dimension */
+ hbool_t shrunk_dim[H5O_LAYOUT_NDIMS]; /* Dimensions which have shrunk */
H5D_chunk_it_ud1_t udata; /* Chunk index iterator user data */
hbool_t udata_init = FALSE; /* Whether the chunk index iterator user data has been initialized */
- hbool_t needs_fill; /* Whether we need to write the fill value */
- H5D_chunk_prune_stack_t *fill_stack = NULL; /* Stack of chunks to fill */
- H5D_chunk_prune_stack_t *tmp_stack; /* Temporary stack node pointer */
H5D_chunk_common_ud_t idx_udata; /* User data for index removal routine */
+ H5D_chunk_ud_t chk_udata; /* User data for getting chunk info */
H5S_t *chunk_space = NULL; /* Dataspace for a chunk */
- hsize_t chunk_dims[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
+ hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
+ hsize_t chunk_offset[H5O_LAYOUT_NDIMS]; /* Offset of current chunk */
hsize_t hyper_start[H5O_LAYOUT_NDIMS]; /* Starting location of hyperslab */
uint32_t elmts_per_chunk; /* Elements in chunk */
- unsigned rank; /* Current # of dimensions */
- unsigned u; /* Local index variable */
+ hbool_t chk_on_disk; /* Whether a chunk exists on disk */
+ hbool_t carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */
+ int i; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5D_chunk_prune_by_extent, FAIL)
@@ -3732,19 +3682,26 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims)
HDassert(layout->u.chunk.ndims > 0 && layout->u.chunk.ndims <= H5O_LAYOUT_NDIMS);
HDassert(dxpl_cache);
- /* set the removal stack pointer in udata to NULL, so if the function fails
- * early it will not try to free the nonexistent stack */
- udata.rm_stack = NULL;
-
/* Fill the DXPL cache values for later use */
if(H5D_get_dxpl_cache(dxpl_id, &dxpl_cache) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
/* Go get the rank & dimensions (including the element size) */
- rank = layout->u.chunk.ndims - 1;
- if(H5S_get_simple_extent_dims(dset->shared->space, curr_dims, NULL) < 0)
+ if((space_ndims = H5S_get_simple_extent_dims(dset->shared->space, space_dim,
+ NULL)) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dataset dimensions")
- curr_dims[rank] = layout->u.chunk.dim[rank];
+ space_dim[space_ndims] = layout->u.chunk.dim[space_ndims];
+
+ /* The last dimension in chunk_offset is always 0 */
+ chunk_offset[space_ndims] = (hsize_t)0;
+
+ /* Check if any old dimensions are 0, if so we do not have to do anything */
+ for(op_dim=0; op_dim<space_ndims; op_dim++)
+ if(old_dim[op_dim] == 0) {
+ /* Reset any cached chunk info for this dataset */
+ H5D_chunk_cinfo_cache_reset(&dset->shared->cache.chunk.last);
+ HGOTO_DONE(SUCCEED)
+ } /* end if */
/* Round up to the next integer # of chunks, to accomodate partial chunks */
/* Use current dims because the indices have already been updated! -NAF */
@@ -3752,22 +3709,25 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims)
/* (also copy the chunk dimensions into 'hsize_t' array for creating dataspace) */
/* (also compute the dimensions which have been shrunk) */
elmts_per_chunk = 1;
- for(u = 0; u < rank; u++) {
- elmts_per_chunk *= layout->u.chunk.dim[u];
- chunk_dims[u] = layout->u.chunk.dim[u];
- shrunk_dims[u] = curr_dims[u] < old_dims[u];
+ for(i = 0; i < space_ndims; i++) {
+ elmts_per_chunk *= layout->u.chunk.dim[i];
+ chunk_dim[i] = layout->u.chunk.dim[i];
+ shrunk_dim[i] = space_dim[i] < old_dim[i];
} /* end for */
/* Create a dataspace for a chunk & set the extent */
- if(NULL == (chunk_space = H5S_create_simple(rank, chunk_dims, NULL)))
+ if(NULL == (chunk_space = H5S_create_simple((unsigned)space_ndims,
+ chunk_dim, NULL)))
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCREATE, FAIL, "can't create simple dataspace")
/* Reset hyperslab start array */
/* (hyperslabs will always start from origin) */
HDmemset(hyper_start, 0, sizeof(hyper_start));
- /* Set up chunked I/O info object, for operations on chunks (in callback) */
- /* (Casting away const OK -QAK) */
+ /* Set up chunked I/O info object, for operations on chunks (in callback)
+ * Note that we only need to set chunk_offset once, as the array's address
+ * will never change. */
+ chk_store.chunk.offset = chunk_offset;
H5D_BUILD_IO_INFO_RD(&chk_io_info, dset, dxpl_cache, dxpl_id, &chk_store, NULL);
/* Compose chunked index info struct */
@@ -3783,91 +3743,226 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims)
udata.common.storage = &layout->storage.u.chunk;
udata.io_info = &chk_io_info;
udata.idx_info = &idx_info;
- udata.dims = curr_dims;
- udata.shrunk_dims = shrunk_dims;
+ udata.space_dim = space_dim;
+ udata.shrunk_dim = shrunk_dim;
udata.elmts_per_chunk = elmts_per_chunk;
udata.chunk_space = chunk_space;
udata.hyper_start = hyper_start;
udata_init = TRUE;
- /*-------------------------------------------------------------------------
- * Figure out what chunks are no longer in use for the specified extent
- * and release them from the linked list raw data cache
- *-------------------------------------------------------------------------
+ /* Initialize user data for removal */
+ idx_udata.layout = &layout->u.chunk;
+ idx_udata.storage = &layout->storage.u.chunk;
+
+ /*
+ * Determine the chunks which need to be filled or removed
*/
- for(ent = rdcc->head; ent; ent = next) {
- /* Get pointer to next extry in cache, in case this one is evicted */
- next = ent->next;
+ for(op_dim=0; op_dim<space_ndims; op_dim++) {
+ /* Calculate the largest offset of chunks that might need to be
+ * modified in this dimension */
+ max_mod_chunk_off[op_dim] = chunk_dim[op_dim] * ((old_dim[op_dim] - 1)
+ / chunk_dim[op_dim]);
+
+ /* Calculate the largest offset of chunks that might need to be
+ * filled in this dimension */
+ if(0 == space_dim[op_dim])
+ max_fill_chunk_off[op_dim] = -1;
+ else
+ max_fill_chunk_off[op_dim] = (hssize_t)(chunk_dim[op_dim]
+ * ((MIN(space_dim[op_dim], old_dim[op_dim]) - 1)
+ / chunk_dim[op_dim]));
+
+ if(shrunk_dim[op_dim]) {
+ /* Calculate the smallest offset of chunks that might need to be
+ * modified in this dimension. Note that this array contains
+ * garbage for all dimensions which are not shrunk. These locations
+ * must not be read from! */
+ min_mod_chunk_off[op_dim] = chunk_dim[op_dim] * (space_dim[op_dim]
+ / chunk_dim[op_dim]);
+
+ /* Determine if we need to fill chunks in this dimension */
+ if((hssize_t)min_mod_chunk_off[op_dim]
+ == max_fill_chunk_off[op_dim]) {
+ fill_dim[op_dim] = TRUE;
+ has_fill = TRUE;
+ } /* end if */
+ else
+ fill_dim[op_dim] = FALSE;
+ } /* end if */
+ else
+ fill_dim[op_dim] = FALSE;
+ } /* end for */
- needs_fill = FALSE;
+ /* Check the cache for any entries that are outside the bounds. Mark these
+ * entries as deleted so they are not flushed to disk accidentally. This is
+ * only necessary if there are chunks that need to be filled. */
+ if(has_fill)
+ for(ent = rdcc->head; ent; ent = ent->next)
+ /* Check for chunk offset outside of new dimensions */
+ for(i = 0; i<space_ndims; i++)
+ if((hsize_t)ent->offset[i] >= space_dim[i]) {
+ /* Mark the entry as "deleted" */
+ ent->deleted = TRUE;
+ break;
+ } /* end if */
- /* Check for chunk offset outside of new dimensions */
- for(u = 0; u < rank; u++) {
- if((hsize_t)ent->offset[u] >= curr_dims[u]) {
- /* Evict the entry from the cache, but do not flush it to disk */
- if(H5D_chunk_cache_evict(dset, dxpl_id, dxpl_cache, ent, FALSE) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTREMOVE, FAIL, "unable to evict chunk")
+ /* Main loop: fill or remove chunks */
+ for(op_dim=0; op_dim<space_ndims; op_dim++) {
+ /* Check if modification along this dimension is really necessary */
+ if(!shrunk_dim[op_dim])
+ continue;
+ else {
+ HDassert((hsize_t) max_mod_chunk_off[op_dim]
+ >= min_mod_chunk_off[op_dim]);
- /* We don't need to write the fill value */
- needs_fill = FALSE;
+ /* Reset the chunk offset indices */
+ HDmemset(chunk_offset, 0, ((unsigned)space_ndims
+ * sizeof(chunk_offset[0])));
+ chunk_offset[op_dim] = min_mod_chunk_off[op_dim];
+
+ /* Initialize "dims_outside_fill" array */
+ ndims_outside_fill = 0;
+ for(i=0; i<space_ndims; i++)
+ if((hssize_t)chunk_offset[i] > max_fill_chunk_off[i]) {
+ dims_outside_fill[i] = TRUE;
+ ndims_outside_fill++;
+ } /* end if */
+ else
+ dims_outside_fill[i] = FALSE;
- /* Break out of loop, chunk is evicted */
- break;
- } else if(!H5F_addr_defined(ent->chunk_addr) && shrunk_dims[u]
- && (ent->offset[u] + chunk_dims[u]) > curr_dims[u])
- /* We need to write the fill value to the unused parts of chunk */
- needs_fill = TRUE;
- } /* end for */
+ carry = FALSE;
+ } /* end if */
- if(needs_fill) {
- /* Allocate space for the stack node */
- if(NULL == (tmp_stack = H5FL_MALLOC(H5D_chunk_prune_stack_t)))
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for stack node")
+ while(!carry) {
+ /* Calculate the index of this chunk */
+ if(H5V_chunk_index((unsigned)space_ndims, chunk_offset,
+ layout->u.chunk.dim, layout->u.chunk.down_chunks,
+ &(chk_io_info.store->chunk.index)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't get chunk index")
+
+ if(0 == ndims_outside_fill) {
+ HDassert(fill_dim[op_dim]);
+ HDassert(chunk_offset[op_dim] == min_mod_chunk_off[op_dim]);
+
+ /* Fill the unused parts of the chunk */
+ if(H5D_chunk_prune_fill(&udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to write fill value")
+ } /* end if */
+ else {
+ chk_on_disk = FALSE;
- /* Set up chunk record for fill routine */
- tmp_stack->rec.nbytes = dset->shared->layout.u.chunk.size;
- HDmemcpy(tmp_stack->rec.offset, ent->offset, sizeof(tmp_stack->rec.offset));
- tmp_stack->rec.filter_mask = 0; /* Since the chunk is already in cache this doesn't matter */
- tmp_stack->rec.chunk_addr = ent->chunk_addr;
+#ifndef NDEBUG
+ /* Make sure this chunk is really outside the new dimensions */
+ {
+ hbool_t outside_dim = FALSE;
+
+ for(i=0; i<space_ndims; i++)
+ if(chunk_offset[i] >= space_dim[i]){
+ outside_dim = TRUE;
+ break;
+ } /* end if */
+ HDassert(outside_dim);
+ } /* end block */
+#endif /* NDEBUG */
- /* Push the chunk description onto the stack */
- tmp_stack->next = fill_stack;
- fill_stack = tmp_stack;
- } /* end if */
- } /* end for */
+ /* Search for the chunk in the cache */
+ if(rdcc->nslots > 0) {
+ idx = H5D_CHUNK_HASH(dset->shared,
+ chk_io_info.store->chunk.index);
+ ent = rdcc->slot[idx];
+
+ if(ent)
+ for(i=0; i<space_ndims; i++)
+ if(chunk_offset[i]
+ != ent->offset[i]) {
+ ent = NULL;
+ break;
+ } /* end if */
+ } /* end if */
- /* Traverse the stack of chunks to be filled, filling each. We will free
- * the nodes later in the "done" section. */
- tmp_stack = fill_stack;
- while(tmp_stack) {
- /* Write the fill value */
- if(H5D_chunk_prune_fill(&(tmp_stack->rec), &udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to write fill value")
+ /* Evict the entry from the cache, but do not flush it to disk
+ */
+ if(ent) {
+ /* Determine if the chunk is allocated on disk, and
+ * therefore needs to be removed from disk */
+ chk_on_disk = H5F_addr_defined(ent->chunk_addr);
- /* Advance the stack pointer */
- tmp_stack = tmp_stack->next;
- } /* end while */
+ /* Remove the chunk from cache */
+ if(H5D_chunk_cache_evict(dset, dxpl_id, dxpl_cache, ent,
+ FALSE) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTREMOVE, FAIL, "unable to evict chunk")
- /* Iterate over the chunks */
- if((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D_chunk_prune_cb, &udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve prune chunks from index")
+ ent = NULL;
+ } /* end if */
+ else {
+ /* Determine if the chunk is allocated on disk, and
+ * therefore needs to be removed from disk */
+ /* Get the info for the chunk in the file */
+ if(H5D_chunk_get_info(dset, dxpl_id, chunk_offset,
+ &chk_udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
+
+ chk_on_disk = H5F_addr_defined(chk_udata.addr);
+ } /* end else */
- /* Traverse the stack of chunks to be deleted, removing each. We will free
- * the nodes later in the "done" section. */
- idx_udata.layout = &layout->u.chunk;
- idx_udata.storage = &layout->storage.u.chunk;
- tmp_stack = udata.rm_stack;
- while(tmp_stack) {
- /* Update the offset in idx_udata */
- idx_udata.offset = tmp_stack->rec.offset;
+ /* Remove the chunk from disk, if present */
+ if(chk_on_disk) {
+ /* Update the offset in idx_udata */
+ idx_udata.offset = chunk_offset;
- /* Remove the chunk from disk */
- if((layout->storage.u.chunk.ops->remove)(&idx_info, &idx_udata) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTDELETE, FAIL, "unable to remove chunk entry from index")
+ /* Remove the chunk from disk */
+ if((layout->storage.u.chunk.ops->remove)(&idx_info, &idx_udata)
+ < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTDELETE, FAIL, "unable to remove chunk entry from index")
+ } /* end if */
+ } /* end else */
- /* Advance the stack pointer */
- tmp_stack = tmp_stack->next;
- } /* end while */
+ /* Increment indices */
+ carry = TRUE;
+ for(i = (int)(space_ndims - 1); i >= 0; --i) {
+ chunk_offset[i] += chunk_dim[i];
+ if(chunk_offset[i] > (hsize_t) max_mod_chunk_off[i]) {
+ /* Left maximum dimensions, "wrap around" and check if this
+ * dimension is no longer outside the fill dimension */
+ if(i == op_dim) {
+ chunk_offset[i] = min_mod_chunk_off[i];
+ if(dims_outside_fill[i] && fill_dim[i]) {
+ dims_outside_fill[i] = FALSE;
+ ndims_outside_fill--;
+ } /* end if */
+ } /* end if */
+ else {
+ chunk_offset[i] = 0;
+ if(dims_outside_fill[i] && max_fill_chunk_off[i] >= 0) {
+ dims_outside_fill[i] = FALSE;
+ ndims_outside_fill--;
+ } /* end if */
+ } /* end else */
+ } /* end if */
+ else {
+ /* Check if we just went outside the fill dimension */
+ if(!dims_outside_fill[i] && (hssize_t)chunk_offset[i]
+ > max_fill_chunk_off[i]) {
+ dims_outside_fill[i] = TRUE;
+ ndims_outside_fill++;
+ } /* end if */
+
+ /* We found the next chunk, so leave the loop */
+ carry = FALSE;
+ break;
+ } /* end else */
+ } /* end for */
+ } /* end while(!carry) */
+
+ /* Adjust max_mod_chunk_off so we don't modify the same chunk twice.
+ * Also check if this dimension started from 0 (and hence removed all
+ * of the chunks). */
+ if(min_mod_chunk_off[op_dim] == 0)
+ break;
+ else
+ max_mod_chunk_off[op_dim] = min_mod_chunk_off[op_dim]
+ - chunk_dim[op_dim];
+ } /* end for(op_dim=0...) */
/* Reset any cached chunk info for this dataset */
H5D_chunk_cinfo_cache_reset(&dset->shared->cache.chunk.last);
@@ -3881,24 +3976,6 @@ done:
HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info")
} /* end if */
- /* Free stack of filled chunks */
- tmp_stack = fill_stack;
- while(tmp_stack) {
- /* Free the stack node and advance the stack pointer */
- tmp_stack = tmp_stack->next;
- fill_stack = H5FL_FREE(H5D_chunk_prune_stack_t, fill_stack);
- fill_stack = tmp_stack;
- } /* end while */
-
- /* Free stack of removed chunks */
- tmp_stack = udata.rm_stack;
- while(tmp_stack) {
- /* Free the stack node and advance the stack pointer */
- tmp_stack = tmp_stack->next;
- udata.rm_stack = H5FL_FREE(H5D_chunk_prune_stack_t, udata.rm_stack);
- udata.rm_stack = tmp_stack;
- } /* end while */
-
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D_chunk_prune_by_extent() */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index dbd495c..bb8fd71 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -488,6 +488,7 @@ typedef struct {
typedef struct H5D_rdcc_ent_t {
hbool_t locked; /*entry is locked in cache */
hbool_t dirty; /*needs to be written to disk? */
+ hbool_t deleted; /*chunk about to be deleted (do not flush) */
hsize_t offset[H5O_LAYOUT_NDIMS]; /*chunk name */
uint32_t rd_count; /*bytes remaining to be read */
uint32_t wr_count; /*bytes remaining to be written */
@@ -612,7 +613,7 @@ H5_DLL herr_t H5D_chunk_allocated(H5D_t *dset, hid_t dxpl_id, hsize_t *nbytes);
H5_DLL herr_t H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id,
hbool_t full_overwrite, hsize_t old_dim[]);
H5_DLL herr_t H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id,
- const hsize_t *old_dims);
+ const hsize_t *old_dim);
#ifdef H5_HAVE_PARALLEL
H5_DLL herr_t H5D_chunk_addrmap(const H5D_io_info_t *io_info, haddr_t chunk_addr[]);
#endif /* H5_HAVE_PARALLEL */
diff --git a/test/set_extent.c b/test/set_extent.c
index 9d669a2..172c0ee 100644
--- a/test/set_extent.c
+++ b/test/set_extent.c
@@ -76,6 +76,7 @@ const char *FILENAME[] = {
#define DIME2 7
#define ISTORE_IK 64
#define RAND4_NITER 100
+#define RAND4_SPARSE_SWITCH 10
#define RAND4_FAIL_DUMP(NDIM_SETS, J, K, L, M) { \
H5_FAILED(); AT(); \
test_random_rank4_dump(NDIM_SETS, dim_log, cdims, J, K, L, M); \
@@ -99,7 +100,8 @@ static int test_rank3( hid_t fapl,
hbool_t set_istore_k);
static int test_random_rank4( hid_t fapl,
hid_t dcpl,
- hbool_t do_fillvalue);
+ hbool_t do_fillvalue,
+ hbool_t do_sparse);
static int test_external( hid_t fapl );
static int test_layouts( H5D_layout_t layout, hid_t fapl );
@@ -128,8 +130,11 @@ int main( void )
/* Copy the file access property list */
if((fapl2 = H5Pcopy(fapl)) < 0) TEST_ERROR
+ /* Set chunk cache so only part of the chunks can be cached on fapl */
+ if(H5Pset_cache(fapl, 0, 8, 256 * sizeof(int), 0.75) < 0) TEST_ERROR
+
/* Disable chunk caching on fapl2 */
- if(H5Pset_cache(fapl2, 521, 0, 0, 0.) < 0) TEST_ERROR
+ if(H5Pset_cache(fapl2, 0, 0, 0, 0.) < 0) TEST_ERROR
/* Set the "use the latest version of the format" bounds for creating objects in the file */
if(H5Pset_libver_bounds(fapl2, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) TEST_ERROR
@@ -320,11 +325,17 @@ static int do_ranks( hid_t fapl )
if(H5Pset_fill_time(dcpl, H5D_FILL_TIME_IFSET) < 0)
TEST_ERROR
- if(test_random_rank4(fapl, dcpl, do_fillvalue) < 0) {
+ if(test_random_rank4(fapl, dcpl, do_fillvalue, FALSE) < 0) {
DO_RANKS_PRINT_CONFIG("Randomized rank 4")
goto error;
} /* end if */
+ if(!(config & CONFIG_EARLY_ALLOC))
+ if(test_random_rank4(fapl, dcpl, do_fillvalue, TRUE) < 0) {
+ DO_RANKS_PRINT_CONFIG("Randomized rank 4 with sparse allocation")
+ goto error;
+ } /* end if */
+
/* Close dcpl */
if(H5Pclose(dcpl) < 0)
TEST_ERROR
@@ -2658,7 +2669,8 @@ error:
*
*-------------------------------------------------------------------------
*/
-static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue )
+static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue,
+ hbool_t do_sparse )
{
hid_t file = -1;
hid_t dset = -1;
@@ -2667,12 +2679,16 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue )
hid_t my_dcpl = -1;
hsize_t dims[4]; /* Dataset's dimensions */
hsize_t old_dims[4]; /* Old dataset dimensions */
+ hsize_t min_unwritten_dims[4]; /* Minimum dimensions since last write */
+ hsize_t *valid_dims = old_dims; /* Dimensions of region still containing written data */
hsize_t cdims[4]; /* Chunk dimensions */
const hsize_t mdims[4] = {10, 10, 10, 10}; /* Memory buffer dimensions */
const hsize_t start[4] = {0, 0, 0, 0}; /* Start for hyperslab operations on memory */
static int rbuf[10][10][10][10]; /* Read buffer */
static int wbuf[10][10][10][10]; /* Write buffer */
static hsize_t dim_log[RAND4_NITER+1][4]; /* Log of dataset dimensions */
+ hbool_t zero_dim = FALSE; /* Whether a dimension is 0 */
+ hbool_t writing = TRUE; /* Whether we're writing to the dset */
volatile unsigned i, j, k, l, m; /* Local indices */
char filename[NAME_BUF_SIZE];
@@ -2681,9 +2697,9 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue )
if ((file = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0)
TEST_ERROR
- /* Generate random chunk dimensions, 2-6 */
+ /* Generate random chunk dimensions, 2-4 */
for(i=0; i<4; i++)
- cdims[i] = (hsize_t)((HDrandom() % 5) + 2);
+ cdims[i] = (hsize_t)((HDrandom() % 3) + 2);
/* Generate initial dataset size, 1-10 */
for(i=0; i<4; i++) {
@@ -2713,60 +2729,86 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue )
/* Main loop */
for(i=0; i<RAND4_NITER; i++) {
- /* Generate random write buffer */
- for(j=0; j<dims[0]; j++)
- for(k=0; k<dims[1]; k++)
- for(l=0; l<dims[2]; l++)
- for(m=0; m<dims[3]; m++)
- wbuf[j][k][l][m] = HDrandom();
-
- /* Write data */
- if(H5Dwrite(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, wbuf)
- < 0)
- RAND4_FAIL_DUMP(i+1, -1, -1, -1, -1)
-
- /* Generate new dataset size, 1-10 */
+ if(writing && !zero_dim) {
+ /* Generate random write buffer */
+ for(j=0; j<dims[0]; j++)
+ for(k=0; k<dims[1]; k++)
+ for(l=0; l<dims[2]; l++)
+ for(m=0; m<dims[3]; m++)
+ wbuf[j][k][l][m] = HDrandom();
+
+ /* Write data */
+ if(H5Dwrite(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT,
+ wbuf) < 0)
+ RAND4_FAIL_DUMP(i+1, -1, -1, -1, -1)
+ } /* end if */
+
+ /* Generate new dataset size, 0-10 (0 much less likely) */
+ zero_dim = FALSE;
for(j=0; j<4; j++) {
old_dims[j] = dims[j];
- dims[j] = (hsize_t)((HDrandom() % 10) + 1);
+ if((dims[j] = (hsize_t)(HDrandom() % 11)) == 0)
+ if((dims[j] = (hsize_t)(HDrandom() % 11)) == 0)
+ zero_dim = TRUE;
dim_log[i+1][j] = dims[j];
} /* end for */
+ /* If writing is disabled, update min_unwritten_dims */
+ if(!writing)
+ for(j=0; j<4; j++)
+ if(old_dims[j] < min_unwritten_dims[j])
+ min_unwritten_dims[j] = old_dims[j];
+
/* Resize dataset */
if(H5Dset_extent(dset, dims) < 0)
RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1)
- /* Read data from resized dataset */
- if(H5Sselect_hyperslab(mspace, H5S_SELECT_SET, start, NULL, dims, NULL)
- < 0)
- RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1)
- if(H5Dread(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, rbuf)
- < 0)
- RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1)
-
- /* Verify correctness of read data */
- if(do_fillvalue) {
- for(j=0; j<dims[0]; j++)
- for(k=0; k<dims[1]; k++)
- for(l=0; l<dims[2]; l++)
- for(m=0; m<dims[3]; m++)
- if(j >= old_dims[0] || k >= old_dims[1]
- || l >= old_dims[2] || m >= old_dims[3]) {
- if(FILL_VALUE != rbuf[j][k][l][m])
- RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m)
- } /* end if */
- else
+ if(!zero_dim) {
+ /* Read data from resized dataset */
+ if(H5Sselect_hyperslab(mspace, H5S_SELECT_SET, start, NULL, dims,
+ NULL) < 0)
+ RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1)
+ if(H5Dread(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, rbuf)
+ < 0)
+ RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1)
+
+ /* Verify correctness of read data */
+ if(do_fillvalue) {
+ for(j=0; j<dims[0]; j++)
+ for(k=0; k<dims[1]; k++)
+ for(l=0; l<dims[2]; l++)
+ for(m=0; m<dims[3]; m++)
+ if(j >= valid_dims[0] || k >= valid_dims[1]
+ || l >= valid_dims[2]
+ || m >= valid_dims[3]) {
+ if(FILL_VALUE != rbuf[j][k][l][m])
+ RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m)
+ } /* end if */
+ else
+ if(wbuf[j][k][l][m] != rbuf[j][k][l][m])
+ RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m)
+ } /* end if */
+ else {
+ for(j=0; j<MIN(dims[0],valid_dims[0]); j++)
+ for(k=0; k<MIN(dims[1],valid_dims[1]); k++)
+ for(l=0; l<MIN(dims[2],valid_dims[2]); l++)
+ for(m=0; m<MIN(dims[3],valid_dims[3]); m++)
if(wbuf[j][k][l][m] != rbuf[j][k][l][m])
RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m)
+ } /* end else */
+ } /* end if */
+
+ /* Handle the switch between writing and not writing */
+ if(do_sparse && !(i % RAND4_SPARSE_SWITCH)) {
+ writing = !writing;
+ if(!writing) {
+ for(j=0; j<4; j++)
+ min_unwritten_dims[j] = old_dims[j];
+ valid_dims = min_unwritten_dims;
+ } /* end if */
+ else
+ valid_dims = old_dims;
} /* end if */
- else {
- for(j=0; j<MIN(dims[0],old_dims[0]); j++)
- for(k=0; k<MIN(dims[1],old_dims[1]); k++)
- for(l=0; l<MIN(dims[2],old_dims[2]); l++)
- for(m=0; m<MIN(dims[3],old_dims[3]); m++)
- if(wbuf[j][k][l][m] != rbuf[j][k][l][m])
- RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m)
- } /* end else */
} /* end for */
/* Close */