diff options
author | Neil Fortner <nfortne2@hdfgroup.org> | 2010-05-20 21:55:55 (GMT) |
---|---|---|
committer | Neil Fortner <nfortne2@hdfgroup.org> | 2010-05-20 21:55:55 (GMT) |
commit | 2db0e8ecf33c397b39f9ab9327f5326e9b91fb54 (patch) | |
tree | 1e3680a95718f2e8b80e03107bf4ebf7465f2281 | |
parent | 00aa7f1d8a39d900e90e1f68a58c7bce7eb3b113 (diff) | |
download | hdf5-2db0e8ecf33c397b39f9ab9327f5326e9b91fb54.zip hdf5-2db0e8ecf33c397b39f9ab9327f5326e9b91fb54.tar.gz hdf5-2db0e8ecf33c397b39f9ab9327f5326e9b91fb54.tar.bz2 |
[svn-r18868] Purpose: Fix bug in dataset shrinking algorithm
Description:
Previously, a chunk could be flushed as a result of chunk operations performed
in the H5B_iterate callback used by H5D_chunk_prune_by_extent. Flushing a
filtered chunk can force it to be reallocated, which changes the contents of
the chunk B-tree in the middle of H5B_iterate. Because H5B_iterate works from
a locally cached copy of the B-tree, subsequent operations were then passed
incorrect data. Rewrote H5D_chunk_prune_by_extent to avoid H5B_iterate
entirely.
Also fixed a bug in the dataset expansion algorithm (H5D_chunk_allocate) that
could cause extra chunks to be created.
Tested: jam, linew, smirom (h5committest)
-rw-r--r-- | release_docs/RELEASE.txt | 2 | ||||
-rw-r--r-- | src/H5Dchunk.c | 503 | ||||
-rw-r--r-- | src/H5Dpkg.h | 3 | ||||
-rw-r--r-- | test/set_extent.c | 138 |
4 files changed, 384 insertions, 262 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 12f5e32..13807dc 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -225,6 +225,8 @@ Bug Fixes since HDF5-1.8.0 release Library ------- + - Fixed a file corruption bug that could happen when shrinking a + compressed dataset. (NAF - 2010/05/20) - Fixed some memory leaks in VL datatype conversion when strings are used as fill values. (MAM - 2010/05/12 - BZ# 1826) - Fixed a bug when copying objects with NULL references with the diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 9b9298e..b0cb0b1 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -99,20 +99,13 @@ /* Local Typedefs */ /******************/ -/* Stack of chunks to remove during a "prune" iteration */ -typedef struct H5D_chunk_prune_stack_t { - H5D_chunk_rec_t rec; /* Chunk record */ - struct H5D_chunk_prune_stack_t *next; /* Next chunk in stack */ -} H5D_chunk_prune_stack_t; - /* Callback info for iteration to prune chunks */ typedef struct H5D_chunk_it_ud1_t { H5D_chunk_common_ud_t common; /* Common info for B-tree user data (must be first) */ const H5D_chk_idx_info_t *idx_info; /* Chunked index info */ const H5D_io_info_t *io_info; /* I/O info for dataset operation */ - const hsize_t *dims; /* New dataset dimensions */ - const hbool_t *shrunk_dims; /* Dimensions which have been shrunk */ - H5D_chunk_prune_stack_t *rm_stack; /* Stack of chunks outside the new dimensions */ + const hsize_t *space_dim; /* New dataset dimensions */ + const hbool_t *shrunk_dim; /* Dimensions which have been shrunk */ H5S_t *chunk_space; /* Dataspace for a chunk */ uint32_t elmts_per_chunk;/* Elements in chunk */ hsize_t *hyper_start; /* Starting location of hyperslab */ @@ -270,9 +263,6 @@ H5FL_DEFINE(H5D_chunk_info_t); /* Declare a free list to manage the chunk sequence information */ H5FL_BLK_DEFINE_STATIC(chunk); -/* Declare a free list to manage H5D_chunk_sl_ck_t objects */ -H5FL_DEFINE_STATIC(H5D_chunk_prune_stack_t); - 
/*------------------------------------------------------------------------- @@ -2326,7 +2316,7 @@ H5D_chunk_flush_entry(const H5D_t *dset, hid_t dxpl_id, const H5D_dxpl_cache_t * HDassert(!ent->locked); buf = ent->chunk; - if(ent->dirty) { + if(ent->dirty && !ent->deleted) { H5D_chunk_ud_t udata; /* pass through B-tree */ hbool_t must_insert = FALSE; /* Whether the chunk must go through the "insert" method */ @@ -2710,6 +2700,9 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, */ rdcc->stats.nhits++; + /* Still save the chunk address so the cache stays consistent */ + chunk_addr = udata->addr; + if(NULL == (chunk = H5D_chunk_alloc(chunk_size, pline))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed for raw data chunk") @@ -2809,6 +2802,7 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, ent->locked = 0; ent->dirty = FALSE; + ent->deleted = FALSE; ent->chunk_addr = chunk_addr; for(u = 0; u < layout->u.chunk.ndims; u++) ent->offset[u] = io_info->store->chunk.offset[u]; @@ -3258,7 +3252,7 @@ H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id, hbool_t full_overwrite, /* Check if allocation along this dimension is really necessary */ if(min_unalloc[op_dim] > max_unalloc[op_dim]) - carry = TRUE; + continue; else { /* Reset the chunk offset indices */ HDmemset(chunk_offset, 0, (layout->u.chunk.ndims * sizeof(chunk_offset[0]))); @@ -3275,6 +3269,18 @@ H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id, hbool_t full_overwrite, if(H5D_chunk_get_info(dset, dxpl_id, chunk_offset, &udata) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") HDassert(!H5F_addr_defined(udata.addr)); + + /* Make sure the chunk is really in the dataset and outside the + * original dimensions */ + { + hbool_t outside_orig = FALSE; + for(i=0; i<space_ndims; i++) { + HDassert(chunk_offset[i] < space_dim[i]); + if(chunk_offset[i] >= old_dim[i]) + outside_orig = TRUE; + } /* end for */ + HDassert(outside_orig); + } /* 
end block */ #endif /* NDEBUG */ /* Check for VL datatype & non-default fill value */ @@ -3418,12 +3424,13 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata) +H5D_chunk_prune_fill(H5D_chunk_it_ud1_t *udata) { const H5D_io_info_t *io_info = udata->io_info; /* Local pointer to I/O info */ H5D_t *dset = io_info->dset; /* Local pointer to the dataset info */ const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset's layout */ unsigned rank = udata->common.layout->ndims - 1; /* Dataset rank */ + const hsize_t *chunk_offset = io_info->store->chunk.offset; /* Chunk offset */ H5S_sel_iter_t chunk_iter; /* Memory selection iteration info */ hssize_t sel_nelmts; /* Number of elements in selection */ hsize_t count[H5O_LAYOUT_NDIMS]; /* Element count of hyperslab */ @@ -3436,6 +3443,17 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_prune_fill) + /* Get the info for the chunk in the file */ + if(H5D_chunk_get_info(dset, io_info->dxpl_id, chunk_offset, &chk_udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + + /* If this chunk does not exist in cache or on disk, no need to do anything + */ + if(!H5F_addr_defined(chk_udata.addr) + && !H5D_chunk_in_cache(dset, chunk_offset, + io_info->store->chunk.index)) + HGOTO_DONE(SUCCEED) + /* Initialize the fill value buffer, if necessary */ if(!udata->fb_info_init) { H5_CHECK_OVERFLOW(udata->elmts_per_chunk, uint32_t, size_t); @@ -3449,7 +3467,8 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata /* Compute the # of elements to leave with existing value, in each dimension */ for(u = 0; u < rank; u++) { - count[u] = MIN(layout->u.chunk.dim[u], (udata->dims[u] - chunk_rec->offset[u])); + count[u] = MIN(layout->u.chunk.dim[u], (udata->space_dim[u] + - 
chunk_offset[u])); HDassert(count[u] > 0); } /* end for */ @@ -3461,20 +3480,9 @@ H5D_chunk_prune_fill(const H5D_chunk_rec_t *chunk_rec, H5D_chunk_it_ud1_t *udata if(H5S_select_hyperslab(udata->chunk_space, H5S_SELECT_NOTB, udata->hyper_start, NULL, count, NULL) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTSELECT, FAIL, "unable to select hyperslab") - /* Calculate the index of this chunk */ - if(H5V_chunk_index(rank, chunk_rec->offset, layout->u.chunk.dim, layout->u.chunk.down_chunks, &io_info->store->chunk.index) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't get chunk index") - /* Lock the chunk into the cache, to get a pointer to the chunk buffer */ - /* (Casting away const OK -QAK) */ - io_info->store->chunk.offset = (hsize_t *)chunk_rec->offset; - chk_udata.common.layout = &layout->u.chunk; - chk_udata.common.storage = &layout->storage.u.chunk; - chk_udata.common.offset = chunk_rec->offset; - chk_udata.nbytes = chunk_rec->nbytes; - chk_udata.filter_mask = chunk_rec->filter_mask; - chk_udata.addr = chunk_rec->chunk_addr; - if(NULL == (chunk = (void *)H5D_chunk_lock(udata->io_info, &chk_udata, FALSE, &idx_hint))) + if(NULL == (chunk = (void *)H5D_chunk_lock(io_info, &chk_udata, FALSE, + &idx_hint))) HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "unable to lock raw data chunk") @@ -3524,82 +3532,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5D_chunk_prune_cb - * - * Purpose: Search for chunks that are no longer inside the pruned - * dataset's extent - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu - * March 26, 2002 - * - *------------------------------------------------------------------------- - */ -/* ARGSUSED */ -static int -H5D_chunk_prune_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata) -{ - H5D_chunk_it_ud1_t *udata = (H5D_chunk_it_ud1_t *)_udata; /* User data */ - H5D_chunk_prune_stack_t *stack_node = NULL; /* Stack node for chunk 
to remove */ - unsigned rank; /* Current # of dimensions */ - hbool_t should_delete = FALSE; /* Whether the chunk should be deleted */ - hbool_t needs_fill = FALSE; /* Whether the chunk overlaps the new extent and needs fill valiues */ - unsigned u; /* Local index variable */ - int ret_value = H5_ITER_CONT; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_prune_cb) - - /* Figure out what chunks are no longer in use for the specified extent and release them */ - rank = udata->common.layout->ndims - 1; - for(u = 0; u < rank; u++) - /* The chunk record points to a chunk of storage that contains the - * beginning of the logical address space represented by UDATA. - */ - if(udata->shrunk_dims[u]) { - if(chunk_rec->offset[u] >= udata->dims[u]) { - /* Indicate that the chunk will be deleted */ - should_delete = TRUE; - - /* Break out of loop, we know the chunk is outside the current dimensions */ - break; - } /* end if */ - /* Check for chunk that overlaps new extent and will need fill values */ - else if((chunk_rec->offset[u] + udata->common.layout->dim[u]) > udata->dims[u]) - /* Indicate that the chunk needs filling */ - /* (but continue in loop, since it could be outside the extent in - * another dimension -QAK) - */ - needs_fill = TRUE; - } /* end if */ - - /* Check for chunk to delete */ - if(should_delete) { - /* Allocate space for the removal stack node */ - if(NULL == (stack_node = H5FL_MALLOC(H5D_chunk_prune_stack_t))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, H5_ITER_ERROR, "memory allocation failed for removal stack node") - - /* Store the record for the chunk */ - stack_node->rec = *chunk_rec; - - /* Push the chunk description onto the stack */ - stack_node->next = udata->rm_stack; - udata->rm_stack = stack_node; - } /* end if */ - /* Check for chunk that overlaps the new dataset dimensions and needs filling */ - else if(needs_fill) - /* Write the fill value */ - if(H5D_chunk_prune_fill(chunk_rec, udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, 
H5_ITER_ERROR, "unable to write fill value") - -done: - /* It is currently impossible to fail after the stack node has been - * malloc'ed. No need to free it here on failure. */ - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D_chunk_prune_cb() */ - - -/*------------------------------------------------------------------------- * Function: H5D_chunk_prune_by_extent * * Purpose: This function searches for chunks that are no longer necessary @@ -3696,11 +3628,26 @@ done: * To release the chunks, we traverse the B-tree to obtain a list of unused * allocated chunks, and then call H5B_remove() for each chunk. * + * Modifications: Neil Fortner + * 4 May 2010 + * Rewrote algorithm to work in a way similar to + * H5D_chunk_allocate: it now iterates over all chunks that need + * to be filled or removed, and does so as appropriate. This + * avoids various issues with coherency of locally cached data + * which could occur with the previous implementation. + * *------------------------------------------------------------------------- */ herr_t -H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims) +H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dim) { + hsize_t min_mod_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of first chunk to modify in each dimension */ + hsize_t max_mod_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of last chunk to modify in each dimension */ + hssize_t max_fill_chunk_off[H5O_LAYOUT_NDIMS]; /* Offset of last chunk that might be filled in each dimension */ + hbool_t fill_dim[H5O_LAYOUT_NDIMS]; /* Whether the plane of edge chunks in this dimension needs to be filled */ + hbool_t dims_outside_fill[H5O_LAYOUT_NDIMS]; /* Dimensions in chunk offset outside fill dimensions */ + int ndims_outside_fill = 0; /* Number of dimensions in chunk offset outside fill dimensions */ + hbool_t has_fill = FALSE; /* Whether there are chunks that must be filled */ H5D_chk_idx_info_t idx_info; /* Chunked index info */ H5D_io_info_t chk_io_info; /* 
Chunked I/O info object */ H5D_storage_t chk_store; /* Chunk storage information */ @@ -3708,21 +3655,24 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims) H5D_dxpl_cache_t *dxpl_cache = &_dxpl_cache; /* Data transfer property cache */ const H5O_layout_t *layout = &(dset->shared->layout); /* Dataset's layout */ const H5D_rdcc_t *rdcc = &(dset->shared->cache.chunk); /*raw data chunk cache */ - H5D_rdcc_ent_t *ent = NULL, *next = NULL; /* Cache entries */ - hsize_t curr_dims[H5O_LAYOUT_NDIMS]; /* Current dataspace dimensions */ - hbool_t shrunk_dims[H5O_LAYOUT_NDIMS]; /* Dimensions which have shrunk */ + H5D_rdcc_ent_t *ent = NULL; /* Cache entry */ + unsigned idx = 0; /* Hash index number */ + int space_ndims; /* Dataset's space rank */ + hsize_t space_dim[H5O_LAYOUT_NDIMS]; /* Current dataspace dimensions */ + int op_dim; /* Current operationg dimension */ + hbool_t shrunk_dim[H5O_LAYOUT_NDIMS]; /* Dimensions which have shrunk */ H5D_chunk_it_ud1_t udata; /* Chunk index iterator user data */ hbool_t udata_init = FALSE; /* Whether the chunk index iterator user data has been initialized */ - hbool_t needs_fill; /* Whether we need to write the fill value */ - H5D_chunk_prune_stack_t *fill_stack = NULL; /* Stack of chunks to fill */ - H5D_chunk_prune_stack_t *tmp_stack; /* Temporary stack node pointer */ H5D_chunk_common_ud_t idx_udata; /* User data for index removal routine */ + H5D_chunk_ud_t chk_udata; /* User data for getting chunk info */ H5S_t *chunk_space = NULL; /* Dataspace for a chunk */ - hsize_t chunk_dims[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */ + hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */ + hsize_t chunk_offset[H5O_LAYOUT_NDIMS]; /* Offset of current chunk */ hsize_t hyper_start[H5O_LAYOUT_NDIMS]; /* Starting location of hyperslab */ uint32_t elmts_per_chunk; /* Elements in chunk */ - unsigned rank; /* Current # of dimensions */ - unsigned u; /* Local index variable */ + hbool_t chk_on_disk; /* Whether a 
chunk exists on disk */ + hbool_t carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */ + int i; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5D_chunk_prune_by_extent, FAIL) @@ -3732,19 +3682,26 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims) HDassert(layout->u.chunk.ndims > 0 && layout->u.chunk.ndims <= H5O_LAYOUT_NDIMS); HDassert(dxpl_cache); - /* set the removal stack pointer in udata to NULL, so if the function fails - * early it will not try to free the nonexistent stack */ - udata.rm_stack = NULL; - /* Fill the DXPL cache values for later use */ if(H5D_get_dxpl_cache(dxpl_id, &dxpl_cache) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") /* Go get the rank & dimensions (including the element size) */ - rank = layout->u.chunk.ndims - 1; - if(H5S_get_simple_extent_dims(dset->shared->space, curr_dims, NULL) < 0) + if((space_ndims = H5S_get_simple_extent_dims(dset->shared->space, space_dim, + NULL)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get dataset dimensions") - curr_dims[rank] = layout->u.chunk.dim[rank]; + space_dim[space_ndims] = layout->u.chunk.dim[space_ndims]; + + /* The last dimension in chunk_offset is always 0 */ + chunk_offset[space_ndims] = (hsize_t)0; + + /* Check if any old dimensions are 0, if so we do not have to do anything */ + for(op_dim=0; op_dim<space_ndims; op_dim++) + if(old_dim[op_dim] == 0) { + /* Reset any cached chunk info for this dataset */ + H5D_chunk_cinfo_cache_reset(&dset->shared->cache.chunk.last); + HGOTO_DONE(SUCCEED) + } /* end if */ /* Round up to the next integer # of chunks, to accomodate partial chunks */ /* Use current dims because the indices have already been updated! 
-NAF */ @@ -3752,22 +3709,25 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims) /* (also copy the chunk dimensions into 'hsize_t' array for creating dataspace) */ /* (also compute the dimensions which have been shrunk) */ elmts_per_chunk = 1; - for(u = 0; u < rank; u++) { - elmts_per_chunk *= layout->u.chunk.dim[u]; - chunk_dims[u] = layout->u.chunk.dim[u]; - shrunk_dims[u] = curr_dims[u] < old_dims[u]; + for(i = 0; i < space_ndims; i++) { + elmts_per_chunk *= layout->u.chunk.dim[i]; + chunk_dim[i] = layout->u.chunk.dim[i]; + shrunk_dim[i] = space_dim[i] < old_dim[i]; } /* end for */ /* Create a dataspace for a chunk & set the extent */ - if(NULL == (chunk_space = H5S_create_simple(rank, chunk_dims, NULL))) + if(NULL == (chunk_space = H5S_create_simple((unsigned)space_ndims, + chunk_dim, NULL))) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCREATE, FAIL, "can't create simple dataspace") /* Reset hyperslab start array */ /* (hyperslabs will always start from origin) */ HDmemset(hyper_start, 0, sizeof(hyper_start)); - /* Set up chunked I/O info object, for operations on chunks (in callback) */ - /* (Casting away const OK -QAK) */ + /* Set up chunked I/O info object, for operations on chunks (in callback) + * Note that we only need to set chunk_offset once, as the array's address + * will never change. 
*/ + chk_store.chunk.offset = chunk_offset; H5D_BUILD_IO_INFO_RD(&chk_io_info, dset, dxpl_cache, dxpl_id, &chk_store, NULL); /* Compose chunked index info struct */ @@ -3783,91 +3743,226 @@ H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, const hsize_t *old_dims) udata.common.storage = &layout->storage.u.chunk; udata.io_info = &chk_io_info; udata.idx_info = &idx_info; - udata.dims = curr_dims; - udata.shrunk_dims = shrunk_dims; + udata.space_dim = space_dim; + udata.shrunk_dim = shrunk_dim; udata.elmts_per_chunk = elmts_per_chunk; udata.chunk_space = chunk_space; udata.hyper_start = hyper_start; udata_init = TRUE; - /*------------------------------------------------------------------------- - * Figure out what chunks are no longer in use for the specified extent - * and release them from the linked list raw data cache - *------------------------------------------------------------------------- + /* Initialize user data for removal */ + idx_udata.layout = &layout->u.chunk; + idx_udata.storage = &layout->storage.u.chunk; + + /* + * Determine the chunks which need to be filled or removed */ - for(ent = rdcc->head; ent; ent = next) { - /* Get pointer to next extry in cache, in case this one is evicted */ - next = ent->next; + for(op_dim=0; op_dim<space_ndims; op_dim++) { + /* Calculate the largest offset of chunks that might need to be + * modified in this dimension */ + max_mod_chunk_off[op_dim] = chunk_dim[op_dim] * ((old_dim[op_dim] - 1) + / chunk_dim[op_dim]); + + /* Calculate the largest offset of chunks that might need to be + * filled in this dimension */ + if(0 == space_dim[op_dim]) + max_fill_chunk_off[op_dim] = -1; + else + max_fill_chunk_off[op_dim] = (hssize_t)(chunk_dim[op_dim] + * ((MIN(space_dim[op_dim], old_dim[op_dim]) - 1) + / chunk_dim[op_dim])); + + if(shrunk_dim[op_dim]) { + /* Calculate the smallest offset of chunks that might need to be + * modified in this dimension. 
Note that this array contains + * garbage for all dimensions which are not shrunk. These locations + * must not be read from! */ + min_mod_chunk_off[op_dim] = chunk_dim[op_dim] * (space_dim[op_dim] + / chunk_dim[op_dim]); + + /* Determine if we need to fill chunks in this dimension */ + if((hssize_t)min_mod_chunk_off[op_dim] + == max_fill_chunk_off[op_dim]) { + fill_dim[op_dim] = TRUE; + has_fill = TRUE; + } /* end if */ + else + fill_dim[op_dim] = FALSE; + } /* end if */ + else + fill_dim[op_dim] = FALSE; + } /* end for */ - needs_fill = FALSE; + /* Check the cache for any entries that are outside the bounds. Mark these + * entries as deleted so they are not flushed to disk accidentally. This is + * only necessary if there are chunks that need to be filled. */ + if(has_fill) + for(ent = rdcc->head; ent; ent = ent->next) + /* Check for chunk offset outside of new dimensions */ + for(i = 0; i<space_ndims; i++) + if((hsize_t)ent->offset[i] >= space_dim[i]) { + /* Mark the entry as "deleted" */ + ent->deleted = TRUE; + break; + } /* end if */ - /* Check for chunk offset outside of new dimensions */ - for(u = 0; u < rank; u++) { - if((hsize_t)ent->offset[u] >= curr_dims[u]) { - /* Evict the entry from the cache, but do not flush it to disk */ - if(H5D_chunk_cache_evict(dset, dxpl_id, dxpl_cache, ent, FALSE) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTREMOVE, FAIL, "unable to evict chunk") + /* Main loop: fill or remove chunks */ + for(op_dim=0; op_dim<space_ndims; op_dim++) { + /* Check if modification along this dimension is really necessary */ + if(!shrunk_dim[op_dim]) + continue; + else { + HDassert((hsize_t) max_mod_chunk_off[op_dim] + >= min_mod_chunk_off[op_dim]); - /* We don't need to write the fill value */ - needs_fill = FALSE; + /* Reset the chunk offset indices */ + HDmemset(chunk_offset, 0, ((unsigned)space_ndims + * sizeof(chunk_offset[0]))); + chunk_offset[op_dim] = min_mod_chunk_off[op_dim]; + + /* Initialize "dims_outside_fill" array */ + ndims_outside_fill 
= 0; + for(i=0; i<space_ndims; i++) + if((hssize_t)chunk_offset[i] > max_fill_chunk_off[i]) { + dims_outside_fill[i] = TRUE; + ndims_outside_fill++; + } /* end if */ + else + dims_outside_fill[i] = FALSE; - /* Break out of loop, chunk is evicted */ - break; - } else if(!H5F_addr_defined(ent->chunk_addr) && shrunk_dims[u] - && (ent->offset[u] + chunk_dims[u]) > curr_dims[u]) - /* We need to write the fill value to the unused parts of chunk */ - needs_fill = TRUE; - } /* end for */ + carry = FALSE; + } /* end if */ - if(needs_fill) { - /* Allocate space for the stack node */ - if(NULL == (tmp_stack = H5FL_MALLOC(H5D_chunk_prune_stack_t))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for stack node") + while(!carry) { + /* Calculate the index of this chunk */ + if(H5V_chunk_index((unsigned)space_ndims, chunk_offset, + layout->u.chunk.dim, layout->u.chunk.down_chunks, + &(chk_io_info.store->chunk.index)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't get chunk index") + + if(0 == ndims_outside_fill) { + HDassert(fill_dim[op_dim]); + HDassert(chunk_offset[op_dim] == min_mod_chunk_off[op_dim]); + + /* Fill the unused parts of the chunk */ + if(H5D_chunk_prune_fill(&udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to write fill value") + } /* end if */ + else { + chk_on_disk = FALSE; - /* Set up chunk record for fill routine */ - tmp_stack->rec.nbytes = dset->shared->layout.u.chunk.size; - HDmemcpy(tmp_stack->rec.offset, ent->offset, sizeof(tmp_stack->rec.offset)); - tmp_stack->rec.filter_mask = 0; /* Since the chunk is already in cache this doesn't matter */ - tmp_stack->rec.chunk_addr = ent->chunk_addr; +#ifndef NDEBUG + /* Make sure this chunk is really outside the new dimensions */ + { + hbool_t outside_dim = FALSE; + + for(i=0; i<space_ndims; i++) + if(chunk_offset[i] >= space_dim[i]){ + outside_dim = TRUE; + break; + } /* end if */ + HDassert(outside_dim); + } /* end block */ +#endif /* NDEBUG */ - /* Push 
the chunk description onto the stack */ - tmp_stack->next = fill_stack; - fill_stack = tmp_stack; - } /* end if */ - } /* end for */ + /* Search for the chunk in the cache */ + if(rdcc->nslots > 0) { + idx = H5D_CHUNK_HASH(dset->shared, + chk_io_info.store->chunk.index); + ent = rdcc->slot[idx]; + + if(ent) + for(i=0; i<space_ndims; i++) + if(chunk_offset[i] + != ent->offset[i]) { + ent = NULL; + break; + } /* end if */ + } /* end if */ - /* Traverse the stack of chunks to be filled, filling each. We will free - * the nodes later in the "done" section. */ - tmp_stack = fill_stack; - while(tmp_stack) { - /* Write the fill value */ - if(H5D_chunk_prune_fill(&(tmp_stack->rec), &udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to write fill value") + /* Evict the entry from the cache, but do not flush it to disk + */ + if(ent) { + /* Determine if the chunk is allocated on disk, and + * therefore needs to be removed from disk */ + chk_on_disk = H5F_addr_defined(ent->chunk_addr); - /* Advance the stack pointer */ - tmp_stack = tmp_stack->next; - } /* end while */ + /* Remove the chunk from cache */ + if(H5D_chunk_cache_evict(dset, dxpl_id, dxpl_cache, ent, + FALSE) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTREMOVE, FAIL, "unable to evict chunk") - /* Iterate over the chunks */ - if((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D_chunk_prune_cb, &udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve prune chunks from index") + ent = NULL; + } /* end if */ + else { + /* Determine if the chunk is allocated on disk, and + * therefore needs to be removed from disk */ + /* Get the info for the chunk in the file */ + if(H5D_chunk_get_info(dset, dxpl_id, chunk_offset, + &chk_udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + + chk_on_disk = H5F_addr_defined(chk_udata.addr); + } /* end else */ - /* Traverse the stack of chunks to be deleted, removing each. 
We will free - * the nodes later in the "done" section. */ - idx_udata.layout = &layout->u.chunk; - idx_udata.storage = &layout->storage.u.chunk; - tmp_stack = udata.rm_stack; - while(tmp_stack) { - /* Update the offset in idx_udata */ - idx_udata.offset = tmp_stack->rec.offset; + /* Remove the chunk from disk, if present */ + if(chk_on_disk) { + /* Update the offset in idx_udata */ + idx_udata.offset = chunk_offset; - /* Remove the chunk from disk */ - if((layout->storage.u.chunk.ops->remove)(&idx_info, &idx_udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTDELETE, FAIL, "unable to remove chunk entry from index") + /* Remove the chunk from disk */ + if((layout->storage.u.chunk.ops->remove)(&idx_info, &idx_udata) + < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTDELETE, FAIL, "unable to remove chunk entry from index") + } /* end if */ + } /* end else */ - /* Advance the stack pointer */ - tmp_stack = tmp_stack->next; - } /* end while */ + /* Increment indices */ + carry = TRUE; + for(i = (int)(space_ndims - 1); i >= 0; --i) { + chunk_offset[i] += chunk_dim[i]; + if(chunk_offset[i] > (hsize_t) max_mod_chunk_off[i]) { + /* Left maximum dimensions, "wrap around" and check if this + * dimension is no longer outside the fill dimension */ + if(i == op_dim) { + chunk_offset[i] = min_mod_chunk_off[i]; + if(dims_outside_fill[i] && fill_dim[i]) { + dims_outside_fill[i] = FALSE; + ndims_outside_fill--; + } /* end if */ + } /* end if */ + else { + chunk_offset[i] = 0; + if(dims_outside_fill[i] && max_fill_chunk_off[i] >= 0) { + dims_outside_fill[i] = FALSE; + ndims_outside_fill--; + } /* end if */ + } /* end else */ + } /* end if */ + else { + /* Check if we just went outside the fill dimension */ + if(!dims_outside_fill[i] && (hssize_t)chunk_offset[i] + > max_fill_chunk_off[i]) { + dims_outside_fill[i] = TRUE; + ndims_outside_fill++; + } /* end if */ + + /* We found the next chunk, so leave the loop */ + carry = FALSE; + break; + } /* end else */ + } /* end for */ + } /* end 
while(!carry) */ + + /* Adjust max_mod_chunk_off so we don't modify the same chunk twice. + * Also check if this dimension started from 0 (and hence removed all + * of the chunks). */ + if(min_mod_chunk_off[op_dim] == 0) + break; + else + max_mod_chunk_off[op_dim] = min_mod_chunk_off[op_dim] + - chunk_dim[op_dim]; + } /* end for(op_dim=0...) */ /* Reset any cached chunk info for this dataset */ H5D_chunk_cinfo_cache_reset(&dset->shared->cache.chunk.last); @@ -3881,24 +3976,6 @@ done: HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info") } /* end if */ - /* Free stack of filled chunks */ - tmp_stack = fill_stack; - while(tmp_stack) { - /* Free the stack node and advance the stack pointer */ - tmp_stack = tmp_stack->next; - fill_stack = H5FL_FREE(H5D_chunk_prune_stack_t, fill_stack); - fill_stack = tmp_stack; - } /* end while */ - - /* Free stack of removed chunks */ - tmp_stack = udata.rm_stack; - while(tmp_stack) { - /* Free the stack node and advance the stack pointer */ - tmp_stack = tmp_stack->next; - udata.rm_stack = H5FL_FREE(H5D_chunk_prune_stack_t, udata.rm_stack); - udata.rm_stack = tmp_stack; - } /* end while */ - FUNC_LEAVE_NOAPI(ret_value) } /* end H5D_chunk_prune_by_extent() */ diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index dbd495c..bb8fd71 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -488,6 +488,7 @@ typedef struct { typedef struct H5D_rdcc_ent_t { hbool_t locked; /*entry is locked in cache */ hbool_t dirty; /*needs to be written to disk? 
*/ + hbool_t deleted; /*chunk about to be deleted (do not flush) */ hsize_t offset[H5O_LAYOUT_NDIMS]; /*chunk name */ uint32_t rd_count; /*bytes remaining to be read */ uint32_t wr_count; /*bytes remaining to be written */ @@ -612,7 +613,7 @@ H5_DLL herr_t H5D_chunk_allocated(H5D_t *dset, hid_t dxpl_id, hsize_t *nbytes); H5_DLL herr_t H5D_chunk_allocate(H5D_t *dset, hid_t dxpl_id, hbool_t full_overwrite, hsize_t old_dim[]); H5_DLL herr_t H5D_chunk_prune_by_extent(H5D_t *dset, hid_t dxpl_id, - const hsize_t *old_dims); + const hsize_t *old_dim); #ifdef H5_HAVE_PARALLEL H5_DLL herr_t H5D_chunk_addrmap(const H5D_io_info_t *io_info, haddr_t chunk_addr[]); #endif /* H5_HAVE_PARALLEL */ diff --git a/test/set_extent.c b/test/set_extent.c index 9d669a2..172c0ee 100644 --- a/test/set_extent.c +++ b/test/set_extent.c @@ -76,6 +76,7 @@ const char *FILENAME[] = { #define DIME2 7 #define ISTORE_IK 64 #define RAND4_NITER 100 +#define RAND4_SPARSE_SWITCH 10 #define RAND4_FAIL_DUMP(NDIM_SETS, J, K, L, M) { \ H5_FAILED(); AT(); \ test_random_rank4_dump(NDIM_SETS, dim_log, cdims, J, K, L, M); \ @@ -99,7 +100,8 @@ static int test_rank3( hid_t fapl, hbool_t set_istore_k); static int test_random_rank4( hid_t fapl, hid_t dcpl, - hbool_t do_fillvalue); + hbool_t do_fillvalue, + hbool_t do_sparse); static int test_external( hid_t fapl ); static int test_layouts( H5D_layout_t layout, hid_t fapl ); @@ -128,8 +130,11 @@ int main( void ) /* Copy the file access property list */ if((fapl2 = H5Pcopy(fapl)) < 0) TEST_ERROR + /* Set chunk cache so only part of the chunks can be cached on fapl */ + if(H5Pset_cache(fapl, 0, 8, 256 * sizeof(int), 0.75) < 0) TEST_ERROR + /* Disable chunk caching on fapl2 */ - if(H5Pset_cache(fapl2, 521, 0, 0, 0.) < 0) TEST_ERROR + if(H5Pset_cache(fapl2, 0, 0, 0, 0.) 
< 0) TEST_ERROR /* Set the "use the latest version of the format" bounds for creating objects in the file */ if(H5Pset_libver_bounds(fapl2, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) TEST_ERROR @@ -320,11 +325,17 @@ static int do_ranks( hid_t fapl ) if(H5Pset_fill_time(dcpl, H5D_FILL_TIME_IFSET) < 0) TEST_ERROR - if(test_random_rank4(fapl, dcpl, do_fillvalue) < 0) { + if(test_random_rank4(fapl, dcpl, do_fillvalue, FALSE) < 0) { DO_RANKS_PRINT_CONFIG("Randomized rank 4") goto error; } /* end if */ + if(!(config & CONFIG_EARLY_ALLOC)) + if(test_random_rank4(fapl, dcpl, do_fillvalue, TRUE) < 0) { + DO_RANKS_PRINT_CONFIG("Randomized rank 4 with sparse allocation") + goto error; + } /* end if */ + /* Close dcpl */ if(H5Pclose(dcpl) < 0) TEST_ERROR @@ -2658,7 +2669,8 @@ error: * *------------------------------------------------------------------------- */ -static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue ) +static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue, + hbool_t do_sparse ) { hid_t file = -1; hid_t dset = -1; @@ -2667,12 +2679,16 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue ) hid_t my_dcpl = -1; hsize_t dims[4]; /* Dataset's dimensions */ hsize_t old_dims[4]; /* Old dataset dimensions */ + hsize_t min_unwritten_dims[4]; /* Minimum dimensions since last write */ + hsize_t *valid_dims = old_dims; /* Dimensions of region still containing written data */ hsize_t cdims[4]; /* Chunk dimensions */ const hsize_t mdims[4] = {10, 10, 10, 10}; /* Memory buffer dimensions */ const hsize_t start[4] = {0, 0, 0, 0}; /* Start for hyperslabe operations on memory */ static int rbuf[10][10][10][10]; /* Read buffer */ static int wbuf[10][10][10][10]; /* Write buffer */ static hsize_t dim_log[RAND4_NITER+1][4]; /* Log of dataset dimensions */ + hbool_t zero_dim = FALSE; /* Whether a dimension is 0 */ + hbool_t writing = TRUE; /* Whether we're writing to the dset */ volatile unsigned i, j, k, l, m; /* 
Local indices */ char filename[NAME_BUF_SIZE]; @@ -2681,9 +2697,9 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue ) if ((file = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0) TEST_ERROR - /* Generate random chunk dimensions, 2-6 */ + /* Generate random chunk dimensions, 2-4 */ for(i=0; i<4; i++) - cdims[i] = (hsize_t)((HDrandom() % 5) + 2); + cdims[i] = (hsize_t)((HDrandom() % 3) + 2); /* Generate initial dataset size, 1-10 */ for(i=0; i<4; i++) { @@ -2713,60 +2729,86 @@ static int test_random_rank4( hid_t fapl, hid_t dcpl, hbool_t do_fillvalue ) /* Main loop */ for(i=0; i<RAND4_NITER; i++) { - /* Generate random write buffer */ - for(j=0; j<dims[0]; j++) - for(k=0; k<dims[1]; k++) - for(l=0; l<dims[2]; l++) - for(m=0; m<dims[3]; m++) - wbuf[j][k][l][m] = HDrandom(); - - /* Write data */ - if(H5Dwrite(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, wbuf) - < 0) - RAND4_FAIL_DUMP(i+1, -1, -1, -1, -1) - - /* Generate new dataset size, 1-10 */ + if(writing && !zero_dim) { + /* Generate random write buffer */ + for(j=0; j<dims[0]; j++) + for(k=0; k<dims[1]; k++) + for(l=0; l<dims[2]; l++) + for(m=0; m<dims[3]; m++) + wbuf[j][k][l][m] = HDrandom(); + + /* Write data */ + if(H5Dwrite(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, + wbuf) < 0) + RAND4_FAIL_DUMP(i+1, -1, -1, -1, -1) + } /* end if */ + + /* Generate new dataset size, 0-10 (0 much less likely) */ + zero_dim = FALSE; for(j=0; j<4; j++) { old_dims[j] = dims[j]; - dims[j] = (hsize_t)((HDrandom() % 10) + 1); + if((dims[j] = (hsize_t)(HDrandom() % 11)) == 0) + if((dims[j] = (hsize_t)(HDrandom() % 11)) == 0) + zero_dim = TRUE; dim_log[i+1][j] = dims[j]; } /* end for */ + /* If writing is disabled, update min_unwritten_dims */ + if(!writing) + for(j=0; j<4; j++) + if(old_dims[j] < min_unwritten_dims[j]) + min_unwritten_dims[j] = old_dims[j]; + /* Resize dataset */ if(H5Dset_extent(dset, dims) < 0) RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1) - /* Read data from resized 
dataset */ - if(H5Sselect_hyperslab(mspace, H5S_SELECT_SET, start, NULL, dims, NULL) - < 0) - RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1) - if(H5Dread(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, rbuf) - < 0) - RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1) - - /* Verify correctness of read data */ - if(do_fillvalue) { - for(j=0; j<dims[0]; j++) - for(k=0; k<dims[1]; k++) - for(l=0; l<dims[2]; l++) - for(m=0; m<dims[3]; m++) - if(j >= old_dims[0] || k >= old_dims[1] - || l >= old_dims[2] || m >= old_dims[3]) { - if(FILL_VALUE != rbuf[j][k][l][m]) - RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m) - } /* end if */ - else + if(!zero_dim) { + /* Read data from resized dataset */ + if(H5Sselect_hyperslab(mspace, H5S_SELECT_SET, start, NULL, dims, + NULL) < 0) + RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1) + if(H5Dread(dset, H5T_NATIVE_INT, mspace, H5S_ALL, H5P_DEFAULT, rbuf) + < 0) + RAND4_FAIL_DUMP(i+2, -1, -1, -1, -1) + + /* Verify correctness of read data */ + if(do_fillvalue) { + for(j=0; j<dims[0]; j++) + for(k=0; k<dims[1]; k++) + for(l=0; l<dims[2]; l++) + for(m=0; m<dims[3]; m++) + if(j >= valid_dims[0] || k >= valid_dims[1] + || l >= valid_dims[2] + || m >= valid_dims[3]) { + if(FILL_VALUE != rbuf[j][k][l][m]) + RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m) + } /* end if */ + else + if(wbuf[j][k][l][m] != rbuf[j][k][l][m]) + RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m) + } /* end if */ + else { + for(j=0; j<MIN(dims[0],valid_dims[0]); j++) + for(k=0; k<MIN(dims[1],valid_dims[1]); k++) + for(l=0; l<MIN(dims[2],valid_dims[2]); l++) + for(m=0; m<MIN(dims[3],valid_dims[3]); m++) if(wbuf[j][k][l][m] != rbuf[j][k][l][m]) RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m) + } /* end else */ + } /* end if */ + + /* Handle the switch between writing and not writing */ + if(do_sparse && !(i % RAND4_SPARSE_SWITCH)) { + writing = !writing; + if(!writing) { + for(j=0; j<4; j++) + min_unwritten_dims[j] = old_dims[j]; + valid_dims = min_unwritten_dims; + } /* end if */ + 
else + valid_dims = old_dims; } /* end if */ - else { - for(j=0; j<MIN(dims[0],old_dims[0]); j++) - for(k=0; k<MIN(dims[1],old_dims[1]); k++) - for(l=0; l<MIN(dims[2],old_dims[2]); l++) - for(m=0; m<MIN(dims[3],old_dims[3]); m++) - if(wbuf[j][k][l][m] != rbuf[j][k][l][m]) - RAND4_FAIL_DUMP(i+2, (int)j, (int)k, (int)l, (int)m) - } /* end else */ } /* end for */ /* Close */ |