diff options
author | Binh-Minh Ribler <bmribler@hdfgroup.org> | 2019-08-28 15:08:12 (GMT) |
---|---|---|
committer | Binh-Minh Ribler <bmribler@hdfgroup.org> | 2019-08-28 15:08:12 (GMT) |
commit | e06bf9d263f46360e271569cd59a83786d80d7b6 (patch) | |
tree | 4ce0d2a63e6caf3f24c3fa7f34b4baa4c35c4619 /src/H5Dchunk.c | |
parent | 506a5069a8b401db52ffe314e53751a0f1027128 (diff) | |
download | hdf5-e06bf9d263f46360e271569cd59a83786d80d7b6.zip hdf5-e06bf9d263f46360e271569cd59a83786d80d7b6.tar.gz hdf5-e06bf9d263f46360e271569cd59a83786d80d7b6.tar.bz2 |
HDFFV-10677 and HDFFV-10661
Description:
- Added functions to query chunk information:
H5Dget_num_chunks(dset_id, fspace_id, *nchunks)
Gets the number of written chunks that intersect with the given
dataspace. However, in this version, the intersection is not
yet completed. Thus, the number of all written chunks will be
returned.
H5Dget_chunk_info_by_coord(dset_id, *offset, *filter_mask, *addr, *size)
Given a chunk's logical coordinates, returns the chunk's filter,
address, and size.
H5Dget_chunk_info(dset_id, fspace_id, index, *offset, *filter_mask, *addr, *size)
Given a chunk's index, returns the chunk's logical coordinates, filter,
address, and size. The chunk belongs to a set of chunks that have
nonempty intersection with the specified dataspace. However, in
this version, the intersection is not yet completed, and the index
is of all the written chunks.
These functions comply with VOL.
- Fixed some oversights found in the library for the tests in chunk_info.c
to work correctly. The returned value from a callback function was not
checked in H5EA_iterate(), H5FA_iterate(), and H5D__none_idx_iterate().
This oversight caused a callback function to continue iterating even though
it's supposed to stop.
Platforms tested:
Linux/64 (jelly)
Linux/64 (platypus)
Darwin (osx1011test)
Diffstat (limited to 'src/H5Dchunk.c')
-rw-r--r-- | src/H5Dchunk.c | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 082aac3..4976075 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -216,6 +216,18 @@ typedef struct H5D_chunk_readvv_ud_t { const H5D_t *dset; /* Dataset to operate on */ } H5D_chunk_readvv_ud_t; +typedef struct H5D_chunk_info_iter_ud_t { + hsize_t scaled[H5O_LAYOUT_NDIMS]; /* Logical offset of the chunk */ + hsize_t ndims; /* Number of dimensions in the dataset */ + uint32_t nbytes; /* Size of stored data in the chunk */ + unsigned filter_mask; /* Excluded filters */ + haddr_t chunk_addr; /* Address of the chunk in file */ + hsize_t chunk_idx; /* Chunk index, where the iteration needs to stop */ + hsize_t curr_idx; /* Current index, where the iteration is */ + unsigned idx_hint; /* Index of chunk in cache, if present */ + hbool_t found; /* Whether the chunk was found */ +} H5D_chunk_info_iter_ud_t; + /* Callback info for file selection iteration */ typedef struct H5D_chunk_file_iter_ud_t { H5D_chunk_map_t *fm; /* File->memory chunk mapping info */ @@ -252,6 +264,11 @@ static herr_t H5D__chunk_flush(H5D_t *dset); static herr_t H5D__chunk_io_term(const H5D_chunk_map_t *fm); static herr_t H5D__chunk_dest(H5D_t *dset); +/* Chunk query operation callbacks */ +static int H5D__get_num_chunks_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); +static int H5D__get_chunk_info_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); +static int H5D__get_chunk_info_by_coord_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); + /* "Nonexistent" layout operation callback */ static ssize_t H5D__nonexistent_readvv(const H5D_io_info_t *io_info, @@ -6956,3 +6973,378 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__chunk_format_convert() */ + +/*------------------------------------------------------------------------- + * Function: H5D__get_num_chunks_cb + * + * Purpose: Callback function that increments the number of written + * chunks in the dataset. + * + * Note: Currently, this function only gets the number of all written + * chunks, regardless the dataspace. + * + * Return: H5_ITER_CONT + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +static int +H5D__get_num_chunks_cb(const H5D_chunk_rec_t H5_ATTR_UNUSED *chunk_rec, void *_udata) +{ + hsize_t *num_chunks = (hsize_t *)_udata; + int ret_value = H5_ITER_CONT; /* Callback return value */ + + FUNC_ENTER_STATIC_NOERR + + HDassert(num_chunks); + + (*num_chunks)++; + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5D__get_num_chunks_cb() */ + + +/*------------------------------------------------------------------------- + * Function: H5D__get_num_chunks + * + * Purpose: Gets the number of written chunks in a dataset. + * + * Note: Currently, this function only gets the number of all written + * chunks, regardless the dataspace. + * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__get_num_chunks(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_t *nchunks) +{ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + hsize_t num_chunks = 0; /* Number of written chunks */ + H5D_rdcc_ent_t *ent; /* Cache entry */ + const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */ + const H5O_layout_t *layout; /* Dataset layout */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr) + + HDassert(dset); + HDassert(dset->shared); + HDassert(space); + HDassert(nchunks); + + layout = &(dset->shared->layout); /* Dataset layout */ + rdcc = &(dset->shared->cache.chunk); /* raw data chunk cache */ + HDassert(rdcc); + + /* Search for cached chunks that haven't been written out */ + for(ent = rdcc->head; ent; ent = ent->next) + /* Flush the chunk out to disk, to make certain the size is correct later */ + if(H5D__chunk_flush_entry(dset, ent, FALSE) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer") + + /* Compose chunked index info struct */ + idx_info.f = dset->oloc.file; + idx_info.pline = &dset->shared->dcpl_cache.pline; + idx_info.layout = &dset->shared->layout.u.chunk; + idx_info.storage = &dset->shared->layout.storage.u.chunk; + + /* If the dataset is not written, number of chunks will be 0 */ + if(!H5F_addr_defined(idx_info.storage->idx_addr)) { + *nchunks = 0; + } + else { + /* Iterate over the allocated chunks */ + if((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__get_num_chunks_cb, &num_chunks) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve allocated chunk information from index") + *nchunks = num_chunks; + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__get_num_chunks() */ + + +/*------------------------------------------------------------------------- + * Function: H5D__get_chunk_info_cb + * + * Purpose: Get the chunk info of the queried chunk, given by its index. + * + * Return: Success: H5_ITER_CONT or H5_ITER_STOP + * H5_ITER_STOP indicates the queried chunk is found + * Failure: Negative (H5_ITER_ERROR) + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +static int +H5D__get_chunk_info_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata) +{ + H5D_chunk_info_iter_ud_t *chunk_info = (H5D_chunk_info_iter_ud_t *)_udata; + hsize_t ii = 0; /* Dimension index */ + int ret_value = H5_ITER_CONT; /* Callback return value */ + + FUNC_ENTER_STATIC_NOERR + + /* Check args */ + HDassert(chunk_rec); + HDassert(chunk_info); + + /* If this is the queried chunk, retrieve its info and stop iterating */ + if (chunk_info->curr_idx == chunk_info->chunk_idx) { + chunk_info->filter_mask = chunk_rec->filter_mask; + chunk_info->chunk_addr = chunk_rec->chunk_addr; + chunk_info->nbytes = chunk_rec->nbytes; + for (ii = 0; ii < chunk_info->ndims; ii++) + chunk_info->scaled[ii] = chunk_rec->scaled[ii]; + chunk_info->found = TRUE; + + /* Stop iterating */ + ret_value = H5_ITER_STOP; + } + /* Go to the next chunk */ + else + chunk_info->curr_idx++; + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5D__get_chunk_info_cb() */ + + +/*------------------------------------------------------------------------- + * Function: H5D__get_chunk_info + * + * Purpose: Iterate over the chunks in the dataset to get the info + * of the desired chunk. + * + * Note: Currently, this function only gets the number of all written + * chunks, regardless the dataspace. + * + * Return: Success: SUCCEED + * Failure: FAIL + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__get_chunk_info(const H5D_t *dset, const H5S_t H5_ATTR_UNUSED *space, hsize_t chk_index, hsize_t *offset, unsigned *filter_mask, haddr_t *addr, hsize_t *size) +{ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + H5D_chunk_info_iter_ud_t udata; /* User data for callback */ + const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */ + H5D_rdcc_ent_t *ent; /* Cache entry index */ + hsize_t ii = 0; /* Dimension index */ + hsize_t nchunks = 0; /* Number of chunks in this dset */ + hsize_t scaled[H5S_MAX_RANK]; + herr_t ret_value = SUCCEED;/* Return value */ + + FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr) + + HDassert(dset); + HDassert(dset->shared); + HDassert(space); + + /* Get the raw data chunk cache */ + rdcc = &(dset->shared->cache.chunk); + HDassert(rdcc); + + /* Search for cached chunks that haven't been written out */ + for(ent = rdcc->head; ent; ent = ent->next) + /* Flush the chunk out to disk, to make certain the size is correct later */ + if(H5D__chunk_flush_entry(dset, ent, FALSE) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer") + + /* Compose chunked index info struct */ + idx_info.f = dset->oloc.file; + idx_info.pline = &dset->shared->dcpl_cache.pline; + idx_info.layout = &dset->shared->layout.u.chunk; + idx_info.storage = &dset->shared->layout.storage.u.chunk; + + /* Set addr & size for when dset is not written or queried chunk is not found */ + if (addr) + *addr = HADDR_UNDEF; + if (size) + *size = 0; + + /* If the chunk is written, get its info, otherwise, return without error */ + if(H5F_addr_defined(idx_info.storage->idx_addr)) { + /* Initialize before iteration */ + udata.chunk_idx = chk_index; + udata.curr_idx = 0; + udata.ndims = dset->shared->ndims; + udata.nbytes = 0; + udata.filter_mask = 0; + udata.chunk_addr = HADDR_UNDEF; + udata.found = FALSE; + + /* Iterate over the allocated chunks */ + if((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__get_chunk_info_cb, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve allocated chunk information from index") + + /* Obtain requested info if the chunk is found */ + if(udata.found) { + if(filter_mask) + *filter_mask = udata.filter_mask; + if(addr) + *addr = udata.chunk_addr; + if(size) + *size = udata.nbytes; + if(offset) + for(ii = 0; ii < udata.ndims; ii++) + offset[ii] = udata.scaled[ii] * dset->shared->layout.u.chunk.dim[ii]; + } + } /* end if H5F_addr_defined */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__get_chunk_info() */ + + +/*------------------------------------------------------------------------- + * Function: H5D__get_chunk_info_by_coord_cb + * + * Purpose: Get the chunk info of the desired chunk, given its offset + * coordinates. + * + * Return: Success: H5_ITER_CONT or H5_ITER_STOP + * Failure: Negative (H5_ITER_ERROR) + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +static int +H5D__get_chunk_info_by_coord_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata) +{ + hsize_t ii; + H5D_chunk_info_iter_ud_t *chunk_info = (H5D_chunk_info_iter_ud_t *)_udata; + hbool_t different = FALSE; /* TRUE when a scaled value pair mismatch */ + int ret_value = H5_ITER_CONT; /* Callback return value */ + + FUNC_ENTER_STATIC_NOERR + + /* Check args */ + HDassert(chunk_rec); + HDassert(chunk_info); + + /* Going through the scaled, stop when a mismatch is found */ + for (ii = 0; ii < chunk_info->ndims && !different; ii++) + if (chunk_info->scaled[ii] != chunk_rec->scaled[ii]) + different = TRUE; + + /* Same scaled coords means the chunk is found, copy the chunk info */ + if (!different) { + chunk_info->nbytes = chunk_rec->nbytes; + chunk_info->filter_mask = chunk_rec->filter_mask; + chunk_info->chunk_addr = chunk_rec->chunk_addr; + chunk_info->found = TRUE; + + /* Stop iterating */ + ret_value = H5_ITER_STOP; + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5D__get_chunk_info_by_coord_cb() */ + + +/*------------------------------------------------------------------------- + * Function: H5D__get_chunk_info_by_coord + * + * Purpose: Iterate over the chunks in the dataset to get the info + * of the desired chunk, given by its offset coordinates. + * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Binh-Minh Ribler + * June 2019 (HDFFV-10677) + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned* filter_mask, haddr_t *addr, hsize_t *size) +{ + const H5O_layout_t *layout = NULL; /* Dataset layout */ + const H5D_rdcc_t *rdcc = NULL; /* Raw data chunk cache */ + H5D_rdcc_ent_t *ent; /* Cache entry index */ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + H5D_chunk_info_iter_ud_t udata; /* User data for callback */ + H5D_chunk_ud_t udata2; + hsize_t scaled[H5S_MAX_RANK]; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr) + + /* Check args */ + HDassert(dset); + HDassert(dset->shared); + HDassert(offset); + + /* Get dataset layout and raw data chunk cache */ + layout = &(dset->shared->layout); + rdcc = &(dset->shared->cache.chunk); + HDassert(layout); + HDassert(rdcc); + HDassert(H5D_CHUNKED == layout->type); + +/* Is this expensive? */ + /* Search for cached chunks that haven't been written out */ + for(ent = rdcc->head; ent; ent = ent->next) + /* Flush the chunk out to disk, to make certain the size is correct later */ + if(H5D__chunk_flush_entry(dset, ent, FALSE) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer") + + /* Set addr & size for when dset is not written or queried chunk is not found */ + if (addr) + *addr = HADDR_UNDEF; + if (size) + *size = 0; + + /* Compose chunked index info struct */ + idx_info.f = dset->oloc.file; + idx_info.pline = &dset->shared->dcpl_cache.pline; + idx_info.layout = &dset->shared->layout.u.chunk; + idx_info.storage = &dset->shared->layout.storage.u.chunk; + + /* If the dataset is not written, return without errors */ + if(H5F_addr_defined(idx_info.storage->idx_addr)) { + /* Calculate the scaled of this chunk */ + H5VM_chunk_scaled(dset->shared->ndims, offset, layout->u.chunk.dim, udata.scaled); + udata.scaled[dset->shared->ndims] = 0; + + /* Initialize before iteration */ + udata.ndims = dset->shared->ndims; + udata.nbytes = 0; + udata.filter_mask = 0; + udata.chunk_addr = HADDR_UNDEF; + udata.found = FALSE; + + /* Iterate over the allocated chunks to find the requested chunk */ + if((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__get_chunk_info_by_coord_cb, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve information of the chunk by its scaled coordinates") + + /* Obtain requested info if the chunk is found */ + if (udata.found) { + if(filter_mask) + *filter_mask = udata.filter_mask; + if(addr) + *addr = udata.chunk_addr; + if(size) + *size = udata.nbytes; + } + } /* end if H5F_addr_defined */ + +done: + FUNC_LEAVE_NOAPI_TAG(ret_value) +} /* end H5D__get_chunk_info_by_coord() */ + |