From 509a068f628826b8d1d09b7ab267400f4cec0852 Mon Sep 17 00:00:00 2001 From: Gaute Hope Date: Thu, 20 May 2021 00:21:52 +0200 Subject: Add H5Dchunk_iter method for iterating over chunks (#6) * Add H5Dchunk_iter method for iterating over chunks This method iterates over all chunks in dataset, calling a user-supplied callback with the chunk information and optional user supplied data. The iterator is stopped when ITER_STOP is returned by the user-supplied callback or the iterator is exhausted. Existing methods to get chunk_info performs an iteration each time, so to get many or all chunks causes SUM(i) for i = 0 -> N operations for N chunks, as opposed to N operations when using this iterator for this use case. * H5Dchunk_iter: test iterating all chunks, some chunks and failing iteration. * H5D: move H5Dchunk_iter private methods to specific * trace: add H5D_chunk_iter_op_t and trace H5D.c * chunks-iter: document chunk_iter * chunk-iter: chunk add FUNC_ENTER/FUNC_LEAVE macros * Committing clang-format changes Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- bin/trace | 1 + src/H5D.c | 65 +++++++++++++++++++++++++++ src/H5Dchunk.c | 98 ++++++++++++++++++++++++++++++++++++++++ src/H5Dpkg.h | 1 + src/H5Dpublic.h | 53 ++++++++++++++++++++++ src/H5VLconnector.h | 3 +- src/H5VLnative_dataset.c | 18 ++++++++ test/chunk_info.c | 113 +++++++++++++++++++++++++++++++++++++++-------- 8 files changed, 333 insertions(+), 19 deletions(-) diff --git a/bin/trace b/bin/trace index 0611fa5..a2052ce 100755 --- a/bin/trace +++ b/bin/trace @@ -53,6 +53,7 @@ $Source = ""; "H5D_scatter_func_t" => "DS", "H5FD_mpio_xfer_t" => "Dt", "H5D_vds_view_t" => "Dv", + "H5D_chunk_iter_op_t" => "x", "herr_t" => "e", "H5E_auto1_t" => "Ea", "H5E_auto2_t" => "EA", diff --git a/src/H5D.c b/src/H5D.c index 2f0daaf..aa09f5f 100644 --- a/src/H5D.c +++ b/src/H5D.c @@ -2142,3 +2142,68 @@ H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, unsigned *filte done: FUNC_LEAVE_API(ret_value) } /* end H5Dget_chunk_info_by_coord() */ + +/*------------------------------------------------------------------------- + * Function: H5Dchunk_iter + * + * Purpose: Iterates over all chunks in dataset with given callback and user data. + * + * Parameters: + * hid_t dset_id; IN: Chunked dataset ID + * H5D_chunk_iter_op_t cb IN: User callback function, called for every chunk. + * void *op_data IN/OUT: Optional user data passed on to user callback. + * + * Callback information: + * H5D_chunk_iter_op_t is defined as: + * + * typedef int (*H5D_chunk_iter_op_t)( + * const hsize_t *offset, + * uint32_t filter_mask, + * haddr_t addr, + * uint32_t nbytes, + * void *op_data); + * + * H5D_chunk_iter_op_t parameters: + * hsize_t *offset; IN/OUT: Array of starting logical coordinates of chunk. + * uint32_t filter_mask; IN: Filter mask of chunk. + * haddr_t addr; IN: Offset in file of chunk data. + * uint32_t nbytes; IN: Size in number of bytes of chunk data in file. + * void *op_data; IN/OUT: Pointer to any user-defined data + * associated with the operation. + * + * The return values from an operator are: + * Zero (H5_ITER_CONT) causes the iterator to continue, returning zero when all + * elements have been processed. + * Positive (H5_ITER_STOP) causes the iterator to immediately return that positive + * value, indicating short-circuit success. + * Negative (H5_ITER_ERROR) causes the iterator to immediately return that value, + * indicating failure. + * + * Return: Non-negative on success, negative on failure + * + * Programmer: Gaute Hope + * August 2020 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Dchunk_iter(hid_t dset_id, H5D_chunk_iter_op_t cb, void *op_data) +{ + H5VL_object_t *vol_obj = NULL; /* Dataset for this operation */ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_API(FAIL) + H5TRACE3("e", "ix*x", dset_id, cb, op_data); + + /* Check arguments */ + if (NULL == (vol_obj = (H5VL_object_t *)H5I_object_verify(dset_id, H5I_DATASET))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "invalid dataset identifier") + + /* Call private function to get the chunk info given the chunk's index */ + if (H5VL_dataset_specific(vol_obj, H5VL_DATASET_CHUNK_ITER, H5P_DATASET_XFER_DEFAULT, H5_REQUEST_NULL, cb, + op_data) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "Can't iterate over chunks") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5Dchunk_iter() */ diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 25fb2f8..6be29fd 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -245,6 +245,11 @@ typedef struct H5D_chunk_coll_info_t { } H5D_chunk_coll_info_t; #endif /* H5_HAVE_PARALLEL */ +typedef struct H5D_chunk_iter_cb_data_t { + H5D_chunk_iter_op_t cb; /* User defined callback */ + void * op_data; /* User data for user defined callback */ +} H5D_chunk_iter_cb_data_t; + /********************/ /* Local Prototypes */ /********************/ @@ -269,6 +274,7 @@ static herr_t H5D__chunk_dest(H5D_t *dset); static int H5D__get_num_chunks_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); static int H5D__get_chunk_info_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); static int H5D__get_chunk_info_by_coord_cb(const H5D_chunk_rec_t *chunk_rec, void *_udata); +static int H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata); /* "Nonexistent" layout operation callback */ static ssize_t H5D__nonexistent_readvv(const H5D_io_info_t *io_info, size_t chunk_max_nseq, @@ -7451,3 +7457,95 @@ H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *offset, unsigned done: FUNC_LEAVE_NOAPI_TAG(ret_value) } /* end H5D__get_chunk_info_by_coord() */ + +/*------------------------------------------------------------------------- + * Function: H5D__chunk_iter + * + * Purpose: Iterate over all the chunks in the dataset with given callbak. + * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Gaute Hope + * August 2020 + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__chunk_iter(const H5D_t *dset, H5D_chunk_iter_op_t cb, void *op_data) +{ + const H5O_layout_t *layout = NULL; /* Dataset layout */ + const H5D_rdcc_t * rdcc = NULL; /* Raw data chunk cache */ + H5D_rdcc_ent_t * ent; /* Cache entry index */ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr) + + /* Check args */ + HDassert(dset); + HDassert(dset->shared); + + /* Get dataset layout and raw data chunk cache */ + layout = &(dset->shared->layout); + rdcc = &(dset->shared->cache.chunk); + HDassert(layout); + HDassert(rdcc); + HDassert(H5D_CHUNKED == layout->type); + + /* Search for cached chunks that haven't been written out */ + for (ent = rdcc->head; ent; ent = ent->next) + /* Flush the chunk out to disk, to make certain the size is correct later */ + if (H5D__chunk_flush_entry(dset, ent, FALSE) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer") + + /* Compose chunked index info struct */ + idx_info.f = dset->oloc.file; + idx_info.pline = &dset->shared->dcpl_cache.pline; + idx_info.layout = &dset->shared->layout.u.chunk; + idx_info.storage = &dset->shared->layout.storage.u.chunk; + + /* If the dataset is not written, return without errors */ + if (H5F_addr_defined(idx_info.storage->idx_addr)) { + H5D_chunk_iter_cb_data_t data; + data.cb = cb; + data.op_data = op_data; + + /* Iterate over the allocated chunks calling the iterator callback */ + if ((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__chunk_iter_cb, &data) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to iterate over chunks.") + } /* end if H5F_addr_defined */ + +done: + FUNC_LEAVE_NOAPI_TAG(ret_value) +} /* end H5D__chunk_iter() */ + +/*------------------------------------------------------------------------- + * Function: H5D__chunk_iter_cb + * + * Purpose: Call the user-defined function with the chunk data. The iterator continues if + * the user-defined function returns H5_ITER_CONT, and stops if H5_ITER_STOP is + * returned. + * + * Return: Success: H5_ITER_CONT or H5_ITER_STOP + * Failure: Negative (H5_ITER_ERROR) + * + * Programmer: Gaute Hope + * August 2020 + * + *------------------------------------------------------------------------- + */ +static int +H5D__chunk_iter_cb(const H5D_chunk_rec_t *chunk_rec, void *udata) +{ + int ret_value = 0; + + FUNC_ENTER_STATIC_NOERR + + const H5D_chunk_iter_cb_data_t *data = (H5D_chunk_iter_cb_data_t *)udata; + + ret_value = (data->cb)(chunk_rec->scaled, chunk_rec->filter_mask, chunk_rec->chunk_addr, + chunk_rec->nbytes, data->op_data); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__chunk_iter_cb */ diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index 64f2c7a..28561c9 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -565,6 +565,7 @@ H5_DLL herr_t H5D__get_chunk_info(const H5D_t *dset, const H5S_t *space, hsize_ unsigned *filter_mask, haddr_t *offset, hsize_t *size); H5_DLL herr_t H5D__get_chunk_info_by_coord(const H5D_t *dset, const hsize_t *coord, unsigned *filter_mask, haddr_t *addr, hsize_t *size); +H5_DLL herr_t H5D__chunk_iter(const H5D_t *dset, H5D_chunk_iter_op_t cb, void *op_data); H5_DLL haddr_t H5D__get_offset(const H5D_t *dset); H5_DLL herr_t H5D__vlen_get_buf_size(H5D_t *dset, hid_t type_id, hid_t space_id, hsize_t *size); H5_DLL herr_t H5D__vlen_get_buf_size_gen(H5VL_object_t *vol_obj, hid_t type_id, hid_t space_id, diff --git a/src/H5Dpublic.h b/src/H5Dpublic.h index e48b727..0b5fac6 100644 --- a/src/H5Dpublic.h +++ b/src/H5Dpublic.h @@ -160,6 +160,14 @@ typedef herr_t (*H5D_scatter_func_t)(const void **src_buf /*out*/, size_t *src_b typedef herr_t (*H5D_gather_func_t)(const void *dst_buf, size_t dst_buf_bytes_used, void *op_data); //! +//! +/** + * Define the operator function pointer for H5Dchunk_iter() + */ +//! +typedef int (*H5D_chunk_iter_op_t)(const hsize_t *offset, uint32_t filter_mask, haddr_t addr, uint32_t nbytes, + void *op_data); + /********************/ /* Public Variables */ /********************/ @@ -630,6 +638,51 @@ H5_DLL herr_t H5Dget_chunk_info_by_coord(hid_t dset_id, const hsize_t *offset, u * -------------------------------------------------------------------------- * \ingroup H5D * + * \brief Iterate over all chunks + * + * \dset_id + * \param[in] cb User callback function, called for every chunk. + * \param[in] op_data User-defined pointer to data required by op + * + * \return \herr_t + * + * \details H5Dget_chunk_iter iterates over all chunks in the dataset, calling the + * user supplied callback with the details of the chunk and the supplied + * \p op_data. + * + * Callback information: + * H5D_chunk_iter_op_t is defined as: + * + * typedef int (*H5D_chunk_iter_op_t)( + * const hsize_t *offset, + * uint32_t filter_mask, + * haddr_t addr, + * uint32_t nbytes, + * void *op_data); + * + * H5D_chunk_iter_op_t parameters: + * hsize_t *offset; IN/OUT: Array of starting logical coordinates of chunk. + * uint32_t filter_mask; IN: Filter mask of chunk. + * haddr_t addr; IN: Offset in file of chunk data. + * uint32_t nbytes; IN: Size in number of bytes of chunk data in file. + * void *op_data; IN/OUT: Pointer to any user-defined data + * associated with the operation. + * + * The return values from an operator are: + * Zero (H5_ITER_CONT) causes the iterator to continue, returning zero when all + * elements have been processed. + * Positive (H5_ITER_STOP) causes the iterator to immediately return that positive + * value, indicating short-circuit success. + * Negative (H5_ITER_ERROR) causes the iterator to immediately return that value, + * indicating failure. + * + */ +H5_DLL herr_t H5Dchunk_iter(hid_t dset_id, H5D_chunk_iter_op_t cb, void *op_data); + +/** + * -------------------------------------------------------------------------- + * \ingroup H5D + * * \brief Retrieves information about a chunk specified by its index * * \dset_id diff --git a/src/H5VLconnector.h b/src/H5VLconnector.h index 2a8ef6e..63613bc 100644 --- a/src/H5VLconnector.h +++ b/src/H5VLconnector.h @@ -82,7 +82,8 @@ typedef enum H5VL_dataset_specific_t { H5VL_DATASET_SET_EXTENT, /* H5Dset_extent */ H5VL_DATASET_FLUSH, /* H5Dflush */ H5VL_DATASET_REFRESH, /* H5Drefresh */ - H5VL_DATASET_WAIT /* H5Dwait */ + H5VL_DATASET_WAIT, /* H5Dwait */ + H5VL_DATASET_CHUNK_ITER /* H5Dchunk_iter */ } H5VL_dataset_specific_t; /* Typedef for VOL connector dataset optional VOL operations */ diff --git a/src/H5VLnative_dataset.c b/src/H5VLnative_dataset.c index d78388e..21491e7 100644 --- a/src/H5VLnative_dataset.c +++ b/src/H5VLnative_dataset.c @@ -353,6 +353,24 @@ H5VL__native_dataset_specific(void *obj, H5VL_dataset_specific_t specific_type, break; } + case H5VL_DATASET_CHUNK_ITER: { /* H5Dchunk_iter */ + H5D_chunk_iter_op_t cb = HDva_arg(arguments, H5D_chunk_iter_op_t); + void * op_data = HDva_arg(arguments, void *); + + HDassert(dset->shared); + + /* Make sure the dataset is chunked */ + if (H5D_CHUNKED != dset->shared->layout.type) { + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a chunked dataset") + } + + /* Call private function */ + if (H5D__chunk_iter(dset, cb, op_data) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't iterate over chunks") + + break; + } + default: HGOTO_ERROR(H5E_VOL, H5E_UNSUPPORTED, FAIL, "invalid specific operation") } /* end switch */ diff --git a/test/chunk_info.c b/test/chunk_info.c index ada2d80..c84f500 100644 --- a/test/chunk_info.c +++ b/test/chunk_info.c @@ -1482,6 +1482,50 @@ error: return FAIL; } /* test_chunk_info_version2_btrees() */ +typedef struct chunk_iter_info_t { + hsize_t offset[2]; + uint32_t filter_mask; + haddr_t addr; + uint32_t nbytes; +} chunk_iter_info_t; + +static int +iter_cb(const hsize_t *offset, uint32_t filter_mask, haddr_t addr, uint32_t nbytes, void *op_data) +{ + chunk_iter_info_t **chunk_info = (chunk_iter_info_t **)op_data; + + (*chunk_info)->offset[0] = offset[0]; + (*chunk_info)->offset[1] = offset[1]; + (*chunk_info)->filter_mask = filter_mask; + (*chunk_info)->addr = addr; + (*chunk_info)->nbytes = nbytes; + + /* printf("offset: [%lld, %lld], addr: %ld, size: %d, filter mask: %d\n", offset[0], offset[1], addr, + * nbytes, filter_mask); */ + + *chunk_info += 1; + + return H5_ITER_CONT; +} + +static int +iter_cb_stop(const hsize_t H5_ATTR_UNUSED *offset, uint32_t H5_ATTR_UNUSED filter_mask, + haddr_t H5_ATTR_UNUSED addr, uint32_t H5_ATTR_UNUSED nbytes, void *op_data) +{ + chunk_iter_info_t **chunk_info = (chunk_iter_info_t **)op_data; + *chunk_info += 1; + return H5_ITER_STOP; +} + +static int +iter_cb_fail(const hsize_t H5_ATTR_UNUSED *offset, uint32_t H5_ATTR_UNUSED filter_mask, + haddr_t H5_ATTR_UNUSED addr, uint32_t H5_ATTR_UNUSED nbytes, void *op_data) +{ + chunk_iter_info_t **chunk_info = (chunk_iter_info_t **)op_data; + *chunk_info += 1; + return H5_ITER_ERROR; +} + /*------------------------------------------------------------------------- * Function: test_basic_query * @@ -1502,24 +1546,26 @@ error: static herr_t test_basic_query(hid_t fapl) { - char filename[FILENAME_BUF_SIZE]; /* File name */ - hid_t basicfile = H5I_INVALID_HID; /* File ID */ - hid_t dspace = H5I_INVALID_HID; /* Dataspace ID */ - hid_t dset = H5I_INVALID_HID; /* Dataset ID */ - hid_t cparms = H5I_INVALID_HID; /* Creation plist */ - hsize_t dims[2] = {NX, NY}; /* Dataset dimensions */ - hsize_t chunk_dims[2] = {CHUNK_NX, CHUNK_NY}; /* Chunk dimensions */ - int direct_buf[CHUNK_NX][CHUNK_NY]; /* Data in chunks */ - unsigned flt_msk = 0; /* Filter mask */ - unsigned read_flt_msk = 0; /* Filter mask after direct read */ - hsize_t offset[2]; /* Offset coordinates of a chunk */ - hsize_t out_offset[2] = {0, 0}; /* Buffer to get offset coordinates */ - hsize_t size = 0; /* Size of an allocated/written chunk */ - hsize_t nchunks = 0; /* Number of chunks */ - haddr_t addr = 0; /* Address of an allocated/written chunk */ - hsize_t chk_index = 0; /* Index of a chunk */ - hsize_t ii, jj; /* Array indices */ - herr_t ret; /* Temporary returned value for verifying failure */ + char filename[FILENAME_BUF_SIZE]; /* File name */ + hid_t basicfile = H5I_INVALID_HID; /* File ID */ + hid_t dspace = H5I_INVALID_HID; /* Dataspace ID */ + hid_t dset = H5I_INVALID_HID; /* Dataset ID */ + hid_t cparms = H5I_INVALID_HID; /* Creation plist */ + hsize_t dims[2] = {NX, NY}; /* Dataset dimensions */ + hsize_t chunk_dims[2] = {CHUNK_NX, CHUNK_NY}; /* Chunk dimensions */ + int direct_buf[CHUNK_NX][CHUNK_NY]; /* Data in chunks */ + unsigned flt_msk = 0; /* Filter mask */ + unsigned read_flt_msk = 0; /* Filter mask after direct read */ + hsize_t offset[2]; /* Offset coordinates of a chunk */ + hsize_t out_offset[2] = {0, 0}; /* Buffer to get offset coordinates */ + hsize_t size = 0; /* Size of an allocated/written chunk */ + hsize_t nchunks = 0; /* Number of chunks */ + haddr_t addr = 0; /* Address of an allocated/written chunk */ + hsize_t chk_index = 0; /* Index of a chunk */ + hsize_t ii, jj; /* Array indices */ + chunk_iter_info_t chunk_infos[2]; /* chunk infos filled up by iterator */ + chunk_iter_info_t *cptr; /* pointer to array of chunks */ + herr_t ret; /* Temporary returned value for verifying failure */ TESTING("basic operations"); @@ -1627,6 +1673,37 @@ test_basic_query(hid_t fapl) if (verify_empty_chunk_info(dset, offset) == FAIL) FAIL_PUTS_ERROR("Verification of H5Dget_chunk_info_by_coord on empty chunk failed\n"); + /* iterate over all chunks */ + cptr = &(chunk_infos[0]); + if (H5Dchunk_iter(dset, &iter_cb, &cptr) < 0) + TEST_ERROR; + + VERIFY(cptr, &(chunk_infos[2]), "Iterator did not iterate all chunks"); + VERIFY(chunk_infos[0].offset[0], 0, "Offset mismatch"); + VERIFY(chunk_infos[0].offset[1], 0, "Offset mismatch"); + VERIFY(chunk_infos[0].filter_mask, 0, "Filter mismatch"); + VERIFY(chunk_infos[0].nbytes, 96, "Size mismatch"); + + VERIFY(chunk_infos[1].offset[0], 1, "Offset mismatch"); + VERIFY(chunk_infos[1].offset[1], 1, "Offset mismatch"); + + /* iterate and stop after one iteration */ + cptr = &(chunk_infos[0]); + if (H5Dchunk_iter(dset, &iter_cb_stop, &cptr) < 0) + TEST_ERROR; + VERIFY(cptr, &(chunk_infos[1]), "Verification of halted iterator failed\n"); + + /* iterate and fail after one iteration */ + cptr = &(chunk_infos[0]); + H5E_BEGIN_TRY + { + ret = H5Dchunk_iter(dset, &iter_cb_fail, &cptr); + } + H5E_END_TRY; + if (ret >= 0) + TEST_ERROR; + VERIFY(cptr, &(chunk_infos[1]), "Verification of halted iterator failed\n"); + /* Release resourse */ if (H5Dclose(dset) < 0) TEST_ERROR -- cgit v0.12