summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5Dchunk.c186
-rw-r--r--src/H5Dmpio.c4
-rw-r--r--src/H5Dpkg.h2
-rw-r--r--test/dsets.c135
4 files changed, 309 insertions, 18 deletions
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 7627cec..1c27ede 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -172,6 +172,12 @@ static ssize_t H5D_null_readvv(const H5D_io_info_t *io_info,
size_t dset_max_nseq, size_t *dset_curr_seq, size_t dset_len_arr[], hsize_t dset_offset_arr[],
size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_len_arr[], hsize_t mem_offset_arr[]);
+/* "Unexisted" layout operation callback */
+static ssize_t
+H5D_unexisted_readvv(const H5D_io_info_t *io_info,
+ size_t chunk_max_nseq, size_t *chunk_curr_seq, size_t chunk_len_arr[], hsize_t chunk_offset_arr[],
+ size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_len_arr[], hsize_t mem_offset_arr[]);
+
/* Helper routines */
static void *H5D_chunk_alloc(size_t size, const H5O_pline_t *pline);
static void *H5D_chunk_xfree(void *chk, const H5O_pline_t *pline);
@@ -229,6 +235,22 @@ const H5D_layout_ops_t H5D_LOPS_NULL[1] = {{
NULL
}};
+/* "unexisted" storage layout I/O ops */
+const H5D_layout_ops_t H5D_LOPS_UNEXISTED[1] = {{
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+#ifdef H5_HAVE_PARALLEL
+ NULL,
+ NULL,
+#endif /* H5_HAVE_PARALLEL */
+ H5D_unexisted_readvv,
+ NULL,
+ NULL
+}};
+
/* Declare a free list to manage the H5F_rdcc_ent_ptr_t sequence information */
H5FL_SEQ_DEFINE_STATIC(H5D_rdcc_ent_ptr_t);
@@ -1335,10 +1357,17 @@ done:
* Programmer: Raymond Lu
* 17 July 2007
*
+ * Modification:Raymond Lu
+ * 4 Feb 2009
+ * Took out the parameter chunk address. This function doesn't
+ * need to check the address. One case that was considered
+ * cacheable was when the chunk is bigger than the cache size
+ * but not allocated on disk. I moved it to uncacheable to
+ * bypass the cache to improve performance.
*-------------------------------------------------------------------------
*/
hbool_t
-H5D_chunk_cacheable(const H5D_io_info_t *io_info, haddr_t caddr)
+H5D_chunk_cacheable(const H5D_io_info_t *io_info)
{
const H5D_t *dataset = io_info->dset;
hbool_t ret_value;
@@ -1367,8 +1396,7 @@ H5D_chunk_cacheable(const H5D_io_info_t *io_info, haddr_t caddr)
* cache, just write the data to it directly.
*/
H5_CHECK_OVERFLOW(dataset->shared->layout.u.chunk.size, uint32_t, size_t);
- if((size_t)dataset->shared->layout.u.chunk.size > dataset->shared->cache.chunk.nbytes
- && H5F_addr_defined(caddr))
+ if((size_t)dataset->shared->layout.u.chunk.size > dataset->shared->cache.chunk.nbytes)
ret_value = FALSE;
else
ret_value = TRUE;
@@ -1447,6 +1475,7 @@ H5D_chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
{
H5SL_node_t *chunk_node; /* Current node in chunk skip list */
H5D_io_info_t nul_io_info; /* "null" I/O info object */
+ H5D_io_info_t unexist_io_info; /* "unexisted" I/O info object */
H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */
H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */
H5D_io_info_t cpt_io_info; /* Compact I/O info object */
@@ -1469,6 +1498,10 @@ H5D_chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
HDmemcpy(&nul_io_info, io_info, sizeof(nul_io_info));
nul_io_info.layout_ops = *H5D_LOPS_NULL;
+ /* Set up "unexisted" I/O info object */
+ HDmemcpy(&unexist_io_info, io_info, sizeof(unexist_io_info));
+ unexist_io_info.layout_ops = *H5D_LOPS_UNEXISTED;
+
/* Set up contiguous I/O info object */
HDmemcpy(&ctg_io_info, io_info, sizeof(ctg_io_info));
ctg_io_info.store = &ctg_store;
@@ -1527,7 +1560,7 @@ H5D_chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
} /* end if */
else {
/* Load the chunk into cache and lock it. */
- if(H5D_chunk_cacheable(io_info, udata.addr)) {
+ if(H5D_chunk_cacheable(io_info)) {
/* Pass in chunk's coordinates in a union. */
io_info->store->chunk.offset = chunk_info->coords;
io_info->store->chunk.index = chunk_info->index;
@@ -1545,10 +1578,7 @@ H5D_chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
/* Point I/O info at contiguous I/O info for this chunk */
chk_io_info = &cpt_io_info;
} /* end if */
- else {
- /* Sanity check */
- HDassert(H5F_addr_defined(udata.addr));
-
+ else if(H5F_addr_defined(udata.addr)) {
/* Set up the storage address information for this chunk */
ctg_store.contig.dset_addr = udata.addr;
@@ -1557,7 +1587,14 @@ H5D_chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
/* Point I/O info at temporary I/O info for this chunk */
chk_io_info = &ctg_io_info;
- } /* end else */
+ } /* end else if */
+ else {
+ /* No chunk cached */
+ chunk = NULL;
+
+ /* Point I/O info at "unexist" I/O info for this chunk */
+ chk_io_info = &unexist_io_info;
+ }
} /* end else */
/* Perform the actual read operation */
@@ -1588,6 +1625,12 @@ done:
* Programmer: Raymond Lu
* Thursday, April 10, 2003
*
+ * Modification:Raymond Lu
+ * 4 Feb 2009
+ * One case that was considered cacheable was when the chunk
+ * was bigger than the cache size but not allocated on disk.
+ * I moved it to uncacheable branch to bypass the cache to
+ * improve performance.
*-------------------------------------------------------------------------
*/
static herr_t
@@ -1644,7 +1687,7 @@ H5D_chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
* simply allocate space instead of load the chunk. */
if(H5D_chunk_get_info(io_info->dset, io_info->dxpl_id, chunk_info->coords, &udata) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
- if(H5D_chunk_cacheable(io_info, udata.addr)) {
+ if(H5D_chunk_cacheable(io_info)) {
hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */
/* Pass in chunk's coordinates in a union. */
@@ -1670,9 +1713,26 @@ H5D_chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
chk_io_info = &cpt_io_info;
} /* end if */
else {
- /* Sanity check */
- HDassert(H5F_addr_defined(udata.addr));
-
+ /* If the chunk hasn't been allocated on disk, do so now. */
+ if(!H5F_addr_defined(udata.addr)) {
+ H5D_chk_idx_info_t idx_info; /* Chunked index info */
+
+ /* Compose chunked index info struct */
+ idx_info.f = io_info->dset->oloc.file;
+ idx_info.dxpl_id = io_info->dxpl_id;
+ idx_info.layout = &(io_info->dset->shared->layout);
+
+ /* Set up the size of chunk for user data */
+ udata.nbytes = io_info->dset->shared->layout.u.chunk.size;
+
+ /* Create the chunk */
+ if((io_info->dset->shared->layout.u.chunk.ops->insert)(&idx_info, &udata) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert/resize chunk")
+
+ /* Make sure the address of the chunk is returned. */
+ if(!H5F_addr_defined(udata.addr))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "chunk address isn't defined")
+ }
/* Set up the storage address information for this chunk */
ctg_store.contig.dset_addr = udata.addr;
@@ -2032,6 +2092,10 @@ done:
* Programmer: Albert Cheng
* June 27, 1998
*
+ * Modification:Raymond Lu
+ * 11 Feb 2009
+ * I added a condition check for H5D_chunk_cinfo_cache_update.
+ * It's the same as H5D_chunk_cacheable.
*-------------------------------------------------------------------------
*/
herr_t
@@ -2069,8 +2133,11 @@ H5D_chunk_get_info(const H5D_t *dset, hid_t dxpl_id, const hsize_t *chunk_offset
if((dset->shared->layout.u.chunk.ops->get_addr)(&idx_info, udata) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't query chunk address")
- /* Cache the information retrieved */
- H5D_chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata);
+ /* The same condition check as H5D_chunk_cacheable. */
+ if(dset->shared->dcpl_cache.pline.nused ||
+ ((size_t)dset->shared->layout.u.chunk.size <= dset->shared->cache.chunk.nbytes))
+ /* Cache the information retrieved */
+ H5D_chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata);
} /* end if */
done:
@@ -4546,3 +4613,92 @@ H5D_null_readvv(const H5D_io_info_t UNUSED *io_info,
FUNC_LEAVE_NOAPI(bytes_processed)
} /* H5D_null_readvv() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5D_unexisted_readvv
+ *
+ * Purpose: When the chunk doesn't exist on disk and the chunk is bigger
+ * than the cache size, performs fill value I/O operation on
+ * memory buffer, advancing through two I/O vectors, until one
+ * runs out.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Raymond Lu
+ * 6 Feb 2009
+ *
+ *-------------------------------------------------------------------------
+ */
+static ssize_t
+H5D_unexisted_readvv(const H5D_io_info_t *io_info,
+ size_t chunk_max_nseq, size_t *chunk_curr_seq, size_t chunk_len_arr[], hsize_t chunk_offset_arr[],
+ size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_len_arr[], hsize_t mem_offset_arr[])
+{
+ H5D_t *dset = io_info->dset; /* Local pointer to the dataset info */
+ unsigned char *buf;
+ const H5O_fill_t *fill = &(io_info->dset->shared->dcpl_cache.fill); /*Fill value info*/
+ H5D_fill_buf_info_t fb_info; /* Dataset's fill buffer info */
+ hbool_t fb_info_init = FALSE; /* Whether the fill value buffer has been initialized */
+ ssize_t ret_value; /* Return value */
+
+ size_t u, v; /* Local index variables */
+ size_t size; /* Size of sequence in bytes */
+ ssize_t bytes_processed = 0; /* Eventual return value */
+
+ FUNC_ENTER_NOAPI_NOINIT(H5D_unexisted_readvv)
+
+ /* Check args */
+ HDassert(chunk_len_arr);
+ HDassert(chunk_offset_arr);
+ HDassert(mem_len_arr);
+ HDassert(mem_offset_arr);
+
+ /* Work through all the sequences */
+ for(u = *mem_curr_seq, v = *chunk_curr_seq; u < mem_max_nseq && v < chunk_max_nseq; ) {
+ /* Choose smallest buffer to write */
+ if(chunk_len_arr[v] < mem_len_arr[u])
+ size = chunk_len_arr[v];
+ else
+ size = mem_len_arr[u];
+
+ /* Compute offset in memory */
+ buf = (unsigned char *)io_info->u.rbuf + mem_offset_arr[v];
+
+ /* Initialize the fill value buffer */
+ /* (use the compact dataset storage buffer as the fill value buffer) */
+ if(H5D_fill_init(&fb_info, buf, FALSE,
+ NULL, NULL, NULL, NULL,
+ &dset->shared->dcpl_cache.fill, dset->shared->type,
+ dset->shared->type_id, (size_t)0, mem_len_arr[u], io_info->dxpl_id) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill buffer info")
+
+ /* Check for VL datatype & non-default fill value.
+ * Fill the buffer with VL datatype fill values */
+ if(fb_info.has_vlen_fill_type && (H5D_fill_refill_vl(&fb_info, fb_info.elmts_per_buf,
+ io_info->dxpl_id) < 0))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCONVERT, FAIL, "can't refill fill value buffer")
+
+ /* Update source information */
+ chunk_len_arr[v] -= size;
+ chunk_offset_arr[v] += size;
+ if(chunk_len_arr[v] == 0)
+ v++;
+
+ /* Update destination information */
+ mem_len_arr[u] -= size;
+ mem_offset_arr[u] += size;
+ if(mem_len_arr[u] == 0)
+ u++;
+
+ /* Increment number of bytes copied */
+ bytes_processed += (ssize_t)size;
+ } /* end for */
+
+ /* Update current sequence vectors */
+ *mem_curr_seq = u;
+ *chunk_curr_seq = v;
+
+ ret_value = bytes_processed;
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5D_unexisted_readvv() */
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 99ad001..19be413 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -1224,7 +1224,7 @@ if(H5DEBUG(D))
HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "couldn't get chunk info from skipped list")
/* Load the chunk into cache and lock it. */
- if(H5D_chunk_cacheable(io_info, udata.addr)) {
+ if(H5D_chunk_cacheable(io_info)) {
hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */
/* Compute # of bytes accessed in chunk */
@@ -1449,7 +1449,7 @@ if(H5DEBUG(D)) {
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
/* Load the chunk into cache and lock it. */
- if(H5D_chunk_cacheable(io_info, udata.addr)) {
+ if(H5D_chunk_cacheable(io_info)) {
hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */
/* Compute # of bytes accessed in chunk */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index 4bf4e75..e2600a3 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -571,7 +571,7 @@ H5_DLL herr_t H5D_contig_copy(H5F_t *f_src, const H5O_layout_t *layout_src, H5F_
H5O_layout_t *layout_dst, H5T_t *src_dtype, H5O_copy_t *cpy_info, hid_t dxpl_id);
/* Functions that operate on chunked dataset storage */
-H5_DLL hbool_t H5D_chunk_cacheable(const H5D_io_info_t *io_info, haddr_t caddr);
+H5_DLL hbool_t H5D_chunk_cacheable(const H5D_io_info_t *io_info);
H5_DLL herr_t H5D_chunk_cinfo_cache_reset(H5D_chunk_cached_t *last);
H5_DLL herr_t H5D_chunk_create(H5D_t *dset /*in,out*/, hid_t dxpl_id);
H5_DLL herr_t H5D_chunk_init(H5F_t *f, hid_t dapl_id, hid_t dxpl_id, const H5D_t *dset);
diff --git a/test/dsets.c b/test/dsets.c
index e3bac54..575449a 100644
--- a/test/dsets.c
+++ b/test/dsets.c
@@ -44,6 +44,7 @@ const char *FILENAME[] = {
"random_chunks",
"huge_chunks",
"chunk_cache",
+ "big_chunk",
NULL
};
#define FILENAME_BUF_SIZE 1024
@@ -163,6 +164,12 @@ const char *FILENAME[] = {
#define TOO_HUGE_CHUNK_DIM2_1 ((hsize_t)1024)
#define TOO_HUGE_CHUNK_DIM2_2 ((hsize_t)1024)
+/* Parameters for testing bypassing chunk cache */
+#define BYPASS_DATASET "Dset"
+#define BYPASS_DIM 1000
+#define BYPASS_CHUNK_DIM 500
+#define BYPASS_FILL_VALUE 7
+
/* Shared global arrays */
#define DSET_DIM1 100
#define DSET_DIM2 200
@@ -6683,6 +6690,133 @@ error:
/*-------------------------------------------------------------------------
+ * Function: test_big_chunks_bypass_cache
+ *
+ * Purpose: When the chunk size is bigger than the cache size and the
+ * chunk isn't on disk, this test verifies that the library
+ * bypasses the cache.
+ *
+ * Return: Success: 0
+ * Failure: -1
+ *
+ * Programmer: Raymond Lu
+ * 11 Feb 2009
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+test_big_chunks_bypass_cache(hid_t fapl)
+{
+ char filename[FILENAME_BUF_SIZE];
+ hid_t fid = -1; /* File ID */
+ hid_t dcpl = -1; /* Dataset creation property list ID */
+ hid_t sid = -1; /* Dataspace ID */
+ hid_t dsid = -1; /* Dataset ID */
+ hsize_t dim, chunk_dim; /* Dataset and chunk dimensions */
+ size_t rdcc_nelmts, rdcc_nbytes;
+ int fvalue = BYPASS_FILL_VALUE;
+ hsize_t count, stride, offset, block;
+ static int wdata[BYPASS_CHUNK_DIM], rdata[BYPASS_DIM];
+ int i, j;
+ herr_t ret; /* Generic return value */
+
+ TESTING("big chunks bypassing the cache");
+
+ h5_fixname(FILENAME[9], fapl, filename, sizeof filename);
+
+ /* Define cache size to be smaller than chunk size */
+ rdcc_nelmts = BYPASS_CHUNK_DIM/5;
+ rdcc_nbytes = sizeof(int)*BYPASS_CHUNK_DIM/5;
+ if(H5Pset_cache(fapl, 0, rdcc_nelmts, rdcc_nbytes, 0) < 0) FAIL_STACK_ERROR
+
+ /* Create file */
+ if((fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl)) < 0) FAIL_STACK_ERROR
+
+ /* Create 1-D dataspace */
+ dim = BYPASS_DIM;
+ if((sid = H5Screate_simple(1, &dim, NULL)) < 0) FAIL_STACK_ERROR
+
+ /* Create dataset creation property list */
+ if((dcpl = H5Pcreate(H5P_DATASET_CREATE)) < 0) FAIL_STACK_ERROR
+
+ /* Define chunk size. There will be only 2 chunks in the dataset. */
+ chunk_dim = BYPASS_CHUNK_DIM;
+ if(H5Pset_chunk(dcpl, 1, &chunk_dim) < 0) FAIL_STACK_ERROR
+
+ /* Define fill value, fill time, and chunk allocation time */
+ if(H5Pset_fill_value(dcpl, H5T_NATIVE_INT, &fvalue) < 0) FAIL_STACK_ERROR
+
+ if(H5Pset_fill_time(dcpl, H5D_FILL_TIME_IFSET) < 0) FAIL_STACK_ERROR
+
+ if(H5Pset_alloc_time(dcpl, H5D_ALLOC_TIME_INCR) < 0) FAIL_STACK_ERROR
+
+ /* Try to create dataset */
+ dsid = H5Dcreate2(fid, BYPASS_DATASET, H5T_NATIVE_INT, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT);
+
+ /* Select first chunk to write the data */
+ offset = 0;
+ count = 1;
+ stride = 1;
+ block = BYPASS_CHUNK_DIM;
+ if(H5Sselect_hyperslab(sid, H5S_SELECT_SET, &offset, &stride, &count, &block) < 0)
+ FAIL_STACK_ERROR
+
+ for(i = 0; i < BYPASS_CHUNK_DIM; i++)
+ wdata[i] = i;
+
+ /* This write should bypass the cache because the chunk is bigger than the cache size
+ * and it's not allocated on disk. */
+ if(H5Dwrite(dsid, H5T_NATIVE_INT, H5S_ALL, sid, H5P_DEFAULT, wdata) < 0)
+ FAIL_STACK_ERROR
+
+ if(H5Dclose(dsid) < 0) FAIL_STACK_ERROR
+
+ /* Reopen the dataset */
+ if((dsid = H5Dopen(fid, BYPASS_DATASET, H5P_DEFAULT)) < 0) FAIL_STACK_ERROR
+
+ /* Reads both 2 chunks. Reading the second chunk should bypass the cache because the
+ * chunk is bigger than the cache size and it isn't allocated on disk. */
+ if(H5Dread(dsid, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, H5P_DEFAULT, rdata) < 0)
+ FAIL_STACK_ERROR
+
+ for(i = 0; i < BYPASS_CHUNK_DIM; i++)
+ if(rdata[i] != i) {
+ printf(" Read different values than written in the 1st chunk.\n");
+ printf(" At line %d and index %d, rdata = %d. It should be %d.\n", __LINE__,
+ i, rdata[i], i);
+ FAIL_STACK_ERROR
+ }
+
+ for(j = BYPASS_CHUNK_DIM; j < BYPASS_DIM; j++)
+ if(rdata[j] != fvalue) {
+ printf(" Read different values than written in the 2nd chunk.\n");
+ printf(" At line %d and index %d, rdata = %d. It should be %d.\n", __LINE__,
+ i, rdata[i], fvalue);
+ FAIL_STACK_ERROR
+ }
+
+ /* Close IDs */
+ if(H5Sclose(sid) < 0) FAIL_STACK_ERROR
+ if(H5Dclose(dsid) < 0) FAIL_STACK_ERROR
+ if(H5Pclose(dcpl) < 0) FAIL_STACK_ERROR
+ if(H5Fclose(fid) < 0) FAIL_STACK_ERROR
+
+ PASSED();
+ return 0;
+
+error:
+ H5E_BEGIN_TRY {
+ H5Pclose(dcpl);
+ H5Dclose(dsid);
+ H5Sclose(sid);
+ H5Fclose(fid);
+ } H5E_END_TRY;
+ return -1;
+} /* end test_huge_chunks() */
+
+
+
+/*-------------------------------------------------------------------------
* Function: main
*
* Purpose: Tests the dataset interface (H5D)
@@ -6803,6 +6937,7 @@ main(void)
#endif /* H5_NO_DEPRECATED_SYMBOLS */
nerrors += (test_huge_chunks(my_fapl) < 0 ? 1 : 0);
nerrors += (test_chunk_cache(my_fapl) < 0 ? 1 : 0);
+ nerrors += (test_big_chunks_bypass_cache(my_fapl) < 0 ? 1 : 0);
if(H5Fclose(file) < 0)
goto error;