From 0bdedf0a39e859956b6810e848fa67d16b4e10a4 Mon Sep 17 00:00:00 2001 From: Neil Fortner Date: Thu, 12 Feb 2009 15:46:32 -0500 Subject: [svn-r16480] Purpose: Improve chunk cache Description: The meaning of the "nbytes" field in H5D_rdcc_t was not clear, and some places assumed it was the maximum size of the chunk cache, while some assumed it was the current size of the chunk cache. The end result was that only 1 chunk could be held in cache at a time. This field has been replaced by "nbytes_max" and "nbytes_used". Performance of cached I/O should improve greatly. Tested: jam, smirom (h5committest) --- release_docs/RELEASE.txt | 2 + src/H5D.c | 2 +- src/H5Dchunk.c | 28 ++++---- src/H5Dpkg.h | 4 +- src/H5Dtest.c | 46 ++++++++++++ test/tmisc.c | 184 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 250 insertions(+), 16 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 31b21f6..85b273d 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -142,6 +142,8 @@ Bug Fixes since HDF5-1.8.0 release Library ------- + - Fixed a bug that prevented more than one dataset chunk from being cached + at a time. (NAF - 2009/02/12) - Fixed an assertion failure caused by opening an attribute multiple times through multiple file handles. (NAF - 2009/02/12) - Fixed a problem that could prevent the user from adding attributes (or diff --git a/src/H5D.c b/src/H5D.c index 9c552c2..4dad2aa 100644 --- a/src/H5D.c +++ b/src/H5D.c @@ -712,7 +712,7 @@ H5Dget_access_plist(hid_t dset_id) if (dset->shared->layout.type == H5D_CHUNKED) { if (H5P_set(new_plist, H5D_ACS_DATA_CACHE_NUM_SLOTS_NAME, &(dset->shared->cache.chunk.nslots)) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set data cache number of slots") - if (H5P_set(new_plist, H5D_ACS_DATA_CACHE_BYTE_SIZE_NAME, &(dset->shared->cache.chunk.nbytes)) < 0) + if (H5P_set(new_plist, H5D_ACS_DATA_CACHE_BYTE_SIZE_NAME, &(dset->shared->cache.chunk.nbytes_max)) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set data cache byte size") if (H5P_set(new_plist, H5D_ACS_PREEMPT_READ_CHUNKS_NAME, &(dset->shared->cache.chunk.w0)) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set preempt read chunks") diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 1c27ede..a64f1ef 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -1396,7 +1396,7 @@ H5D_chunk_cacheable(const H5D_io_info_t *io_info) * cache, just write the data to it directly. */ H5_CHECK_OVERFLOW(dataset->shared->layout.u.chunk.size, uint32_t, size_t); - if((size_t)dataset->shared->layout.u.chunk.size > dataset->shared->cache.chunk.nbytes) + if((size_t)dataset->shared->layout.u.chunk.size > dataset->shared->cache.chunk.nbytes_max) ret_value = FALSE; else ret_value = TRUE; @@ -1848,19 +1848,19 @@ H5D_chunk_init(H5F_t *f, hid_t dapl_id, hid_t dxpl_id, const H5D_t *dset) if(rdcc->nslots == H5D_CHUNK_CACHE_NSLOTS_DEFAULT) rdcc->nslots = H5F_RDCC_NSLOTS(f); - if(H5P_get(dapl, H5D_ACS_DATA_CACHE_BYTE_SIZE_NAME, &rdcc->nbytes) < 0) + if(H5P_get(dapl, H5D_ACS_DATA_CACHE_BYTE_SIZE_NAME, &rdcc->nbytes_max) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET,FAIL, "can't get data cache byte size"); - if(rdcc->nbytes == H5D_CHUNK_CACHE_NBYTES_DEFAULT) - rdcc->nbytes = H5F_RDCC_NBYTES(f); + if(rdcc->nbytes_max == H5D_CHUNK_CACHE_NBYTES_DEFAULT) + rdcc->nbytes_max = H5F_RDCC_NBYTES(f); if(H5P_get(dapl, H5D_ACS_PREEMPT_READ_CHUNKS_NAME, &rdcc->w0) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET,FAIL, "can't get preempt read chunks"); if(rdcc->w0 < 0) rdcc->w0 = H5F_RDCC_W0(f); - /* If nbytes or nslots is 0, set them both to 0 and avoid allocating space */ - if(!rdcc->nbytes || !rdcc->nslots) - rdcc->nbytes = rdcc->nslots = 0; + /* If nbytes_max or nslots is 0, set them both to 0 and avoid allocating space */ + if(!rdcc->nbytes_max || !rdcc->nslots) + rdcc->nbytes_max = rdcc->nslots = 0; else { rdcc->slot = H5FL_SEQ_CALLOC(H5D_rdcc_ent_ptr_t, rdcc->nslots); if(NULL == rdcc->slot) @@ -2135,7 +2135,7 @@ H5D_chunk_get_info(const H5D_t *dset, hid_t dxpl_id, const hsize_t *chunk_offset /* The same condition check as H5D_chunk_cacheable. */ if(dset->shared->dcpl_cache.pline.nused || - ((size_t)dset->shared->layout.u.chunk.size <= dset->shared->cache.chunk.nbytes)) + ((size_t)dset->shared->layout.u.chunk.size <= dset->shared->cache.chunk.nbytes_max)) /* Cache the information retrieved */ H5D_chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata); } /* end if */ @@ -2345,7 +2345,7 @@ H5D_chunk_cache_evict(const H5D_t *dset, hid_t dxpl_id, const H5D_dxpl_cache_t * /* Remove from cache */ rdcc->slot[ent->idx] = NULL; ent->idx = UINT_MAX; - rdcc->nbytes -= ent->chunk_size; + rdcc->nbytes_used -= ent->chunk_size; --rdcc->nused; /* Free */ @@ -2375,7 +2375,7 @@ H5D_chunk_cache_prune(const H5D_t *dset, hid_t dxpl_id, const H5D_dxpl_cache_t *dxpl_cache, size_t size) { const H5D_rdcc_t *rdcc = &(dset->shared->cache.chunk); - size_t total = rdcc->nbytes; + size_t total = rdcc->nbytes_max; const int nmeth = 2; /*number of methods */ int w[1]; /*weighting as an interval */ H5D_rdcc_ent_t *p[2], *cur; /*list pointers */ @@ -2399,7 +2399,7 @@ H5D_chunk_cache_prune(const H5D_t *dset, hid_t dxpl_id, p[0] = rdcc->head; p[1] = NULL; - while((p[0] || p[1]) && (rdcc->nbytes + size) > total) { + while((p[0] || p[1]) && (rdcc->nbytes_used + size) > total) { int i; /* Local index variable */ /* Introduce new pointers */ @@ -2412,7 +2412,7 @@ H5D_chunk_cache_prune(const H5D_t *dset, hid_t dxpl_id, n[i] = p[i] ? p[i]->next : NULL; /* Give each method a chance */ - for(i = 0; i < nmeth && (rdcc->nbytes + size) > total; i++) { + for(i = 0; i < nmeth && (rdcc->nbytes_used + size) > total; i++) { if(0 == i && p[0] && !p[0]->locked && ((0 == p[0]->rd_count && 0 == p[0]->wr_count) || (0 == p[0]->rd_count && p[0]->chunk_size == p[0]->wr_count) || @@ -2632,7 +2632,7 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, } /* end else */ HDassert(found || chunk_size > 0); - if(!found && rdcc->nslots > 0 && chunk_size <= rdcc->nbytes && + if(!found && rdcc->nslots > 0 && chunk_size <= rdcc->nbytes_max && (!ent || !ent->locked)) { /* * Add the chunk to the cache only if the slot is not already locked. @@ -2662,7 +2662,7 @@ H5D_chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, HDassert(NULL == rdcc->slot[idx]); rdcc->slot[idx] = ent; ent->idx = idx; - rdcc->nbytes += chunk_size; + rdcc->nbytes_used += chunk_size; rdcc->nused++; /* Add it to the linked list */ diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index e2600a3..3aa425b 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -365,11 +365,12 @@ typedef struct H5D_rdcc_t { unsigned nmisses;/* Number of cache misses */ unsigned nflushes;/* Number of cache flushes */ } stats; - size_t nbytes; /* Current cached raw data in bytes */ + size_t nbytes_max; /* Maximum cached raw data in bytes */ size_t nslots; /* Number of chunk slots allocated */ double w0; /* Chunk preemption policy */ struct H5D_rdcc_ent_t *head; /* Head of doubly linked list */ struct H5D_rdcc_ent_t *tail; /* Tail of doubly linked list */ + size_t nbytes_used; /* Current cached raw data in bytes */ int nused; /* Number of chunk slots in use */ H5D_chunk_cached_t last; /* Cached copy of last chunk information */ struct H5D_rdcc_ent_t **slot; /* Chunk slots, each points to a chunk*/ @@ -667,6 +668,7 @@ H5_DLL htri_t H5D_mpio_opt_possible(const H5D_io_info_t *io_info, #ifdef H5D_TESTING H5_DLL herr_t H5D_layout_version_test(hid_t did, unsigned *version); H5_DLL herr_t H5D_layout_contig_size_test(hid_t did, hsize_t *size); +H5_DLL herr_t H5D_current_cache_size_test(hid_t did, size_t *nbytes_used, int *nused); #endif /* H5D_TESTING */ #endif /*_H5Dpkg_H*/ diff --git a/src/H5Dtest.c b/src/H5Dtest.c index f7124f0..d9a418e 100644 --- a/src/H5Dtest.c +++ b/src/H5Dtest.c @@ -135,3 +135,49 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5D_layout_contig_size_test() */ + +/*-------------------------------------------------------------------------- + NAME + H5D_current_cache_size_test + PURPOSE + Determine current the size of the dataset's chunk cache + USAGE + herr_t H5D_layout_contig_size_test(did, size) + hid_t did; IN: Dataset to query + hsize_t *size; OUT: Pointer to location to place size info + RETURNS + Non-negative on success, negative on failure + DESCRIPTION + Checks the size of a contiguous dataset's storage. + GLOBAL VARIABLES + COMMENTS, BUGS, ASSUMPTIONS + DO NOT USE THIS FUNCTION FOR ANYTHING EXCEPT TESTING + EXAMPLES + REVISION LOG +--------------------------------------------------------------------------*/ +herr_t +H5D_current_cache_size_test(hid_t did, size_t *nbytes_used, int *nused) +{ + H5D_t *dset; /* Pointer to dataset to query */ + herr_t ret_value = SUCCEED; /* return value */ + + FUNC_ENTER_NOAPI(H5D_current_cache_size_test, FAIL) + + /* Check args */ + if(NULL == (dset = (H5D_t *)H5I_object_verify(did, H5I_DATASET))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset") + + if(nbytes_used) { + HDassert(dset->shared->layout.type == H5D_CHUNKED); + *nbytes_used = dset->shared->cache.chunk.nbytes_used; + } /* end if */ + + if(nused) { + HDassert(dset->shared->layout.type == H5D_CHUNKED); + *nused = dset->shared->cache.chunk.nused; + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5D_current_cache_size_test() */ + diff --git a/test/tmisc.c b/test/tmisc.c index 746af5d..875a92b 100644 --- a/test/tmisc.c +++ b/test/tmisc.c @@ -300,6 +300,11 @@ unsigned m13_rdata[MISC13_DIM1][MISC13_DIM2]; /* Data read from dataset #define MISC27_FILE "tbad_msg_count.h5" #define MISC27_GROUP "Group" +/* Definitions for misc. test #28 */ +#define MISC28_FILE "tmisc28.h5" +#define MISC28_SIZE 10 +#define MISC28_NSLOTS 10000 + /**************************************************************** ** ** test_misc1(): test unlinking a dataset from a group and immediately @@ -4854,6 +4859,183 @@ test_misc27(void) CHECK(ret, FAIL, "H5Fclose"); } /* end test_misc27() */ + +/**************************************************************** +** +** test_misc28(): Ensure that the dataset chunk cache will hold +** the correct number of chunks in cache without +** evicting them. +** +****************************************************************/ +static void +test_misc28(void) +{ + hid_t fid; /* File ID */ + hid_t sidf; /* File Dataspace ID */ + hid_t sidm; /* Memory Dataspace ID */ + hid_t did; /* Dataset ID */ + hid_t dcpl, fapl; /* Property List IDs */ + hsize_t dims[] = {MISC28_SIZE, MISC28_SIZE}; + hsize_t mdims[] = {MISC28_SIZE}; + hsize_t cdims[] = {1, 1}; + hsize_t start[] = {0,0}; + hsize_t count[] = {MISC28_SIZE, 1}; + size_t nbytes_used; + int nused; + char buf[10]; + int i; + herr_t ret; /* Generic return value */ + + /* Output message about test being performed */ + MESSAGE(5, ("Dataset chunk cache\n")); + + /* Create the fapl and set the cache size. Set nelmts to larger than the + * file size so we can be guaranteed that no chunks will be evicted due to + * a hash collision. Set nbytes to fit exactly 1 column of chunks (10 + * bytes). */ + fapl = H5Pcreate(H5P_FILE_ACCESS); + CHECK(fapl, FAIL, "H5Pcreate"); + ret = H5Pset_cache(fapl, MISC28_NSLOTS, MISC28_NSLOTS, MISC28_SIZE, 0.75); + CHECK(ret, FAIL, "H5Pset_cache"); + + /* Create the dcpl and set the chunk size */ + dcpl = H5Pcreate(H5P_DATASET_CREATE); + CHECK(dcpl, FAIL, "H5Pcreate"); + ret = H5Pset_chunk(dcpl, 2, cdims); + CHECK(ret, FAIL, "H5Pset_chunk"); + + + /* Create a new file and datasets within that file that use these + * property lists + */ + fid = H5Fcreate(MISC28_FILE, H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + CHECK(fid, FAIL, "H5Fcreate"); + + sidf = H5Screate_simple(2, dims, NULL); + CHECK(sidf, FAIL, "H5Screate_simple"); + + did = H5Dcreate2(fid, "dataset", H5T_NATIVE_CHAR, sidf, H5P_DEFAULT, dcpl, H5P_DEFAULT); + CHECK(did, FAIL, "H5Dcreate2"); + + /* Verify that the chunk cache is empty */ + ret = H5D_current_cache_size_test(did, &nbytes_used, &nused); + CHECK(ret, FAIL, "H5D_current_cache_size_test"); + VERIFY(nbytes_used, (size_t) 0, "H5D_current_cache_size_test"); + VERIFY(nused, 0, "H5D_current_cache_size_test"); + + /* Initialize write buffer */ + for(i=0; i