From 58cc7fb19a902b4653bede798e633caaeb214528 Mon Sep 17 00:00:00 2001 From: Pedro Vicente Nunes Date: Wed, 16 May 2007 11:41:43 -0500 Subject: [svn-r13757] h5repack and h5diff hyperslab I/O changed the limit on which hyperslab I/O is done from 1GB to 128MB h5repack currently tests this feature by defining a dataset with dimensions of 128MB + 1byte (the datum being 1 byte integer), in which a 1Kb hyperslab was written. tested: linux there were some errors in running the tests, but these were in the library, due to a recent check-in that will be fixed shortly (at least we hope :-) ) --- tools/h5repack/h5repack_copy.c | 44 ++++++++++++++++++++++++-- tools/h5repack/h5repacktst.c | 70 +++++++++++++++--------------------------- tools/lib/h5diff_dset.c | 44 ++++++++++++++++++++++++++ tools/lib/h5tools.h | 2 +- 4 files changed, 111 insertions(+), 49 deletions(-) diff --git a/tools/h5repack/h5repack_copy.c b/tools/h5repack/h5repack_copy.c index 4253678..54e9dd2 100644 --- a/tools/h5repack/h5repack_copy.c +++ b/tools/h5repack/h5repack_copy.c @@ -155,7 +155,47 @@ out: * with the requested filter, use the input one * * October 2006: Read/write using the file type by default. - * Read/write by hyperslabs for big datasets. + * + * October 2006: Read by hyperslabs for big datasets. + * + * A threshold of H5TOOLS_MALLOCSIZE (128 MB) is the limit upon which I/O hyperslab is done + * i.e., if the memory needed to read a dataset is greater than this limit, + * then hyperslab I/O is done instead of one operation I/O + * For each dataset, the memory needed is calculated according to + * + * memory needed = number of elements * size of each element + * + * if the memory needed is lower than H5TOOLS_MALLOCSIZE, then the following operations + * are done + * + * H5Dread( input_dataset1 ) + * H5Dread( input_dataset2 ) + * + * with all elements in the datasets selected. 
If the memory needed is greater than + * H5TOOLS_MALLOCSIZE, then the following operations are done instead: + * + * a strip mine is defined for each dimension k (a strip mine is defined as a + * hyperslab whose size is memory manageable) according to the formula + * + * (1) strip_mine_size[k ] = MIN(dimension[k ], H5TOOLS_BUFSIZE / size of memory type) + * + * where H5TOOLS_BUFSIZE is a constant currently defined as 1MB. This formula assures + * that for small datasets (small relative to the H5TOOLS_BUFSIZE constant), the strip + * mine size k is simply defined as its dimension k, but for larger datasets the + * hyperslab size is still memory manageable. + * a cycle is done until the number of elements in the dataset is reached. In each + * iteration, two parameters are defined for the function H5Sselect_hyperslab, + * the start and size of each hyperslab, according to + * + * (2) hyperslab_size [k] = MIN(dimension[k] - hyperslab_offset[k], strip_mine_size [k]) + * + * where hyperslab_offset [k] is initially set to zero, and later incremented in + * hyperslab_size[k] offsets. The reason for the operation + * + * dimension[k] - hyperslab_offset[k] + * + * in (2) is that, when using the strip mine size, it assures that the "remaining" part + * of the dataset that does not fill an entire strip mine is processed. * * November 2006: Use H5Ocopy in the copy of objects. 
The logic for using * H5Ocopy or not is if a change of filters or layout is requested by the user @@ -273,7 +313,7 @@ int do_copy_objects(hid_t fidin, options->all_filter==1 || options->all_layout==1 || is_ref ) - { + { int j; if ((dset_in=H5Dopen(fidin,travt->objs[i].name))<0) diff --git a/tools/h5repack/h5repacktst.c b/tools/h5repack/h5repacktst.c index 38e050c..6d1feaf 100644 --- a/tools/h5repack/h5repacktst.c +++ b/tools/h5repack/h5repacktst.c @@ -104,7 +104,7 @@ int make_nbit(hid_t loc_id); int make_scaleoffset(hid_t loc_id); int make_all(hid_t loc_id); int make_fill(hid_t loc_id); -int make_big(hid_t loc_id, int set_chunk); +int make_big(hid_t loc_id); int make_testfiles(void); void write_dset_in(hid_t loc_id,const char* dset_name,hid_t file_id,int make_diffs ); void write_attr_in(hid_t loc_id,const char* dset_name,hid_t fid,int make_diffs ); @@ -1480,7 +1480,7 @@ int make_testfiles(void) */ if((loc_id = H5Fcreate(FNAME14,H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT))<0) return -1; - if (make_big(loc_id, 1)<0) + if (make_big(loc_id)<0) goto out; if(H5Fclose(loc_id)<0) return -1; @@ -2592,49 +2592,42 @@ out: /*------------------------------------------------------------------------- * Function: make_big * - * Purpose: used in test read by hyperslabs. Can create 1GB datasets, either with - * chunk layout or with contiguous layout, by iterating trough 1MB hyperslabs. - * Only 1 hyperslab is written. Only the chunk case is called. + * Purpose: used in test read by hyperslabs. Creates a 128MB + 1 byte dataset. + * Only one 1024-byte hyperslab is written. 
* *------------------------------------------------------------------------- */ -int make_big(hid_t loc_id, int set_chunk) +int make_big(hid_t loc_id) { hid_t did=-1; hid_t f_sid=-1; hid_t m_sid=-1; hid_t tid; hid_t dcpl; - hsize_t dims[1]={GBLL}; /* dataset dimensions */ - hsize_t hs_size[1]={GBLL/1024}; /* hyperslab dimensions */ - hsize_t chunk_dims[1]={GBLL/1024}; /* chunk dimensions */ - hsize_t hs_start[1]; + hsize_t dims[1]={ H5TOOLS_MALLOCSIZE + 1}; /* dataset dimensions */ + hsize_t hs_size[1]; /* hyperslab dimensions */ + hsize_t hs_start[1]; /* hyperslab start */ + hsize_t chunk_dims[1]={1024}; /* chunk dimensions */ size_t size; - size_t nelmts=(size_t)GBLL/1024; + size_t nelmts=(size_t)1024; signed char fillvalue=-1; signed char *buf=NULL; - int i, j, s; - char c; - char name[20]; - strcpy(name,"conti"); + /* write one 1024 byte hyperslab */ + hs_start[0] = 0; + hs_size[0] = 1024; /* create */ if ((dcpl = H5Pcreate(H5P_DATASET_CREATE))<0) goto out; if (H5Pset_fill_value(dcpl, H5T_NATIVE_SCHAR, &fillvalue)<0) goto out; - - if (set_chunk) - { - strcpy(name,"chunk"); - if(H5Pset_chunk(dcpl, 1, chunk_dims)<0) - goto out; - } + if(H5Pset_chunk(dcpl, 1, chunk_dims)<0) + goto out; if ((f_sid = H5Screate_simple(1,dims,NULL))<0) goto out; - if ((did = H5Dcreate(loc_id,name,H5T_NATIVE_SCHAR,f_sid,dcpl))<0) + if ((did = H5Dcreate(loc_id,"dset",H5T_NATIVE_SCHAR,f_sid,dcpl))<0) goto out; if ((m_sid = H5Screate_simple(1, hs_size, hs_size))<0) goto out; @@ -2643,30 +2636,14 @@ int make_big(hid_t loc_id, int set_chunk) if ((size = H5Tget_size(tid))<=0) goto out; - /* create a evenly divided buffer from 0 to 127 */ - buf=(signed char *) HDmalloc((unsigned)(nelmts*size)); - s = 1024 / 127; - for (i=0, j=0, c=0; i<1024; j++, i++) - { - if ( j==s) - { - c++; - j=0; - }; - - HDmemset(buf, c, nelmts); - - hs_start[0] = i * GBLL/1024; - if (H5Sselect_hyperslab (f_sid,H5S_SELECT_SET,hs_start,NULL,hs_size, NULL)<0) - goto out; - if (H5Dwrite 
(did,H5T_NATIVE_SCHAR,m_sid,f_sid,H5P_DEFAULT,buf)<0) - goto out; + /* initialize buffer to 0 */ + buf=(signed char *) calloc( nelmts, size); + + if (H5Sselect_hyperslab (f_sid,H5S_SELECT_SET,hs_start,NULL,hs_size, NULL)<0) + goto out; + if (H5Dwrite (did,H5T_NATIVE_SCHAR,m_sid,f_sid,H5P_DEFAULT,buf)<0) + goto out; - /* write only one hyperslab */ - if (i==0) - break; - - } free(buf); buf=NULL; @@ -2693,6 +2670,7 @@ out: } + /*------------------------------------------------------------------------- * Function: make_external * diff --git a/tools/lib/h5diff_dset.c b/tools/lib/h5diff_dset.c index ef6d39f..c714d45 100644 --- a/tools/lib/h5diff_dset.c +++ b/tools/lib/h5diff_dset.c @@ -152,6 +152,50 @@ error: * * Date: May 9, 2003 * + * Modifications: + * + * + * October 2006: Read by hyperslabs for big datasets. + * + * A threshold of H5TOOLS_MALLOCSIZE (128 MB) is the limit upon which I/O hyperslab is done + * i.e., if the memory needed to read a dataset is greater than this limit, + * then hyperslab I/O is done instead of one operation I/O + * For each dataset, the memory needed is calculated according to + * + * memory needed = number of elements * size of each element + * + * if the memory needed is lower than H5TOOLS_MALLOCSIZE, then the following operations + * are done + * + * H5Dread( input_dataset1 ) + * H5Dread( input_dataset2 ) + * + * with all elements in the datasets selected. If the memory needed is greater than + * H5TOOLS_MALLOCSIZE, then the following operations are done instead: + * + * a strip mine is defined for each dimension k (a strip mine is defined as a + * hyperslab whose size is memory manageable) according to the formula + * + * (1) strip_mine_size[k ] = MIN(dimension[k ], H5TOOLS_BUFSIZE / size of memory type) + * + * where H5TOOLS_BUFSIZE is a constant currently defined as 1MB. 
This formula assures + * that for small datasets (small relative to the H5TOOLS_BUFSIZE constant), the strip + * mine size k is simply defined as its dimension k, but for larger datasets the + * hyperslab size is still memory manageable. + * a cycle is done until the number of elements in the dataset is reached. In each + * iteration, two parameters are defined for the function H5Sselect_hyperslab, + * the start and size of each hyperslab, according to + * + * (2) hyperslab_size [k] = MIN(dimension[k] - hyperslab_offset[k], strip_mine_size [k]) + * + * where hyperslab_offset [k] is initially set to zero, and later incremented in + * hyperslab_size[k] offsets. The reason for the operation + * + * dimension[k] - hyperslab_offset[k] + * + * in (2) is that, when using the strip mine size, it assures that the "remaining" part + * of the dataset that does not fill an entire strip mine is processed. + * *------------------------------------------------------------------------- */ hsize_t diff_datasetid( hid_t did1, diff --git a/tools/lib/h5tools.h b/tools/lib/h5tools.h index fa05a9f..a0f54736 100644 --- a/tools/lib/h5tools.h +++ b/tools/lib/h5tools.h @@ -45,7 +45,7 @@ /* * Maximum size used in a call to malloc */ -#define H5TOOLS_MALLOCSIZE ((size_t)1024 * 1024 * 1024) +#define H5TOOLS_MALLOCSIZE (128 * 1024 * 1024) /* format for hsize_t */ #define HSIZE_T_FORMAT "%"H5_PRINTF_LL_WIDTH"u" -- cgit v0.12