summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--tools/h5repack/h5repack_copy.c39
-rw-r--r--tools/h5repack/h5repacktst.c70
-rw-r--r--tools/lib/h5diff_dset.c45
-rw-r--r--tools/lib/h5tools.h2
4 files changed, 107 insertions, 49 deletions
diff --git a/tools/h5repack/h5repack_copy.c b/tools/h5repack/h5repack_copy.c
index 3e9d2b6..6512444 100644
--- a/tools/h5repack/h5repack_copy.c
+++ b/tools/h5repack/h5repack_copy.c
@@ -159,6 +159,45 @@ out:
* October 2006: Read/write using the file type by default.
* Read/write by hyperslabs for big datasets.
*
+ * H5TOOLS_MALLOCSIZE (128 MB) is the threshold above which hyperslab I/O is used,
+ * i.e., if the memory needed to read a dataset is greater than this limit,
+ * then hyperslab I/O is done instead of a single I/O operation.
+ * For each dataset, the memory needed is calculated according to
+ *
+ * memory needed = number of elements * size of each element
+ *
+ * if the memory needed is lower than H5TOOLS_MALLOCSIZE, then the following operations
+ * are done
+ *
+ * H5Dread( input_dataset )
+ * H5Dwrite( output_dataset )
+ *
+ * with all elements in the datasets selected. If the memory needed is greater than
+ * H5TOOLS_MALLOCSIZE, then the following operations are done instead:
+ *
+ * a strip mine is defined for each dimension k (a strip mine is defined as a
+ * hyperslab whose size is memory manageable) according to the formula
+ *
+ * (1) strip_mine_size[k ] = MIN(dimension[k ], H5TOOLS_BUFSIZE / size of memory type)
+ *
+ * where H5TOOLS_BUFSIZE is a constant currently defined as 1MB. This formula assures
+ * that for small datasets (small relative to the H5TOOLS_BUFSIZE constant), the strip
+ * mine size k is simply defined as its dimension k, but for larger datasets the
+ * hyperslab size is still memory manageable.
+ * a cycle is done until the number of elements in the dataset is reached. In each
+ * iteration, two parameters are defined for the function H5Sselect_hyperslab,
+ * the start and size of each hyperslab, according to
+ *
+ * (2) hyperslab_size [k] = MIN(dimension[k] - hyperslab_offset[k], strip_mine_size [k])
+ *
+ * where hyperslab_offset[k] is initially set to zero and is then incremented by
+ * hyperslab_size[k] after each iteration. The reason for the operation
+ *
+ * dimension[k] - hyperslab_offset[k]
+ *
+ * in (2) is that, when using the strip mine size, it assures that the "remaining" part
+ * of the dataset that does not fill an entire strip mine is processed.
+ *
*-------------------------------------------------------------------------
*/
diff --git a/tools/h5repack/h5repacktst.c b/tools/h5repack/h5repacktst.c
index 33d4692..1a6f816 100644
--- a/tools/h5repack/h5repacktst.c
+++ b/tools/h5repack/h5repacktst.c
@@ -79,7 +79,6 @@ int d_status = EXIT_SUCCESS;
#define CDIM1 DIM1/2
#define CDIM2 DIM2/2
#define RANK 2
-#define GBLL ((unsigned long_long) 1024*1024*1024)
/*-------------------------------------------------------------------------
* prototypes
@@ -99,7 +98,7 @@ int make_fletcher32(hid_t loc_id);
int make_all(hid_t loc_id);
int make_fill(hid_t loc_id);
-int make_big(hid_t loc_id, int set_chunk);
+int make_big(hid_t loc_id);
int make_testfiles(void);
void write_dset_in(hid_t loc_id,const char* dset_name,hid_t file_id,int make_diffs );
void write_attr_in(hid_t loc_id,const char* dset_name,hid_t fid,int make_diffs );
@@ -1333,7 +1332,7 @@ int make_testfiles(void)
*/
if((loc_id = H5Fcreate(FNAME14,H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT))<0)
return -1;
- if (make_big(loc_id, 1)<0)
+ if (make_big(loc_id)<0)
goto out;
if(H5Fclose(loc_id)<0)
return -1;
@@ -2219,49 +2218,42 @@ out:
/*-------------------------------------------------------------------------
* Function: make_big
*
- * Purpose: used in test read by hyperslabs. Can create 1GB datasets, either with
- * chunk layout or with contiguous layout, by iterating trough 1MB hyperslabs.
- * Only 1 hyperslab is written. Only the chunk case is called.
+ * Purpose: used in test read by hyperslabs. Creates a dataset just over 128MB
+ * (H5TOOLS_MALLOCSIZE + 1 bytes). Only one 1024-byte hyperslab is written.
*
*-------------------------------------------------------------------------
*/
-int make_big(hid_t loc_id, int set_chunk)
+int make_big(hid_t loc_id)
{
hid_t did=-1;
hid_t f_sid=-1;
hid_t m_sid=-1;
hid_t tid;
hid_t dcpl;
- hsize_t dims[1]={GBLL}; /* dataset dimensions */
- hsize_t hs_size[1]={GBLL/1024}; /* hyperslab dimensions */
- hsize_t chunk_dims[1]={GBLL/1024}; /* chunk dimensions */
- hsize_t hs_start[1];
+ hsize_t dims[1]={ H5TOOLS_MALLOCSIZE + 1}; /* dataset dimensions */
+ hsize_t hs_size[1]; /* hyperslab dimensions */
+ hsize_t hs_start[1]; /* hyperslab start */
+ hsize_t chunk_dims[1]={1024}; /* chunk dimensions */
size_t size;
- size_t nelmts=(size_t)GBLL/1024;
+ size_t nelmts=(size_t)1024;
signed char fillvalue=-1;
signed char *buf=NULL;
- int i, j, s;
- char c;
- char name[20];
- strcpy(name,"conti");
+ /* write one 1024 byte hyperslab */
+ hs_start[0] = 0;
+ hs_size[0] = 1024;
/* create */
if ((dcpl = H5Pcreate(H5P_DATASET_CREATE))<0)
goto out;
if (H5Pset_fill_value(dcpl, H5T_NATIVE_SCHAR, &fillvalue)<0)
goto out;
-
- if (set_chunk)
- {
- strcpy(name,"chunk");
- if(H5Pset_chunk(dcpl, 1, chunk_dims)<0)
- goto out;
- }
+ if(H5Pset_chunk(dcpl, 1, chunk_dims)<0)
+ goto out;
if ((f_sid = H5Screate_simple(1,dims,NULL))<0)
goto out;
- if ((did = H5Dcreate(loc_id,name,H5T_NATIVE_SCHAR,f_sid,dcpl))<0)
+ if ((did = H5Dcreate(loc_id,"dset",H5T_NATIVE_SCHAR,f_sid,dcpl))<0)
goto out;
if ((m_sid = H5Screate_simple(1, hs_size, hs_size))<0)
goto out;
@@ -2270,30 +2262,14 @@ int make_big(hid_t loc_id, int set_chunk)
if ((size = H5Tget_size(tid))<=0)
goto out;
- /* create a evenly divided buffer from 0 to 127 */
- buf=(signed char *) HDmalloc((unsigned)(nelmts*size));
- s = 1024 / 127;
- for (i=0, j=0, c=0; i<1024; j++, i++)
- {
- if ( j==s)
- {
- c++;
- j=0;
- };
-
- HDmemset(buf, c, nelmts);
-
- hs_start[0] = i * GBLL/1024;
- if (H5Sselect_hyperslab (f_sid,H5S_SELECT_SET,hs_start,NULL,hs_size, NULL)<0)
- goto out;
- if (H5Dwrite (did,H5T_NATIVE_SCHAR,m_sid,f_sid,H5P_DEFAULT,buf)<0)
- goto out;
+ /* initialize buffer to 0 */
+ buf=(signed char *) calloc( nelmts, size);
+
+ if (H5Sselect_hyperslab (f_sid,H5S_SELECT_SET,hs_start,NULL,hs_size, NULL)<0)
+ goto out;
+ if (H5Dwrite (did,H5T_NATIVE_SCHAR,m_sid,f_sid,H5P_DEFAULT,buf)<0)
+ goto out;
- /* write only one hyperslab */
- if (i==0)
- break;
-
- }
free(buf);
buf=NULL;
diff --git a/tools/lib/h5diff_dset.c b/tools/lib/h5diff_dset.c
index d9ba4da..2a88202 100644
--- a/tools/lib/h5diff_dset.c
+++ b/tools/lib/h5diff_dset.c
@@ -127,6 +127,49 @@ error:
*
* Date: May 9, 2003
*
+ * Modifications:
+ *
+ * October 2006: Read by hyperslabs for big datasets.
+ *
+ * H5TOOLS_MALLOCSIZE (128 MB) is the threshold above which hyperslab I/O is used,
+ * i.e., if the memory needed to read a dataset is greater than this limit,
+ * then hyperslab I/O is done instead of a single I/O operation.
+ * For each dataset, the memory needed is calculated according to
+ *
+ * memory needed = number of elements * size of each element
+ *
+ * if the memory needed is lower than H5TOOLS_MALLOCSIZE, then the following operations
+ * are done
+ *
+ * H5Dread( input_dataset1 )
+ * H5Dread( input_dataset2 )
+ *
+ * with all elements in the datasets selected. If the memory needed is greater than
+ * H5TOOLS_MALLOCSIZE, then the following operations are done instead:
+ *
+ * a strip mine is defined for each dimension k (a strip mine is defined as a
+ * hyperslab whose size is memory manageable) according to the formula
+ *
+ * (1) strip_mine_size[k ] = MIN(dimension[k ], H5TOOLS_BUFSIZE / size of memory type)
+ *
+ * where H5TOOLS_BUFSIZE is a constant currently defined as 1MB. This formula assures
+ * that for small datasets (small relative to the H5TOOLS_BUFSIZE constant), the strip
+ * mine size k is simply defined as its dimension k, but for larger datasets the
+ * hyperslab size is still memory manageable.
+ * a cycle is done until the number of elements in the dataset is reached. In each
+ * iteration, two parameters are defined for the function H5Sselect_hyperslab,
+ * the start and size of each hyperslab, according to
+ *
+ * (2) hyperslab_size [k] = MIN(dimension[k] - hyperslab_offset[k], strip_mine_size [k])
+ *
+ * where hyperslab_offset[k] is initially set to zero and is then incremented by
+ * hyperslab_size[k] after each iteration. The reason for the operation
+ *
+ * dimension[k] - hyperslab_offset[k]
+ *
+ * in (2) is that, when using the strip mine size, it assures that the "remaining" part
+ * of the dataset that does not fill an entire strip mine is processed.
+ *
*-------------------------------------------------------------------------
*/
hsize_t diff_datasetid( hid_t did1,
@@ -440,7 +483,7 @@ hsize_t diff_datasetid( hid_t did1,
H5Sselect_all(sm_space);
hs_nelmts = 1;
} /* rank */
-
+
if ( H5Dread(did1,m_tid1,sm_space,sid1,H5P_DEFAULT,sm_buf1) < 0 )
goto error;
if ( H5Dread(did2,m_tid2,sm_space,sid2,H5P_DEFAULT,sm_buf2) < 0 )
diff --git a/tools/lib/h5tools.h b/tools/lib/h5tools.h
index 8a076a7..dcb67c0 100644
--- a/tools/lib/h5tools.h
+++ b/tools/lib/h5tools.h
@@ -45,7 +45,7 @@
/*
* Maximum size used in a call to malloc
*/
-#define H5TOOLS_MALLOCSIZE ((size_t)1024 * 1024 * 1024)
+#define H5TOOLS_MALLOCSIZE (128 * 1024 * 1024)
/* format for hsize_t */