author     Ray Lu <songyulu@hdfgroup.org>    2018-11-15 15:43:46 (GMT)
committer  Ray Lu <songyulu@hdfgroup.org>    2018-11-15 15:43:46 (GMT)
commit     e07d097da16a69cdd3d0d305595b241e6cf39f60 (patch)
tree       01f90d465371c02fe22c6b25d19f6e6abe0824be
parent     73f881a8385fffc7b48f3c2ec3ba538425966cbb (diff)
parent     cd13d24e5140578a880aebe4e2d8b899179d0870 (diff)
Merge pull request #1316 in HDFFV/hdf5 from ~SONGYULU/hdf5_ray:bugfix/HDFFV-10601-issues-with-chunk-cache-hash to develop
* commit 'cd13d24e5140578a880aebe4e2d8b899179d0870':
    HDFFV-10601: Added error checking to the HDF5 function calls.
    HDFFV-10601: Added a performance test to verify the improvement.
    HDFFV-10601: Switched to a better way to calculate the number of chunks in a dataset.
    HDFFV-10601: Issues with chunk cache hash value calculation.
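
In short: the old code computed the number of chunks per dimension with floor division, so a trailing partial chunk was never counted and the cache's hash parameters were sized too small. A minimal standalone sketch of the arithmetic (not HDF5 API; the extent and chunk values mirror one dimension of the partial_chunks dataset added in the diff below):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long curr_dim  = 9;   /* dataset extent along one dimension */
        unsigned long long chunk_dim = 2;   /* chunk extent along that dimension  */

        /* Old: floor division misses the trailing partial chunk. */
        unsigned long long floor_chunks = curr_dim / chunk_dim;                    /* 4 */

        /* New: round up to the next integer number of chunks. */
        unsigned long long ceil_chunks  = (curr_dim + chunk_dim - 1) / chunk_dim;  /* 5 */

        printf("floor: %llu chunks, ceiling: %llu chunks\n", floor_chunks, ceil_chunks);
        return 0;
    }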
-rw-r--r--   src/H5Dchunk.c                     |  22
-rw-r--r--   tools/test/perform/CMakeLists.txt  |  10
-rw-r--r--   tools/test/perform/Makefile.am     |   4
-rw-r--r--   tools/test/perform/chunk_cache.c   | 457
4 files changed, 479 insertions(+), 14 deletions(-)
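
The other half of the fix removes the special case in H5D__chunk_hash_val() that hashed on the fastest-changing chunk index alone whenever that dimension had more chunks than cache slots; under a column-by-column read, every chunk in a column then shared one slot and evicted its predecessor. A minimal sketch of the two hash schemes (hash_old, hash_new, and ENCODE_BITS_DIM1 are illustrative names, not library API), assuming the hash_value dataset geometry from the new test: a 3x6 chunk grid, 5 cache slots, and the 3-bit encode width the library derives via H5VM_power2up():

    #include <stdio.h>

    #define NSLOTS           5   /* RDCC_NSLOTS in the test */
    #define ENCODE_BITS_DIM1 3   /* assumption: log2 of next power of two of 6 chunks */

    /* Old behavior when the fastest dimension has more chunks than slots:
     * only the fastest-changing chunk index feeds the hash. */
    static unsigned hash_old(unsigned long long scaled0, unsigned long long scaled1)
    {
        (void)scaled0;   /* ignored by the old scheme */
        return (unsigned)(scaled1 % NSLOTS);
    }

    /* New behavior: always mix every dimension into the hash. */
    static unsigned hash_new(unsigned long long scaled0, unsigned long long scaled1)
    {
        unsigned long long val = scaled0;
        val <<= ENCODE_BITS_DIM1;
        val ^= scaled1;
        return (unsigned)(val % NSLOTS);
    }

    int main(void)
    {
        unsigned long long row;

        /* Reading one column of the 3x6 chunk grid touches chunks (0,0), (1,0), (2,0). */
        for (row = 0; row < 3; row++)
            printf("chunk (%llu,0): old slot %u, new slot %u\n",
                   row, hash_old(row, 0), hash_new(row, 0));
        return 0;
    }

With the old scheme all three chunks of a column map to slot 0, so each read evicts a chunk the next column will need again; the new scheme spreads them to slots 0, 3, and 1.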
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 22dc05a..cb6b925 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -949,7 +949,10 @@ H5D__chunk_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id)
         /* Initial scaled dimension sizes */
         if(dset->shared->layout.u.chunk.dim[u] == 0)
             HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "chunk size must be > 0, dim = %u ", u)
-        rdcc->scaled_dims[u] = dset->shared->curr_dims[u] / dset->shared->layout.u.chunk.dim[u];
+
+        /* Round up to the next integer # of chunks, to accommodate partial chunks */
+        rdcc->scaled_dims[u] = (dset->shared->curr_dims[u] + dset->shared->layout.u.chunk.dim[u] - 1) /
+                                dset->shared->layout.u.chunk.dim[u];
 
         if( !(scaled_power2up = H5VM_power2up(rdcc->scaled_dims[u])) )
             HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to get the next power of 2")
@@ -2799,6 +2802,7 @@ H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled)
     hsize_t val;                        /* Intermediate value */
     unsigned ndims = shared->ndims;     /* Rank of dataset */
     unsigned ret = 0;                   /* Value to return */
+    unsigned u;                         /* Local index variable */
 
     FUNC_ENTER_STATIC_NOERR
 
@@ -2809,17 +2813,11 @@ H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled)
 
     /* If the fastest changing dimension doesn't have enough entropy, use
      * other dimensions too */
-    if(ndims > 1 && shared->cache.chunk.scaled_dims[ndims - 1] <= shared->cache.chunk.nslots) {
-        unsigned u;             /* Local index variable */
-
-        val = scaled[0];
-        for(u = 1; u < ndims; u++) {
-            val <<= shared->cache.chunk.scaled_encode_bits[u];
-            val ^= scaled[u];
-        } /* end for */
-    } /* end if */
-    else
-        val = scaled[ndims - 1];
+    val = scaled[0];
+    for(u = 1; u < ndims; u++) {
+        val <<= shared->cache.chunk.scaled_encode_bits[u];
+        val ^= scaled[u];
+    } /* end for */
 
     /* Modulo value against the number of array slots */
     ret = (unsigned)(val % shared->cache.chunk.nslots);
diff --git a/tools/test/perform/CMakeLists.txt b/tools/test/perform/CMakeLists.txt
index fa41608..14abdec 100644
--- a/tools/test/perform/CMakeLists.txt
+++ b/tools/test/perform/CMakeLists.txt
@@ -51,6 +51,16 @@ TARGET_C_PROPERTIES (iopipe STATIC)
 target_link_libraries (iopipe PRIVATE ${HDF5_LIB_TARGET} ${HDF5_TOOLS_LIB_TARGET})
 set_target_properties (iopipe PROPERTIES FOLDER perform)
 
+#-- Adding test for chunk_cache
+set (chunk_cache_SOURCES
+    ${HDF5_TOOLS_TEST_PERFORM_SOURCE_DIR}/chunk_cache.c
+)
+add_executable (chunk_cache ${chunk_cache_SOURCES})
+target_include_directories(chunk_cache PRIVATE "${HDF5_SRC_DIR};${HDF5_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
+TARGET_C_PROPERTIES (chunk_cache STATIC)
+target_link_libraries (chunk_cache PRIVATE ${HDF5_LIB_TARGET} ${HDF5_TOOLS_LIB_TARGET})
+set_target_properties (chunk_cache PROPERTIES FOLDER perform)
+
 #-- Adding test for overhead
 set (overhead_SOURCES
     ${HDF5_TOOLS_TEST_PERFORM_SOURCE_DIR}/overhead.c
diff --git a/tools/test/perform/Makefile.am b/tools/test/perform/Makefile.am
index 5a89a66..39800d7 100644
--- a/tools/test/perform/Makefile.am
+++ b/tools/test/perform/Makefile.am
@@ -50,12 +50,12 @@ if BUILD_PARALLEL_CONDITIONAL
     TEST_PROG_PARA=h5perf perf
 endif
 # Serial test programs.
-TEST_PROG = iopipe chunk overhead zip_perf perf_meta h5perf_serial $(BUILD_ALL_PROGS)
+TEST_PROG = iopipe chunk chunk_cache overhead zip_perf perf_meta h5perf_serial $(BUILD_ALL_PROGS)
 
 # check_PROGRAMS will be built but not installed.  Do not any executable
 # that is in bin_PROGRAMS already.  Otherwise, it will be removed twice in
 # "make clean" and some systems, e.g., AIX, do not like it.
-check_PROGRAMS= iopipe chunk overhead zip_perf perf_meta $(BUILD_ALL_PROGS) perf
+check_PROGRAMS= iopipe chunk chunk_cache overhead zip_perf perf_meta $(BUILD_ALL_PROGS) perf
 
 h5perf_SOURCES=pio_perf.c pio_engine.c
 h5perf_serial_SOURCES=sio_perf.c sio_engine.c
diff --git a/tools/test/perform/chunk_cache.c b/tools/test/perform/chunk_cache.c
new file mode 100644
index 0000000..01571e2
--- /dev/null
+++ b/tools/test/perform/chunk_cache.c
@@ -0,0 +1,457 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group.                                               *
+ * Copyright by the Board of Trustees of the University of Illinois.         *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * This file is part of HDF5.  The full HDF5 copyright notice, including     *
+ * terms governing use, modification, and redistribution, is contained in    *
+ * the COPYING file, which can be found at the root of the source code       *
+ * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
+ * If you do not have access to either file, you may request a copy from     *
+ * help@hdfgroup.org.                                                        *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Purpose: check the performance of chunk cache in these two cases (HDFFV-10601):
+ *          1. partial chunks exist along any dimension.
+ *          2. number of slots in chunk cache is smaller than the number of chunks
+ *             in the fastest-growing dimension.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include "hdf5.h"
+
+#define FILENAME    "chunk_cache_perf.h5"
+
+#define RANK        2
+
+#define DSET1_NAME  "partial_chunks"
+#define DSET1_DIM1  9 * 1000
+#define DSET1_DIM2  9
+#define CHUNK1_DIM1 2 * 1000
+#define CHUNK1_DIM2 2
+
+#define DSET2_NAME  "hash_value"
+#define DSET2_DIM1  300
+#define DSET2_DIM2  600
+#define CHUNK2_DIM1 100
+#define CHUNK2_DIM2 100
+
+#define RDCC_NSLOTS 5
+#define RDCC_NBYTES 1024 * 1024 * 10
+#define RDCC_W0     0.75F
+
+#define FILTER_COUNTER 306
+static size_t nbytes_global;
+
+typedef struct test_time_t {
+    long tv_sec;
+    long tv_usec;
+} test_time_t;
+
+/* Local function prototypes for the dummy filter */
+static size_t counter (unsigned flags, size_t cd_nelmts,
+                       const unsigned *cd_values, size_t nbytes,
+                       size_t *buf_size, void **buf);
+
+/* This message derives from H5Z */
+const H5Z_class2_t H5Z_COUNTER[1] = {{
+    H5Z_CLASS_T_VERS,       /* H5Z_class_t version           */
+    FILTER_COUNTER,         /* Filter id number              */
+    1, 1,                   /* Encoding and decoding enabled */
+    "counter",              /* Filter name for debugging     */
+    NULL,                   /* The "can apply" callback      */
+    NULL,                   /* The "set local" callback      */
+    counter,                /* The actual filter function    */
+}};
+
+/*-------------------------------------------------------------------------
+ *      Count number of bytes but don't do anything else.  Keep
+ *      track of the data of chunks being read from file into memory.
+ */
+static size_t
+counter (unsigned flags, size_t cd_nelmts,
+         const unsigned *cd_values, size_t nbytes,
+         size_t *buf_size, void **buf)
+{
+    nbytes_global += nbytes;
+    return nbytes;
+}
+
+/*---------------------------------------------------------------------------*/
+static int
+test_time_get_current(test_time_t *tv)
+{
+    struct timespec tp;
+
+    if (!tv)
+        return -1;
+    if (clock_gettime(CLOCK_MONOTONIC, &tp))
+        return -1;
+
+    tv->tv_sec = tp.tv_sec;
+    tv->tv_usec = tp.tv_nsec / 1000;
+
+    return 0;
+}
+
+/*---------------------------------------------------------------------------*/
+static double
+test_time_to_double(test_time_t tv)
+{
+    return (double) tv.tv_sec + (double) (tv.tv_usec) * 0.000001;
+}
+
+/*---------------------------------------------------------------------------*/
+static test_time_t
+test_time_add(test_time_t in1, test_time_t in2)
+{
+    test_time_t out;
+
+    out.tv_sec = in1.tv_sec + in2.tv_sec;
+    out.tv_usec = in1.tv_usec + in2.tv_usec;
+    if(out.tv_usec > 1000000) {
+        out.tv_usec -= 1000000;
+        out.tv_sec += 1;
+    }
+
+    return out;
+}
+
+/*---------------------------------------------------------------------------*/
+static test_time_t
+test_time_subtract(test_time_t in1, test_time_t in2)
+{
+    test_time_t out;
+
+    out.tv_sec = in1.tv_sec - in2.tv_sec;
+    out.tv_usec = in1.tv_usec - in2.tv_usec;
+    if(out.tv_usec < 0) {
+        out.tv_usec += 1000000;
+        out.tv_sec -= 1;
+    }
+
+    return out;
+}
+
+/*-------------------------------------------------------------------------
+ * Function:    cleanup
+ *
+ * Purpose:     Removes test files
+ *
+ * Return:      void
+ *
+ * Programmer:  Robb Matzke
+ *              Thursday, June 4, 1998
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+cleanup (void)
+{
+    if (!getenv ("HDF5_NOCLEANUP")) {
+        remove (FILENAME);
+    }
+}
+
+/*-------------------------------------------------------------------------------
+ * Create a chunked dataset with partial chunks along either dimensions:
+ *      dataset dimension:  9000 x 9
+ *      chunk dimension:    2000 x 2
+ */
+static int create_dset1(hid_t file)
+{
+    hid_t        dataspace, dataset;
+    hid_t        dcpl;
+    hsize_t      dims[RANK] = {DSET1_DIM1, DSET1_DIM2};
+    hsize_t      chunk_dims[RANK] = {CHUNK1_DIM1, CHUNK1_DIM2};
+    int          data[DSET1_DIM1][DSET1_DIM2];    /* data for writing */
+    int          i, j;
+
+    /* Create the data space. */
+    if((dataspace = H5Screate_simple (RANK, dims, NULL)) < 0)
+        goto error;
+
+    /* Modify dataset creation properties, i.e. enable chunking */
+    if((dcpl = H5Pcreate (H5P_DATASET_CREATE)) < 0)
+        goto error;
+    if(H5Pset_chunk (dcpl, RANK, chunk_dims) < 0)
+        goto error;
+
+    /* Set the dummy filter simply for counting the number of bytes being read into the memory */
+    if(H5Zregister(H5Z_COUNTER) < 0)
+        goto error;
+
+    if(H5Pset_filter(dcpl, FILTER_COUNTER, 0, 0, NULL) < 0)
+        goto error;
+
+    /* Create a new dataset within the file using chunk creation properties. */
+    if((dataset = H5Dcreate2 (file, DSET1_NAME, H5T_NATIVE_INT, dataspace,
+                              H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0)
+        goto error;
+
+    for (i = 0; i < DSET1_DIM1; i++)
+        for (j = 0; j < DSET1_DIM2; j++)
+            data[i][j] = i+j;
+
+    /* Write data to dataset */
+    if(H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
+                 H5P_DEFAULT, data) < 0)
+        goto error;
+
+    /* Close resources */
+    H5Dclose (dataset);
+    H5Pclose (dcpl);
+    H5Sclose (dataspace);
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Dclose (dataset);
+        H5Pclose (dcpl);
+        H5Sclose (dataspace);
+    } H5E_END_TRY;
+
+    return 1;
+}
+
+/*---------------------------------------------------------------------------
+ * Create a chunked dataset for testing hash values:
+ *      dataset dimensions: 300 x 600
+ *      chunk dimensions:   100 x 100
+ */
+static int create_dset2(hid_t file)
+{
+    hid_t        dataspace, dataset;
+    hid_t        dcpl;
+    hsize_t      dims[RANK] = {DSET2_DIM1, DSET2_DIM2};
+    hsize_t      chunk_dims[RANK] = {CHUNK2_DIM1, CHUNK2_DIM2};
+    int          data[DSET2_DIM1][DSET2_DIM2];    /* data for writing */
+    int          i, j;
+
+    /* Create the data space. */
+    if((dataspace = H5Screate_simple (RANK, dims, NULL)) < 0)
+        goto error;
+
+    /* Modify dataset creation properties, i.e. enable chunking */
+    if((dcpl = H5Pcreate (H5P_DATASET_CREATE)) < 0)
+        goto error;
+    if(H5Pset_chunk (dcpl, RANK, chunk_dims) < 0)
+        goto error;
+
+    /* Set the dummy filter simply for counting the number of bytes being read into the memory */
+    if(H5Zregister(H5Z_COUNTER) < 0)
+        goto error;
+    if(H5Pset_filter(dcpl, FILTER_COUNTER, 0, 0, NULL) < 0)
+        goto error;
+
+    /* Create a new dataset within the file using chunk creation properties. */
+    if((dataset = H5Dcreate2 (file, DSET2_NAME, H5T_NATIVE_INT, dataspace,
+                              H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0)
+        goto error;
+
+    for (i = 0; i < DSET2_DIM1; i++)
+        for (j = 0; j < DSET2_DIM2; j++)
+            data[i][j] = i+j;
+
+    /* Write data to dataset */
+    if(H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
+                 H5P_DEFAULT, data) < 0)
+        goto error;
+
+    /* Close resources */
+    H5Dclose (dataset);
+    H5Pclose (dcpl);
+    H5Sclose (dataspace);
+
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Dclose (dataset);
+        H5Pclose (dcpl);
+        H5Sclose (dataspace);
+    } H5E_END_TRY;
+
+    return 1;
+}
+
+/*---------------------------------------------------------------------------
+ * Check the performance of the chunk cache when partial chunks exist
+ * along the dataset dimensions.
+ */
+static int check_partial_chunks_perf(hid_t file)
+{
+    hid_t        dataset;
+    hid_t        filespace;
+    hid_t        memspace;
+    hid_t        dapl;
+
+    int          rdata[DSET1_DIM2];    /* data for reading */
+    int          i;
+
+    hsize_t      row_rank = 1;
+    hsize_t      row_dim[1] = {DSET1_DIM2};
+    hsize_t      start[RANK] = {0, 0};
+    hsize_t      count[RANK] = {1, DSET1_DIM2};
+    test_time_t  t = {0, 0}, t1 = {0, 0}, t2 = {0, 0};
+
+    if((dapl = H5Pcreate(H5P_DATASET_ACCESS)) < 0)
+        goto error;
+    if(H5Pset_chunk_cache (dapl, RDCC_NSLOTS, RDCC_NBYTES, RDCC_W0) < 0)
+        goto error;
+
+    dataset = H5Dopen2 (file, DSET1_NAME, dapl);
+
+    memspace  = H5Screate_simple(row_rank, row_dim, NULL);
+    filespace = H5Dget_space(dataset);
+
+    nbytes_global = 0;
+
+    test_time_get_current(&t1);
+
+    /* Read the data row by row */
+    for(i = 0; i < DSET1_DIM1; i++) {
+        start[0] = i;
+        if(H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
+                               start, NULL, count, NULL) < 0)
+            goto error;
+
+        if(H5Dread (dataset, H5T_NATIVE_INT, memspace, filespace,
+                    H5P_DEFAULT, rdata) < 0)
+            goto error;
+    }
+
+    test_time_get_current(&t2);
+    t = test_time_add(t, test_time_subtract(t2, t1));
+
+    printf("1. Partial chunks: total read time is %lf; number of bytes being read from file is %lu\n", test_time_to_double(t), nbytes_global);
+
+    H5Dclose (dataset);
+    H5Sclose (filespace);
+    H5Sclose (memspace);
+    H5Pclose (dapl);
+
+    return 0;
+error:
+    H5E_BEGIN_TRY {
+        H5Dclose (dataset);
+        H5Sclose (filespace);
+        H5Sclose (memspace);
+        H5Pclose (dapl);
+    } H5E_END_TRY;
+    return 1;
+}
+
+/*---------------------------------------------------------------------------
+ * Check the performance of chunk cache when the number of cache slots
+ * is smaller than the number of chunks along the fastest-growing
+ * dimension of the dataset.
+ */
+static int check_hash_value_perf(hid_t file)
+{
+    hid_t        dataset;
+    hid_t        filespace;
+    hid_t        memspace;
+    hid_t        dapl;
+
+    int          rdata[DSET2_DIM1];    /* data for reading */
+    int          i;
+
+    hsize_t      column_rank = 1;
+    hsize_t      column_dim[1] = {DSET2_DIM1};
+    hsize_t      start[RANK] = {0, 0};
+    hsize_t      count[RANK] = {DSET2_DIM1, 1};
+    test_time_t  t = {0, 0}, t1 = {0, 0}, t2 = {0, 0};
+
+    if((dapl = H5Pcreate(H5P_DATASET_ACCESS)) < 0)
+        goto error;
+    if(H5Pset_chunk_cache (dapl, RDCC_NSLOTS, RDCC_NBYTES, RDCC_W0) < 0)
+        goto error;
+
+    if((dataset = H5Dopen2 (file, DSET2_NAME, dapl)) < 0)
+        goto error;
+    if((memspace = H5Screate_simple(column_rank, column_dim, NULL)) < 0)
+        goto error;
+    if((filespace = H5Dget_space(dataset)) < 0)
+        goto error;
+
+    nbytes_global = 0;
+
+    test_time_get_current(&t1);
+
+    /* Read the data column by column */
+    for(i = 0; i < DSET2_DIM2; i++) {
+        start[1] = i;
+        if(H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
+                               start, NULL, count, NULL) < 0)
+            goto error;
+
+        if(H5Dread (dataset, H5T_NATIVE_INT, memspace, filespace,
+                    H5P_DEFAULT, rdata) < 0)
+            goto error;
+    }
+
+    test_time_get_current(&t2);
+    t = test_time_add(t, test_time_subtract(t2, t1));
+
+    printf("2. Hash value: total read time is %lf; number of bytes being read from file is %lu\n", test_time_to_double(t), nbytes_global);
+
+    H5Dclose (dataset);
+    H5Sclose (filespace);
+    H5Sclose (memspace);
+    H5Pclose (dapl);
+    return 0;
+
+error:
+    H5E_BEGIN_TRY {
+        H5Dclose (dataset);
+        H5Sclose (filespace);
+        H5Sclose (memspace);
+        H5Pclose (dapl);
+    } H5E_END_TRY;
+    return 1;
+}
+
+/*-------------------------------------------------------------------------------------
+ * Purpose: check the performance of chunk cache in these two cases (HDFFV-10601):
+ *          1. partial chunks exist along any dimension.
+ *          2. number of slots in chunk cache is smaller than the number of chunks
+ *             in the fastest-growing dimension.
+ *-------------------------------------------------------------------------------------*/
+int
+main (void)
+{
+    hid_t       file;       /* handles */
+    int         nerrors = 0;
+
+    /* Create a new file. If file exists its contents will be overwritten. */
+    if((file = H5Fcreate (FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
+        goto error;
+
+    nerrors += create_dset1(file);
+    nerrors += create_dset2(file);
+
+    if(H5Fclose (file) < 0)
+        goto error;
+
+    /* Re-open the file for testing performance. */
+    if((file = H5Fopen (FILENAME, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
+        goto error;
+
+    nerrors += check_partial_chunks_perf(file);
+    nerrors += check_hash_value_perf(file);
+
+    if(H5Fclose (file) < 0)
+        goto error;
+
+    if (nerrors > 0)
+        goto error;
+    cleanup();
+    return 0;
+
+error:
+    fprintf(stderr, "*** ERRORS DETECTED ***\n");
+    return 1;
+}
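
As a rough sanity check on the numbers the benchmark prints (a back-of-envelope bound, not measured output; it assumes 4-byte ints and that filtered chunks pass through the pipeline at full chunk size, which is HDF5's default for filtered datasets), the smallest possible value of nbytes_global is "every chunk decompressed exactly once":

    #include <stdio.h>

    int main(void)
    {
        /* partial_chunks: 9000x9 ints in 2000x2 chunks -> 5x5 = 25 chunks */
        unsigned long min1 = 25UL * 2000UL * 2UL * (unsigned long)sizeof(int);   /* 400000 with 4-byte int */

        /* hash_value: 300x600 ints in 100x100 chunks -> 3x6 = 18 chunks */
        unsigned long min2 = 18UL * 100UL * 100UL * (unsigned long)sizeof(int);  /* 720000 with 4-byte int */

        printf("minimum bytes through the filter: %lu (partial chunks), %lu (hash value)\n",
               min1, min2);
        return 0;
    }

Readings far above these bounds indicate that chunks are being evicted and re-read repeatedly, which is what the two access patterns provoked before this fix.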