diff options
author | Jerome Soumagne <jsoumagne@hdfgroup.org> | 2014-06-05 00:46:50 (GMT) |
---|---|---|
committer | Jerome Soumagne <jsoumagne@hdfgroup.org> | 2014-06-05 00:46:50 (GMT) |
commit | cbaffd3924247ce21ed55626aac14d6d1dbf3311 (patch) | |
tree | f8bf49b5ab18681f671208da2190ccc437025051 | |
parent | f73776bd637197d4e372b5c90ca63b353ed76a39 (diff) | |
download | hdf5-cbaffd3924247ce21ed55626aac14d6d1dbf3311.zip hdf5-cbaffd3924247ce21ed55626aac14d6d1dbf3311.tar.gz hdf5-cbaffd3924247ce21ed55626aac14d6d1dbf3311.tar.bz2 |
[svn-r25240] First working version of H5X_Alacrity
Add H5X__alacrity_findBinRange1C
Add H5X__alacrity_readIndex
Generate dataspace selection from selected bins (only part of the index that
corresponds to the selected bins is read)
Update example to use float type (ALACRITY only supports float and double types)
-rw-r--r-- | src/H5Xalacrity.c | 354 | ||||
-rw-r--r-- | testff/h5ff_client_index.c | 10 |
2 files changed, 328 insertions, 36 deletions
diff --git a/src/H5Xalacrity.c b/src/H5Xalacrity.c index 37581f4..3d9ca9c 100644 --- a/src/H5Xalacrity.c +++ b/src/H5Xalacrity.c @@ -187,8 +187,6 @@ H5X__alacrity_get_dataset_data(hid_t file_id, hid_t dataset_id, hid_t trans_id, if (0 == (elmt_size = H5Tget_size(type_id))) HGOTO_ERROR(H5E_DATATYPE, H5E_BADTYPE, FAIL, "invalid size of element"); - printf("*** npoints: %zu\n", (size_t) nelmts); - /* Allocate buffer to hold data */ *buf_size = nelmts * elmt_size; if(NULL == (*buf = H5MM_malloc(*buf_size))) @@ -220,8 +218,8 @@ H5X__alacrity_free_data(void *buf) } static herr_t -H5X__alacrity_create_index(H5X_alacrity_t *alacrity, hid_t file_id, hid_t dataset_id, - hid_t trans_id, const void *buf, size_t buf_size) +H5X__alacrity_create_index(H5X_alacrity_t *alacrity, hid_t file_id, + hid_t dataset_id, hid_t trans_id, const void *buf, size_t UNUSED buf_size) { hid_t type_id, space_id; hid_t metadata_space_id, data_space_id, index_space_id; @@ -241,6 +239,20 @@ H5X__alacrity_create_index(H5X_alacrity_t *alacrity, hid_t file_id, hid_t datase if (0 == (nelmts = (size_t) H5Sget_select_npoints(space_id))) HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid number of elements"); + /****************************/ + int i, j; + int ntuples = 256; + int ncomponents = 3; + float *my_buf = (float *) buf; + for (i = 0; i < ntuples; i++) { + for (j = 0; j < ncomponents; j++) { + printf("%f ", my_buf[ncomponents * i + j]); + } + } + printf("\n"); + printf("*** npoints: %zu\n", (size_t) nelmts); + /****************************/ + ALEncode(&alacrity->config, buf, nelmts, &alacrity->output); /* Get sizes */ @@ -357,15 +369,253 @@ H5X__alacrity_serialize_metadata(H5X_alacrity_t *alacrity, void *buf, FUNC_LEAVE_NOAPI(ret_value) } + +#define H5X_ALACRITY_FINDBINRANGE(UT, ST) \ +static bool \ +H5X__alacrity_findBinRange1C_ ## ST \ +(ALMetadata *meta, UT query_lb, UT query_ub, \ + bin_id_t *start_bin, bin_id_t *end_bin) \ +{ \ + ALBinLayout *bl = &meta->binLayout; \ + int sigbits = meta->significantBits; \ + int insigbits = (meta->elementSize << 3) - sigbits; \ + UT sign_mask_hi = ((UT)1) << (sigbits - 1); \ + \ + UT query_lb_hi = query_lb >> insigbits; \ + UT query_ub_hi = query_ub >> insigbits; \ + ST s_query_lb_hi = CONV_1C_TO_2C(query_lb_hi, sign_mask_hi); \ + ST s_query_ub_hi = CONV_1C_TO_2C(query_ub_hi, sign_mask_hi); \ + \ + high_order_bytes_t *startBinValPtr = bl->binValues; \ + high_order_bytes_t *endBinValPtr = bl->binValues + bl->numBins; \ + high_order_bytes_t *curBinValPtr = startBinValPtr; \ + \ + bool found_start = false, found_end = false; \ + \ + printf("Query bound unsigned high parts: %ld / %lu\n", query_lb_hi, query_ub_hi); \ + printf("Query bound signed high parts: %ld / %lu\n", s_query_lb_hi, s_query_ub_hi); \ + \ + /* Neg. bins, find start bin. */ \ + while (curBinValPtr != endBinValPtr) { \ + UT binval_hi = *curBinValPtr; \ + ST s_binval_hi; \ + \ + if ((binval_hi & sign_mask_hi) == 0) break; /* Break if bin value is positive */ \ + \ + s_binval_hi = CONV_NEG_1C_TO_2C((ST) binval_hi, sign_mask_hi); \ + \ + /* If the bin is at or above the lower bound of the query range, this is the edge bin */ \ + if (s_binval_hi >= s_query_lb_hi) { \ + if (s_binval_hi > s_query_ub_hi) \ + break; \ + \ + found_start = true; \ + *start_bin = curBinValPtr - startBinValPtr; \ + break; \ + } \ + curBinValPtr++; \ + } \ + \ + /* Pos. bins, find start bin. */ \ + if (!found_start) { \ + while (curBinValPtr != endBinValPtr) { \ + UT binval_hi = *curBinValPtr; \ + ST s_binval_hi = CONV_POS_1C_TO_2C((ST)binval_hi, sign_mask_hi); \ + \ + /* If the bin is at or above the lower bound of the query range, this is the edge bin */ \ + if (s_binval_hi >= s_query_lb_hi) { \ + if (s_binval_hi > s_query_ub_hi) \ + break; \ + \ + found_start = true; \ + *start_bin = curBinValPtr - startBinValPtr; \ + break; \ + } \ + curBinValPtr++; \ + } \ + } \ + /* If we didn't find the start bin, quit now */ \ + if (!found_start) \ + return false; \ + \ + /* END FIND START BIN */ \ + \ + /* At this point, the start bin has been found, but it may be entirely above the query interval */ \ + \ + /* Neg. bins, find end bin. */ \ + while (curBinValPtr != endBinValPtr) { \ + UT binval_hi = *curBinValPtr; \ + ST s_binval_hi; \ + \ + if ((binval_hi & sign_mask_hi) == 0) break; /* Break if bin value is positive */ \ + \ + s_binval_hi = CONV_NEG_1C_TO_2C((ST)binval_hi, sign_mask_hi); \ + \ + /* If the bin is strictly above the upper bound of the query range */ \ + /* the previous bin is the last bin to query, making this bin the end bin */ \ + /* (end_bin is exclusive, remember) */ \ + if (s_binval_hi > s_query_ub_hi) { \ + /* If we are at the first bin, then all bins are totally above the query interval. */ \ + /* Just pick the first bin as end bin; it will be deemed invalid in the check */ \ + /* after the loops. */ \ + if (curBinValPtr == startBinValPtr) \ + *end_bin = 0; \ + else \ + *end_bin = curBinValPtr - startBinValPtr; \ + \ + found_end = true; \ + break; \ + } \ + curBinValPtr++; \ + } \ + \ + if (!found_end) { \ + /* Pos. bins, find end bin. Note: bin range = [ VVVV0000, VVVV0000 + 1 ) (V = high bits from bin value) */ \ + while (curBinValPtr != endBinValPtr) { \ + UT binval_hi = *curBinValPtr; \ + ST s_binval_hi = CONV_POS_1C_TO_2C((ST)binval_hi, sign_mask_hi); \ + \ + /* If the bin is strictly above the upper bound of the query range, the previous bin is the last bin */ \ + /* to query, making this bin the end bin (end_bin is exclusive, remember) */ \ + if (s_binval_hi > s_query_ub_hi) { \ + /* If we are at the first bin, then all bins are totally above the query interval. */ \ + /* Just pick the first bin as end bin; it will be deemed invalid in the check */ \ + /* after the loops. */ \ + if (curBinValPtr == startBinValPtr) \ + *end_bin = 0; \ + else \ + *end_bin = curBinValPtr - startBinValPtr; \ + \ + found_end = true; \ + break; \ + } \ + curBinValPtr++; \ + } \ + } \ + \ + /* If we didn't find an end bin, either the last bin is within */ \ + /* the query range (so there was no next bin to ``back up'' from), */ \ + /* or the query range is disjoint from the bin set. Set the end */ \ + /* bin to the last bin; it will solve the first problem, and */ \ + /* the second problem will still be detected momentarily. */ \ + if (!found_end) \ + *end_bin = bl->numBins; \ + \ + /* This shouldn't happen, but if the bin range is empty, return immediately */ \ + if (*start_bin == *end_bin) \ + return false; \ + /* If the start bin is above the query end, or the end bin */ \ + /* is below the query start, fail, because the query range */ \ + /* is disjoint from the bin set */ \ + ST s_first_bin_val = CONV_1C_TO_2C((ST)bl->binValues[*start_bin], sign_mask_hi); \ + ST s_last_bin_val = CONV_1C_TO_2C((ST)bl->binValues[*end_bin - 1], sign_mask_hi); \ + if (s_first_bin_val > s_query_ub_hi || \ + s_last_bin_val < s_query_lb_hi) \ + return false; \ + \ + /* All tests have now passed: */ \ + /* A start and end bin have been found */ \ + /* start_bin_val >= query_lb and end_bin_val <= query_ub by construction */ \ + /* start_bin_val <= query_ub and end_bin_val >= query_lb, so the query */ \ + /* range and bin range intersect */ \ + return true; \ +} + +H5X_ALACRITY_FINDBINRANGE(uint8_t, int8_t); +H5X_ALACRITY_FINDBINRANGE(uint16_t, int16_t); +H5X_ALACRITY_FINDBINRANGE(uint32_t, int32_t); +H5X_ALACRITY_FINDBINRANGE(uint64_t, int64_t); + +static bool +H5X__alacrity_findBinRange1C(ALMetadata *metadata, value_types_t query_lb, + value_types_t query_ub, bin_id_t *start_bin, bin_id_t *end_bin) +{ + bool ret_value = false; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + printf("*** Element size: %d\n", metadata->elementSize); + + switch (metadata->elementSize) { + case sizeof(uint64_t): + printf("Calling H5X__alacrity_findBinRange1C_int64_t\n"); + ret_value = H5X__alacrity_findBinRange1C_int64_t(metadata, + query_lb.asUint64, query_ub.asUint64, start_bin, end_bin); + break; + case sizeof(uint32_t): + printf("Calling H5X__alacrity_findBinRange1C_int32_t\n"); + ret_value = H5X__alacrity_findBinRange1C_int32_t(metadata, + query_lb.asUint32, query_ub.asUint32, start_bin, end_bin); + break; + case sizeof(uint16_t): + printf("Calling H5X__alacrity_findBinRange1C_int16_t\n"); + ret_value = H5X__alacrity_findBinRange1C_int16_t(metadata, + query_lb.asUint16, query_ub.asUint16, start_bin, end_bin); + break; + case sizeof(uint8_t): + printf("Calling H5X__alacrity_findBinRange1C_int8_t\n"); + ret_value = H5X__alacrity_findBinRange1C_int8_t(metadata, + query_lb.asUint8, query_ub.asUint8, start_bin, end_bin); + break; + default: + HGOTO_ERROR(H5E_INDEX, H5E_ARGS, FALSE, "Unsupported element size"); + } + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + static herr_t -H5X__alacrity_findBinRange1C(ALMetadata metadata, hid_t query_id, - bin_id_t *start_bin, bin_id_t *end_bin) +H5X__alacrity_readIndex(H5X_alacrity_t *alacrity, bin_id_t start_bin, + bin_id_t end_bin, hid_t rcxt_id, ALIndex *al_index, + size_t *al_index_size) { + const ALMetadata *meta = &alacrity->metadata; + const uint64_t first_bin_off = ALGetIndexBinOffset(meta, start_bin); + const uint64_t last_bin_off = ALGetIndexBinOffset(meta, end_bin); + const uint64_t bin_read_len = last_bin_off - first_bin_off; herr_t ret_value = SUCCEED; /* Return value */ + hid_t file_space_id = FAIL; + hid_t mem_space_id = FAIL; + size_t nelmts; + int rank = 1; FUNC_ENTER_NOAPI_NOINIT + printf("Start Offset: %d\n", first_bin_off); + printf("End Offset: %d\n", last_bin_off); + + /* Get space info */ + if (FAIL == (file_space_id = H5Dget_space(alacrity->index_id))) + HGOTO_ERROR(H5E_INDEX, H5E_CANTGET, FAIL, "can't get dataspace from dataset"); + if (0 == (nelmts = (size_t) H5Sget_select_npoints(file_space_id))) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid number of elements"); + + printf("Index elmts to be read: %d / %d\n", bin_read_len, nelmts); + + if (*al_index == NULL) + *al_index = malloc(bin_read_len); + + if (FAIL == H5Sselect_hyperslab(file_space_id, H5S_SELECT_SET, + &first_bin_off, NULL, &bin_read_len, NULL)) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSET, FAIL, "can't set offset"); + + if (FAIL == (mem_space_id = H5Screate_simple(rank, &bin_read_len, NULL))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTCREATE, FAIL, "can't create simple dataspace"); + + // + /* Read data from dataset */ + if (FAIL == H5Dread_ff(alacrity->index_id, alacrity->opaque_type_id, mem_space_id, + file_space_id, H5P_DEFAULT, *al_index, rcxt_id, H5_EVENT_STACK_NULL)) + HGOTO_ERROR(H5E_INDEX, H5E_READERROR, FAIL, "can't read data"); + + *al_index_size = bin_read_len; + done: + if (FAIL != file_space_id) + H5Sclose(file_space_id); + if (FAIL != mem_space_id) + H5Sclose(mem_space_id); FUNC_LEAVE_NOAPI(ret_value); } @@ -690,10 +940,11 @@ H5X_alacrity_query(void *idx_handle, hid_t query_id, hid_t xxpl_id, hid_t *dataspace_id) { H5X_alacrity_t *alacrity = (H5X_alacrity_t *) idx_handle; - ALIndex alacrity_index; - bin_id_t start_bin, end_bin; + ALIndex al_index = NULL; + bin_id_t start_bin = 0, end_bin = 0; hid_t rcxt_id; herr_t ret_value = SUCCEED; /* Return value */ + hid_t ret_space_id; FUNC_ENTER_NOAPI_NOINIT @@ -731,7 +982,7 @@ H5X_alacrity_query(void *idx_handle, hid_t query_id, hid_t xxpl_id, memstreamInit(&memstream, buf); ALDeserializeMetadata(&alacrity->metadata, &memstream); memstreamDestroy(&memstream, 0); - printf("**** Partition length: %zu\n", alacrity->metadata.partitionLength); + printf("**** Partition length: %zu\n", (size_t) alacrity->metadata.partitionLength); H5MM_free(buf); /* Set this to TRUE so we don't read metadata again */ @@ -743,35 +994,74 @@ H5X_alacrity_query(void *idx_handle, hid_t query_id, hid_t xxpl_id, * query range will fall into these bins, however not all elements in these * bins fall into the query range). */ - H5X__alacrity_findBinRange1C(alacrity->metadata, query_id, &start_bin, &end_bin); + ALBinLayout *bl; +// unsigned int i; + bl = &alacrity->metadata.binLayout; + printf("NumBins: %d\n", bl->numBins); +// for (i = 0; i < bl->numBins; i++) { +// printf("Bin%d: %f\n", i, (float) bl->binValues[i]); +// } + + bool found_bin; + value_types_t query_lb; + value_types_t query_ub; + H5Q_t *query; + + if (NULL == (query = (H5Q_t *) H5I_object_verify(query_id, H5I_QUERY))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a query ID"); + + /* Find bins that satisfy query */ + query_lb.asFloat = 39.1; + query_ub.asFloat = 42.1; + + found_bin = H5X__alacrity_findBinRange1C(&alacrity->metadata, query_lb, + query_ub, &start_bin, &end_bin); + printf("H5X__alacrity_findBinRange1C returned: %d\n", found_bin); + printf("Start bin: %d\n", start_bin); + printf("End bin: %d\n", end_bin); /* Second readIndex */ - ALGetIndexBinOffset(alacrity->metadata, start_bin); - ALGetIndexBinOffset(alacrity->metadata, end_bin); + uint64_t resultCount = bl->binStartOffsets[end_bin] - bl->binStartOffsets[start_bin]; + size_t al_index_size; + + printf("Result count: %d\n", resultCount); + H5X__alacrity_readIndex(alacrity, start_bin, end_bin, rcxt_id, &al_index, + &al_index_size); + + /****************************/ + unsigned int i; + rid_t *my_buf = (rid_t *) al_index; + printf("Index read contains following rIDs: "); + for (i = 0; i < al_index_size / (sizeof(rid_t)); i++) { + printf("%d ", my_buf[i]); + } + printf("\n"); + /****************************/ - H5X__alacrity_readIndex(alacrity, start_bin, end_bin, &alacrity_index); +// if (alacrity->metadata->indexMeta.indexForm == ALCompressedInvertedIndex) +// ALConvertPartialIndexForm(&alacrity->metadata, &alacrity_index, +// ALInvertedIndex, start_bin, end_bin); - if (alacrity->metadata->indexMeta.indexForm == ALCompressedInvertedIndex) - ALConvertPartialIndexForm(&alacrity->metadata, &alacrity_index, - ALInvertedIndex, start_bin, end_bin); + /* Apply the query */ + hsize_t dims = al_index_size / (sizeof(rid_t)); + /* If element satisfies query, add it to the selection */ + if (FAIL == (ret_space_id = H5Screate_simple(1, &dims, NULL))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to create dataspace"); -// if(FAIL == (udata.space_query = H5Scopy(space_id))) -// HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to copy dataspace"); -// if(H5Sselect_none(udata.space_query) < 0) -// HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to reset selection"); -// -// udata.num_elmts = 0; -// udata.query_id = query_id; -// -// /* iterate over every element and apply the query on it. If the -// query is not satisfied, then remove it from the query selection */ -// if (H5Diterate(buf, type_id, space_id, H5X__alacrity_get_query_data_cb, &udata) < 0) -// HGOTO_ERROR(H5E_INDEX, H5E_CANTCOMPUTE, FAIL, "failed to compute buffer size"); -// -// *dataspace_id = udata.space_query; -// printf("Created dataspace from index with %d elements\n", -// (int) H5Sget_select_npoints(*dataspace_id)); + for (i = 0; i < al_index_size / (sizeof(rid_t)); i++) { + const hsize_t point = my_buf[i]; + if (H5Sselect_elements(ret_space_id, H5S_SELECT_APPEND, 1, &point)) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSET, FAIL, "unable to add point to selection"); + } + + printf("Created dataspace from index with %d elements\n", + (int) H5Sget_select_npoints(ret_space_id)); + hsize_t start_coord, end_coord; + H5Sget_select_bounds(ret_space_id, &start_coord, &end_coord); + printf("Bounding box: %d, %d\n", start_coord, end_coord); + + *dataspace_id = ret_space_id; done: FUNC_LEAVE_NOAPI(ret_value) diff --git a/testff/h5ff_client_index.c b/testff/h5ff_client_index.c index b364d26..c637eb1 100644 --- a/testff/h5ff_client_index.c +++ b/testff/h5ff_client_index.c @@ -238,7 +238,7 @@ main(int argc, char **argv) hsize_t ntuples = NTUPLES; hsize_t ncomponents = 3; hsize_t start, total; - int *data; + float *data; hid_t file_id, fapl_id; hid_t estack_id = H5_EVENT_STACK_NULL; herr_t ret; @@ -263,10 +263,11 @@ main(int argc, char **argv) total = ntuples; /* Initialize the dataset. */ - data = (int *) malloc(sizeof(int) * ncomponents * ntuples); + data = (float *) malloc(sizeof(float) * ncomponents * ntuples); + for (i = 0; i < ntuples; i++) { for (j = 0; j < ncomponents; j++) { - data[ncomponents * i + j] = my_rank * ntuples + i; + data[ncomponents * i + j] = (float) (my_rank * ntuples + i); } } @@ -283,7 +284,7 @@ main(int argc, char **argv) ret = H5Pclose(fapl_id); assert(0 == ret); - write_dataset(file_id, dataset_name, total, ncomponents, H5T_NATIVE_INT, + write_dataset(file_id, dataset_name, total, ncomponents, H5T_NATIVE_FLOAT, ntuples, start, data, estack_id); MPI_Barrier(MPI_COMM_WORLD); @@ -305,6 +306,7 @@ main(int argc, char **argv) MPI_Barrier(MPI_COMM_WORLD); EFF_finalize(); + MPI_Finalize(); return 0; |