diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2010-09-28 14:10:29 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2010-09-28 14:10:29 (GMT) |
commit | 40960e0c65137dc8156cfd1859949d7fd5254b3d (patch) | |
tree | b0e35dc9a333a3e48656e7d5e633cdb42b1efcf1 /src/H5Shyper.c | |
parent | 1c61b6fe13f6fd9c84c0407f02865568e0a7f638 (diff) | |
download | hdf5-40960e0c65137dc8156cfd1859949d7fd5254b3d.zip hdf5-40960e0c65137dc8156cfd1859949d7fd5254b3d.tar.gz hdf5-40960e0c65137dc8156cfd1859949d7fd5254b3d.tar.bz2 |
[svn-r19482] Description:
Speed up hyperslab sequence generation routine some more, giving
about a 10% improvement in Ger Van Diepen's LOFAR benchmark.
Tested on:
FreeBSD/32 6.3 (duty) in debug mode
FreeBSD/64 6.3 (liberty) w/C++ & FORTRAN, in debug mode
Linux/32 2.6 (jam) w/PGI compilers, w/default API=1.8.x,
w/C++ & FORTRAN, w/threadsafe, in debug mode
Linux/64-amd64 2.6 (amani) w/Intel compilers, w/default API=1.6.x,
w/C++ & FORTRAN, in production mode
Solaris/32 2.10 (linew) w/deprecated symbols disabled, w/C++ & FORTRAN,
w/szip filter, w/threadsafe, in production mode
Linux/PPC 2.6 (heiwa) w/C++ & FORTRAN, w/threadsafe, in debug mode
Linux/64-ia64 2.6 (cobalt) w/Intel compilers, w/C++ & FORTRAN,
in production mode
Linux/64-amd64 2.6 (abe) w/parallel, w/FORTRAN, in debug mode
Mac OS X/32 10.6.4 (amazon) in debug mode
Mac OS X/32 10.6.4 (amazon) w/C++ & FORTRAN, w/threadsafe,
in production mode
Mac OS X/32 10.6.4 (amazon) w/parallel, in debug mode
Diffstat (limited to 'src/H5Shyper.c')
-rw-r--r-- | src/H5Shyper.c | 134 |
1 files changed, 85 insertions, 49 deletions
diff --git a/src/H5Shyper.c b/src/H5Shyper.c index 1f56737..48f1064 100644 --- a/src/H5Shyper.c +++ b/src/H5Shyper.c @@ -8420,18 +8420,16 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, hsize_t base_offset[H5O_LAYOUT_NDIMS]; /* Base coordinate offset in dataspace */ hsize_t offset[H5O_LAYOUT_NDIMS]; /* Coordinate offset in dataspace */ hsize_t slab[H5O_LAYOUT_NDIMS]; /* Hyperslab size */ - hsize_t skip[H5O_LAYOUT_NDIMS]; /* Bytes to skip between blocks */ hsize_t fast_dim_block; /* Local copies of fastest changing dimension info */ hsize_t acc; /* Accumulator */ hsize_t loc; /* Coordinate offset */ size_t tot_blk_count; /* Total number of blocks left to output */ - size_t blk_count; /* Total number of blocks left to output */ size_t elem_size; /* Size of each element iterating over */ size_t io_left; /* The number of elements left in I/O operation */ size_t actual_elem; /* The actual number of elements to count */ unsigned ndims; /* Number of dimensions of dataset */ unsigned fast_dim; /* Rank of the fastest changing dimension for the dataspace */ - int temp_dim; /* Temporary rank holder */ + unsigned skip_dim; /* Rank of the dimension to skip along */ unsigned u; /* Local index variable */ int i; /* Local index variable */ @@ -8454,7 +8452,6 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, if(iter->u.hyp.iter_rank != 0 && iter->u.hyp.iter_rank < space->extent.rank) { /* Set the aliases for a few important dimension ranks */ ndims = iter->u.hyp.iter_rank; - fast_dim = ndims - 1; /* Set the local copy of the selection offset */ sel_off = iter->u.hyp.sel_off; @@ -8465,7 +8462,6 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, else { /* Set the aliases for a few important dimension ranks */ ndims = space->extent.rank; - fast_dim = ndims - 1; /* Set the local copy of the selection offset */ sel_off = space->select.offset; @@ -8473,6 +8469,7 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, /* Set up the pointer to the size of the memory space */ mem_size = space->extent.size; } /* end else */ + fast_dim = ndims - 1; /* initialize row sizes for each dimension */ elem_size = iter->elmt_size; @@ -8481,10 +8478,6 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, acc *= mem_size[i]; } /* end for */ - /* Compute the amount to skip between sequences */ - for(u = 0; u < ndims; u++) - skip[u] = (mem_size[u] - tdiminfo[u].block) * slab[u]; - /* Copy the base location of the block */ /* (Add in the selection offset) */ for(u = 0; u < ndims; u++) @@ -8518,16 +8511,11 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, /* Check for blocks to operate on */ if(tot_blk_count > 0) { - hsize_t tmp_block[H5O_LAYOUT_NDIMS];/* Temporary block offset */ size_t actual_bytes; /* The actual number of bytes to copy */ /* Set the number of actual bytes */ actual_bytes = actual_elem * elem_size; - /* Set the starting block location */ - for(u = 0; u < ndims; u++) - tmp_block[u] = iter->u.hyp.off[u] - tdiminfo[u].start; - /* Check for 1-dim selection */ if(0 == fast_dim) { /* Sanity checks */ @@ -8539,53 +8527,101 @@ H5S_hyper_get_seq_list_single(const H5S_t *space, H5S_sel_iter_t *iter, *len++ = actual_bytes; } /* end if */ else { - /* Create sequences until an entire row can't be used */ - blk_count = tot_blk_count; - while(blk_count > 0) { - /* Store the sequence information */ - *off++ = loc; - *len++ = actual_bytes; - - /* Set temporary dimension for advancing offsets */ - temp_dim = (int)fast_dim - 1; + hsize_t skip_slab; /* Temporary copy of slab[fast_dim - 1] */ + size_t blk_count; /* Total number of blocks left to output */ + + /* Find first dimension w/block >1 */ + skip_dim = fast_dim; + for(i = (int)(fast_dim - 1); i >= 0; i--) + if(tdiminfo[i].block > 1) { + skip_dim = (unsigned)i; + break; + } /* end if */ + skip_slab = slab[skip_dim]; + + /* Check for being able to use fast algorithm for 1-D */ + if(0 == skip_dim) { + /* Create sequences until an entire row can't be used */ + blk_count = tot_blk_count; + while(blk_count > 0) { + /* Store the sequence information */ + *off++ = loc; + *len++ = actual_bytes; + + /* Increment offset in destination buffer */ + loc += skip_slab; + + /* Decrement block count */ + blk_count--; + } /* end while */ - /* Increment offset in destination buffer */ - loc += slab[temp_dim]; + /* Move to the next location */ + offset[skip_dim] += tot_blk_count; + } /* end if */ + else { + hsize_t tmp_block[H5O_LAYOUT_NDIMS];/* Temporary block offset */ + hsize_t skip[H5O_LAYOUT_NDIMS]; /* Bytes to skip between blocks */ + int temp_dim; /* Temporary rank holder */ - /* Increment the offset and count for the other dimensions */ - while(temp_dim >= 0) { - /* Move to the next row in the curent dimension */ - offset[temp_dim]++; - tmp_block[temp_dim]++; + /* Set the starting block location */ + for(u = 0; u < ndims; u++) + tmp_block[u] = iter->u.hyp.off[u] - tdiminfo[u].start; - /* If this block is still in the range of blocks to output for the dimension, break out of loop */ - if(tmp_block[temp_dim] < tdiminfo[temp_dim].block) - break; - else { - offset[temp_dim] = base_offset[temp_dim]; - loc += skip[temp_dim]; - tmp_block[temp_dim] = 0; - } /* end else */ + /* Compute the amount to skip between sequences */ + for(u = 0; u < ndims; u++) + skip[u] = (mem_size[u] - tdiminfo[u].block) * slab[u]; - /* Decrement dimension count */ - temp_dim--; - } /* end while */ + /* Create sequences until an entire row can't be used */ + blk_count = tot_blk_count; + while(blk_count > 0) { + /* Store the sequence information */ + *off++ = loc; + *len++ = actual_bytes; - /* Decrement block count */ - blk_count--; - } /* end while */ + /* Set temporary dimension for advancing offsets */ + temp_dim = (int)skip_dim; + + /* Increment offset in destination buffer */ + loc += skip_slab; + + /* Increment the offset and count for the other dimensions */ + while(temp_dim >= 0) { + /* Move to the next row in the curent dimension */ + offset[temp_dim]++; + tmp_block[temp_dim]++; + + /* If this block is still in the range of blocks to output for the dimension, break out of loop */ + if(tmp_block[temp_dim] < tdiminfo[temp_dim].block) + break; + else { + offset[temp_dim] = base_offset[temp_dim]; + loc += skip[temp_dim]; + tmp_block[temp_dim] = 0; + } /* end else */ + + /* Decrement dimension count */ + temp_dim--; + } /* end while */ + + /* Decrement block count */ + blk_count--; + } /* end while */ + } /* end else */ } /* end else */ /* Update the iterator, if there were any blocks used */ - /* Update the iterator with the location we stopped */ - /* (Subtract out the selection offset) */ - for(u = 0; u < ndims; u++) - iter->u.hyp.off[u] = (hsize_t)((hssize_t)offset[u] - sel_off[u]); - /* Decrement the number of elements left in selection */ iter->elmt_left -= tot_blk_count * actual_elem; + /* Check if there are elements left in iterator */ + if(iter->elmt_left > 0) { + /* Update the iterator with the location we stopped */ + /* (Subtract out the selection offset) */ + for(u = 0; u < ndims; u++) + iter->u.hyp.off[u] = (hsize_t)((hssize_t)offset[u] - sel_off[u]); + } /* end if */ + /* Increment the number of sequences generated */ *nseq += tot_blk_count; |