summaryrefslogtreecommitdiffstats
path: root/src/H5V.c
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2010-10-01 05:39:05 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2010-10-01 05:39:05 (GMT)
commitdc4756fbf47d8fe84472269f24cedc16c8093c48 (patch)
tree6cc8a9172767ebdbf4cc73c8db5b95b8e7aa4f88 /src/H5V.c
parente171b57b999a4d0b6c0b97c8138880056fc4a956 (diff)
downloadhdf5-dc4756fbf47d8fe84472269f24cedc16c8093c48.zip
hdf5-dc4756fbf47d8fe84472269f24cedc16c8093c48.tar.gz
hdf5-dc4756fbf47d8fe84472269f24cedc16c8093c48.tar.bz2
[svn-r19498] Description:
Optimize the vector-vector memcpy() routine even further, for a total of ~2x speedup. :-) Make a generic vector-vector operation routine and convert other vector-vector read & write routines to use generic routine instead of multiple copies of the basic algorithm. Tested on: FreeBSD/32 6.3 (duty) in debug mode FreeBSD/64 6.3 (liberty) w/C++ & FORTRAN, in debug mode Linux/32 2.6 (jam) w/PGI compilers, w/default API=1.8.x, w/C++ & FORTRAN, w/threadsafe, in debug mode Linux/64-amd64 2.6 (amani) w/Intel compilers, w/default API=1.6.x, w/C++ & FORTRAN, in production mode Solaris/32 2.10 (linew) w/deprecated symbols disabled, w/C++ & FORTRAN, w/szip filter, w/threadsafe, in production mode Linux/PPC 2.6 (heiwa) w/C++ & FORTRAN, w/threadsafe, in debug mode Linux/64-ia64 2.6 (cobalt) w/Intel compilers, w/C++ & FORTRAN, in production mode Linux/64-amd64 2.6 (abe) w/parallel, w/FORTRAN, in debug mode Mac OS X/32 10.6.4 (amazon) in debug mode Mac OS X/32 10.6.4 (amazon) w/C++ & FORTRAN, w/threadsafe, in production mode Mac OS X/32 10.6.4 (amazon) w/parallel, in debug mode
Diffstat (limited to 'src/H5V.c')
-rw-r--r--src/H5V.c403
1 files changed, 368 insertions, 35 deletions
diff --git a/src/H5V.c b/src/H5V.c
index 979b591..37d558f 100644
--- a/src/H5V.c
+++ b/src/H5V.c
@@ -24,6 +24,12 @@
#include "H5Oprivate.h"
#include "H5Vprivate.h"
+/* Local typedefs */
+typedef struct H5V_memcpy_ud_t {
+ unsigned char *dst; /* Pointer to destination buffer */
+ const unsigned char *src; /* Pointer to source buffer */
+} H5V_memcpy_ud_t;
+
/* Local macros */
#define H5V_HYPER_NDIMS H5O_LAYOUT_NDIMS
@@ -1149,9 +1155,9 @@ done:
*
*-------------------------------------------------------------------------
*/
-herr_t
-H5V_array_calc_pre(hsize_t offset, unsigned n, const hsize_t *total_size,
- const hsize_t *down, hsize_t *coords)
+static herr_t
+H5V_array_calc_pre(hsize_t offset, unsigned n, const hsize_t *down,
+ hsize_t *coords)
{
unsigned u; /* Local index variable */
@@ -1159,7 +1165,6 @@ H5V_array_calc_pre(hsize_t offset, unsigned n, const hsize_t *total_size,
/* Sanity check */
HDassert(n <= H5V_HYPER_NDIMS);
- HDassert(total_size);
HDassert(coords);
/* Compute the coordinates from the offset */
@@ -1210,7 +1215,7 @@ H5V_array_calc(hsize_t offset, unsigned n, const hsize_t *total_size, hsize_t *c
HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "can't compute down sizes")
/* Compute the coordinates from the offset */
- if(H5V_array_calc_pre(offset, n, total_size, idx, coords) < 0)
+ if(H5V_array_calc_pre(offset, n, idx, coords) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "can't compute coordinates")
done:
@@ -1287,6 +1292,212 @@ H5V_chunk_index(unsigned ndims, const hsize_t *coord, const uint32_t *chunk,
/*-------------------------------------------------------------------------
+ * Function: H5V_opvv
+ *
+ * Purpose: Perform an operation on a source & destination sequences
+ * of offset/length pairs. Each set of sequnces has an array
+ * of lengths, an array of offsets, the maximum number of
+ * sequences and the current sequence to start at in the sequence.
+ *
+ * There may be different numbers of bytes in the source and
+ * destination sequences, the operation stops when either the
+ * source or destination sequence runs out of information.
+ *
+ * Note: The algorithm in this routine is [basically] the same as for
+ * H5V_memcpyvv(). Changes should be made to both!
+ *
+ * Return: Non-negative # of bytes operated on, on success/Negative on failure
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, September 30, 2010
+ *
+ *-------------------------------------------------------------------------
+ */
+ssize_t
+H5V_opvv(size_t dst_max_nseq, size_t *dst_curr_seq, size_t dst_len_arr[],
+ hsize_t dst_off_arr[],
+ size_t src_max_nseq, size_t *src_curr_seq, size_t src_len_arr[],
+ hsize_t src_off_arr[],
+ H5V_opvv_func_t op, void *op_data)
+{
+ hsize_t *max_dst_off_ptr, *max_src_off_ptr; /* Pointers to max. source and destination offset locations */
+ hsize_t *dst_off_ptr, *src_off_ptr; /* Pointers to source and destination offset arrays */
+ size_t *dst_len_ptr, *src_len_ptr; /* Pointers to source and destination length arrays */
+ hsize_t tmp_dst_off, tmp_src_off; /* Temporary source and destination offset values */
+ size_t tmp_dst_len, tmp_src_len; /* Temporary source and destination length values */
+ size_t acc_len; /* Accumulated length of sequences */
+ ssize_t ret_value = 0; /* Return value (Total size of sequence in bytes) */
+
+ FUNC_ENTER_NOAPI(H5V_opvv, FAIL)
+
+ /* Sanity check */
+ HDassert(dst_curr_seq);
+ HDassert(*dst_curr_seq < dst_max_nseq);
+ HDassert(dst_len_arr);
+ HDassert(dst_off_arr);
+ HDassert(src_curr_seq);
+ HDassert(*src_curr_seq < src_max_nseq);
+ HDassert(src_len_arr);
+ HDassert(src_off_arr);
+ HDassert(op);
+
+ /* Set initial offset & length pointers */
+ dst_len_ptr = dst_len_arr + *dst_curr_seq;
+ dst_off_ptr = dst_off_arr + *dst_curr_seq;
+ src_len_ptr = src_len_arr + *src_curr_seq;
+ src_off_ptr = src_off_arr + *src_curr_seq;
+
+ /* Get temporary source & destination sequence offsets & lengths */
+ tmp_dst_len = *dst_len_ptr;
+ tmp_dst_off = *dst_off_ptr;
+ tmp_src_len = *src_len_ptr;
+ tmp_src_off = *src_off_ptr;
+
+ /* Compute maximum offset pointer values */
+ max_dst_off_ptr = dst_off_arr + dst_max_nseq;
+ max_src_off_ptr = src_off_arr + src_max_nseq;
+
+/* Work through the sequences */
+/* (Choose smallest sequence available initially) */
+
+ /* Source sequence is less than destination sequence */
+ if(tmp_src_len < tmp_dst_len) {
+src_smaller:
+ acc_len = 0;
+ do {
+ /* Make operator callback */
+ if((*op)(tmp_dst_off, tmp_src_off, tmp_src_len, op_data) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_CANTOPERATE, FAIL, "can't perform operation")
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_src_len;
+
+ /* Update destination length */
+ tmp_dst_off += tmp_src_len;
+ tmp_dst_len -= tmp_src_len;
+
+ /* Advance source offset & check for being finished */
+ src_off_ptr++;
+ if(src_off_ptr >= max_src_off_ptr) {
+ /* Roll accumulated changes into appropriate counters */
+ *dst_off_ptr = tmp_dst_off;
+ *dst_len_ptr = tmp_dst_len;
+
+ /* Done with sequences */
+ goto finished;
+ } /* end if */
+ tmp_src_off = *src_off_ptr;
+
+ /* Update source information */
+ src_len_ptr++;
+ tmp_src_len = *src_len_ptr;
+ } while(tmp_src_len < tmp_dst_len);
+
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Transition to next state */
+ if(tmp_dst_len < tmp_src_len)
+ goto dst_smaller;
+ else
+ goto equal;
+ } /* end if */
+ /* Destination sequence is less than source sequence */
+ else if(tmp_dst_len < tmp_src_len) {
+dst_smaller:
+ acc_len = 0;
+ do {
+ /* Make operator callback */
+ if((*op)(tmp_dst_off, tmp_src_off, tmp_dst_len, op_data) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_CANTOPERATE, FAIL, "can't perform operation")
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_dst_len;
+
+ /* Update source length */
+ tmp_src_off += tmp_dst_len;
+ tmp_src_len -= tmp_dst_len;
+
+ /* Advance destination offset & check for being finished */
+ dst_off_ptr++;
+ if(dst_off_ptr >= max_dst_off_ptr) {
+ /* Roll accumulated changes into appropriate counters */
+ *src_off_ptr = tmp_src_off;
+ *src_len_ptr = tmp_src_len;
+
+ /* Done with sequences */
+ goto finished;
+ } /* end if */
+ tmp_dst_off = *dst_off_ptr;
+
+ /* Update destination information */
+ dst_len_ptr++;
+ tmp_dst_len = *dst_len_ptr;
+ } while(tmp_dst_len < tmp_src_len);
+
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Transition to next state */
+ if(tmp_src_len < tmp_dst_len)
+ goto src_smaller;
+ else
+ goto equal;
+ } /* end else-if */
+ /* Destination sequence and source sequence are same length */
+ else {
+equal:
+ acc_len = 0;
+ do {
+ /* Make operator callback */
+ if((*op)(tmp_dst_off, tmp_src_off, tmp_dst_len, op_data) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_CANTOPERATE, FAIL, "can't perform operation")
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_dst_len;
+
+ /* Advance source & destination offset & check for being finished */
+ src_off_ptr++;
+ dst_off_ptr++;
+ if(src_off_ptr >= max_src_off_ptr || dst_off_ptr >= max_dst_off_ptr)
+ /* Done with sequences */
+ goto finished;
+ tmp_src_off = *src_off_ptr;
+ tmp_dst_off = *dst_off_ptr;
+
+ /* Update source information */
+ src_len_ptr++;
+ tmp_src_len = *src_len_ptr;
+
+ /* Update destination information */
+ dst_len_ptr++;
+ tmp_dst_len = *dst_len_ptr;
+ } while(tmp_dst_len == tmp_src_len);
+
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Transition to next state */
+ if(tmp_dst_len < tmp_src_len)
+ goto dst_smaller;
+ else
+ goto src_smaller;
+ } /* end else */
+
+finished:
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Update current sequence vectors */
+ *dst_curr_seq = (size_t)(dst_off_ptr - dst_off_arr);
+ *src_curr_seq = (size_t)(src_off_ptr - src_off_arr);
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5V_opvv() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5V_memcpyvv
*
* Purpose: Given source and destination buffers in memory (SRC & DST)
@@ -1299,6 +1510,9 @@ H5V_chunk_index(unsigned ndims, const hsize_t *coord, const uint32_t *chunk,
* destination sequences, data copying stops when either the
* source or destination buffer runs out of sequence information.
*
+ * Note: The algorithm in this routine is [basically] the same as for
+ * H5V_opvv(). Changes should be made to both!
+ *
* Return: Non-negative # of bytes copied on success/Negative on failure
*
* Programmer: Quincey Koziol
@@ -1314,8 +1528,12 @@ H5V_memcpyvv(void *_dst,
{
unsigned char *dst; /* Destination buffer pointer */
const unsigned char *src; /* Source buffer pointer */
- size_t size; /* Size of sequence in bytes */
- size_t u,v; /* Local index variables */
+ hsize_t *max_dst_off_ptr, *max_src_off_ptr; /* Pointers to max. source and destination offset locations */
+ hsize_t *dst_off_ptr, *src_off_ptr; /* Pointers to source and destination offset arrays */
+ size_t *dst_len_ptr, *src_len_ptr; /* Pointers to source and destination length arrays */
+ size_t tmp_dst_len; /* Temporary dest. length value */
+ size_t tmp_src_len; /* Temporary source length value */
+ size_t acc_len; /* Accumulated length of sequences */
ssize_t ret_value = 0; /* Return value (Total size of sequence in bytes) */
FUNC_ENTER_NOAPI_NOFUNC(H5V_memcpyvv)
@@ -1332,44 +1550,159 @@ H5V_memcpyvv(void *_dst,
HDassert(src_len_arr);
HDassert(src_off_arr);
- /* Work through all the sequences */
- for(u = *dst_curr_seq, v = *src_curr_seq; u < dst_max_nseq && v < src_max_nseq; ) {
- /* Choose smallest buffer to write */
- if(src_len_arr[v] < dst_len_arr[u])
- size = src_len_arr[v];
- else
- size = dst_len_arr[u];
+ /* Set initial offset & length pointers */
+ dst_len_ptr = dst_len_arr + *dst_curr_seq;
+ dst_off_ptr = dst_off_arr + *dst_curr_seq;
+ src_len_ptr = src_len_arr + *src_curr_seq;
+ src_off_ptr = src_off_arr + *src_curr_seq;
+
+ /* Get temporary source & destination sequence lengths */
+ tmp_dst_len = *dst_len_ptr;
+ tmp_src_len = *src_len_ptr;
+
+ /* Compute maximum offset pointer values */
+ max_dst_off_ptr = dst_off_arr + dst_max_nseq;
+ max_src_off_ptr = src_off_arr + src_max_nseq;
+
+ /* Compute buffer offsets */
+ dst = (unsigned char *)_dst + *dst_off_ptr;
+ src = (const unsigned char *)_src + *src_off_ptr;
+
+/* Work through the sequences */
+/* (Choose smallest sequence available initially) */
+
+ /* Source sequence is less than destination sequence */
+ if(tmp_src_len < tmp_dst_len) {
+src_smaller:
+ acc_len = 0;
+ do {
+ /* Copy data */
+ HDmemcpy(dst, src, tmp_src_len);
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_src_len;
+
+ /* Update destination length */
+ tmp_dst_len -= tmp_src_len;
+
+ /* Advance source offset & check for being finished */
+ src_off_ptr++;
+ if(src_off_ptr >= max_src_off_ptr) {
+ /* Roll accumulated changes into appropriate counters */
+ *dst_off_ptr += acc_len;
+ *dst_len_ptr = tmp_dst_len;
+
+ /* Done with sequences */
+ goto finished;
+ } /* end if */
- /* Compute offset on disk */
- dst = (unsigned char *)_dst + dst_off_arr[u];
+ /* Update destination pointer */
+ dst += tmp_src_len;
- /* Compute offset in memory */
- src = (const unsigned char *)_src + src_off_arr[v];
+ /* Update source information */
+ src_len_ptr++;
+ tmp_src_len = *src_len_ptr;
+ src = (const unsigned char *)_src + *src_off_ptr;
+ } while(tmp_src_len < tmp_dst_len);
- /* Copy data */
- HDmemcpy(dst, src, size);
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
- /* Update source information */
- src_len_arr[v] -= size;
- if(0 == src_len_arr[v])
- v++;
+ /* Transition to next state */
+ if(tmp_dst_len < tmp_src_len)
+ goto dst_smaller;
else
- src_off_arr[v] += size;
+ goto equal;
+ } /* end if */
+ /* Destination sequence is less than source sequence */
+ else if(tmp_dst_len < tmp_src_len) {
+dst_smaller:
+ acc_len = 0;
+ do {
+ /* Copy data */
+ HDmemcpy(dst, src, tmp_dst_len);
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_dst_len;
+
+ /* Update source length */
+ tmp_src_len -= tmp_dst_len;
+
+ /* Advance destination offset & check for being finished */
+ dst_off_ptr++;
+ if(dst_off_ptr >= max_dst_off_ptr) {
+ /* Roll accumulated changes into appropriate counters */
+ *src_off_ptr += acc_len;
+ *src_len_ptr = tmp_src_len;
+
+ /* Done with sequences */
+ goto finished;
+ } /* end if */
+
+ /* Update source pointer */
+ src += tmp_dst_len;
+
+ /* Update destination information */
+ dst_len_ptr++;
+ tmp_dst_len = *dst_len_ptr;
+ dst = (unsigned char *)_dst + *dst_off_ptr;
+ } while(tmp_dst_len < tmp_src_len);
- /* Update destination information */
- dst_len_arr[u] -= size;
- if(0 == dst_len_arr[u])
- u++;
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Transition to next state */
+ if(tmp_src_len < tmp_dst_len)
+ goto src_smaller;
+ else
+ goto equal;
+ } /* end else-if */
+ /* Destination sequence and source sequence are same length */
+ else {
+equal:
+ acc_len = 0;
+ do {
+ /* Copy data */
+ HDmemcpy(dst, src, tmp_dst_len);
+
+ /* Accumulate number of bytes copied */
+ acc_len += tmp_dst_len;
+
+ /* Advance source & destination offset & check for being finished */
+ src_off_ptr++;
+ dst_off_ptr++;
+ if(src_off_ptr >= max_src_off_ptr || dst_off_ptr >= max_dst_off_ptr)
+ /* Done with sequences */
+ goto finished;
+
+ /* Update source information */
+ src_len_ptr++;
+ tmp_src_len = *src_len_ptr;
+ src = (const unsigned char *)_src + *src_off_ptr;
+
+ /* Update destination information */
+ dst_len_ptr++;
+ tmp_dst_len = *dst_len_ptr;
+ dst = (unsigned char *)_dst + *dst_off_ptr;
+ } while(tmp_dst_len == tmp_src_len);
+
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
+
+ /* Transition to next state */
+ if(tmp_dst_len < tmp_src_len)
+ goto dst_smaller;
else
- dst_off_arr[u] += size;
+ goto src_smaller;
+ } /* end else */
- /* Increment number of bytes copied */
- ret_value += (ssize_t)size;
- } /* end for */
+finished:
+ /* Roll accumulated sequence lengths into return value */
+ ret_value += (ssize_t)acc_len;
/* Update current sequence vectors */
- *dst_curr_seq = u;
- *src_curr_seq = v;
+ *dst_curr_seq = (size_t)(dst_off_ptr - dst_off_arr);
+ *src_curr_seq = (size_t)(src_off_ptr - src_off_arr);
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5V_memcpyvv() */