summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Quincey Koziol <koziol@hdfgroup.org> 2003-11-07 20:21:43 (GMT)
committer: Quincey Koziol <koziol@hdfgroup.org> 2003-11-07 20:21:43 (GMT)
commit: a1ee1c91907ce4f43e3f4392b3b2141d444d67f6 (patch)
tree: 2748cfb97f75fc7e6d4dbb4f57c35ca65945384b
parent: c84ab58792ab507aa4fcf0bcc7dc69e9b11f671c (diff)
download: hdf5-a1ee1c91907ce4f43e3f4392b3b2141d444d67f6.zip
hdf5-a1ee1c91907ce4f43e3f4392b3b2141d444d67f6.tar.gz
hdf5-a1ee1c91907ce4f43e3f4392b3b2141d444d67f6.tar.bz2
[svn-r7824] Purpose:
    Code optimization

Description:
    Improved integer & floating-point datatype conversions by removing some
    corner cases. Got rid of the algorithm which walked the buffer of elements
    to convert backwards, switching to an algorithm which computes the
    non-overlapping space in the buffer and repeatedly converts it in the
    forward direction.

Platforms tested:
    FreeBSD 4.9 (sleipnir)
    h5committest
    Cray SV1 & T3E (T90 is not accessible)
-rw-r--r-- release_docs/RELEASE.txt | 2
-rw-r--r-- src/H5Tconv.c | 149
2 files changed, 98 insertions, 53 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 6cc21c8..0258653 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -166,6 +166,8 @@ Bug Fixes since HDF5-1.6.0 release
Performance
-------------
+ - More optimizations to inner loops of datatype conversions for
+ integers and floats which give a 10-50% speedup. QAK - 2003/11/07
- Hoisted invariant 'if/else's out of inner datatype conversion loop for
integer and floating-point values, giving about a 20% speedup.
QAK - 2003/10/20
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index e9fbf90..897f20b 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -172,10 +172,6 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
* as the destination. Overflows can occur when the destination is
* narrower than the source.
*
- * SU: Generic signed to unsigned conversion where the source is
- * the same size or smaller than the destination. Overflow occurs
- * when the source value is negative.
- *
* Ux: Generic conversion for the `Us', `Uu' & `us' cases
* Overflow occurs when the source magnitude is too large for the
* destination.
@@ -196,14 +192,6 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
*((DT*)D) = (DT)(*((ST*)S)); \
}
-#define H5T_CONV_SU_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
- if (*((ST*)S)<0) { \
- if (!H5T_overflow_g || (H5T_overflow_g)(src_id, dst_id, S, D)<0) \
- *((DT*)D) = 0; \
- } else \
- *((DT*)D) = (DT)(*((ST*)S)); \
-}
-
#define H5T_CONV_Ux_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
if (*((ST*)S) > (D_MAX)) { \
if (!H5T_overflow_g || (H5T_overflow_g)(src_id, dst_id, S, D)<0) \
@@ -214,12 +202,20 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
#define H5T_CONV_sS(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)<=sizeof(DT)); \
- H5T_CONV(H5T_CONV_xX, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, nelmts-1) \
+ H5T_CONV(H5T_CONV_xX, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
+}
+
+#define H5T_CONV_sU_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
+ if (*((ST*)S)<0) { \
+ if (!H5T_overflow_g || (H5T_overflow_g)(src_id, dst_id, S, D)<0) \
+ *((DT*)D) = 0; \
+ } else \
+ *((DT*)D) = (DT)(*((ST*)S)); \
}
#define H5T_CONV_sU(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)<=sizeof(DT)); \
- H5T_CONV(H5T_CONV_SU, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, nelmts-1) \
+ H5T_CONV(H5T_CONV_sU, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_uS_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
@@ -232,17 +228,17 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
#define H5T_CONV_uS(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)<=sizeof(DT)); \
- H5T_CONV(H5T_CONV_uS, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, nelmts-1) \
+ H5T_CONV(H5T_CONV_uS, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_uU(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)<=sizeof(DT)); \
- H5T_CONV(H5T_CONV_xX, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, nelmts-1) \
+ H5T_CONV(H5T_CONV_xX, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_Ss(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)>=sizeof(DT)); \
- H5T_CONV(H5T_CONV_Xx, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_Xx, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_Su_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
@@ -259,48 +255,65 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
#define H5T_CONV_Su(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)>=sizeof(DT)); \
- H5T_CONV(H5T_CONV_Su, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_Su, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_Us(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)>=sizeof(DT)); \
- H5T_CONV(H5T_CONV_Ux, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_Ux, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_Uu(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)>=sizeof(DT)); \
- H5T_CONV(H5T_CONV_Ux, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_Ux, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
+}
+
+#define H5T_CONV_su_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
+ /* Assumes memory format of unsigned & signed integers is same */ \
+ if (*((ST*)S)<0) { \
+ if (!H5T_overflow_g || (H5T_overflow_g)(src_id, dst_id, S, D)<0) \
+ *((DT*)D) = 0; \
+ } \
}
#define H5T_CONV_su(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)==sizeof(DT)); \
- H5T_CONV(H5T_CONV_SU, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_su, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
+}
+
+#define H5T_CONV_us_CORE(S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
+ /* Assumes memory format of unsigned & signed integers is same */ \
+ if (*((ST*)S) > (D_MAX)) { \
+ if (!H5T_overflow_g || (H5T_overflow_g)(src_id, dst_id, S, D)<0) \
+ *((DT*)D) = (D_MAX); \
+ } \
}
#define H5T_CONV_us(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)==sizeof(DT)); \
- H5T_CONV(H5T_CONV_Ux, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_us, long_long, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_fF(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)<=sizeof(DT)); \
- H5T_CONV(H5T_CONV_xX, double, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, nelmts-1) \
+ H5T_CONV(H5T_CONV_xX, double, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
#define H5T_CONV_Ff(STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
assert(sizeof(ST)>=sizeof(DT)); \
- H5T_CONV(H5T_CONV_Xx, double, STYPE, DTYPE, ST, DT, D_MIN, D_MAX, 0) \
+ H5T_CONV(H5T_CONV_Xx, double, STYPE, DTYPE, ST, DT, D_MIN, D_MAX) \
}
/* The main part of every integer hardware conversion macro */
-#define H5T_CONV(GUTS,ATYPE,STYPE,DTYPE,ST,DT,D_MIN,D_MAX,STRT) { \
- hsize_t elmtno; /*element number */ \
+#define H5T_CONV(GUTS,ATYPE,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) { \
+ size_t elmtno; /*element number */ \
uint8_t *src, *s; /*source buffer */ \
uint8_t *dst, *d; /*destination buffer */ \
H5T_t *st, *dt; /*data type descriptors */ \
ATYPE aligned; /*aligned type */ \
hbool_t s_mv, d_mv; /*move data to align it? */ \
- size_t s_stride, d_stride; /*src and dst strides */ \
+ ssize_t s_stride, d_stride; /*src and dst strides */ \
+ size_t safe; /* How many elements are safe to process in each pass */ \
\
switch (cdata->command) { \
case H5T_CONV_INIT: \
@@ -322,21 +335,15 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
break; \
\
case H5T_CONV_CONV: \
- /* Initialize pointers */ \
+ /* Initialize source & destination strides */ \
if (buf_stride) { \
+ assert(buf_stride>=sizeof(ST)); \
+ assert(buf_stride>=sizeof(DT)); \
s_stride = d_stride = buf_stride; \
} else { \
s_stride = sizeof(ST); \
d_stride = sizeof(DT); \
} \
- if (STRT) { \
- src = (uint8_t*)buf+(STRT)*s_stride; \
- dst = (uint8_t*)buf+(STRT)*d_stride; \
- s_stride = -s_stride; \
- d_stride = -d_stride; \
- } else { \
- src = dst = buf; \
- } \
\
/* Is alignment required for source or dest? */ \
s_mv = H5T_NATIVE_##STYPE##_ALIGN_g>1 && \
@@ -350,21 +357,57 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
CI_INC_SRC(s_mv) \
CI_INC_DST(d_mv) \
\
- if (s_mv && d_mv) { \
- /* Alignment is required for both source and dest */ \
- s = (uint8_t*)&aligned; \
- H5T_CONV_LOOP(PRE_SALIGN,PRE_DALIGN,POST_SALIGN,POST_DALIGN,GUTS,s,d,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
- } else if(s_mv) { \
- /* Alignment is required only for source */ \
- s = (uint8_t*)&aligned; \
- H5T_CONV_LOOP(PRE_SALIGN,PRE_DNOALIGN,POST_SALIGN,POST_DNOALIGN,GUTS,s,dst,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
- } else if(d_mv) { \
- /* Alignment is required only for destination */ \
- H5T_CONV_LOOP(PRE_SNOALIGN,PRE_DALIGN,POST_SNOALIGN,POST_DALIGN,GUTS,src,d,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
- } else { \
- /* Alignment is not required for both source and destination */ \
- H5T_CONV_LOOP(PRE_SNOALIGN,PRE_DNOALIGN,POST_SNOALIGN,POST_DNOALIGN,GUTS,src,dst,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
- } \
+ /* The outer loop of the type conversion macro, controlling which */ \
+ /* direction the buffer is walked */ \
+ while (nelmts>0) { \
+ /* Check if we need to go backwards through the buffer */ \
+ if(d_stride>s_stride) { \
+ /* Compute the number of "safe" destination elements at */ \
+ /* the end of the buffer (Those which don't overlap with */ \
+ /* any source elements at the beginning of the buffer) */ \
+ safe=nelmts-(((nelmts*s_stride)+(d_stride-1))/d_stride); \
+ \
+ /* If we're down to the last few elements, just wrap up */ \
+ /* with a "real" reverse copy */ \
+ if(safe<2) { \
+ src = (uint8_t*)buf+(nelmts-1)*s_stride; \
+ dst = (uint8_t*)buf+(nelmts-1)*d_stride; \
+ s_stride = -s_stride; \
+ d_stride = -d_stride; \
+ \
+ safe=nelmts; \
+ } /* end if */ \
+ else { \
+ src = (uint8_t*)buf+(nelmts-safe)*s_stride; \
+ dst = (uint8_t*)buf+(nelmts-safe)*d_stride; \
+ } /* end else */ \
+ } /* end if */ \
+ else { \
+ /* Single forward pass over all data */ \
+ src = dst = buf; \
+ safe=nelmts; \
+ } /* end else */ \
+ \
+ /* Perform loop over elements to convert */ \
+ if (s_mv && d_mv) { \
+ /* Alignment is required for both source and dest */ \
+ s = (uint8_t*)&aligned; \
+ H5T_CONV_LOOP(PRE_SALIGN,PRE_DALIGN,POST_SALIGN,POST_DALIGN,GUTS,s,d,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
+ } else if(s_mv) { \
+ /* Alignment is required only for source */ \
+ s = (uint8_t*)&aligned; \
+ H5T_CONV_LOOP(PRE_SALIGN,PRE_DNOALIGN,POST_SALIGN,POST_DNOALIGN,GUTS,s,dst,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
+ } else if(d_mv) { \
+ /* Alignment is required only for destination */ \
+ H5T_CONV_LOOP(PRE_SNOALIGN,PRE_DALIGN,POST_SNOALIGN,POST_DALIGN,GUTS,src,d,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
+ } else { \
+ /* Alignment is not required for both source and destination */ \
+ H5T_CONV_LOOP(PRE_SNOALIGN,PRE_DNOALIGN,POST_SNOALIGN,POST_DNOALIGN,GUTS,src,dst,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
+ } \
+ \
+ /* Decrement number of elements left to convert */ \
+ nelmts-=safe; \
+ } /* end while */ \
break; \
\
default: \
@@ -408,9 +451,9 @@ H5FL_BLK_DEFINE_STATIC(array_seq);
#define H5T_CONV_LOOP_POST_DNOALIGN(DT) { \
}
-/* The inner loop of the type conversion macro */
+/* The inner loop of the type conversion macro, actually converting the elements */
#define H5T_CONV_LOOP(PRE_SALIGN_GUTS,PRE_DALIGN_GUTS,POST_SALIGN_GUTS,POST_DALIGN_GUTS,GUTS,S,D,STYPE,DTYPE,ST,DT,D_MIN,D_MAX) \
- for (elmtno=0; elmtno<nelmts; elmtno++) { \
+ for (elmtno=0; elmtno<safe; elmtno++) { \
/* Handle source pre-alignment */ \
H5_GLUE(H5T_CONV_LOOP_,PRE_SALIGN_GUTS)(ST) \
\