summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2005-04-08 00:44:13 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2005-04-08 00:44:13 (GMT)
commitb6c87bcdac7d8cab42243be5b8472adef1a0caab (patch)
tree59e1e2b68ff6457a23d147d13aa5a437a59c0313
parent227c58bd98436166cfca202df93c9acc36c5cb30 (diff)
downloadhdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.zip
hdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.tar.gz
hdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.tar.bz2
[svn-r10576] Purpose:
Code optimization. Description: Use Duff's Device to unroll the shuffling loop a bit, for a ~30% speedup. Platforms tested: FreeBSD 4.11 (sleipnir), Solaris 2.9 (shanti).
-rw-r--r-- src/H5Zshuffle.c 82
1 files changed, 76 insertions, 6 deletions
diff --git a/src/H5Zshuffle.c b/src/H5Zshuffle.c
index 84b6a8f..202e15b 100644
--- a/src/H5Zshuffle.c
+++ b/src/H5Zshuffle.c
@@ -158,10 +158,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[],
/* Input; unshuffle */
for(i=0; i<bytesoftype; i++) {
_dest=((unsigned char *)dest)+i;
- for(j=0; j<numofelements; j++) {
- *_dest=*_src++;
- _dest+=bytesoftype;
+#define DUFF_GUTS \
+ *_dest=*_src++; \
+ _dest+=bytesoftype;
+#ifdef NO_DUFFS_DEVICE
+ j = numofelements;
+ while(j > 0) {
+ DUFF_GUTS;
+
+ j--;
} /* end for */
+#else /* NO_DUFFS_DEVICE */
+ {
+ size_t duffs_index; /* Counting index for Duff's device */
+
+ duffs_index = (numofelements + 7) / 8;
+ switch (numofelements % 8) {
+ case 0:
+ do
+ {
+ DUFF_GUTS
+ case 7:
+ DUFF_GUTS
+ case 6:
+ DUFF_GUTS
+ case 5:
+ DUFF_GUTS
+ case 4:
+ DUFF_GUTS
+ case 3:
+ DUFF_GUTS
+ case 2:
+ DUFF_GUTS
+ case 1:
+ DUFF_GUTS
+ } while (--duffs_index > 0);
+ } /* end switch */
+ }
+#endif /* NO_DUFFS_DEVICE */
+#undef DUFF_GUTS
} /* end for */
/* Add leftover to the end of data */
@@ -178,10 +213,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[],
/* Output; shuffle */
for(i=0; i<bytesoftype; i++) {
_src=((unsigned char *)(*buf))+i;
- for(j=0; j<numofelements; j++) {
- *_dest++=*_src;
- _src+=bytesoftype;
+#define DUFF_GUTS \
+ *_dest++=*_src; \
+ _src+=bytesoftype;
+#ifdef NO_DUFFS_DEVICE
+ j = numofelements;
+ while(j > 0) {
+ DUFF_GUTS;
+
+ j--;
} /* end for */
+#else /* NO_DUFFS_DEVICE */
+ {
+ size_t duffs_index; /* Counting index for Duff's device */
+
+ duffs_index = (numofelements + 7) / 8;
+ switch (numofelements % 8) {
+ case 0:
+ do
+ {
+ DUFF_GUTS
+ case 7:
+ DUFF_GUTS
+ case 6:
+ DUFF_GUTS
+ case 5:
+ DUFF_GUTS
+ case 4:
+ DUFF_GUTS
+ case 3:
+ DUFF_GUTS
+ case 2:
+ DUFF_GUTS
+ case 1:
+ DUFF_GUTS
+ } while (--duffs_index > 0);
+ } /* end switch */
+ }
+#endif /* NO_DUFFS_DEVICE */
+#undef DUFF_GUTS
} /* end for */
/* Add leftover to the end of data */