diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2005-04-08 00:44:13 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2005-04-08 00:44:13 (GMT) |
commit | b6c87bcdac7d8cab42243be5b8472adef1a0caab (patch) | |
tree | 59e1e2b68ff6457a23d147d13aa5a437a59c0313 | |
parent | 227c58bd98436166cfca202df93c9acc36c5cb30 (diff) | |
download | hdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.zip hdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.tar.gz hdf5-b6c87bcdac7d8cab42243be5b8472adef1a0caab.tar.bz2 |
[svn-r10576] Purpose:
Code optimization
Description:
Use Duff's Device to unroll the shuffling loops a bit, for a ~30% speedup.
Platforms tested:
FreeBSD 4.11 (sleipnir)
Solaris 2.9 (shanti)
-rw-r--r-- | src/H5Zshuffle.c | 82 |
1 file changed, 76 insertions, 6 deletions
diff --git a/src/H5Zshuffle.c b/src/H5Zshuffle.c index 84b6a8f..202e15b 100644 --- a/src/H5Zshuffle.c +++ b/src/H5Zshuffle.c @@ -158,10 +158,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[], /* Input; unshuffle */ for(i=0; i<bytesoftype; i++) { _dest=((unsigned char *)dest)+i; - for(j=0; j<numofelements; j++) { - *_dest=*_src++; - _dest+=bytesoftype; +#define DUFF_GUTS \ + *_dest=*_src++; \ + _dest+=bytesoftype; +#ifdef NO_DUFFS_DEVICE + j = numofelements; + while(j > 0) { + DUFF_GUTS; + + j--; } /* end for */ +#else /* NO_DUFFS_DEVICE */ + { + size_t duffs_index; /* Counting index for Duff's device */ + + duffs_index = (numofelements + 7) / 8; + switch (numofelements % 8) { + case 0: + do + { + DUFF_GUTS + case 7: + DUFF_GUTS + case 6: + DUFF_GUTS + case 5: + DUFF_GUTS + case 4: + DUFF_GUTS + case 3: + DUFF_GUTS + case 2: + DUFF_GUTS + case 1: + DUFF_GUTS + } while (--duffs_index > 0); + } /* end switch */ + } +#endif /* NO_DUFFS_DEVICE */ +#undef DUFF_GUTS } /* end for */ /* Add leftover to the end of data */ @@ -178,10 +213,45 @@ H5Z_filter_shuffle(unsigned flags, size_t cd_nelmts, const unsigned cd_values[], /* Output; shuffle */ for(i=0; i<bytesoftype; i++) { _src=((unsigned char *)(*buf))+i; - for(j=0; j<numofelements; j++) { - *_dest++=*_src; - _src+=bytesoftype; +#define DUFF_GUTS \ + *_dest++=*_src; \ + _src+=bytesoftype; +#ifdef NO_DUFFS_DEVICE + j = numofelements; + while(j > 0) { + DUFF_GUTS; + + j--; } /* end for */ +#else /* NO_DUFFS_DEVICE */ + { + size_t duffs_index; /* Counting index for Duff's device */ + + duffs_index = (numofelements + 7) / 8; + switch (numofelements % 8) { + case 0: + do + { + DUFF_GUTS + case 7: + DUFF_GUTS + case 6: + DUFF_GUTS + case 5: + DUFF_GUTS + case 4: + DUFF_GUTS + case 3: + DUFF_GUTS + case 2: + DUFF_GUTS + case 1: + DUFF_GUTS + } while (--duffs_index > 0); + } /* end switch */ + } +#endif /* NO_DUFFS_DEVICE */ +#undef DUFF_GUTS } /* end for */ /* Add leftover to the end of data 
*/ |