From 6c8e0d20a8362166e9047c95bf9335d2bdb9eb21 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Fri, 17 Aug 2001 17:16:51 -0500 Subject: [svn-r4378] Purpose: Code improvement Description: The byte swapping routine for data conversion was inefficient. Solution: Applied a number of optimizations which should yield around a 2-3 times faster algorithm. Platforms tested: Solaris 2.6 (baldric) --- src/H5Tconv.c | 541 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 534 insertions(+), 7 deletions(-) diff --git a/src/H5Tconv.c b/src/H5Tconv.c index fc23bd1..3f9890b 100644 --- a/src/H5Tconv.c +++ b/src/H5Tconv.c @@ -534,14 +534,541 @@ H5T_conv_order(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, NULL == (dst = H5I_object(dst_id))) { HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data type"); } + md = src->size / 2; - for (i=0; isize) { - for (j=0; jsize-(j+1)]; - buf[src->size-(j+1)] = tmp; - } - } + buf_stride = buf_stride ? buf_stride : src->size; + + /* Optimize for popular sizes */ + switch(md) { + case 1: /* Swap 2-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + case 2: /* Swap 4-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + case 4: /* Swap 8-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + default: /* Swap n-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; isize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + } +#else /* NO_DUFFS_DEVICE */ +{ + size_t duff_count = (nelmts + 7) / 8; + + switch (duff_count % 8) + { + case 0: + do + { + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 7: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 6: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 5: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 4: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 3: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 2: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 1: + /* Generic byte-swapping loop */ + for (j=0; jsize-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + } + while (--duff_count > 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + } /* end switch */ break; case H5T_CONV_FREE: -- cgit v0.12