diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2001-08-17 22:16:11 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2001-08-17 22:16:11 (GMT) |
commit | 5b83004cfb506560073b0728598105e01d771dc6 (patch) | |
tree | 93c0d1c8ee43f3a414afc09a125f43e2d27dc5fb /src | |
parent | 04ca207930fe60702979611848a168a74b548339 (diff) | |
download | hdf5-5b83004cfb506560073b0728598105e01d771dc6.zip hdf5-5b83004cfb506560073b0728598105e01d771dc6.tar.gz hdf5-5b83004cfb506560073b0728598105e01d771dc6.tar.bz2 |
[svn-r4377] Purpose:
Code improvement
Description:
The byte swapping routine for data conversion was inefficient.
Solution:
Applied a number of optimizations which should yield around a 2-3 times
faster algorithm.
Platforms tested:
Solaris 2.6 (baldric)
Diffstat (limited to 'src')
-rw-r--r-- | src/H5Tconv.c | 541 |
1 files changed, 534 insertions, 7 deletions
diff --git a/src/H5Tconv.c b/src/H5Tconv.c index 941149c..2b205fd 100644 --- a/src/H5Tconv.c +++ b/src/H5Tconv.c @@ -534,14 +534,541 @@ H5T_conv_order(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, NULL == (dst = H5I_object(dst_id))) { HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data type"); } + md = src->size / 2; - for (i=0; i<nelmts; i++, buf+=buf_stride?buf_stride:src->size) { - for (j=0; j<md; j++) { - tmp = buf[j]; - buf[j] = buf[src->size-(j+1)]; - buf[src->size-(j+1)] = tmp; - } - } + buf_stride = buf_stride ? buf_stride : src->size; + + /* Optimize for popular sizes */ + switch(md) { + case 1: /* Swap 2-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i<nelmts; i++, buf+=buf_stride) { + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + } +#else /* NO_DUFFS_DEVICE */ +{ + size_t duff_count = (nelmts + 7) / 8; + + switch (duff_count % 8) + { + case 0: + do + { + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 7: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 6: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 5: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 4: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 3: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 2: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + case 1: + /* Swap the byte pair */ + tmp = buf[0]; + buf[0] = buf[1]; + buf[1] = tmp; + buf+=buf_stride; + } + while (--duff_count > 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + case 2: /* Swap 4-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i<nelmts; i++, buf+=buf_stride) { + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + } +#else /* NO_DUFFS_DEVICE */ +{ + size_t duff_count = (nelmts + 7) / 8; + + switch (duff_count % 8) + { + case 0: + do + { + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 7: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 6: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 5: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 4: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 3: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 2: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 1: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[3]; + buf[3] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[1]; + buf[1] = buf[2]; + buf[2] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + } + while (--duff_count > 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + case 4: /* Swap 8-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i<nelmts; i++, buf+=buf_stride) { + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + } +#else /* NO_DUFFS_DEVICE */ +{ + size_t duff_count = (nelmts + 7) / 8; + + switch (duff_count % 8) + { + case 0: + do + { + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 7: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 6: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 5: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 4: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 3: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 2: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + case 1: + /* Swap the outer pair of bytes */ + tmp = buf[0]; + buf[0] = buf[7]; + buf[7] = tmp; + + /* Swap the next-outer pair of bytes */ + tmp = buf[1]; + buf[1] = buf[6]; + buf[6] = tmp; + + /* Swap the next-next-outer pair of bytes */ + tmp = buf[2]; + buf[2] = buf[5]; + buf[5] = tmp; + + /* Swap the inner pair of bytes */ + tmp = buf[3]; + buf[3] = buf[4]; + buf[4] = tmp; + + /* Advance the pointer */ + buf+=buf_stride; + } + while (--duff_count > 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + + default: /* Swap n-byte objects */ +#ifdef NO_DUFFS_DEVICE + for (i=0; i<nelmts; i++, buf+=buf_stride) { + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + } +#else /* NO_DUFFS_DEVICE */ +{ + size_t duff_count = (nelmts + 7) / 8; + + switch (duff_count % 8) + { + case 0: + do + { + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 7: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 6: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 5: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 4: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 3: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 2: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + case 1: + /* Generic byte-swapping loop */ + for (j=0; j<md; j++) { + tmp = buf[j]; + buf[j] = buf[src->size-(j+1)]; + buf[src->size-(j+1)] = tmp; + } + + /* Advance the pointer */ + buf+=buf_stride; + } + while (--duff_count > 0); + } +} +#endif /* NO_DUFFS_DEVICE */ + break; + } /* end switch */ break; case H5T_CONV_FREE: |