summary | refs | log | tree | commit | diff | stats
path: root/tests/benchmarks
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@nokia.com> 2010-08-24 10:15:36 (GMT)
committer: Thiago Macieira <thiago.macieira@nokia.com> 2010-08-24 10:36:45 (GMT)
commit: 9a468f59472e8978aa18b75e9786718a8823bbec (patch)
tree: f4d10c172a4b1adf836dcdd4551b70c412a8255e /tests/benchmarks
parent: 469210fdf9287dc8a4933e0761bbff91a031045f (diff)
download: Qt-9a468f59472e8978aa18b75e9786718a8823bbec.zip
Qt-9a468f59472e8978aa18b75e9786718a8823bbec.tar.gz
Qt-9a468f59472e8978aa18b75e9786718a8823bbec.tar.bz2
Unroll the SSSE3 code even more to avoid the need to keep an extra variable for inverting the result.
On 32-bit, we are already out of registers, so this variable ended up in memory.
Diffstat (limited to 'tests/benchmarks')
-rw-r--r-- tests/benchmarks/corelib/tools/qstring/main.cpp 88
1 file changed, 53 insertions, 35 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 3b86792..9616052 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -1218,42 +1218,60 @@ static int ucstrncmp_ssse3_aligning2(const ushort *a, const ushort *b, int len)
// both a and b are misaligned
// we'll call the alignr function with the alignment *difference* between the two
- int val = (quintptr(a) & 0xf) - (quintptr(b) & 0xf);
- bool invert = false;
- if (val < 0) {
- val = -val;
- //qSwap(a, b);
- asm ("xchg %0, %1" : "+r" (a), "+r" (b));
- invert = true;
- }
-
- // from this point on, b has the shortest alignment
- // and align(a) = align(b) + val
- // round down the alignment so align(b) == align(a) == 0
- int garbage = (quintptr(b) & 0xf);
- a = (const ushort*)(quintptr(a) & ~0xf);
- b = (const ushort*)(quintptr(b) & ~0xf);
-
- // now the first load of b will load 'garbage' extra bytes
- // and the first load of a will load 'garbage + val' extra bytes
- if (val == 8)
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<8>(a, b, len, garbage), invert);
- else if (val == 0)
- return conditional_invert(ucstrncmp_ssse3_aligning2_aligned(a, b, len, garbage), invert);
- if (val < 8) {
- if (val < 4)
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<2>(a, b, len, garbage), invert);
- else if (val == 4)
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<4>(a, b, len, garbage), invert);
- else
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<6>(a, b, len, garbage), invert);
+ int offset = (quintptr(a) & 0xf) - (quintptr(b) & 0xf);
+ if (offset >= 0) {
+ // from this point on, b has the shortest alignment
+ // and align(a) = align(b) + offset
+ // round down the alignment so align(b) == align(a) == 0
+ int garbage = (quintptr(b) & 0xf);
+ a = (const ushort*)(quintptr(a) & ~0xf);
+ b = (const ushort*)(quintptr(b) & ~0xf);
+
+ // now the first load of b will load 'garbage' extra bytes
+ // and the first load of a will load 'garbage + offset' extra bytes
+ if (offset == 8)
+ return ucstrncmp_ssse3_aligning2_alignr<8>(a, b, len, garbage);
+ if (offset == 0)
+ return ucstrncmp_ssse3_aligning2_aligned(a, b, len, garbage);
+ if (offset < 8) {
+ if (offset < 4)
+ return ucstrncmp_ssse3_aligning2_alignr<2>(a, b, len, garbage);
+ else if (offset == 4)
+ return ucstrncmp_ssse3_aligning2_alignr<4>(a, b, len, garbage);
+ else
+ return ucstrncmp_ssse3_aligning2_alignr<6>(a, b, len, garbage);
+ } else {
+ if (offset < 12)
+ return ucstrncmp_ssse3_aligning2_alignr<10>(a, b, len, garbage);
+ else if (offset == 12)
+ return ucstrncmp_ssse3_aligning2_alignr<12>(a, b, len, garbage);
+ else
+ return ucstrncmp_ssse3_aligning2_alignr<14>(a, b, len, garbage);
+ }
} else {
- if (val < 12)
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<10>(a, b, len, garbage), invert);
- else if (val == 12)
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<12>(a, b, len, garbage), invert);
- else
- return conditional_invert(ucstrncmp_ssse3_aligning2_alignr<14>(a, b, len, garbage), invert);
+ // same as above but inverted
+ int garbage = (quintptr(a) & 0xf);
+ a = (const ushort*)(quintptr(a) & ~0xf);
+ b = (const ushort*)(quintptr(b) & ~0xf);
+
+ offset = -offset;
+ if (offset == 8)
+ return -ucstrncmp_ssse3_aligning2_alignr<8>(b, a, len, garbage);
+ if (offset < 8) {
+ if (offset < 4)
+ return -ucstrncmp_ssse3_aligning2_alignr<2>(b, a, len, garbage);
+ else if (offset == 4)
+ return -ucstrncmp_ssse3_aligning2_alignr<4>(b, a, len, garbage);
+ else
+ return -ucstrncmp_ssse3_aligning2_alignr<6>(b, a, len, garbage);
+ } else {
+ if (offset < 12)
+ return -ucstrncmp_ssse3_aligning2_alignr<10>(b, a, len, garbage);
+ else if (offset == 12)
+ return -ucstrncmp_ssse3_aligning2_alignr<12>(b, a, len, garbage);
+ else
+ return -ucstrncmp_ssse3_aligning2_alignr<14>(b, a, len, garbage);
+ }
}
}