diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2010-08-18 08:52:27 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2010-08-24 10:36:38 (GMT) |
commit | 2bd9d7fbec0bb61298ba0f48a93a4a186b558a38 (patch) | |
tree | 7fa14d2447a26d2c6a86403e1e637162a659a816 /tests/benchmarks | |
parent | 47b98097ac26de6153c7a4894aaa110133e478dc (diff) | |
download | Qt-2bd9d7fbec0bb61298ba0f48a93a4a186b558a38.zip Qt-2bd9d7fbec0bb61298ba0f48a93a4a186b558a38.tar.gz Qt-2bd9d7fbec0bb61298ba0f48a93a4a186b558a38.tar.bz2 |
Add an SSE2-optimised version of ucstrncmp
First results make it 34% faster than the current ucstrncmp and 16% faster than
the 32-bit version.
Diffstat (limited to 'tests/benchmarks')
-rw-r--r-- | tests/benchmarks/corelib/tools/qstring/main.cpp | 39 |
1 file changed, 38 insertions, 1 deletion
```diff
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 3d88756..f338e49 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -880,6 +880,41 @@ static int ucstrncmp_intwise(const ushort *a, const ushort *b, int len)
     }
 }
 
+#ifdef __SSE2__
+static inline int bsf_nonzero(register long val)
+{
+    int result;
+# ifdef Q_CC_GNU
+    // returns the first non-zero bit on a non-zero reg
+    asm ("bsf %1, %0" : "=r" (result) : "r" (val));
+    return result;
+# elif defined(Q_CC_MSVC)
+    _BitScanForward(&result, val);
+    return result;
+# endif
+}
+
+static __attribute__((optimize("no-unroll-loops"))) int ucstrncmp_sse2(const ushort *a, const ushort *b, int len)
+{
+    qptrdiff counter = 0;
+    while (len >= 8) {
+        __m128i m1 = _mm_loadu_si128((__m128i *)(a + counter));
+        __m128i m2 = _mm_loadu_si128((__m128i *)(b + counter));
+        __m128i cmp = _mm_cmpeq_epi16(m1, m2);
+        ushort mask = ~uint(_mm_movemask_epi8(cmp));
+        if (mask) {
+            // which ushort isn't equal?
+            counter += bsf_nonzero(mask)/2;
+            return a[counter] - b[counter];
+        }
+
+        counter += 8;
+        len -= 8;
+    }
+    return ucstrncmp_shortwise(a + counter, b + counter, len);
+}
+#endif
+
 typedef int (* UcstrncmpFunction)(const ushort *, const ushort *, int);
 Q_DECLARE_METATYPE(UcstrncmpFunction)
 
@@ -889,6 +924,7 @@ void tst_QString::ucstrncmp_data() const
     QTest::newRow("selftest") << UcstrncmpFunction(0);
     QTest::newRow("shortwise") << &ucstrncmp_shortwise;
     QTest::newRow("intwise") << &ucstrncmp_intwise;
+    QTest::newRow("sse2") << &ucstrncmp_sse2;
 }
 
 void tst_QString::ucstrncmp() const
@@ -897,7 +933,8 @@ void tst_QString::ucstrncmp() const
     if (!function) {
         static const UcstrncmpFunction func[] = {
             &ucstrncmp_shortwise,
-            &ucstrncmp_intwise
+            &ucstrncmp_intwise,
+            &ucstrncmp_sse2
         };
         static const int functionCount = sizeof func / sizeof func[0];
 
```