summary | refs | log | tree | commit | diff | stats
path: root/tests/benchmarks/corelib
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@nokia.com> 2010-08-18 10:26:53 (GMT)
committer: Thiago Macieira <thiago.macieira@nokia.com> 2010-08-24 10:36:40 (GMT)
commit: 08fa99c43897b16f8be924090316f5a4db548c10 (patch)
tree: ea603de164fb1e747246df5eea161ac895d58565 /tests/benchmarks/corelib
parent: 6cea948a0ceabe5493bb2a75f236b008b47fd71e (diff)
download: Qt-08fa99c43897b16f8be924090316f5a4db548c10.zip
Qt-08fa99c43897b16f8be924090316f5a4db548c10.tar.gz
Qt-08fa99c43897b16f8be924090316f5a4db548c10.tar.bz2
Add an SSSE3 version of ucstrncmp
Diffstat (limited to 'tests/benchmarks/corelib')
-rw-r--r-- tests/benchmarks/corelib/tools/qstring/main.cpp 80
1 file changed, 79 insertions, 1 deletion
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 0b7254f..3c3d1ad 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -987,6 +987,82 @@ static __attribute__((optimize("no-unroll-loops"))) int ucstrncmp_sse2_aligning(
return ucstrncmp_short_tail(a + counter, b + counter, len);
}
+/*
+ * Compares the first len ushort code units of a and b, eight units
+ * (16 bytes) per iteration, using SSE2.
+ *
+ * a must be 16-byte aligned because it is read with _mm_load_si128.
+ * b may have any alignment: ucstrncmp_ssse3() calls this function after
+ * inspecting only the alignment of a, so b is read with an unaligned load.
+ *
+ * Returns a[i] - b[i] for the first differing index i found by the vector
+ * loop; otherwise returns the result of ucstrncmp_short_tail() on the
+ * remaining (fewer than eight) code units.
+ */
+static __attribute__((optimize("no-unroll-loops"))) int ucstrncmp_sse2_aligned(const ushort *a, const ushort *b, int len)
+{
+    qptrdiff counter = 0;
+    while (len >= 8) {
+        __m128i m1 = _mm_load_si128((__m128i *)(a + counter));
+        // b's alignment is never checked by the caller, so an aligned load could fault here
+        __m128i m2 = _mm_loadu_si128((__m128i *)(b + counter));
+        __m128i cmp = _mm_cmpeq_epi16(m1, m2);
+        // movemask yields one bit per byte; after inversion a set bit marks a differing byte
+        ushort mask = ~uint(_mm_movemask_epi8(cmp));
+        if (mask) {
+            // which ushort isn't equal? (two mask bits per ushort, hence the /2;
+            // bsf_nonzero is assumed to return the index of the lowest set bit)
+            counter += bsf_nonzero(mask)/2;
+            return a[counter] - b[counter];
+        }
+
+        counter += 8;
+        len -= 8;
+    }
+    return ucstrncmp_short_tail(a + counter, b + counter, len);
+}
+
+/*
+ * SSSE3 comparison for the case where the string to compare starts N bytes
+ * (N/2 ushorts) after the pointer a.
+ *
+ * a is read with aligned 16-byte loads, so it must be 16-byte aligned; each
+ * round fetches the next 16-byte block of a and stitches it together with
+ * the previous block via _mm_alignr_epi8 to obtain the eight ushorts that
+ * start at a + offset + N/2. b is read with an unaligned load.
+ *
+ * The loop body runs once before len is tested, so the caller is expected
+ * to pass len >= 8.
+ *
+ * Returns the difference of the first mismatching pair of code units found
+ * by the vector loop, or the result of ucstrncmp_short_tail() on whatever
+ * is left once fewer than eight code units remain.
+ */
+template<int N> static __attribute__((optimize("no-unroll-loops"))) int ucstrncmp_ssse3_alignr(const ushort *a, const ushort *b, int len)
+{
+    qptrdiff offset = 0;
+    __m128i next = _mm_load_si128((__m128i *)a);
+
+    for (;;) {
+        // slide the window: the block loaded last round becomes the low half
+        const __m128i prev = next;
+        next = _mm_load_si128((__m128i *)(a + offset) + 1);
+
+        const __m128i fromA = _mm_alignr_epi8(next, prev, N);
+        const __m128i fromB = _mm_lddqu_si128((__m128i *)(b + offset));
+
+        // inverted byte mask: a set bit marks a byte that differs
+        ushort differing = ~uint(_mm_movemask_epi8(_mm_cmpeq_epi16(fromA, fromB)));
+        if (differing) {
+            // two mask bits per ushort, so halve the bit index
+            offset += bsf_nonzero(differing)/2;
+            return a[offset + N/2] - b[offset];
+        }
+
+        offset += 8;
+        len -= 8;
+        if (len < 8)
+            break;
+    }
+
+    return ucstrncmp_short_tail(a + offset + N/2, b + offset, len);
+}
+
+/*
+ * Entry point of the SSSE3 comparison: picks a worker according to where a
+ * sits inside its 16-byte block.
+ *
+ * Inputs shorter than eight code units go straight to
+ * ucstrncmp_short_tail(). Otherwise a is moved back to the start of its
+ * 16-byte block and the byte offset selects the matching
+ * ucstrncmp_ssse3_alignr<N> instantiation, or ucstrncmp_sse2_aligned() when
+ * a needs no shift. b is passed through unchanged.
+ */
+static int ucstrncmp_ssse3(const ushort *a, const ushort *b, int len)
+{
+    if (len < 8)
+        return ucstrncmp_short_tail(a, b, len);
+
+    // byte offset of a within its 16-byte block
+    const int misalignment = quintptr(a) & 0xf;
+    a -= misalignment/2;
+
+    // Odd offsets (presumably never seen for ushort pointers -- verify) are
+    // grouped with the same worker a chain of range comparisons would pick.
+    switch (misalignment) {
+    case 2:
+        return ucstrncmp_ssse3_alignr<2>(a, b, len);
+    case 4:
+        return ucstrncmp_ssse3_alignr<4>(a, b, len);
+    case 5:
+    case 6:
+    case 7:
+        return ucstrncmp_ssse3_alignr<6>(a, b, len);
+    case 8:
+    case 9:
+    case 11:
+        return ucstrncmp_ssse3_alignr<8>(a, b, len);
+    case 10:
+        return ucstrncmp_ssse3_alignr<10>(a, b, len);
+    case 12:
+        return ucstrncmp_ssse3_alignr<12>(a, b, len);
+    case 13:
+    case 14:
+    case 15:
+        return ucstrncmp_ssse3_alignr<14>(a, b, len);
+    default:
+        // offsets 0, 1 and 3
+        return ucstrncmp_sse2_aligned(a, b, len);
+    }
+}
+
#endif
typedef int (* UcstrncmpFunction)(const ushort *, const ushort *, int);
@@ -1000,6 +1076,7 @@ void tst_QString::ucstrncmp_data() const
QTest::newRow("intwise") << &ucstrncmp_intwise;
QTest::newRow("sse2") << &ucstrncmp_sse2;
QTest::newRow("sse2_aligning") << &ucstrncmp_sse2_aligning;
+ QTest::newRow("ssse3") << &ucstrncmp_ssse3;
}
void tst_QString::ucstrncmp() const
@@ -1010,7 +1087,8 @@ void tst_QString::ucstrncmp() const
&ucstrncmp_shortwise,
&ucstrncmp_intwise,
&ucstrncmp_sse2,
- &ucstrncmp_sse2_aligning
+ &ucstrncmp_sse2_aligning,
+ &ucstrncmp_ssse3
};
static const int functionCount = sizeof func / sizeof func[0];