summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@nokia.com>2010-08-12 19:18:49 (GMT)
committerThiago Macieira <thiago.macieira@nokia.com>2010-08-24 10:36:32 (GMT)
commit531a8f198c152e1135db103f22bab648a314926e (patch)
tree6c78d8143d338fa2cbdd17c4b1b0c624ad5fe57f /tests
parentd8ad0812c7fdc2684ce7f09cc13b69f567e82031 (diff)
downloadQt-531a8f198c152e1135db103f22bab648a314926e.zip
Qt-531a8f198c152e1135db103f22bab648a314926e.tar.gz
Qt-531a8f198c152e1135db103f22bab648a314926e.tar.bz2
Add an SSE2 comparison with prolog
The prolog advances p1 to a 16-byte boundary so that the main loop can use aligned loads, which are faster. Unfortunately, my tests so far indicate that the prolog costs more time than the aligned loads save.
Diffstat (limited to 'tests')
-rw-r--r--tests/benchmarks/corelib/tools/qstring/main.cpp38
1 files changed, 38 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 5210034..4e5d1c0 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -238,6 +238,42 @@ static bool equals2_sse2(ushort *p1, ushort *p2, int len)
return equals2_shortwise(p1, p2, len);
}
+// Scalar prolog: compares p1/p2 one ushort at a time until p1 reaches a 16-byte boundary (or len elements are used up), advancing p1, p2 and len in place; returns false on the first mismatch, true otherwise.
+static inline
+#ifdef Q_CC_GNU
+__attribute__((always_inline))
+#endif
+bool prolog_align(ushort *&p1, ushort *&p2, int &len)
+{
+ const ushort *end = (ushort*) ((quintptr(p1) + 15) & ~15); // p1's address rounded up to a multiple of 16 (equals p1 when already aligned)
+ if (end > p1 + len) // never scan past the len elements available
+ end = p1 + len;
+ for ( ; p1 != end; ++p1, ++p2, --len) // NOTE(review): hitting `end` exactly presumes p1 sits a whole number of ushorts below it - confirm callers pass ushort-aligned pointers
+ if (*p1 != *p2)
+ return false;
+ return true; // p1 == end here; p2 and len were moved by the same element count
+}
+// SSE2 equality check over len ushorts: runs prolog_align() first so the vector loop can use an aligned load for p1 (p2 is still loaded unaligned), then hands the tail to equals2_shortwise().
+static bool equals2_sse2_aligning(ushort *p1, ushort *p2, int len)
+{
+ if (len > 8) { // inputs of 8 or fewer elements go straight to the fallback below
+ if (!prolog_align(p1, p2, len))
+ return false;
+ while (len > 8) { // each pass consumes 8 ushorts, i.e. one 128-bit register
+ __m128i q1 = _mm_load_si128((__m128i *)p1); // aligned load
+ __m128i q2 = _mm_loadu_si128((__m128i *)p2); // unaligned load: p2's alignment is not adjusted by the prolog
+ __m128i cmp = _mm_cmpeq_epi16(q1, q2); // per 16-bit lane: all ones where the lanes are equal
+ if (ushort(_mm_movemask_epi8(cmp)) != 0xffff) // a clear mask bit means at least one lane differed
+ return false;
+
+ len -= 8;
+ p1 += 8;
+ p2 += 8;
+ }
+ }
+
+ return equals2_shortwise(p1, p2, len); // at most 8 elements remain at this point; equals2_shortwise is defined outside this hunk
+}
#endif
void tst_QString::equals2_data() const
@@ -250,6 +286,7 @@ void tst_QString::equals2_data() const
QTest::newRow("intwise") << 3;
#ifdef __SSE2__
QTest::newRow("sse2") << 4;
+ QTest::newRow("sse2_aligning") << 5;
#endif
}
@@ -300,6 +337,7 @@ void tst_QString::equals2() const
equals2_intwise, // 3
#ifdef __SSE2__
equals2_sse2, // 4
+ equals2_sse2_aligning, // 5
#endif
0
};