summaryrefslogtreecommitdiffstats
path: root/tests/benchmarks
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@nokia.com>2010-08-12 22:59:27 (GMT)
committerThiago Macieira <thiago.macieira@nokia.com>2010-08-24 10:36:33 (GMT)
commita9dae34157aeef00512bea49f11a8dd1895f676b (patch)
tree9ef3d5e6e97fe55a4f73c4e7588f6e25f80ce4cd /tests/benchmarks
parent7790cf5b2922a7adf684dc0b7cd0fc1583c0684a (diff)
downloadQt-a9dae34157aeef00512bea49f11a8dd1895f676b.zip
Qt-a9dae34157aeef00512bea49f11a8dd1895f676b.tar.gz
Qt-a9dae34157aeef00512bea49f11a8dd1895f676b.tar.bz2
Add an SSE4.2 version of the string comparison
It's currently slightly worse than SSE2 with prolog aligning (i.e., it's no good)
Diffstat (limited to 'tests/benchmarks')
-rw-r--r--tests/benchmarks/corelib/tools/qstring/main.cpp41
-rw-r--r--tests/benchmarks/corelib/tools/qstring/qstring.pro3
2 files changed, 43 insertions, 1 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index e1800c0..1c55f82 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -378,6 +378,41 @@ static bool equals2_ssse3(ushort *p1, ushort *p2, int len)
return equals2_shortwise(p1, p2, len);
}
//#endif
+
+//#ifdef __SSE4_1__
+static bool equals2_sse4(ushort *p1, ushort *p2, int len)
+{
+    // Returns true when the first len ushorts at p1 and p2 are identical.
+    // Compares 8 ushorts (16 bytes) per iteration with the SSE4.2 pcmpestrm
+    // instruction, so the CPU must support SSE4.2.
+    //
+    // CAVEAT: every iteration loads a full 16 bytes from both pointers before
+    // the remaining length is examined, and the last iteration always has
+    // fewer than 8 ushorts left. The function therefore reads between 2 and
+    // 16 bytes past the end of each buffer; callers must make sure that
+    // memory is readable.
+    //
+    // We use the pcmpestrm instruction searching for differences (negative polarity)
+    //  it will reset CF if it's all equal
+    //  it will reset OF if the first char is equal
+    //  it will set ZF & SF if the length is less than 8 (which means we've done the last operation)
+    // the three possible conditions are:
+    //  difference found:         CF = 1
+    //  all equal, not finished:  CF = ZF = SF = 0
+    //  all equal, finished:      CF = 0, ZF = SF = 1
+
+    // The loop subtracts 8 before the first comparison; compensate up front
+    // so the first pcmpestrm sees the caller's length.
+    len += 8;
+
+    // volatile: keep the compiler from hoisting or merging the statement when
+    // the benchmark calls this repeatedly with the same arguments.
+    asm volatile (
+        "0:\n\t"
+        "movdqu    (%[p1]), %%xmm0\n\t"     // load 8 ushorts
+        "movdqu    (%[p2]), %%xmm1\n\t"
+        // No size suffix: the assembler picks 32- or 64-bit from the pointer
+        // register, so this also assembles on x86-64.
+        "add       $16, %[p2]\n\t"
+        "add       $16, %[p1]\n\t"
+        "subl      $8, %[len]\n\t"
+        // pcmpestrm takes its two explicit lengths from EAX and EDX;
+        // both strings have the same remaining length.
+        "movl      %[len], %%edx\n\t"
+        "pcmpestrm %[mode], %%xmm1, %%xmm0\n\t"
+        "ja        0b\n\t"                  // CF = 0 and ZF = 0: equal so far, more to do
+        "1:\n\t"
+        // mov leaves the flags untouched, so CF from pcmpestrm is still
+        // valid for setnc: result = 1 if no difference was found.
+        "mov       $0, %[len]\n\t"
+        "setnc     %%al\n\t"
+        // len lives in EAX (implicit pcmpestrm operand and the result).
+        // p1/p2 are advanced by the asm, so they must be read-write operands.
+        : [len] "+a" (len),
+          [p1] "+r" (p1),
+          [p2] "+r" (p2)
+        : [mode] "K" (_SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY | _SIDD_UNIT_MASK)
+        // pcmpestrm writes its mask to xmm0; flags are modified and memory is
+        // read through the pointers, hence "cc" and "memory".
+        : "%edx", "%xmm0", "%xmm1", "cc", "memory"
+    );
+    return len;
+}
+//#endif
#endif
void tst_QString::equals2_data() const
@@ -393,6 +428,9 @@ void tst_QString::equals2_data() const
QTest::newRow("sse2_aligning") << 5;
#ifdef __SSSE3__
QTest::newRow("ssse3") << 6;
+#ifdef __SSE4_1__
+ QTest::newRow("sse4.2") << 7;
+#endif
#endif
#endif
}
@@ -447,6 +485,9 @@ void tst_QString::equals2() const
equals2_sse2_aligning, // 5
#ifdef __SSSE3__
equals2_ssse3, // 6
+#ifdef __SSE4_1__
+ equals2_sse4, // 7
+#endif
#endif
#endif
0
diff --git a/tests/benchmarks/corelib/tools/qstring/qstring.pro b/tests/benchmarks/corelib/tools/qstring/qstring.pro
index bc6254c..44fb46b 100644
--- a/tests/benchmarks/corelib/tools/qstring/qstring.pro
+++ b/tests/benchmarks/corelib/tools/qstring/qstring.pro
@@ -14,5 +14,6 @@ wince*:{
DEFINES += SRCDIR=\\\"$$PWD/\\\"
}
-ssse3:QMAKE_FLAGS += -mssse3
+# Enable the highest SIMD level the build configuration offers. All branches
+# must append to QMAKE_CXXFLAGS; QMAKE_FLAGS is not passed to the compiler.
+sse4:QMAKE_CXXFLAGS += -msse4
+else:ssse3:QMAKE_CXXFLAGS += -mssse3
else:sse2:QMAKE_CXXFLAGS += -msse2