summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- tests/benchmarks/corelib/tools/qstring/main.cpp 46
1 files changed, 46 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 1a38354..96f2c30 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -1538,6 +1538,51 @@ void fromLatin1_sse2_improved(ushort *dst, const char *str, int size)
fromLatin1_epilog(dst + counter, str + counter, size - counter);
}
+void fromLatin1_sse2_improved2(ushort *dst, const char *str, int size) // zero-extends size bytes of str into dst: 32 bytes per loop pass, one optional 16-byte step, rest via fromLatin1_epilog
+{
+ const __m128i nullMask = _mm_set1_epi32(0); // all-zero register; interleaved with the input so each byte gains a zero high byte
+ qptrdiff counter = 0; // element offset of the next character to convert; indexes both str and dst
+ size -= 32; // bias size so that "size >= counter" means at least 32 input bytes remain at counter
+ while (size >= counter) {
+ const __m128i chunk1 = _mm_loadu_si128((__m128i*)(str + counter)); // unaligned load of input bytes 0..15 of this pass
+ const __m128i chunk2 = _mm_loadu_si128((__m128i*)(str + counter + 16)); // unaligned load of input bytes 16..31 of this pass
+
+ // chunk1, low 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i firstHalf1 = _mm_unpacklo_epi8(chunk1, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter), firstHalf1); // unaligned store of output elements 0..7
+
+ // chunk1, high 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i secondHalf1 = _mm_unpackhi_epi8(chunk1, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf1); // unaligned store of output elements 8..15
+
+ // chunk2, low 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i firstHalf2 = _mm_unpacklo_epi8(chunk2, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 16), firstHalf2); // unaligned store of output elements 16..23
+
+ // chunk2, high 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i secondHalf2 = _mm_unpackhi_epi8(chunk2, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 24), secondHalf2); // unaligned store of output elements 24..31
+
+ counter += 32; // advance past the 32 characters just converted
+ }
+ size += 16; // size is now (original - 16): "size >= counter" means at least 16 input bytes remain
+ if (size >= counter) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)(str + counter)); // unaligned load of one more 16-byte block
+
+ // low 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // unaligned store of output elements 0..7
+
+ // high 8 bytes: interleave with zeros to form 8 16-bit values
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // unaligned store of output elements 8..15
+
+ counter += 16; // advance past the 16 characters just converted
+ }
+ size += 16; // undo the remaining bias: size is back to the caller's value
+ fromLatin1_epilog(dst + counter, str + counter, size - counter); // pass the leftover size - counter characters (fewer than 16 for non-negative size) to the epilog helper
+}
+
void fromLatin1_prolog_unrolled(ushort *dst, const char *str, int size)
{
// QString's data pointer is most often ending in 0x2 or 0xa
@@ -1787,6 +1832,7 @@ void tst_QString::fromLatin1Alternatives_data() const
#ifdef __SSE2__
QTest::newRow("sse2-qt4.7") << &fromLatin1_sse2_qt47;
QTest::newRow("sse2-improved") << &fromLatin1_sse2_improved;
+ QTest::newRow("sse2-improved2") << &fromLatin1_sse2_improved2;
QTest::newRow("sse2-with-prolog-regular") << &fromLatin1_sse2_withprolog<&fromLatin1_regular>;
QTest::newRow("sse2-with-prolog-unrolled") << &fromLatin1_sse2_withprolog<&fromLatin1_prolog_unrolled>;
QTest::newRow("sse2-with-prolog-sse2-overcommit") << &fromLatin1_sse2_withprolog<&fromLatin1_prolog_sse2_overcommit>;