diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-18 16:05:35 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-22 14:51:46 (GMT) |
commit | 8f85657308ae7ba4196713ef57f0c918d5d4f64a (patch) | |
tree | e3c0b84f2264fb823f52180d59595b614438cb68 /tests/benchmarks | |
parent | d2db4085bedf7c10791960bcbaf2da03d9860c5a (diff) | |
download | Qt-8f85657308ae7ba4196713ef57f0c918d5d4f64a.zip Qt-8f85657308ae7ba4196713ef57f0c918d5d4f64a.tar.gz Qt-8f85657308ae7ba4196713ef57f0c918d5d4f64a.tar.bz2 |
Add an SSE2 alternative with prolog
Diffstat (limited to 'tests/benchmarks')
-rw-r--r-- | tests/benchmarks/corelib/tools/qstring/main.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index c8d9de2..6f5082e 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -1474,6 +1474,53 @@ void fromLatin1_sse2_qt47(ushort *dst, const char *str, int size)
         *dst++ = (uchar)*str++;
 }
 
+// Scalar conversion: widens size Latin-1 bytes from str into 16-bit units at dst.
+static inline void fromLatin1_prolog(ushort *dst, const char *str, uint size)
+{
+    while (size--)
+        *dst++ = (uchar)*str++;
+}
+
+// Same as the Qt 4.7 code, but a scalar prolog first advances dst to a 16-byte
+// boundary so the vector loop can issue aligned stores. Assumes dst is 2-byte
+// aligned; otherwise the prolog cannot reach a boundary for the aligned stores.
+void fromLatin1_sse2_withprolog(ushort *dst, const char *str, int size)
+{
+    if (size >= 16) {
+        // "& 0xf" maps the already-aligned case (16 - 0) to an empty prolog
+        // instead of converting 8 characters with the scalar loop for nothing
+        const uint misalignment = uint(quintptr(dst) & 0xf);
+        const uint prologCount = ((16 - misalignment) & 0xf) / 2;
+
+        fromLatin1_prolog(dst, str, prologCount);
+        size -= prologCount;
+        dst += prologCount;
+        str += prologCount;
+    }
+
+    if (size >= 16) {
+        const int chunkCount = size >> 4; // divided by 16
+        const __m128i nullMask = _mm_set1_epi32(0);
+        for (int i = 0; i < chunkCount; ++i) {
+            // str may still be misaligned, hence the unaligned load
+            const __m128i chunk = _mm_loadu_si128((const __m128i*)str);
+            str += 16;
+
+            // unpack the first 8 bytes, padding with zeros
+            const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+            _mm_store_si128((__m128i*)dst, firstHalf); // aligned store
+            dst += 8;
+
+            // unpack the last 8 bytes, padding with zeros
+            const __m128i secondHalf = _mm_unpackhi_epi8(chunk, nullMask);
+            _mm_store_si128((__m128i*)dst, secondHalf); // aligned store
+            dst += 8;
+        }
+        size = size % 16;
+    }
+    fromLatin1_prolog(dst, str, uint(size)); // scalar tail: fewer than 16 left
+}
+
 typedef void (* FromLatin1Function)(ushort *, const char *, int);
 Q_DECLARE_METATYPE(FromLatin1Function)
 
@@ -1482,6 +1529,7 @@ void tst_QString::fromLatin1Alternatives_data() const
     QTest::addColumn<FromLatin1Function>("function");
     QTest::newRow("regular") << &fromLatin1_regular;
     QTest::newRow("sse2-qt4.7") << &fromLatin1_sse2_qt47;
+    QTest::newRow("sse2-with-prolog") << &fromLatin1_sse2_withprolog;
} static void fromLatin1Alternatives_internal(FromLatin1Function function, bool doVerify) |