diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-18 16:55:52 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-22 14:51:52 (GMT) |
commit | aa7543e735c5c0fe61049c432b264ad1f2cfd598 (patch) | |
tree | acaf1939f5ae0c65612a85579e17c472f14d1f88 /tests/benchmarks | |
parent | cf2a87c8286694d1f52741666a7040bbd88f9c59 (diff) | |
download | Qt-aa7543e735c5c0fe61049c432b264ad1f2cfd598.zip Qt-aa7543e735c5c0fe61049c432b264ad1f2cfd598.tar.gz Qt-aa7543e735c5c0fe61049c432b264ad1f2cfd598.tar.bz2 |
Attempt to improve the epilog code
Diffstat (limited to 'tests/benchmarks')
-rw-r--r-- | tests/benchmarks/corelib/tools/qstring/main.cpp | 91 |
1 files changed, 72 insertions, 19 deletions
```diff
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index a5f234e..c365e5a 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -1477,6 +1477,63 @@ void fromLatin1_sse2_qt47(ushort *dst, const char *str, int size)
         *dst++ = (uchar)*str++;
 }
 
+static inline void fromLatin1_epilog(ushort *dst, const char *str, int size)
+{
+    if (!size) return;
+    dst[0] = (uchar)str[0];
+    if (!--size) return;
+    dst[1] = (uchar)str[1];
+    if (!--size) return;
+    dst[2] = (uchar)str[2];
+    if (!--size) return;
+    dst[3] = (uchar)str[3];
+    if (!--size) return;
+    dst[4] = (uchar)str[4];
+    if (!--size) return;
+    dst[5] = (uchar)str[5];
+    if (!--size) return;
+    dst[6] = (uchar)str[6];
+    if (!--size) return;
+    dst[7] = (uchar)str[7];
+    if (!--size) return;
+    dst[8] = (uchar)str[8];
+    if (!--size) return;
+    dst[9] = (uchar)str[9];
+    if (!--size) return;
+    dst[10] = (uchar)str[10];
+    if (!--size) return;
+    dst[11] = (uchar)str[11];
+    if (!--size) return;
+    dst[12] = (uchar)str[12];
+    if (!--size) return;
+    dst[13] = (uchar)str[13];
+    if (!--size) return;
+    dst[14] = (uchar)str[14];
+    if (!--size) return;
+    dst[15] = (uchar)str[15];
+}
+
+void fromLatin1_sse2_improved(ushort *dst, const char *str, int size)
+{
+    const __m128i nullMask = _mm_set1_epi32(0);
+    while (size >= 16) {
+        const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
+
+        // unpack the first 8 bytes, padding with zeros
+        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+        _mm_storeu_si128((__m128i*)dst, firstHalf); // store
+
+        // unpack the last 8 bytes, padding with zeros
+        const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+        _mm_storeu_si128((__m128i*)(dst + 8), secondHalf); // store
+
+        str += 16;
+        dst += 16;
+        size -= 16;
+    }
+    fromLatin1_epilog(dst, str, size);
+}
+
 void fromLatin1_prolog_unrolled(ushort *dst, const char *str, int size)
 {
     // QString's data pointer is most often ending in 0x2 or 0xa
@@ -1542,28 +1599,23 @@ void fromLatin1_sse2_withprolog(ushort *dst, const char *str, int size)
         str += prologCount;
     }
 
-    if (size >= 16) {
-        int chunkCount = size >> 4; // divided by 16
-        const __m128i nullMask = _mm_set1_epi32(0);
-        for (int i = 0; i < chunkCount; ++i) {
-            const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
-            str += 16;
+    const __m128i nullMask = _mm_set1_epi32(0);
+    while (size >= 16) {
+        const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
 
-            // unpack the first 8 bytes, padding with zeros
-            const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
-            _mm_store_si128((__m128i*)dst, firstHalf); // store
-            dst += 8;
+        // unpack the first 8 bytes, padding with zeros
+        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+        _mm_store_si128((__m128i*)dst, firstHalf); // store
 
-            // unpack the last 8 bytes, padding with zeros
-            const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
-            _mm_store_si128((__m128i*)dst, secondHalf); // store
-            dst += 8;
-        }
-        size = size % 16;
-    }
-    while (size--)
-        *dst++ = (uchar)*str++;
+        // unpack the last 8 bytes, padding with zeros
+        const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+        _mm_store_si128((__m128i*)(dst + 8), secondHalf); // store
+        str += 16;
+        dst += 16;
+        size -= 16;
+    }
+    fromLatin1_epilog(dst, str, size);
 }
 
 void fromLatin1_prolog_sse4_overcommit(ushort *dst, const char *str, int)
@@ -1579,6 +1631,7 @@ void tst_QString::fromLatin1Alternatives_data() const
     QTest::addColumn<FromLatin1Function>("function");
     QTest::newRow("regular") << &fromLatin1_regular;
     QTest::newRow("sse2-qt4.7") << &fromLatin1_sse2_qt47;
+    QTest::newRow("sse2-improved") << &fromLatin1_sse2_improved;
     QTest::newRow("sse2-with-prolog-regular") << &fromLatin1_sse2_withprolog<&fromLatin1_regular>;
     QTest::newRow("sse2-with-prolog-unrolled") << &fromLatin1_sse2_withprolog<&fromLatin1_prolog_unrolled>;
     QTest::newRow("sse2-with-prolog-sse2-overcommit") << &fromLatin1_sse2_withprolog<&fromLatin1_prolog_sse2_overcommit>;
```