diff options
author | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-02-24 10:29:52 (GMT) |
---|---|---|
committer | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-02-24 10:32:28 (GMT) |
commit | 5d6d552c4c01c1e8884d7641c81671e808eed55d (patch) | |
tree | 8ab3b3218e92c2a313861c497061bad8747324c9 /src/corelib/tools | |
parent | fb111f9b71c99aa06dcf30dfada0d3b12c7ac993 (diff) | |
download | Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.zip Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.gz Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.bz2 |
Comments the SSE implementation of fromLatin1_helper()
Add comments to explain the intrinsics.
Comment on the general method. Share the information that it is not
worth it to do the same with Neon.
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- | src/corelib/tools/qstring.cpp | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 9431ef4..f8303bd 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3650,20 +3650,26 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) d->data = d->array; d->array[size] = '\0'; ushort *dst = d->data; + /* SIMD: + * Unpacking with SSE has been shown to improve performance on recent CPUs + * The same method gives no improvement with NEON. + */ #if defined(QT_ALWAYS_HAVE_SSE2) if (size >= 16) { int chunkCount = size >> 4; // divided by 16 const __m128i nullMask = _mm_set1_epi32(0); for (int i = 0; i < chunkCount; ++i) { - const __m128i chunk = _mm_loadu_si128((__m128i*)str); + const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load str += 16; + // unpack the first 8 bytes, padding with zeros const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, firstHalf); + _mm_storeu_si128((__m128i*)dst, firstHalf); // store dst += 8; + // unpack the last 8 bytes, padding with zeros const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, secondHalf); + _mm_storeu_si128((__m128i*)dst, secondHalf); // store dst += 8; } size = size % 16; |