summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-02-24 10:29:52 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-02-24 10:32:28 (GMT)
commit5d6d552c4c01c1e8884d7641c81671e808eed55d (patch)
tree8ab3b3218e92c2a313861c497061bad8747324c9
parentfb111f9b71c99aa06dcf30dfada0d3b12c7ac993 (diff)
downloadQt-5d6d552c4c01c1e8884d7641c81671e808eed55d.zip
Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.gz
Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.bz2
Comment the SSE implementation of fromLatin1_helper()
Add comments to explain the intrinsics. Comment on the general method. Note that it is not worth doing the same with NEON.
-rw-r--r--src/corelib/tools/qstring.cpp12
1 files changed, 9 insertions, 3 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 9431ef4..f8303bd 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -3650,20 +3650,26 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->data = d->array;
d->array[size] = '\0';
ushort *dst = d->data;
+ /* SIMD:
+ * Unpacking with SSE has been shown to improve performance on recent CPUs
+ * The same method gives no improvement with NEON.
+ */
#if defined(QT_ALWAYS_HAVE_SSE2)
if (size >= 16) {
int chunkCount = size >> 4; // divided by 16
const __m128i nullMask = _mm_set1_epi32(0);
for (int i = 0; i < chunkCount; ++i) {
- const __m128i chunk = _mm_loadu_si128((__m128i*)str);
+ const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
str += 16;
+ // unpack the first 8 bytes, padding with zeros
const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, firstHalf);
+ _mm_storeu_si128((__m128i*)dst, firstHalf); // store
dst += 8;
+ // unpack the last 8 bytes, padding with zeros
const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, secondHalf);
+ _mm_storeu_si128((__m128i*)dst, secondHalf); // store
dst += 8;
}
size = size % 16;