summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/tools
diff options
context:
space:
mode:
author: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-02-24 10:29:52 (GMT)
committer: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-02-24 10:32:28 (GMT)
commit: 5d6d552c4c01c1e8884d7641c81671e808eed55d (patch)
tree: 8ab3b3218e92c2a313861c497061bad8747324c9 /src/corelib/tools
parent: fb111f9b71c99aa06dcf30dfada0d3b12c7ac993 (diff)
download: Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.zip
Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.gz
Qt-5d6d552c4c01c1e8884d7641c81671e808eed55d.tar.bz2
Comments the SSE implementation of fromLatin1_helper()
Add comments to explain the intrinsics. Comment on the general method. Share the information that it is not worth doing the same with NEON.
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- src/corelib/tools/qstring.cpp 12
1 file changed, 9 insertions, 3 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 9431ef4..f8303bd 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -3650,20 +3650,26 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->data = d->array;
d->array[size] = '\0';
ushort *dst = d->data;
+ /* SIMD:
+ * Unpacking with SSE has been shown to improve performance on recent CPUs
+ * The same method gives no improvement with NEON.
+ */
#if defined(QT_ALWAYS_HAVE_SSE2)
if (size >= 16) {
int chunkCount = size >> 4; // divided by 16
const __m128i nullMask = _mm_set1_epi32(0);
for (int i = 0; i < chunkCount; ++i) {
- const __m128i chunk = _mm_loadu_si128((__m128i*)str);
+ const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
str += 16;
+ // unpack the first 8 bytes, padding with zeros
const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, firstHalf);
+ _mm_storeu_si128((__m128i*)dst, firstHalf); // store
dst += 8;
+ // unpack the last 8 bytes, padding with zeros
const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
- _mm_storeu_si128((__m128i*)dst, secondHalf);
+ _mm_storeu_si128((__m128i*)dst, secondHalf); // store
dst += 8;
}
size = size % 16;