From 5d6d552c4c01c1e8884d7641c81671e808eed55d Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Wed, 24 Feb 2010 11:29:52 +0100 Subject: Comments the SSE implementation of fromLatin1_helper() Add comments to explain the intrinsics. Comment on the general method. Share the information that it is not worth it to do the same with NEON. --- src/corelib/tools/qstring.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 9431ef4..f8303bd 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3650,20 +3650,26 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) d->data = d->array; d->array[size] = '\0'; ushort *dst = d->data; + /* SIMD: + * Unpacking with SSE has been shown to improve performance on recent CPUs + * The same method gives no improvement with NEON. + */ #if defined(QT_ALWAYS_HAVE_SSE2) if (size >= 16) { int chunkCount = size >> 4; // divided by 16 const __m128i nullMask = _mm_set1_epi32(0); for (int i = 0; i < chunkCount; ++i) { - const __m128i chunk = _mm_loadu_si128((__m128i*)str); + const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load str += 16; + // unpack the first 8 bytes, padding with zeros const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, firstHalf); + _mm_storeu_si128((__m128i*)dst, firstHalf); // store dst += 8; + // unpack the last 8 bytes, padding with zeros const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); - _mm_storeu_si128((__m128i*)dst, secondHalf); + _mm_storeu_si128((__m128i*)dst, secondHalf); // store dst += 8; } size = size % 16; -- cgit v0.12