diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2010-12-22 18:52:18 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2011-06-14 09:09:07 (GMT) |
commit | bb3bd601560132df769c32808ae0b36c56d1caab (patch) | |
tree | 0f5a6482ba87d423ff1cae71d1c68a2dc4dd219e | |
parent | bdad106358ae177d1345f5ff85c0e38cfeb5ca90 (diff) | |
download | Qt-bb3bd601560132df769c32808ae0b36c56d1caab.zip Qt-bb3bd601560132df769c32808ae0b36c56d1caab.tar.gz Qt-bb3bd601560132df769c32808ae0b36c56d1caab.tar.bz2 |
Create a function that merges the SSE common code
Reviewed-by: Samuel Rødal
-rw-r--r-- | src/corelib/tools/qstring.cpp | 73 |
1 files changed, 35 insertions, 38 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 7cbef98..0edf291 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3556,6 +3556,38 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const Use toLocal8Bit() instead. */ +#if defined(QT_ALWAYS_HAVE_SSE2) +static inline __m128i mergeQuestionMarks(__m128i chunk) +{ + const __m128i questionMark = _mm_set1_epi16('?'); + + // SSE has no compare instruction for unsigned comparison. + // The variables must be shiffted + 0x8000 to be compared + const __m128i signedBitOffset = _mm_set1_epi16(0x8000); + const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + + const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + +# ifdef __SSE4_1__ + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // offLimitQuestionMark contains '?' for each 16 bits that was off-limit + // the 16 bits that were correct contains zeros + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + + // correctBytes contains the bytes that were in limit + // the 16 bits that were off limits contains zeros + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk); + + // merge offLimitQuestionMark and correctBytes to have the result + chunk = _mm_or_si128(correctBytes, offLimitQuestionMark); +# endif + return chunk; +} +#endif + static QByteArray toLatin1_helper(const QChar *data, int length) { QByteArray ba; @@ -3566,50 +3598,15 @@ static QByteArray toLatin1_helper(const QChar *data, int length) #if defined(QT_ALWAYS_HAVE_SSE2) if (length >= 16) { const int chunkCount = length >> 4; // divided by 16 - const __m128i questionMark = _mm_set1_epi16('?'); - // SSE has no compare instruction for unsigned comparison. - // The variables must be shiffted + 0x8000 to be compared - const __m128i signedBitOffset = _mm_set1_epi16(0x8000); - const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + for (int i = 0; i < chunkCount; ++i) { __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load + chunk1 = mergeQuestionMarks(chunk1); src += 8; - { - // each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff) - const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); - -#ifdef __SSE4_1__ - chunk1 = _mm_blendv_epi8(chunk1, questionMark, offLimitMask); -#else - - // offLimitQuestionMark contains '?' for each 16 bits that was off-limit - // the 16 bits that were correct contains zeros - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - - // correctBytes contains the bytes that were in limit - // the 16 bits that were off limits contains zeros - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1); - - // merge offLimitQuestionMark and correctBytes to have the result - chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark); -#endif - } __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load + chunk2 = mergeQuestionMarks(chunk2); src += 8; - { - // exactly the same operations as for the previous chunk of data - const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); -#ifdef __SSE4_1__ - chunk2 = _mm_blendv_epi8(chunk2, questionMark, offLimitMask); -#else - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2); - chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark); -#endif - } // pack the two vector to 16 x 8bits elements const __m128i result = _mm_packus_epi16(chunk1, chunk2); |