summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@nokia.com> 2010-12-22 18:52:48 (GMT)
committer Thiago Macieira <thiago.macieira@nokia.com> 2011-06-22 14:26:56 (GMT)
commit 45d2d36c9dbcbce403c78838ea52acd1ab111b68 (patch)
tree 76a0888856b6750e7869456c9a4f0984b9bc048f /src
parent af3bb0f146ec357ae6daf752a5f8bbdb074cde20 (diff)
downloadQt-45d2d36c9dbcbce403c78838ea52acd1ab111b68.zip
Qt-45d2d36c9dbcbce403c78838ea52acd1ab111b68.tar.gz
Qt-45d2d36c9dbcbce403c78838ea52acd1ab111b68.tar.bz2
Add an SSE4.2 even simpler version of toLatin1
Use the new PCMPESTRM instruction (Parallel CoMPare Explicit-length STRings with result in a Mask), which was added in SSE4.2 to facilitate string operations. The "compare ranges" mode allows us to search for characters outside the Latin 1 range and then use the SSE4.1 PBLENDVB instruction to replace those with question marks.

Unlike previous SSE compare instructions, the PCMPxSTRx family allows us to operate on unsigned 16-bit values. This saves us another parallel add.

Reviewed-By: Samuel Rødal
Diffstat (limited to 'src')
-rw-r--r-- src/corelib/tools/qstring.cpp 23
1 files changed, 23 insertions, 0 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 0edf291..0828ccb 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -3561,6 +3561,28 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
{
const __m128i questionMark = _mm_set1_epi16('?');
+# ifdef __SSE4_2__
+ // compare the unsigned shorts for the range 0x0100-0xFFFF
+ // note on the use of _mm_cmpestrm:
+ // The MSDN documentation online (http://technet.microsoft.com/en-us/library/bb514080.aspx)
+ // says for range search the following:
+ // For each character c in a, determine whether b0 <= c <= b1 or b2 <= c <= b3
+ //
+ // However, all examples on the Internet, including from Intel
+ // (see http://software.intel.com/en-us/articles/xml-parsing-accelerator-with-intel-streaming-simd-extensions-4-intel-sse4/)
+ // put the range to be searched first
+ //
+ // Disassembly and instruction-level debugging with GCC and ICC show
+ // that they are doing the right thing. Inverting the arguments in the
+ // instruction does cause a bunch of test failures.
+
+ const int mode = _SIDD_UWORD_OPS | _SIDD_CMP_RANGES | _SIDD_UNIT_MASK;
+ const __m128i rangeMatch = _mm_cvtsi32_si128(0xffff0100);
+ const __m128i offLimitMask = _mm_cmpestrm(rangeMatch, 2, chunk, 8, mode);
+
+ // replace the non-Latin 1 characters in the chunk with question marks
+ chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
+# else
// SSE has no compare instruction for unsigned comparison.
// The variables must be shiffted + 0x8000 to be compared
const __m128i signedBitOffset = _mm_set1_epi16(0x8000);
@@ -3584,6 +3606,7 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
// merge offLimitQuestionMark and correctBytes to have the result
chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
# endif
+# endif
return chunk;
}
#endif