diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-19 21:07:11 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-22 14:52:02 (GMT) |
commit | 3110ab6391971fb7b914ed1f797a0ff9e403501f (patch) | |
tree | 822182b8479950425f435c4877aace205bf170ca | |
parent | 0552c0f64146a18f021e36bcbff106cb815e6fbb (diff) | |
download | Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.zip Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.bz2 |
Add a UTF-8 conversion for trusted data with no BOM.
This assumes that there are no overlong sequences, no continuation
bytes without a leading byte, no missing continuation bytes, and no BOM.
-rw-r--r-- | tests/benchmarks/corelib/tools/qstring/main.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
```diff
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57..d926aa5 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
     return dst + counter - qch;
 }
 
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len)
+{
+    qptrdiff counter = 0;
+    ushort *dst = qch;
+
+    len -= 16;
+    const __m128i nullMask = _mm_set1_epi32(0);
+    while (counter < len) {
+        const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // load
+        ushort highbytes = _mm_movemask_epi8(chunk);
+
+        // unpack the first 8 bytes, padding with zeros
+        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+        _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // store
+
+        if (!uchar(highbytes)) {
+            // unpack the last 8 bytes, padding with zeros
+            const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+            _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // store
+
+            if (!highbytes) {
+                counter += 16;
+                continue;
+            }
+        }
+
+        // UTF-8 character found
+        // which one?
+        counter += bsf_nonzero(highbytes);
+        extract_utf8_multibyte<true>(dst, chars, counter, len);
+    }
+    len += 16;
+
+    while (counter < len) {
+        uchar ch = chars[counter];
+        if ((ch & 0x80) == 0) {
+            dst[counter] = ch;
+            ++counter;
+            continue;
+        }
+
+        // UTF-8 character found
+        extract_utf8_multibyte<true>(dst, chars, counter, len);
+    }
+    return dst + counter - qch;
+}
+
 void tst_QString::fromUtf8Alternatives_data() const
 {
     QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
     QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
     QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
     QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+    QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
 }
 
 extern StringData fromUtf8Data;
```