diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-19 21:07:11 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2011-03-22 14:52:02 (GMT) |
commit | 3110ab6391971fb7b914ed1f797a0ff9e403501f (patch) | |
tree | 822182b8479950425f435c4877aace205bf170ca /tests | |
parent | 0552c0f64146a18f021e36bcbff106cb815e6fbb (diff) | |
download | Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.zip Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.bz2 |
Add a UTF-8 conversion for trusted data with no BOM.
This assumes that there are no overlong sequences, no continuation
bytes without a leading byte, no missing continuation bytes, and no BOM.
Diffstat (limited to 'tests')
-rw-r--r-- | tests/benchmarks/corelib/tools/qstring/main.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
```diff
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57..d926aa5 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
     return dst + counter - qch;
 }
 
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len)
+{
+    qptrdiff counter = 0;
+    ushort *dst = qch;
+
+    len -= 16;
+    const __m128i nullMask = _mm_set1_epi32(0);
+    while (counter < len) {
+        const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // load
+        ushort highbytes = _mm_movemask_epi8(chunk);
+
+        // unpack the first 8 bytes, padding with zeros
+        const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+        _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // store
+
+        if (!uchar(highbytes)) {
+            // unpack the last 8 bytes, padding with zeros
+            const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+            _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // store
+
+            if (!highbytes) {
+                counter += 16;
+                continue;
+            }
+        }
+
+        // UTF-8 character found
+        // which one?
+        counter += bsf_nonzero(highbytes);
+        extract_utf8_multibyte<true>(dst, chars, counter, len);
+    }
+    len += 16;
+
+    while (counter < len) {
+        uchar ch = chars[counter];
+        if ((ch & 0x80) == 0) {
+            dst[counter] = ch;
+            ++counter;
+            continue;
+        }
+
+        // UTF-8 character found
+        extract_utf8_multibyte<true>(dst, chars, counter, len);
+    }
+    return dst + counter - qch;
+}
+
 void tst_QString::fromUtf8Alternatives_data() const
 {
     QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
     QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
     QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
     QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+    QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
 }
 
 extern StringData fromUtf8Data;
```