summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@nokia.com>2011-03-19 21:07:11 (GMT)
committerThiago Macieira <thiago.macieira@nokia.com>2011-03-22 14:52:02 (GMT)
commit3110ab6391971fb7b914ed1f797a0ff9e403501f (patch)
tree822182b8479950425f435c4877aace205bf170ca
parent0552c0f64146a18f021e36bcbff106cb815e6fbb (diff)
downloadQt-3110ab6391971fb7b914ed1f797a0ff9e403501f.zip
Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz
Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.bz2
Add a UTF-8 conversion for trusted data with no BOM.
This assumes that there are no overlong sequences, no continuation bytes without a leading byte, no missing continuation bytes, and no BOM.
-rw-r--r--tests/benchmarks/corelib/tools/qstring/main.cpp48
1 files changed, 48 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57..d926aa5 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
return dst + counter - qch;
}
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len) // SSE2 conversion of `len` UTF-8 bytes at `chars` into 16-bit units at `qch`, for trusted input only: no overlong sequences, no stray or missing continuation bytes, no BOM
+{
+ qptrdiff counter = 0; // index of the next input byte to process
+ ushort *dst = qch; // output base pointer; handed to extract_utf8_multibyte, which presumably adjusts it (helper defined elsewhere -- confirm)
+
+ len -= 16; // temporarily shrink len so the vector loop stops once 16 or fewer input bytes remain
+ const __m128i nullMask = _mm_set1_epi32(0); // all-zero vector, interleaved as the high byte when widening 8-bit values to 16 bits
+ while (counter < len) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // unaligned load of 16 input bytes
+ ushort highbytes = _mm_movemask_epi8(chunk); // bit i holds the top bit of byte i: set means byte i is not ASCII
+
+ // widen the first 8 bytes to 16 bits by interleaving with zeros; stored unconditionally, before the ASCII check
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // unaligned store of 8 units at dst + counter
+
+ if (!uchar(highbytes)) { // low 8 mask bits clear: the first 8 bytes are all ASCII
+ // widen the last 8 bytes to 16 bits by interleaving with zeros
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // unaligned store of the next 8 units
+
+ if (!highbytes) { // all 16 bytes are ASCII: consume the whole chunk
+ counter += 16;
+ continue;
+ }
+ }
+
+ // at least one byte in this chunk has its top bit set, i.e. a multibyte sequence is present
+ // skip ahead to it; bsf_nonzero is defined elsewhere, presumably the index of the lowest set bit -- confirm
+ counter += bsf_nonzero(highbytes);
+ extract_utf8_multibyte<true>(dst, chars, counter, len); // NOTE(review): len is still reduced by 16 here; confirm the <true> variant does not need the real length
+ }
+ len += 16; // restore the real input length for the scalar tail
+
+ while (counter < len) { // byte-at-a-time loop over whatever the vector loop left
+ uchar ch = chars[counter];
+ if ((ch & 0x80) == 0) { // top bit clear: ASCII, copied as-is
+ dst[counter] = ch;
+ ++counter;
+ continue;
+ }
+
+ // top bit set: hand the sequence to the helper (defined elsewhere; presumably advances counter -- confirm)
+ extract_utf8_multibyte<true>(dst, chars, counter, len);
+ }
+ return dst + counter - qch; // distance in ushort units from qch to dst + counter; presumably the number of units written -- confirm against the helper
+}
+
void tst_QString::fromUtf8Alternatives_data() const
{
QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+ QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
}
extern StringData fromUtf8Data;