summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@nokia.com> 2011-03-19 21:07:11 (GMT)
committer Thiago Macieira <thiago.macieira@nokia.com> 2011-03-22 14:52:02 (GMT)
commit 3110ab6391971fb7b914ed1f797a0ff9e403501f (patch)
tree 822182b8479950425f435c4877aace205bf170ca /tests
parent 0552c0f64146a18f021e36bcbff106cb815e6fbb (diff)
download Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.zip
Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.gz
Qt-3110ab6391971fb7b914ed1f797a0ff9e403501f.tar.bz2
Add a UTF-8 conversion for trusted data with no BOM.
This assumes that there are no overlong sequences, no continuation bytes without a leading byte, no missing continuation bytes and no BOM.
Diffstat (limited to 'tests')
-rw-r--r-- tests/benchmarks/corelib/tools/qstring/main.cpp 48
1 files changed, 48 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index 4b1ab57..d926aa5 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -2148,6 +2148,53 @@ int fromUtf8_sse2_optimised_for_ascii(ushort *qch, const char *chars, int len)
return dst + counter - qch;
}
+int fromUtf8_sse2_trusted_no_bom(ushort *qch, const char *chars, int len) // SSE2 UTF-8 decoder for trusted input without a BOM: does no validation of its own; writes 16-bit units to qch
+{ // NOTE(review): relies on bsf_nonzero and extract_utf8_multibyte, which are defined outside this hunk
+ qptrdiff counter = 0; // index of the next input byte; also used as the index into dst
+ ushort *dst = qch; // output base handed to extract_utf8_multibyte, which may adjust it (signature not visible here -- confirm)
+
+ len -= 16; // shrink the bound so the 16-byte load below stays inside the input while counter < len
+ const __m128i nullMask = _mm_set1_epi32(0); // all-zero vector used to zero-extend bytes when unpacking
+ while (counter < len) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)(chars + counter)); // load 16 input bytes (unaligned load)
+ ushort highbytes = _mm_movemask_epi8(chunk); // bit i is the top bit of byte i, i.e. set when byte i is not ASCII
+
+ // unpack the first 8 bytes, padding with zeros (each byte becomes one 16-bit unit)
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter), firstHalf); // store unconditionally, before the ASCII check below
+
+ if (!uchar(highbytes)) { // low 8 mask bits clear: the first 8 bytes are all ASCII
+ // unpack the last 8 bytes, padding with zeros
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)(dst + counter + 8), secondHalf); // store the second 8 units right after the first 8
+
+ if (!highbytes) { // all 16 bytes are ASCII: both halves are already stored
+ counter += 16;
+ continue;
+ }
+ }
+
+ // UTF-8 character found
+ // which one?
+ counter += bsf_nonzero(highbytes); // presumably the index of the lowest set bit, i.e. the first non-ASCII byte -- confirm against bsf_nonzero
+ extract_utf8_multibyte<true>(dst, chars, counter, len); // presumably decodes one sequence and advances counter; <true> presumably selects the trusted (unchecked) path -- TODO confirm
+ }
+ len += 16; // undo the adjustment made before the vector loop
+
+ while (counter < len) { // scalar loop over whatever the vector loop left unprocessed
+ uchar ch = chars[counter];
+ if ((ch & 0x80) == 0) { // top bit clear: plain ASCII byte, copied through as-is
+ dst[counter] = ch;
+ ++counter;
+ continue;
+ }
+
+ // UTF-8 character found
+ extract_utf8_multibyte<true>(dst, chars, counter, len); // same helper as in the vector loop; expected to advance counter -- confirm
+ }
+ return dst + counter - qch; // element offset of dst + counter from the start of the output buffer
+}
+
void tst_QString::fromUtf8Alternatives_data() const
{
QTest::addColumn<FromUtf8Function>("function");
@@ -2158,6 +2205,7 @@ void tst_QString::fromUtf8Alternatives_data() const
QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
QTest::newRow("optimized-for-ascii") << &fromUtf8_optimised_for_ascii;
QTest::newRow("sse2-optimized-for-ascii") << &fromUtf8_sse2_optimised_for_ascii;
+ QTest::newRow("sse2-trusted-no-bom") << &fromUtf8_sse2_trusted_no_bom;
}
extern StringData fromUtf8Data;