summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@nokia.com> 2011-03-19 13:24:34 (GMT)
committer: Thiago Macieira <thiago.macieira@nokia.com> 2011-03-22 14:51:56 (GMT)
commit: 6934b9b7be067d1c61c8796f8a49d40b3b206a3e (patch)
tree: b95bf141cea4f79614433eabed42f06084fd8628
parent: dfadfaebe1e8a99230d511645bb1fa24bdf9e033 (diff)
downloadQt-6934b9b7be067d1c61c8796f8a49d40b3b206a3e.zip
Qt-6934b9b7be067d1c61c8796f8a49d40b3b206a3e.tar.gz
Qt-6934b9b7be067d1c61c8796f8a49d40b3b206a3e.tar.bz2
Add a stateless copy of the Qt 4.7 UTF-8 codec.
For whatever reason, this code is worse than the stateful code...
-rw-r--r-- tests/benchmarks/corelib/tools/qstring/main.cpp 99
1 files changed, 99 insertions, 0 deletions
diff --git a/tests/benchmarks/corelib/tools/qstring/main.cpp b/tests/benchmarks/corelib/tools/qstring/main.cpp
index f505151..c7a5096 100644
--- a/tests/benchmarks/corelib/tools/qstring/main.cpp
+++ b/tests/benchmarks/corelib/tools/qstring/main.cpp
@@ -1865,6 +1865,104 @@ int fromUtf8_qt47(ushort *dst, const char *chars, int len)
return qch - dst;
}
+int fromUtf8_qt47_stateless(ushort *dst, const char *chars, int len)
+{
+ // Stateless variant of the conversion code above (no other improvements): decodes len bytes of UTF-8 at chars into UTF-16 code units at dst.
+ // Returns the number of ushort units written. dst is not bounds-checked here; malformed input is replaced by QChar::ReplacementCharacter.
+ bool headerdone = false; // while false, a decoded U+FEFF is skipped as a BOM instead of being emitted
+ const ushort replacement = QChar::ReplacementCharacter;
+ int need = 0; // continuation bytes still expected for the sequence being decoded
+ int error = -1; // index of the current sequence's lead byte; used to rewind on a decoding error
+ uint uc = 0; // code point being accumulated
+ uint min_uc = 0; // smallest code point allowed for the current sequence length (overlong check)
+
+ if (len > 3
+ && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
+ // input starts with the UTF-8 byte order mark (EF BB BF) and has more data after it: skip the mark
+ chars += 3;
+ len -= 3; // NOTE(review): headerdone stays false, so a second BOM directly after this one is also dropped by the loop - confirm intended
+ }
+
+ // QString result(need + len + 1, Qt::Uninitialized); // worst case
+ // ushort *qch = (ushort *)result.unicode();
+ ushort *qch = dst; // write cursor into the caller's buffer
+ uchar ch; // current input byte, unsigned so the bit tests below see 0..255
+ int invalid = 0; // number of replacement characters emitted; only counted, never read or returned here
+
+ for (int i = 0; i < len; ++i) {
+ ch = chars[i];
+ if (need) { // in the middle of a multi-byte sequence
+ if ((ch&0xc0) == 0x80) { // 10xxxxxx: a continuation byte
+ uc = (uc << 6) | (ch & 0x3f); // append its 6 payload bits
+ --need;
+ if (!need) { // sequence complete: validate and emit
+ // the UTF-8 BOM decodes to code point 0xfeff
+ bool nonCharacter; // assigned inside the next condition; left unset only when the BOM branch is taken, where it is not read
+ if (!headerdone && uc == 0xfeff) {
+ // don't do anything, just skip the BOM
+ } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) {
+ // code point above 0xffff: emit it as a UTF-16 surrogate pair (two units)
+ //Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
+ *qch++ = QChar::highSurrogate(uc);
+ *qch++ = QChar::lowSurrogate(uc);
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
+ // error: overlong sequence, UTF-16 surrogate, non-character, or code point of 0x110000 and above
+ *qch++ = replacement;
+ ++invalid;
+ } else {
+ *qch++ = uc; // valid code point that fits in a single UTF-16 unit
+ }
+ headerdone = true;
+ }
+ } else {
+ // error: a continuation byte was expected; emit one replacement for the lead byte and re-decode what follows it
+ i = error; // the loop's ++i then resumes at the byte right after the lead byte
+ *qch++ = replacement;
+ ++invalid;
+ need = 0;
+ headerdone = true;
+ }
+ } else {
+ if (ch < 128) { // 0xxxxxxx: ASCII, copied through unchanged
+ *qch++ = ushort(ch);
+ headerdone = true;
+ } else if ((ch & 0xe0) == 0xc0) { // 110xxxxx: lead byte of a 2-byte sequence
+ uc = ch & 0x1f;
+ need = 1;
+ error = i;
+ min_uc = 0x80;
+ headerdone = true;
+ } else if ((ch & 0xf0) == 0xe0) { // 1110xxxx: lead byte of a 3-byte sequence
+ uc = ch & 0x0f;
+ need = 2;
+ error = i;
+ min_uc = 0x800; // headerdone is not set in this branch, so a leading 3-byte sequence can still be tested against the BOM
+ } else if ((ch&0xf8) == 0xf0) { // 11110xxx: lead byte of a 4-byte sequence
+ uc = ch & 0x07;
+ need = 3;
+ error = i;
+ min_uc = 0x10000;
+ headerdone = true;
+ } else {
+ // error: stray continuation byte (0x80..0xbf) or invalid lead byte (0xf8..0xff)
+ *qch++ = replacement;
+ ++invalid;
+ headerdone = true;
+ }
+ }
+ }
+ if (need > 0) {
+ // input ended in the middle of a sequence: emit one replacement per byte from the lead byte to the end
+ for (int i = error; i < len; ++i) {
+ *qch++ = replacement;
+ ++invalid;
+ }
+ }
+ //result.truncate(qch - (ushort *)result.unicode());
+ //return result;
+ return qch - dst; // number of UTF-16 units written
+}
+
void tst_QString::fromUtf8Alternatives_data() const
{
QTest::addColumn<FromUtf8Function>("function");
@@ -1872,6 +1970,7 @@ void tst_QString::fromUtf8Alternatives_data() const
QTest::newRow("latin1-best") << &fromUtf8_latin1best;
QTest::newRow("latin1-qt4.7") << &fromUtf8_latin1_qt47;
QTest::newRow("qt-4.7") << &fromUtf8_qt47;
+ QTest::newRow("qt-4.7-stateless") << &fromUtf8_qt47_stateless;
}
extern StringData fromUtf8Data;