diff options
author | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-07-13 16:26:25 (GMT) |
---|---|---|
committer | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-07-16 10:09:40 (GMT) |
commit | 09f2012b18e6ab5abbbf07adb8d4bc1ea28d11b0 (patch) | |
tree | 71b42771d27aa66799f993936a0d31252020fb89 /src | |
parent | d124bcf70a8e167ebf997ac2a4222623a5a9acdf (diff) | |
download | Qt-09f2012b18e6ab5abbbf07adb8d4bc1ea28d11b0.zip Qt-09f2012b18e6ab5abbbf07adb8d4bc1ea28d11b0.tar.gz Qt-09f2012b18e6ab5abbbf07adb8d4bc1ea28d11b0.tar.bz2 |
A small optimisation for the utf8->utf16 conversion.
Reviewed-by: Thiago Macieira
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 32 |
1 files changed, 11 insertions, 21 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index d111660..abcc07c 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -142,7 +142,7 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state) { bool headerdone = false; - QChar replacement = QChar::ReplacementCharacter; + ushort replacement = QChar::ReplacementCharacter; int need = 0; int error = -1; uint uc = 0; @@ -166,38 +166,28 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte headerdone = true; } - QString result(len, Qt::Uninitialized); // worst case - QChar *qch = (QChar *)result.unicode(); + QString result(need + len + 1, Qt::Uninitialized); // worst case + ushort *qch = (ushort *)result.unicode(); uchar ch; int invalid = 0; - for (int i=0; i<len; i++) { + for (int i = 0; i < len; ++i) { ch = chars[i]; if (need) { if ((ch&0xc0) == 0x80) { uc = (uc << 6) | (ch & 0x3f); - need--; + --need; if (!need) { // utf-8 bom composes into 0xfeff code point if (!headerdone && uc == 0xfeff) { // dont do anything, just skip the BOM } else if (uc > 0xffff && uc < 0x110000) { // surrogate pair - uc -= 0x10000; - unsigned short high = uc/0x400 + 0xd800; - unsigned short low = uc%0x400 + 0xdc00; - - // resize if necessary - long where = qch - result.unicode(); - if (where + 2 >= result.length()) { - result.resize(where + 2); - qch = result.data() + where; - } - - *qch++ = QChar(high); - *qch++ = QChar(low); + Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); + *qch++ = QChar::highSurrogate(uc); + *qch++ = QChar::lowSurrogate(uc); } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - // error + // error: overlong sequence, UTF16 surrogate or BOM *qch++ = replacement; ++invalid; } else { @@ -215,7 +205,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte } } else { if (ch < 128) { - *qch++ = QLatin1Char(ch); + *qch++ = ushort(ch); headerdone = true; } else if ((ch & 0xe0) == 0xc0) { uc = ch & 0x1f; @@ -249,7 +239,7 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte ++invalid; } } - result.truncate(qch - result.unicode()); + result.truncate(qch - (ushort *)result.unicode()); if (state) { state->invalidChars += invalid; state->remainingChars = need; |