diff options
author | Qt Continuous Integration System <qt-info@nokia.com> | 2010-03-09 17:47:56 (GMT) |
---|---|---|
committer | Qt Continuous Integration System <qt-info@nokia.com> | 2010-03-09 17:47:56 (GMT) |
commit | 9227f8606c85ad9a472628a8a06c312452d69f14 (patch) | |
tree | 004e193bd008fb5bf3c33d128223f425a0e8d614 /src/corelib/codecs/qutfcodec.cpp | |
parent | e06d011cc08de370ece3b0c324b0a735a2625820 (diff) | |
parent | 0edda17e2edb4da968b026ac6658f740323d6dd5 (diff) | |
download | Qt-9227f8606c85ad9a472628a8a06c312452d69f14.zip Qt-9227f8606c85ad9a472628a8a06c312452d69f14.tar.gz Qt-9227f8606c85ad9a472628a8a06c312452d69f14.tar.bz2 |
Merge branch '4.7-cutoff' of scm.dev.nokia.troll.no:qt/oslo-staging-1 into 4.7-integration
* '4.7-cutoff' of scm.dev.nokia.troll.no:qt/oslo-staging-1: (82 commits)
Revert 12b6987031be9faee3886d7623888feb4e1762af
Changed TEST_COMPILER from CC to CXX in configure script.
doc: Fixed several qdoc errors.
Carbon: Native filedialog does not apply filters on app-bundles
Cocoa: Native filedialog does not apply filters on app-bundles
Improve performance of QTimer::singleShot
Add a benchmark comparing single shot timer with invokeMethod
use Qt's private mac functions, reduce code redundancy
Assistant: Fix compile warning for empty header.
Doc: mark QEasingCurve support functions as new in 4.7.
Modify the XML test suite not to use non-characters.
Autotest: Fix failing QTextCodec tests
Doc: mark methods as internal (as they were in previous releases)
Doc: add image for Qt Quick to "What's New" page.
qdoc: Clear a static multimap after each qdocconf file.
qdoc: Added some debug output to track down a crash
Fixed mouse wheel handling in scrollareas.
Added two missing keys for X11
Carbon: usage of menu bars can cause exceptions to be thrown
Mac: compile fix
...
Diffstat (limited to 'src/corelib/codecs/qutfcodec.cpp')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 7655c51..f747bf7 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -48,6 +48,19 @@ QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; +static inline bool isUnicodeNonCharacter(uint ucs4) +{ + // Unicode has a couple of "non-characters" that one can use internally, + // but are not allowed to be used for text interchange. + // + // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, + // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and + // U+FDEF (inclusive) + + return (ucs4 & 0xfffe) == 0xfffe + || (ucs4 - 0xfdd0U) < 16; +} + QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state) { uchar replacement = '?'; @@ -106,16 +119,17 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve if (u < 0x0800) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { + // is it one of the Unicode non-characters? + if (isUnicodeNonCharacter(u)) { + *cursor++ = replacement; + ++ch; + ++invalid; + continue; + } + if (u > 0xffff) { - // see QString::fromUtf8() and QString::utf8() for explanations - if (u > 0x10fe00 && u < 0x10ff00) { - *cursor++ = (u - 0x10fe00); - ++ch; - continue; - } else { - *cursor++ = 0xf0 | ((uchar) (u >> 18)); - *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); - } + *cursor++ = 0xf0 | ((uchar) (u >> 18)); + *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); } else { *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); } @@ -179,15 +193,16 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte --need; if (!need) { // utf-8 bom composes into 0xfeff code point + bool nonCharacter; if (!headerdone && uc == 0xfeff) { // dont do anything, just skip the BOM - } else if (uc > 0xffff && uc < 0x110000) { + } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) { // surrogate pair Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); *qch++ = QChar::highSurrogate(uc); *qch++ = QChar::lowSurrogate(uc); - } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - // error: overlong sequence, UTF16 surrogate or BOM + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) { + // error: overlong sequence, UTF16 surrogate or non-character *qch++ = replacement; ++invalid; } else { |