diff options
author | Qt Continuous Integration System <qt-info@nokia.com> | 2010-03-07 03:53:07 (GMT) |
---|---|---|
committer | Qt Continuous Integration System <qt-info@nokia.com> | 2010-03-07 03:53:07 (GMT) |
commit | 1bf5e9b3c328a80e71e7deed3419ff9d442db3cb (patch) | |
tree | 7f5eba92e388fc22ddbe6bfb2ecb7cc00af96425 /src/corelib/codecs | |
parent | e56ae7fb7b269afe36a3bd2f4de0c10f8c2a6924 (diff) | |
parent | 670b1cfccd68a69e03544adff5dea1a21dc2b339 (diff) | |
download | Qt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.zip Qt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.tar.gz Qt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.tar.bz2 |
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1 into 4.7-integration
* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1: (57 commits)
fix corewlan detection error when building for 10.5 when 10.6 is also
fixed treatment of zlib on Mac when crossbuilding
Partial overloading support for qdbus cli tool.
Allow empty authority in QUrl::setAuthority as per docs.
Added test for QTBUG-6962: Empty authority ignored by QUrl::setAuthority.
fixed case of GL include directory
check in MAC_APPLICATION_MENU translations
Re-generate the Unicode tables after updates to the program that generates them
Fix the code after merge: DerivedNormalizationProps has two or more columns
add some useful definitions to qunicodetables_p.h
qchar.cpp: fix indentation
finish last commit
prefer DerivedNormalizationProps.txt over CompositionExclusions.txt
improve error reporting a bit more
improve error reporting
fix incorrect condition
check if string to int conversions were done w/o errors
improve error reporting for unassigned grapheme/word/sentence break classes
avoid using of qunicodetables_p.h in generator
use QHash for line break map
...
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 39 |
1 files changed, 27 insertions, 12 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index 7655c51..233bd8f 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -48,6 +48,19 @@ QT_BEGIN_NAMESPACE enum { Endian = 0, Data = 1 }; +static inline bool isUnicodeNonCharacter(uint ucs4) +{ + // Unicode has a couple of "non-characters" that one can use internally, + // but are not allowed to be used for text interchange. + // + // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, + // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and + // U+FDEF (inclusive) + + return (ucs4 & 0xfffe) == 0xfffe + || (ucs4 - 0xfdd0U) < 16; +} + QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state) { uchar replacement = '?'; @@ -106,16 +119,17 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve if (u < 0x0800) { *cursor++ = 0xc0 | ((uchar) (u >> 6)); } else { + // is it one of the Unicode non-characters? 
+ if (isUnicodeNonCharacter(u)) { + *cursor = replacement; + ++ch; + ++invalid; + continue; + } + if (u > 0xffff) { - // see QString::fromUtf8() and QString::utf8() for explanations - if (u > 0x10fe00 && u < 0x10ff00) { - *cursor++ = (u - 0x10fe00); - ++ch; - continue; - } else { - *cursor++ = 0xf0 | ((uchar) (u >> 18)); - *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); - } + *cursor++ = 0xf0 | ((uchar) (u >> 18)); + *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); } else { *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); } @@ -179,15 +193,16 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte --need; if (!need) { // utf-8 bom composes into 0xfeff code point + bool nonCharacter; if (!headerdone && uc == 0xfeff) { // dont do anything, just skip the BOM - } else if (uc > 0xffff && uc < 0x110000) { + } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) { // surrogate pair Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length()); *qch++ = QChar::highSurrogate(uc); *qch++ = QChar::lowSurrogate(uc); - } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - // error: overlong sequence, UTF16 surrogate or BOM + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) { + // error: overlong sequence, UTF16 surrogate or non-character *qch++ = replacement; ++invalid; } else { |