summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
authorQt Continuous Integration System <qt-info@nokia.com>2010-03-07 03:53:07 (GMT)
committerQt Continuous Integration System <qt-info@nokia.com>2010-03-07 03:53:07 (GMT)
commit1bf5e9b3c328a80e71e7deed3419ff9d442db3cb (patch)
tree7f5eba92e388fc22ddbe6bfb2ecb7cc00af96425 /src/corelib/codecs
parente56ae7fb7b269afe36a3bd2f4de0c10f8c2a6924 (diff)
parent670b1cfccd68a69e03544adff5dea1a21dc2b339 (diff)
downloadQt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.zip
Qt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.tar.gz
Qt-1bf5e9b3c328a80e71e7deed3419ff9d442db3cb.tar.bz2
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1 into 4.7-integration
* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1: (57 commits) fix corewlan detection error when building for 10.5 when 10.6 is also fixed treatment of zlib on Mac when crossbuilding Partial overloading support for qdbus cli tool. Allow empty authority in QUrl::setAuthority as per docs. Added test for QTBUG-6962: Empty authority ignored by QUrl::setAuthority. fixed case of GL include directory check in MAC_APPLICATION_MENU translations Re-generate the Unicode tables after updates to the program that generates them Fix the code after merge: DerivedNormalizationProps has two or more columns add some usefull definitions to qunicodetables_p.h qchar.cpp: fix identation finish last commit prefer DerivedNormalizationProps.txt over CompositionExclusions.txt improve error reporting a bit more improve error reporting fix incorect condition check if string to int conversions were done w/o errors improve error reporting for unassigned grapheme/word/sentence break classes avoid using of qunicodetables_p.h in generator use QHash for line break map ...
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r--src/corelib/codecs/qutfcodec.cpp39
1 files changed, 27 insertions, 12 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 7655c51..233bd8f 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -48,6 +48,19 @@ QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
+static inline bool isUnicodeNonCharacter(uint ucs4)
+{
+ // Unicode has a couple of "non-characters" that one can use internally,
+ // but are not allowed to be used for text interchange.
+ //
+ // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
+ // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
+ // U+FDEF (inclusive)
+
+ return (ucs4 & 0xfffe) == 0xfffe
+ || (ucs4 - 0xfdd0U) < 16;
+}
+
QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
{
uchar replacement = '?';
@@ -106,16 +119,17 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
if (u < 0x0800) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
+ // is it one of the Unicode non-characters?
+ if (isUnicodeNonCharacter(u)) {
+ *cursor = replacement;
+ ++ch;
+ ++invalid;
+ continue;
+ }
+
if (u > 0xffff) {
- // see QString::fromUtf8() and QString::utf8() for explanations
- if (u > 0x10fe00 && u < 0x10ff00) {
- *cursor++ = (u - 0x10fe00);
- ++ch;
- continue;
- } else {
- *cursor++ = 0xf0 | ((uchar) (u >> 18));
- *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
- }
+ *cursor++ = 0xf0 | ((uchar) (u >> 18));
+ *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
} else {
*cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
}
@@ -179,15 +193,16 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
--need;
if (!need) {
// utf-8 bom composes into 0xfeff code point
+ bool nonCharacter;
if (!headerdone && uc == 0xfeff) {
// dont do anything, just skip the BOM
- } else if (uc > 0xffff && uc < 0x110000) {
+ } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) {
// surrogate pair
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
*qch++ = QChar::highSurrogate(uc);
*qch++ = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // error: overlong sequence, UTF16 surrogate or BOM
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
+ // error: overlong sequence, UTF16 surrogate or non-character
*qch++ = replacement;
++invalid;
} else {