summaryrefslogtreecommitdiffstats
path: root/src/corelib/codecs
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r--src/corelib/codecs/codecs.pri3
-rw-r--r--src/corelib/codecs/qiconvcodec.cpp41
-rw-r--r--src/corelib/codecs/qsimplecodec.cpp12
-rw-r--r--src/corelib/codecs/qsimplecodec_p.h4
-rw-r--r--src/corelib/codecs/qtextcodec.cpp41
-rw-r--r--src/corelib/codecs/qtextcodec_symbian.cpp678
-rw-r--r--src/corelib/codecs/qutfcodec.cpp39
7 files changed, 781 insertions, 37 deletions
diff --git a/src/corelib/codecs/codecs.pri b/src/corelib/codecs/codecs.pri
index 17f4d91..c572e08 100644
--- a/src/corelib/codecs/codecs.pri
+++ b/src/corelib/codecs/codecs.pri
@@ -31,7 +31,7 @@ unix {
DEFINES += GNU_LIBICONV
!mac:LIBS_PRIVATE *= -liconv
- } else {
+ } else:!symbian {
# no iconv, so we put all plugins in the library
HEADERS += \
../plugins/codecs/cn/qgb18030codec.h \
@@ -52,3 +52,4 @@ unix {
../plugins/codecs/jp/qfontjpcodec.cpp
}
}
+symbian:LIBS += -lcharconv
diff --git a/src/corelib/codecs/qiconvcodec.cpp b/src/corelib/codecs/qiconvcodec.cpp
index 0f73d9b..0fcdf96 100644
--- a/src/corelib/codecs/qiconvcodec.cpp
+++ b/src/corelib/codecs/qiconvcodec.cpp
@@ -299,6 +299,32 @@ QString QIconvCodec::convertToUnicode(const char* chars, int len, ConverterState
Q_GLOBAL_STATIC(QThreadStorage<QIconvCodec::IconvState *>, fromUnicodeState)
+static bool setByteOrder(iconv_t cd)
+{
+#if !defined(NO_BOM)
+ // give iconv() a BOM
+ char buf[4];
+ ushort bom[] = { QChar::ByteOrderMark };
+
+ char *outBytes = buf;
+ char *inBytes = reinterpret_cast<char *>(bom);
+ size_t outBytesLeft = sizeof buf;
+ size_t inBytesLeft = sizeof bom;
+
+#if defined(GNU_LIBICONV)
+ const char **inBytesPtr = const_cast<const char **>(&inBytes);
+#else
+ char **inBytesPtr = &inBytes;
+#endif
+
+ if (iconv(cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+ return false;
+ }
+#endif // NO_BOM
+
+ return true;
+}
+
QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterState *convState) const
{
char *inBytes;
@@ -325,17 +351,8 @@ QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterSt
IconvState *&state = ts->localData();
if (!state) {
state = new IconvState(QIconvCodec::createIconv_t(0, UTF16));
- if (state->cd != reinterpret_cast<iconv_t>(-1)) {
- size_t outBytesLeft = len + 3; // +3 for the BOM
- QByteArray ba(outBytesLeft, Qt::Uninitialized);
- outBytes = ba.data();
-
-#if !defined(NO_BOM)
- // give iconv() a BOM
- QChar bom[] = { QChar(QChar::ByteOrderMark) };
- inBytes = reinterpret_cast<char *>(bom);
- inBytesLeft = sizeof(bom);
- if (iconv(state->cd, inBytesPtr, &inBytesLeft, &outBytes, &outBytesLeft) == (size_t) -1) {
+ if (state->cd == reinterpret_cast<iconv_t>(-1)) {
+ if (!setByteOrder(state->cd)) {
perror("QIconvCodec::convertFromUnicode: using ASCII for conversion, iconv failed for BOM");
iconv_close(state->cd);
@@ -343,7 +360,6 @@ QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterSt
return QString(uc, len).toAscii();
}
-#endif // NO_BOM
}
}
if (state->cd == reinterpret_cast<iconv_t>(-1)) {
@@ -422,6 +438,7 @@ QByteArray QIconvCodec::convertFromUnicode(const QChar *uc, int len, ConverterSt
// reset to initial state
iconv(state->cd, 0, &inBytesLeft, 0, &outBytesLeft);
+ setByteOrder(state->cd);
ba.resize(ba.size() - outBytesLeft);
diff --git a/src/corelib/codecs/qsimplecodec.cpp b/src/corelib/codecs/qsimplecodec.cpp
index 4cc7912..a6f5c9e 100644
--- a/src/corelib/codecs/qsimplecodec.cpp
+++ b/src/corelib/codecs/qsimplecodec.cpp
@@ -54,6 +54,7 @@ static const struct {
int mib;
quint16 values[128];
} unicodevalues[QSimpleTextCodec::numSimpleCodecs] = {
+#ifndef Q_OS_SYMBIAN
// from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
{ "KOI8-R", { "csKOI8R", 0 }, 2084,
{ 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
@@ -288,6 +289,7 @@ static const struct {
0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
+#endif
{ "ISO-8859-16", { "iso-ir-226", "latin10", 0 }, 112,
{ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
@@ -309,7 +311,7 @@ static const struct {
// next bits generated again from tables on the Unicode 3.0 CD.
// $ for a in CP* ; do (awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
-
+#ifndef Q_OS_SYMBIAN
{ "IBM850", { "CP850", "csPC850Multilingual", 0 }, 2009,
{ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
@@ -344,6 +346,7 @@ static const struct {
0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
+#endif //Q_OS_SYMBIAN
{ "IBM866", { "CP866", "csIBM866", 0 }, 2086,
{ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
@@ -362,6 +365,7 @@ static const struct {
0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
+#ifndef Q_OS_SYMBIAN
{ "windows-1250", { "CP1250", 0 }, 2250,
{ 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
@@ -516,6 +520,7 @@ static const struct {
0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
+#endif
{ "Apple Roman", { "macintosh", "MacRoman", 0 }, -168,
{ 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
@@ -534,8 +539,6 @@ static const struct {
0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
-
-
// This one is based on the charmap file
// /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
// to this format by Boerre Gaup <boerre@subdimension.com>
@@ -557,7 +560,7 @@ static const struct {
0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
-
+#ifndef Q_OS_SYMBIAN
// this one is generated from the charmap file located in /usr/share/i18n/charmaps
// on most Linux distributions. The thai character set tis620 is byte by byte equivalent
// to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
@@ -581,6 +584,7 @@ static const struct {
0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
+#endif
/*
Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
MIBenum: 2004
diff --git a/src/corelib/codecs/qsimplecodec_p.h b/src/corelib/codecs/qsimplecodec_p.h
index b53eb95..57503b2 100644
--- a/src/corelib/codecs/qsimplecodec_p.h
+++ b/src/corelib/codecs/qsimplecodec_p.h
@@ -64,7 +64,11 @@ template <typename T> class QAtomicPointer;
class QSimpleTextCodec: public QTextCodec
{
public:
+#ifdef Q_OS_SYMBIAN
+ enum { numSimpleCodecs = 5 };
+#else
enum { numSimpleCodecs = 30 };
+#endif
explicit QSimpleTextCodec(int);
~QSimpleTextCodec();
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp
index ca5e658..4034218 100644
--- a/src/corelib/codecs/qtextcodec.cpp
+++ b/src/corelib/codecs/qtextcodec.cpp
@@ -64,6 +64,7 @@
#ifndef QT_NO_CODECS
# include "qtsciicodec_p.h"
# include "qisciicodec_p.h"
+#ifndef Q_OS_SYMBIAN
# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
// no iconv(3) support, must build all codecs into the library
# include "../../plugins/codecs/cn/qgb18030codec.h"
@@ -77,6 +78,7 @@
# include "qfontlaocodec_p.h"
# include "../../plugins/codecs/jp/qfontjpcodec.h"
# endif
+#endif // QT_NO_SYMBIAN
#endif // QT_NO_CODECS
#include "qlocale.h"
#include "qmutex.h"
@@ -93,6 +95,11 @@
# define QT_NO_SETLOCALE
#endif
+#ifdef Q_OS_SYMBIAN
+#include "qtextcodec_symbian.cpp"
+#endif
+
+
// enabling this is not exception safe!
// #define Q_DEBUG_TEXTCODEC
@@ -537,6 +544,12 @@ static QTextCodec *checkForCodec(const QByteArray &name) {
*/
static void setupLocaleMapper()
{
+#ifdef Q_OS_SYMBIAN
+ localeMapper = QSymbianTextCodec::localeMapper;
+ if (localeMapper)
+ return;
+#endif
+
#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
localeMapper = QTextCodec::codecForName("System");
#else
@@ -680,6 +693,17 @@ static void setup()
(void) createQTextCodecCleanup();
#ifndef QT_NO_CODECS
+ (void)new QTsciiCodec;
+ for (int i = 0; i < 9; ++i)
+ (void)new QIsciiCodec(i);
+
+ for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
+ (void)new QSimpleTextCodec(i);
+
+#ifdef Q_OS_SYMBIAN
+ localeMapper = QSymbianTextCodec::init();
+#endif
+
# if defined(Q_WS_X11) && !defined(QT_BOOTSTRAPPED)
// no font codecs when bootstrapping
(void)new QFontLaoCodec;
@@ -696,12 +720,8 @@ static void setup()
# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
# endif // Q_WS_X11
- (void)new QTsciiCodec;
-
- for (int i = 0; i < 9; ++i)
- (void)new QIsciiCodec(i);
-
+#ifndef Q_OS_SYMBIAN
# if defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
// no asian codecs when bootstrapping, sorry
(void)new QGb18030Codec;
@@ -715,6 +735,7 @@ static void setup()
(void)new QBig5Codec;
(void)new QBig5hkscsCodec;
# endif // QT_NO_ICONV && !QT_BOOTSTRAPPED
+#endif //Q_OS_SYMBIAN
#endif // QT_NO_CODECS
#if defined(Q_OS_WIN32) || defined(Q_OS_WINCE)
@@ -727,17 +748,18 @@ static void setup()
(void)new QUtf32Codec;
(void)new QUtf32BECodec;
(void)new QUtf32LECodec;
+#ifndef Q_OS_SYMBIAN
(void)new QLatin15Codec;
+#endif
(void)new QLatin1Codec;
(void)new QUtf8Codec;
- for (int i = 0; i < QSimpleTextCodec::numSimpleCodecs; ++i)
- (void)new QSimpleTextCodec(i);
-
+#ifndef Q_OS_SYMBIAN
#if defined(Q_OS_UNIX) && !defined(QT_NO_ICONV) && !defined(QT_BOOTSTRAPPED)
// QIconvCodec depends on the UTF-16 codec, so it needs to be created last
(void) new QIconvCodec();
#endif
+#endif
if (!localeMapper)
setupLocaleMapper();
@@ -1124,6 +1146,9 @@ QList<int> QTextCodec::availableMibs()
*/
void QTextCodec::setCodecForLocale(QTextCodec *c)
{
+#ifndef QT_NO_THREAD
+ QMutexLocker locker(textCodecsMutex());
+#endif
localeMapper = c;
if (!localeMapper)
setupLocaleMapper();
diff --git a/src/corelib/codecs/qtextcodec_symbian.cpp b/src/corelib/codecs/qtextcodec_symbian.cpp
new file mode 100644
index 0000000..e4db9d7
--- /dev/null
+++ b/src/corelib/codecs/qtextcodec_symbian.cpp
@@ -0,0 +1,678 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qtextcodec_p.h"
+
+#include <private/qcore_symbian_p.h>
+#include <QThreadStorage>
+#include <QScopedPointer>
+
+#include <charconv.h>
+
+struct QSymbianCodecInitData {
+ uint charsetId;
+ int mib;
+ const char *aliases;
+};
+
+/* This table contains the known Symbian codecs aliases. It is ordered by charsetId.
+ It is required as symbian does not provide have aliases.
+ */
+static const QSymbianCodecInitData codecsData[] = {
+ { /*268439485*/ KCharacterSetIdentifierShiftJis, 17, "Shift_JIS\0MS_Kanji\0csShiftJIS\0MS_KANJI\0SJIS\0" },
+ { /*268439486*/ KCharacterSetIdentifierGb2312, 57, "GB2312\0csGB2312\0CN-GB\0EUC-CN\0" },
+ { /*268439487*/ KCharacterSetIdentifierBig5, 2026, "Big5\0csBig5\0Big5-ETen\0CP950\0BIG-FIVE\0CN-BIG5\0" },
+ { /*268440246*/ KCharacterSetIdentifierCodePage1252, 2252, "windows-1252\0Code Page 1252\0CP1252\0MS-ANSI\0" },
+// { /*268450576*/ KCharacterSetIdentifierIso88591, 4, "ISO-8859-1\0ISO_8859-1:1987\0iso-ir-100\0ISO_8859-1\0latin1\0l1\0IBM819\0CP819\0csISOLatin1\0ISO-IR-100\0ISO8859-1\0L1\0LATIN1\0CSISOLATIN1\0" },
+ { /*268451531*/ KCharacterSetIdentifierGbk, 113, "GBK\0MS936\0windows-936\0CP936\0" },
+ { /*268451866*/ KCharacterSetIdentifierGb12345, 0, "GB12345\0" },
+ { /*268455110*/ KCharacterSetIdentifierAscii, 3, "US-ASCII\0ANSI_X3.4-1968\0iso-ir-6\0ANSI_X3.4-1986\0ISO_646.irv:1991\0ASCII\0ISO646-US\0us\0IBM367\0cp367\0csASCII\0ISO-IR-6\0ISO_646.IRV:1991\0"},
+ { /*268456062*/ KCharacterSetIdentifierIso88592, 5, "ISO-8859-2\0ISO_8859-2:1987\0iso-ir-101\0latin2\0l2\0csISOLatin2\0" },
+ { /*268456063*/ KCharacterSetIdentifierIso88594, 7, "ISO-8859-4\0ISO_8859-4:1988\0iso-ir-110\0latin4\0l4\0csISOLatin4\0" },
+ { /*268456064*/ KCharacterSetIdentifierIso88595, 8, "ISO-8859-5\0ISO_8859-5:1988\0iso-ir-144\0cyrillic\0csISOLatinCyrillic\0" },
+ { /*268456065*/ KCharacterSetIdentifierIso88597, 10, "ISO-8859-7\0ISO_8859-7:1987\0iso-ir-126\0ELOT_928\0ECMA-118\0greek\0greek8\0csISOLatinGreek\0" },
+ { /*268456066*/ KCharacterSetIdentifierIso88599, 12, "ISO-8859-9\0ISO_8859-9:1989\0iso-ir-148\0latin5\0l5\0csISOLatin5\0" },
+ { /*268456875*/ KCharacterSetIdentifierSms7Bit, 0, "SMS 7-bit\0" },
+ { /*268458028*/ KCharacterSetIdentifierUtf7, 103, "UTF-7\0UNICODE-1-1-UTF-7\0CSUNICODE11UTF7\0" },
+// { /*268458029*/ KCharacterSetIdentifierUtf8, 106, "UTF-8\0" },
+ { /*268458030*/ KCharacterSetIdentifierImapUtf7, 0, "IMAP UTF-7\0" },
+ { /*268458031*/ KCharacterSetIdentifierJavaConformantUtf8, 0, "JAVA UTF-8\0" },
+ { /*268458454*/ 268458454, 2250, "Windows-1250\0CP1250\0MS-EE\0" },
+ { /*268458455*/ 268458455, 2251, "Windows-1251\0CP1251\0MS-CYRL\0" },
+ { /*268458456*/ 268458456, 2253, "Windows-1253\0CP1253\0MS-GREEK\0" },
+ { /*268458457*/ 268458457, 2254, "Windows-1254\0CP1254\0MS-TURK\0" },
+ { /*268458458*/ 268458458, 2257, "Windows-1257\0CP1257\0WINBALTRIM\0" },
+ { /*268460133*/ KCharacterSetIdentifierHz, 2085, "HZ-GB-2312\0HZ\0" },
+ { /*268460134*/ KCharacterSetIdentifierJis, 16, "JIS_Encoding\0JIS\0" },
+ { /*268460135*/ KCharacterSetIdentifierEucJpPacked, 18, "EUC-JP\0Extended_UNIX_Code_Packed_Format_for_Japanese\0csEUCPkdFmtJapanese\0EUCJP_PACKED\0" },
+ { /*268461728*/ KCharacterSetIdentifierIso2022Jp, 39, "ISO-2022-JP\0csISO2022JP\0JIS7\0" },
+ { /*268461731*/ KCharacterSetIdentifierIso2022Jp1, 0, "ISO2022JP1\0" },
+ { /*268470824*/ KCharacterSetIdentifierIso88593, 6, "ISO-8859-3\0ISO_8859-3:1988\0iso-ir-109\0latin3\0l3\0csISOLatin3\0" },
+ { /*268470825*/ KCharacterSetIdentifierIso88596, 9, "ISO-8859-6\0ISO_8859-6:1987\0iso-ir-127\0ECMA-114\0ASMO-708\0arabic\0ISO88596\0csISOLatinArabic\0ARABIC\0" },
+ { /*268470826*/ KCharacterSetIdentifierIso88598, 11, "ISO-8859-8\0ISO_8859-8:1988\0iso-ir-138\0hebrew\0csISOLatinHebrew\0" },
+ { /*268470827*/ KCharacterSetIdentifierIso885910, 13, "ISO-8859-10\0iso-ir-157\0l6\0ISO_8859-10:1992\0csISOLatin6\0latin6\0" },
+ { /*268470828*/ KCharacterSetIdentifierIso885913, 109, "ISO-8859-13\0ISO885913\0ISO-IR-179\0ISO8859-13\0L7\0LATIN7\0CSISOLATIN7\0" },
+ { /*268470829*/ KCharacterSetIdentifierIso885914, 110, "ISO-8859-14\0iso-ir-199\0ISO_8859-14:1998\0latin8\0iso-celtic\0l8\0" },
+ { /*268470830*/ KCharacterSetIdentifierIso885915, 111, "ISO-8859-15\0latin-9\0ISO-IR-203\0" },
+// { /*270483374*/ KCharacterSetIdentifierUnicodeLittle, 1014, "UTF-16LE\0Little-Endian UNICODE\0" },
+// { /*270483538*/ KCharacterSetIdentifierUnicodeBig, 1013, "UTF-16BE\0Big-Endian UNICODE\0" },
+ { /*270501191*/ 270501191, 2255, "Windows-1255\0CP1255\0MS-HEBR\0" },
+ { /*270501192*/ 270501192, 2256, "Windows-1256\0CP1256\0MS-ARAB\0" },
+ { /*270501193*/ 270501193, 2259, "TIS-620\0ISO-IR-166\0TIS620-0\0TIS620.2529-1\0TIS620.2533-0\0TIS620.2533-1\0" },
+ { /*270501194*/ 270501194, 0, "windows-874\0CP874\0IBM874\0" },
+ { /*270501325*/ 270501325, 0, "SmsStrict\0" },
+ { /*270501521*/ 270501521, 0, "ShiftJisDirectmap\0" },
+ { /*270501542*/ 270501542, 0, "EucJpDirectmap\0" },
+ /* 270501691 (duplicate) Windows-1252 | windows-1252 |Windows-1252 |Code Page 1252 |CP1252 |MS-ANSI |WINDOWS-1252 |2252 */
+ { /*270501729*/ 270501729, 2088, "KOI8-U\0" },
+ { /*270501752*/ 270501752, 2084, "KOI8-R\0csKOI8R\0" },
+ { /*270529682*/ 270529682, 1000, "ISO-10646-UCS-2\0UCS-2\0CSUNICODE\0" },
+ { /*270562232*/ 270562232, 2258, "Windows-1258\0CP1258\0WINDOWS-1258\0" },
+ { /*270586888*/ 270586888, 0, "J5\0" },
+ { /*271011982*/ 271011982, 0, "ISCII\0" },
+ { /*271066541*/ 271066541, 2009, "CP850\0IBM850\0""850\0csPC850Multilingual\0" },
+ { /*271082493*/ 271082493, 0, "EXTENDED_SMS_7BIT\0" },
+ { /*271082494*/ 271082494, 0, "gsm7_turkish_single\0" },
+ { /*271082495*/ 271082495, 0, "turkish_locking_gsm7ext\0" },
+ { /*271082496*/ 271082496, 0, "turkish_locking_single\0" },
+ { /*536929574*/ 536929574, 38, "EUC-KR\0" },
+ { /*536936703*/ 536936703, 0, "CP949\0" },
+ { /*536936705*/ 536936705, 37, "ISO-2022-KR\0" },
+ { /*536941517*/ 536941517, 36, "KS_C_5601-1987\0" }
+ };
+
+
+class QSymbianTextCodec : public QTextCodec
+{
+public:
+ QString convertToUnicode(const char*, int, ConverterState*) const;
+ QByteArray convertFromUnicode(const QChar*, int, ConverterState*) const;
+ QList<QByteArray> aliases() const;
+ QByteArray name() const;
+ int mibEnum() const;
+
+ explicit QSymbianTextCodec(uint charsetId, int staticIndex = -1) : m_charsetId(charsetId), m_staticIndex(staticIndex) { }
+
+ static QSymbianTextCodec *init();
+ static QSymbianTextCodec *localeMapper;
+private:
+ static CCnvCharacterSetConverter *converter();
+ static uint getLanguageDependentCharacterSet();
+ uint m_charsetId;
+ int m_staticIndex;
+};
+
+QSymbianTextCodec *QSymbianTextCodec::localeMapper = 0;
+
+class QSymbianTextCodecWithName : public QSymbianTextCodec
+{
+public:
+ QSymbianTextCodecWithName(uint charsetId, const QByteArray &name)
+ : QSymbianTextCodec(charsetId) , m_name(name) { }
+ QByteArray name() const { return m_name; }
+ QList<QByteArray> aliases() const { return QList<QByteArray>(); }
+private:
+ QByteArray m_name;
+};
+
+Q_GLOBAL_STATIC(QThreadStorage<CCnvCharacterSetConverter *>,gs_converterStore);
+
+CCnvCharacterSetConverter *QSymbianTextCodec::converter()
+{
+ CCnvCharacterSetConverter *&conv = gs_converterStore()->localData();
+ if (!conv)
+ QT_TRAP_THROWING(conv = CCnvCharacterSetConverter::NewL())
+ return conv;
+}
+
+
+QByteArray QSymbianTextCodec::name() const
+{
+ if (m_staticIndex >= 0)
+ return QByteArray(codecsData[m_staticIndex].aliases);
+ QScopedPointer<HBufC8> buf;
+ QT_TRAP_THROWING(buf.reset(converter()->ConvertCharacterSetIdentifierToStandardNameL(m_charsetId, qt_s60GetRFs())))
+ if (buf)
+ return QByteArray(reinterpret_cast<const char *>(buf->Ptr()), buf->Length());
+ return QByteArray();
+}
+
+int QSymbianTextCodec::mibEnum() const
+{
+ if (m_staticIndex >= 0)
+ return codecsData[m_staticIndex].mib;
+ int mib;
+ QT_TRAP_THROWING(mib = converter()->ConvertCharacterSetIdentifierToMibEnumL(m_charsetId, qt_s60GetRFs()))
+ return mib;
+}
+
+QList<QByteArray> QSymbianTextCodec::aliases() const
+{
+ QList<QByteArray> result;
+ if (m_staticIndex >= 0) {
+ const char *aliases = codecsData[m_staticIndex].aliases;
+ aliases += strlen(aliases) + 1;
+ while (*aliases) {
+ int len = strlen(aliases);
+ result += QByteArray(aliases, len);
+ aliases += len + 1;
+ }
+ }
+ return result;
+}
+
+
+QString QSymbianTextCodec::convertToUnicode(const char *str, int len, ConverterState *state) const
+{
+ uint charsetId = m_charsetId;
+
+ // no support for utf7 with state
+ if (state && (charsetId == KCharacterSetIdentifierUtf7 ||
+ charsetId == KCharacterSetIdentifierImapUtf7)) {
+ return QString();
+ }
+ CCnvCharacterSetConverter *converter = QSymbianTextCodec::converter();
+ if (!str) {
+ return QString();
+ }
+
+ //Search the character set array containing all of the character sets for which conversion is available
+ CCnvCharacterSetConverter::TAvailability av;
+ QT_TRAP_THROWING(av = converter->PrepareToConvertToOrFromL(charsetId, qt_s60GetRFs()))
+ if (av == CCnvCharacterSetConverter::ENotAvailable) {
+ return QString();
+ }
+
+ char *str2;
+ int len2;
+ QByteArray helperBA;
+ if (state && (state->remainingChars > 0)) {
+ // we should prepare the input string ourselves
+ // the real size
+ len2 = len + state->remainingChars;
+ helperBA.resize(len2);
+ str2 = helperBA.data();
+ if (state->remainingChars > 3) { // doesn't happen usually
+ memcpy(str2, state->d, state->remainingChars);
+ qFree(state->d);
+ state->d = 0;
+ } else {
+ char charTbl[3];
+ charTbl[0] = state->state_data[0];
+ charTbl[1] = state->state_data[1];
+ charTbl[2] = state->state_data[2];
+ memcpy(str2, charTbl, state->remainingChars);
+ }
+ memcpy(str2+state->remainingChars, str, len);
+ }
+ else {
+ len2 = len;
+ str2 = const_cast<char*>(str);
+ }
+
+ QString UnicodeText(len2, Qt::Uninitialized);
+ TPtrC8 remainderOfForeignText;
+ remainderOfForeignText.Set(reinterpret_cast<const unsigned char *>(str2), len2);
+
+ int numberOfUnconvertibleCharacters = 0;
+ int indexOfFirstUnconvertibleCharacter;
+
+ // Use null character as replacement, if it is asked
+ bool convertToNull = (state && (state->flags & QTextCodec::ConvertInvalidToNull));
+ if (convertToNull) {
+ _LIT8(KReplacement, "\x00");
+ QT_TRAP_THROWING(converter->SetReplacementForUnconvertibleUnicodeCharactersL(KReplacement))
+ }
+ // use state->invalidChars for keeping symbian state
+ int sState = CCnvCharacterSetConverter::KStateDefault;
+ if (state && (state->invalidChars != CCnvCharacterSetConverter::KStateDefault)) {
+ sState = state->invalidChars;
+ }
+ //Convert text encoded in a non-Unicode character set into the Unicode character set (UCS-2).
+ int remainingChars = -1;
+ int initial_size=0;
+ while (1) {
+ TPtr16 UnicodePtr(reinterpret_cast<unsigned short *>(UnicodeText.data()+initial_size), UnicodeText.size());
+ QT_TRAP_THROWING(remainingChars = converter->ConvertToUnicode(UnicodePtr,
+ remainderOfForeignText,
+ sState,
+ numberOfUnconvertibleCharacters,
+ indexOfFirstUnconvertibleCharacter))
+
+ initial_size += UnicodePtr.Length();
+ // replace 0xFFFD with 0x0000 and only if state set to convert to it
+ if (numberOfUnconvertibleCharacters>0 && convertToNull) {
+ int len2 = UnicodePtr.Length();
+ for (int i = indexOfFirstUnconvertibleCharacter; i < len2; i++) {
+ UnicodePtr[i] = 0x0000;
+ }
+ }
+ // success
+ if (remainingChars==KErrNone) {
+ break;
+ }
+ // if ConvertToUnicode could not consume the foreign text at all
+ // UTF-8: EErrorIllFormedInput = KErrCorrupt
+ // UCS-2: KErrNotFound
+ if (remainingChars == CCnvCharacterSetConverter::EErrorIllFormedInput ||
+ remainingChars == KErrNotFound) {
+ remainingChars = remainderOfForeignText.Size();
+ break;
+ }
+ else {
+ if (remainingChars < 0) {
+ return QString();
+ }
+ }
+ //
+ UnicodeText.resize(UnicodeText.size() + remainingChars*2);
+ remainderOfForeignText.Set(reinterpret_cast<const unsigned char *>(str2+len2-remainingChars), remainingChars);
+ }
+ // save symbian state
+ if (state) {
+ state->invalidChars = sState;
+ }
+
+ if (remainingChars > 0) {
+ if (!state) {
+ // No way to signal, if there is still remaining chars, for ex. UTF-8 still can have
+ // some characters hanging around.
+ return QString();
+ }
+ const unsigned char *charPtr = remainderOfForeignText.Right(remainingChars).Ptr();
+ if (remainingChars > 3) { // doesn't happen usually
+ state->d = (void*)qMalloc(remainingChars);
+ if (!state->d)
+ return QString();
+ // copy characters there
+ memcpy(state->d, charPtr, remainingChars);
+ }
+ else {
+ // fallthru is correct
+ switch (remainingChars) {
+ case 3:
+ state->state_data[2] = charPtr[2];
+ case 2:
+ state->state_data[1] = charPtr[1];
+ case 1:
+ state->state_data[0] = charPtr[0];
+ }
+ }
+ state->remainingChars = remainingChars;
+ }
+ else {
+ if (state) {
+ // If we continued from an earlier iteration
+ state->remainingChars = 0;
+ }
+ }
+ // check if any ORIGINAL headers should be left
+ if (initial_size > 0) {
+ if (!state || (state && !(state->flags & QTextCodec::IgnoreHeader))) {
+ // always skip headers on following state loops
+ if (state) {
+ state->flags |= QTextCodec::IgnoreHeader;
+ }
+ const TUint16 *ptr = reinterpret_cast<const TUint16 *>(UnicodeText.data());
+ if (ptr[0] == QChar::ByteOrderMark || ptr[0] == QChar::ByteOrderSwapped) {
+ return UnicodeText.mid(1, initial_size - 1);
+ }
+ }
+ }
+ if (initial_size >= 0) {
+ UnicodeText.resize(initial_size);
+ return UnicodeText;
+ }
+ else {
+ return QString();
+ }
+}
+
+
+QByteArray QSymbianTextCodec::convertFromUnicode(const QChar *str, int len, ConverterState *state) const
+{
+ uint charsetId = m_charsetId;
+ CCnvCharacterSetConverter *converter = QSymbianTextCodec::converter();
+ if (!str)
+ return QByteArray();
+
+ if (len == 0)
+ return QByteArray();
+
+ // no support for utf7 with state
+ if (state && (charsetId == KCharacterSetIdentifierUtf7 ||
+ charsetId == KCharacterSetIdentifierImapUtf7))
+ return QByteArray();
+
+ //Get reference file session from backend
+ RFs &fileSession = qt_s60GetRFs();
+
+ //Search the character set array containing all of the character sets for which conversion is available
+ CCnvCharacterSetConverter::TAvailability av = CCnvCharacterSetConverter::ENotAvailable;
+ QT_TRAP_THROWING(av = converter->PrepareToConvertToOrFromL(charsetId, fileSession))
+ if (av == CCnvCharacterSetConverter::ENotAvailable)
+ return QByteArray();
+
+ // Use null character as replacement, if it is asked
+ if (state && (state->flags & QTextCodec::ConvertInvalidToNull)) {
+ _LIT8(KReplacement, "\x00");
+ QT_TRAP_THROWING(converter->SetReplacementForUnconvertibleUnicodeCharactersL(KReplacement))
+ }
+ else {
+ _LIT8(KReplacement, "?");
+ QT_TRAP_THROWING(converter->SetReplacementForUnconvertibleUnicodeCharactersL(KReplacement))
+ }
+ QByteArray outputBuffer;
+
+ // add header if no state (one run), or if no ignoreheader (from first state)
+ int bomofs = 0;
+ if (!state || (state && !(state->flags & QTextCodec::IgnoreHeader))) {
+
+ QChar bom(QChar::ByteOrderMark);
+
+ if (state)
+ state->flags |= QTextCodec::IgnoreHeader; // bom handling only on first state
+
+ switch (charsetId) {
+ case KCharacterSetIdentifierUcs2:
+ outputBuffer.append(bom.row());
+ outputBuffer.append(bom.cell());
+ bomofs = 2;
+ break;
+
+ case KCharacterSetIdentifierUtf8: // we don't add bom for UTF-8
+ case KCharacterSetIdentifierJavaConformantUtf8:
+ /*outputBuffer.append("\xef\xbb\xbf");
+ bomofs = 3;
+ */
+ break;
+
+ case KCharacterSetIdentifierUnicodeLittle:
+ outputBuffer.append(bom.cell());
+ outputBuffer.append(bom.row());
+ bomofs = 2;
+ break;
+
+ case KCharacterSetIdentifierUnicodeBig:
+ outputBuffer.append(bom.row());
+ outputBuffer.append(bom.cell());
+ bomofs = 2;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ // len is 16bit chars, reserve 3 8bit chars for each input char
+ // jsz - it could be differentiated, to allocate less
+ outputBuffer.resize(len * 3 + bomofs);
+
+ // loop for too short output buffer
+ int unconverted;
+ int numberOfUnconvertibleCharacters = len;
+ int indexOfFirstUnconvertibleCharacter;
+ int convertedSize;
+ int lastUnconverted = 0;
+ int initial_size=0;
+ int remainderToConvert = len;
+ while (1) {
+ TPtr8 outputPtr(reinterpret_cast<unsigned char *>(outputBuffer.data() + bomofs + initial_size), outputBuffer.size() - bomofs);
+
+ TPtrC16 UnicodeText(reinterpret_cast<const unsigned short *>(str+len-remainderToConvert), remainderToConvert);
+
+ //Convert text encoded in the Unicode character set (UCS-2) into other character sets
+ unconverted = -1;
+ QT_TRAP_THROWING( unconverted = converter->ConvertFromUnicode(outputPtr,
+ UnicodeText,
+ numberOfUnconvertibleCharacters,
+ indexOfFirstUnconvertibleCharacter))
+ initial_size += outputPtr.Length();
+ if (unconverted < 0) {
+ return QByteArray();
+ }
+
+
+ if (unconverted == 0 ) {
+ convertedSize = initial_size;
+ break;
+ }
+
+ // check what means unconverted > 0
+ if (indexOfFirstUnconvertibleCharacter<0) {
+ // 8859-6 and 8859-8 break with certain input (string of \xc0 - \xd9 converted to unicode and back)
+ if (unconverted == lastUnconverted) {
+ return QByteArray();
+ }
+ lastUnconverted = unconverted;
+ }
+ else {
+ // were some character not possible to convert
+
+ }
+ remainderToConvert = unconverted; // len - indexOfFirstUnconvertibleCharacter;
+ // resize output buffer, use =op for the null check
+ outputBuffer.resize(outputBuffer.size() + remainderToConvert * 3 + bomofs);
+ };
+
+ // shorten output
+ outputBuffer.resize(convertedSize + bomofs);
+
+ if (state) {
+ state->invalidChars = numberOfUnconvertibleCharacters;
+
+ // check if any Symbian CONVERTED headers should be removed
+ if (state->flags & QTextCodec::IgnoreHeader && state->state_data[0] == 0) {
+
+ state->state_data[0] = 0xff; // bom handling only on first state
+
+ if (charsetId == KCharacterSetIdentifierUcs2 && outputBuffer.size() > 1) {
+
+ QChar bom(QChar::ByteOrderMark);
+ if (outputBuffer.at(0) == bom.row() && outputBuffer.at(1) == bom.cell()) {
+ outputBuffer.remove(0, 2);
+ } else if (outputBuffer.at(0) == bom.cell() && outputBuffer.at(1) == bom.row()) {
+ outputBuffer.remove(0, 2);
+ }
+
+ } else if ((charsetId == KCharacterSetIdentifierUtf8 ||
+ charsetId == KCharacterSetIdentifierJavaConformantUtf8) &&
+ outputBuffer.size() > 2) {
+ if (outputBuffer.at(0) == 0xef && outputBuffer.at(1) == 0xbb && outputBuffer.at(2) == 0xbf) {
+ outputBuffer.remove(0, 3);
+ }
+
+ } else if (charsetId == KCharacterSetIdentifierUnicodeLittle &&
+ outputBuffer.size() > 1) {
+
+ QChar bom(QChar::ByteOrderMark);
+ if (outputBuffer.at(0) == bom.row() && outputBuffer.at(1) == bom.cell()) {
+ outputBuffer.remove(0, 2);
+ }
+
+ } else if (charsetId == KCharacterSetIdentifierUnicodeBig &&
+ outputBuffer.size() > 1) {
+
+ QChar bom(QChar::ByteOrderSwapped);
+ if (outputBuffer.at(0) == bom.row() && outputBuffer.at(1) == bom.cell()) {
+ outputBuffer.remove(0, 2);
+ }
+ }
+
+ }
+ }
+
+ return outputBuffer;
+}
+
+
+uint QSymbianTextCodec::getLanguageDependentCharacterSet()
+{
+ TLanguage lang = User::Language();
+
+ uint langIndex = 0;
+
+ switch (lang) {
+ case 14: //ELangTurkish
+ langIndex = KCharacterSetIdentifierIso88599; break;
+ case 16: //ELangRussian
+ langIndex = KCharacterSetIdentifierIso88595; break;
+ case 17: //ELangHungarian
+ langIndex = KCharacterSetIdentifierIso88592; break;
+ case 25: //ELangCzec
+ case 26: //ELangSlovak
+ case 27: //ELangPolish
+ case 28: //ELangSlovenian
+ langIndex = KCharacterSetIdentifierIso88592; break;
+ case 29: //ELangTaiwanChinese
+ case 30: //ELangHongKongChinese
+ langIndex = KCharacterSetIdentifierBig5; break;
+ case 31: //ELangPrcChinese
+ langIndex = KCharacterSetIdentifierGbk; break;
+ case 32: //ELangJapanese
+ langIndex = KCharacterSetIdentifierShiftJis; break;
+ case 33: //ELangThai
+ langIndex = 270501193 /*KCharacterSetIdentifierTis620*/; break;
+ case 37: //ELangArabic
+ langIndex = KCharacterSetIdentifierIso88596; break;
+ case 40: //ELangBelarussian
+ case 42: //ELangBulgarian
+ langIndex = KCharacterSetIdentifierIso88595; break;
+ case 45: //ELangCroatian
+ langIndex = KCharacterSetIdentifierIso88592; break;
+ case 49: //ELangEstonian
+ langIndex = KCharacterSetIdentifierIso88594; break;
+ case 54: //ELangGreek
+ case 55: //ELangCyprusGreek
+ langIndex = KCharacterSetIdentifierIso88597; break;
+ case 57: //ELangHebrew
+ langIndex = KCharacterSetIdentifierIso88598; break;
+ case 58: //ELangHindi
+ langIndex = 271011982/*KCharacterSetIdentifierIscii*/; break;
+ case 67: //ELangLatvian
+ case 68: //ELangLithuanian
+ langIndex = KCharacterSetIdentifierIso88594; break;
+ case 69: //ELangMacedonian
+ langIndex = KCharacterSetIdentifierIso88595; break;
+ case 78: //ELangRomanian
+ langIndex = KCharacterSetIdentifierIso88592; break;
+ case 79: //ELangSerbian
+ langIndex = KCharacterSetIdentifierIso88592; break;
+ case 91: //ELangCyprusTurkish
+ langIndex = KCharacterSetIdentifierIso88599; break;
+ case 93: //ELangUkrainian
+ langIndex = KCharacterSetIdentifierIso88595; break;
+ case 94: //ELangUrdu
+ langIndex = KCharacterSetIdentifierIso88596; break;
+ case 157: //ELangEnglish_Taiwan
+ case 158: //ELangEnglish_HongKong
+ langIndex = KCharacterSetIdentifierBig5; break;
+ case 159: //ELangEnglish_Prc
+ langIndex = KCharacterSetIdentifierGbk; break;
+ case 160:
+ langIndex = KCharacterSetIdentifierShiftJis; break;
+ case 161: //ELangEnglish_Thailand
+ langIndex = 270501193/*KCharacterSetIdentifierTis620*/; break;
+ }
+
+ if (langIndex > 0) {
+ return langIndex;
+ }
+ return KCharacterSetIdentifierCodePage1252;
+}
+
+/* Create the codecs that have aliases and return the locale mapper*/
+QSymbianTextCodec *QSymbianTextCodec::init()
+{
+ const uint localeMapperId = getLanguageDependentCharacterSet();
+ QScopedPointer<CArrayFix<CCnvCharacterSetConverter::SCharacterSet> > array;
+ QT_TRAP_THROWING(array.reset(CCnvCharacterSetConverter::CreateArrayOfCharacterSetsAvailableL(qt_s60GetRFs())))
+ CCnvCharacterSetConverter *converter = QSymbianTextCodec::converter();
+ int count = array->Count();
+ for (int i = 0; i < count; i++) {
+ int charsetId = array->At(i).Identifier();
+
+ // skip builtin Qt codecs
+ if (charsetId == KCharacterSetIdentifierUtf8 || charsetId == KCharacterSetIdentifierUnicodeLittle
+ || charsetId == KCharacterSetIdentifierUnicodeLittle || charsetId == KCharacterSetIdentifierUnicodeBig
+ || charsetId == KCharacterSetIdentifierIso88591
+ || charsetId == 270501691 /* skip Windows-1252 duplicate*/) {
+ continue;
+ }
+
+ int begin = 0;
+ int n = sizeof(codecsData) / sizeof(codecsData[0]);
+ int half;
+
+ while (n > 0) {
+ half = n >> 1;
+ int middle = begin + half;
+ if (codecsData[middle].charsetId < charsetId) {
+ begin = middle + 1;
+ n -= half + 1;
+ } else {
+ n = half;
+ }
+ }
+ if (codecsData[begin].charsetId == charsetId) {
+ QSymbianTextCodec *c = new QSymbianTextCodec(charsetId, begin);
+ if (charsetId == localeMapperId)
+ localeMapper = c;
+ } else {
+ QScopedPointer<HBufC8> buf;
+ QT_TRAP_THROWING(buf.reset(converter->ConvertCharacterSetIdentifierToStandardNameL(charsetId, qt_s60GetRFs())))
+ QByteArray name;
+ if (buf && buf->Length()) {
+ name = QByteArray(reinterpret_cast<const char *>(buf->Ptr()), buf->Length());
+ } else {
+ TPtrC charSetName = array->At(i).NameIsFileName() ? TParsePtrC(array->At(i).Name()).Name() : array->At(i).Name();
+ int len = charSetName.Length();
+ QString str;
+ str.setUnicode(reinterpret_cast<const QChar*>(charSetName.Ptr()), len);
+ name = str.toLatin1();
+ }
+ if (!name.isEmpty())
+ new QSymbianTextCodecWithName(charsetId, name);
+ }
+
+ }
+ return localeMapper;
+}
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 7655c51..f747bf7 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -48,6 +48,19 @@ QT_BEGIN_NAMESPACE
enum { Endian = 0, Data = 1 };
+static inline bool isUnicodeNonCharacter(uint ucs4)
+{
+ // Unicode has a couple of "non-characters" that one can use internally,
+ // but are not allowed to be used for text interchange.
+ //
+ // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
+ // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
+ // U+FDEF (inclusive)
+
+ return (ucs4 & 0xfffe) == 0xfffe
+ || (ucs4 - 0xfdd0U) < 16;
+}
+
QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
{
uchar replacement = '?';
@@ -106,16 +119,17 @@ QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conve
if (u < 0x0800) {
*cursor++ = 0xc0 | ((uchar) (u >> 6));
} else {
+ // is it one of the Unicode non-characters?
+ if (isUnicodeNonCharacter(u)) {
+ *cursor++ = replacement;
+ ++ch;
+ ++invalid;
+ continue;
+ }
+
if (u > 0xffff) {
- // see QString::fromUtf8() and QString::utf8() for explanations
- if (u > 0x10fe00 && u < 0x10ff00) {
- *cursor++ = (u - 0x10fe00);
- ++ch;
- continue;
- } else {
- *cursor++ = 0xf0 | ((uchar) (u >> 18));
- *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
- }
+ *cursor++ = 0xf0 | ((uchar) (u >> 18));
+ *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
} else {
*cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
}
@@ -179,15 +193,16 @@ QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::Converte
--need;
if (!need) {
// utf-8 bom composes into 0xfeff code point
+ bool nonCharacter;
if (!headerdone && uc == 0xfeff) {
// dont do anything, just skip the BOM
- } else if (uc > 0xffff && uc < 0x110000) {
+ } else if (!(nonCharacter = isUnicodeNonCharacter(uc)) && uc > 0xffff && uc < 0x110000) {
// surrogate pair
Q_ASSERT((qch - (ushort*)result.unicode()) + 2 < result.length());
*qch++ = QChar::highSurrogate(uc);
*qch++ = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // error: overlong sequence, UTF16 surrogate or BOM
+ } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || nonCharacter || uc >= 0x110000) {
+ // error: overlong sequence, UTF16 surrogate or non-character
*qch++ = replacement;
++invalid;
} else {