summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorDenis Dzyubenko <denis.dzyubenko@nokia.com>2009-07-09 14:07:50 (GMT)
committerDenis Dzyubenko <denis.dzyubenko@nokia.com>2009-07-16 10:09:40 (GMT)
commitd124bcf70a8e167ebf997ac2a4222623a5a9acdf (patch)
tree5c4a548573dbe8efd2486af162421af7fe18bf9a /src/corelib
parentdbb3062a06c3ea6d3cb5b6536cee7b2e4b99b677 (diff)
downloadQt-d124bcf70a8e167ebf997ac2a4222623a5a9acdf.zip
Qt-d124bcf70a8e167ebf997ac2a4222623a5a9acdf.tar.gz
Qt-d124bcf70a8e167ebf997ac2a4222623a5a9acdf.tar.bz2
Changed the implementation of the unicode text codecs to share more code with qstring.
The qstring unicode conversion functions used to have its own implementation, which did the same as QUtf*Codecs, so with the change all of them will share the same implementation. Reviewed-by: Thiago Macieira
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/codecs/qtextcodec_p.h27
-rw-r--r--src/corelib/codecs/qutfcodec.cpp321
-rw-r--r--src/corelib/codecs/qutfcodec_p.h53
-rw-r--r--src/corelib/tools/qstring.cpp90
4 files changed, 241 insertions, 250 deletions
diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h
index 499c0f9..5c82735 100644
--- a/src/corelib/codecs/qtextcodec_p.h
+++ b/src/corelib/codecs/qtextcodec_p.h
@@ -77,6 +77,33 @@ struct QTextCodecUnalignedPointer
}
};
+#else
+
+class QTextCodec
+{
+public:
+ enum ConversionFlag {
+ DefaultConversion,
+ ConvertInvalidToNull = 0x80000000,
+ IgnoreHeader = 0x1,
+ FreeFunction = 0x2
+ };
+ Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag)
+
+ struct ConverterState {
+ ConverterState(ConversionFlags f = DefaultConversion)
+ : flags(f), remainingChars(0), invalidChars(0), d(0) { state_data[0] = state_data[1] = state_data[2] = 0; }
+ ~ConverterState() { }
+ ConversionFlags flags;
+ int remainingChars;
+ int invalidChars;
+ uint state_data[3];
+ void *d;
+ private:
+ Q_DISABLE_COPY(ConverterState)
+ };
+};
+
#endif //QT_NO_TEXTCODEC
QT_END_NAMESPACE
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index abae6f7..d111660 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -44,23 +44,19 @@
#include "qendian.h"
#include "qchar.h"
-#ifndef QT_NO_TEXTCODEC
-
QT_BEGIN_NAMESPACE
-QUtf8Codec::~QUtf8Codec()
-{
-}
+enum { Endian = 0, Data = 1 };
-QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
{
uchar replacement = '?';
int rlen = 3*len;
int surrogate_high = -1;
if (state) {
- if (state->flags & ConvertInvalidToNull)
+ if (state->flags & QTextCodec::ConvertInvalidToNull)
replacement = 0;
- if (!(state->flags & IgnoreHeader))
+ if (!(state->flags & QTextCodec::IgnoreHeader))
rlen += 3;
if (state->remainingChars)
surrogate_high = state->state_data[0];
@@ -71,7 +67,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta
uchar* cursor = (uchar*)rstr.data();
const QChar *ch = uc;
int invalid = 0;
- if (state && !(state->flags & IgnoreHeader)) {
+ if (state && !(state->flags & QTextCodec::IgnoreHeader)) {
*cursor++ = 0xef;
*cursor++ = 0xbb;
*cursor++ = 0xbf;
@@ -133,7 +129,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta
rstr.resize(cursor - (const uchar*)rstr.constData());
if (state) {
state->invalidChars += invalid;
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
state->remainingChars = 0;
if (surrogate_high >= 0) {
state->remainingChars = 1;
@@ -143,7 +139,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta
return rstr;
}
-void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const
+QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state)
{
bool headerdone = false;
QChar replacement = QChar::ReplacementCharacter;
@@ -152,9 +148,9 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C
uint uc = 0;
uint min_uc = 0;
if (state) {
- if (state->flags & IgnoreHeader)
+ if (state->flags & QTextCodec::IgnoreHeader)
headerdone = true;
- if (state->flags & ConvertInvalidToNull)
+ if (state->flags & QTextCodec::ConvertInvalidToNull)
replacement = QChar::Null;
need = state->remainingChars;
if (need) {
@@ -170,10 +166,8 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C
headerdone = true;
}
- int originalLength = target->length();
- QString &result = *target;
- result.resize(originalLength + len + 1); // worst case
- QChar *qch = result.data() + originalLength;
+ QString result(len, Qt::Uninitialized); // worst case
+ QChar *qch = (QChar *)result.unicode();
uchar ch;
int invalid = 0;
@@ -260,52 +254,30 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C
state->invalidChars += invalid;
state->remainingChars = need;
if (headerdone)
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
state->state_data[0] = need ? uc : 0;
state->state_data[1] = need ? min_uc : 0;
}
-}
-
-QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
-{
- QString result;
- convertToUnicode(&result, chars, len, state);
return result;
}
-QByteArray QUtf8Codec::name() const
-{
- return "UTF-8";
-}
-
-int QUtf8Codec::mibEnum() const
-{
- return 106;
-}
-
-enum { Endian = 0, Data = 1 };
-
-QUtf16Codec::~QUtf16Codec()
-{
-}
-
-QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e)
{
- Endianness endian = e;
+ DataEndianness endian = e;
int length = 2*len;
- if (!state || (!(state->flags & IgnoreHeader))) {
+ if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) {
length += 2;
}
- if (e == Detect) {
- endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+ if (e == DetectEndianness) {
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
}
QByteArray d;
d.resize(length);
char *data = d.data();
- if (!state || !(state->flags & IgnoreHeader)) {
+ if (!state || !(state->flags & QTextCodec::IgnoreHeader)) {
QChar bom(QChar::ByteOrderMark);
- if (endian == BE) {
+ if (endian == BigEndianness) {
data[0] = bom.row();
data[1] = bom.cell();
} else {
@@ -314,7 +286,7 @@ QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt
}
data += 2;
}
- if (endian == BE) {
+ if (endian == BigEndianness) {
for (int i = 0; i < len; ++i) {
*(data++) = uc[i].row();
*(data++) = uc[i].cell();
@@ -328,35 +300,35 @@ QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt
if (state) {
state->remainingChars = 0;
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
}
return d;
}
-QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+QString QUtf16::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e)
{
- Endianness endian = e;
+ DataEndianness endian = e;
bool half = false;
uchar buf = 0;
bool headerdone = false;
if (state) {
- headerdone = state->flags & IgnoreHeader;
- if (endian == Detect)
- endian = (Endianness)state->state_data[Endian];
+ headerdone = state->flags & QTextCodec::IgnoreHeader;
+ if (endian == DetectEndianness)
+ endian = (DataEndianness)state->state_data[Endian];
if (state->remainingChars) {
half = true;
buf = state->state_data[Data];
}
}
- if (headerdone && endian == Detect)
- endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+ if (headerdone && endian == DetectEndianness)
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
QString result(len, Qt::Uninitialized); // worst case
QChar *qch = (QChar *)result.unicode();
while (len--) {
if (half) {
QChar ch;
- if (endian == LE) {
+ if (endian == LittleEndianness) {
ch.setRow(*chars++);
ch.setCell(buf);
} else {
@@ -364,17 +336,17 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState
ch.setCell(*chars++);
}
if (!headerdone) {
- if (endian == Detect) {
- if (ch == QChar::ByteOrderSwapped && endian != BE) {
- endian = LE;
- } else if (ch == QChar::ByteOrderMark && endian != LE) {
+ if (endian == DetectEndianness) {
+ if (ch == QChar::ByteOrderSwapped && endian != BigEndianness) {
+ endian = LittleEndianness;
+ } else if (ch == QChar::ByteOrderMark && endian != LittleEndianness) {
// ignore BOM
- endian = BE;
+ endian = BigEndianness;
} else {
if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
- endian = BE;
+ endian = BigEndianness;
} else {
- endian = LE;
+ endian = LittleEndianness;
ch = QChar((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
}
*qch++ = ch;
@@ -396,7 +368,7 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState
if (state) {
if (headerdone)
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
state->state_data[Endian] = endian;
if (half) {
state->remainingChars = 1;
@@ -409,72 +381,21 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState
return result;
}
-int QUtf16Codec::mibEnum() const
-{
- return 1015;
-}
-
-QByteArray QUtf16Codec::name() const
-{
- return "UTF-16";
-}
-
-QList<QByteArray> QUtf16Codec::aliases() const
-{
- return QList<QByteArray>();
-}
-
-int QUtf16BECodec::mibEnum() const
-{
- return 1013;
-}
-
-QByteArray QUtf16BECodec::name() const
-{
- return "UTF-16BE";
-}
-
-QList<QByteArray> QUtf16BECodec::aliases() const
-{
- QList<QByteArray> list;
- return list;
-}
-
-int QUtf16LECodec::mibEnum() const
-{
- return 1014;
-}
-
-QByteArray QUtf16LECodec::name() const
-{
- return "UTF-16LE";
-}
-
-QList<QByteArray> QUtf16LECodec::aliases() const
-{
- QList<QByteArray> list;
- return list;
-}
-
-QUtf32Codec::~QUtf32Codec()
-{
-}
-
-QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e)
{
- Endianness endian = e;
+ DataEndianness endian = e;
int length = 4*len;
- if (!state || (!(state->flags & IgnoreHeader))) {
+ if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) {
length += 4;
}
- if (e == Detect) {
- endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+ if (e == DetectEndianness) {
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
}
QByteArray d(length, Qt::Uninitialized);
char *data = d.data();
- if (!state || !(state->flags & IgnoreHeader)) {
- if (endian == BE) {
+ if (!state || !(state->flags & QTextCodec::IgnoreHeader)) {
+ if (endian == BigEndianness) {
data[0] = 0;
data[1] = 0;
data[2] = (char)0xfe;
@@ -487,7 +408,7 @@ QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt
}
data += 4;
}
- if (endian == BE) {
+ if (endian == BigEndianness) {
for (int i = 0; i < len; ++i) {
uint cp = uc[i].unicode();
if (uc[i].isHighSurrogate() && i < len - 1)
@@ -511,59 +432,59 @@ QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt
if (state) {
state->remainingChars = 0;
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
}
return d;
}
-QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+QString QUtf32::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e)
{
- Endianness endian = e;
+ DataEndianness endian = e;
uchar tuple[4];
int num = 0;
bool headerdone = false;
if (state) {
- headerdone = state->flags & IgnoreHeader;
- if (endian == Detect) {
- endian = (Endianness)state->state_data[Endian];
+ headerdone = state->flags & QTextCodec::IgnoreHeader;
+ if (endian == DetectEndianness) {
+ endian = (DataEndianness)state->state_data[Endian];
}
num = state->remainingChars;
memcpy(tuple, &state->state_data[Data], 4);
}
- if (headerdone && endian == Detect)
- endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE;
+ if (headerdone && endian == DetectEndianness)
+ endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
QString result;
result.resize((num + len) >> 2 << 1); // worst case
QChar *qch = (QChar *)result.unicode();
-
+
const char *end = chars + len;
while (chars < end) {
tuple[num++] = *chars++;
if (num == 4) {
if (!headerdone) {
- if (endian == Detect) {
- if (endian == Detect) {
- if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BE) {
- endian = LE;
+ if (endian == DetectEndianness) {
+ if (endian == DetectEndianness) {
+ if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) {
+ endian = LittleEndianness;
num = 0;
continue;
- } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LE) {
- endian = BE;
+ } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
+ endian = BigEndianness;
num = 0;
continue;
} else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
- endian = BE;
+ endian = BigEndianness;
} else {
- endian = LE;
+ endian = LittleEndianness;
}
}
- } else if (((endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
+ } else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
num = 0;
continue;
}
}
- uint code = (endian == BE) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
+ uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
if (code >= 0x10000) {
*qch++ = QChar::highSurrogate(code);
*qch++ = QChar::lowSurrogate(code);
@@ -574,10 +495,10 @@ QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState
}
}
result.truncate(qch - result.unicode());
-
+
if (state) {
if (headerdone)
- state->flags |= IgnoreHeader;
+ state->flags |= QTextCodec::IgnoreHeader;
state->state_data[Endian] = endian;
state->remainingChars = num;
memcpy(&state->state_data[Data], tuple, 4);
@@ -585,6 +506,113 @@ QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState
return result;
}
+
+#ifndef QT_NO_TEXTCODEC
+
+QUtf8Codec::~QUtf8Codec()
+{
+}
+
+QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+ return QUtf8::convertFromUnicode(uc, len, state);
+}
+
+void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const
+{
+ *target += QUtf8::convertToUnicode(chars, len, state);
+}
+
+QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+ return QUtf8::convertToUnicode(chars, len, state);
+}
+
+QByteArray QUtf8Codec::name() const
+{
+ return "UTF-8";
+}
+
+int QUtf8Codec::mibEnum() const
+{
+ return 106;
+}
+
+QUtf16Codec::~QUtf16Codec()
+{
+}
+
+QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+ return QUtf16::convertFromUnicode(uc, len, state, e);
+}
+
+QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+ return QUtf16::convertToUnicode(chars, len, state, e);
+}
+
+int QUtf16Codec::mibEnum() const
+{
+ return 1015;
+}
+
+QByteArray QUtf16Codec::name() const
+{
+ return "UTF-16";
+}
+
+QList<QByteArray> QUtf16Codec::aliases() const
+{
+ return QList<QByteArray>();
+}
+
+int QUtf16BECodec::mibEnum() const
+{
+ return 1013;
+}
+
+QByteArray QUtf16BECodec::name() const
+{
+ return "UTF-16BE";
+}
+
+QList<QByteArray> QUtf16BECodec::aliases() const
+{
+ QList<QByteArray> list;
+ return list;
+}
+
+int QUtf16LECodec::mibEnum() const
+{
+ return 1014;
+}
+
+QByteArray QUtf16LECodec::name() const
+{
+ return "UTF-16LE";
+}
+
+QList<QByteArray> QUtf16LECodec::aliases() const
+{
+ QList<QByteArray> list;
+ return list;
+}
+
+QUtf32Codec::~QUtf32Codec()
+{
+}
+
+QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const
+{
+ return QUtf32::convertFromUnicode(uc, len, state, e);
+}
+
+QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const
+{
+ return QUtf32::convertToUnicode(chars, len, state, e);
+}
+
int QUtf32Codec::mibEnum() const
{
return 1017;
@@ -633,7 +661,6 @@ QList<QByteArray> QUtf32LECodec::aliases() const
return list;
}
+#endif //QT_NO_TEXTCODEC
QT_END_NAMESPACE
-
-#endif //QT_NO_TEXTCODEC
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
index 749f5be..4f8f92e 100644
--- a/src/corelib/codecs/qutfcodec_p.h
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -54,9 +54,35 @@
//
#include "QtCore/qtextcodec.h"
+#include "private/qtextcodec_p.h"
QT_BEGIN_NAMESPACE
+enum DataEndianness
+{
+ DetectEndianness,
+ BigEndianness,
+ LittleEndianness
+};
+
+struct QUtf8
+{
+ static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *);
+ static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *);
+};
+
+struct QUtf16
+{
+ static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
+ static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
+};
+
+struct QUtf32
+{
+ static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
+ static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
+};
+
#ifndef QT_NO_TEXTCODEC
class QUtf8Codec : public QTextCodec {
@@ -73,13 +99,8 @@ public:
class QUtf16Codec : public QTextCodec {
protected:
- enum Endianness {
- Detect,
- BE,
- LE
- };
public:
- QUtf16Codec() { e = Detect; }
+ QUtf16Codec() { e = DetectEndianness; }
~QUtf16Codec();
QByteArray name() const;
@@ -90,12 +111,12 @@ public:
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
protected:
- Endianness e;
+ DataEndianness e;
};
class QUtf16BECodec : public QUtf16Codec {
public:
- QUtf16BECodec() : QUtf16Codec() { e = BE; }
+ QUtf16BECodec() : QUtf16Codec() { e = BigEndianness; }
QByteArray name() const;
QList<QByteArray> aliases() const;
int mibEnum() const;
@@ -103,21 +124,15 @@ public:
class QUtf16LECodec : public QUtf16Codec {
public:
- QUtf16LECodec() : QUtf16Codec() { e = LE; }
+ QUtf16LECodec() : QUtf16Codec() { e = LittleEndianness; }
QByteArray name() const;
QList<QByteArray> aliases() const;
int mibEnum() const;
};
class QUtf32Codec : public QTextCodec {
-protected:
- enum Endianness {
- Detect,
- BE,
- LE
- };
public:
- QUtf32Codec() { e = Detect; }
+ QUtf32Codec() { e = DetectEndianness; }
~QUtf32Codec();
QByteArray name() const;
@@ -128,12 +143,12 @@ public:
QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const;
protected:
- Endianness e;
+ DataEndianness e;
};
class QUtf32BECodec : public QUtf32Codec {
public:
- QUtf32BECodec() : QUtf32Codec() { e = BE; }
+ QUtf32BECodec() : QUtf32Codec() { e = BigEndianness; }
QByteArray name() const;
QList<QByteArray> aliases() const;
int mibEnum() const;
@@ -141,7 +156,7 @@ public:
class QUtf32LECodec : public QUtf32Codec {
public:
- QUtf32LECodec() : QUtf32Codec() { e = LE; }
+ QUtf32LECodec() : QUtf32Codec() { e = LittleEndianness; }
QByteArray name() const;
QList<QByteArray> aliases() const;
int mibEnum() const;
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 7cca339..80c2e24 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -45,6 +45,7 @@
#ifndef QT_NO_TEXTCODEC
#include <qtextcodec.h>
#endif
+#include <private/qutfcodec_p.h>
#include <qdatastream.h>
#include <qlist.h>
#include "qlocale.h"
@@ -964,7 +965,8 @@ int QString::toWCharArray(wchar_t *array) const
Constructs a string initialized with the first \a size characters
of the QChar array \a unicode.
- QString makes a deep copy of the string data.
+ QString makes a deep copy of the string data. The unicode data is copied as
+ is and the Byte Order Mark is preserved if present.
*/
QString::QString(const QChar *unicode, int size)
{
@@ -3843,74 +3845,7 @@ QString QString::fromUtf8(const char *str, int size)
if (size < 0)
size = qstrlen(str);
- QString result(size, Qt::Uninitialized); // worst case
- ushort *qch = result.d->data;
- uint uc = 0;
- uint min_uc = 0;
- int need = 0;
- int error = -1;
- uchar ch;
- int i = 0;
-
- // skip utf8-encoded byte order mark
- if (size >= 3
- && (uchar)str[0] == 0xef && (uchar)str[1] == 0xbb && (uchar)str[2] == 0xbf)
- i += 3;
-
- for (; i < size; ++i) {
- ch = str[i];
- if (need) {
- if ((ch&0xc0) == 0x80) {
- uc = (uc << 6) | (ch & 0x3f);
- need--;
- if (!need) {
- if (uc > 0xffffU && uc < 0x110000U) {
- // surrogate pair
- *qch++ = QChar::highSurrogate(uc);
- uc = QChar::lowSurrogate(uc);
- } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) {
- // overlong seqence, UTF16 surrogate or BOM
- uc = QChar::ReplacementCharacter;
- }
- *qch++ = uc;
- }
- } else {
- i = error;
- need = 0;
- *qch++ = QChar::ReplacementCharacter;
- }
- } else {
- if (ch < 128) {
- *qch++ = ch;
- } else if ((ch & 0xe0) == 0xc0) {
- uc = ch & 0x1f;
- need = 1;
- error = i;
- min_uc = 0x80;
- } else if ((ch & 0xf0) == 0xe0) {
- uc = ch & 0x0f;
- need = 2;
- error = i;
- min_uc = 0x800;
- } else if ((ch&0xf8) == 0xf0) {
- uc = ch & 0x07;
- need = 3;
- error = i;
- min_uc = 0x10000;
- } else {
- // Error
- *qch++ = QChar::ReplacementCharacter;
- }
- }
- }
- if (need) {
- // we have some invalid characters remaining we need to add to the string
- for (int i = error; i < size; ++i)
- *qch++ = QChar::ReplacementCharacter;
- }
-
- result.truncate(qch - result.d->data);
- return result;
+ return QUtf8::convertToUnicode(str, size, 0);
}
/*!
@@ -3933,7 +3868,7 @@ QString QString::fromUtf16(const ushort *unicode, int size)
while (unicode[size] != 0)
++size;
}
- return QString((const QChar *)unicode, size);
+ return QUtf16::convertToUnicode((const char *)unicode, size*2, 0);
}
@@ -3957,20 +3892,7 @@ QString QString::fromUcs4(const uint *unicode, int size)
while (unicode[size] != 0)
++size;
}
-
- QString s(size * 2, Qt::Uninitialized); // worst case
- ushort *uc = s.d->data;
- for (int i = 0; i < size; ++i) {
- uint u = unicode[i];
- if (u > 0xffff) {
- // decompose into a surrogate pair
- *uc++ = QChar::highSurrogate(u);
- u = QChar::lowSurrogate(u);
- }
- *uc++ = u;
- }
- s.resize(uc - s.d->data);
- return s;
+ return QUtf32::convertToUnicode((const char *)unicode, size*4, 0);
}
/*!