From d124bcf70a8e167ebf997ac2a4222623a5a9acdf Mon Sep 17 00:00:00 2001 From: Denis Dzyubenko Date: Thu, 9 Jul 2009 16:07:50 +0200 Subject: Changed the implementation of the unicode text codecs to share more code with qstring. The qstring unicode conversion functions used to have its own implementation, which did the same as QUtf*Codecs, so with the change all of them will share the same implementation. Reviewed-by: Thiago Macieira --- qmake/Makefile.unix | 9 +- qmake/Makefile.win32 | 7 + qmake/Makefile.win32-g++ | 5 + qmake/Makefile.win32-g++-sh | 5 + qmake/qmake.pri | 4 + src/corelib/codecs/qtextcodec_p.h | 27 ++++ src/corelib/codecs/qutfcodec.cpp | 321 +++++++++++++++++++++----------------- src/corelib/codecs/qutfcodec_p.h | 53 ++++--- src/corelib/tools/qstring.cpp | 90 +---------- 9 files changed, 270 insertions(+), 251 deletions(-) diff --git a/qmake/Makefile.unix b/qmake/Makefile.unix index 7634021..dcdc805 100644 --- a/qmake/Makefile.unix +++ b/qmake/Makefile.unix @@ -12,7 +12,7 @@ OBJS=project.o property.o main.o makefile.o unixmake2.o unixmake.o \ borland_bmake.o msvc_dsp.o msvc_vcproj.o msvc_nmake.o msvc_objectmodel.o #qt code -QOBJS=qstring.o qtextstream.o qiodevice.o qmalloc.o qglobal.o \ +QOBJS=qtextcodec.o qutfcodec.o qstring.o qtextstream.o qiodevice.o qmalloc.o qglobal.o \ qbytearray.o qbytearraymatcher.o qdatastream.o qbuffer.o qlistdata.o qfile.o \ qfsfileengine_unix.o qfsfileengine_iterator_unix.o qfsfileengine.o \ qfsfileengine_iterator.o qregexp.o qvector.o qbitarray.o qdir.o qdiriterator.o quuid.o qhash.o \ @@ -44,6 +44,7 @@ DEPEND_SRC=project.cpp property.cpp meta.cpp main.cpp generators/makefile.cpp ge generators/mac/pbuilder_pbx.cpp generators/mac/xmloutput.cpp generators/metamakefile.cpp \ generators/makefiledeps.cpp option.cpp generators/win32/mingw_make.cpp generators/makefile.cpp \ generators/win32/msvc_objectmodel.cpp generators/win32/msvc_nmake.cpp generators/win32/borland_bmake.cpp \ + $(SOURCE_PATH)/src/corelib/codecs/qtextcodec.cpp $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp \ $(SOURCE_PATH)/src/corelib/tools/qstring.cpp $(SOURCE_PATH)/src/corelib/io/qfile.cpp \ $(SOURCE_PATH)/src/corelib/io/qtextstream.cpp $(SOURCE_PATH)/src/corelib/io/qiodevice.cpp \ $(SOURCE_PATH)/src/corelib/global/qmalloc.cpp \ @@ -160,6 +161,12 @@ qcore_mac.o: $(SOURCE_PATH)/src/corelib/kernel/qcore_mac.cpp qurl.o: $(SOURCE_PATH)/src/corelib/io/qurl.cpp $(CXX) -c -o $@ $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/io/qurl.cpp +qutfcodec.o: $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + $(CXX) -c -o $@ $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + +qtextcodec.o: $(SOURCE_PATH)/src/corelib/codecs/qtextcodec.cpp + $(CXX) -c -o $@ $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/codecs/qtextcodec.cpp + qstring.o: $(SOURCE_PATH)/src/corelib/tools/qstring.cpp $(CXX) -c -o $@ $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/tools/qstring.cpp diff --git a/qmake/Makefile.win32 b/qmake/Makefile.win32 index 53130aa..77eb9fc 100644 --- a/qmake/Makefile.win32 +++ b/qmake/Makefile.win32 @@ -108,6 +108,8 @@ QTOBJS= \ qmalloc.obj \ qmap.obj \ qregexp.obj \ + qtextcodec.obj \ + qutfcodec.obj \ qstring.obj \ qstringlist.obj \ qtextstream.obj \ @@ -194,6 +196,8 @@ clean:: -del qmalloc.obj -del qmap.obj -del qregexp.obj + -del qtextcodec.obj + -del qutfcodec.obj -del qstring.obj -del qstringlist.obj -del qtextstream.obj @@ -337,6 +341,9 @@ qbytearraymatcher.obj: $(SOURCE_PATH)\src\corelib\tools\qbytearraymatcher.cpp qchar.obj: $(SOURCE_PATH)\src\corelib\tools\qchar.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)\src\corelib\tools\qchar.cpp +qutfcodec.obj: $(SOURCE_PATH)\src\corelib\codecs\qutfcodec.cpp + $(CXX) $(CXXFLAGS) $(SOURCE_PATH)\src\corelib\codecs\qutfcodec.cpp + qstring.obj: $(SOURCE_PATH)\src\corelib\tools\qstring.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)\src\corelib\tools\qstring.cpp diff --git a/qmake/Makefile.win32-g++ b/qmake/Makefile.win32-g++ index 229a456..1699668 100644 --- a/qmake/Makefile.win32-g++ +++ b/qmake/Makefile.win32-g++ @@ -71,6 +71,8 @@ QTOBJS= \ qmalloc.o \ qmap.o \ qregexp.o \ + qtextcodec.o \ + qutfcodec.o \ qstring.o \ qstringlist.o \ qtextstream.o \ @@ -192,6 +194,9 @@ qvsnprintf.o: $(SOURCE_PATH)/src/corelib/tools/qvsnprintf.cpp qbytearraymatcher.o: $(SOURCE_PATH)/src/corelib/tools/qbytearraymatcher.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/tools/qbytearraymatcher.cpp +qutfcodec.o: $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + qstring.o: $(SOURCE_PATH)/src/corelib/tools/qstring.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/tools/qstring.cpp diff --git a/qmake/Makefile.win32-g++-sh b/qmake/Makefile.win32-g++-sh index 3f1b1df..08a9038 100644 --- a/qmake/Makefile.win32-g++-sh +++ b/qmake/Makefile.win32-g++-sh @@ -71,6 +71,8 @@ QTOBJS= \ qmalloc.o \ qmap.o \ qregexp.o \ + qtextcodec.o \ + qutfcodec.o \ qstring.o \ qstringlist.o \ qtextstream.o \ @@ -191,6 +193,9 @@ qvsnprintf.o: $(SOURCE_PATH)/src/corelib/tools/qvsnprintf.cpp qbytearraymatcher.o: $(SOURCE_PATH)/src/corelib/tools/qbytearraymatcher.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/tools/qbytearraymatcher.cpp +qutfcodec.o: $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp + qstring.o: $(SOURCE_PATH)/src/corelib/tools/qstring.cpp $(CXX) $(CXXFLAGS) $(SOURCE_PATH)/src/corelib/tools/qstring.cpp diff --git a/qmake/qmake.pri b/qmake/qmake.pri index 9147ee8..9ba8506 100644 --- a/qmake/qmake.pri +++ b/qmake/qmake.pri @@ -53,6 +53,8 @@ bootstrap { #Qt code qmap.cpp \ qmetatype.cpp \ qregexp.cpp \ + qtextcodec.cpp \ + qutfcodec.cpp \ qstring.cpp \ qstringlist.cpp \ qtemporaryfile.cpp \ @@ -90,6 +92,8 @@ bootstrap { #Qt code qmap.h \ qmetatype.h \ qregexp.h \ + qtextcodec.h \ + qutfcodec.h \ qstring.h \ qstringlist.h \ qstringmatcher.h \ diff --git a/src/corelib/codecs/qtextcodec_p.h b/src/corelib/codecs/qtextcodec_p.h index 499c0f9..5c82735 100644 --- a/src/corelib/codecs/qtextcodec_p.h +++ b/src/corelib/codecs/qtextcodec_p.h @@ -77,6 +77,33 @@ struct QTextCodecUnalignedPointer } }; +#else + +class QTextCodec +{ +public: + enum ConversionFlag { + DefaultConversion, + ConvertInvalidToNull = 0x80000000, + IgnoreHeader = 0x1, + FreeFunction = 0x2 + }; + Q_DECLARE_FLAGS(ConversionFlags, ConversionFlag) + + struct ConverterState { + ConverterState(ConversionFlags f = DefaultConversion) + : flags(f), remainingChars(0), invalidChars(0), d(0) { state_data[0] = state_data[1] = state_data[2] = 0; } + ~ConverterState() { } + ConversionFlags flags; + int remainingChars; + int invalidChars; + uint state_data[3]; + void *d; + private: + Q_DISABLE_COPY(ConverterState) + }; +}; + #endif //QT_NO_TEXTCODEC QT_END_NAMESPACE diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index abae6f7..d111660 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -44,23 +44,19 @@ #include "qendian.h" #include "qchar.h" -#ifndef QT_NO_TEXTCODEC - QT_BEGIN_NAMESPACE -QUtf8Codec::~QUtf8Codec() -{ -} +enum { Endian = 0, Data = 1 }; -QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state) { uchar replacement = '?'; int rlen = 3*len; int surrogate_high = -1; if (state) { - if (state->flags & ConvertInvalidToNull) + if (state->flags & QTextCodec::ConvertInvalidToNull) replacement = 0; - if (!(state->flags & IgnoreHeader)) + if (!(state->flags & QTextCodec::IgnoreHeader)) rlen += 3; if (state->remainingChars) surrogate_high = state->state_data[0]; @@ -71,7 +67,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta uchar* cursor = (uchar*)rstr.data(); const QChar *ch = uc; int invalid = 0; - if (state && !(state->flags & IgnoreHeader)) { + if (state && !(state->flags & QTextCodec::IgnoreHeader)) { *cursor++ = 0xef; *cursor++ = 0xbb; *cursor++ = 0xbf; @@ -133,7 +129,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta rstr.resize(cursor - (const uchar*)rstr.constData()); if (state) { state->invalidChars += invalid; - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; state->remainingChars = 0; if (surrogate_high >= 0) { state->remainingChars = 1; @@ -143,7 +139,7 @@ QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterSta return rstr; } -void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const +QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state) { bool headerdone = false; QChar replacement = QChar::ReplacementCharacter; @@ -152,9 +148,9 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C uint uc = 0; uint min_uc = 0; if (state) { - if (state->flags & IgnoreHeader) + if (state->flags & QTextCodec::IgnoreHeader) headerdone = true; - if (state->flags & ConvertInvalidToNull) + if (state->flags & QTextCodec::ConvertInvalidToNull) replacement = QChar::Null; need = state->remainingChars; if (need) { @@ -170,10 +166,8 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C headerdone = true; } - int originalLength = target->length(); - QString &result = *target; - result.resize(originalLength + len + 1); // worst case - QChar *qch = result.data() + originalLength; + QString result(len, Qt::Uninitialized); // worst case + QChar *qch = (QChar *)result.unicode(); uchar ch; int invalid = 0; @@ -260,52 +254,30 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C state->invalidChars += invalid; state->remainingChars = need; if (headerdone) - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; state->state_data[0] = need ? uc : 0; state->state_data[1] = need ? min_uc : 0; } -} - -QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const -{ - QString result; - convertToUnicode(&result, chars, len, state); return result; } -QByteArray QUtf8Codec::name() const -{ - return "UTF-8"; -} - -int QUtf8Codec::mibEnum() const -{ - return 106; -} - -enum { Endian = 0, Data = 1 }; - -QUtf16Codec::~QUtf16Codec() -{ -} - -QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e) { - Endianness endian = e; + DataEndianness endian = e; int length = 2*len; - if (!state || (!(state->flags & IgnoreHeader))) { + if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) { length += 2; } - if (e == Detect) { - endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + if (e == DetectEndianness) { + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; } QByteArray d; d.resize(length); char *data = d.data(); - if (!state || !(state->flags & IgnoreHeader)) { + if (!state || !(state->flags & QTextCodec::IgnoreHeader)) { QChar bom(QChar::ByteOrderMark); - if (endian == BE) { + if (endian == BigEndianness) { data[0] = bom.row(); data[1] = bom.cell(); } else { @@ -314,7 +286,7 @@ QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt } data += 2; } - if (endian == BE) { + if (endian == BigEndianness) { for (int i = 0; i < len; ++i) { *(data++) = uc[i].row(); *(data++) = uc[i].cell(); @@ -328,35 +300,35 @@ QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt if (state) { state->remainingChars = 0; - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; } return d; } -QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +QString QUtf16::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e) { - Endianness endian = e; + DataEndianness endian = e; bool half = false; uchar buf = 0; bool headerdone = false; if (state) { - headerdone = state->flags & IgnoreHeader; - if (endian == Detect) - endian = (Endianness)state->state_data[Endian]; + headerdone = state->flags & QTextCodec::IgnoreHeader; + if (endian == DetectEndianness) + endian = (DataEndianness)state->state_data[Endian]; if (state->remainingChars) { half = true; buf = state->state_data[Data]; } } - if (headerdone && endian == Detect) - endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + if (headerdone && endian == DetectEndianness) + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; QString result(len, Qt::Uninitialized); // worst case QChar *qch = (QChar *)result.unicode(); while (len--) { if (half) { QChar ch; - if (endian == LE) { + if (endian == LittleEndianness) { ch.setRow(*chars++); ch.setCell(buf); } else { @@ -364,17 +336,17 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState ch.setCell(*chars++); } if (!headerdone) { - if (endian == Detect) { - if (ch == QChar::ByteOrderSwapped && endian != BE) { - endian = LE; - } else if (ch == QChar::ByteOrderMark && endian != LE) { + if (endian == DetectEndianness) { + if (ch == QChar::ByteOrderSwapped && endian != BigEndianness) { + endian = LittleEndianness; + } else if (ch == QChar::ByteOrderMark && endian != LittleEndianness) { // ignore BOM - endian = BE; + endian = BigEndianness; } else { if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { - endian = BE; + endian = BigEndianness; } else { - endian = LE; + endian = LittleEndianness; ch = QChar((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8)); } *qch++ = ch; @@ -396,7 +368,7 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState if (state) { if (headerdone) - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; state->state_data[Endian] = endian; if (half) { state->remainingChars = 1; @@ -409,72 +381,21 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState return result; } -int QUtf16Codec::mibEnum() const -{ - return 1015; -} - -QByteArray QUtf16Codec::name() const -{ - return "UTF-16"; -} - -QList QUtf16Codec::aliases() const -{ - return QList(); -} - -int QUtf16BECodec::mibEnum() const -{ - return 1013; -} - -QByteArray QUtf16BECodec::name() const -{ - return "UTF-16BE"; -} - -QList QUtf16BECodec::aliases() const -{ - QList list; - return list; -} - -int QUtf16LECodec::mibEnum() const -{ - return 1014; -} - -QByteArray QUtf16LECodec::name() const -{ - return "UTF-16LE"; -} - -QList QUtf16LECodec::aliases() const -{ - QList list; - return list; -} - -QUtf32Codec::~QUtf32Codec() -{ -} - -QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e) { - Endianness endian = e; + DataEndianness endian = e; int length = 4*len; - if (!state || (!(state->flags & IgnoreHeader))) { + if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) { length += 4; } - if (e == Detect) { - endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + if (e == DetectEndianness) { + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; } QByteArray d(length, Qt::Uninitialized); char *data = d.data(); - if (!state || !(state->flags & IgnoreHeader)) { - if (endian == BE) { + if (!state || !(state->flags & QTextCodec::IgnoreHeader)) { + if (endian == BigEndianness) { data[0] = 0; data[1] = 0; data[2] = (char)0xfe; @@ -487,7 +408,7 @@ QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt } data += 4; } - if (endian == BE) { + if (endian == BigEndianness) { for (int i = 0; i < len; ++i) { uint cp = uc[i].unicode(); if (uc[i].isHighSurrogate() && i < len - 1) @@ -511,59 +432,59 @@ QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterSt if (state) { state->remainingChars = 0; - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; } return d; } -QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +QString QUtf32::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e) { - Endianness endian = e; + DataEndianness endian = e; uchar tuple[4]; int num = 0; bool headerdone = false; if (state) { - headerdone = state->flags & IgnoreHeader; - if (endian == Detect) { - endian = (Endianness)state->state_data[Endian]; + headerdone = state->flags & QTextCodec::IgnoreHeader; + if (endian == DetectEndianness) { + endian = (DataEndianness)state->state_data[Endian]; } num = state->remainingChars; memcpy(tuple, &state->state_data[Data], 4); } - if (headerdone && endian == Detect) - endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BE : LE; + if (headerdone && endian == DetectEndianness) + endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness; QString result; result.resize((num + len) >> 2 << 1); // worst case QChar *qch = (QChar *)result.unicode(); - + const char *end = chars + len; while (chars < end) { tuple[num++] = *chars++; if (num == 4) { if (!headerdone) { - if (endian == Detect) { - if (endian == Detect) { - if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BE) { - endian = LE; + if (endian == DetectEndianness) { + if (endian == DetectEndianness) { + if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) { + endian = LittleEndianness; num = 0; continue; - } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LE) { - endian = BE; + } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) { + endian = BigEndianness; num = 0; continue; } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) { - endian = BE; + endian = BigEndianness; } else { - endian = LE; + endian = LittleEndianness; } } - } else if (((endian == BE) ? qFromBigEndian(tuple) : qFromLittleEndian(tuple)) == QChar::ByteOrderMark) { + } else if (((endian == BigEndianness) ? qFromBigEndian(tuple) : qFromLittleEndian(tuple)) == QChar::ByteOrderMark) { num = 0; continue; } } - uint code = (endian == BE) ? qFromBigEndian(tuple) : qFromLittleEndian(tuple); + uint code = (endian == BigEndianness) ? qFromBigEndian(tuple) : qFromLittleEndian(tuple); if (code >= 0x10000) { *qch++ = QChar::highSurrogate(code); *qch++ = QChar::lowSurrogate(code); @@ -574,10 +495,10 @@ QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState } } result.truncate(qch - result.unicode()); - + if (state) { if (headerdone) - state->flags |= IgnoreHeader; + state->flags |= QTextCodec::IgnoreHeader; state->state_data[Endian] = endian; state->remainingChars = num; memcpy(&state->state_data[Data], tuple, 4); @@ -585,6 +506,113 @@ QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState return result; } + +#ifndef QT_NO_TEXTCODEC + +QUtf8Codec::~QUtf8Codec() +{ +} + +QByteArray QUtf8Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + return QUtf8::convertFromUnicode(uc, len, state); +} + +void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, ConverterState *state) const +{ + *target += QUtf8::convertToUnicode(chars, len, state); +} + +QString QUtf8Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + return QUtf8::convertToUnicode(chars, len, state); +} + +QByteArray QUtf8Codec::name() const +{ + return "UTF-8"; +} + +int QUtf8Codec::mibEnum() const +{ + return 106; +} + +QUtf16Codec::~QUtf16Codec() +{ +} + +QByteArray QUtf16Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + return QUtf16::convertFromUnicode(uc, len, state, e); +} + +QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + return QUtf16::convertToUnicode(chars, len, state, e); +} + +int QUtf16Codec::mibEnum() const +{ + return 1015; +} + +QByteArray QUtf16Codec::name() const +{ + return "UTF-16"; +} + +QList QUtf16Codec::aliases() const +{ + return QList(); +} + +int QUtf16BECodec::mibEnum() const +{ + return 1013; +} + +QByteArray QUtf16BECodec::name() const +{ + return "UTF-16BE"; +} + +QList QUtf16BECodec::aliases() const +{ + QList list; + return list; +} + +int QUtf16LECodec::mibEnum() const +{ + return 1014; +} + +QByteArray QUtf16LECodec::name() const +{ + return "UTF-16LE"; +} + +QList QUtf16LECodec::aliases() const +{ + QList list; + return list; +} + +QUtf32Codec::~QUtf32Codec() +{ +} + +QByteArray QUtf32Codec::convertFromUnicode(const QChar *uc, int len, ConverterState *state) const +{ + return QUtf32::convertFromUnicode(uc, len, state, e); +} + +QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState *state) const +{ + return QUtf32::convertToUnicode(chars, len, state, e); +} + int QUtf32Codec::mibEnum() const { return 1017; @@ -633,7 +661,6 @@ QList QUtf32LECodec::aliases() const return list; } +#endif //QT_NO_TEXTCODEC QT_END_NAMESPACE - -#endif //QT_NO_TEXTCODEC diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h index 749f5be..4f8f92e 100644 --- a/src/corelib/codecs/qutfcodec_p.h +++ b/src/corelib/codecs/qutfcodec_p.h @@ -54,9 +54,35 @@ // #include "QtCore/qtextcodec.h" +#include "private/qtextcodec_p.h" QT_BEGIN_NAMESPACE +enum DataEndianness +{ + DetectEndianness, + BigEndianness, + LittleEndianness +}; + +struct QUtf8 +{ + static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *); + static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *); +}; + +struct QUtf16 +{ + static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); + static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); +}; + +struct QUtf32 +{ + static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); + static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness); +}; + #ifndef QT_NO_TEXTCODEC class QUtf8Codec : public QTextCodec { @@ -73,13 +99,8 @@ public: class QUtf16Codec : public QTextCodec { protected: - enum Endianness { - Detect, - BE, - LE - }; public: - QUtf16Codec() { e = Detect; } + QUtf16Codec() { e = DetectEndianness; } ~QUtf16Codec(); QByteArray name() const; @@ -90,12 +111,12 @@ public: QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; protected: - Endianness e; + DataEndianness e; }; class QUtf16BECodec : public QUtf16Codec { public: - QUtf16BECodec() : QUtf16Codec() { e = BE; } + QUtf16BECodec() : QUtf16Codec() { e = BigEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; @@ -103,21 +124,15 @@ public: class QUtf16LECodec : public QUtf16Codec { public: - QUtf16LECodec() : QUtf16Codec() { e = LE; } + QUtf16LECodec() : QUtf16Codec() { e = LittleEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; }; class QUtf32Codec : public QTextCodec { -protected: - enum Endianness { - Detect, - BE, - LE - }; public: - QUtf32Codec() { e = Detect; } + QUtf32Codec() { e = DetectEndianness; } ~QUtf32Codec(); QByteArray name() const; @@ -128,12 +143,12 @@ public: QByteArray convertFromUnicode(const QChar *, int, ConverterState *) const; protected: - Endianness e; + DataEndianness e; }; class QUtf32BECodec : public QUtf32Codec { public: - QUtf32BECodec() : QUtf32Codec() { e = BE; } + QUtf32BECodec() : QUtf32Codec() { e = BigEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; @@ -141,7 +156,7 @@ public: class QUtf32LECodec : public QUtf32Codec { public: - QUtf32LECodec() : QUtf32Codec() { e = LE; } + QUtf32LECodec() : QUtf32Codec() { e = LittleEndianness; } QByteArray name() const; QList aliases() const; int mibEnum() const; diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 7cca339..80c2e24 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -45,6 +45,7 @@ #ifndef QT_NO_TEXTCODEC #include #endif +#include #include #include #include "qlocale.h" @@ -964,7 +965,8 @@ int QString::toWCharArray(wchar_t *array) const Constructs a string initialized with the first \a size characters of the QChar array \a unicode. - QString makes a deep copy of the string data. + QString makes a deep copy of the string data. The unicode data is copied as + is and the Byte Order Mark is preserved if present. */ QString::QString(const QChar *unicode, int size) { @@ -3843,74 +3845,7 @@ QString QString::fromUtf8(const char *str, int size) if (size < 0) size = qstrlen(str); - QString result(size, Qt::Uninitialized); // worst case - ushort *qch = result.d->data; - uint uc = 0; - uint min_uc = 0; - int need = 0; - int error = -1; - uchar ch; - int i = 0; - - // skip utf8-encoded byte order mark - if (size >= 3 - && (uchar)str[0] == 0xef && (uchar)str[1] == 0xbb && (uchar)str[2] == 0xbf) - i += 3; - - for (; i < size; ++i) { - ch = str[i]; - if (need) { - if ((ch&0xc0) == 0x80) { - uc = (uc << 6) | (ch & 0x3f); - need--; - if (!need) { - if (uc > 0xffffU && uc < 0x110000U) { - // surrogate pair - *qch++ = QChar::highSurrogate(uc); - uc = QChar::lowSurrogate(uc); - } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { - // overlong seqence, UTF16 surrogate or BOM - uc = QChar::ReplacementCharacter; - } - *qch++ = uc; - } - } else { - i = error; - need = 0; - *qch++ = QChar::ReplacementCharacter; - } - } else { - if (ch < 128) { - *qch++ = ch; - } else if ((ch & 0xe0) == 0xc0) { - uc = ch & 0x1f; - need = 1; - error = i; - min_uc = 0x80; - } else if ((ch & 0xf0) == 0xe0) { - uc = ch & 0x0f; - need = 2; - error = i; - min_uc = 0x800; - } else if ((ch&0xf8) == 0xf0) { - uc = ch & 0x07; - need = 3; - error = i; - min_uc = 0x10000; - } else { - // Error - *qch++ = QChar::ReplacementCharacter; - } - } - } - if (need) { - // we have some invalid characters remaining we need to add to the string - for (int i = error; i < size; ++i) - *qch++ = QChar::ReplacementCharacter; - } - - result.truncate(qch - result.d->data); - return result; + return QUtf8::convertToUnicode(str, size, 0); } /*! @@ -3933,7 +3868,7 @@ QString QString::fromUtf16(const ushort *unicode, int size) while (unicode[size] != 0) ++size; } - return QString((const QChar *)unicode, size); + return QUtf16::convertToUnicode((const char *)unicode, size*2, 0); } @@ -3957,20 +3892,7 @@ QString QString::fromUcs4(const uint *unicode, int size) while (unicode[size] != 0) ++size; } - - QString s(size * 2, Qt::Uninitialized); // worst case - ushort *uc = s.d->data; - for (int i = 0; i < size; ++i) { - uint u = unicode[i]; - if (u > 0xffff) { - // decompose into a surrogate pair - *uc++ = QChar::highSurrogate(u); - u = QChar::lowSurrogate(u); - } - *uc++ = u; - } - s.resize(uc - s.d->data); - return s; + return QUtf32::convertToUnicode((const char *)unicode, size*4, 0); } /*! -- cgit v0.12