diff options
author | Jason Barron <jbarron@trolltech.com> | 2009-06-25 13:49:53 (GMT) |
---|---|---|
committer | Jason Barron <jbarron@trolltech.com> | 2009-06-25 13:49:53 (GMT) |
commit | db8f05e257019694f5e8076845626008f2adc3dd (patch) | |
tree | 05d3959403cf15ac5f702091439e028af01f343b /src/corelib/codecs | |
parent | 8aafaa65a1d16f8b982279f5aceedf1e281ddb5a (diff) | |
parent | 796a5a2c7d8c91a46ac761dde18b7da2ec6c177b (diff) | |
download | Qt-db8f05e257019694f5e8076845626008f2adc3dd.zip Qt-db8f05e257019694f5e8076845626008f2adc3dd.tar.gz Qt-db8f05e257019694f5e8076845626008f2adc3dd.tar.bz2 |
Merge commit 'qt/master-stable' into 4.6-stable
Bring Qt 4.6 into the Qt-S60 repo.
Conflicts:
configure.exe
mkspecs/features/qttest_p4.prf
qmake/generators/makefile.cpp
src/corelib/io/qdir.cpp
src/corelib/io/qprocess.h
src/corelib/kernel/qcoreevent.h
src/corelib/kernel/qobject.cpp
src/corelib/kernel/qsharedmemory_unix.cpp
src/corelib/thread/qthread_p.h
src/corelib/tools/qvector.h
src/gui/dialogs/qdialog.cpp
src/gui/dialogs/qfiledialog.cpp
src/gui/dialogs/qfiledialog_p.h
src/gui/dialogs/qmessagebox.cpp
src/gui/graphicsview/qgraphicsitem.cpp
src/gui/graphicsview/qgraphicsview.cpp
src/gui/image/qpixmapcache.cpp
src/gui/kernel/qapplication.cpp
src/gui/kernel/qapplication_p.h
src/gui/kernel/qwidget.cpp
src/gui/kernel/qwidget_p.h
src/gui/painting/qdrawhelper.cpp
src/gui/painting/qpaintengine_raster.cpp
src/gui/text/qfontengine_qpf.cpp
src/gui/widgets/qmenubar.cpp
src/network/socket/qlocalserver.cpp
src/testlib/qtestcase.cpp
src/testlib/testlib.pro
tests/auto/qimagereader/tst_qimagereader.cpp
tests/auto/qitemdelegate/tst_qitemdelegate.cpp
tests/auto/qnetworkreply/tst_qnetworkreply.cpp
tests/auto/qpixmap/qpixmap.pro
Diffstat (limited to 'src/corelib/codecs')
-rw-r--r-- | src/corelib/codecs/qisciicodec.cpp | 2 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.cpp | 134 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.h | 3 | ||||
-rw-r--r-- | src/corelib/codecs/qtsciicodec_p.h | 2 | ||||
-rw-r--r-- | src/corelib/codecs/qutfcodec.cpp | 15 |
5 files changed, 108 insertions, 48 deletions
diff --git a/src/corelib/codecs/qisciicodec.cpp b/src/corelib/codecs/qisciicodec.cpp index 5619580..f9e3292 100644 --- a/src/corelib/codecs/qisciicodec.cpp +++ b/src/corelib/codecs/qisciicodec.cpp @@ -54,7 +54,7 @@ QT_BEGIN_NAMESPACE struct Codecs { - const char *name; + const char name[10]; ushort base; }; diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp index cfb62c7..fcf0be1 100644 --- a/src/corelib/codecs/qtextcodec.cpp +++ b/src/corelib/codecs/qtextcodec.cpp @@ -461,10 +461,10 @@ static const char * const tis_620locales[] = { // static const char * const tcvnlocales[] = { // "vi", "vi_VN", 0 }; -static bool try_locale_list(const char * const locale[], const char * lang) +static bool try_locale_list(const char * const locale[], const QByteArray &lang) { int i; - for(i=0; locale[i] && *locale[i] && strcmp(locale[i], lang); i++) + for(i=0; locale[i] && lang != locale[i]; i++) ; return locale[i] != 0; } @@ -516,13 +516,12 @@ static QTextCodec * ru_RU_hack(const char * i) { #endif #if !defined(Q_OS_WIN32) && !defined(Q_OS_WINCE) -static QTextCodec *checkForCodec(const char *name) { +static QTextCodec *checkForCodec(const QByteArray &name) { QTextCodec *c = QTextCodec::codecForName(name); if (!c) { - const char *at = strchr(name, '@'); - if (at) { - QByteArray n(name, at - name); - c = QTextCodec::codecForName(n.data()); + const int index = name.indexOf('@'); + if (index != -1) { + c = QTextCodec::codecForName(name.left(index)); } } return c; @@ -563,21 +562,19 @@ static void setupLocaleMapper() // definitely knows it, but since we cannot fully trust it, get ready // to fall back to environment variables. #if !defined(QT_NO_SETLOCALE) - char * ctype = qstrdup(setlocale(LC_CTYPE, 0)); + const QByteArray ctype = setlocale(LC_CTYPE, 0); #else - char * ctype = qstrdup(""); + const QByteArray ctype; #endif // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG // environment variables. - char * lang = qstrdup(qgetenv("LC_ALL").constData()); - if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { - if (lang) delete [] lang; - lang = qstrdup(qgetenv("LC_CTYPE").constData()); + QByteArray lang = qgetenv("LC_ALL"); + if (lang.isEmpty() || lang == "C") { + lang = qgetenv("LC_CTYPE"); } - if (!lang || lang[0] == 0 || strcmp(lang, "C") == 0) { - if (lang) delete [] lang; - lang = qstrdup(qgetenv("LANG").constData()); + if (lang.isEmpty() || lang == "C") { + lang = qgetenv("LANG"); } // Now try these in order: @@ -590,35 +587,35 @@ static void setupLocaleMapper() // 7. guess locale from lang // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15) - char * codeset = ctype ? strchr(ctype, '.') : 0; - if (codeset && *codeset == '.') - localeMapper = checkForCodec(codeset + 1); + int indexOfDot = ctype.indexOf('.'); + if (indexOfDot != -1) + localeMapper = checkForCodec( ctype.mid(indexOfDot + 1) ); // 2. CODESET from lang if it contains a .CODESET part - codeset = lang ? strchr(lang, '.') : 0; - if (!localeMapper && codeset && *codeset == '.') - localeMapper = checkForCodec(codeset + 1); + if (!localeMapper) { + indexOfDot = lang.indexOf('.'); + if (indexOfDot != -1) + localeMapper = checkForCodec( lang.mid(indexOfDot + 1) ); + } // 3. ctype (maybe the locale is named "ISO-8859-1" or something) - if (!localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0) + if (!localeMapper && !ctype.isEmpty() && ctype != "C") localeMapper = checkForCodec(ctype); // 4. locale (ditto) - if (!localeMapper && lang && *lang != 0) + if (!localeMapper && !lang.isEmpty()) localeMapper = checkForCodec(lang); // 5. "@euro" - if ((!localeMapper && ctype && strstr(ctype, "@euro")) || (lang && strstr(lang, "@euro"))) + if ((!localeMapper && ctype.contains("@euro")) || lang.contains("@euro")) localeMapper = checkForCodec("ISO 8859-15"); // 6. guess locale from ctype unless ctype is "C" // 7. guess locale from lang - char * try_by_name = ctype; - if (ctype && *ctype != 0 && strcmp (ctype, "C") != 0) - try_by_name = lang; + const QByteArray &try_by_name = (!ctype.isEmpty() && ctype != "C") ? lang : ctype; // Now do the guessing. - if (lang && *lang && !localeMapper && try_by_name && *try_by_name) { + if (!lang.isEmpty() && !localeMapper && !try_by_name.isEmpty()) { if (try_locale_list(iso8859_15locales, lang)) localeMapper = QTextCodec::codecForName("ISO 8859-15"); else if (try_locale_list(iso8859_2locales, lang)) @@ -651,8 +648,6 @@ static void setupLocaleMapper() localeMapper = ru_RU_hack(lang); } - delete [] ctype; - delete [] lang; } // If everything failed, we default to 8859-1 @@ -1525,9 +1520,14 @@ QString QTextDecoder::toUnicode(const QByteArray &ba) /*! \since 4.4 - Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba, - and returns a QTextCodec instance that is capable of decoding the html to unicode. - If the codec cannot be detected from the content provided, \a defaultCodec is returned. + Tries to detect the encoding of the provided snippet of HTML in + the given byte array, \a ba, by checking the BOM (Byte Order Mark) + and the content-type meta header and returns a QTextCodec instance + that is capable of decoding the html to unicode. If the codec + cannot be detected from the content provided, \a defaultCodec is + returned. + + \sa codecForUtfText() */ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec) { @@ -1535,15 +1535,8 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCo int pos; QTextCodec *c = 0; - if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) - || ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) { - c = QTextCodec::codecForMib(1015); // utf16 - } else if (ba.size() > 2 - && (uchar)ba[0] == 0xef - && (uchar)ba[1] == 0xbb - && (uchar)ba[2] == 0xbf) { - c = QTextCodec::codecForMib(106); // utf-8 - } else { + c = QTextCodec::codecForUtfText(ba, c); + if (!c) { QByteArray header = ba.left(512).toLower(); if ((pos = header.indexOf("http-equiv=")) != -1) { pos = header.indexOf("charset=", pos) + int(strlen("charset=")); @@ -1571,6 +1564,61 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); } +/*! + \since 4.6 + + Tries to detect the encoding of the provided snippet \a ba by + using the BOM (Byte Order Mark) and returns a QTextCodec instance + that is capable of decoding the text to unicode. If the codec + cannot be detected from the content provided, \a defaultCodec is + returned. + + \sa codecForHtml() +*/ +QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec) +{ + const uint arraySize = ba.size(); + + if (arraySize > 3) { + if ((uchar)ba[0] == 0x00 + && (uchar)ba[1] == 0x00 + && (uchar)ba[2] == 0xFE + && (uchar)ba[3] == 0xFF) + return QTextCodec::codecForMib(1018); // utf-32 be + else if ((uchar)ba[0] == 0xFF + && (uchar)ba[1] == 0xFE + && (uchar)ba[2] == 0x00 + && (uchar)ba[3] == 0x00) + return QTextCodec::codecForMib(1019); // utf-32 le + } + + if (arraySize < 2) + return defaultCodec; + if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) + return QTextCodec::codecForMib(1013); // utf16 be + else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe) + return QTextCodec::codecForMib(1014); // utf16 le + + if (arraySize < 3) + return defaultCodec; + if ((uchar)ba[0] == 0xef + && (uchar)ba[1] == 0xbb + && (uchar)ba[2] == 0xbf) + return QTextCodec::codecForMib(106); // utf-8 + + return defaultCodec; +} + +/*! + \overload + + If the codec cannot be detected, this overload returns a Latin-1 QTextCodec. +*/ +QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba) +{ + return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); +} + /*! \internal \since 4.3 diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h index e32650f..83097a5 100644 --- a/src/corelib/codecs/qtextcodec.h +++ b/src/corelib/codecs/qtextcodec.h @@ -82,6 +82,9 @@ public: static QTextCodec *codecForHtml(const QByteArray &ba); static QTextCodec *codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec); + static QTextCodec *codecForUtfText(const QByteArray &ba); + static QTextCodec *codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec); + QTextDecoder* makeDecoder() const; QTextEncoder* makeEncoder() const; diff --git a/src/corelib/codecs/qtsciicodec_p.h b/src/corelib/codecs/qtsciicodec_p.h index 8f11e48..425e7fd 100644 --- a/src/corelib/codecs/qtsciicodec_p.h +++ b/src/corelib/codecs/qtsciicodec_p.h @@ -88,7 +88,7 @@ QT_BEGIN_NAMESPACE #ifndef QT_NO_CODECS -class Q_CORE_EXPORT QTsciiCodec : public QTextCodec { +class QTsciiCodec : public QTextCodec { public: ~QTsciiCodec(); diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp index d9defe1..27c0572 100644 --- a/src/corelib/codecs/qutfcodec.cpp +++ b/src/corelib/codecs/qutfcodec.cpp @@ -184,7 +184,10 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C uc = (uc << 6) | (ch & 0x3f); need--; if (!need) { - if (uc > 0xffff && uc < 0x110000) { + // utf-8 bom composes into 0xfeff code point + if (!headerdone && uc == 0xfeff) { + // dont do anything, just skip the BOM + } else if (uc > 0xffff && uc < 0x110000) { // surrogate pair uc -= 0x10000; unsigned short high = uc/0x400 + 0xd800; @@ -206,6 +209,7 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C } else { *qch++ = uc; } + headerdone = true; } } else { // error @@ -213,15 +217,18 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C *qch++ = replacement; ++invalid; need = 0; + headerdone = true; } } else { if (ch < 128) { *qch++ = QLatin1Char(ch); + headerdone = true; } else if ((ch & 0xe0) == 0xc0) { uc = ch & 0x1f; need = 1; error = i; min_uc = 0x80; + headerdone = true; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; @@ -232,10 +239,12 @@ void QUtf8Codec::convertToUnicode(QString *target, const char *chars, int len, C need = 3; error = i; min_uc = 0x10000; + headerdone = true; } else { // error *qch++ = replacement; ++invalid; + headerdone = true; } } } @@ -387,7 +396,7 @@ QString QUtf16Codec::convertToUnicode(const char *chars, int len, ConverterState result.truncate(qch - result.unicode()); if (state) { - if (endian != Detect) + if (headerdone) state->flags |= IgnoreHeader; state->state_data[Endian] = endian; if (half) { @@ -569,7 +578,7 @@ QString QUtf32Codec::convertToUnicode(const char *chars, int len, ConverterState result.truncate(qch - result.unicode()); if (state) { - if (endian != Detect) + if (headerdone) state->flags |= IgnoreHeader; state->state_data[Endian] = endian; state->remainingChars = num; |