diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/codecs/qtextcodec.cpp | 77 | ||||
-rw-r--r-- | src/corelib/codecs/qtextcodec.h | 3 | ||||
-rw-r--r-- | src/gui/kernel/qclipboard.cpp | 24 |
3 files changed, 85 insertions, 19 deletions
diff --git a/src/corelib/codecs/qtextcodec.cpp b/src/corelib/codecs/qtextcodec.cpp index 6e8ffa1..51ca43e 100644 --- a/src/corelib/codecs/qtextcodec.cpp +++ b/src/corelib/codecs/qtextcodec.cpp @@ -1508,9 +1508,14 @@ QString QTextDecoder::toUnicode(const QByteArray &ba) /*! \since 4.4 - Tries to detect the encoding of the provided snippet of HTML in the given byte array, \a ba, - and returns a QTextCodec instance that is capable of decoding the html to unicode. - If the codec cannot be detected from the content provided, \a defaultCodec is returned. + Tries to detect the encoding of the provided snippet of HTML in + the given byte array, \a ba, by checking the BOM (Byte Order Mark) + and the content-type meta header and returns a QTextCodec instance + that is capable of decoding the html to unicode. If the codec + cannot be detected from the content provided, \a defaultCodec is + returned. + + \sa codecForUtfText */ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec) { @@ -1518,15 +1523,8 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba, QTextCodec *defaultCo int pos; QTextCodec *c = 0; - if (ba.size() > 1 && (((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) - || ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe))) { - c = QTextCodec::codecForMib(1015); // utf16 - } else if (ba.size() > 2 - && (uchar)ba[0] == 0xef - && (uchar)ba[1] == 0xbb - && (uchar)ba[2] == 0xbf) { - c = QTextCodec::codecForMib(106); // utf-8 - } else { + c = QTextCodec::codecForUtfText(ba, c); + if (!c) { QByteArray header = ba.left(512).toLower(); if ((pos = header.indexOf("http-equiv=")) != -1) { pos = header.indexOf("charset=", pos) + int(strlen("charset=")); @@ -1554,6 +1552,61 @@ QTextCodec *QTextCodec::codecForHtml(const QByteArray &ba) return codecForHtml(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); } +/*! + \since 4.6 + + Tries to detect the encoding of the provided snippet \a ba by + using the BOM (Byte Order Mark) and returns a QTextCodec instance + that is capable of decoding the text to unicode. If the codec + cannot be detected from the content provided, \a defaultCodec is + returned. + + \sa codecForHtml +*/ +QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec) +{ + const uint arraySize = ba.size(); + + if (arraySize > 3) { + if ((uchar)ba[0] == 0x00 + && (uchar)ba[1] == 0x00 + && (uchar)ba[2] == 0xFE + && (uchar)ba[3] == 0xFF) + return QTextCodec::codecForMib(1018); // utf-32 be + else if ((uchar)ba[0] == 0xFF + && (uchar)ba[1] == 0xFE + && (uchar)ba[2] == 0x00 + && (uchar)ba[3] == 0x00) + return QTextCodec::codecForMib(1019); // utf-32 le + } + + if (arraySize < 2) + return defaultCodec; + if ((uchar)ba[0] == 0xfe && (uchar)ba[1] == 0xff) + return QTextCodec::codecForMib(1013); // utf16 be + else if ((uchar)ba[0] == 0xff && (uchar)ba[1] == 0xfe) + return QTextCodec::codecForMib(1014); // utf16 le + + if (arraySize < 3) + return defaultCodec; + if ((uchar)ba[0] == 0xef + && (uchar)ba[1] == 0xbb + && (uchar)ba[2] == 0xbf) + return QTextCodec::codecForMib(106); // utf-8 + + return defaultCodec; +} + +/*! + \overload + + If the codec cannot be detected, this overload returns a Latin-1 QTextCodec. +*/ +QTextCodec *QTextCodec::codecForUtfText(const QByteArray &ba) +{ + return codecForUtfText(ba, QTextCodec::codecForMib(/*Latin 1*/ 4)); +} + /*! \internal \since 4.3 diff --git a/src/corelib/codecs/qtextcodec.h b/src/corelib/codecs/qtextcodec.h index e32650f..83097a5 100644 --- a/src/corelib/codecs/qtextcodec.h +++ b/src/corelib/codecs/qtextcodec.h @@ -82,6 +82,9 @@ public: static QTextCodec *codecForHtml(const QByteArray &ba); static QTextCodec *codecForHtml(const QByteArray &ba, QTextCodec *defaultCodec); + static QTextCodec *codecForUtfText(const QByteArray &ba); + static QTextCodec *codecForUtfText(const QByteArray &ba, QTextCodec *defaultCodec); + QTextDecoder* makeDecoder() const; QTextEncoder* makeEncoder() const; diff --git a/src/gui/kernel/qclipboard.cpp b/src/gui/kernel/qclipboard.cpp index 917b5d5..6daf433 100644 --- a/src/gui/kernel/qclipboard.cpp +++ b/src/gui/kernel/qclipboard.cpp @@ -50,6 +50,7 @@ #include "qvariant.h" #include "qbuffer.h" #include "qimage.h" +#include "qtextcodec.h" QT_BEGIN_NAMESPACE @@ -276,11 +277,12 @@ QClipboard::~QClipboard() */ QString QClipboard::text(QString &subtype, Mode mode) const { - const QMimeData *data = mimeData(mode); + const QMimeData *const data = mimeData(mode); if (!data) return QString(); + + const QStringList formats = data->formats(); if (subtype.isEmpty()) { - QStringList formats = data->formats(); if (formats.contains(QLatin1String("text/plain"))) subtype = QLatin1String("plain"); else { @@ -289,13 +291,21 @@ QString QClipboard::text(QString &subtype, Mode mode) const subtype = formats.at(i).mid(5); break; } + if (subtype.isEmpty()) + return QString(); } - } - if (subtype.isEmpty()) + } else if (!formats.contains(QLatin1String("text/") + subtype)) { return QString(); - if (subtype == QLatin1String("plain")) - return data->text(); - return QString::fromUtf8(data->data(QLatin1String("text/") + subtype)); + } + + const QByteArray rawData = data->data(QLatin1String("text/") + subtype); + + QTextCodec* codec = QTextCodec::codecForMib(106); // utf-8 is default + if (subtype == QLatin1String("html")) + codec = QTextCodec::codecForHtml(rawData, codec); + else + codec = QTextCodec::codecForUtfText(rawData, codec); + return codec->toUnicode(rawData); } /*! |