diff options
author | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-04-28 12:08:59 (GMT) |
---|---|---|
committer | Denis Dzyubenko <denis.dzyubenko@nokia.com> | 2009-05-04 13:34:03 (GMT) |
commit | 7031e1d110bb1bc97cfe0377adc211030e1e7320 (patch) | |
tree | b1260d28e0865eaab3c91f1330a1df0dca2ba858 /tests/auto/qtextcodec | |
parent | 1e0e67406c3865717fef8b98d2c69adbefc54245 (diff) | |
download | Qt-7031e1d110bb1bc97cfe0377adc211030e1e7320.zip Qt-7031e1d110bb1bc97cfe0377adc211030e1e7320.tar.gz Qt-7031e1d110bb1bc97cfe0377adc211030e1e7320.tar.bz2 |
When data was copied from Mozilla Firefox to Qt, the text format was not valid.
Mozilla encodes the text/html format in UTF-16 and adds a BOM; however,
it doesn't specify the charset in the HTML header. The fix is to guess
the encoding from either the charset in the HTML header or the BOM for the
text/html format, or from the BOM alone for non-HTML formats.
This commit adds a new public function, QTextCodec::codecForUtfText(), which
can be used to guess the encoding from the BOM.
Task-number: 250555
Reviewed-by: Benjamin Poulain
Reviewed-by: Simon Hausmann
Reviewed-by: Andreas Aardal Hanssen
Diffstat (limited to 'tests/auto/qtextcodec')
-rw-r--r-- | tests/auto/qtextcodec/tst_qtextcodec.cpp | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/tests/auto/qtextcodec/tst_qtextcodec.cpp b/tests/auto/qtextcodec/tst_qtextcodec.cpp
index cf4135b..22f9557 100644
--- a/tests/auto/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/qtextcodec/tst_qtextcodec.cpp
@@ -79,6 +79,9 @@ private slots:
 
     void codecForHtml();
 
+    void codecForUtfText_data();
+    void codecForUtfText();
+
 #ifdef Q_OS_UNIX
     void toLocal8Bit();
 #endif
@@ -1744,6 +1747,70 @@ void tst_QTextCodec::codecForHtml()
     QCOMPARE(QTextCodec::codecForHtml(html, QTextCodec::codecForMib(106))->mibEnum(), 111); // latin 15
 }
 
+// Test data for codecForUtfText(): the raw input bytes, whether a codec is
+// expected to be detected, and the MIB enum that codec must report.
+void tst_QTextCodec::codecForUtfText_data()
+{
+    QTest::addColumn<QByteArray>("encoded");
+    QTest::addColumn<bool>("detected");
+    QTest::addColumn<int>("mib");
+
+
+    QTest::newRow("utf8 bom")
+        << QByteArray("\xef\xbb\xbfhello")
+        << true
+        << 106;
+    QTest::newRow("utf8 nobom")
+        << QByteArray("hello")
+        << false
+        << 0;
+
+    // The UTF-16 and UTF-32 rows below contain NUL bytes, so each length is passed explicitly.
+    QTest::newRow("utf16 bom be")
+        << QByteArray("\xfe\xff\0h\0e\0l", 8)
+        << true
+        << 1013;
+    QTest::newRow("utf16 bom le")
+        << QByteArray("\xff\xfeh\0e\0l\0", 8)
+        << true
+        << 1014;
+    QTest::newRow("utf16 nobom")
+        << QByteArray("\0h\0e\0l", 6)
+        << false
+        << 0;
+
+    QTest::newRow("utf32 bom be")
+        << QByteArray("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16)
+        << true
+        << 1018;
+    QTest::newRow("utf32 bom le")
+        << QByteArray("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16)
+        << true
+        << 1019;
+    QTest::newRow("utf32 nobom")
+        << QByteArray("\0\0\0h\0\0\0e\0\0\0l", 12)
+        << false
+        << 0;
+}
+
+// Verifies that codecForUtfText() selects the codec from the BOM and hands back
+// the fallback argument (0 here) when the input carries no BOM.
+void tst_QTextCodec::codecForUtfText()
+{
+    QFETCH(QByteArray, encoded);
+    QFETCH(bool, detected);
+    QFETCH(int, mib);
+
+    QTextCodec *codec = QTextCodec::codecForUtfText(encoded, 0);
+    if (detected) {
+        // Fail cleanly instead of dereferencing a null codec if detection regresses.
+        QVERIFY(codec != 0);
+        QCOMPARE(codec->mibEnum(), mib);
+    } else {
+        QVERIFY(codec == 0);
+    }
+}
+
 #ifdef Q_OS_UNIX
 void tst_QTextCodec::toLocal8Bit()
 {