From 68595bbc28ba0d1cd78557c61011dc004a4a507c Mon Sep 17 00:00:00 2001 From: Oswald Buddenhagen Date: Fri, 20 Nov 2009 16:35:41 +0100 Subject: fix encodings, take N Task-number: QTBUG-4499 Task-number: QTBUG-5276 --- .../lupdate/testdata/good/codecforsrc/main.cpp | 13 +++++++++++-- .../testdata/good/codecforsrc/project.ts.result | 22 ++++++++++++++++++++-- .../testdata/good/codecfortr/project.ts.result | 2 +- .../lupdate/testdata/good/codecfortr1/main.cpp | 6 ++++-- .../testdata/good/codecfortr1/project.ts.result | 16 +++++++++++++--- .../lupdate/testdata/good/codecfortr2/main.cpp | 5 +++-- .../testdata/good/codecfortr2/project.ts.result | 11 ++++++++--- tools/linguist/lupdate/cpp.cpp | 22 ++++++++++------------ tools/linguist/shared/translator.cpp | 11 ++++++++--- tools/linguist/shared/translator.h | 5 +++-- 10 files changed, 81 insertions(+), 32 deletions(-) diff --git a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp index 158451a..d99723e 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp +++ b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp @@ -47,12 +47,21 @@ int main(int argc, char **argv) { QApplication a(argc, argv); + QTextCodec::setCodecForTr(QTextCodec::codecForName("UTF-8")); + QWidget w; QLabel label1(QObject::tr("abc", "ascii"), &w); QLabel label2(QObject::tr("æøå", "utf-8"), &w); + QLabel label2a(QObject::tr("\303\246\303\270\303\245", "utf-8 oct"), &w); + QLabel label3(QObject::trUtf8("Für Élise", "trUtf8"), &w); + QLabel label3a(QObject::trUtf8("F\303\274r \303\211lise", "trUtf8 oct"), &w); -// I would expect the following to work !? -// QLabel label3(QObject::trUtf8("F\374r \310lise", "trUtf8"), &w); + QBoxLayout *ly = new QVBoxLayout(&w); + ly->addWidget(&label1); + ly->addWidget(&label2); + ly->addWidget(&label2a); + ly->addWidget(&label3); + ly->addWidget(&label3a); w.show(); return a.exec(); diff --git a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result index bc0d9bb..711bf02 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result +++ b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result @@ -5,16 +5,34 @@ QObject - + abc ascii - + æøå utf-8 + + + æøå + utf-8 oct + + + + + Für Élise + trUtf8 + + + + + Für Élise + trUtf8 oct + + diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result index 91da744..6ee369a 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result +++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result @@ -6,7 +6,7 @@ QObject - Á + Б diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp index abb8b89..98b491c 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp +++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp @@ -49,9 +49,11 @@ public: void doFoo() { tr("random ascii only"); - tr("this contains an umlaut ü"); + tr("this contains an umlaut ü literally"); + tr("this contains an umlaut \xfc ü escaped"); trUtf8("random ascii only in utf8"); - trUtf8("umlaut \xfc ü in utf8"); + trUtf8("umlaut ü ü in literal utf8"); + trUtf8("umlaut \303\274 ü in escaped utf8"); } }; diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result index 26eb245..d548e24 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result +++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result @@ -11,17 +11,27 @@ - this contains an umlaut ü &uuml; + this contains an umlaut ü &uuml; literally + this contains an umlaut ü &uuml; escaped + + + + random ascii only in utf8 - - umlaut ü &uuml; in utf8 + + umlaut ü &uuml; in literal utf8 + + + + + umlaut ü &uuml; in escaped utf8 diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp index abb8b89..cd93539 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp +++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp @@ -49,9 +49,10 @@ public: void doFoo() { tr("random ascii only"); - tr("this contains an umlaut ü"); + tr("this contains an umlaut ü literally"); + tr("this contains an umlaut \303\274 ü escaped, really in utf-8"); trUtf8("random ascii only in utf8"); - trUtf8("umlaut \xfc ü in utf8"); + trUtf8("umlaut \303\274 ü in escaped utf8"); } }; diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result index e27c157..6728a25 100644 --- a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result +++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result @@ -11,17 +11,22 @@ - this contains an umlaut ü &uuml; + this contains an umlaut ü &uuml; literally - random ascii only in utf8 + this contains an umlaut ü &uuml; escaped, really in utf-8 - umlaut ü &uuml; in utf8 + random ascii only in utf8 + + + + + umlaut ü &uuml; in escaped utf8 diff --git a/tools/linguist/lupdate/cpp.cpp b/tools/linguist/lupdate/cpp.cpp index 7c9b27a..857233e 100644 --- a/tools/linguist/lupdate/cpp.cpp +++ b/tools/linguist/lupdate/cpp.cpp @@ -306,7 +306,6 @@ private: // the string to read from and current position in the string QTextCodec *yySourceCodec; - bool yySourceIsUnicode; QString yyInStr; const ushort *yyInPtr; @@ -353,7 +352,6 @@ void CppParser::setInput(const QString &in) yyInStr = in; yyFileName = QString(); yySourceCodec = 0; - yySourceIsUnicode = true; yyForceUtf8 = true; } @@ -362,7 +360,6 @@ void CppParser::setInput(QTextStream &ts, const QString &fileName) yyInStr = ts.readAll(); yyFileName = fileName; yySourceCodec = ts.codec(); - yySourceIsUnicode = yySourceCodec->name().startsWith("UTF-"); yyForceUtf8 = false; } @@ -1430,24 +1427,24 @@ QString CppParser::transcode(const QString &str, bool utf8) { static const char tab[] = "abfnrtv"; static const char backTab[] = "\a\b\f\n\r\t\v"; - const QString in = (!utf8 || yySourceIsUnicode) - ? str : QString::fromUtf8(yySourceCodec->fromUnicode(str).data()); - QString out; + // This function has to convert back to bytes, as C's \0* sequences work at that level. + const QByteArray in = yyForceUtf8 ? str.toUtf8() : tor->codec()->fromUnicode(str); + QByteArray out; out.reserve(in.length()); for (int i = 0; i < in.length();) { - ushort c = in[i++].unicode(); + uchar c = in[i++]; if (c == '\\') { if (i >= in.length()) break; - c = in[i++].unicode(); + c = in[i++]; if (c == '\n') continue; if (c == 'x') { QByteArray hex; - while (i < in.length() && isxdigit((c = in[i].unicode()))) { + while (i < in.length() && isxdigit((c = in[i]))) { hex += c; i++; } @@ -1456,7 +1453,7 @@ QString CppParser::transcode(const QString &str, bool utf8) QByteArray oct; int n = 0; oct += c; - while (n < 2 && i < in.length() && (c = in[i].unicode()) >= '0' && c < '8') { + while (n < 2 && i < in.length() && (c = in[i]) >= '0' && c < '8') { i++; n++; oct += c; @@ -1464,13 +1461,14 @@ QString CppParser::transcode(const QString &str, bool utf8) out += oct.toUInt(0, 8); } else { const char *p = strchr(tab, c); - out += QChar(QLatin1Char(!p ? c : backTab[p - tab])); + out += !p ? c : backTab[p - tab]; } } else { out += c; } } - return out; + return (utf8 || yyForceUtf8) ? QString::fromUtf8(out.constData(), out.length()) + : tor->codec()->toUnicode(out); } void CppParser::recordMessage( diff --git a/tools/linguist/shared/translator.cpp b/tools/linguist/shared/translator.cpp index 05fc6e5..8a071d3 100644 --- a/tools/linguist/shared/translator.cpp +++ b/tools/linguist/shared/translator.cpp @@ -67,7 +67,7 @@ QString QObject::tr(const char *sourceText, const char *, int n) #endif Translator::Translator() : - m_codecName("ISO-8859-1"), + m_codec(QTextCodec::codecForName("ISO-8859-1")), m_locationsType(AbsoluteLocations) { } @@ -713,12 +713,17 @@ void Translator::setCodecName(const QByteArray &name) if (!codec) { if (!name.isEmpty()) qWarning("No QTextCodec for %s available. Using Latin1\n", name.constData()); - m_codecName = "ISO-8859-1"; + m_codec = QTextCodec::codecForName("ISO-8859-1"); } else { - m_codecName = codec->name(); + m_codec = codec; } } +QByteArray Translator::codecName() const +{ + return m_codec->name(); +} + void Translator::dump() const { for (int i = 0; i != messageCount(); ++i) diff --git a/tools/linguist/shared/translator.h b/tools/linguist/shared/translator.h index e36f822..353cf9d 100644 --- a/tools/linguist/shared/translator.h +++ b/tools/linguist/shared/translator.h @@ -151,7 +151,8 @@ public: void reportDuplicates(const Duplicates &dupes, const QString &fileName, bool verbose); void setCodecName(const QByteArray &name); - QByteArray codecName() const { return m_codecName; } + QByteArray codecName() const; + QTextCodec *codec() const { return m_codec; } QString languageCode() const { return m_language; } QString sourceLanguageCode() const { return m_sourceLanguage; } @@ -211,7 +212,7 @@ private: typedef QList TMM; // int stores the sequence position. TMM m_messages; - QByteArray m_codecName; + QTextCodec *m_codec; LocationsType m_locationsType; // A string beginning with a 2 or 3 letter language code (ISO 639-1 -- cgit v0.12