From 68595bbc28ba0d1cd78557c61011dc004a4a507c Mon Sep 17 00:00:00 2001
From: Oswald Buddenhagen <oswald.buddenhagen@nokia.com>
Date: Fri, 20 Nov 2009 16:35:41 +0100
Subject: fix encodings, take N

Task-number: QTBUG-4499
Task-number: QTBUG-5276
---
 .../lupdate/testdata/good/codecforsrc/main.cpp     | 13 +++++++++++--
 .../testdata/good/codecforsrc/project.ts.result    | 22 ++++++++++++++++++++--
 .../testdata/good/codecfortr/project.ts.result     |  2 +-
 .../lupdate/testdata/good/codecfortr1/main.cpp     |  6 ++++--
 .../testdata/good/codecfortr1/project.ts.result    | 16 +++++++++++++---
 .../lupdate/testdata/good/codecfortr2/main.cpp     |  5 +++--
 .../testdata/good/codecfortr2/project.ts.result    | 11 ++++++++---
 tools/linguist/lupdate/cpp.cpp                     | 22 ++++++++++------------
 tools/linguist/shared/translator.cpp               | 11 ++++++++---
 tools/linguist/shared/translator.h                 |  5 +++--
 10 files changed, 81 insertions(+), 32 deletions(-)
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp
index 158451a..d99723e 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp
+++ b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/main.cpp
@@ -47,12 +47,21 @@
 int main(int argc, char **argv)
 {
         QApplication a(argc, argv);
+        QTextCodec::setCodecForTr(QTextCodec::codecForName("UTF-8"));
+
         QWidget w;
         QLabel label1(QObject::tr("abc", "ascii"), &w);
         QLabel label2(QObject::tr("æøå", "utf-8"), &w);
+        QLabel label2a(QObject::tr("\303\246\303\270\303\245", "utf-8 oct"), &w);
+        QLabel label3(QObject::trUtf8("Für Élise", "trUtf8"), &w);
+        QLabel label3a(QObject::trUtf8("F\303\274r \303\211lise", "trUtf8 oct"), &w);
 
-//      I would expect the following to work !?
-//        QLabel label3(QObject::trUtf8("F\374r \310lise", "trUtf8"), &w);
+        QBoxLayout *ly = new QVBoxLayout(&w);
+        ly->addWidget(&label1);
+        ly->addWidget(&label2);
+        ly->addWidget(&label2a);
+        ly->addWidget(&label3);
+        ly->addWidget(&label3a);
 
         w.show();
         return a.exec();
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result
index bc0d9bb..711bf02 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result
+++ b/tests/auto/linguist/lupdate/testdata/good/codecforsrc/project.ts.result
@@ -5,16 +5,34 @@
 <context>
     <name>QObject</name>
     <message>
-        <location filename="main.cpp" line="51"/>
+        <location filename="main.cpp" line="53"/>
         <source>abc</source>
         <comment>ascii</comment>
         <translation type="unfinished"></translation>
     </message>
     <message>
-        <location filename="main.cpp" line="52"/>
+        <location filename="main.cpp" line="54"/>
         <source>æøå</source>
         <comment>utf-8</comment>
         <translation type="unfinished"></translation>
     </message>
+    <message>
+        <location filename="main.cpp" line="55"/>
+        <source>æøå</source>
+        <comment>utf-8 oct</comment>
+        <translation type="unfinished"></translation>
+    </message>
+    <message>
+        <location filename="main.cpp" line="56"/>
+        <source>Für Élise</source>
+        <comment>trUtf8</comment>
+        <translation type="unfinished"></translation>
+    </message>
+    <message>
+        <location filename="main.cpp" line="57"/>
+        <source>Für Élise</source>
+        <comment>trUtf8 oct</comment>
+        <translation type="unfinished"></translation>
+    </message>
 </context>
 </TS>
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result
index 91da744..6ee369a 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result
+++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr/project.ts.result
@@ -6,7 +6,7 @@
     <name>QObject</name>
     <message>
         <location filename="main.cpp" line="61"/>
-        <source>Á</source>
+        <source>Б</source>
         <translation type="unfinished"></translation>
     </message>
 </context>
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp
index abb8b89..98b491c 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp
+++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/main.cpp
@@ -49,9 +49,11 @@ public:
     void doFoo()
     {
         tr("random ascii only");
-        tr("this contains an umlaut ь &uuml;");
+        tr("this contains an umlaut ь &uuml; literally");
+        tr("this contains an umlaut \xfc &uuml; escaped");
         trUtf8("random ascii only in utf8");
-        trUtf8("umlaut \xfc &uuml; in utf8");
+        trUtf8("umlaut ü &uuml; in literal utf8");
+        trUtf8("umlaut \303\274 &uuml; in escaped utf8");
     }
 };
 
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result
index 26eb245..d548e24 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result
+++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr1/project.ts.result
@@ -11,17 +11,27 @@
     </message>
     <message>
         <location filename="main.cpp" line="52"/>
-        <source>this contains an umlaut ü &amp;uuml;</source>
+        <source>this contains an umlaut ü &amp;uuml; literally</source>
         <translation type="unfinished"></translation>
     </message>
     <message>
         <location filename="main.cpp" line="53"/>
+        <source>this contains an umlaut ü &amp;uuml; escaped</source>
+        <translation type="unfinished"></translation>
+    </message>
+    <message>
+        <location filename="main.cpp" line="54"/>
         <source>random ascii only in utf8</source>
         <translation type="unfinished"></translation>
     </message>
     <message utf8="true">
-        <location filename="main.cpp" line="54"/>
-        <source>umlaut ü &amp;uuml; in utf8</source>
+        <location filename="main.cpp" line="55"/>
+        <source>umlaut ü &amp;uuml; in literal utf8</source>
+        <translation type="unfinished"></translation>
+    </message>
+    <message utf8="true">
+        <location filename="main.cpp" line="56"/>
+        <source>umlaut ü &amp;uuml; in escaped utf8</source>
         <translation type="unfinished"></translation>
     </message>
 </context>
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp
index abb8b89..cd93539 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp
+++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/main.cpp
@@ -49,9 +49,10 @@ public:
     void doFoo()
     {
         tr("random ascii only");
-        tr("this contains an umlaut ь &uuml;");
+        tr("this contains an umlaut ь &uuml; literally");
+        tr("this contains an umlaut \303\274 &uuml; escaped, really in utf-8");
         trUtf8("random ascii only in utf8");
-        trUtf8("umlaut \xfc &uuml; in utf8");
+        trUtf8("umlaut \303\274 &uuml; in escaped utf8");
     }
 };
 
diff --git a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result
index e27c157..6728a25 100644
--- a/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result
+++ b/tests/auto/linguist/lupdate/testdata/good/codecfortr2/project.ts.result
@@ -11,17 +11,22 @@
     </message>
     <message>
         <location filename="main.cpp" line="52"/>
-        <source>this contains an umlaut ü &amp;uuml;</source>
+        <source>this contains an umlaut ü &amp;uuml; literally</source>
         <translation type="unfinished"></translation>
     </message>
     <message>
         <location filename="main.cpp" line="53"/>
-        <source>random ascii only in utf8</source>
+        <source>this contains an umlaut ü &amp;uuml; escaped, really in utf-8</source>
         <translation type="unfinished"></translation>
     </message>
     <message>
         <location filename="main.cpp" line="54"/>
-        <source>umlaut ü &amp;uuml; in utf8</source>
+        <source>random ascii only in utf8</source>
+        <translation type="unfinished"></translation>
+    </message>
+    <message>
+        <location filename="main.cpp" line="55"/>
+        <source>umlaut ü &amp;uuml; in escaped utf8</source>
         <translation type="unfinished"></translation>
     </message>
 </context>
diff --git a/tools/linguist/lupdate/cpp.cpp b/tools/linguist/lupdate/cpp.cpp
index 7c9b27a..857233e 100644
--- a/tools/linguist/lupdate/cpp.cpp
+++ b/tools/linguist/lupdate/cpp.cpp
@@ -306,7 +306,6 @@ private:
 
     // the string to read from and current position in the string
     QTextCodec *yySourceCodec;
-    bool yySourceIsUnicode;
     QString yyInStr;
     const ushort *yyInPtr;
 
@@ -353,7 +352,6 @@ void CppParser::setInput(const QString &in)
     yyInStr = in;
     yyFileName = QString();
     yySourceCodec = 0;
-    yySourceIsUnicode = true;
     yyForceUtf8 = true;
 }
 
@@ -362,7 +360,6 @@ void CppParser::setInput(QTextStream &ts, const QString &fileName)
     yyInStr = ts.readAll();
     yyFileName = fileName;
     yySourceCodec = ts.codec();
-    yySourceIsUnicode = yySourceCodec->name().startsWith("UTF-");
     yyForceUtf8 = false;
 }
 
@@ -1430,24 +1427,24 @@ QString CppParser::transcode(const QString &str, bool utf8)
 {
     static const char tab[] = "abfnrtv";
     static const char backTab[] = "\a\b\f\n\r\t\v";
-    const QString in = (!utf8 || yySourceIsUnicode)
-        ? str : QString::fromUtf8(yySourceCodec->fromUnicode(str).data());
-    QString out;
+    // This function has to convert back to bytes, as C's \ooo and \xhh escape sequences work at that level.
+    const QByteArray in = yyForceUtf8 ? str.toUtf8() : tor->codec()->fromUnicode(str);
+    QByteArray out;
 
     out.reserve(in.length());
     for (int i = 0; i < in.length();) {
-        ushort c = in[i++].unicode();
+        uchar c = in[i++];
         if (c == '\\') {
             if (i >= in.length())
                 break;
-            c = in[i++].unicode();
+            c = in[i++];
 
             if (c == '\n')
                 continue;
 
             if (c == 'x') {
                 QByteArray hex;
-                while (i < in.length() && isxdigit((c = in[i].unicode()))) {
+                while (i < in.length() && isxdigit((c = in[i]))) {
                     hex += c;
                     i++;
                 }
@@ -1456,7 +1453,7 @@ QString CppParser::transcode(const QString &str, bool utf8)
                 QByteArray oct;
                 int n = 0;
                 oct += c;
-                while (n < 2 && i < in.length() && (c = in[i].unicode()) >= '0' && c < '8') {
+                while (n < 2 && i < in.length() && (c = in[i]) >= '0' && c < '8') {
                     i++;
                     n++;
                     oct += c;
@@ -1464,13 +1461,14 @@ QString CppParser::transcode(const QString &str, bool utf8)
                 out += oct.toUInt(0, 8);
             } else {
                 const char *p = strchr(tab, c);
-                out += QChar(QLatin1Char(!p ? c : backTab[p - tab]));
+                out += !p ? c : backTab[p - tab];
             }
         } else {
             out += c;
         }
     }
-    return out;
+    return (utf8 || yyForceUtf8) ? QString::fromUtf8(out.constData(), out.length())
+                                 : tor->codec()->toUnicode(out);
 }
 
 void CppParser::recordMessage(
diff --git a/tools/linguist/shared/translator.cpp b/tools/linguist/shared/translator.cpp
index 05fc6e5..8a071d3 100644
--- a/tools/linguist/shared/translator.cpp
+++ b/tools/linguist/shared/translator.cpp
@@ -67,7 +67,7 @@ QString QObject::tr(const char *sourceText, const char *, int n)
 #endif
 
 Translator::Translator() :
-    m_codecName("ISO-8859-1"),
+    m_codec(QTextCodec::codecForName("ISO-8859-1")),
     m_locationsType(AbsoluteLocations)
 {
 }
@@ -713,12 +713,17 @@ void Translator::setCodecName(const QByteArray &name)
     if (!codec) {
         if (!name.isEmpty())
             qWarning("No QTextCodec for %s available. Using Latin1\n", name.constData());
-        m_codecName = "ISO-8859-1";
+        m_codec = QTextCodec::codecForName("ISO-8859-1");
     } else {
-        m_codecName = codec->name();
+        m_codec = codec;
     }
 }
 
+QByteArray Translator::codecName() const
+{
+    return m_codec->name();
+}
+
 void Translator::dump() const
 {
     for (int i = 0; i != messageCount(); ++i)
diff --git a/tools/linguist/shared/translator.h b/tools/linguist/shared/translator.h
index e36f822..353cf9d 100644
--- a/tools/linguist/shared/translator.h
+++ b/tools/linguist/shared/translator.h
@@ -151,7 +151,8 @@ public:
     void reportDuplicates(const Duplicates &dupes, const QString &fileName, bool verbose);
 
     void setCodecName(const QByteArray &name);
-    QByteArray codecName() const { return m_codecName; }
+    QByteArray codecName() const;
+    QTextCodec *codec() const { return m_codec; }
 
     QString languageCode() const { return m_language; }
     QString sourceLanguageCode() const { return m_sourceLanguage; }
@@ -211,7 +212,7 @@ private:
     typedef QList<TranslatorMessage> TMM;       // int stores the sequence position.
 
     TMM m_messages;
-    QByteArray m_codecName;
+    QTextCodec *m_codec;
     LocationsType m_locationsType;
 
     // A string beginning with a 2 or 3 letter language code (ISO 639-1
-- 
cgit v0.12