/****************************************************************************
**
** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the Qt Linguist of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "translator.h"

// NOTE(review): the header names were lost from this copy of the file; they
// were restored from the Qt classes used below. Confirm against upstream.
#include <QtCore/QByteArray>
#include <QtCore/QDebug>
#include <QtCore/QRegExp>
#include <QtCore/QTextCodec>
#include <QtCore/QTextStream>

#include <QtXml/QXmlStreamReader>

#define STRINGIFY_INTERNAL(x) #x
#define STRINGIFY(x) STRINGIFY_INTERNAL(x)
// Declares a function-local static QString named str<s> holding the text "s".
#define STRING(s) static QString str##s(QLatin1String(STRINGIFY(s)))

QT_BEGIN_NAMESPACE

/*
 * The encodings are a total mess.
 * A Translator has a codecForTr(). Each message's text will be passed to tr()
 * in that encoding or as UTF-8 to trUtf8() if it is flagged as such.
 * For ts 2.0, the file content is always uniformly in UTF-8. The file stores
 * the codecForTr default and marks deviating messages accordingly.
 * For ts 1.1, the file content is in mixed encoding. Each message is encoded
 * the way it will be passed to tr() (with 8-bit characters encoded as numeric
 * entities) or trUtf8(). The file stores the encoding and codecForTr in one
 * attribute, for both the default and each deviating message.
 */

// Debug helper: prints an XML attribute as "[name,value]".
QDebug &operator<<(QDebug &d, const QXmlStreamAttribute &attr)
{
    return d << "[" << attr.name().toString() << "," << attr.value().toString() << "]";
}

// Stream reader that parses a TS document into a Translator.
// Errors are reported through the ConversionData passed to the constructor.
class TSReader : public QXmlStreamReader
{
public:
    TSReader(QIODevice &dev, ConversionData &cd)
      : QXmlStreamReader(&dev), m_cd(cd)
    {}

    // the "real thing"
    bool read(Translator &translator);

private:
    // True if the current token is a start element with the given name.
    bool elementStarts(const QString &str) const
    {
        return isStartElement() && name() == str;
    }

    // True if the current token is character data that trims to nothing.
    bool isWhiteSpace() const
    {
        return isCharacters() && text().toString().trimmed().isEmpty();
    }

    // needed to expand <byte ... />
    QString readContents();
    // needed to join <lengthvariant>s
    QString readTransContents();

    void handleError();

    ConversionData &m_cd;
};

// Raises a reader error describing the token that was not expected at the
// current position. Comments are tolerated everywhere, and an error that is
// already a CustomError is left untouched so the first message wins.
void TSReader::handleError()
{
    if (isComment())
        return;
    if (hasError() && error() == CustomError) // raised by readContents
        return;

    const QString loc = QString::fromLatin1("at %3:%1:%2")
        .arg(lineNumber()).arg(columnNumber()).arg(m_cd.m_sourceFileName);

    switch (tokenType()) {
    case NoToken: // Cannot happen
    default: // likewise
    case Invalid:
        raiseError(QString::fromLatin1("Parse error %1: %2").arg(loc, errorString()));
        break;
    case StartElement:
        raiseError(QString::fromLatin1("Unexpected tag <%1> %2").arg(name().toString(), loc));
        break;
    case Characters:
        {
            // Keep the message readable: quote at most 30 characters.
            QString tok = text().toString();
            if (tok.length() > 30)
                tok = tok.left(30) + QLatin1String("[...]");
            raiseError(QString::fromLatin1("Unexpected characters '%1' %2").arg(tok, loc));
        }
        break;
    case EntityReference:
        raiseError(QString::fromLatin1("Unexpected entity '&%1;' %2").arg(name().toString(), loc));
        break;
    case ProcessingInstruction:
        raiseError(QString::fromLatin1("Unexpected processing instruction %1").arg(loc));
        break;
    }
}

// Converts the text of a <byte value="..."/> attribute into a one-character
// string. A leading 'x' selects base 16, otherwise base 10 is used. A value
// that converts to 0 yields an empty string.
static QString byteValue(QString value)
{
    int base = 10;
    if (value.startsWith(QLatin1String("x"))) {
        base = 16;
        value.remove(0, 1);
    }
    int n = value.toUInt(0, base);
    return (n != 0) ? QString(QChar(n)) : QString();
}

// Collects the character data of the current element up to its end tag,
// expanding embedded <byte> elements. Any other token is reported through
// handleError() and ends the scan; whatever was collected so far is returned.
QString TSReader::readContents()
{
    STRING(byte);
    STRING(value);

    QString result;
    while (!atEnd()) {
        readNext();
        if (isEndElement()) {
            break;
        } else if (isCharacters()) {
            result += text();
        } else if (elementStarts(strbyte)) {
            // <byte value="...">
            result += byteValue(attributes().value(strvalue).toString());
            // The element must be empty, so the next token has to close it.
            readNext();
            if (!isEndElement()) {
                handleError();
                break;
            }
        } else {
            handleError();
            break;
        }
    }
    //qDebug() << "TEXT: " << result;
    return result;
}

// Reads the contents of a translation-like element. When the element carries
// variants="yes", the contents of its <lengthvariant> children are joined with
// Translator::BinaryVariantSeparator; otherwise this is plain readContents().
QString TSReader::readTransContents()
{
    STRING(lengthvariant);
    STRING(variants);
    STRING(yes);

    if (attributes().value(strvariants) == stryes) {
        QString result;
        while (!atEnd()) {
            readNext();
            if (isEndElement()) {
                break;
            } else if (isWhiteSpace()) {
                // ignore these, just whitespace
            } else if (elementStarts(strlengthvariant)) {
                if (!result.isEmpty())
                    result += QChar(Translator::BinaryVariantSeparator);
                result += readContents();
            } else {
                handleError();
                break;
            }
        }
        return result;
    } else {
        return readContents();
    }
}

// Parses the whole document into \a translator.
// Returns true on success. If the reader ends in an error state, the error
// string is appended to the ConversionData and false is returned.
bool TSReader::read(Translator &translator)
{
    STRING(both);
    STRING(byte);
    STRING(comment);
    STRING(context);
    STRING(defaultcodec);
    STRING(encoding);
    STRING(extracomment);
    STRING(filename);
    STRING(id);
    STRING(language);
    STRING(line);
    STRING(location);
    STRING(message);
    STRING(name);
    STRING(numerus);
    STRING(numerusform);
    STRING(obsolete);
    STRING(oldcomment);
    STRING(oldsource);
    STRING(source);
    STRING(sourcelanguage);
    STRING(translation);
    STRING(translatorcomment);
    STRING(true);
    STRING(TS);
    STRING(type);
    STRING(unfinished);
    STRING(userdata);
    STRING(utf8);
    STRING(value);
    //STRING(version);
    STRING(yes);

    static const QString strextrans(QLatin1String("extra-"));
    static const QString strUtf8(QLatin1String("UTF-8"));

    while (!atEnd()) {
        readNext();
        if (isStartDocument()) {
            // <!DOCTYPE TS>
            //qDebug() << attributes();
        } else if (isEndDocument()) {
            // <!DOCTYPE TS>
            //qDebug() << attributes();
        } else if (isDTD()) {
            // <!DOCTYPE TS>
            //qDebug() << tokenString();
        } else if (elementStarts(strTS)) {
            // <TS>
            //qDebug() << "TS " << attributes();

            // Running line number per file, used to resolve relative
            // ("+N" / "-N") location entries.
            QHash<QString, int> currentLine;
            // File name carried over from the first location of the
            // previous message.
            QString currentFile;

            QXmlStreamAttributes atts = attributes();
            //QString version = atts.value(strversion).toString();
            translator.setLanguageCode(atts.value(strlanguage).toString());
            translator.setSourceLanguageCode(atts.value(strsourcelanguage).toString());
            while (!atEnd()) {
                readNext();
                if (isEndElement()) {
                    // </TS> found, finish local loop
                    break;
                } else if (isWhiteSpace()) {
                    // ignore these, just whitespace
                } else if (elementStarts(strdefaultcodec)) {
                    // <defaultcodec>
                    const QString &codec = readElementText();
                    if (!codec.isEmpty())
                        translator.setCodecName(codec.toLatin1());
                    // </defaultcodec>
                } else if (isStartElement()
                        && name().toString().startsWith(strextrans)) {
                    // <extra-...>
                    QString tag = name().toString();
                    // mid(6) drops the "extra-" prefix.
                    translator.setExtra(tag.mid(6), readContents());
                    // </extra-...>
                } else if (elementStarts(strcontext)) {
                    // <context>
                    QString context;
                    while (!atEnd()) {
                        readNext();
                        if (isEndElement()) {
                            // </context> found, finish local loop
                            break;
                        } else if (isWhiteSpace()) {
                            // ignore these, just whitespace
                        } else if (elementStarts(strname)) {
                            // <name>
                            context = readElementText();
                            // </name>
                        } else if (elementStarts(strmessage)) {
                            // <message>
                            TranslatorMessage::References refs;
                            QString currentMsgFile = currentFile;

                            TranslatorMessage msg;
                            msg.setId(attributes().value(strid).toString());
                            msg.setContext(context);
                            msg.setType(TranslatorMessage::Finished);
                            msg.setPlural(attributes().value(strnumerus) == stryes);
                            // utf8="both" | utf8="true" is the 2.0 marking,
                            // encoding="UTF-8" the 1.1 one.
                            const QStringRef &utf8Attr = attributes().value(strutf8);
                            msg.setNonUtf8(utf8Attr == strboth);
                            msg.setUtf8(msg.isNonUtf8() || utf8Attr == strtrue
                                 ||  attributes().value(strencoding) == strUtf8);
                            while (!atEnd()) {
                                readNext();
                                if (isEndElement()) {
                                    // </message> found, finish local loop
                                    msg.setReferences(refs);
                                    translator.append(msg);
                                    break;
                                } else if (isWhiteSpace()) {
                                    // ignore these, just whitespace
                                } else if (elementStarts(strsource)) {
                                    // <source>...</source>
                                    msg.setSourceText(readContents());
                                } else if (elementStarts(stroldsource)) {
                                    // <oldsource>...</oldsource>
                                    msg.setOldSourceText(readContents());
                                } else if (elementStarts(stroldcomment)) {
                                    // <oldcomment>...</oldcomment>
                                    msg.setOldComment(readContents());
                                } else if (elementStarts(strextracomment)) {
                                    // <extracomment>...</extracomment>
                                    msg.setExtraComment(readContents());
                                } else if (elementStarts(strtranslatorcomment)) {
                                    // <translatorcomment>...</translatorcomment>
                                    msg.setTranslatorComment(readContents());
                                } else if (elementStarts(strlocation)) {
                                    // <location/>
                                    QXmlStreamAttributes atts = attributes();
                                    QString fileName = atts.value(strfilename).toString();
                                    if (fileName.isEmpty()) {
                                        // No file name: same file as the previous location.
                                        fileName = currentMsgFile;
                                    } else {
                                        // Only the first reference of a message is
                                        // carried over to the following message.
                                        if (refs.isEmpty())
                                            currentFile = fileName;
                                        currentMsgFile = fileName;
                                    }
                                    const QString lin = atts.value(strline).toString();
                                    if (lin.isEmpty()) {
                                        translator.setLocationsType(Translator::RelativeLocations);
                                        refs.append(TranslatorMessage::Reference(fileName, -1));
                                    } else {
                                        bool bOK;
                                        int lineNo = lin.toInt(&bOK);
                                        if (bOK) {
                                            if (lin.startsWith(QLatin1Char('+')) || lin.startsWith(QLatin1Char('-'))) {
                                                // Signed value: a delta to the last line seen in this file.
                                                lineNo = (currentLine[fileName] += lineNo);
                                                translator.setLocationsType(Translator::RelativeLocations);
                                            } else {
                                                translator.setLocationsType(Translator::AbsoluteLocations);
                                            }
                                            refs.append(TranslatorMessage::Reference(fileName, lineNo));
                                        }
                                    }
                                    // Consume the rest of the element; the result is not needed.
                                    readContents();
                                } else if (elementStarts(strcomment)) {
                                    // <comment>...</comment>
                                    msg.setComment(readContents());
                                } else if (elementStarts(struserdata)) {
                                    // <userdata>...</userdata>
                                    msg.setUserData(readContents());
                                } else if (elementStarts(strtranslation)) {
                                    // <translation>
                                    QXmlStreamAttributes atts = attributes();
                                    QStringRef type = atts.value(strtype);
                                    if (type == strunfinished)
                                        msg.setType(TranslatorMessage::Unfinished);
                                    else if (type == strobsolete)
                                        msg.setType(TranslatorMessage::Obsolete);
                                    if (msg.isPlural()) {
                                        QStringList translations;
                                        while (!atEnd()) {
                                            readNext();
                                            if (isEndElement()) {
                                                break;
                                            } else if (isWhiteSpace()) {
                                                // ignore these, just whitespace
                                            } else if (elementStarts(strnumerusform)) {
                                                translations.append(readTransContents());
                                            } else {
                                                handleError();
                                                break;
                                            }
                                        }
                                        msg.setTranslations(translations);
                                    } else {
                                        msg.setTranslation(readTransContents());
                                    }
                                    // </translation>
                                } else if (isStartElement()
                                        && name().toString().startsWith(strextrans)) {
                                    // <extra-...>
                                    QString tag = name().toString();
                                    msg.setExtra(tag.mid(6), readContents());
                                    // </extra-...>
                                } else {
                                    handleError();
                                }
                            }
                            // </message>
                        } else {
                            handleError();
                        }
                    }
                    // </context>
                } else {
                    handleError();
                }
            }
            // </TS>
        } else {
            handleError();
        }
    }
    if (hasError()) {
        m_cd.appendError(errorString());
        return false;
    }
    return true;
}

// Formats \a ch (in hex) either as a <byte value="x.."/> element, for code
// points up to 0x20, or as an XML numeric character reference otherwise.
static QString numericEntity(int ch)
{
    return QString(ch <= 0x20 ? QLatin1String("<byte value=\"x%1\"/>")
        : QLatin1String("&#x%1;")) .arg(ch, 0, 16);
}

// Escapes the XML special characters in \a str. Control characters below
// 0x20 other than CR, LF and TAB are written via numericEntity().
static QString protect(const QString &str)
{
    QString result;
    result.reserve(str.length() * 12 / 10);
    for (int i = 0; i != str.size(); ++i) {
        uint c = str.at(i).unicode();
        switch (c) {
        case '\"':
            result += QLatin1String("&quot;");
            break;
        case '&':
            result += QLatin1String("&amp;");
            break;
        case '>':
            result += QLatin1String("&gt;");
            break;
        case '<':
            result += QLatin1String("&lt;");
            break;
        case '\'':
            result += QLatin1String("&apos;");
            break;
        default:
            if (c < 0x20 && c != '\r' && c != '\n' && c != '\t')
                result += numericEntity(c);
            else // this also covers surrogates
                result += QChar(c);
        }
    }
    return result;
}

// Produces the on-disk form of \a str. UTF-8 messages, format 20 files, a
// UTF-8 codec and an unknown codec all get plain protect(). Otherwise the
// escaped text is converted with the codec, and every resulting byte at or
// above 0x7f is written as a numeric entity.
static QString evilBytes(const QString& str,
    bool isUtf8, int format, const QByteArray &codecName)
{
    //qDebug() << "EVIL: " << str << isUtf8 << format << codecName;
    if (isUtf8)
        return protect(str);
    if (format == 20)
        return protect(str);
    if (codecName == "UTF-8")
        return protect(str);
    QTextCodec *codec = QTextCodec::codecForName(codecName);
    if (!codec)
        return protect(str);
    QString t = QString::fromLatin1(codec->fromUnicode(protect(str)).data());
    int len = (int) t.length();
    QString result;
    // FIXME: Factor is sensible only for latin scripts, probably.
    result.reserve(t.length() * 2);
    for (int k = 0; k < len; k++) {
        if (t[k].unicode() >= 0x7f)
            result += numericEntity(t[k].unicode());
        else
            result += t[k];
    }
    return result;
}

// Writes one <extra-KEY>VALUE</extra-KEY> line per entry of \a extras whose
// key is not matched exactly by \a drops.
static void writeExtras(QTextStream &t, const char *indent,
                        const TranslatorMessage::ExtraData &extras, const QRegExp &drops)
{
    for (Translator::ExtraData::ConstIterator it = extras.begin(); it != extras.end(); ++it) {
        if (!drops.exactMatch(it.key())) {
            t << indent << "<extra-" << it.key() << '>'
              << protect(it.value())
              << "</extra-" << it.key() << ">\n";
        }
    }
}

// Finishes an already opened start tag and writes \a input as its contents.
// If \a input contains Translator::BinaryVariantSeparator, the tag gets
// variants="yes" and each part is wrapped in <lengthvariant>; otherwise the
// tag is simply closed and the escaped text follows.
static void writeVariants(QTextStream &t, const char *indent, const QString &input)
{
    int offset;
    if ((offset = input.indexOf(QChar(Translator::BinaryVariantSeparator))) >= 0) {
        t << " variants=\"yes\">";
        int start = 0;
        forever {
            // NOTE(review): the width of the indent literal was restored
            // from the upstream layout; confirm.
            t << "\n    " << indent << "<lengthvariant>"
              << protect(input.mid(start, offset - start))
              << "</lengthvariant>";
            if (offset == input.length())
                break;
            start = offset + 1;
            offset = input.indexOf(QChar(Translator::BinaryVariantSeparator), start);
            if (offset < 0)
                offset = input.length(); // last part runs to the end of the string
        }
        t << "\n" << indent;
    } else {
        t << ">" << protect(input);
    }
}

// Writes \a translator to \a dev as a TS document. \a format is 11 for TS 1.1
// and 20 for TS 2.0. The stream itself is always written as UTF-8.
// The function returns `result`, which is never changed from true.
//
// NOTE(review): in the copy of this file that was reviewed, every "<...>"
// span had been removed, including the statements that emit the attributes of
// <TS>, <context>, <message>, <location> and <translation>. Those statements
// and the indentation literals below were restored from the element and
// attribute names that TSReader::read() parses and from the upstream file;
// confirm them against the upstream Qt sources before merging.
bool saveTS(const Translator &translator, QIODevice &dev, ConversionData &cd, int format)
{
    bool result = true;
    QTextStream t(&dev);
    t.setCodec(QTextCodec::codecForName("UTF-8"));
    bool trIsUtf8 = (translator.codecName() == "UTF-8");
    //qDebug() << translator.codecName();
    bool fileIsUtf8 = (format == 20 || trIsUtf8);

    // The xml prolog allows processors to easily detect the correct encoding
    t << "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<!DOCTYPE TS>\n";

    if (format == 11)
        t << "<TS version=\"1.1\"";
    else
        t << "<TS version=\"2.0\"";

    QString languageCode = translator.languageCode();
    if (!languageCode.isEmpty() && languageCode != QLatin1String("C"))
        t << " language=\"" << languageCode << "\"";
    if (format == 20) {
        languageCode = translator.sourceLanguageCode();
        if (!languageCode.isEmpty() && languageCode != QLatin1String("C"))
            t << " sourcelanguage=\"" << languageCode << "\"";
    }
    t << ">\n";

    QByteArray codecName = translator.codecName();
    if (codecName != "ISO-8859-1")
        t << "<defaultcodec>" << codecName << "</defaultcodec>\n";

    QRegExp drops(cd.dropTags().join(QLatin1String("|")));

    if (format == 20)
        writeExtras(t, "    ", translator.extras(), drops);

    // Group the messages by context, remembering the order in which the
    // contexts were first seen.
    QHash<QString, QList<TranslatorMessage> > messageOrder;
    QList<QString> contextOrder;
    foreach (const TranslatorMessage &msg, translator.messages()) {
        // no need for such noise
        if (msg.type() == TranslatorMessage::Obsolete && msg.translation().isEmpty())
            continue;

        QList<TranslatorMessage> &context = messageOrder[msg.context()];
        if (context.isEmpty())
            contextOrder.append(msg.context());
        context.append(msg);
    }
    if (cd.sortContexts())
        qSort(contextOrder);

    // State for relative locations: last line written per file, and the file
    // of the first location of the previous message.
    QHash<QString, int> currentLine;
    QString currentFile;
    foreach (const QString &context, contextOrder) {
        const TranslatorMessage &firstMsg = messageOrder[context].first();
        t << "<context" << ((!fileIsUtf8 && firstMsg.isUtf8()) ? " encoding=\"UTF-8\"" : "") << ">\n";

        t << "    <name>"
          << evilBytes(context, firstMsg.isUtf8() || fileIsUtf8, format, codecName)
          << "</name>\n";
        foreach (const TranslatorMessage &msg, messageOrder[context]) {
            //msg.dump();

            bool isUtf8 = msg.isUtf8();
            bool second = false;
            // Runs once, or twice for a TS 1.1 message that is flagged both
            // UTF-8 and non-UTF-8 (see the break condition at the bottom).
            forever {

                t << "    <message";
                if (!msg.id().isEmpty())
                    t << " id=\"" << msg.id() << "\"";
                if (format == 11 && !trIsUtf8 && isUtf8)
                    t << " encoding=\"UTF-8\"";
                if (format == 20 && !trIsUtf8 && msg.isUtf8()) {
                    if (msg.isNonUtf8())
                        t << " utf8=\"both\"";
                    else
                        t << " utf8=\"true\"";
                }
                if (msg.isPlural())
                    t << " numerus=\"yes\"";
                t << ">\n";
                if (translator.locationsType() != Translator::NoLocations) {
                    QString cfile = currentFile;
                    bool first = true;
                    foreach (const TranslatorMessage::Reference &ref, msg.allReferences()) {
                        QString fn = cd.m_targetDir.relativeFilePath(ref.fileName())
                                    .replace(QLatin1Char('\\'),QLatin1Char('/'));
                        int ln = ref.lineNumber();
                        QString ld;
                        if (translator.locationsType() == Translator::RelativeLocations) {
                            if (ln != -1) {
                                // Signed delta to the last line written for this file.
                                int dlt = ln - currentLine[fn];
                                if (dlt >= 0)
                                    ld.append(QLatin1Char('+'));
                                ld.append(QString::number(dlt));
                                currentLine[fn] = ln;
                            }

                            if (fn != cfile) {
                                if (first)
                                    currentFile = fn;
                                cfile = fn;
                            } else {
                                // Same file as the previous location: omit the name.
                                fn.clear();
                            }
                            first = false;
                        } else {
                            if (ln != -1)
                                ld = QString::number(ln);
                        }
                        t << "        <location";
                        if (!fn.isEmpty())
                            t << " filename=\"" << fn << "\"";
                        if (!ld.isEmpty())
                            t << " line=\"" << ld << "\"";
                        t << "/>\n";
                    }
                }

                t << "        <source>"
                  << evilBytes(msg.sourceText(), isUtf8, format, codecName)
                  << "</source>\n";

                if (format != 11 && !msg.oldSourceText().isEmpty())
                    t << "        <oldsource>" << protect(msg.oldSourceText()) << "</oldsource>\n";

                if (!msg.comment().isEmpty()) {
                    t << "        <comment>"
                      << evilBytes(msg.comment(), isUtf8, format, codecName)
                      << "</comment>\n";
                }

                if (format != 11) {

                    if (!msg.oldComment().isEmpty())
                        t << "        <oldcomment>" << protect(msg.oldComment()) << "</oldcomment>\n";

                    if (!msg.extraComment().isEmpty())
                        t << "        <extracomment>" << protect(msg.extraComment())
                          << "</extracomment>\n";

                    if (!msg.translatorComment().isEmpty())
                        t << "        <translatorcomment>" << protect(msg.translatorComment())
                          << "</translatorcomment>\n";

                }

                t << "        <translation";
                if (msg.type() == TranslatorMessage::Unfinished)
                    t << " type=\"unfinished\"";
                else if (msg.type() == TranslatorMessage::Obsolete)
                    t << " type=\"obsolete\"";
                if (msg.isPlural()) {
                    t << ">";
                    const QStringList &translns = msg.translations();
                    for (int j = 0; j < translns.count(); ++j) {
                        t << "\n            <numerusform";
                        writeVariants(t, "            ", translns[j]);
                        t << "</numerusform>";
                    }
                    t << "\n        ";
                } else {
                    writeVariants(t, "        ", msg.translation());
                }
                t << "</translation>\n";

                if (format != 11)
                    writeExtras(t, "        ", msg.extras(), drops);

                if (!msg.userData().isEmpty())
                    t << "        <userdata>" << msg.userData() << "</userdata>\n";
                t << "    </message>\n";

                if (format != 11 || second || !msg.isUtf8() || !msg.isNonUtf8())
                    break;
                // Second pass: write the same message again as non-UTF-8.
                isUtf8 = false;
                second = true;
            }
        }
        t << "</context>\n";
    }

    t << "</TS>\n";
    return result;
}

// Loader entry point: resets the locations type to NoLocations, then parses
// \a dev into \a translator. Returns the result of TSReader::read().
bool loadTS(Translator &translator, QIODevice &dev, ConversionData &cd)
{
    translator.setLocationsType(Translator::NoLocations);
    TSReader reader(dev, cd);
    return reader.read(translator);
}

// Saver entry point for the TS 1.1 format.
bool saveTS11(const Translator &translator, QIODevice &dev, ConversionData &cd)
{
    return saveTS(translator, dev, cd, 11);
}

// Saver entry point for the TS 2.0 format.
bool saveTS20(const Translator &translator, QIODevice &dev, ConversionData &cd)
{
    return saveTS(translator, dev, cd, 20);
}

// Registers the "ts11", "ts20" and "ts" file formats with Translator.
// All three share loadTS; "ts" saves as 2.0 and has priority 0, the other
// two have priority -1. Always returns 1.
int initTS()
{
    Translator::FileFormat format;

    format.extension = QLatin1String("ts11");
    format.fileType = Translator::FileFormat::TranslationSource;
    format.priority = -1;
    format.description = QObject::tr("Qt translation sources (format 1.1)");
    format.loader = &loadTS;
    format.saver = &saveTS11;
    Translator::registerFileFormat(format);

    format.extension = QLatin1String("ts20");
    format.fileType = Translator::FileFormat::TranslationSource;
    format.priority = -1;
    format.description = QObject::tr("Qt translation sources (format 2.0)");
    format.loader = &loadTS;
    format.saver = &saveTS20;
    Translator::registerFileFormat(format);

    // "ts" is always the latest. right now it's ts20.
    format.extension = QLatin1String("ts");
    format.fileType = Translator::FileFormat::TranslationSource;
    format.priority = 0;
    format.description = QObject::tr("Qt translation sources (latest format)");
    format.loader = &loadTS;
    format.saver = &saveTS20;
    Translator::registerFileFormat(format);

    return 1;
}

Q_CONSTRUCTOR_FUNCTION(initTS)

QT_END_NAMESPACE