/****************************************************************************
**
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
** All rights reserved.
** Contact: Nokia Corporation (qt-info@nokia.com)
**
** This file is part of the Qt Linguist of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** No Commercial Usage
** This file contains pre-release code and may not be distributed.
** You may use this file in accordance with the terms and conditions
** contained in the Technology Preview License Agreement accompanying
** this package.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Nokia gives you certain additional
** rights. These rights are described in the Nokia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** If you have questions regarding the use of this file, please contact
** Nokia at qt-info@nokia.com.
**
**
**
**
**
**
**
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "translator.h"

#include <QtCore/QCoreApplication>
#include <QtCore/QDebug>
#include <QtCore/QDataStream>
#include <QtCore/QFile>
#include <QtCore/QFileInfo>
#include <QtCore/QMap>
#include <QtCore/QString>
#include <QtCore/QTextCodec>
#include <QtCore/QVector>

QT_BEGIN_NAMESPACE

// magic number for the file; loadQM() rejects input that does not start with it
static const int MagicLength = 16;
static const uchar magic[MagicLength] = {
    0x3c, 0xb8, 0x64, 0x18, 0xca, 0xef, 0x9c, 0x95,
    0xcd, 0x21, 0x1c, 0xbf, 0x60, 0xa1, 0xbd, 0xdd
};

namespace {

// Tags that introduce the individual fields of one message record.
enum Tag {
    Tag_End          = 1,
    Tag_SourceText16 = 2,
    Tag_Translation  = 3,
    Tag_Context16    = 4,
    Tag_Obsolete1    = 5,
    Tag_SourceText   = 6,
    Tag_Context      = 7,
    Tag_Comment      = 8,
    Tag_Obsolete2    = 9
};

// How much two neighbouring messages have in common, see Releaser::commonPrefix().
enum Prefix {
    NoPrefix,
    Hash,
    HashContext,
    HashContextSourceText,
    HashContextSourceTextComment
};

// One 8-bit string field of a message record as decoded by fromBytes().
// The defaults describe an empty string, i.e. a field that was never read.
struct DecodedText {
    DecodedText() : isSystem(true), isUtf8(true), needs8Bit(false) {}

    QString text;    // decoded with the source codec
    QString utf8;    // decoded as UTF-8 (only filled when a UTF-8 codec is passed)
    bool isSystem;   // decoding with the source codec produced no invalid chars
    bool isUtf8;     // decoding as UTF-8 produced no invalid chars
    bool needs8Bit;  // at least one byte has the high bit set
};

} // namespace anon

/*
    Computes the ELF hash of the zero-terminated bytes of \a ba.
    The result is never 0; a hash of 0 is mapped to 1.
*/
static uint elfHash(const QByteArray &ba)
{
    const uchar *k = reinterpret_cast<const uchar *>(ba.data());
    uint h = 0;
    uint g;

    if (k) {
        while (*k) {
            h = (h << 4) + *k++;
            if ((g = (h & 0xf0000000)) != 0)
                h ^= g >> 24;
            h &= ~g;
        }
    }
    if (!h)
        h = 1;
    return h;
}

/*
    A message whose context, source text and comment are kept as raw bytes.
    Messages are ordered by context, then source text, then comment; the
    translations do not take part in the ordering.
*/
class ByteTranslatorMessage
{
public:
    ByteTranslatorMessage(
            const QByteArray &context,
            const QByteArray &sourceText,
            const QByteArray &comment,
            const QStringList &translations) :
        m_context(context),
        m_sourcetext(sourceText),
        m_comment(comment),
        m_translations(translations)
    {}
    const QByteArray &context() const { return m_context; }
    const QByteArray &sourceText() const { return m_sourcetext; }
    const QByteArray &comment() const { return m_comment; }
    const QStringList &translations() const { return m_translations; }
    bool operator<(const ByteTranslatorMessage& m) const;

private:
    QByteArray m_context;
    QByteArray m_sourcetext;
    QByteArray m_comment;
    QStringList m_translations;
};

Q_DECLARE_TYPEINFO(ByteTranslatorMessage, Q_MOVABLE_TYPE);

bool ByteTranslatorMessage::operator<(const ByteTranslatorMessage& m) const
{
    if (m_context != m.m_context)
        return m_context < m.m_context;
    if (m_sourcetext != m.m_sourcetext)
        return m_sourcetext < m.m_sourcetext;
    return m_comment < m.m_comment;
}

/*
    Collects messages and serializes them: insert()/insertIdBased() gather
    the messages, squeeze() builds the message, offset and context arrays,
    and save() writes the magic number followed by the non-empty arrays.
*/
class Releaser
{
public:
    // Entry of the offset table: message hash and offset into the message array.
    struct Offset {
        Offset()
            : h(0), o(0)
        {}
        Offset(uint hash, uint offset)
            : h(hash), o(offset)
        {}

        bool operator<(const Offset &other) const {
            return (h != other.h) ? h < other.h : o < other.o;
        }
        bool operator==(const Offset &other) const {
            return h == other.h && o == other.o;
        }
        uint h;
        uint o;
    };

    // Tags of the top-level blocks written by save().
    enum { Contexts = 0x2f, Hashes = 0x42, Messages = 0x69, NumerusRules = 0x88 };

    Releaser() : m_codec(0) {}

    void setCodecName(const QByteArray &codecName)
    {
        m_codec = QTextCodec::codecForName(codecName);
    }

    bool save(QIODevice *iod);

    void insert(const TranslatorMessage &msg, bool forceComment);
    void insertIdBased(const TranslatorMessage &message);

    void squeeze(TranslatorSaveMode mode);

    void setNumerusRules(const QByteArray &rules);

private:
    Q_DISABLE_COPY(Releaser)

    // This should reproduce the byte array fetched from the source file, which
    // in turn should be the same as passed to the actual tr(...) calls
    QByteArray originalBytes(const QString &str, bool isUtf8) const;

    void insertInternal(const TranslatorMessage &message, bool forceComment, bool isUtf8);

    static Prefix commonPrefix(const ByteTranslatorMessage &m1, const ByteTranslatorMessage &m2);

    static uint msgHash(const ByteTranslatorMessage &msg);

    void writeMessage(const ByteTranslatorMessage & msg, QDataStream & stream,
        TranslatorSaveMode strip, Prefix prefix) const;

    // for squeezed but non-file data, this is what needs to be deleted
    QByteArray m_messageArray;
    QByteArray m_offsetArray;
    QByteArray m_contextArray;
    QMap<ByteTranslatorMessage, void *> m_messages;
    QByteArray m_numerusRules;

    // Used to reproduce the original bytes
    QTextCodec *m_codec;
};

QByteArray Releaser::originalBytes(const QString &str, bool isUtf8) const
{
    if (str.isEmpty()) {
        // Do not use QByteArray() here as the result of the serialization
        // will be different.
        return QByteArray("");
    }
    if (isUtf8)
        return str.toUtf8();
    return m_codec ? m_codec->fromUnicode(str) : str.toLatin1();
}

// The hash of a message covers its source text followed by its comment.
uint Releaser::msgHash(const ByteTranslatorMessage &msg)
{
    return elfHash(msg.sourceText() + msg.comment());
}

// Returns how many leading parts (hash, context, source text, comment)
// the two messages share.
Prefix Releaser::commonPrefix(const ByteTranslatorMessage &m1, const ByteTranslatorMessage &m2)
{
    if (msgHash(m1) != msgHash(m2))
        return NoPrefix;
    if (m1.context() != m2.context())
        return Hash;
    if (m1.sourceText() != m2.sourceText())
        return HashContext;
    if (m1.comment() != m2.comment())
        return HashContextSourceText;
    return HashContextSourceTextComment;
}

/*
    Writes one message record to \a stream: all translations, then the
    fields selected by \a prefix, then Tag_End. In SaveEverything mode
    comment, source text and context are always written.
*/
void Releaser::writeMessage(const ByteTranslatorMessage &msg, QDataStream &stream,
    TranslatorSaveMode mode, Prefix prefix) const
{
    for (int i = 0; i < msg.translations().count(); ++i)
        stream << quint8(Tag_Translation) << msg.translations().at(i);

    if (mode == SaveEverything)
        prefix = HashContextSourceTextComment;

    // lrelease produces "wrong" QM files for QByteArrays that are .isNull().
    switch (prefix) {
    default:
    case HashContextSourceTextComment:
        stream << quint8(Tag_Comment) << msg.comment();
        // fall through
    case HashContextSourceText:
        stream << quint8(Tag_SourceText) << msg.sourceText();
        // fall through
    case HashContext:
        stream << quint8(Tag_Context) << msg.context();
        break;
    }

    stream << quint8(Tag_End);
}

/*
    Writes the magic number and every non-empty array to \a iod, each array
    preceded by its one-byte tag and its 32-bit size. Always returns true.
*/
bool Releaser::save(QIODevice *iod)
{
    QDataStream s(iod);
    s.writeRawData(reinterpret_cast<const char *>(magic), MagicLength);

    if (!m_offsetArray.isEmpty()) {
        quint32 oas = quint32(m_offsetArray.size());
        s << quint8(Hashes) << oas;
        s.writeRawData(m_offsetArray.constData(), oas);
    }
    if (!m_messageArray.isEmpty()) {
        quint32 mas = quint32(m_messageArray.size());
        s << quint8(Messages) << mas;
        s.writeRawData(m_messageArray.constData(), mas);
    }
    if (!m_contextArray.isEmpty()) {
        quint32 cas = quint32(m_contextArray.size());
        s << quint8(Contexts) << cas;
        s.writeRawData(m_contextArray.constData(), cas);
    }
    if (!m_numerusRules.isEmpty()) {
        quint32 nrs = m_numerusRules.size();
        s << quint8(NumerusRules) << nrs;
        s.writeRawData(m_numerusRules.constData(), nrs);
    }
    return true;
}

/*
    Rebuilds the message and offset arrays from the collected messages and,
    in SaveStripped mode, the context hash table as well. The collected
    messages are cleared afterwards.
*/
void Releaser::squeeze(TranslatorSaveMode mode)
{
    if (m_messages.isEmpty() && mode == SaveEverything)
        return;

    QMap<ByteTranslatorMessage, void *> messages = m_messages;

    // re-build contents
    m_messageArray.clear();
    m_offsetArray.clear();
    m_contextArray.clear();
    m_messages.clear();

    QMap<Offset, void *> offsets;

    QDataStream ms(&m_messageArray, QIODevice::WriteOnly);
    QMap<ByteTranslatorMessage, void *>::const_iterator it, next;
    int cpPrev = 0, cpNext = 0;
    for (it = messages.constBegin(); it != messages.constEnd(); ++it) {
        cpPrev = cpNext;
        next = it;
        ++next;
        if (next == messages.constEnd())
            cpNext = 0;
        else
            cpNext = commonPrefix(it.key(), next.key());
        offsets.insert(Offset(msgHash(it.key()), uint(ms.device()->pos())),
                       static_cast<void *>(0));
        writeMessage(it.key(), ms, mode, Prefix(qMax(cpPrev, cpNext + 1)));
    }

    // The map keeps the offsets sorted by hash, then by position.
    QMap<Offset, void *>::Iterator offset;
    offset = offsets.begin();
    QDataStream ds(&m_offsetArray, QIODevice::WriteOnly);
    while (offset != offsets.end()) {
        Offset k = offset.key();
        ++offset;
        ds << quint32(k.h) << quint32(k.o);
    }

    if (mode == SaveStripped) {
        QMap<QByteArray, int> contextSet;
        for (it = messages.constBegin(); it != messages.constEnd(); ++it)
            ++contextSet[it.key().context()];

        quint16 hTableSize;
        if (contextSet.size() < 200)
            hTableSize = (contextSet.size() < 60) ? 151 : 503;
        else if (contextSet.size() < 2500)
            hTableSize = (contextSet.size() < 750) ? 1511 : 5003;
        else
            // Clamp to what fits into quint16: an unclamped 3 * size / 2
            // would be truncated and could even become 0, which would
            // make the modulo below divide by zero.
            hTableSize = (contextSet.size() < 10000)
                    ? 15013 : quint16(qMin(3 * contextSet.size() / 2, 0xffff));

        QMultiMap<int, QByteArray> hashMap;
        QMap<QByteArray, int>::const_iterator c;
        for (c = contextSet.constBegin(); c != contextSet.constEnd(); ++c)
            hashMap.insert(elfHash(c.key()) % hTableSize, c.key());

        /*
          The contexts found in this translator are stored in a hash
          table to provide fast lookup. The context array has the
          following format:

              quint16 hTableSize;
              quint16 hTable[hTableSize];
              quint8  contextPool[...];

          The context pool stores the contexts as Pascal strings:

              quint8  len;
              quint8  data[len];

          Let's consider the look-up of context "FunnyDialog".  A
          hash value between 0 and hTableSize - 1 is computed, say h.
          If hTable[h] is 0, "FunnyDialog" is not covered by this
          translator. Else, we check in the contextPool at offset
          2 * hTable[h] to see if "FunnyDialog" is one of the
          contexts stored there, until we find it or we meet the
          empty string.
        */
        m_contextArray.resize(2 + (hTableSize << 1));
        QDataStream t(&m_contextArray, QIODevice::WriteOnly);

        QVector<quint16> hTable(hTableSize, quint16(0));

        t << hTableSize;
        t.device()->seek(2 + (hTableSize << 1));
        t << quint16(0); // the entry at offset 0 cannot be used
        uint upto = 2;

        QMultiMap<int, QByteArray>::const_iterator entry = hashMap.constBegin();
        while (entry != hashMap.constEnd()) {
            int i = entry.key();
            hTable[i] = quint16(upto >> 1);

            // Write all contexts that share this hash bucket back to back.
            do {
                const char *con = entry.value().constData();
                uint len = uint(entry.value().length());
                len = qMin(len, 255u);
                t << quint8(len);
                t.writeRawData(con, len);
                upto += 1 + len;
                ++entry;
            } while (entry != hashMap.constEnd() && entry.key() == i);
            if (upto & 0x1) {
                // offsets have to be even
                t << quint8(0); // empty string
                ++upto;
            }
        }

        // Now that all pool offsets are known, fill in the table itself.
        t.device()->seek(2);
        for (int j = 0; j < hTableSize; j++)
            t << hTable.at(j);

        // Pool offsets are stored halved in 16 bits, so the pool is limited.
        if (upto > 131072) {
            qWarning("Releaser::squeeze: Too many contexts");
            m_contextArray.clear();
        }
    }
}

/*
    Adds \a message, converted to bytes, to the collected messages. Unless
    \a forceComment is set, the message is stored without its comment if no
    comment-less message with the same context and source text exists yet.
*/
void Releaser::insertInternal(const TranslatorMessage &message, bool forceComment, bool isUtf8)
{
    ByteTranslatorMessage bmsg(originalBytes(message.context(), isUtf8),
                               originalBytes(message.sourceText(), isUtf8),
                               originalBytes(message.comment(), isUtf8),
                               message.translations());
    if (!forceComment) {
        ByteTranslatorMessage bmsg2(
                bmsg.context(), bmsg.sourceText(), QByteArray(""), bmsg.translations());
        if (!m_messages.contains(bmsg2)) {
            m_messages.insert(bmsg2, 0);
            return;
        }
    }
    m_messages.insert(bmsg, 0);
}

// Inserts \a message in its own encoding and, if it is flagged as both
// UTF-8 and non-UTF-8, a second time in the non-UTF-8 encoding.
void Releaser::insert(const TranslatorMessage &message, bool forceComment)
{
    insertInternal(message, forceComment, message.isUtf8());
    if (message.isUtf8() && message.isNonUtf8())
        insertInternal(message, forceComment, false);
}

// Inserts \a message keyed by its id only (empty context and comment);
// empty translations are replaced by the source text.
void Releaser::insertIdBased(const TranslatorMessage &message)
{
    QStringList tlns = message.translations();
    for (int i = 0; i < tlns.size(); ++i)
        if (tlns.at(i).isEmpty())
            tlns[i] = message.sourceText();
    ByteTranslatorMessage bmsg("", originalBytes(message.id(), false), "", tlns);
    m_messages.insert(bmsg, 0);
}

void Releaser::setNumerusRules(const QByteArray &rules)
{
    m_numerusRules = rules;
}

static quint8 read8(const uchar *data)
{
    return *data;
}

// Reads a big-endian 32-bit value. The bytes are widened to quint32 first
// so that shifting the top byte cannot overflow a signed int.
static quint32 read32(const uchar *data)
{
    return (quint32(data[0]) << 24)
         | (quint32(data[1]) << 16)
         | (quint32(data[2]) << 8)
         | quint32(data[3]);
}

/*
    Decodes the \a len bytes at \a str.

    If no byte has the high bit set, the text is taken as Latin-1,
    *needs8Bit is cleared and *isSystem is set (as well as *utf8Out and
    *isUtf8 if \a utf8Codec is given). Otherwise *needs8Bit is set, the
    bytes are decoded with \a codec (and with \a utf8Codec if given), and
    *isSystem / *isUtf8 tell whether the respective decoding was free of
    invalid characters.
*/
static void fromBytes(const char *str, int len, QTextCodec *codec, QTextCodec *utf8Codec,
                      QString *out, QString *utf8Out,
                      bool *isSystem, bool *isUtf8, bool *needs8Bit)
{
    for (int i = 0; i < len; ++i)
        if (str[i] & 0x80) {
            if (utf8Codec) {
                QTextCodec::ConverterState cvtState;
                *utf8Out = utf8Codec->toUnicode(str, len, &cvtState);
                *isUtf8 = !cvtState.invalidChars;
            }
            QTextCodec::ConverterState cvtState;
            *out = codec->toUnicode(str, len, &cvtState);
            *isSystem = !cvtState.invalidChars;
            *needs8Bit = true;
            return;
        }
    *out = QString::fromLatin1(str, len);
    *isSystem = true;
    if (utf8Codec) {
        *utf8Out = *out;
        *isUtf8 = true;
    }
    *needs8Bit = false;
}

// Reads the 32-bit length prefix at \a m and advances \a m past it.
// Returns false if the prefix or the announced payload does not fit
// before \a end.
static bool readLength(const uchar *&m, const uchar *end, quint32 *len)
{
    if (end - m < 4)
        return false;
    *len = read32(m);
    m += 4;
    return *len <= quint32(end - m);
}

// Reads one length-prefixed 8-bit string at \a m into \a field and
// advances \a m past it. Returns false if the data is truncated.
static bool readByteField(const uchar *&m, const uchar *end,
                          QTextCodec *codec, QTextCodec *utf8Codec, DecodedText *field)
{
    quint32 len;
    if (!readLength(m, end, &len))
        return false;
    fromBytes(reinterpret_cast<const char *>(m), len, codec, utf8Codec,
              &field->text, &field->utf8,
              &field->isSystem, &field->isUtf8, &field->needs8Bit);
    m += len;
    return true;
}

/*
    Reads a QM file from \a dev and appends its messages to \a translator.

    Returns false and reports through \a cd if the magic number is missing,
    the input codec is not available, the message data is malformed or a
    string cannot be decoded. Also returns false (after loading what could
    be read) if a top-level block claims to extend past the end of the data.
*/
bool loadQM(Translator &translator, QIODevice &dev, ConversionData &cd)
{
    QByteArray ba = dev.readAll();
    const uchar *data = reinterpret_cast<const uchar *>(ba.constData());
    int len = ba.size();
    if (len < MagicLength || memcmp(data, magic, MagicLength) != 0) {
        cd.appendError(QLatin1String("QM-Format error: magic marker missing"));
        return false;
    }

    enum { Contexts = 0x2f, Hashes = 0x42, Messages = 0x69, NumerusRules = 0x88 };

    // for squeezed but non-file data, this is what needs to be deleted
    const uchar *messageArray = 0;
    const uchar *offsetArray = 0;
    const uchar *contextArray = 0;
    const uchar *numerusRulesArray = 0;
    uint messageLength = 0;
    uint offsetLength = 0;
    uint contextLength = 0;
    uint numerusRulesLength = 0;

    bool ok = true;
    const uchar *end = data + len;

    // Locate the top-level blocks: one tag byte, a 32-bit length, the payload.
    data += MagicLength;
    while (data < end - 4) {
        quint8 tag = read8(data++);
        quint32 blockLen = read32(data);
        data += 4;
        if (!tag || !blockLen)
            break;
        // Compare lengths rather than pointers so that a huge blockLen
        // cannot overflow the pointer arithmetic.
        if (blockLen > quint32(end - data)) {
            ok = false;
            break;
        }

        if (tag == Contexts) {
            contextArray = data;
            contextLength = blockLen;
        } else if (tag == Hashes) {
            offsetArray = data;
            offsetLength = blockLen;
        } else if (tag == Messages) {
            messageArray = data;
            messageLength = blockLen;
        } else if (tag == NumerusRules) {
            numerusRulesArray = data;
            numerusRulesLength = blockLen;
        }

        data += blockLen;
    }
    // The context and numerus rules blocks are located but not used below.
    Q_UNUSED(contextArray);
    Q_UNUSED(contextLength);
    Q_UNUSED(numerusRulesArray);
    Q_UNUSED(numerusRulesLength);

    // Each offset table entry is a 32-bit hash followed by a 32-bit offset.
    size_t numItems = offsetLength / (2 * sizeof(quint32));

    QTextCodec *codec = QTextCodec::codecForName(
        cd.m_codecForSource.isEmpty() ? QByteArray("Latin1") : cd.m_codecForSource);
    if (!codec) {
        // codecForName() returns 0 for names it does not know.
        cd.appendError(QLatin1String(
            "Cannot read file: specified input codec is not available"));
        return false;
    }
    QTextCodec *utf8Codec = 0;
    if (codec->name() != "UTF-8")
        utf8Codec = QTextCodec::codecForName("UTF-8");

    QString strProN = QLatin1String("%n");
    QLocale::Language l;
    QLocale::Country c;
    Translator::languageAndCountry(translator.languageCode(), &l, &c);
    QStringList numerusForms;
    bool guessPlurals = true;
    if (getNumerusInfo(l, c, 0, &numerusForms))
        guessPlurals = (numerusForms.count() == 1);

    // These are deliberately declared outside the loop: a record that omits
    // a field keeps the value read for the previously loaded record.
    DecodedText context;
    DecodedText sourcetext;
    DecodedText comment;
    QStringList translations;

    const uchar *messageEnd = messageArray + messageLength;

    for (const uchar *start = offsetArray; start != offsetArray + (numItems << 3); start += 8) {
        // The first four bytes of the entry hold the hash, which is not needed here.
        quint32 ro = read32(start + 4);
        if (ro >= messageLength) {
            cd.appendError(QLatin1String("QM-Format error"));
            return false;
        }
        const uchar *m = messageArray + ro;

        bool valid = true;
        bool endOfMessage = false;
        while (valid && !endOfMessage) {
            if (m == messageEnd) {
                // Ran off the end of the message data without seeing Tag_End.
                valid = false;
                break;
            }
            uchar tag = read8(m++);
            switch (tag) {
            case Tag_End:
                endOfMessage = true;
                break;
            case Tag_Translation: {
                quint32 len;
                // The payload is UTF-16, so its byte count must be even.
                if (!readLength(m, messageEnd, &len) || (len & 1)) {
                    valid = false;
                    break;
                }
                QString str = QString::fromUtf16((const ushort *)m, int(len / 2));
                // The data is stored big-endian; swap on little-endian hosts.
                if (QSysInfo::ByteOrder == QSysInfo::LittleEndian) {
                    for (int i = 0; i < str.length(); ++i)
                        str[i] = QChar((str.at(i).unicode() >> 8) +
                            ((str.at(i).unicode() << 8) & 0xff00));
                }
                translations << str;
                m += len;
                break;
            }
            case Tag_Obsolete1:
                if (messageEnd - m < 4)
                    valid = false;
                else
                    m += 4;
                break;
            case Tag_SourceText:
                valid = readByteField(m, messageEnd, codec, utf8Codec, &sourcetext);
                break;
            case Tag_Context:
                valid = readByteField(m, messageEnd, codec, utf8Codec, &context);
                break;
            case Tag_Comment:
                valid = readByteField(m, messageEnd, codec, utf8Codec, &comment);
                break;
            default:
                // Unknown tags are skipped one byte at a time.
                break;
            }
        }
        if (!valid) {
            cd.appendError(QLatin1String("QM-Format error"));
            return false;
        }

        TranslatorMessage msg;
        msg.setType(TranslatorMessage::Finished);
        if (translations.count() > 1) {
            // If guessPlurals is not false here, plural form discard messages
            // will be spewed out later.
            msg.setPlural(true);
        } else if (guessPlurals) {
            // This might cause false positives, so it is a fallback only.
            if (sourcetext.text.contains(strProN))
                msg.setPlural(true);
        }
        msg.setTranslations(translations);
        translations.clear();
        if (context.needs8Bit || sourcetext.needs8Bit || comment.needs8Bit) {
            if (utf8Codec && context.isUtf8 && sourcetext.isUtf8 && comment.isUtf8) {
                // The message is utf-8, but file is not.
                msg.setUtf8(true);
                msg.setContext(context.utf8);
                msg.setSourceText(sourcetext.utf8);
                msg.setComment(comment.utf8);
                translator.append(msg);
                continue;
            }
            if (!(context.isSystem && sourcetext.isSystem && comment.isSystem)) {
                cd.appendError(QLatin1String(
                        "Cannot read file with specified input codec"));
                return false;
            }
            // The message is 8-bit in the file's encoding (utf-8 or not).
        }
        msg.setContext(context.text);
        msg.setSourceText(sourcetext.text);
        msg.setComment(comment.text);
        translator.append(msg);
    }
    return ok;
}

/*
    Writes the messages of \a translator to \a dev in QM format.

    Obsolete messages, untranslated unfinished messages, unfinished messages
    when \a cd asks to ignore them, and (in id-based mode) messages without
    an id are left out. Counts of dropped data are reported through \a cd.
    Returns the result of Releaser::save().
*/
static bool saveQM(const Translator &translator, QIODevice &dev, ConversionData &cd)
{
    Releaser releaser;
    QLocale::Language l;
    QLocale::Country c;
    Translator::languageAndCountry(translator.languageCode(), &l, &c);
    QByteArray rules;
    if (getNumerusInfo(l, c, &rules, 0))
        releaser.setNumerusRules(rules);
    releaser.setCodecName(translator.codecName());

    int finished = 0;
    int unfinished = 0;
    int untranslated = 0;
    int missingIds = 0;
    int droppedData = 0;

    for (int i = 0; i != translator.messageCount(); ++i) {
        const TranslatorMessage &msg = translator.message(i);
        const TranslatorMessage::Type typ = msg.type();
        if (typ == TranslatorMessage::Obsolete)
            continue;

        if (cd.m_idBased && msg.id().isEmpty()) {
            ++missingIds;
            continue;
        }
        if (typ == TranslatorMessage::Unfinished) {
            if (msg.translation().isEmpty()) {
                ++untranslated;
                continue;
            }
            if (cd.ignoreUnfinished())
                continue;
            ++unfinished;
        } else {
            ++finished;
        }

        if (cd.m_idBased) {
            if (!msg.context().isEmpty() || !msg.comment().isEmpty())
                ++droppedData;
            releaser.insertIdBased(msg);
        } else {
            // Drop the comment in (context, sourceText, comment),
            // unless the context is empty,
            // unless (context, sourceText, "") already exists or
            // unless we already dropped the comment of (context,
            // sourceText, comment0).
            bool forceComment =
                    msg.comment().isEmpty()
                    || msg.context().isEmpty()
                    || translator.contains(msg.context(), msg.sourceText(), QString());
            releaser.insert(msg, forceComment);
        }
    }

    if (missingIds)
        cd.appendError(QCoreApplication::translate("LRelease",
            "Dropped %n message(s) which had no ID.", 0,
            QCoreApplication::CodecForTr, missingIds));
    if (droppedData)
        cd.appendError(QCoreApplication::translate("LRelease",
            "Excess context/disambiguation dropped from %n message(s).", 0,
            QCoreApplication::CodecForTr, droppedData));

    releaser.squeeze(cd.m_saveMode);
    bool saved = releaser.save(&dev);
    if (saved && cd.isVerbose()) {
        int generatedCount = finished + unfinished;
        cd.appendError(QCoreApplication::translate("LRelease",
            " Generated %n translation(s) (%1 finished and %2 unfinished)\n", 0,
            QCoreApplication::CodecForTr, generatedCount).arg(finished).arg(unfinished));
        if (untranslated)
            cd.appendError(QCoreApplication::translate("LRelease",
                " Ignored %n untranslated source text(s)\n", 0,
                QCoreApplication::CodecForTr, untranslated));
    }
    return saved;
}

// Registers the "qm" file format with its loader and saver.
// Run automatically through Q_CONSTRUCTOR_FUNCTION below.
int initQM()
{
    Translator::FileFormat format;

    format.extension = QLatin1String("qm");
    format.description = QObject::tr("Compiled Qt translations");
    format.fileType = Translator::FileFormat::TranslationBinary;
    format.priority = 0;
    format.loader = &loadQM;
    format.saver = &saveQM;
    Translator::registerFileFormat(format);

    return 1;
}

Q_CONSTRUCTOR_FUNCTION(initQM)

QT_END_NAMESPACE