diff options
author | Geir Vattekar <geir.vattekar@nokia.com> | 2010-09-24 07:37:07 (GMT) |
---|---|---|
committer | Geir Vattekar <geir.vattekar@nokia.com> | 2010-09-24 07:37:07 (GMT) |
commit | b21b3042c99cc400b523615f8331695f3a88da4b (patch) | |
tree | f1a36b5e6ca835297cabe7b9b469e5c7c523570f /util/unicode/main.cpp | |
parent | 4286d8d20eae7b0876acc5afcecebc03cfc2514a (diff) | |
parent | 9634e133db1bf50a55ed44b3fe01e49954c80d08 (diff) | |
download | Qt-b21b3042c99cc400b523615f8331695f3a88da4b.zip Qt-b21b3042c99cc400b523615f8331695f3a88da4b.tar.gz Qt-b21b3042c99cc400b523615f8331695f3a88da4b.tar.bz2 |
Merge branch '4.7' of git@scm.dev.nokia.troll.no:qt/qt-doc-team into 4.7
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r-- | util/unicode/main.cpp | 211 |
1 files changed, 121 insertions, 90 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index f2ebe7c..cfe5956 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -403,7 +403,7 @@ struct PropertyFlags { // from DerivedAge.txt QChar::UnicodeVersion age : 4; int digitValue; - uint line_break_class : 6; + LineBreakClass line_break_class; int mirrorDiff : 16; @@ -429,7 +429,7 @@ static int appendToSpecialCaseMap(const QList<int> &map) QList<int> utf16map; for (int i = 0; i < map.size(); ++i) { int val = map.at(i); - if (val > 0xffff) { + if (val >= 0x10000) { utf16map << QChar::highSurrogate(val); utf16map << QChar::lowSurrogate(val); } else { @@ -505,7 +505,7 @@ struct UnicodeData { // from BidiMirroring.txt int mirroredChar; - // CompositionExclusions.txt + // DerivedNormalizationProps.txt bool excludedComposition; // computed position of unicode property set @@ -726,8 +726,8 @@ static void readUnicodeData() data.p.category = categoryMap.value(properties[UD_Category], QChar::NoCategory); if (data.p.category == QChar::NoCategory) qFatal("unassigned char category: %s", properties[UD_Category].constData()); - data.p.combiningClass = properties[UD_CombiningClass].toInt(); + data.p.combiningClass = properties[UD_CombiningClass].toInt(); if (!combiningClassUsage.contains(data.p.combiningClass)) combiningClassUsage[data.p.combiningClass] = 1; else @@ -738,27 +738,29 @@ static void readUnicodeData() if (!properties[UD_UpperCase].isEmpty()) { int upperCase = properties[UD_UpperCase].toInt(&ok, 16); Q_ASSERT(ok); - if (qAbs(upperCase - codepoint) >= (1<<14)) + int diff = upperCase - codepoint; + if (qAbs(diff) >= (1<<14)) qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")"; - data.p.upperCaseDiff = upperCase - codepoint; - maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff)); - if (codepoint > 0xffff) { - // if the condition below doesn't hold anymore we need to modify our case folding code - //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); + data.p.upperCaseDiff = diff; + maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff)); + if (codepoint >= 0x10000 || upperCase >= 0x10000) { + // if the conditions below doesn't hold anymore we need to modify our upper casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase)); + Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase)); } } if (!properties[UD_LowerCase].isEmpty()) { int lowerCase = properties[UD_LowerCase].toInt(&ok, 16); Q_ASSERT(ok); - if (qAbs(lowerCase - codepoint) >= (1<<14)) + int diff = lowerCase - codepoint; + if (qAbs(diff) >= (1<<14)) qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")"; - data.p.lowerCaseDiff = lowerCase - codepoint; - maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff)); - if (codepoint > 0xffff) { - // if the condition below doesn't hold anymore we need to modify our case folding code - //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); + data.p.lowerCaseDiff = diff; + maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff)); + if (codepoint >= 0x10000 || lowerCase >= 0x10000) { + // if the conditions below doesn't hold anymore we need to modify our lower casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase)); + Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase)); } } // we want toTitleCase to map to ToUpper in case we don't have any titlecase. @@ -767,14 +769,15 @@ static void readUnicodeData() if (!properties[UD_TitleCase].isEmpty()) { int titleCase = properties[UD_TitleCase].toInt(&ok, 16); Q_ASSERT(ok); - if (qAbs(titleCase - codepoint) >= (1<<14)) + int diff = titleCase - codepoint; + if (qAbs(diff) >= (1<<14)) qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")"; - data.p.titleCaseDiff = titleCase - codepoint; - maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff)); - if (codepoint > 0xffff) { - // if the condition below doesn't hold anymore we need to modify our case folding code - //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); + data.p.titleCaseDiff = diff; + maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff)); + if (codepoint >= 0x10000 || titleCase >= 0x10000) { + // if the conditions below doesn't hold anymore we need to modify our title casing code Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase)); + Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase)); } } @@ -1003,16 +1006,16 @@ static void readDerivedNormalizationProps() && d.decomposition.size() > 1) { Q_ASSERT(d.decomposition.size() == 2); - uint part1 = d.decomposition.at(0); - uint part2 = d.decomposition.at(1); + int part1 = d.decomposition.at(0); + int part2 = d.decomposition.at(1); // all non-starters are listed in DerivedNormalizationProps.txt // and already excluded from composition Q_ASSERT(unicodeData.value(part1, UnicodeData(part1)).p.combiningClass == 0); ++numLigatures; - highestLigature = qMax(highestLigature, (int)part1); - Ligature l = {(ushort)part1, (ushort)part2, codepoint}; + highestLigature = qMax(highestLigature, part1); + Ligature l = {(ushort)part1, (ushort)part2, (ushort)codepoint}; ligatureHashes[part2].append(l); } } @@ -1109,6 +1112,7 @@ static void computeUniqueProperties() static void readLineBreak() { + qDebug() << "Reading LineBreak.txt"; QFile f("data/LineBreak.txt"); if (!f.exists()) qFatal("Couldn't find LineBreak.txt"); @@ -1145,7 +1149,7 @@ static void readLineBreak() Q_ASSERT(ok); } - LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned); + LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned); if (lb == LineBreak_Unassigned) qFatal("unassigned line break class: %s", l[1].constData()); @@ -1190,7 +1194,10 @@ static void readSpecialCasing() bool ok; int codepoint = l[0].trimmed().toInt(&ok, 16); Q_ASSERT(ok); - Q_ASSERT(codepoint <= 0xffff); + + // if the condition below doesn't hold anymore we need to modify our + // lower/upper/title casing code and case folding code + Q_ASSERT(codepoint < 0x10000); // qDebug() << "codepoint" << hex << codepoint; // qDebug() << line; @@ -1290,16 +1297,18 @@ static void readCaseFolding() UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint)); if (foldMap.size() == 1) { - if (qAbs(foldMap.at(0) - codepoint) >= (1<<14)) - qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")"; - ud.p.caseFoldDiff = foldMap.at(0) - codepoint; - maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff)); - if (codepoint > 0xffff) { - // if the condition below doesn't hold anymore we need to modify our case folding code - //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0)); - Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0))); + int caseFolded = foldMap.at(0); + int diff = caseFolded - codepoint; + if (qAbs(diff) >= (1<<14)) + qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")"; + ud.p.caseFoldDiff = diff; + maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff)); + if (codepoint >= 0x10000 || caseFolded >= 0x10000) { + // if the conditions below doesn't hold anymore we need to modify our case folding code + Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded)); + Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded)); } - if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff) + if (caseFolded != codepoint + ud.p.lowerCaseDiff) qDebug() << hex << codepoint; } else { qFatal("we currently don't support full case foldings"); @@ -1329,13 +1338,15 @@ static void readGraphemeBreak() int comment = line.indexOf('#'); if (comment >= 0) line = line.left(comment); + line.replace(" ", ""); if (line.isEmpty()) continue; QList<QByteArray> l = line.split(';'); + Q_ASSERT(l.size() == 2); - QByteArray codes = l[0].trimmed(); + QByteArray codes = l[0]; codes.replace("..", "."); QList<QByteArray> cl = codes.split('.'); @@ -1348,7 +1359,7 @@ static void readGraphemeBreak() Q_ASSERT(ok); } - GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned); + GraphemeBreak brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned); if (brk == GraphemeBreak_Unassigned) qFatal("unassigned grapheme break class: %s", l[1].constData()); @@ -1378,13 +1389,15 @@ static void readWordBreak() int comment = line.indexOf('#'); if (comment >= 0) line = line.left(comment); + line.replace(" ", ""); if (line.isEmpty()) continue; QList<QByteArray> l = line.split(';'); + Q_ASSERT(l.size() == 2); - QByteArray codes = l[0].trimmed(); + QByteArray codes = l[0]; codes.replace("..", "."); QList<QByteArray> cl = codes.split('.'); @@ -1397,7 +1410,7 @@ static void readWordBreak() Q_ASSERT(ok); } - WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned); + WordBreak brk = word_break_map.value(l[1], WordBreak_Unassigned); if (brk == WordBreak_Unassigned) qFatal("unassigned word break class: %s", l[1].constData()); @@ -1427,13 +1440,15 @@ static void readSentenceBreak() int comment = line.indexOf('#'); if (comment >= 0) line = line.left(comment); + line.replace(" ", ""); if (line.isEmpty()) continue; QList<QByteArray> l = line.split(';'); + Q_ASSERT(l.size() == 2); - QByteArray codes = l[0].trimmed(); + QByteArray codes = l[0]; codes.replace("..", "."); QList<QByteArray> cl = codes.split('.'); @@ -1446,7 +1461,7 @@ static void readSentenceBreak() Q_ASSERT(ok); } - SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned); + SentenceBreak brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned); if (brk == SentenceBreak_Unassigned) qFatal("unassigned sentence break class: %s", l[1].constData()); @@ -1624,17 +1639,22 @@ static void readBlocks() QByteArray blockName = line.mid(semicolon + 1); int blockIndex = blockNames.indexOf(blockName); - if (blockIndex < 0) { + if (blockIndex == -1) { + blockIndex = blockNames.size(); blockNames.append(blockName); - blockIndex = blockNames.indexOf(blockName); - Q_ASSERT(blockIndex >= 0); } - int dotdot = codePoints.indexOf(".."); - Q_ASSERT(dotdot >= 0); - bool unused; - int first = codePoints.left(dotdot).toInt(&unused, 16); - int last = codePoints.mid(dotdot + 2).toInt(&unused, 16); + codePoints.replace("..", "."); + QList<QByteArray> cl = codePoints.split('.'); + + bool ok; + int first = cl[0].toInt(&ok, 16); + Q_ASSERT(ok); + int last = first; + if (cl.size() == 2) { + last = cl[1].toInt(&ok, 16); + Q_ASSERT(ok); + } BlockInfo blockInfo = { blockIndex, first, last }; blockInfoList.append(blockInfo); @@ -1670,7 +1690,6 @@ static void readScripts() if (!f.exists()) qFatal("Couldn't find %s", files[i]); - f.open(QFile::ReadOnly); while (!f.atEnd()) { @@ -1693,28 +1712,25 @@ static void readScripts() QByteArray scriptName = line.mid(semicolon + 1); int scriptIndex = scriptNames.indexOf(scriptName); - if (scriptIndex < 0) { + if (scriptIndex == -1) { + scriptIndex = scriptNames.size(); scriptNames.append(scriptName); - scriptIndex = scriptNames.indexOf(scriptName); - Q_ASSERT(scriptIndex >= 0); } - int dotdot = codePoints.indexOf(".."); - bool unused; - int first = -1, last = -1; - if (dotdot >= 0) { - first = codePoints.left(dotdot).toInt(&unused, 16); - last = codePoints.mid(dotdot + 2).toInt(&unused, 16); - } else { - first = codePoints.toInt(&unused, 16); - } + codePoints.replace("..", "."); + QList<QByteArray> cl = codePoints.split('.'); - if (last != -1) { - for (int i = first; i <= last; ++i) - scriptAssignment[i] = scriptIndex; - } else { - scriptAssignment[first] = scriptIndex; + bool ok; + int first = cl[0].toInt(&ok, 16); + Q_ASSERT(ok); + int last = first; + if (cl.size() == 2) { + last = cl[1].toInt(&ok, 16); + Q_ASSERT(ok); } + + for (int i = first; i <= last; ++i) + scriptAssignment[i] = scriptIndex; } } } @@ -1849,21 +1865,18 @@ QByteArray createScriptTableDeclaration() declaration += ", /* U+"; declaration += QByteArray::number(block, 16).rightJustified(4, '0'); declaration += '-'; - declaration += - QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); + declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); declaration += " */\n"; } else { const int value = extraBlockList.size() + scriptSentinel; - const int offset = - ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount; + const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount; declaration += " "; declaration += QByteArray::number(value); declaration += ", /* U+"; declaration += QByteArray::number(block, 16).rightJustified(4, '0'); declaration += '-'; - declaration += - QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); + declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); declaration += " at offset "; declaration += QByteArray::number(offset); declaration += " */\n"; @@ -1880,16 +1893,14 @@ QByteArray createScriptTableDeclaration() for (int i = 0; i < extraBlockList.size(); ++i) { const int value = i + scriptSentinel; - const int offset = - ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount; + const int offset = ((value - scriptSentinel) * unicodeBlockSize) + unicodeBlockCount; const ExtraBlock &extraBlock = extraBlockList.at(i); const int block = extraBlock.block; declaration += "\n\n /* U+"; declaration += QByteArray::number(block, 16).rightJustified(4, '0'); declaration += '-'; - declaration += - QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); + declaration += QByteArray::number(block + unicodeBlockSize - 1, 16).rightJustified(4, '0'); declaration += " at offset "; declaration += QByteArray::number(offset); declaration += " */\n "; @@ -1905,9 +1916,24 @@ QByteArray createScriptTableDeclaration() else declaration += ' '; } + if (declaration.endsWith(' ')) + declaration.chop(1); } declaration += "\n};\n\n} // namespace QUnicodeTables\n\n"; + declaration += + "Q_CORE_EXPORT int QT_FASTCALL QUnicodeTables::script(uint ucs4)\n" + "{\n" + " if (ucs4 > 0xffff)\n" + " return Common;\n" + " int script = uc_scripts[ucs4 >> 7];\n" + " if (script < ScriptSentinel)\n" + " return script;\n" + " script = (((script - ScriptSentinel) * UnicodeBlockSize) + UnicodeBlockCount);\n" + " script = uc_scripts[script + (ucs4 & 0x7f)];\n" + " return script;\n" + "}\n\n"; + qDebug("createScriptTableDeclaration: table size is %d bytes", unicodeBlockCount + (extraBlockList.size() * unicodeBlockSize)); @@ -2168,6 +2194,11 @@ static QByteArray createPropertyInfo() " return uc_properties + index;\n" "}\n\n"; + out += "Q_CORE_EXPORT QUnicodeTables::LineBreakClass QT_FASTCALL QUnicodeTables::lineBreakClass(uint ucs4)\n" + "{\n" + " return (QUnicodeTables::LineBreakClass)qGetProp(ucs4)->line_break_class;\n" + "}\n\n"; + out += "static const ushort specialCaseMap[] = {\n "; for (int i = 0; i < specialCaseMap.size(); ++i) { out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i), 16); @@ -2204,7 +2235,7 @@ static QByteArray createCompositionInfo() const int SMP_BLOCKSIZE = 256; const int SMP_SHIFT = 8; - if(SMP_END <= highestComposedCharacter) + if (SMP_END <= highestComposedCharacter) qFatal("end of table smaller than highest composed character at %x", highestComposedCharacter); QList<DecompositionBlock> blocks; @@ -2417,15 +2448,15 @@ static QByteArray createLigatureInfo() int uc = block*BMP_BLOCKSIZE + i; QList<Ligature> l = ligatureHashes.value(uc); if (!l.isEmpty()) { - b.decompositionPositions.append(tableIndex); qSort(l); ligatures.append(l.size()); - for (int i = 0; i < l.size(); ++i) { - Q_ASSERT(l.at(i).u2 == uc); - ligatures.append(l.at(i).u1); - ligatures.append(l.at(i).ligature); + for (int j = 0; j < l.size(); ++j) { + Q_ASSERT(l.at(j).u2 == uc); + ligatures.append(l.at(j).u1); + ligatures.append(l.at(j).ligature); } + b.decompositionPositions.append(tableIndex); tableIndex += 2*l.size() + 1; } else { b.decompositionPositions.append(0xffff); @@ -2450,12 +2481,11 @@ static QByteArray createLigatureInfo() qDebug(" %d unique blocks in BMP.", blocks.size()); qDebug(" block data uses: %d bytes", bmp_block_data); qDebug(" trie data uses : %d bytes", bmp_trie); - qDebug(" ligature data uses : %d bytes", ligatures.size()*2); - qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2); + qDebug("\n ligature data uses : %d bytes", ligatures.size()*2); + qDebug(" memory usage: %d bytes", bmp_mem + ligatures.size() * 2); QByteArray out; - out += "static const unsigned short uc_ligature_trie[] = {\n"; // first write the map @@ -2531,6 +2561,7 @@ QByteArray createCasingInfo() return out; } + int main(int, char **) { initAgeMap(); @@ -2650,14 +2681,14 @@ int main(int, char **) f.write("\n"); f.write(scriptEnumDeclaration); f.write("\n"); - f.write(lineBreakClass); - f.write("\n"); f.write(grapheme_break_string); f.write("\n"); f.write(word_break_string); f.write("\n"); f.write(sentence_break_string); f.write("\n"); + f.write(lineBreakClass); + f.write("\n"); f.write(methods); f.write("} // namespace QUnicodeTables\n\n" "QT_END_NAMESPACE\n\n" |