summaryrefslogtreecommitdiffstats
path: root/util/unicode
diff options
context:
space:
mode:
authorRitt Konstantin <ritt.ks@gmail.com>2010-05-11 02:10:31 (GMT)
committerThiago Macieira <thiago.macieira@nokia.com>2010-09-21 10:36:22 (GMT)
commit18da74c622108dc955682f95af7795d635c405c5 (patch)
treeb40f684f4bfb1a9b127b214f16110cb02c87615f /util/unicode
parentfea278afe6ed287f7dc74ee1b90249d00bae2162 (diff)
downloadQt-18da74c622108dc955682f95af7795d635c405c5.zip
Qt-18da74c622108dc955682f95af7795d635c405c5.tar.gz
Qt-18da74c622108dc955682f95af7795d635c405c5.tar.bz2
minor improvements for data parsers
improve readability of upperCase/lowerCase/titleCase/caseFolded code paths; additional tests make some parsers more consistent; minor speed-up
Diffstat (limited to 'util/unicode')
-rw-r--r--util/unicode/main.cpp138
1 files changed, 77 insertions, 61 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index 5ae9c1f..33fa8ae 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -738,27 +738,29 @@ static void readUnicodeData()
if (!properties[UD_UpperCase].isEmpty()) {
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(upperCase - codepoint) >= (1<<14))
+ int diff = upperCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "upperCaseDiff exceeded (" << hex << codepoint << "->" << upperCase << ")";
- data.p.upperCaseDiff = upperCase - codepoint;
- maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(data.p.upperCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.upperCaseDiff = diff;
+ maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || upperCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our upper casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
}
}
if (!properties[UD_LowerCase].isEmpty()) {
int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(lowerCase - codepoint) >= (1<<14))
+ int diff = lowerCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "lowerCaseDiff exceeded (" << hex << codepoint << "->" << lowerCase << ")";
- data.p.lowerCaseDiff = lowerCase - codepoint;
- maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(data.p.lowerCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.lowerCaseDiff = diff;
+ maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || lowerCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our lower casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(lowerCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(lowerCase));
}
}
// we want toTitleCase to map to ToUpper in case we don't have any titlecase.
@@ -767,14 +769,15 @@ static void readUnicodeData()
if (!properties[UD_TitleCase].isEmpty()) {
int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
Q_ASSERT(ok);
- if (qAbs(titleCase - codepoint) >= (1<<14))
+ int diff = titleCase - codepoint;
+ if (qAbs(diff) >= (1<<14))
qWarning() << "titleCaseDiff exceeded (" << hex << codepoint << "->" << titleCase << ")";
- data.p.titleCaseDiff = titleCase - codepoint;
- maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(data.p.titleCaseDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
+ data.p.titleCaseDiff = diff;
+ maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || titleCase >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our title casing code
Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
}
}
@@ -1109,6 +1112,7 @@ static void computeUniqueProperties()
static void readLineBreak()
{
+ qDebug() << "Reading LineBreak.txt";
QFile f("data/LineBreak.txt");
if (!f.exists())
qFatal("Couldn't find LineBreak.txt");
@@ -1145,7 +1149,7 @@ static void readLineBreak()
Q_ASSERT(ok);
}
- LineBreakClass lb = line_break_map.value(l[1].trimmed(), LineBreak_Unassigned);
+ LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
if (lb == LineBreak_Unassigned)
qFatal("unassigned line break class: %s", l[1].constData());
@@ -1190,7 +1194,10 @@ static void readSpecialCasing()
bool ok;
int codepoint = l[0].trimmed().toInt(&ok, 16);
Q_ASSERT(ok);
- Q_ASSERT(codepoint <= 0xffff);
+
+ // if the condition below doesn't hold anymore we need to modify our
+ // lower/upper/title casing code and case folding code
+ Q_ASSERT(codepoint < 0x10000);
// qDebug() << "codepoint" << hex << codepoint;
// qDebug() << line;
@@ -1290,16 +1297,18 @@ static void readCaseFolding()
UnicodeData ud = unicodeData.value(codepoint, UnicodeData(codepoint));
if (foldMap.size() == 1) {
- if (qAbs(foldMap.at(0) - codepoint) >= (1<<14))
- qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << foldMap.at(0) << ")";
- ud.p.caseFoldDiff = foldMap.at(0) - codepoint;
- maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(ud.p.caseFoldDiff));
- if (codepoint > 0xffff) {
- // if the condition below doesn't hold anymore we need to modify our case folding code
- //qDebug() << codepoint << QChar::highSurrogate(codepoint) << QChar::highSurrogate(foldMap.at(0));
- Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(foldMap.at(0)));
+ int caseFolded = foldMap.at(0);
+ int diff = caseFolded - codepoint;
+ if (qAbs(diff) >= (1<<14))
+ qWarning() << "caseFoldDiff exceeded (" << hex << codepoint << "->" << caseFolded << ")";
+ ud.p.caseFoldDiff = diff;
+ maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
+ if (codepoint >= 0x10000 || caseFolded >= 0x10000) {
+ // if the conditions below doesn't hold anymore we need to modify our case folding code
+ Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
+ Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
}
- if (foldMap.at(0) != codepoint + ud.p.lowerCaseDiff)
+ if (caseFolded != codepoint + ud.p.lowerCaseDiff)
qDebug() << hex << codepoint;
} else {
qFatal("we currently don't support full case foldings");
@@ -1329,13 +1338,15 @@ static void readGraphemeBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1348,7 +1359,7 @@ static void readGraphemeBreak()
Q_ASSERT(ok);
}
- GraphemeBreak brk = grapheme_break_map.value(l[1].trimmed(), GraphemeBreak_Unassigned);
+ GraphemeBreak brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
if (brk == GraphemeBreak_Unassigned)
qFatal("unassigned grapheme break class: %s", l[1].constData());
@@ -1378,13 +1389,15 @@ static void readWordBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1397,7 +1410,7 @@ static void readWordBreak()
Q_ASSERT(ok);
}
- WordBreak brk = word_break_map.value(l[1].trimmed(), WordBreak_Unassigned);
+ WordBreak brk = word_break_map.value(l[1], WordBreak_Unassigned);
if (brk == WordBreak_Unassigned)
qFatal("unassigned word break class: %s", l[1].constData());
@@ -1427,13 +1440,15 @@ static void readSentenceBreak()
int comment = line.indexOf('#');
if (comment >= 0)
line = line.left(comment);
+ line.replace(" ", "");
if (line.isEmpty())
continue;
QList<QByteArray> l = line.split(';');
+ Q_ASSERT(l.size() == 2);
- QByteArray codes = l[0].trimmed();
+ QByteArray codes = l[0];
codes.replace("..", ".");
QList<QByteArray> cl = codes.split('.');
@@ -1446,7 +1461,7 @@ static void readSentenceBreak()
Q_ASSERT(ok);
}
- SentenceBreak brk = sentence_break_map.value(l[1].trimmed(), SentenceBreak_Unassigned);
+ SentenceBreak brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
if (brk == SentenceBreak_Unassigned)
qFatal("unassigned sentence break class: %s", l[1].constData());
@@ -1624,17 +1639,22 @@ static void readBlocks()
QByteArray blockName = line.mid(semicolon + 1);
int blockIndex = blockNames.indexOf(blockName);
- if (blockIndex < 0) {
+ if (blockIndex == -1) {
+ blockIndex = blockNames.size();
blockNames.append(blockName);
- blockIndex = blockNames.indexOf(blockName);
- Q_ASSERT(blockIndex >= 0);
}
- int dotdot = codePoints.indexOf("..");
- Q_ASSERT(dotdot >= 0);
- bool unused;
- int first = codePoints.left(dotdot).toInt(&unused, 16);
- int last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
+ codePoints.replace("..", ".");
+ QList<QByteArray> cl = codePoints.split('.');
+
+ bool ok;
+ int first = cl[0].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ int last = first;
+ if (cl.size() == 2) {
+ last = cl[1].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ }
BlockInfo blockInfo = { blockIndex, first, last };
blockInfoList.append(blockInfo);
@@ -1670,7 +1690,6 @@ static void readScripts()
if (!f.exists())
qFatal("Couldn't find %s", files[i]);
-
f.open(QFile::ReadOnly);
while (!f.atEnd()) {
@@ -1693,28 +1712,25 @@ static void readScripts()
QByteArray scriptName = line.mid(semicolon + 1);
int scriptIndex = scriptNames.indexOf(scriptName);
- if (scriptIndex < 0) {
+ if (scriptIndex == -1) {
+ scriptIndex = scriptNames.size();
scriptNames.append(scriptName);
- scriptIndex = scriptNames.indexOf(scriptName);
- Q_ASSERT(scriptIndex >= 0);
}
- int dotdot = codePoints.indexOf("..");
- bool unused;
- int first = -1, last = -1;
- if (dotdot >= 0) {
- first = codePoints.left(dotdot).toInt(&unused, 16);
- last = codePoints.mid(dotdot + 2).toInt(&unused, 16);
- } else {
- first = codePoints.toInt(&unused, 16);
- }
+ codePoints.replace("..", ".");
+ QList<QByteArray> cl = codePoints.split('.');
- if (last != -1) {
- for (int i = first; i <= last; ++i)
- scriptAssignment[i] = scriptIndex;
- } else {
- scriptAssignment[first] = scriptIndex;
+ bool ok;
+ int first = cl[0].toInt(&ok, 16);
+ Q_ASSERT(ok);
+ int last = first;
+ if (cl.size() == 2) {
+ last = cl[1].toInt(&ok, 16);
+ Q_ASSERT(ok);
}
+
+ for (int i = first; i <= last; ++i)
+ scriptAssignment[i] = scriptIndex;
}
}
}