From 155a7ceeed542911817dfaa6b2959717f6c92735 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 29 Nov 2010 17:38:45 +0100 Subject: define a constant for the expected Properties struct size Merge-request: 946 Reviewed-by: Thiago Macieira --- util/unicode/main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index cfe5956..6949e4c 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -370,6 +370,7 @@ static const char *methods = " inline int script(const QChar &ch)\n" " { return script(ch.unicode()); }\n\n"; +static const int SizeOfPropertiesStruct = 20; struct PropertyFlags { bool operator ==(const PropertyFlags &o) { @@ -2031,8 +2032,8 @@ static QByteArray createPropertyInfo() qDebug(" block data uses: %d bytes", smp_block_data); qDebug(" trie data uses : %d bytes", smp_trie); - qDebug("\n properties use : %d bytes", uniqueProperties.size()*20); - qDebug(" memory usage: %d bytes", bmp_mem+smp_mem + uniqueProperties.size()*20); + qDebug("\n properties uses : %d bytes", uniqueProperties.size() * SizeOfPropertiesStruct); + qDebug(" memory usage: %d bytes", bmp_mem + smp_mem + uniqueProperties.size() * SizeOfPropertiesStruct); QByteArray out; out += "static const unsigned short uc_property_trie[] = {\n"; -- cgit v0.12 From 52abf69e3ecb0c7d7a7be0cd390afd05bb5999d4 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 29 Nov 2010 17:38:46 +0100 Subject: fix a typo in the code range and add the curly braces to satisfy the coding style Merge-request: 946 Reviewed-by: Thiago Macieira --- util/unicode/main.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 6949e4c..8b505c4 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -461,18 +461,22 @@ struct UnicodeData { p.combiningClass = 0; p.direction = QChar::DirL; + // DerivedBidiClass.txt // DirR for: U+0590..U+05FF, U+07C0..U+08FF, 
U+FB1D..U+FB4F, U+10800..U+10FFF if ((codepoint >= 0x590 && codepoint <= 0x5ff) || (codepoint >= 0x7c0 && codepoint <= 0x8ff) || (codepoint >= 0xfb1d && codepoint <= 0xfb4f) - || (codepoint >= 0x10800 && codepoint <= 0x10fff)) + || (codepoint >= 0x10800 && codepoint <= 0x10fff)) { p.direction = QChar::DirR; - // DirAL for: U+0600..U+07BF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFE + } + // DirAL for: U+0600..U+07BF, U+FB50..U+FDFF, U+FE70..U+FEFF + // minus noncharacter code points (intersects with U+FDD0..U+FDEF) if ((codepoint >= 0x600 && codepoint <= 0x7bf) || (codepoint >= 0xfb50 && codepoint <= 0xfdcf) || (codepoint >= 0xfdf0 && codepoint <= 0xfdff) - || (codepoint >= 0xfe70 && codepoint <= 0xfefe)) + || (codepoint >= 0xfe70 && codepoint <= 0xfeff)) { p.direction = QChar::DirAL; + } mirroredChar = 0; decompositionType = QChar::NoDecomposition; -- cgit v0.12 From 1a204f01b5f2dd2cfea81d371f476f8eb1d895ee Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 29 Nov 2010 17:38:46 +0100 Subject: make the ArabicShaping parser a bit stricter warn and halt if an unassigned or unhandled joining value is met. this doesn't affect the generated tables but makes upgrading to newer UCD versions a bit easier and safer in general. 
Merge-request: 946 Reviewed-by: Thiago Macieira --- util/unicode/main.cpp | 69 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp index 8b505c4..ccb238b 100644 --- a/util/unicode/main.cpp +++ b/util/unicode/main.cpp @@ -85,6 +85,41 @@ static void initAgeMap() } +enum Joining { + Joining_None, + Joining_Left, + Joining_Causing, + Joining_Dual, + Joining_Right, + Joining_Transparent + + , Joining_Unassigned +}; + +static QHash joining_map; + +static void initJoiningMap() +{ + struct JoiningList { + Joining joining; + const char *name; + } joinings[] = { + { Joining_None, "U" }, + { Joining_Left, "L" }, + { Joining_Causing, "C" }, + { Joining_Dual, "D" }, + { Joining_Right, "R" }, + { Joining_Transparent, "T" }, + { Joining_Unassigned, 0 } + }; + JoiningList *d = joinings; + while (d->name) { + joining_map.insert(d->name, d->joining); + ++d; + } +} + + static const char *grapheme_break_string = " enum GraphemeBreak {\n" " GraphemeBreakOther,\n" @@ -881,24 +916,31 @@ static void readArabicShaping() if (line.isEmpty()) continue; - QList shaping = line.split(';'); - Q_ASSERT(shaping.size() == 4); + QList l = line.split(';'); + Q_ASSERT(l.size() == 4); bool ok; - int codepoint = shaping[0].toInt(&ok, 16); + int codepoint = l[0].toInt(&ok, 16); Q_ASSERT(ok); - QChar::Joining j = QChar::OtherJoining; - QByteArray shape = shaping[2].trimmed(); - if (shape == "R") - j = QChar::Right; - else if (shape == "D") - j = QChar::Dual; - else if (shape == "C") - j = QChar::Center; + Joining joining = joining_map.value(l[2].trimmed(), Joining_Unassigned); + if (joining == Joining_Unassigned) + qFatal("unassigned or unhandled joining value: %s", l[2].constData()); + + if (joining == Joining_Left) { + // There are currently no characters of joining type Left_Joining defined in Unicode. 
+ qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", codepoint, l[2].constData()); + } UnicodeData d = unicodeData.value(codepoint, UnicodeData(codepoint)); - d.p.joining = j; + if (joining == Joining_Right) + d.p.joining = QChar::Right; + else if (joining == Joining_Dual) + d.p.joining = QChar::Dual; + else if (joining == Joining_Causing) + d.p.joining = QChar::Center; + else + d.p.joining = QChar::OtherJoining; unicodeData.insert(codepoint, d); } } @@ -2571,8 +2613,9 @@ int main(int, char **) { initAgeMap(); initCategoryMap(); - initDirectionMap(); initDecompositionMap(); + initDirectionMap(); + initJoiningMap(); initGraphemeBreak(); initWordBreak(); initSentenceBreak(); -- cgit v0.12 From ac8535b0277670e2a8f572045306e957ffa4dfc1 Mon Sep 17 00:00:00 2001 From: Ritt Konstantin Date: Mon, 29 Nov 2010 18:22:37 +0100 Subject: add QChar::currentUnicodeVersion() static member Merge-request: 2482 Reviewed-by: Thiago Macieira --- src/corelib/tools/qchar.cpp | 9 +++++++++ src/corelib/tools/qchar.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp index fba0bd2..a99b313 100644 --- a/src/corelib/tools/qchar.cpp +++ b/src/corelib/tools/qchar.cpp @@ -1069,6 +1069,15 @@ QChar::UnicodeVersion QChar::unicodeVersion(ushort ucs2) return (QChar::UnicodeVersion) qGetProp(ucs2)->unicodeVersion; } +/*! + \since 4.8 + + Returns the most recent supported Unicode version. +*/ +QChar::UnicodeVersion QChar::currentUnicodeVersion() +{ + return UNICODE_DATA_VERSION; +} /*! 
Returns the lowercase equivalent if the character is uppercase or titlecase; diff --git a/src/corelib/tools/qchar.h b/src/corelib/tools/qchar.h index b9e7e01..ecc6603 100644 --- a/src/corelib/tools/qchar.h +++ b/src/corelib/tools/qchar.h @@ -334,6 +334,8 @@ public: static UnicodeVersion QT_FASTCALL unicodeVersion(uint ucs4); static UnicodeVersion QT_FASTCALL unicodeVersion(ushort ucs2); + static UnicodeVersion QT_FASTCALL currentUnicodeVersion(); + static QString QT_FASTCALL decomposition(uint ucs4); #ifdef QT3_SUPPORT -- cgit v0.12 From 3f314cb73b9404ac899d503c1ec16aeafe33ea4b Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 29 Nov 2010 18:45:00 +0100 Subject: add reminders for Qt 5.0 * QChar::NoCategory is a `fake` category since there is no relevant equivalent for it in the Unicode specs; the default category for invalid/unassigned codepoints is QChar::Other_NotAssigned. QChar::NoCategory has already caused some trouble in the past, so let's remember that lesson and not repeat this mistake; * QChar::Unicode_Unassigned == 0 and thus it is less than any other QChar::UnicodeVersion value, which makes testing for unassigned (in some Unicode version) codepoints a bit more complex (e.g. `if (v == QChar::Unicode_Unassigned || v > QChar::Unicode_3_1)` to check if some codepoint is unassigned for Unicode 3.1); * QChar::Punctuation_Dask was just a typo. 
Merge-request: 947 Reviewed-by: Thiago Macieira --- src/corelib/tools/qchar.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/corelib/tools/qchar.h b/src/corelib/tools/qchar.h index ecc6603..8991d07 100644 --- a/src/corelib/tools/qchar.h +++ b/src/corelib/tools/qchar.h @@ -105,7 +105,7 @@ public: enum Category { - NoCategory, + NoCategory, // ### Qt 5: replace with Other_NotAssigned Mark_NonSpacing, // Mn Mark_SpacingCombining, // Mc @@ -144,7 +144,7 @@ public: Symbol_Modifier, // Sk Symbol_Other, // So - Punctuation_Dask = Punctuation_Dash // oops + Punctuation_Dask = Punctuation_Dash // ### Qt 5: remove }; enum Direction @@ -210,7 +210,7 @@ public: }; enum UnicodeVersion { - Unicode_Unassigned, + Unicode_Unassigned, // ### Qt 5: assign with some constantly big value Unicode_1_1, Unicode_2_0, Unicode_2_1_2, -- cgit v0.12 From 3408dd34340ba7570feebcf09dbec9fb8db15736 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Mon, 29 Nov 2010 18:47:04 +0100 Subject: minor optimization: decrease amount of possible detaches to 1 Merge-request: 2503 Reviewed-by: Thiago Macieira --- src/corelib/tools/qstring.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index c30af64..36b01d2 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -6261,28 +6261,32 @@ void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar:: if (version == QChar::Unicode_Unassigned) { version = UNICODE_DATA_VERSION; } else if (version != UNICODE_DATA_VERSION) { - QString &s = *data; + const QString &s = *data; + QChar *d = 0; for (int i = 0; i < NumNormalizationCorrections; ++i) { const NormalizationCorrection &n = uc_normalization_corrections[i]; if (n.version > version) { int pos = from; - if (n.ucs4 > 0xffff) { + if (QChar::requiresSurrogates(n.ucs4)) { ushort ucs4High = QChar::highSurrogate(n.ucs4); ushort ucs4Low = 
QChar::lowSurrogate(n.ucs4); ushort oldHigh = QChar::highSurrogate(n.old_mapping); ushort oldLow = QChar::lowSurrogate(n.old_mapping); while (pos < s.length() - 1) { if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) { - s[pos] = oldHigh; - s[pos + 1] = oldLow; - ++pos; + if (!d) + d = data->data(); + d[pos] = QChar(oldHigh); + d[++pos] = QChar(oldLow); } ++pos; } } else { while (pos < s.length()) { if (s.at(pos).unicode() == n.ucs4) { - s[pos] = n.old_mapping; + if (!d) + d = data->data(); + d[pos] = QChar(n.old_mapping); } ++pos; } -- cgit v0.12