From e3c5ca076ee15975dd2d8973b871ec0115c614fc Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 18 Mar 2009 21:42:04 +0100 Subject: Add qt_string_normalize to do in-place Unicode normalization. This way, we can improve QUrl parsing performance by avoiding unnecessary copies. --- src/corelib/tools/qchar.cpp | 28 +++++++++++--------------- src/corelib/tools/qstring.cpp | 46 ++++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp index 88053d6..1558f7d 100644 --- a/src/corelib/tools/qchar.cpp +++ b/src/corelib/tools/qchar.cpp @@ -1421,16 +1421,15 @@ QDataStream &operator>>(QDataStream &in, QChar &chr) // --------------------------------------------------------------------------- -static QString decomposeHelper - (const QString &str, bool canonical, QChar::UnicodeVersion version) +static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from) { unsigned short buffer[3]; - QString s = str; + QString &s = *str; const unsigned short *utf16 = s.utf16(); const unsigned short *uc = utf16 + s.length(); - while (uc != utf16) { + while (uc != utf16 + from) { uint ucs4 = *(--uc); if (QChar(ucs4).isLowSurrogate() && uc != utf16) { ushort high = *(uc - 1); @@ -1453,8 +1452,6 @@ static QString decomposeHelper utf16 = s.utf16(); uc = utf16 + pos + length; } - - return s; } @@ -1489,17 +1486,17 @@ static ushort ligatureHelper(ushort u1, ushort u2) return 0; } -static QString composeHelper(const QString &str) +static void composeHelper(QString *str, int from) { - QString s = str; + QString &s = *str; - if (s.length() < 2) - return s; + if (s.length() - from < 2) + return; // the loop can partly ignore high Unicode as all ligatures are in the BMP int starter = 0; int lastCombining = 0; - int pos = 0; + int pos = from; while (pos < s.length()) { uint uc = s.utf16()[pos]; if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { @@ 
-1524,16 +1521,14 @@ static QString composeHelper(const QString &str) lastCombining = combining; ++pos; } - return s; } -static QString canonicalOrderHelper - (const QString &str, QChar::UnicodeVersion version) +static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from) { - QString s = str; + QString &s = *str; const int l = s.length()-1; - int pos = 0; + int pos = from; while (pos < l) { int p2 = pos+1; uint u1 = s.at(pos).unicode(); @@ -1593,7 +1588,6 @@ static QString canonicalOrderHelper ++pos; } } - return s; } int QT_FASTCALL QUnicodeTables::script(unsigned int uc) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index b97ba45..99fbaa9 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -6028,6 +6028,7 @@ QString QString::repeated(int times) const return result; } +void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, int from); /*! \overload \fn QString QString::normalized(NormalizationForm mode, QChar::UnicodeVersion version) const @@ -6037,42 +6038,48 @@ QString QString::repeated(int times) const */ QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const { + QString copy = *this; + qt_string_normalize(&copy, mode, version, 0); + return copy; +} + +void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, int from) +{ bool simple = true; - for (int i = 0; i < d->size; ++i) { - if (d->data[i] >= 0x80) { + const QChar *p = data->constData(); + int len = data->length(); + for (int i = from; i < len; ++i) { + if (p[i].unicode() >= 0x80) { simple = false; break; } } if (simple) - return *this; + return; - QString s = *this; + QString &s = *data; if (version != CURRENT_VERSION) { for (int i = 0; i < NumNormalizationCorrections; ++i) { const NormalizationCorrection &n = uc_normalization_corrections[i]; if (n.version > version) { + int pos = from; if 
(n.ucs4 > 0xffff) { ushort ucs4High = QChar::highSurrogate(n.ucs4); ushort ucs4Low = QChar::lowSurrogate(n.ucs4); ushort oldHigh = QChar::highSurrogate(n.old_mapping); ushort oldLow = QChar::lowSurrogate(n.old_mapping); - int pos = 0; - while (pos < s.d->size - 1) { - if (s.d->data[pos] == ucs4High && s.d->data[pos + 1] == ucs4Low) { - s.detach(); - s.d->data[pos] = oldHigh; - s.d->data[pos + 1] = oldLow; + while (pos < s.length() - 1) { + if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) { + s[pos] = oldHigh; + s[pos + 1] = oldLow; ++pos; } ++pos; } } else { - int pos = 0; - while (pos < s.d->size) { - if (s.d->data[pos] == n.ucs4) { - s.detach(); - s.d->data[pos] = n.old_mapping; + while (pos < s.length()) { + if (s.at(pos).unicode() == n.ucs4) { + s[pos] = n.old_mapping; } ++pos; } @@ -6080,15 +6087,14 @@ QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersi } } } - s = decomposeHelper(s, mode < QString::NormalizationForm_KD, version); + decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from); - s = canonicalOrderHelper(s, version); + canonicalOrderHelper(data, version, from); if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD) - return s; - - return composeHelper(s); + return; + composeHelper(data, from); } -- cgit v0.12