summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@nokia.com>2009-03-18 20:42:04 (GMT)
committerThiago Macieira <thiago.macieira@nokia.com>2009-07-21 13:48:45 (GMT)
commite3c5ca076ee15975dd2d8973b871ec0115c614fc (patch)
tree2fb76915937fc79f9f922897fa76f8132652ad5b
parentb6b12bc6b8296d7e199cab0ece35c9eb9ae7fe64 (diff)
downloadQt-e3c5ca076ee15975dd2d8973b871ec0115c614fc.zip
Qt-e3c5ca076ee15975dd2d8973b871ec0115c614fc.tar.gz
Qt-e3c5ca076ee15975dd2d8973b871ec0115c614fc.tar.bz2
Add qt_string_normalize to do in-place Unicode normalization.
This way, we can improve QUrl parsing performance by avoiding unnecessary copies.
-rw-r--r--src/corelib/tools/qchar.cpp28
-rw-r--r--src/corelib/tools/qstring.cpp46
2 files changed, 37 insertions, 37 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp
index 88053d6..1558f7d 100644
--- a/src/corelib/tools/qchar.cpp
+++ b/src/corelib/tools/qchar.cpp
@@ -1421,16 +1421,15 @@ QDataStream &operator>>(QDataStream &in, QChar &chr)
// ---------------------------------------------------------------------------
-static QString decomposeHelper
- (const QString &str, bool canonical, QChar::UnicodeVersion version)
+static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
{
unsigned short buffer[3];
- QString s = str;
+ QString &s = *str;
const unsigned short *utf16 = s.utf16();
const unsigned short *uc = utf16 + s.length();
- while (uc != utf16) {
+ while (uc != utf16 + from) {
uint ucs4 = *(--uc);
if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
ushort high = *(uc - 1);
@@ -1453,8 +1452,6 @@ static QString decomposeHelper
utf16 = s.utf16();
uc = utf16 + pos + length;
}
-
- return s;
}
@@ -1489,17 +1486,17 @@ static ushort ligatureHelper(ushort u1, ushort u2)
return 0;
}
-static QString composeHelper(const QString &str)
+static void composeHelper(QString *str, int from)
{
- QString s = str;
+ QString &s = *str;
- if (s.length() < 2)
- return s;
+ if (s.length() - from < 2)
+ return;
// the loop can partly ignore high Unicode as all ligatures are in the BMP
int starter = 0;
int lastCombining = 0;
- int pos = 0;
+ int pos = from;
while (pos < s.length()) {
uint uc = s.utf16()[pos];
if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
@@ -1524,16 +1521,14 @@ static QString composeHelper(const QString &str)
lastCombining = combining;
++pos;
}
- return s;
}
-static QString canonicalOrderHelper
- (const QString &str, QChar::UnicodeVersion version)
+static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
{
- QString s = str;
+ QString &s = *str;
const int l = s.length()-1;
- int pos = 0;
+ int pos = from;
while (pos < l) {
int p2 = pos+1;
uint u1 = s.at(pos).unicode();
@@ -1593,7 +1588,6 @@ static QString canonicalOrderHelper
++pos;
}
}
- return s;
}
int QT_FASTCALL QUnicodeTables::script(unsigned int uc)
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index b97ba45..99fbaa9 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -6028,6 +6028,7 @@ QString QString::repeated(int times) const
return result;
}
+void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, int from);
/*!
\overload
\fn QString QString::normalized(NormalizationForm mode, QChar::UnicodeVersion version) const
@@ -6037,42 +6038,48 @@ QString QString::repeated(int times) const
*/
QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersion version) const
{
+ QString copy = *this;
+ qt_string_normalize(&copy, mode, version, 0);
+ return copy;
+}
+
+void qt_string_normalize(QString *data, QString::NormalizationForm mode, QChar::UnicodeVersion version, int from)
+{
bool simple = true;
- for (int i = 0; i < d->size; ++i) {
- if (d->data[i] >= 0x80) {
+ const QChar *p = data->constData();
+ int len = data->length();
+ for (int i = from; i < len; ++i) {
+ if (p[i].unicode() >= 0x80) {
simple = false;
break;
}
}
if (simple)
- return *this;
+ return;
- QString s = *this;
+ QString &s = *data;
if (version != CURRENT_VERSION) {
for (int i = 0; i < NumNormalizationCorrections; ++i) {
const NormalizationCorrection &n = uc_normalization_corrections[i];
if (n.version > version) {
+ int pos = from;
if (n.ucs4 > 0xffff) {
ushort ucs4High = QChar::highSurrogate(n.ucs4);
ushort ucs4Low = QChar::lowSurrogate(n.ucs4);
ushort oldHigh = QChar::highSurrogate(n.old_mapping);
ushort oldLow = QChar::lowSurrogate(n.old_mapping);
- int pos = 0;
- while (pos < s.d->size - 1) {
- if (s.d->data[pos] == ucs4High && s.d->data[pos + 1] == ucs4Low) {
- s.detach();
- s.d->data[pos] = oldHigh;
- s.d->data[pos + 1] = oldLow;
+ while (pos < s.length() - 1) {
+ if (s.at(pos).unicode() == ucs4High && s.at(pos + 1).unicode() == ucs4Low) {
+ s[pos] = oldHigh;
+ s[pos + 1] = oldLow;
++pos;
}
++pos;
}
} else {
- int pos = 0;
- while (pos < s.d->size) {
- if (s.d->data[pos] == n.ucs4) {
- s.detach();
- s.d->data[pos] = n.old_mapping;
+ while (pos < s.length()) {
+ if (s.at(pos).unicode() == n.ucs4) {
+ s[pos] = n.old_mapping;
}
++pos;
}
@@ -6080,15 +6087,14 @@ QString QString::normalized(QString::NormalizationForm mode, QChar::UnicodeVersi
}
}
}
- s = decomposeHelper(s, mode < QString::NormalizationForm_KD, version);
+ decomposeHelper(data, mode < QString::NormalizationForm_KD, version, from);
- s = canonicalOrderHelper(s, version);
+ canonicalOrderHelper(data, version, from);
if (mode == QString::NormalizationForm_D || mode == QString::NormalizationForm_KD)
- return s;
-
- return composeHelper(s);
+ return;
+ composeHelper(data, from);
}