summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thiago Macieira <thiago.macieira@nokia.com>, 2009-05-19 12:38:49 (GMT)
committer: Thiago Macieira <thiago.macieira@nokia.com>, 2009-05-22 12:17:57 (GMT)
commit1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240 (patch)
tree8572f5ba08537661a62bfb36cff8c8e9fe27381e
parent2387c77787ec279981d58c46cfb5bde3e1530790 (diff)
downloadQt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.zip
Qt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.tar.gz
Qt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.tar.bz2
Reintroduce the unaligned-unaligned 32-bit code that I had removed out of ignorance.
If both pointers are equally out of 4-byte alignment (each 2 bytes past a 4-byte boundary), comparing the first 16-bit character and advancing both pointers brings them to a 4-byte boundary, so the rest can be compared 32 bits at a time. Lars's code had this before, but I misunderstood it and removed it, thinking it was doing misaligned accesses. I experimented with moving the tail comparison above the 32-bit comparison to save a register, but it made things worse.

Reviewed-By: Bradley T. Hughes
-rw-r--r-- src/corelib/tools/qstring.cpp 45
-rw-r--r-- tests/benchmarks/qstring/main.cpp 41
2 files changed, 59 insertions, 27 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 456fdfd..29509c5 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -206,12 +206,12 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
// block of data, with 4194304 iterations (per iteration):
// operation usec cpu ticks
// memcmp 330 710
- // 16-bit 135 285-290
- // 32-bit aligned 69.7 135-145
+ // 16-bit 79 167-171
+ // 32-bit aligned 49 105-109
//
// Testing also indicates that unaligned 32-bit loads are as
// performant as 32-bit aligned.
- if (a == b)
+ if (a == b || !length)
return true;
register union {
@@ -223,24 +223,37 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
sb.w = b;
// check alignment
- bool unaligned = (sa.value | sb.value) & 2;
-#if defined(__i386__) || defined(__x86_64__) || defined(_M_X64_)
- unaligned = false;
-#endif
- if (!unaligned) {
- // both addresses are 4-bytes aligned (or this is an x86)
+ if ((sa.value & 2) == (sb.value & 2)) {
+ // both addresses have the same alignment
+ if (sa.value & 2) {
+ // both addresses are not aligned to 4-bytes boundaries
+ // compare the first character
+ if (*sa.w != *sb.w)
+ return false;
+ --length;
+ ++sa.w;
+ ++sb.w;
+
+ // now both addresses are 4-bytes aligned
+ }
+
+ // both addresses are 4-bytes aligned
// do a fast 32-bit comparison
- for (register int halfLength = length / 2; halfLength; --halfLength, ++sa.d, ++sb.d) {
+ register const quint32 *e = sa.d + (length >> 1);
+ for ( ; sa.d != e; ++sa.d, ++sb.d) {
if (*sa.d != *sb.d)
return false;
}
- return length & 1 ? (*sa.w == *sb.w) : true;
- }
- // one or both of the addresses isn't 2-byte aligned
- for ( ; length; --length, ++sa.w, ++sb.w) {
- if (*sa.w != *sb.w)
- return false;
+ // do we have a tail?
+ return (length & 1) ? *sa.w == *sb.w : true;
+ } else {
+ // one of the addresses isn't 4-byte aligned but the other is
+ register const quint16 *e = sa.w + length;
+ for ( ; sa.w != e; ++sa.w, ++sb.w) {
+ if (*sa.w != *sb.w)
+ return false;
+ }
}
return true;
}
diff --git a/tests/benchmarks/qstring/main.cpp b/tests/benchmarks/qstring/main.cpp
index c7962bd..cbbf0a1 100644
--- a/tests/benchmarks/qstring/main.cpp
+++ b/tests/benchmarks/qstring/main.cpp
@@ -74,8 +74,8 @@ void tst_QString::equals_data() const
64, 64, 64, 64, 64, 64, 64, 64, // 48
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64, // 64
- 64, 64, 64, 64, 64, 64, 64, 64,
- 96, 96, 96, 96, 96, 96, 96, 96 // 80
+ 64, 64, 64, 64, 96, 96, 96, 96,
+ 64, 64, 96, 96, 96, 96, 96, 96 // 80
}, 0
};
const QChar *ptr = reinterpret_cast<const QChar *>(data.data);
@@ -88,15 +88,34 @@ void tst_QString::equals_data() const
QTest::newRow("same-string") << base << base;
QTest::newRow("same-data") << base << QString::fromRawData(ptr, 64);
- // don't use length > 64, since that crosses a cache line
- QTest::newRow("aligned-odd")
- << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 2, 63);
- QTest::newRow("aligned-even")
- << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 2, 64);
- QTest::newRow("unaligned-even")
- << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 1, 63);
- QTest::newRow("unaligned-odd")
- << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 1, 64);
+ // try to avoid crossing a cache line (that is, at ptr[64])
+ QTest::newRow("aligned-aligned-4n")
+ << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 2, 60);
+ QTest::newRow("aligned-unaligned-4n")
+ << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 1, 60);
+ QTest::newRow("unaligned-unaligned-4n")
+ << QString::fromRawData(ptr + 1, 60) << QString::fromRawData(ptr + 3, 60);
+
+ QTest::newRow("aligned-aligned-4n+1")
+ << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 2, 61);
+ QTest::newRow("aligned-unaligned-4n+1")
+ << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 1, 61);
+ QTest::newRow("unaligned-unaligned-4n+1")
+ << QString::fromRawData(ptr + 1, 61) << QString::fromRawData(ptr + 3, 61);
+
+ QTest::newRow("aligned-aligned-4n-1")
+ << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 2, 59);
+ QTest::newRow("aligned-unaligned-4n-1")
+ << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 1, 59);
+ QTest::newRow("unaligned-unaligned-4n-1")
+ << QString::fromRawData(ptr + 1, 59) << QString::fromRawData(ptr + 3, 59);
+
+ QTest::newRow("aligned-aligned-2n")
+ << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 2, 58);
+ QTest::newRow("aligned-unaligned-2n")
+ << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 1, 58);
+ QTest::newRow("unaligned-unaligned-2n")
+ << QString::fromRawData(ptr + 1, 58) << QString::fromRawData(ptr + 3, 58);
}
QTEST_MAIN(tst_QString)