diff options
author | Thiago Macieira <thiago.macieira@nokia.com> | 2009-05-19 12:38:49 (GMT) |
---|---|---|
committer | Thiago Macieira <thiago.macieira@nokia.com> | 2009-05-22 12:17:57 (GMT) |
commit | 1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240 (patch) | |
tree | 8572f5ba08537661a62bfb36cff8c8e9fe27381e | |
parent | 2387c77787ec279981d58c46cfb5bde3e1530790 (diff) | |
download | Qt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.zip Qt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.tar.gz Qt-1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240.tar.bz2 |
Reintroduce the unaligned-unaligned 32-bit code that I had removed out of ignorance.
If both pointers are out of 4-byte alignment by the same amount, comparing
the first 16-bit character on its own advances both to a 4-byte boundary, so
the rest can be compared 32 bits at a time. Lars's code had this before, but
I misunderstood it and removed it, thinking it was doing misaligned accesses.
I experimented with moving the tail comparison above the 32-bit
comparison to save a register, but it made things worse.
Reviewed-By: Bradley T. Hughes
-rw-r--r-- | src/corelib/tools/qstring.cpp | 45 | ||||
-rw-r--r-- | tests/benchmarks/qstring/main.cpp | 41 |
2 files changed, 59 insertions, 27 deletions
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 456fdfd..29509c5 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -206,12 +206,12 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // block of data, with 4194304 iterations (per iteration): // operation usec cpu ticks // memcmp 330 710 - // 16-bit 135 285-290 - // 32-bit aligned 69.7 135-145 + // 16-bit 79 167-171 + // 32-bit aligned 49 105-109 // // Testing also indicates that unaligned 32-bit loads are as // performant as 32-bit aligned. - if (a == b) + if (a == b || !length) return true; register union { @@ -223,24 +223,37 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) sb.w = b; // check alignment - bool unaligned = (sa.value | sb.value) & 2; -#if defined(__i386__) || defined(__x86_64__) || defined(_M_X64_) - unaligned = false; -#endif - if (!unaligned) { - // both addresses are 4-bytes aligned (or this is an x86) + if ((sa.value & 2) == (sb.value & 2)) { + // both addresses have the same alignment + if (sa.value & 2) { + // both addresses are not aligned to 4-bytes boundaries + // compare the first character + if (*sa.w != *sb.w) + return false; + --length; + ++sa.w; + ++sb.w; + + // now both addresses are 4-bytes aligned + } + + // both addresses are 4-bytes aligned // do a fast 32-bit comparison - for (register int halfLength = length / 2; halfLength; --halfLength, ++sa.d, ++sb.d) { + register const quint32 *e = sa.d + (length >> 1); + for ( ; sa.d != e; ++sa.d, ++sb.d) { if (*sa.d != *sb.d) return false; } - return length & 1 ? (*sa.w == *sb.w) : true; - } - // one or both of the addresses isn't 2-byte aligned - for ( ; length; --length, ++sa.w, ++sb.w) { - if (*sa.w != *sb.w) - return false; + // do we have a tail? + return (length & 1) ? 
*sa.w == *sb.w : true; + } else { + // one of the addresses isn't 4-byte aligned but the other is + register const quint16 *e = sa.w + length; + for ( ; sa.w != e; ++sa.w, ++sb.w) { + if (*sa.w != *sb.w) + return false; + } } return true; } diff --git a/tests/benchmarks/qstring/main.cpp b/tests/benchmarks/qstring/main.cpp index c7962bd..cbbf0a1 100644 --- a/tests/benchmarks/qstring/main.cpp +++ b/tests/benchmarks/qstring/main.cpp @@ -74,8 +74,8 @@ void tst_QString::equals_data() const 64, 64, 64, 64, 64, 64, 64, 64, // 48 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 64 - 64, 64, 64, 64, 64, 64, 64, 64, - 96, 96, 96, 96, 96, 96, 96, 96 // 80 + 64, 64, 64, 64, 96, 96, 96, 96, + 64, 64, 96, 96, 96, 96, 96, 96 // 80 }, 0 }; const QChar *ptr = reinterpret_cast<const QChar *>(data.data); @@ -88,15 +88,34 @@ void tst_QString::equals_data() const QTest::newRow("same-string") << base << base; QTest::newRow("same-data") << base << QString::fromRawData(ptr, 64); - // don't use length > 64, since that crosses a cache line - QTest::newRow("aligned-odd") - << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 2, 63); - QTest::newRow("aligned-even") - << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 2, 64); - QTest::newRow("unaligned-even") - << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 1, 63); - QTest::newRow("unaligned-odd") - << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 1, 64); + // try to avoid crossing a cache line (that is, at ptr[64]) + QTest::newRow("aligned-aligned-4n") + << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 2, 60); + QTest::newRow("aligned-unaligned-4n") + << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 1, 60); + QTest::newRow("unaligned-unaligned-4n") + << QString::fromRawData(ptr + 1, 60) << QString::fromRawData(ptr + 3, 60); + + QTest::newRow("aligned-aligned-4n+1") + << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 2, 61); + 
QTest::newRow("aligned-unaligned-4n+1") + << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 1, 61); + QTest::newRow("unaligned-unaligned-4n+1") + << QString::fromRawData(ptr + 1, 61) << QString::fromRawData(ptr + 3, 61); + + QTest::newRow("aligned-aligned-4n-1") + << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 2, 59); + QTest::newRow("aligned-unaligned-4n-1") + << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 1, 59); + QTest::newRow("unaligned-unaligned-4n-1") + << QString::fromRawData(ptr + 1, 59) << QString::fromRawData(ptr + 3, 59); + + QTest::newRow("aligned-aligned-2n") + << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 2, 58); + QTest::newRow("aligned-unaligned-2n") + << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 1, 58); + QTest::newRow("unaligned-unaligned-2n") + << QString::fromRawData(ptr + 1, 58) << QString::fromRawData(ptr + 3, 58); } QTEST_MAIN(tst_QString) |