From 1ff30b2b88af31cf18d2a1e6961c3ed7bd9b0240 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 19 May 2009 14:38:49 +0200 Subject: Reintroduce the unaligned-unaligned 32-bit code that I had removed out of ignorance. If both pointers are out of 4-byte alignment, doing the first load will align them so we can do 32-bit comparisons. Lars's code had this before, but I misunderstood it and removed, thinking it was doing misaligned accesses. I experimented with moving the tail comparison above the 32-bit comparison to save a register, but it made things worse. Reviewed-By: Bradley T. Hughes --- src/corelib/tools/qstring.cpp | 45 +++++++++++++++++++++++++-------------- tests/benchmarks/qstring/main.cpp | 41 +++++++++++++++++++++++++---------- 2 files changed, 59 insertions(+), 27 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 456fdfd..29509c5 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -206,12 +206,12 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) // block of data, with 4194304 iterations (per iteration): // operation usec cpu ticks // memcmp 330 710 - // 16-bit 135 285-290 - // 32-bit aligned 69.7 135-145 + // 16-bit 79 167-171 + // 32-bit aligned 49 105-109 // // Testing also indicates that unaligned 32-bit loads are as // performant as 32-bit aligned. 
- if (a == b) + if (a == b || !length) return true; register union { @@ -223,24 +223,37 @@ static bool qMemEquals(const quint16 *a, const quint16 *b, int length) sb.w = b; // check alignment - bool unaligned = (sa.value | sb.value) & 2; -#if defined(__i386__) || defined(__x86_64__) || defined(_M_X64_) - unaligned = false; -#endif - if (!unaligned) { - // both addresses are 4-bytes aligned (or this is an x86) + if ((sa.value & 2) == (sb.value & 2)) { + // both addresses have the same alignment + if (sa.value & 2) { + // both addresses are not aligned to 4-bytes boundaries + // compare the first character + if (*sa.w != *sb.w) + return false; + --length; + ++sa.w; + ++sb.w; + + // now both addresses are 4-bytes aligned + } + + // both addresses are 4-bytes aligned // do a fast 32-bit comparison - for (register int halfLength = length / 2; halfLength; --halfLength, ++sa.d, ++sb.d) { + register const quint32 *e = sa.d + (length >> 1); + for ( ; sa.d != e; ++sa.d, ++sb.d) { if (*sa.d != *sb.d) return false; } - return length & 1 ? (*sa.w == *sb.w) : true; - } - // one or both of the addresses isn't 2-byte aligned - for ( ; length; --length, ++sa.w, ++sb.w) { - if (*sa.w != *sb.w) - return false; + // do we have a tail? + return (length & 1) ? 
*sa.w == *sb.w : true; + } else { + // one of the addresses isn't 4-byte aligned but the other is + register const quint16 *e = sa.w + length; + for ( ; sa.w != e; ++sa.w, ++sb.w) { + if (*sa.w != *sb.w) + return false; + } } return true; } diff --git a/tests/benchmarks/qstring/main.cpp b/tests/benchmarks/qstring/main.cpp index c7962bd..cbbf0a1 100644 --- a/tests/benchmarks/qstring/main.cpp +++ b/tests/benchmarks/qstring/main.cpp @@ -74,8 +74,8 @@ void tst_QString::equals_data() const 64, 64, 64, 64, 64, 64, 64, 64, // 48 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, // 64 - 64, 64, 64, 64, 64, 64, 64, 64, - 96, 96, 96, 96, 96, 96, 96, 96 // 80 + 64, 64, 64, 64, 96, 96, 96, 96, + 64, 64, 96, 96, 96, 96, 96, 96 // 80 }, 0 }; const QChar *ptr = reinterpret_cast<const QChar *>(data.data); @@ -88,15 +88,34 @@ void tst_QString::equals_data() const QTest::newRow("same-string") << base << base; QTest::newRow("same-data") << base << QString::fromRawData(ptr, 64); - // don't use length > 64, since that crosses a cache line - QTest::newRow("aligned-odd") - << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 2, 63); - QTest::newRow("aligned-even") - << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 2, 64); - QTest::newRow("unaligned-even") - << QString::fromRawData(ptr, 63) << QString::fromRawData(ptr + 1, 63); - QTest::newRow("unaligned-odd") - << QString::fromRawData(ptr, 64) << QString::fromRawData(ptr + 1, 64); + // try to avoid crossing a cache line (that is, at ptr[64]) + QTest::newRow("aligned-aligned-4n") + << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 2, 60); + QTest::newRow("aligned-unaligned-4n") + << QString::fromRawData(ptr, 60) << QString::fromRawData(ptr + 1, 60); + QTest::newRow("unaligned-unaligned-4n") + << QString::fromRawData(ptr + 1, 60) << QString::fromRawData(ptr + 3, 60); + + QTest::newRow("aligned-aligned-4n+1") + << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 2, 61); + 
QTest::newRow("aligned-unaligned-4n+1") + << QString::fromRawData(ptr, 61) << QString::fromRawData(ptr + 1, 61); + QTest::newRow("unaligned-unaligned-4n+1") + << QString::fromRawData(ptr + 1, 61) << QString::fromRawData(ptr + 3, 61); + + QTest::newRow("aligned-aligned-4n-1") + << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 2, 59); + QTest::newRow("aligned-unaligned-4n-1") + << QString::fromRawData(ptr, 59) << QString::fromRawData(ptr + 1, 59); + QTest::newRow("unaligned-unaligned-4n-1") + << QString::fromRawData(ptr + 1, 59) << QString::fromRawData(ptr + 3, 59); + + QTest::newRow("aligned-aligned-2n") + << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 2, 58); + QTest::newRow("aligned-unaligned-2n") + << QString::fromRawData(ptr, 58) << QString::fromRawData(ptr + 1, 58); + QTest::newRow("unaligned-unaligned-2n") + << QString::fromRawData(ptr + 1, 58) << QString::fromRawData(ptr + 3, 58); } QTEST_MAIN(tst_QString) -- cgit v0.12