summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-09-13 17:20:36 (GMT)
committer: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-09-13 17:50:59 (GMT)
commit: b5cd029cdd700069e603fe27049f3afb999a234c (patch)
tree: a130b651f63f2948cc6a79206a818ce245f1ff7f
parent: 0b3183427395be7dae29ba91254b00b0845b72af (diff)
downloadQt-b5cd029cdd700069e603fe27049f3afb999a234c.zip
Qt-b5cd029cdd700069e603fe27049f3afb999a234c.tar.gz
Qt-b5cd029cdd700069e603fe27049f3afb999a234c.tar.bz2
Use aligned operations for solid SourceOver operations.
Aligned load() and store() have been shown to be faster for the composition functions. This patch applies the same aligned access to the solid SourceOver function.

Reviewed-by: Samuel Rødal
-rw-r--r-- src/gui/painting/qdrawhelper_sse2.cpp 7
1 files changed, 5 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index ac73958..f97d865 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -300,11 +300,14 @@ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, u
const __m128i half = _mm_set1_epi16(0x80);
const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
+ destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
+
for (; x < length-3; x += 4) {
- __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
+ __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
dstVector = _mm_add_epi8(colorVector, dstVector);
- _mm_storeu_si128((__m128i *)&dst[x], dstVector);
+ _mm_store_si128((__m128i *)&dst[x], dstVector);
}
for (;x < length; ++x)
destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);