From a922a304ae9115d04f3bbcb3bd13c8e374bb16f1 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 22 Jun 2010 22:06:02 +0200 Subject: Add a SSE2 implementation of comp_func_solid_SourceOver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is used quite a lot by WebKit animations; the SSE2 implementation is twice as fast in those use cases. Reviewed-by: Andreas Kling Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 5 ++++- src/gui/painting/qdrawhelper_sse2.cpp | 30 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index d088499..f08c090 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7817,11 +7817,14 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { - extern void comp_func_SourceOver_sse2(uint *destPixels, + extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha); + extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index b650aac..7d542d6 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -297,7 +297,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, } } -void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) +void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) { Q_ASSERT(const_alpha > 0); // if const_alpha == 
0, this should never be called
     Q_ASSERT(const_alpha < 256);
@@ -362,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
     }
 }
 
+// Solid-colour SourceOver (SSE2): composes color, scaled by const_alpha, over length pixels at destPixels.
+void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
+{
+    if ((const_alpha & qAlpha(color)) == 255) {
+        qt_memfill32_sse2(destPixels, color, length); // plain fill; arguments are (dest, value, count)
+    } else {
+        if (const_alpha != 255)
+            color = BYTE_MUL(color, const_alpha);
+        // Every destination pixel is scaled by qAlpha(~color) before color is added to it.
+        const quint32 minusAlphaOfColor = qAlpha(~color);
+        int x = 0;
+        quint32 *dst = (uint *) destPixels;
+        const __m128i colorVector = _mm_set1_epi32(color);
+        const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+        const __m128i half = _mm_set1_epi16(0x80);
+        const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
+        // Vector loop: four pixels per iteration, using unaligned loads and stores.
+        for (; x < length-3; x += 4) {
+            __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
+            BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
+            dstVector = _mm_add_epi8(colorVector, dstVector);
+            _mm_storeu_si128((__m128i *)&dst[x], dstVector);
+        }
+        for (;x < length; ++x) // scalar tail: the remaining 0-3 pixels
+            destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
+    }
+}
+
 void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
 {
     if (count < 3) {
-- 
cgit v0.12