diff options
author | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-06-22 20:06:02 (GMT) |
---|---|---|
committer | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-06-23 17:28:49 (GMT) |
commit | a922a304ae9115d04f3bbcb3bd13c8e374bb16f1 (patch) | |
tree | 6cd086fd060474dc41bed9e73275212182b47b8c /src | |
parent | 98d9083f87feb3d78af509db43685a148aa0766e (diff) | |
download | Qt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.zip Qt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.tar.gz Qt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.tar.bz2 |
Add a SSE2 implementation of comp_func_solid_SourceOver()
This function is used quite a lot by WebKit animations, the SSE2
implementation is twice as fast in those uses cases.
Reviewed-by: Andreas Kling
Reviewed-by: Samuel Rødal
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 5 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 30 |
2 files changed, 33 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index d088499..f08c090 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7817,11 +7817,14 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { - extern void comp_func_SourceOver_sse2(uint *destPixels, + extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha); + extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index b650aac..7d542d6 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -297,7 +297,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, } } -void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) +void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) { Q_ASSERT(const_alpha > 0); // if const_alpha == 0, this should never be called Q_ASSERT(const_alpha < 256); @@ -362,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) } } +void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + qt_memfill32_sse2(destPixels, length, color); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + quint32 *dst = (uint *) destPixels; + const __m128i colorVector = _mm_set1_epi32(color); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); + dstVector = _mm_add_epi8(colorVector, dstVector); + _mm_storeu_si128((__m128i *)&dst[x], dstVector); + } + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) { if (count < 3) { |