summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-06-22 20:06:02 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-06-23 17:28:49 (GMT)
commita922a304ae9115d04f3bbcb3bd13c8e374bb16f1 (patch)
tree6cd086fd060474dc41bed9e73275212182b47b8c
parent98d9083f87feb3d78af509db43685a148aa0766e (diff)
downloadQt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.zip
Qt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.tar.gz
Qt-a922a304ae9115d04f3bbcb3bd13c8e374bb16f1.tar.bz2
Add a SSE2 implementation of comp_func_solid_SourceOver()
This function is used quite a lot by WebKit animations, the SSE2 implementation is twice as fast in those uses cases. Reviewed-by: Andreas Kling Reviewed-by: Samuel Rødal
-rw-r--r--src/gui/painting/qdrawhelper.cpp5
-rw-r--r--src/gui/painting/qdrawhelper_sse2.cpp30
2 files changed, 33 insertions, 2 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index d088499..f08c090 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7817,11 +7817,14 @@ void qInitDrawhelperAsm()
#ifdef QT_HAVE_SSE2
if (features & SSE2) {
- extern void comp_func_SourceOver_sse2(uint *destPixels,
+ extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels,
const uint *srcPixels,
int length,
uint const_alpha);
+ extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
+
functionForModeAsm[0] = comp_func_SourceOver_sse2;
+ functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index b650aac..7d542d6 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -297,7 +297,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
}
}
-void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
+void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
{
Q_ASSERT(const_alpha > 0); // if const_alpha == 0, this should never be called
Q_ASSERT(const_alpha < 256);
@@ -362,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
}
}
+void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
+{
+ if ((const_alpha & qAlpha(color)) == 255) {
+ qt_memfill32_sse2(destPixels, length, color);
+ } else {
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha);
+
+ const quint32 minusAlphaOfColor = qAlpha(~color);
+ int x = 0;
+
+ quint32 *dst = (uint *) destPixels;
+ const __m128i colorVector = _mm_set1_epi32(color);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ const __m128i half = _mm_set1_epi16(0x80);
+ const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
+
+ for (; x < length-3; x += 4) {
+ __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
+ BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
+ dstVector = _mm_add_epi8(colorVector, dstVector);
+ _mm_storeu_si128((__m128i *)&dst[x], dstVector);
+ }
+ for (;x < length; ++x)
+ destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
+ }
+}
+
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
{
if (count < 3) {