summaryrefslogtreecommitdiffstats
path: root/src/gui
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-07-26 15:14:13 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-07-27 09:05:34 (GMT)
commit34beae57ff8248e7666a4ce08a06eeff54e05e7a (patch)
tree72af98d91ca182b2156cd8b11b607bcf62c2bcc7 /src/gui
parentc3b2f89b85696c01062ddcd9d21a05cebf51b078 (diff)
downloadQt-34beae57ff8248e7666a4ce08a06eeff54e05e7a.zip
Qt-34beae57ff8248e7666a4ce08a06eeff54e05e7a.tar.gz
Qt-34beae57ff8248e7666a4ce08a06eeff54e05e7a.tar.bz2
Implement the composition mode "Plus" with SSE2
Implement the composition function for CompositionMode_Plus with SSE2. The macro MIX() can be replaced by a single instruction add-saturate, which increase the speed a lot (13 times faster on the blend benchmark). Reviewed-by: Olivier Goffart Reviewed-by: Andreas Kling
Diffstat (limited to 'src/gui')
-rw-r--r--src/gui/painting/qdrawhelper.cpp8
-rw-r--r--src/gui/painting/qdrawhelper_p.h5
-rw-r--r--src/gui/painting/qdrawhelper_sse2.cpp66
3 files changed, 74 insertions, 5 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index a77c379..4ce2bee 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -1715,11 +1715,6 @@ void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint con
}
}
-static const uint AMASK = 0xff000000;
-static const uint RMASK = 0x00ff0000;
-static const uint GMASK = 0x0000ff00;
-static const uint BMASK = 0x000000ff;
-
struct QFullCoverage {
inline void store(uint *dest, const uint src) const
{
@@ -7775,6 +7770,7 @@ void qInitDrawhelperAsm()
#ifdef QT_HAVE_MMX
if (features & MMX) {
functionForModeAsm = qt_functionForMode_MMX;
+
functionForModeSolidAsm = qt_functionForModeSolid_MMX;
qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx;
#ifdef QT_HAVE_3DNOW
@@ -7823,8 +7819,10 @@ void qInitDrawhelperAsm()
int length,
uint const_alpha);
extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
+ extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha);
functionForModeAsm[0] = comp_func_SourceOver_sse2;
+ functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index eec6bf4..1a87127 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -91,6 +91,11 @@ QT_BEGIN_NAMESPACE
# define Q_STATIC_INLINE_FUNCTION static inline
#endif
+static const uint AMASK = 0xff000000;
+static const uint RMASK = 0x00ff0000;
+static const uint GMASK = 0x0000ff00;
+static const uint BMASK = 0x000000ff;
+
/*******************************************************************************
* QSpan
*
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index 279f685..b4ef23e 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -164,6 +164,72 @@ void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixe
}
}
+inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha)
+{
+#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask)))
+ const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK));
+#undef MIX
+ return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha);
+}
+
+inline int comp_func_Plus_one_pixel(uint d, const uint s)
+{
+#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask)))
+ const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK));
+#undef MIX
+ return result;
+}
+
+void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
+{
+ int x = 0;
+ const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
+ const int prologLength = qMin(length, offsetToAlignOn16Bytes);
+
+ if (const_alpha == 255) {
+ // 1) Prologue: align destination on 16 bytes
+ for (; x < prologLength; ++x)
+ dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
+
+ // 2) composition with SSE2
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
+ const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
+
+ const __m128i result = _mm_adds_epu8(srcVector, dstVector);
+ _mm_store_si128((__m128i *)&dst[x], result);
+ }
+
+ // 3) Epilogue:
+ for (; x < length; ++x)
+ dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
+ } else {
+ const int one_minus_const_alpha = 255 - const_alpha;
+ const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
+ const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
+
+ // 1) Prologue: align destination on 16 bytes
+ for (; x < prologLength; ++x)
+ dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
+
+ const __m128i half = _mm_set1_epi16(0x80);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ // 2) composition with SSE2
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
+ const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
+
+ __m128i result = _mm_adds_epu8(srcVector, dstVector);
+ INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
+ _mm_store_si128((__m128i *)&dst[x], result);
+ }
+
+ // 3) Epilogue:
+ for (; x < length; ++x)
+ dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
+ }
+}
+
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
if (count < 7) {