From 34beae57ff8248e7666a4ce08a06eeff54e05e7a Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Mon, 26 Jul 2010 17:14:13 +0200 Subject: Implement the composition mode "Plus" with SSE2 Implement the composition function for CompositionMode_Plus with SSE2. The macro MIX() can be replaced by a single instruction add-saturate, which increase the speed a lot (13 times faster on the blend benchmark). Reviewed-by: Olivier Goffart Reviewed-by: Andreas Kling --- src/gui/painting/qdrawhelper.cpp | 8 ++--- src/gui/painting/qdrawhelper_p.h | 5 +++ src/gui/painting/qdrawhelper_sse2.cpp | 66 +++++++++++++++++++++++++++++++++++ tests/auto/qpainter/tst_qpainter.cpp | 24 ++++++++++--- 4 files changed, 94 insertions(+), 9 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index a77c379..4ce2bee 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1715,11 +1715,6 @@ void QT_FASTCALL comp_func_XOR(uint *dest, const uint *src, int length, uint con } } -static const uint AMASK = 0xff000000; -static const uint RMASK = 0x00ff0000; -static const uint GMASK = 0x0000ff00; -static const uint BMASK = 0x000000ff; - struct QFullCoverage { inline void store(uint *dest, const uint src) const { @@ -7775,6 +7770,7 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_MMX if (features & MMX) { functionForModeAsm = qt_functionForMode_MMX; + functionForModeSolidAsm = qt_functionForModeSolid_MMX; qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_mmx; #ifdef QT_HAVE_3DNOW @@ -7823,8 +7819,10 @@ void qInitDrawhelperAsm() int length, uint const_alpha); extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha); functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2; functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index eec6bf4..1a87127 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -91,6 +91,11 @@ QT_BEGIN_NAMESPACE # define Q_STATIC_INLINE_FUNCTION static inline #endif +static const uint AMASK = 0xff000000; +static const uint RMASK = 0x00ff0000; +static const uint GMASK = 0x0000ff00; +static const uint BMASK = 0x000000ff; + /******************************************************************************* * QSpan * diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 279f685..b4ef23e 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -164,6 +164,72 @@ void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixe } } +inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha) +{ +#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) + const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); +#undef MIX + return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha); +} + +inline int comp_func_Plus_one_pixel(uint d, const uint s) +{ +#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) + const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); +#undef MIX + return result; +} + +void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) +{ + int x = 0; + const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast(dst) >> 2) & 0x3)) & 0x3; + const int prologLength = qMin(length, offsetToAlignOn16Bytes); + + if (const_alpha == 255) { + // 1) Prologue: align destination on 16 bytes + for (; x < prologLength; ++x) + dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); + + // 2) composition with SSE2 + for (; x < length - 3; x += 4) { + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); + + const __m128i result = _mm_adds_epu8(srcVector, dstVector); + _mm_store_si128((__m128i *)&dst[x], result); + } + + // 3) Epilogue: + for (; x < length; ++x) + dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); + } else { + const int one_minus_const_alpha = 255 - const_alpha; + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); + + // 1) Prologue: align destination on 16 bytes + for (; x < prologLength; ++x) + dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); + + const __m128i half = _mm_set1_epi16(0x80); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + // 2) composition with SSE2 + for (; x < length - 3; x += 4) { + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); + + __m128i result = _mm_adds_epu8(srcVector, dstVector); + INTERPOLATE_PIXEL_255_SSE2(result, result, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half) + _mm_store_si128((__m128i *)&dst[x], result); + } + + // 3) Epilogue: + for (; x < length; ++x) + dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { diff --git a/tests/auto/qpainter/tst_qpainter.cpp b/tests/auto/qpainter/tst_qpainter.cpp index 27ee6e7..f358681 100644 --- a/tests/auto/qpainter/tst_qpainter.cpp +++ b/tests/auto/qpainter/tst_qpainter.cpp @@ -4176,14 +4176,18 @@ void tst_QPainter::inactivePainter() p.setWorldTransform(QTransform().scale(0.5, 0.5), true); } -bool testCompositionMode(int src, int dst, int expected, QPainter::CompositionMode op) +bool testCompositionMode(int src, int dst, int expected, QPainter::CompositionMode op, qreal opacity = 1.0) { - QImage actual(1, 1, QImage::Format_ARGB32_Premultiplied); + // The test image needs to be large enough to test SIMD code + const QSize imageSize(100, 100); + + QImage actual(imageSize, QImage::Format_ARGB32_Premultiplied); actual.fill(QColor(dst, dst, dst).rgb()); QPainter p(&actual); p.setCompositionMode(op); - p.fillRect(0, 0, 1, 1, QColor(src, src, src)); + p.setOpacity(opacity); + p.fillRect(QRect(QPoint(), imageSize), QColor(src, src, src)); p.end(); if (qRed(actual.pixel(0, 0)) != expected) { @@ -4191,7 +4195,9 @@ bool testCompositionMode(int src, int dst, int expected, QPainter::CompositionMo src, dst, qRed(actual.pixel(0, 0)), expected); return false; } else { - return true; + QImage refImage(imageSize, QImage::Format_ARGB32_Premultiplied); + refImage.fill(QColor(expected, expected, expected).rgb()); + return actual == refImage; } } @@ -4206,6 +4212,16 @@ void tst_QPainter::extendedBlendModes() QVERIFY(testCompositionMode( 0, 255, 255, QPainter::CompositionMode_Plus)); QVERIFY(testCompositionMode(128, 128, 255, QPainter::CompositionMode_Plus)); + QVERIFY(testCompositionMode(255, 255, 255, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode( 0, 0, 0, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(127, 128, 165, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(127, 0, 37, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode( 0, 127, 127, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(255, 0, 75, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode( 0, 255, 255, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(128, 128, 166, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(186, 200, 255, QPainter::CompositionMode_Plus, 0.3)); + QVERIFY(testCompositionMode(255, 255, 255, QPainter::CompositionMode_Multiply)); QVERIFY(testCompositionMode( 0, 0, 0, QPainter::CompositionMode_Multiply)); QVERIFY(testCompositionMode(127, 255, 127, QPainter::CompositionMode_Multiply)); -- cgit v0.12