diff options
author | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-04 09:46:40 (GMT) |
---|---|---|
committer | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-04 11:00:26 (GMT) |
commit | a214bb99775e898fd9233b932f08ff91a57fc053 (patch) | |
tree | 24b8142db51ee55ccb0716e16a12b27c00b513b4 | |
parent | c1bac607f3f3889d367e20c128acf30dc9537ddc (diff) | |
download | Qt-a214bb99775e898fd9233b932f08ff91a57fc053.zip Qt-a214bb99775e898fd9233b932f08ff91a57fc053.tar.gz Qt-a214bb99775e898fd9233b932f08ff91a57fc053.tar.bz2 |
Implement comp_Source with SSE2 when there is a const alpha
On Atom, comp_Source is 280% faster with the SSE2 implementation.
Reviewed-by: Andreas Kling
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 33 | ||||
-rw-r--r-- | tests/benchmarks/gui/image/blendbench/main.cpp | 32 |
3 files changed, 67 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 4ce2bee..1ff3d7b 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7820,8 +7820,10 @@ void qInitDrawhelperAsm() uint const_alpha); extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha); + extern void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha); functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeAsm[QPainter::CompositionMode_Source] = comp_func_Source_sse2; functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2; functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index e090ae5..7ab9eda 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -229,6 +229,39 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin } } +void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha) +{ + if (const_alpha == 255) { + ::memcpy(dst, src, length * sizeof(uint)); + } else { + const int ialpha = 255 - const_alpha; + + int x = 0; + + // 1) prologue, align on 16 bytes + const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; + const int prologLength = qMin(length, offsetToAlignOn16Bytes); + for (; x < prologLength; ++x) + dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); + + // 2) interpolate pixels with SSE2 + const __m128i half = _mm_set1_epi16(0x80); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha); + for (; x < length - 3; x += 4) { + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); + INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half) + _mm_store_si128((__m128i *)&dst[x], dstVector); + } + + // 3) Epilogue + for (; x < length; ++x) + dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { diff --git a/tests/benchmarks/gui/image/blendbench/main.cpp b/tests/benchmarks/gui/image/blendbench/main.cpp index 92d1633..f53654b 100644 --- a/tests/benchmarks/gui/image/blendbench/main.cpp +++ b/tests/benchmarks/gui/image/blendbench/main.cpp @@ -103,6 +103,9 @@ class BlendBench : public QObject private slots: void blendBench_data(); void blendBench(); + + void blendBenchAlpha_data(); + void blendBenchAlpha(); }; void BlendBench::blendBench_data() @@ -147,6 +150,35 @@ void BlendBench::blendBench() } } +void BlendBench::blendBenchAlpha_data() +{ + blendBench_data(); +} + +void BlendBench::blendBenchAlpha() +{ + QFETCH(int, brushType); + QFETCH(int, compositionMode); + + QImage img(512, 512, QImage::Format_ARGB32_Premultiplied); + QImage src(512, 512, QImage::Format_ARGB32_Premultiplied); + paint(&src); + QPainter p(&img); + p.setPen(Qt::NoPen); + + p.setCompositionMode(QPainter::CompositionMode(compositionMode)); + if (brushType == ImageBrush) { + p.setBrush(QBrush(src)); + } else if (brushType == SolidBrush) { + p.setBrush(QColor(127, 127, 127, 127)); + } + p.setOpacity(0.7f); + + QBENCHMARK { + p.drawRect(0, 0, 512, 512); + } +} + QTEST_MAIN(BlendBench) #include "main.moc" |