summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-04 09:46:40 (GMT)
committerBenjamin Poulain <benjamin.poulain@nokia.com>2010-08-04 11:00:26 (GMT)
commita214bb99775e898fd9233b932f08ff91a57fc053 (patch)
tree24b8142db51ee55ccb0716e16a12b27c00b513b4
parentc1bac607f3f3889d367e20c128acf30dc9537ddc (diff)
downloadQt-a214bb99775e898fd9233b932f08ff91a57fc053.zip
Qt-a214bb99775e898fd9233b932f08ff91a57fc053.tar.gz
Qt-a214bb99775e898fd9233b932f08ff91a57fc053.tar.bz2
Implement comp_Source with SSE2 when there is a const alpha
On Atom, comp_Source is 280% faster with the SSE2 implementation. Reviewed-by: Andreas Kling
-rw-r--r--src/gui/painting/qdrawhelper.cpp2
-rw-r--r--src/gui/painting/qdrawhelper_sse2.cpp33
-rw-r--r--tests/benchmarks/gui/image/blendbench/main.cpp32
3 files changed, 67 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 4ce2bee..1ff3d7b 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7820,8 +7820,10 @@ void qInitDrawhelperAsm()
uint const_alpha);
extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
extern void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha);
+ extern void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha);
functionForModeAsm[0] = comp_func_SourceOver_sse2;
+ functionForModeAsm[QPainter::CompositionMode_Source] = comp_func_Source_sse2;
functionForModeAsm[QPainter::CompositionMode_Plus] = comp_func_Plus_sse2;
functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index e090ae5..7ab9eda 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -229,6 +229,39 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin
}
}
+void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, uint const_alpha)
+{
+ if (const_alpha == 255) {
+ ::memcpy(dst, src, length * sizeof(uint));
+ } else {
+ const int ialpha = 255 - const_alpha;
+
+ int x = 0;
+
+ // 1) prologue, align on 16 bytes
+ const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
+ const int prologLength = qMin(length, offsetToAlignOn16Bytes);
+ for (; x < prologLength; ++x)
+ dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
+
+ // 2) interpolate pixels with SSE2
+ const __m128i half = _mm_set1_epi16(0x80);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
+ const __m128i oneMinusConstAlpha = _mm_set1_epi16(ialpha);
+ for (; x < length - 3; x += 4) {
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
+ __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]);
+ INTERPOLATE_PIXEL_255_SSE2(dstVector, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half)
+ _mm_store_si128((__m128i *)&dst[x], dstVector);
+ }
+
+ // 3) Epilogue
+ for (; x < length; ++x)
+ dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
+ }
+}
+
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
if (count < 7) {
diff --git a/tests/benchmarks/gui/image/blendbench/main.cpp b/tests/benchmarks/gui/image/blendbench/main.cpp
index 92d1633..f53654b 100644
--- a/tests/benchmarks/gui/image/blendbench/main.cpp
+++ b/tests/benchmarks/gui/image/blendbench/main.cpp
@@ -103,6 +103,9 @@ class BlendBench : public QObject
private slots:
void blendBench_data();
void blendBench();
+
+ void blendBenchAlpha_data();
+ void blendBenchAlpha();
};
void BlendBench::blendBench_data()
@@ -147,6 +150,35 @@ void BlendBench::blendBench()
}
}
+void BlendBench::blendBenchAlpha_data()
+{
+ blendBench_data();
+}
+
+void BlendBench::blendBenchAlpha()
+{
+ QFETCH(int, brushType);
+ QFETCH(int, compositionMode);
+
+ QImage img(512, 512, QImage::Format_ARGB32_Premultiplied);
+ QImage src(512, 512, QImage::Format_ARGB32_Premultiplied);
+ paint(&src);
+ QPainter p(&img);
+ p.setPen(Qt::NoPen);
+
+ p.setCompositionMode(QPainter::CompositionMode(compositionMode));
+ if (brushType == ImageBrush) {
+ p.setBrush(QBrush(src));
+ } else if (brushType == SolidBrush) {
+ p.setBrush(QColor(127, 127, 127, 127));
+ }
+ p.setOpacity(0.7f);
+
+ QBENCHMARK {
+ p.drawRect(0, 0, 512, 512);
+ }
+}
+
QTEST_MAIN(BlendBench)
#include "main.moc"