summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorSamuel Rødal <sroedal@trolltech.com>2010-03-08 12:38:08 (GMT)
committerSamuel Rødal <sroedal@trolltech.com>2010-03-26 09:48:59 (GMT)
commit348d22c37611066dc7efc9aac820d77bcf3bbbab (patch)
tree2def91d28881b2ec809bf1eb4a64a71b08090530 /src/gui/painting
parent8d5ae9bca2cbe8c5a7f764b8ba325f79c0bbfe62 (diff)
downloadQt-348d22c37611066dc7efc9aac820d77bcf3bbbab.zip
Qt-348d22c37611066dc7efc9aac820d77bcf3bbbab.tar.gz
Qt-348d22c37611066dc7efc9aac820d77bcf3bbbab.tar.bz2
Included ARM NEON optimizations from pixman in Qt.
On the N900 16 bit text blending is 30 - 50 % faster, and ARGB32PM on RGB16 image blending now runs in 1/10th of the time it used to. We now make ARGB32PM the default pixmap format for alpha pixmaps instead of ARGB8565PM which is unaligned and bad for performance. The relevant numbers: Mostly opaque pixels: ARGB24 on ARGB24 using QPainter..................: 336,813033 ARGB32 on ARGB32 using QPainter.................: 18,419387 RGB16 on ARGB24 using QPainter..................: 167,301014 RGB16 on ARGB32 using QPainter..................: 17,279372 ARGB24 on RGB16 using QPainter..................: 35,100147 ARGB32PM on RGB16 using QPainter................: 15,924256 No opaque pixels: ARGB24 on ARGB24 using QPainter..................: 412,190765 ARGB32 on ARGB32 using QPainter.................: 16,818389 RGB16 on ARGB24 using QPainter..................: 170,957878 RGB16 on ARGB32 using QPainter..................: 16,742984 ARGB24 on RGB16 using QPainter..................: 93,600482 ARGB32PM on RGB16 using QPainter................: 15,999310 So switching to ARGB32PM should give a boost in all areas. Task-number: QTBUG-6684 Reviewed-by: Gunnar Sletta
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/painting.pri14
-rw-r--r--src/gui/painting/qblendfunctions.cpp8
-rw-r--r--src/gui/painting/qdrawhelper.cpp4
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp144
-rw-r--r--src/gui/painting/qdrawhelper_neon_p.h15
5 files changed, 136 insertions, 49 deletions
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index a6cc9c7..e61e07c 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -379,11 +379,23 @@ symbian {
QMAKE_CXXFLAGS.ARMCC *= -O3
}
-neon {
+neon:*-g++* {
DEFINES += QT_HAVE_NEON
HEADERS += painting/qdrawhelper_neon_p.h
SOURCES += painting/qdrawhelper_neon.cpp
QMAKE_CXXFLAGS *= -mfpu=neon
+
+ PIXMAN_NEON_ASM_FILES = ../3rdparty/pixman/pixman-arm-neon-asm.S
+
+ neon_compiler.commands = $$QMAKE_CXX -c
+ neon_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+ neon_compiler.dependency_type = TYPE_C
+ neon_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+ neon_compiler.input = PIXMAN_NEON_ASM_FILES
+ neon_compiler.variable_out = OBJECTS
+ neon_compiler.name = compiling[neon] ${QMAKE_FILE_IN}
+ silent:neon_compiler.commands = @echo compiling[neon] ${QMAKE_FILE_IN} && $$neon_compiler.commands
+ QMAKE_EXTRA_COMPILERS += neon_compiler
}
contains(QT_CONFIG, zlib) {
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp
index dc33896..13bb260 100644
--- a/src/gui/painting/qblendfunctions.cpp
+++ b/src/gui/painting/qblendfunctions.cpp
@@ -447,10 +447,10 @@ static void qt_blend_argb24_on_rgb16(uchar *destPixels, int dbpl,
-static void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
+void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
{
quint16 *dst = (quint16 *) destPixels;
const quint32 *src = (const quint32 *) srcPixels;
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 1f75ec7..130270b 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7961,6 +7961,10 @@ void qInitDrawhelperAsm()
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
+ qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
+ qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
+
+ qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
}
#endif
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index 77c5202..ef1b85c 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -44,6 +44,7 @@
#ifdef QT_HAVE_NEON
#include <private/qdrawhelper_neon_p.h>
+#include <private/qpaintengine_raster_p.h>
#include <arm_neon.h>
QT_BEGIN_NAMESPACE
@@ -87,6 +88,79 @@ static inline uint16x8_t qvsource_over_u16(uint16x8_t src16, uint16x8_t dst16, u
return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half));
}
+extern "C" void
+pixman_composite_over_8888_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint32_t *src,
+ int32_t src_stride);
+
+extern "C" void
+pixman_composite_over_8888_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint32_t *src,
+ int32_t src_stride);
+
+extern "C" void
+pixman_composite_src_0565_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint16_t *src,
+ int32_t src_stride);
+
+// qblendfunctions.cpp
+void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
+{
+ dbpl /= 4;
+ sbpl /= 2;
+
+ quint32 *dst = (quint32 *) destPixels;
+ quint16 *src = (quint16 *) srcPixels;
+
+ if (const_alpha != 256) {
+ quint8 a = (255 * const_alpha) >> 8;
+ quint8 ia = 255 - a;
+
+ while (h--) {
+ for (int x=0; x<w; ++x)
+ dst[x] = INTERPOLATE_PIXEL_255(qt_colorConvert(src[x], dst[x]), a, dst[x], ia);
+ dst += dbpl;
+ src += sbpl;
+ }
+ return;
+ }
+
+ pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl);
+}
+
+void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
+{
+ if (const_alpha != 256) {
+ qt_blend_argb32_on_rgb16_const_alpha(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
+ return;
+ }
+
+ quint16 *dst = (quint16 *) destPixels;
+ quint32 *src = (quint32 *) srcPixels;
+
+ pixman_composite_over_8888_0565_asm_neon(w, h, dst, dbpl / 2, src, sbpl / 4);
+}
+
void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@@ -97,50 +171,7 @@ void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
uint16x8_t half = vdupq_n_u16(0x80);
uint16x8_t full = vdupq_n_u16(0xff);
if (const_alpha == 256) {
- for (int y = 0; y < h; ++y) {
- int x = 0;
- for (; x < w-3; x += 4) {
- uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]);
- if ((src[x] & src[x+1] & src[x+2] & src[x+3]) >= 0xff000000) {
- // all opaque
- vst1q_u32((uint32_t *)&dst[x], src32);
- } else if (src[x] | src[x+1] | src[x+2] | src[x+3]) {
- uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]);
-
- const uint8x16_t src8 = vreinterpretq_u8_u32(src32);
- const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32);
-
- const uint8x8_t src8_low = vget_low_u8(src8);
- const uint8x8_t dst8_low = vget_low_u8(dst8);
-
- const uint8x8_t src8_high = vget_high_u8(src8);
- const uint8x8_t dst8_high = vget_high_u8(dst8);
-
- const uint16x8_t src16_low = vmovl_u8(src8_low);
- const uint16x8_t dst16_low = vmovl_u8(dst8_low);
-
- const uint16x8_t src16_high = vmovl_u8(src8_high);
- const uint16x8_t dst16_high = vmovl_u8(dst8_high);
-
- const uint16x8_t result16_low = qvsource_over_u16(src16_low, dst16_low, half, full);
- const uint16x8_t result16_high = qvsource_over_u16(src16_high, dst16_high, half, full);
-
- const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
- const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));
-
- vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high));
- }
- }
- for (; x<w; ++x) {
- uint s = src[x];
- if (s >= 0xff000000)
- dst[x] = s;
- else if (s != 0)
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
- }
+ pixman_composite_over_8888_8888_asm_neon(w, h, (uint32_t *)destPixels, dbpl / 4, (uint32_t *)srcPixels, sbpl / 4);
} else if (const_alpha != 0) {
const_alpha = (const_alpha * 255) >> 8;
uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha);
@@ -254,6 +285,31 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
}
}
+extern "C" void
+pixman_composite_over_n_8_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint32_t src,
+ int32_t unused,
+ uint8_t *mask,
+ int32_t mask_stride);
+
+
+void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
+ int x, int y, quint32 color,
+ const uchar *bitmap,
+ int mapWidth, int mapHeight, int mapStride,
+ const QClipData *)
+{
+ quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
+ const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
+
+ uchar *mask = const_cast<uchar *>(bitmap);
+
+ pixman_composite_over_n_8_0565_asm_neon(mapWidth, mapHeight, dest, destStride, color, 0, mask, mapStride);
+}
+
QT_END_NAMESPACE
#endif // QT_HAVE_NEON
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index 1994441..6f9604f 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -69,6 +69,21 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
int w, int h,
int const_alpha);
+void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
+ int x, int y, quint32 color,
+ const uchar *bitmap,
+ int mapWidth, int mapHeight, int mapStride,
+ const QClipData *clip);
#endif // QT_HAVE_NEON
QT_END_NAMESPACE