diff options
author | Samuel Rødal <sroedal@trolltech.com> | 2010-03-25 11:14:40 (GMT) |
---|---|---|
committer | Samuel Rødal <sroedal@trolltech.com> | 2010-03-26 09:49:09 (GMT) |
commit | fa44a37174f51f3d2786fc6e60d8fa5561a4df6c (patch) | |
tree | 014108ee7598fb3a246bad894cb0a47da36115b9 /src/gui | |
parent | 0ad22e6cd1cb353e2e1244c1eb7257cb3af9def4 (diff) | |
download | Qt-fa44a37174f51f3d2786fc6e60d8fa5561a4df6c.zip Qt-fa44a37174f51f3d2786fc6e60d8fa5561a4df6c.tar.gz Qt-fa44a37174f51f3d2786fc6e60d8fa5561a4df6c.tar.bz2 |
Optimized SourceOver composition and 16-bit dest fetches and stores using NEON.
This makes, for example, linear gradient blending on top of RGB16
156% faster (from 20.4 fps to 52.3 fps in my benchmark).
Task-number: QTBUG-6684
Reviewed-by: Gunnar Sletta
Diffstat (limited to 'src/gui')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 12 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 125 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 12 |
3 files changed, 134 insertions, 15 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index dc3b79b..917b910 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -175,7 +175,7 @@ Q_STATIC_TEMPLATE_FUNCTION uint * QT_FASTCALL destFetch(uint *buffer, QRasterBuf # define SPANFUNC_POINTER_DESTFETCH(Arg) destFetch<Arg> -static const DestFetchProc destFetchProc[QImage::NImageFormats] = +static DestFetchProc destFetchProc[QImage::NImageFormats] = { 0, // Format_Invalid destFetchMono, // Format_Mono, @@ -323,7 +323,7 @@ Q_STATIC_TEMPLATE_FUNCTION void QT_FASTCALL destStore(QRasterBuffer *rasterBuffe # define SPANFUNC_POINTER_DESTSTORE(DEST) destStore<DEST> -static const DestStoreProc destStoreProc[QImage::NImageFormats] = +static DestStoreProc destStoreProc[QImage::NImageFormats] = { 0, // Format_Invalid destStoreMono, // Format_Mono, @@ -2827,7 +2827,7 @@ static void QT_FASTCALL rasterop_SourceAndNotDestination(uint *dest, } } -static const CompositionFunctionSolid functionForModeSolid_C[] = { +static CompositionFunctionSolid functionForModeSolid_C[] = { comp_func_solid_SourceOver, comp_func_solid_DestinationOver, comp_func_solid_Clear, @@ -2865,7 +2865,7 @@ static const CompositionFunctionSolid functionForModeSolid_C[] = { static const CompositionFunctionSolid *functionForModeSolid = functionForModeSolid_C; -static const CompositionFunction functionForMode_C[] = { +static CompositionFunction functionForMode_C[] = { comp_func_SourceOver, comp_func_DestinationOver, comp_func_Clear, @@ -7971,6 +7971,10 @@ void qInitDrawhelperAsm() qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon; qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; + + functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; + destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; + destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; } 
#endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ca1d85f..946e100 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -114,6 +114,21 @@ pixman_composite_src_0565_8888_asm_neon (int32_t w, uint16_t *src, int32_t src_stride); +extern "C" void +pixman_composite_over_n_8_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint32_t src, + int32_t unused, + uint8_t *mask, + int32_t mask_stride); + +extern "C" void +pixman_composite_scanline_over_asm_neon (int32_t w, + const uint32_t *dst, + const uint32_t *src); + // qblendfunctions.cpp void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, @@ -163,6 +178,15 @@ void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, pixman_composite_over_8888_0565_asm_neon(w, h, dst, dbpl / 2, src, sbpl / 4); } +void qt_blend_argb32_on_argb32_scanline_neon(uint *dest, const uint *src, int length, uint const_alpha) +{ + if (const_alpha == 255) { + pixman_composite_scanline_over_asm_neon(length, dest, src); + } else { + qt_blend_argb32_on_argb32_neon((uchar *)dest, 4 * length, (uchar *)src, 4 * length, length, 1, (const_alpha * 256) / 255); + } +} + void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -287,17 +311,6 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl, } } -extern "C" void -pixman_composite_over_n_8_0565_asm_neon (int32_t w, - int32_t h, - uint16_t *dst, - int32_t dst_stride, - uint32_t src, - int32_t unused, - uint8_t *mask, - int32_t mask_stride); - - void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, int x, int y, quint32 color, const uchar *bitmap, @@ -449,6 +462,96 @@ void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha)); } 
+static inline void convert_8_pixels_rgb16_to_argb32(quint32 *dst, const quint16 *src) +{ + asm volatile ( + "vld1.16 { d0, d1 }, [%[SRC]]\n\t" + + /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format + and put data into d4 - red, d3 - green, d2 - blue */ + "vshrn.u16 d4, q0, #8\n\t" + "vshrn.u16 d3, q0, #3\n\t" + "vsli.u16 q0, q0, #5\n\t" + "vsri.u8 d4, d4, #5\n\t" + "vsri.u8 d3, d3, #6\n\t" + "vshrn.u16 d2, q0, #2\n\t" + + /* fill d5 - alpha with 0xff */ + "mov r2, #255\n\t" + "vdup.8 d5, r2\n\t" + + "vst4.8 { d2, d3, d4, d5 }, [%[DST]]" + : : [DST]"r" (dst), [SRC]"r" (src) + : "memory", "r2", "d0", "d1", "d2", "d3", "d4", "d5" + ); +} + +uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length) +{ + const ushort *data = (const ushort *)rasterBuffer->scanLine(y) + x; + + int i = 0; + for (; i < length - 7; i += 8) + convert_8_pixels_rgb16_to_argb32(&buffer[i], &data[i]); + + if (i < length) { + quint16 srcBuffer[8]; + quint32 dstBuffer[8]; + + int tail = length - i; + for (int j = 0; j < tail; ++j) + srcBuffer[j] = data[i + j]; + + convert_8_pixels_rgb16_to_argb32(dstBuffer, srcBuffer); + + for (int j = 0; j < tail; ++j) + buffer[i + j] = dstBuffer[j]; + } + + return buffer; +} + +static inline void convert_8_pixels_argb32_to_rgb16(quint16 *dst, const quint32 *src) +{ + asm volatile ( + "vld4.8 { d0, d1, d2, d3 }, [%[SRC]]\n\t" + + /* convert to r5g6b5 and store it into {d28, d29} */ + "vshll.u8 q14, d2, #8\n\t" + "vshll.u8 q8, d1, #8\n\t" + "vshll.u8 q9, d0, #8\n\t" + "vsri.u16 q14, q8, #5\n\t" + "vsri.u16 q14, q9, #11\n\t" + + "vst1.16 { d28, d29 }, [%[DST]]" + : : [DST]"r" (dst), [SRC]"r" (src) + : "memory", "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", "d28", "d29" + ); +} + +void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length) +{ + quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x; + + int i = 0; + for (; i < length - 
7; i += 8) + convert_8_pixels_argb32_to_rgb16(&data[i], &buffer[i]); + + if (i < length) { + quint32 srcBuffer[8]; + quint16 dstBuffer[8]; + + int tail = length - i; + for (int j = 0; j < tail; ++j) + srcBuffer[j] = buffer[i + j]; + + convert_8_pixels_argb32_to_rgb16(dstBuffer, srcBuffer); + + for (int j = 0; j < tail; ++j) + data[i + j] = dstBuffer[j]; + } +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 6f25243..d6a4509 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -74,6 +74,11 @@ void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, int w, int h, int const_alpha); +void qt_blend_argb32_on_argb32_scanline_neon(uint *dest, + const uint *src, + int length, + uint const_alpha); + void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -115,6 +120,13 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, + QRasterBuffer *rasterBuffer, + int x, int y, int length); + +void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, + int x, int y, const uint *buffer, int length); + #endif // QT_HAVE_NEON QT_END_NAMESPACE |