diff options
author | Samuel Rødal <samuel.rodal@nokia.com> | 2010-09-01 06:57:36 (GMT) |
---|---|---|
committer | Samuel Rødal <samuel.rodal@nokia.com> | 2010-09-01 10:06:50 (GMT) |
commit | bc0c6e9bb53f935c659dda90c7968d7738705f38 (patch) | |
tree | 26dec4934457eb431e57fc0643b150bef9e32d67 /src/gui | |
parent | 7b6028276cf5de1ffd5ab8d6dede7cca12e906fd (diff) | |
download | Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.zip Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.tar.gz Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.tar.bz2 |
Use NEON and preloading for 16 bit small / medium sized image blits.
This gives a nice speedup for blitting of small and medium sized
images by using preloading and avoiding function call overhead to
memcpy for each scanline. For larger image widths memcpy becomes more
efficient.
Speedups of up to 40 % for 64 pixel wide images were measured.
For image widths between 2 and 16 the speedup ranges between 12 %
and 28 %.
Task-number: QT-3401
Reviewed-by: Benjamin Poulain <benjamin.poulain@nokia.com>
Diffstat (limited to 'src/gui')
-rw-r--r-- | src/gui/painting/qblendfunctions.cpp | 8 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 1 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 98 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 5 |
4 files changed, 108 insertions, 4 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp index 24908ce..0edf256 100644 --- a/src/gui/painting/qblendfunctions.cpp +++ b/src/gui/painting/qblendfunctions.cpp @@ -254,10 +254,10 @@ void qt_scale_image_argb32_on_rgb16(uchar *destPixels, int dbpl, } } -static void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl, - const uchar *src, int sbpl, - int w, int h, - int const_alpha) +void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl, + const uchar *src, int sbpl, + int w, int h, + int const_alpha) { #ifdef QT_DEBUG_DRAW printf("qt_blend_rgb16_on_rgb16: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n", diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 5223458..5e1509d 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -8014,6 +8014,7 @@ void qInitDrawhelperAsm() qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon; qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon; + qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon; qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon; qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon; diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ed15c5c..0afd077 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -167,6 +167,14 @@ pixman_composite_scanline_over_asm_neon (int32_t w, const uint32_t *dst, const uint32_t *src); +extern "C" void +pixman_composite_src_0565_0565_asm_neon (int32_t w, + int32_t h, + uint16_t *dst, + int32_t dst_stride, + uint16_t *src, + int32_t src_stride); + // qblendfunctions.cpp void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, @@ -200,6 +208,96 @@ void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl, pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl); } +// qblendfunctions.cpp +void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl, + const uchar *src, int sbpl, + int w, int h, + int const_alpha); + +template <int N> +static inline void scanLineBlit16(quint16 *dst, quint16 *src, int dstride) +{ + if (N >= 2) { + ((quint32 *)dst)[0] = ((quint32 *)src)[0]; + __builtin_prefetch(dst + dstride, 1, 0); + } + for (int i = 1; i < N/2; ++i) + ((quint32 *)dst)[i] = ((quint32 *)src)[i]; + if (N & 1) + dst[N-1] = src[N-1]; +} + +template <int Width> +static inline void blockBlit16(quint16 *dst, quint16 *src, int dstride, int sstride, int h) +{ + union { + quintptr address; + quint16 *pointer; + } u; + + u.pointer = dst; + + if (u.address & 2) { + while (h--) { + // align dst + dst[0] = src[0]; + if (Width > 1) + scanLineBlit16<Width-1>(dst + 1, src + 1, dstride); + dst += dstride; + src += sstride; + } + } else { + while (h--) { + scanLineBlit16<Width>(dst, src, dstride); + + dst += dstride; + src += sstride; + } + } +} + +void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha) +{ + // testing show that the default memcpy is faster for widths 150 and up + if (const_alpha != 256 || w >= 150) { + qt_blend_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); + return; + } + + int dstride = dbpl / 2; + int sstride = sbpl / 2; + + quint16 *dst = (quint16 *) destPixels; + quint16 *src = (quint16 *) srcPixels; + + switch (w) { +#define BLOCKBLIT(n) case n: blockBlit16<n>(dst, src, dstride, sstride, h); return; + BLOCKBLIT(1); + BLOCKBLIT(2); + BLOCKBLIT(3); + BLOCKBLIT(4); + BLOCKBLIT(5); + BLOCKBLIT(6); + BLOCKBLIT(7); + BLOCKBLIT(8); + BLOCKBLIT(9); + BLOCKBLIT(10); + BLOCKBLIT(11); + BLOCKBLIT(12); + BLOCKBLIT(13); + BLOCKBLIT(14); + BLOCKBLIT(15); +#undef BLOCKBLIT + default: + break; + } + + pixman_composite_src_0565_0565_asm_neon (w, h, dst, dstride, src, sstride); +} + extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha); void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl, diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 451edbc..d25b7ec 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -84,6 +84,11 @@ void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl, int w, int h, int const_alpha); +void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, int x, int y, quint32 color, const uchar *bitmap, |