summaryrefslogtreecommitdiffstats
path: root/src/gui
diff options
context:
space:
mode:
author Samuel Rødal <samuel.rodal@nokia.com> 2010-09-01 06:57:36 (GMT)
committer Samuel Rødal <samuel.rodal@nokia.com> 2010-09-01 10:06:50 (GMT)
commit bc0c6e9bb53f935c659dda90c7968d7738705f38 (patch)
tree 26dec4934457eb431e57fc0643b150bef9e32d67 /src/gui
parent 7b6028276cf5de1ffd5ab8d6dede7cca12e906fd (diff)
download Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.zip
Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.tar.gz
Qt-bc0c6e9bb53f935c659dda90c7968d7738705f38.tar.bz2
Use NEON and preloading for 16 bit small / medium sized image blits.
This gives a nice speedup when blitting small and medium-sized images, by using preloading and by avoiding the overhead of a memcpy function call for each scanline. For larger image widths memcpy becomes more efficient. Speedups of up to 40% were measured for 64-pixel-wide images. For image widths between 2 and 16 pixels the speedup ranges between 12% and 28%.
Task-number: QT-3401
Reviewed-by: Benjamin Poulain <benjamin.poulain@nokia.com>
Diffstat (limited to 'src/gui')
-rw-r--r-- src/gui/painting/qblendfunctions.cpp 8
-rw-r--r-- src/gui/painting/qdrawhelper.cpp 1
-rw-r--r-- src/gui/painting/qdrawhelper_neon.cpp 98
-rw-r--r-- src/gui/painting/qdrawhelper_neon_p.h 5
4 files changed, 108 insertions, 4 deletions
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp
index 24908ce..0edf256 100644
--- a/src/gui/painting/qblendfunctions.cpp
+++ b/src/gui/painting/qblendfunctions.cpp
@@ -254,10 +254,10 @@ void qt_scale_image_argb32_on_rgb16(uchar *destPixels, int dbpl,
}
}
-static void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl,
- const uchar *src, int sbpl,
- int w, int h,
- int const_alpha)
+void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl,
+ const uchar *src, int sbpl,
+ int w, int h,
+ int const_alpha)
{
#ifdef QT_DEBUG_DRAW
printf("qt_blend_rgb16_on_rgb16: dst=(%p, %d), src=(%p, %d), dim=(%d, %d) alpha=%d\n",
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 5223458..5e1509d 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -8014,6 +8014,7 @@ void qInitDrawhelperAsm()
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
+ qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_neon;
qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index ed15c5c..0afd077 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -167,6 +167,14 @@ pixman_composite_scanline_over_asm_neon (int32_t w,
const uint32_t *dst,
const uint32_t *src);
+extern "C" void
+pixman_composite_src_0565_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint16_t *src,
+ int32_t src_stride);
+
// qblendfunctions.cpp
void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
@@ -200,6 +208,96 @@ void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl);
}
+// qblendfunctions.cpp
+void qt_blend_rgb16_on_rgb16(uchar *dst, int dbpl,
+ const uchar *src, int sbpl,
+ int w, int h,
+ int const_alpha);
+/*
+ * Copies one scanline of N 16-bit pixels from src to dst, where N is a
+ * compile-time template constant.
+ *
+ * The row is moved as N/2 32-bit words (dst and src are reinterpreted as
+ * quint32 pointers), followed by a single 16-bit copy of the last pixel
+ * when N is odd. For N == 0 nothing is copied; for N == 1 only the 16-bit
+ * copy runs.
+ *
+ * dstride is not used for the copy itself: it is only added to dst to form
+ * the address passed to __builtin_prefetch. The caller in this file steps
+ * dst by the same dstride after each row, so that address is where the next
+ * row will be written.
+ *
+ * NOTE(review): the quint32 casts read and write 32 bits through pointers
+ * declared as quint16. The caller only inspects the alignment of dst, so
+ * src may be merely 2-byte aligned here - confirm that the target tolerates
+ * unaligned 32-bit loads and that the build is safe against strict-aliasing
+ * assumptions.
+ */
+template <int N>
+static inline void scanLineBlit16(quint16 *dst, quint16 *src, int dstride)
+{
+ if (N >= 2) { // at least one full 32-bit word (two pixels) in the row
+ ((quint32 *)dst)[0] = ((quint32 *)src)[0];
+ __builtin_prefetch(dst + dstride, 1, 0); // rw = 1 (prefetch for write), locality = 0; issued once per row, right after the first store
+ }
+ for (int i = 1; i < N/2; ++i) // remaining whole words; word 0 was copied above
+ ((quint32 *)dst)[i] = ((quint32 *)src)[i];
+ if (N & 1) // odd pixel count: one 16-bit pixel is left after the word copies
+ dst[N-1] = src[N-1];
+}
+/*
+ * Copies a block of h rows, each Width 16-bit pixels wide, from src to dst.
+ * Width is a compile-time template constant. dstride and sstride are the
+ * row pitches of dst and src counted in quint16 elements; both pointers are
+ * advanced by them once per row.
+ *
+ * Bit 1 of the initial dst address selects one of two loops. If it is set,
+ * every row copies its first pixel on its own and passes the remaining
+ * Width-1 pixels to scanLineBlit16 starting at dst + 1. Otherwise the whole
+ * row is passed to scanLineBlit16<Width>. In both cases the destination
+ * pointer handed to scanLineBlit16 for the first row has bit 1 clear.
+ *
+ * NOTE(review): the alignment test is made once, on the first row, and
+ * never looks at src. It stays valid for later rows only if dstride is an
+ * even number of pixels - presumably guaranteed by the callers, confirm.
+ * NOTE(review): h is consumed by "while (h--)", so a negative h would not
+ * stop after zero rows - presumably callers always pass h >= 0.
+ */
+template <int Width>
+static inline void blockBlit16(quint16 *dst, quint16 *src, int dstride, int sstride, int h)
+{
+ union { // lets the pointer value be read back as an integer for the alignment test
+ quintptr address;
+ quint16 *pointer;
+ } u;
+
+ u.pointer = dst;
+
+ if (u.address & 2) { // dst starts on an odd 16-bit boundary, i.e. not on a 32-bit word
+ while (h--) {
+ // align dst
+ dst[0] = src[0];
+ if (Width > 1) // compile-time condition; skips the call when the row is a single pixel
+ scanLineBlit16<Width-1>(dst + 1, src + 1, dstride);
+ dst += dstride;
+ src += sstride;
+ }
+ } else {
+ while (h--) {
+ scanLineBlit16<Width>(dst, src, dstride);
+
+ dst += dstride;
+ src += sstride;
+ }
+ }
+}
+/*
+ * Blits a w x h block of 16-bit pixels from srcPixels to destPixels,
+ * choosing one of three code paths:
+ *
+ *  - const_alpha != 256 or w >= 150: forwards all arguments unchanged to
+ *    qt_blend_rgb16_on_rgb16 (defined in qblendfunctions.cpp).
+ *  - w from 1 to 15: calls blockBlit16<w>, so the row width is a
+ *    compile-time constant.
+ *  - any other w: calls pixman_composite_src_0565_0565_asm_neon.
+ *
+ * dbpl and sbpl are halved to obtain strides in quint16 units, so they are
+ * presumably the byte pitches of the destination and source rows - confirm
+ * against the callers. The signature matches the neighbouring
+ * qt_blend_*_neon functions so that it can be stored in qBlendFunctions.
+ */
+void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
+{
+ // Testing shows that the default memcpy is faster for widths of 150 and up.
+ if (const_alpha != 256 || w >= 150) { // anything other than const_alpha == 256 is also left to the generic routine
+ qt_blend_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha);
+ return;
+ }
+
+ int dstride = dbpl / 2; // stride in 16-bit pixels instead of the incoming unit
+ int sstride = sbpl / 2;
+
+ quint16 *dst = (quint16 *) destPixels;
+ quint16 *src = (quint16 *) srcPixels; // drops const: blockBlit16 and the pixman prototype both take a non-const source pointer
+
+ switch (w) { // each case returns from the function once its fixed-width blit is done
+#define BLOCKBLIT(n) case n: blockBlit16<n>(dst, src, dstride, sstride, h); return;
+ BLOCKBLIT(1);
+ BLOCKBLIT(2);
+ BLOCKBLIT(3);
+ BLOCKBLIT(4);
+ BLOCKBLIT(5);
+ BLOCKBLIT(6);
+ BLOCKBLIT(7);
+ BLOCKBLIT(8);
+ BLOCKBLIT(9);
+ BLOCKBLIT(10);
+ BLOCKBLIT(11);
+ BLOCKBLIT(12);
+ BLOCKBLIT(13);
+ BLOCKBLIT(14);
+ BLOCKBLIT(15);
+#undef BLOCKBLIT
+ default: // widths outside 1..15 fall through to the call below
+ break;
+ }
+
+ pixman_composite_src_0565_0565_asm_neon (w, h, dst, dstride, src, sstride); // strides are passed in 16-bit pixels, not bytes
+}
+
extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha);
void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index 451edbc..d25b7ec 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -84,6 +84,11 @@ void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
int w, int h,
int const_alpha);
+void qt_blend_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
int x, int y, quint32 color,
const uchar *bitmap,