summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@nokia.com>2010-03-26 10:47:01 (GMT)
committerEskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@nokia.com>2010-03-26 10:47:01 (GMT)
commite7eb7bdf63791ed03257f2f23b1f515e4d89e054 (patch)
tree1d580cea9ffbf342a029c73bd2cecc106811ff22
parent47472906fd00e0eff820870330d481c4229ee285 (diff)
parent41e9adb44137c8839d0d7e131802de198b0e7168 (diff)
downloadQt-e7eb7bdf63791ed03257f2f23b1f515e4d89e054.zip
Qt-e7eb7bdf63791ed03257f2f23b1f515e4d89e054.tar.gz
Qt-e7eb7bdf63791ed03257f2f23b1f515e4d89e054.tar.bz2
Merge branch '4.7' of git@scm.dev.nokia.troll.no:qt/oslo-staging-2 into 4.7
-rw-r--r--doc/src/legal/3rdparty.qdoc39
-rw-r--r--src/3rdparty/pixman/README26
-rw-r--r--src/3rdparty/pixman/pixman-arm-neon-asm.S1709
-rw-r--r--src/3rdparty/pixman/pixman-arm-neon-asm.h906
-rw-r--r--src/gui/image/qimage.cpp4
-rw-r--r--src/gui/image/qpixmap_raster.cpp4
-rw-r--r--src/gui/itemviews/qitemdelegate.cpp4
-rw-r--r--src/gui/painting/painting.pri16
-rw-r--r--src/gui/painting/qblendfunctions.cpp446
-rw-r--r--src/gui/painting/qblendfunctions_p.h497
-rw-r--r--src/gui/painting/qdrawhelper.cpp22
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp409
-rw-r--r--src/gui/painting/qdrawhelper_neon_asm.S192
-rw-r--r--src/gui/painting/qdrawhelper_neon_p.h58
-rw-r--r--src/gui/painting/qpaintbuffer.cpp303
-rw-r--r--src/gui/painting/qpaintbuffer_p.h8
-rw-r--r--src/gui/painting/qtransform.cpp15
-rw-r--r--src/plugins/bearer/corewlan/qcorewlanengine.h2
-rw-r--r--src/plugins/bearer/corewlan/qcorewlanengine.mm20
-rw-r--r--src/plugins/bearer/networkmanager/qnetworkmanagerengine.cpp14
-rw-r--r--src/plugins/mediaservices/directshow/mediaplayer/directshowmediatype.cpp28
-rw-r--r--src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.cpp7
-rw-r--r--src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.h3
-rw-r--r--src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.cpp59
-rw-r--r--src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.h4
-rw-r--r--src/plugins/mediaservices/gstreamer/qgstreamervideooverlay.cpp19
-rw-r--r--src/plugins/mediaservices/gstreamer/qx11videosurface.cpp10
-rw-r--r--src/plugins/mediaservices/gstreamer/qx11videosurface.h3
-rw-r--r--tools/qttracereplay/main.cpp99
29 files changed, 4385 insertions, 541 deletions
diff --git a/doc/src/legal/3rdparty.qdoc b/doc/src/legal/3rdparty.qdoc
index d608038..8d0cd2a 100644
--- a/doc/src/legal/3rdparty.qdoc
+++ b/doc/src/legal/3rdparty.qdoc
@@ -454,4 +454,43 @@
OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
See \c src/3rdparty/webkit/JavaScriptCore/wtf/dtoa.cpp for license details.
+
+ \section1 Pixman (\c pixman) version 0.17.11
+
+ \e{pixman is a library that provides low-level pixel manipulation
+ features such as image compositing and trapezoid rasterization.} -- quoted
+ from \c src/3rdparty/pixman/README
+
+ We are only using the pixman-arm-neon-asm.h and pixman-arm-neon-asm.S
+ source files which have the following copyright and license header:
+
+ \hr
+
+ Copyright © 2009 Nokia Corporation
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the "Software"),
+ to deal in the Software without restriction, including without limitation
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ and/or sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice (including the next
+ paragraph) shall be included in all copies or substantial portions of the
+ Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ DEALINGS IN THE SOFTWARE.
+
+ Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+
+ \hr
+
+ See \c src/3rdparty/pixman/pixman-arm-neon-asm.h and
+ \c src/3rdparty/pixman/pixman-arm-neon-asm.S
*/
diff --git a/src/3rdparty/pixman/README b/src/3rdparty/pixman/README
new file mode 100644
index 0000000..843b069
--- /dev/null
+++ b/src/3rdparty/pixman/README
@@ -0,0 +1,26 @@
+pixman is a library that provides low-level pixel manipulation
+features such as image compositing and trapezoid rasterization.
+
+Please submit bugs & patches to the libpixman bugzilla:
+
+ https://bugs.freedesktop.org/enter_bug.cgi?product=pixman
+
+All questions regarding this software should be directed to either the
+Xorg mailing list:
+
+ http://lists.freedesktop.org/mailman/listinfo/xorg
+
+or the cairo mailing list:
+
+ http://lists.freedesktop.org/mailman/listinfo/cairo
+
+The master development code repository can be found at:
+
+ git://anongit.freedesktop.org/git/pixman
+
+ http://gitweb.freedesktop.org/?p=pixman;a=summary
+
+For more information on the git code manager, see:
+
+ http://wiki.x.org/wiki/GitPage
+
diff --git a/src/3rdparty/pixman/pixman-arm-neon-asm.S b/src/3rdparty/pixman/pixman-arm-neon-asm.S
new file mode 100644
index 0000000..eb8cc4c
--- /dev/null
+++ b/src/3rdparty/pixman/pixman-arm-neon-asm.S
@@ -0,0 +1,1709 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+/*
+ * This file contains implementations of NEON optimized pixel processing
+ * functions. There is no full and detailed tutorial, but some functions
+ * (those which are exposing some new or interesting features) are
+ * extensively commented and can be used as examples.
+ *
+ * You may want to have a look at the comments for following functions:
+ * - pixman_composite_over_8888_0565_asm_neon
+ * - pixman_composite_over_n_8_0565_asm_neon
+ */
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .fpu neon
+ .arch armv7a
+ .altmacro
+
+#include "pixman-arm-neon-asm.h"
+
+/* Global configuration options and preferences */
+
+/*
+ * The code can optionally make use of unaligned memory accesses to improve
+ * performance of handling leading/trailing pixels for each scanline.
+ * Configuration variable RESPECT_STRICT_ALIGNMENT can be set to 0 for
+ * example in linux if unaligned memory accesses are not configured to
+ * generate.exceptions.
+ */
+.set RESPECT_STRICT_ALIGNMENT, 1
+
+/*
+ * Set default prefetch type. There is a choice between the following options:
+ *
+ * PREFETCH_TYPE_NONE (may be useful for the ARM cores where PLD is set to work
+ * as NOP to workaround some HW bugs or for whatever other reason)
+ *
+ * PREFETCH_TYPE_SIMPLE (may be useful for simple single-issue ARM cores where
+ * advanced prefetch intruduces heavy overhead)
+ *
+ * PREFETCH_TYPE_ADVANCED (useful for superscalar cores such as ARM Cortex-A8
+ * which can run ARM and NEON instructions simultaneously so that extra ARM
+ * instructions do not add (many) extra cycles, but improve prefetch efficiency)
+ *
+ * Note: some types of function can't support advanced prefetch and fallback
+ * to simple one (those which handle 24bpp pixels)
+ */
+.set PREFETCH_TYPE_DEFAULT, PREFETCH_TYPE_ADVANCED
+
+/* Prefetch distance in pixels for simple prefetch */
+.set PREFETCH_DISTANCE_SIMPLE, 64
+
+/*
+ * Implementation of pixman_composite_over_8888_0565_asm_neon
+ *
+ * This function takes a8r8g8b8 source buffer, r5g6b5 destination buffer and
+ * performs OVER compositing operation. Function fast_composite_over_8888_0565
+ * from pixman-fast-path.c does the same in C and can be used as a reference.
+ *
+ * First we need to have some NEON assembly code which can do the actual
+ * operation on the pixels and provide it to the template macro.
+ *
+ * Template macro quite conveniently takes care of emitting all the necessary
+ * code for memory reading and writing (including quite tricky cases of
+ * handling unaligned leading/trailing pixels), so we only need to deal with
+ * the data in NEON registers.
+ *
+ * NEON registers allocation in general is recommented to be the following:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5, d6, d7 - contain loaded destination pixels (if they are needed)
+ * d24, d25, d26, d27 - contain loading mask pixel data (if mask is used)
+ * d28, d29, d30, d31 - place for storing the result (destination pixels)
+ *
+ * As can be seen above, four 64-bit NEON registers are used for keeping
+ * intermediate pixel data and up to 8 pixels can be processed in one step
+ * for 32bpp formats (16 pixels for 16bpp, 32 pixels for 8bpp).
+ *
+ * This particular function uses the following registers allocation:
+ * d0, d1, d2, d3 - contain loaded source pixel data
+ * d4, d5 - contain loaded destination pixels (they are needed)
+ * d28, d29 - place for storing the result (destination pixels)
+ */
+
+/*
+ * Step one. We need to have some code to do some arithmetics on pixel data.
+ * This is implemented as a pair of macros: '*_head' and '*_tail'. When used
+ * back-to-back, they take pixel data from {d0, d1, d2, d3} and {d4, d5},
+ * perform all the needed calculations and write the result to {d28, d29}.
+ * The rationale for having two macros and not just one will be explained
+ * later. In practice, any single monolitic function which does the work can
+ * be split into two parts in any arbitrary way without affecting correctness.
+ *
+ * There is one special trick here too. Common template macro can optionally
+ * make our life a bit easier by doing R, G, B, A color components
+ * deinterleaving for 32bpp pixel formats (and this feature is used in
+ * 'pixman_composite_over_8888_0565_asm_neon' function). So it means that
+ * instead of having 8 packed pixels in {d0, d1, d2, d3} registers, we
+ * actually use d0 register for blue channel (a vector of eight 8-bit
+ * values), d1 register for green, d2 for red and d3 for alpha. This
+ * simple conversion can be also done with a few NEON instructions:
+ *
+ * Packed to planar conversion:
+ * vuzp.8 d0, d1
+ * vuzp.8 d2, d3
+ * vuzp.8 d1, d3
+ * vuzp.8 d0, d2
+ *
+ * Planar to packed conversion:
+ * vzip.8 d0, d2
+ * vzip.8 d1, d3
+ * vzip.8 d2, d3
+ * vzip.8 d0, d1
+ *
+ * But pixel can be loaded directly in planar format using VLD4.8 NEON
+ * instruction. It is 1 cycle slower than VLD1.32, so this is not always
+ * desirable, that's why deinterleaving is optional.
+ *
+ * But anyway, here is the code:
+ */
+.macro pixman_composite_over_8888_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3 /* invert source alpha */
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/*
+ * OK, now we got almost everything that we need. Using the above two
+ * macros, the work can be done right. But now we want to optimize
+ * it a bit. ARM Cortex-A8 is an in-order core, and benefits really
+ * a lot from good code scheduling and software pipelining.
+ *
+ * Let's construct some code, which will run in the core main loop.
+ * Some pseudo-code of the main loop will look like this:
+ * head
+ * while (...) {
+ * tail
+ * head
+ * }
+ * tail
+ *
+ * It may look a bit weird, but this setup allows to hide instruction
+ * latencies better and also utilize dual-issue capability more
+ * efficiently (make pairs of load-store and ALU instructions).
+ *
+ * So what we need now is a '*_tail_head' macro, which will be used
+ * in the core main loop. A trivial straightforward implementation
+ * of this macro would look like this:
+ *
+ * pixman_composite_over_8888_0565_process_pixblock_tail
+ * vst1.16 {d28, d29}, [DST_W, :128]!
+ * vld1.16 {d4, d5}, [DST_R, :128]!
+ * vld4.32 {d0, d1, d2, d3}, [SRC]!
+ * pixman_composite_over_8888_0565_process_pixblock_head
+ * cache_preload 8, 8
+ *
+ * Now it also got some VLD/VST instructions. We simply can't move from
+ * processing one block of pixels to the other one with just arithmetics.
+ * The previously processed data needs to be written to memory and new
+ * data needs to be fetched. Fortunately, this main loop does not deal
+ * with partial leading/trailing pixels and can load/store a full block
+ * of pixels in a bulk. Additionally, destination buffer is already
+ * 16 bytes aligned here (which is good for performance).
+ *
+ * New things here are DST_R, DST_W, SRC and MASK identifiers. These
+ * are the aliases for ARM registers which are used as pointers for
+ * accessing data. We maintain separate pointers for reading and writing
+ * destination buffer (DST_R and DST_W).
+ *
+ * Another new thing is 'cache_preload' macro. It is used for prefetching
+ * data into CPU L2 cache and improve performance when dealing with large
+ * images which are far larger than cache size. It uses one argument
+ * (actually two, but they need to be the same here) - number of pixels
+ * in a block. Looking into 'pixman-arm-neon-asm.h' can provide some
+ * details about this macro. Moreover, if good performance is needed
+ * the code from this macro needs to be copied into '*_tail_head' macro
+ * and mixed with the rest of code for optimal instructions scheduling.
+ * We are actually doing it below.
+ *
+ * Now after all the explanations, here is the optimized code.
+ * Different instruction streams (originaling from '*_head', '*_tail'
+ * and 'cache_preload' macro) use different indentation levels for
+ * better readability. Actually taking the code from one of these
+ * indentation levels and ignoring a few VLD/VST instructions would
+ * result in exactly the code from '*_head', '*_tail' or 'cache_preload'
+ * macro!
+ */
+
+#if 1
+
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ vqadd.u8 d16, d2, d20
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vqadd.u8 q9, q0, q11
+ vshrn.u16 d6, q2, #8
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vshll.u8 q14, d16, #8
+ PF add PF_X, PF_X, #8
+ vshll.u8 q8, d19, #8
+ PF tst PF_CTL, #0xF
+ vsri.u8 d6, d6, #5
+ PF addne PF_X, PF_X, #8
+ vmvn.8 d3, d3
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ vmull.u8 q10, d3, d6
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vsri.u16 q14, q8, #5
+ PF cmp PF_X, ORIG_W
+ vshll.u8 q9, d18, #8
+ vrshr.u16 q13, q10, #8
+ PF subge PF_X, PF_X, ORIG_W
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ PF subges PF_CTL, PF_CTL, #0x10
+ vsri.u16 q14, q9, #11
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vraddhn.u16 d22, q12, q15
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+#else
+
+/* If we did not care much about the performance, we would just use this... */
+.macro pixman_composite_over_8888_0565_process_pixblock_tail_head
+ pixman_composite_over_8888_0565_process_pixblock_tail
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vld4.32 {d0, d1, d2, d3}, [SRC]!
+ pixman_composite_over_8888_0565_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+#endif
+
+/*
+ * And now the final part. We are using 'generate_composite_function' macro
+ * to put all the stuff together. We are specifying the name of the function
+ * which we want to get, number of bits per pixel for the source, mask and
+ * destination (0 if unused, like mask in this case). Next come some bit
+ * flags:
+ * FLAG_DST_READWRITE - tells that the destination buffer is both read
+ * and written, for write-only buffer we would use
+ * FLAG_DST_WRITEONLY flag instead
+ * FLAG_DEINTERLEAVE_32BPP - tells that we prefer to work with planar data
+ * and separate color channels for 32bpp format.
+ * The next things are:
+ * - the number of pixels processed per iteration (8 in this case, because
+ * that's the maximum what can fit into four 64-bit NEON registers).
+ * - prefetch distance, measured in pixel blocks. In this case it is 5 times
+ * by 8 pixels. That would be 40 pixels, or up to 160 bytes. Optimal
+ * prefetch distance can be selected by running some benchmarks.
+ *
+ * After that we specify some macros, these are 'default_init',
+ * 'default_cleanup' here which are empty (but it is possible to have custom
+ * init/cleanup macros to be able to save/restore some extra NEON registers
+ * like d8-d15 or do anything else) followed by
+ * 'pixman_composite_over_8888_0565_process_pixblock_head',
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail' and
+ * 'pixman_composite_over_8888_0565_process_pixblock_tail_head'
+ * which we got implemented above.
+ *
+ * The last part is the NEON registers allocation scheme.
+ */
+generate_composite_function \
+ pixman_composite_over_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_0565_process_pixblock_head, \
+ pixman_composite_over_8888_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_0565_process_pixblock_head
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_0565_process_pixblock_tail
+ /* ... continue alpha blending */
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_0565_process_pixblock_tail_head
+ pixman_composite_over_n_0565_process_pixblock_tail
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ pixman_composite_over_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+ vmvn.8 d3, d3 /* invert source alpha */
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_0565_init, \
+ default_cleanup, \
+ pixman_composite_over_n_0565_process_pixblock_head, \
+ pixman_composite_over_n_0565_process_pixblock_tail, \
+ pixman_composite_over_n_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_0565_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q14, d2, #8
+ vshll.u8 q9, d0, #8
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_8888_0565_process_pixblock_tail_head
+ vsri.u16 q14, q8, #5
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vsri.u16 q14, q9, #11
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vshll.u8 q14, d2, #8
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vshll.u8 q9, d0, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_0565_asm_neon, 32, 0, 16, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_0565_process_pixblock_head, \
+ pixman_composite_src_8888_0565_process_pixblock_tail, \
+ pixman_composite_src_8888_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_8888_process_pixblock_head
+ vshrn.u16 d30, q0, #8
+ vshrn.u16 d29, q0, #3
+ vsli.u16 q0, q0, #5
+ vmov.u8 d31, #255
+ vsri.u8 d30, d30, #5
+ vsri.u8 d29, d29, #6
+ vshrn.u16 d28, q0, #2
+.endm
+
+.macro pixman_composite_src_0565_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_src_0565_8888_process_pixblock_tail_head
+ pixman_composite_src_0565_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.16 {d0, d1}, [SRC]!
+ pixman_composite_src_0565_8888_process_pixblock_head
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_8888_asm_neon, 16, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_8888_process_pixblock_head, \
+ pixman_composite_src_0565_8888_process_pixblock_tail, \
+ pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8000_8000_process_pixblock_head
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8000_8000_process_pixblock_tail
+.endm
+
+.macro pixman_composite_add_8000_8000_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ PF add PF_X, PF_X, #32
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #32
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8000_8000_asm_neon, 8, 0, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8000_8000_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_process_pixblock_tail_head
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ vqadd.u8 q14, q0, q2
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vqadd.u8 q15, q1, q3
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8000_8000_process_pixblock_head, \
+ pixman_composite_add_8000_8000_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_8888_process_pixblock_head
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8888_process_pixblock_tail_head
+ pixman_composite_over_8888_8888_process_pixblock_tail
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ pixman_composite_over_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d3[0]}, [DUMMY]
+ vdup.8 d0, d3[0]
+ vdup.8 d1, d3[1]
+ vdup.8 d2, d3[2]
+ vdup.8 d3, d3[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_reverse_n_8888_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ PF cmp PF_X, ORIG_W
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ vld4.8 {d0, d1, d2, d3}, [DST_R, :128]!
+ vmvn.8 d22, d3
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF subge PF_X, PF_X, ORIG_W
+ vmull.u8 q8, d22, d4
+ PF subges PF_CTL, PF_CTL, #0x10
+ vmull.u8 q9, d22, d5
+ vmull.u8 q10, d22, d6
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+ vmull.u8 q11, d22, d7
+.endm
+
+.macro pixman_composite_over_reverse_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d7[0]}, [DUMMY]
+ vdup.8 d4, d7[0]
+ vdup.8 d5, d7[1]
+ vdup.8 d6, d7[2]
+ vdup.8 d7, d7[3]
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_reverse_n_8888_init, \
+ default_cleanup, \
+ pixman_composite_over_8888_8888_process_pixblock_head, \
+ pixman_composite_over_8888_8888_process_pixblock_tail, \
+ pixman_composite_over_reverse_n_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 4, /* src_basereg */ \
+ 24 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_0565_process_pixblock_head
+ /* in */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+ /* now do alpha blending */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+.endm
+
+.macro pixman_composite_over_n_8_0565_process_pixblock_tail
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert to r5g6b5 */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_0565_process_pixblock_tail_head
+ pixman_composite_over_n_8_0565_process_pixblock_tail
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vld1.8 {d24}, [MASK]!
+ cache_preload 8, 8
+ pixman_composite_over_n_8_0565_process_pixblock_head
+.endm
+
+/*
+ * This function needs a special initialization of solid mask.
+ * Solid source pixel data is fetched from stack at ARGS_STACK_OFFSET
+ * offset, split into color components and replicated in d8-d11
+ * registers. Additionally, this function needs all the NEON registers,
+ * so it has to save d8-d15 registers which are callee saved according
+ * to ABI. These registers are restored from 'cleanup' macro. All the
+ * other NEON registers are caller saved, so can be clobbered freely
+ * without introducing any problems.
+ */
+.macro pixman_composite_over_n_8_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_0565_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_0565_asm_neon, 0, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_0565_init, \
+ pixman_composite_over_n_8_0565_cleanup, \
+ pixman_composite_over_n_8_0565_process_pixblock_head, \
+ pixman_composite_over_n_8_0565_process_pixblock_tail, \
+ pixman_composite_over_n_8_0565_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0565_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0565_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.16 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 16, 16
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0565_0565_asm_neon, 16, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0565_0565_process_pixblock_head, \
+ pixman_composite_src_0565_0565_process_pixblock_tail, \
+ pixman_composite_src_0565_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8_process_pixblock_tail_head
+ vst1.8 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #8
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vmov d1, d0
+ vmov q1, q0
+.endm
+
+.macro pixman_composite_src_n_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8_asm_neon, 0, 0, 8, \
+ FLAG_DST_WRITEONLY, \
+ 32, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8_init, \
+ pixman_composite_src_n_8_cleanup, \
+ pixman_composite_src_n_8_process_pixblock_head, \
+ pixman_composite_src_n_8_process_pixblock_tail, \
+ pixman_composite_src_n_8_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_0565_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_0565_process_pixblock_tail_head
+ vst1.16 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_0565_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #16
+ vsli.u64 d0, d0, #32
+ vmov d1, d0
+ vmov q1, q0
+.endm
+
+.macro pixman_composite_src_n_0565_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_0565_asm_neon, 0, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 16, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_0565_init, \
+ pixman_composite_src_n_0565_cleanup, \
+ pixman_composite_src_n_0565_process_pixblock_head, \
+ pixman_composite_src_n_0565_process_pixblock_tail, \
+ pixman_composite_src_n_0565_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_n_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_n_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_src_n_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vld1.32 {d0[0]}, [DUMMY]
+ vsli.u64 d0, d0, #32
+ vmov d1, d0
+ vmov q1, q0
+.endm
+
+.macro pixman_composite_src_n_8888_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_src_n_8888_asm_neon, 0, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 0, /* prefetch distance */ \
+ pixman_composite_src_n_8888_init, \
+ pixman_composite_src_n_8888_cleanup, \
+ pixman_composite_src_n_8888_process_pixblock_head, \
+ pixman_composite_src_n_8888_process_pixblock_tail, \
+ pixman_composite_src_n_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_8888_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_8888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.32 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_8888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_8888_8888_process_pixblock_head, \
+ pixman_composite_src_8888_8888_process_pixblock_tail, \
+ pixman_composite_src_8888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_x888_8888_process_pixblock_head
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_x888_8888_process_pixblock_tail_head
+ vst1.32 {d0, d1, d2, d3}, [DST_W, :128]!
+ vld1.32 {d0, d1, d2, d3}, [SRC]!
+ vorr q0, q0, q2
+ vorr q1, q1, q2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_x888_8888_init
+ vmov.u8 q2, #0xFF
+ vshl.u32 q2, q2, #24
+.endm
+
+generate_composite_function \
+ pixman_composite_src_x888_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_x888_8888_init, \
+ default_cleanup, \
+ pixman_composite_src_x888_8888_process_pixblock_head, \
+ pixman_composite_src_x888_8888_process_pixblock_tail, \
+ pixman_composite_src_x888_8888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_head
+ /* expecting deinterleaved source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24 (d25, d26, d27 are unused) */
+
+ /* in */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d24, d9
+ vmull.u8 q6, d24, d10
+ vmull.u8 q7, d24, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* source: d0 - blue, d1 - green, d2 - red, d3 - alpha */
+ /* destination: d4 - blue, d5 - green, d6 - red, d7 - alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_n_8_8888_process_pixblock_tail_head
+ pixman_composite_over_n_8_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24}, [MASK]!
+ cache_preload 8, 8
+ pixman_composite_over_n_8_8888_process_pixblock_head
+.endm
+
+.macro pixman_composite_over_n_8_8888_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8_8888_asm_neon, 0, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8_8888_init, \
+ pixman_composite_over_n_8_8888_cleanup, \
+ pixman_composite_over_n_8_8888_process_pixblock_head, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail, \
+ pixman_composite_over_n_8_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11}
+ * dest in {d4, d5, d6, d7 }
+ * mask in {d24, d25, d26, d27}
+ * output: updated src in {d0, d1, d2, d3 }
+ * updated mask in {d24, d25, d26, d3 }
+ */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d9
+ vmull.u8 q6, d26, d10
+ vmull.u8 q7, d27, d11
+ vmull.u8 q9, d11, d25
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vrshr.u16 q10, q7, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ vraddhn.u16 d26, q13, q6
+ vraddhn.u16 d3, q7, q10
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in {d28, d29, d30, d31}
+ */
+ vmvn.8 d24, d24
+ vmvn.8 d25, d25
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d25, d5
+ vmvn.8 d26, d26
+ vmvn.8 d27, d3
+ vmull.u8 q10, d26, d6
+ vmull.u8 q11, d27, d7
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail
+ /* ... continue 'combine_over_ca' replacement */
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vrshr.u16 q6, q10, #8
+ vrshr.u16 q7, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q6, q10
+ vraddhn.u16 d31, q7, q11
+ vld4.8 {d24, d25, d26, d27}, [MASK]!
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+ cache_preload 8, 8
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8888_8888_ca_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_8888_ca_asm_neon, 0, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_8888_ca_init, \
+ pixman_composite_over_n_8888_8888_ca_cleanup, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_head, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail, \
+ pixman_composite_over_n_8888_8888_ca_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_n_8_8_process_pixblock_head
+ /* expecting source data in {d8, d9, d10, d11} */
+ /* d8 - blue, d9 - green, d10 - red, d11 - alpha */
+ /* and destination data in {d4, d5, d6, d7} */
+ /* mask is in d24, d25, d26, d27 */
+ vmull.u8 q0, d24, d11
+ vmull.u8 q1, d25, d11
+ vmull.u8 q6, d26, d11
+ vmull.u8 q7, d27, d11
+ vrshr.u16 q10, q0, #8
+ vrshr.u16 q11, q1, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q13, q7, #8
+ vraddhn.u16 d0, q0, q10
+ vraddhn.u16 d1, q1, q11
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d3, q7, q13
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_n_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_n_8_8_process_pixblock_tail_head
+ pixman_composite_add_n_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24, d25, d26, d27}, [MASK]!
+ cache_preload 32, 32
+ pixman_composite_add_n_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_n_8_8_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_add_n_8_8_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_add_n_8_8_asm_neon, 0, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_n_8_8_init, \
+ pixman_composite_add_n_8_8_cleanup, \
+ pixman_composite_add_n_8_8_process_pixblock_head, \
+ pixman_composite_add_n_8_8_process_pixblock_tail, \
+ pixman_composite_add_n_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8_8_8_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d24, d0
+ vmull.u8 q9, d25, d1
+ vmull.u8 q10, d26, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8_8_8_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8_8_8_process_pixblock_tail_head
+ pixman_composite_add_8_8_8_process_pixblock_tail
+ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld1.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld1.8 {d24, d25, d26, d27}, [MASK]!
+ vld1.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 32, 32
+ pixman_composite_add_8_8_8_process_pixblock_head
+.endm
+
+.macro pixman_composite_add_8_8_8_init
+.endm
+
+.macro pixman_composite_add_8_8_8_cleanup
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8_8_8_asm_neon, 8, 8, 8, \
+ FLAG_DST_READWRITE, \
+ 32, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_add_8_8_8_init, \
+ pixman_composite_add_8_8_8_cleanup, \
+ pixman_composite_add_8_8_8_process_pixblock_head, \
+ pixman_composite_add_8_8_8_process_pixblock_tail, \
+ pixman_composite_add_8_8_8_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* mask in {d24, d25, d26, d27} */
+ vmull.u8 q8, d27, d0
+ vmull.u8 q9, d27, d1
+ vmull.u8 q10, d27, d2
+ vmull.u8 q11, d27, d3
+ vrshr.u16 q0, q8, #8
+ vrshr.u16 q1, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q9
+ vraddhn.u16 d2, q12, q10
+ vraddhn.u16 d3, q13, q11
+ vqadd.u8 q14, q0, q2
+ vqadd.u8 q15, q1, q3
+.endm
+
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ vld4.8 {d24, d25, d26, d27}, [MASK]!
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ pixman_composite_add_8888_8888_8888_process_pixblock_head
+.endm
+
+generate_composite_function \
+ pixman_composite_add_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+ pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_head
+ /* expecting source data in {d0, d1, d2, d3} */
+ /* destination data in {d4, d5, d6, d7} */
+ /* solid mask is in d15 */
+
+ /* 'in' */
+ vmull.u8 q8, d15, d3
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vrshr.u16 q13, q8, #8
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d3, q8, q13
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+ vmvn.8 d24, d3 /* get inverted alpha */
+ /* now do alpha blending */
+ vmull.u8 q8, d24, d4
+ vmull.u8 q9, d24, d5
+ vmull.u8 q10, d24, d6
+ vmull.u8 q11, d24, d7
+.endm
+
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q10, #8
+ vrshr.u16 q13, q11, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q10
+ vraddhn.u16 d31, q13, q11
+ vqadd.u8 q14, q0, q14
+ vqadd.u8 q15, q1, q15
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_n_8888_init
+ add DUMMY, sp, #48
+ vpush {d8-d15}
+ vld1.32 {d15[0]}, [DUMMY]
+ vdup.8 d15, d15[3]
+.endm
+
+.macro pixman_composite_over_8888_n_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_n_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_n_8888_init, \
+ pixman_composite_over_8888_n_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail_head
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8888_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld4.8 {d12, d13, d14, d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8888_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8888_8888_init, \
+ pixman_composite_over_8888_8888_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+generate_composite_function_single_scanline \
+ pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ pixman_composite_over_8888_8888_8888_init, \
+ pixman_composite_over_8888_8888_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 12 /* mask_basereg */
+
+/******************************************************************************/
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_8888_8_8888_process_pixblock_tail_head
+ vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
+ pixman_composite_over_8888_n_8888_process_pixblock_tail
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ cache_preload 8, 8
+ vld1.8 {d15}, [MASK]!
+ pixman_composite_over_8888_n_8888_process_pixblock_head
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_8888_8_8888_init
+ vpush {d8-d15}
+.endm
+
+.macro pixman_composite_over_8888_8_8888_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_8888_8_8888_init, \
+ pixman_composite_over_8888_8_8888_cleanup, \
+ pixman_composite_over_8888_n_8888_process_pixblock_head, \
+ pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+ pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 15 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0888_process_pixblock_head
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_0888_process_pixblock_tail_head
+ vst3.8 {d0, d1, d2}, [DST_W]!
+ vld3.8 {d0, d1, d2}, [SRC]!
+ cache_preload 8, 8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0888_asm_neon, 24, 0, 24, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0888_process_pixblock_head, \
+ pixman_composite_src_0888_0888_process_pixblock_tail, \
+ pixman_composite_src_0888_0888_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
+ vswp d0, d2
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
+ vst4.8 {d0, d1, d2, d3}, [DST_W]!
+ vld3.8 {d0, d1, d2}, [SRC]!
+ vswp d0, d2
+ cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_init
+ veor d3, d3, d3
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ pixman_composite_src_0888_8888_rev_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_head, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
+ 0, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
+ vshll.u8 q8, d1, #8
+ vshll.u8 q9, d2, #8
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
+ vshll.u8 q14, d0, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
+ vshll.u8 q14, d0, #8
+ vld3.8 {d0, d1, d2}, [SRC]!
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+ vshll.u8 q8, d1, #8
+ vst1.16 {d28, d29}, [DST_W, :128]!
+ vshll.u8 q9, d2, #8
+.endm
+
+generate_composite_function \
+ pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
+ FLAG_DST_WRITEONLY, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+ pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vraddhn.u16 d30, q11, q8
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
+ vrshr.u16 q11, q8, #8
+ vswp d3, d31
+ vrshr.u16 q12, q9, #8
+ vrshr.u16 q13, q10, #8
+ vld4.8 {d0, d1, d2, d3}, [SRC]!
+ vraddhn.u16 d30, q11, q8
+ PF add PF_X, PF_X, #8
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #8
+ PF subne PF_CTL, PF_CTL, #1
+ vraddhn.u16 d29, q12, q9
+ vraddhn.u16 d28, q13, q10
+ vmull.u8 q8, d3, d0
+ vmull.u8 q9, d3, d1
+ vmull.u8 q10, d3, d2
+ vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+ PF cmp PF_X, ORIG_W
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function \
+ pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
+ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 10, /* prefetch distance */ \
+ default_init, \
+ default_cleanup, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_head, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
+ pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 0, /* dst_r_basereg */ \
+ 0, /* src_basereg */ \
+ 0 /* mask_basereg */
diff --git a/src/3rdparty/pixman/pixman-arm-neon-asm.h b/src/3rdparty/pixman/pixman-arm-neon-asm.h
new file mode 100644
index 0000000..56c3fae
--- /dev/null
+++ b/src/3rdparty/pixman/pixman-arm-neon-asm.h
@@ -0,0 +1,906 @@
+/*
+ * Copyright © 2009 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
+ */
+
+/*
+ * This file contains a macro ('generate_composite_function') which can
+ * construct 2D image processing functions, based on a common template.
+ * Any combinations of source, destination and mask images with 8bpp,
+ * 16bpp, 24bpp, 32bpp color formats are supported.
+ *
+ * This macro takes care of:
+ * - handling of leading and trailing unaligned pixels
+ * - doing most of the work related to L2 cache preload
+ * - encourages the use of software pipelining for better instructions
+ * scheduling
+ *
+ * The user of this macro has to provide some configuration parameters
+ * (bit depths for the images, prefetch distance, etc.) and a set of
+ * macros, which should implement basic code chunks responsible for
+ * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
+ * examples.
+ *
+ * TODO:
+ * - try overlapped pixel method (from Ian Rickards) when processing
+ * exactly two blocks of pixels
+ * - maybe add an option to do reverse scanline processing
+ */
+
+/*
+ * Bit flags for 'generate_composite_function' macro which are used
+ * to tune generated functions behavior.
+ */
+.set FLAG_DST_WRITEONLY, 0
+.set FLAG_DST_READWRITE, 1
+.set FLAG_DEINTERLEAVE_32BPP, 2
+
+/*
+ * Offset in stack where mask and source pointer/stride can be accessed
+ * from 'init' macro. This is useful for doing special handling for solid mask.
+ */
+.set ARGS_STACK_OFFSET, 40
+
+/*
+ * Constants for selecting preferable prefetch type.
+ */
+.set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */
+.set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */
+.set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */
+
+/*
+ * Definitions of supplementary pixld/pixst macros (for partial load/store of
+ * pixel data).
+ */
+
+.macro pixldst1 op, elem_size, reg1, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
+.if abits > 0
+ op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]!
+.else
+ op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]!
+.endif
+.endm
+
+.macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
+ op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
+.endm
+
+.macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
+ op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]!
+.endm
+
+.macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
+ op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]!
+.endm
+
+.macro pixldst numbytes, op, elem_size, basereg, mem_operand, abits
+.if numbytes == 32
+ pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif numbytes == 16
+ pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
+.elseif numbytes == 8
+ pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
+.elseif numbytes == 4
+ .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
+ pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
+ .elseif elem_size == 16
+ pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
+ pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
+ .else
+ pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
+ .endif
+.elseif numbytes == 2
+ .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
+ pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
+ .else
+ pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
+ pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
+ .endif
+.elseif numbytes == 1
+ pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
+.else
+ .error "unsupported size: numbytes"
+.endif
+.endm
+
+.macro pixld numpix, bpp, basereg, mem_operand, abits=0
+.if bpp > 0
+.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif (bpp == 24) && (numpix == 8)
+ pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
+.elseif (bpp == 24) && (numpix == 4)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
+.elseif (bpp == 24) && (numpix == 2)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
+.elseif (bpp == 24) && (numpix == 1)
+ pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+.else
+ pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
+.endif
+.endif
+.endm
+
+.macro pixst numpix, bpp, basereg, mem_operand, abits=0
+.if bpp > 0
+.if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
+ %(basereg+6), %(basereg+7), mem_operand, abits
+.elseif (bpp == 24) && (numpix == 8)
+ pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
+.elseif (bpp == 24) && (numpix == 4)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
+.elseif (bpp == 24) && (numpix == 2)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
+.elseif (bpp == 24) && (numpix == 1)
+ pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
+.else
+ pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
+.endif
+.endif
+.endm
+
+.macro pixld_a numpix, bpp, basereg, mem_operand
+.if (bpp * numpix) <= 128
+ pixld numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.else
+ pixld numpix, bpp, basereg, mem_operand, 128
+.endif
+.endm
+
+.macro pixst_a numpix, bpp, basereg, mem_operand
+.if (bpp * numpix) <= 128
+ pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
+.else
+ pixst numpix, bpp, basereg, mem_operand, 128
+.endif
+.endm
+
+.macro vuzp8 reg1, reg2
+ vuzp.8 d&reg1, d&reg2
+.endm
+
+.macro vzip8 reg1, reg2
+ vzip.8 d&reg1, d&reg2
+.endm
+
+/* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
+.macro pixdeinterleave bpp, basereg
+.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vuzp8 %(basereg+0), %(basereg+1)
+ vuzp8 %(basereg+2), %(basereg+3)
+ vuzp8 %(basereg+1), %(basereg+3)
+ vuzp8 %(basereg+0), %(basereg+2)
+.endif
+.endm
+
+/* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
+.macro pixinterleave bpp, basereg
+.if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
+ vzip8 %(basereg+0), %(basereg+2)
+ vzip8 %(basereg+1), %(basereg+3)
+ vzip8 %(basereg+2), %(basereg+3)
+ vzip8 %(basereg+0), %(basereg+1)
+.endif
+.endm
+
+/*
+ * This is a macro for implementing cache preload. The main idea is that
+ * cache preload logic is mostly independent from the rest of pixels
+ * processing code. It starts at the top left pixel and moves forward
+ * across pixels and can jump across scanlines. Prefetch distance is
+ * handled in an 'incremental' way: it starts from 0 and advances to the
+ * optimal distance over time. After reaching optimal prefetch distance,
+ * it is kept constant. There are some checks which prevent prefetching
+ * unneeded pixel lines below the image (but it still can prefetch a bit
+ * more data on the right side of the image - not a big issue and may
+ * be actually helpful when rendering text glyphs). Additional trick is
+ * the use of LDR instruction for prefetch instead of PLD when moving to
+ * the next line, the point is that we have a high chance of getting TLB
+ * miss in this case, and PLD would be useless.
+ *
+ * This sounds like it may introduce a noticeable overhead (when working with
+ * fully cached data). But in reality, due to having a separate pipeline and
+ * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
+ * execute simultaneously with NEON and be completely shadowed by it. Thus
+ * we get no performance overhead at all (*). This looks like a very nice
+ * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
+ * but still can implement some rather advanced prefetch logic in sofware
+ * for almost zero cost!
+ *
+ * (*) The overhead of the prefetcher is visible when running some trivial
+ * pixels processing like simple copy. Anyway, having prefetch is a must
+ * when working with the graphics data.
+ */
+.macro PF a, x:vararg
+.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
+ a x
+.endif
+.endm
+
+.macro cache_preload std_increment, boost_increment
+.if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
+.if regs_shortage
+ PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
+.endif
+.if std_increment != 0
+ PF add PF_X, PF_X, #std_increment
+.endif
+ PF tst PF_CTL, #0xF
+ PF addne PF_X, PF_X, #boost_increment
+ PF subne PF_CTL, PF_CTL, #1
+ PF cmp PF_X, ORIG_W
+.if src_bpp_shift >= 0
+ PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+.endif
+.if dst_r_bpp != 0
+ PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
+.endif
+.if mask_bpp_shift >= 0
+ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
+.endif
+ PF subge PF_X, PF_X, ORIG_W
+ PF subges PF_CTL, PF_CTL, #0x10
+.if src_bpp_shift >= 0
+ PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endif
+.if dst_r_bpp != 0
+ PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
+.endif
+.if mask_bpp_shift >= 0
+ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
+.endif
+.endif
+.endm
+
+.macro cache_preload_simple
+.if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
+.if src_bpp > 0
+ pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
+.endif
+.if dst_r_bpp > 0
+ pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
+.endif
+.if mask_bpp > 0
+ pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
+.endif
+.endif
+.endm
+
+/*
+ * Macro which is used to process leading pixels until destination
+ * pointer is properly aligned (at 16 bytes boundary). When destination
+ * buffer uses 16bpp format, this is unnecessary, or even pointless.
+ */
+.macro ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+.if dst_w_bpp != 24
+ tst DST_R, #0xF
+ beq 2f
+
+.irp lowbit, 1, 2, 4, 8, 16
+local skip1
+.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_R, #lowbit
+ beq 1f
+.endif
+ pixld (lowbit * 8 / dst_w_bpp), src_bpp, src_basereg, SRC
+ pixld (lowbit * 8 / dst_w_bpp), mask_bpp, mask_basereg, MASK
+.if dst_r_bpp > 0
+ pixld_a (lowbit * 8 / dst_r_bpp), dst_r_bpp, dst_r_basereg, DST_R
+.else
+ add DST_R, DST_R, #lowbit
+.endif
+ PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
+ sub W, W, #(lowbit * 8 / dst_w_bpp)
+1:
+.endif
+.endr
+ pixdeinterleave src_bpp, src_basereg
+ pixdeinterleave mask_bpp, mask_basereg
+ pixdeinterleave dst_r_bpp, dst_r_basereg
+
+ process_pixblock_head
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+ process_pixblock_tail
+
+ pixinterleave dst_w_bpp, dst_w_basereg
+.irp lowbit, 1, 2, 4, 8, 16
+.if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
+.if lowbit < 16 /* we don't need more than 16-byte alignment */
+ tst DST_W, #lowbit
+ beq 1f
+.endif
+ pixst_a (lowbit * 8 / dst_w_bpp), dst_w_bpp, dst_w_basereg, DST_W
+1:
+.endif
+.endr
+.endif
+2:
+.endm
+
+/*
+ * Special code for processing up to (pixblock_size - 1) remaining
+ * trailing pixels. As SIMD processing performs operation on
+ * pixblock_size pixels, anything smaller than this has to be loaded
+ * and stored in a special way. Loading and storing of pixel data is
+ * performed in such a way that we fill some 'slots' in the NEON
+ * registers (some slots naturally are unused), then perform compositing
+ * operation as usual. In the end, the data is taken from these 'slots'
+ * and saved to memory.
+ *
+ * cache_preload_flag - allows to suppress prefetch if
+ * set to 0
+ * dst_aligned_flag - selects whether destination buffer
+ * is aligned
+ */
+.macro process_trailing_pixels cache_preload_flag, \
+ dst_aligned_flag, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ tst W, #(pixblock_size - 1)
+ beq 2f
+.irp chunk_size, 16, 8, 4, 2, 1
+.if pixblock_size > chunk_size
+ tst W, #chunk_size
+ beq 1f
+ pixld chunk_size, src_bpp, src_basereg, SRC
+ pixld chunk_size, mask_bpp, mask_basereg, MASK
+.if dst_aligned_flag != 0
+ pixld_a chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+.else
+ pixld chunk_size, dst_r_bpp, dst_r_basereg, DST_R
+.endif
+.if cache_preload_flag != 0
+ PF add PF_X, PF_X, #chunk_size
+.endif
+1:
+.endif
+.endr
+ pixdeinterleave src_bpp, src_basereg
+ pixdeinterleave mask_bpp, mask_basereg
+ pixdeinterleave dst_r_bpp, dst_r_basereg
+
+ process_pixblock_head
+.if cache_preload_flag != 0
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+.endif
+ process_pixblock_tail
+ pixinterleave dst_w_bpp, dst_w_basereg
+.irp chunk_size, 16, 8, 4, 2, 1
+.if pixblock_size > chunk_size
+ tst W, #chunk_size
+ beq 1f
+.if dst_aligned_flag != 0
+ pixst_a chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+.else
+ pixst chunk_size, dst_w_bpp, dst_w_basereg, DST_W
+.endif
+1:
+.endif
+.endr
+2:
+.endm
+
+/*
+ * Macro, which performs all the needed operations to switch to the next
+ * scanline and start the next loop iteration unless all the scanlines
+ * are already processed.
+ */
+.macro advance_to_next_scanline start_of_loop_label
+.if regs_shortage
+ ldrd W, [sp] /* load W and H (width and height) from stack */
+.else
+ mov W, ORIG_W
+.endif
+ add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
+.if src_bpp != 0
+ add SRC, SRC, SRC_STRIDE, lsl #src_bpp_shift
+.endif
+.if mask_bpp != 0
+ add MASK, MASK, MASK_STRIDE, lsl #mask_bpp_shift
+.endif
+.if (dst_w_bpp != 24)
+ sub DST_W, DST_W, W, lsl #dst_bpp_shift
+.endif
+.if (src_bpp != 24) && (src_bpp != 0)
+ sub SRC, SRC, W, lsl #src_bpp_shift
+.endif
+.if (mask_bpp != 24) && (mask_bpp != 0)
+ sub MASK, MASK, W, lsl #mask_bpp_shift
+.endif
+ subs H, H, #1
+ mov DST_R, DST_W
+.if regs_shortage
+ str H, [sp, #4] /* save updated height to stack */
+.endif
+ bge start_of_loop_label
+.endm
+
+/*
+ * Registers are allocated in the following way by default:
+ * d0, d1, d2, d3 - reserved for loading source pixel data
+ * d4, d5, d6, d7 - reserved for loading destination pixel data
+ * d24, d25, d26, d27 - reserved for loading mask pixel data
+ * d28, d29, d30, d31 - final destination pixel data for writeback to memory
+ */
+.macro generate_composite_function fname, \
+ src_bpp_, \
+ mask_bpp_, \
+ dst_w_bpp_, \
+ flags, \
+ pixblock_size_, \
+ prefetch_distance, \
+ init, \
+ cleanup, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head, \
+ dst_w_basereg_ = 28, \
+ dst_r_basereg_ = 4, \
+ src_basereg_ = 0, \
+ mask_basereg_ = 24
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+ push {r4-r12, lr} /* save all registers */
+
+/*
+ * Select prefetch type for this function. If prefetch distance is
+ * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
+ * has to be used instead of ADVANCED.
+ */
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
+.if prefetch_distance == 0
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+.elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
+ ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
+.endif
+
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set pixblock_size, pixblock_size_
+ .set dst_w_basereg, dst_w_basereg_
+ .set dst_r_basereg, dst_r_basereg_
+ .set src_basereg, src_basereg_
+ .set mask_basereg, mask_basereg_
+
+/*
+ * Assign symbolic names to registers
+ */
+ W .req r0 /* width (is updated during processing) */
+ H .req r1 /* height (is updated during processing) */
+ DST_W .req r2 /* destination buffer pointer for writes */
+ DST_STRIDE .req r3 /* destination image stride */
+ SRC .req r4 /* source buffer pointer */
+ SRC_STRIDE .req r5 /* source image stride */
+ DST_R .req r6 /* destination buffer pointer for reads */
+
+ MASK .req r7 /* mask pointer */
+ MASK_STRIDE .req r8 /* mask stride */
+
+ PF_CTL .req r9 /* combined lines counter and prefetch */
+ /* distance increment counter */
+ PF_X .req r10 /* pixel index in a scanline for current */
+ /* pretetch position */
+ PF_SRC .req r11 /* pointer to source scanline start */
+ /* for prefetch purposes */
+ PF_DST .req r12 /* pointer to destination scanline start */
+ /* for prefetch purposes */
+ PF_MASK .req r14 /* pointer to mask scanline start */
+ /* for prefetch purposes */
+/*
+ * Check whether we have enough registers for all the local variables.
+ * If we don't have enough registers, original width and height are
+ * kept on top of stack (and 'regs_shortage' variable is set to indicate
+ * this for the rest of code). Even if there are enough registers, the
+ * allocation scheme may be a bit different depending on whether source
+ * or mask is not used.
+ */
+.if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED)
+ ORIG_W .req r10 /* saved original width */
+ DUMMY .req r12 /* temporary register */
+ .set regs_shortage, 0
+.elseif mask_bpp == 0
+ ORIG_W .req r7 /* saved original width */
+ DUMMY .req r8 /* temporary register */
+ .set regs_shortage, 0
+.elseif src_bpp == 0
+ ORIG_W .req r4 /* saved original width */
+ DUMMY .req r5 /* temporary register */
+ .set regs_shortage, 0
+.else
+ ORIG_W .req r1 /* saved original width */
+ DUMMY .req r1 /* temporary register */
+ .set regs_shortage, 1
+.endif
+
+ .set mask_bpp_shift, -1
+.if src_bpp == 32
+ .set src_bpp_shift, 2
+.elseif src_bpp == 24
+ .set src_bpp_shift, 0
+.elseif src_bpp == 16
+ .set src_bpp_shift, 1
+.elseif src_bpp == 8
+ .set src_bpp_shift, 0
+.elseif src_bpp == 0
+ .set src_bpp_shift, -1
+.else
+ .error "requested src bpp (src_bpp) is not supported"
+.endif
+.if mask_bpp == 32
+ .set mask_bpp_shift, 2
+.elseif mask_bpp == 24
+ .set mask_bpp_shift, 0
+.elseif mask_bpp == 8
+ .set mask_bpp_shift, 0
+.elseif mask_bpp == 0
+ .set mask_bpp_shift, -1
+.else
+ .error "requested mask bpp (mask_bpp) is not supported"
+.endif
+.if dst_w_bpp == 32
+ .set dst_bpp_shift, 2
+.elseif dst_w_bpp == 24
+ .set dst_bpp_shift, 0
+.elseif dst_w_bpp == 16
+ .set dst_bpp_shift, 1
+.elseif dst_w_bpp == 8
+ .set dst_bpp_shift, 0
+.else
+ .error "requested dst bpp (dst_w_bpp) is not supported"
+.endif
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+ .set dst_r_bpp, dst_w_bpp
+.else
+ .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+ .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+ .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+.if prefetch_distance < 0 || prefetch_distance > 15
+ .error "invalid prefetch distance (prefetch_distance)"
+.endif
+
+.if src_bpp > 0
+ ldr SRC, [sp, #40]
+.endif
+.if mask_bpp > 0
+ ldr MASK, [sp, #48]
+.endif
+ PF mov PF_X, #0
+.if src_bpp > 0
+ ldr SRC_STRIDE, [sp, #44]
+.endif
+.if mask_bpp > 0
+ ldr MASK_STRIDE, [sp, #52]
+.endif
+ mov DST_R, DST_W
+
+.if src_bpp == 24
+ sub SRC_STRIDE, SRC_STRIDE, W
+ sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
+.endif
+.if mask_bpp == 24
+ sub MASK_STRIDE, MASK_STRIDE, W
+ sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
+.endif
+.if dst_w_bpp == 24
+ sub DST_STRIDE, DST_STRIDE, W
+ sub DST_STRIDE, DST_STRIDE, W, lsl #1
+.endif
+
+/*
+ * Setup advanced prefetcher initial state
+ */
+ PF mov PF_SRC, SRC
+ PF mov PF_DST, DST_R
+ PF mov PF_MASK, MASK
+ /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
+ PF mov PF_CTL, H, lsl #4
+ PF add PF_CTL, #(prefetch_distance - 0x10)
+
+ init
+.if regs_shortage
+ push {r0, r1}
+.endif
+ subs H, H, #1
+.if regs_shortage
+ str H, [sp, #4] /* save updated height to stack */
+.else
+ mov ORIG_W, W
+.endif
+ blt 9f
+ cmp W, #(pixblock_size * 2)
+ blt 8f
+/*
+ * This is the start of the pipelined loop, which if optimized for
+ * long scanlines
+ */
+0:
+ ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ PF add PF_X, PF_X, #pixblock_size
+ process_pixblock_head
+ cache_preload 0, pixblock_size
+ cache_preload_simple
+ subs W, W, #(pixblock_size * 2)
+ blt 2f
+1:
+ process_pixblock_tail_head
+ cache_preload_simple
+ subs W, W, #pixblock_size
+ bge 1b
+2:
+ process_pixblock_tail
+ pixst_a pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+
+ /* Process the remaining trailing pixels in the scanline */
+ process_trailing_pixels 1, 1, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ advance_to_next_scanline 0b
+
+.if regs_shortage
+ pop {r0, r1}
+.endif
+ cleanup
+ pop {r4-r12, pc} /* exit */
+/*
+ * This is the start of the loop, designed to process images with small width
+ * (less than pixblock_size * 2 pixels). In this case neither pipelining
+ * nor prefetch are used.
+ */
+8:
+ /* Process exactly pixblock_size pixels if needed */
+ tst W, #pixblock_size
+ beq 1f
+ pixld pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ process_pixblock_head
+ process_pixblock_tail
+ pixst pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+1:
+ /* Process the remaining trailing pixels in the scanline */
+ process_trailing_pixels 0, 0, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+ advance_to_next_scanline 8b
+9:
+.if regs_shortage
+ pop {r0, r1}
+.endif
+ cleanup
+ pop {r4-r12, pc} /* exit */
+
+ .unreq SRC
+ .unreq MASK
+ .unreq DST_R
+ .unreq DST_W
+ .unreq ORIG_W
+ .unreq W
+ .unreq H
+ .unreq SRC_STRIDE
+ .unreq DST_STRIDE
+ .unreq MASK_STRIDE
+ .unreq PF_CTL
+ .unreq PF_X
+ .unreq PF_SRC
+ .unreq PF_DST
+ .unreq PF_MASK
+ .unreq DUMMY
+ .endfunc
+.endm
+
+/*
+ * A simplified variant of function generation template for a single
+ * scanline processing (for implementing pixman combine functions)
+ */
+.macro generate_composite_function_single_scanline fname, \
+ src_bpp_, \
+ mask_bpp_, \
+ dst_w_bpp_, \
+ flags, \
+ pixblock_size_, \
+ init, \
+ cleanup, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head, \
+ dst_w_basereg_ = 28, \
+ dst_r_basereg_ = 4, \
+ src_basereg_ = 0, \
+ mask_basereg_ = 24
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set pixblock_size, pixblock_size_
+ .set dst_w_basereg, dst_w_basereg_
+ .set dst_r_basereg, dst_r_basereg_
+ .set src_basereg, src_basereg_
+ .set mask_basereg, mask_basereg_
+/*
+ * Assign symbolic names to registers
+ */
+ W .req r0 /* width (is updated during processing) */
+ DST_W .req r1 /* destination buffer pointer for writes */
+ SRC .req r2 /* source buffer pointer */
+ DST_R .req ip /* destination buffer pointer for reads */
+ MASK .req r3 /* mask pointer */
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+ .set dst_r_bpp, dst_w_bpp
+.else
+ .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+ .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+ .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+ init
+ mov DST_R, DST_W
+
+ cmp W, #pixblock_size
+ blt 8f
+
+ ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ subs W, W, #pixblock_size
+ blt 7f
+
+ /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ process_pixblock_head
+ subs W, W, #pixblock_size
+ blt 2f
+1:
+ process_pixblock_tail_head
+ subs W, W, #pixblock_size
+ bge 1b
+2:
+ process_pixblock_tail
+ pixst_a pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+7:
+ /* Process the remaining trailing pixels in the scanline (dst aligned) */
+ process_trailing_pixels 0, 1, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+8:
+ /* Process the remaining trailing pixels in the scanline (dst unaligned) */
+ process_trailing_pixels 0, 0, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+
+ .unreq SRC
+ .unreq MASK
+ .unreq DST_R
+ .unreq DST_W
+ .unreq W
+ .endfunc
+.endm
+
+.macro default_init
+.endm
+
+.macro default_cleanup
+.endm
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 94307de..233c58d 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -2988,19 +2988,19 @@ static void convert_Indexed8_to_X32(QImageData *dest, const QImageData *src, Qt:
colorTable.resize(256);
for (int i=0; i<256; ++i)
colorTable[i] = qRgb(i, i, i);
-
}
int w = src->width;
const uchar *src_data = src->data;
uchar *dest_data = dest->data;
+ int tableSize = colorTable.size() - 1;
for (int y = 0; y < src->height; y++) {
uint *p = (uint *)dest_data;
const uchar *b = src_data;
uint *end = p + w;
while (p < end)
- *p++ = colorTable.at(*b++);
+ *p++ = colorTable.at(qMin<int>(tableSize, *b++));
src_data += src->bytes_per_line;
dest_data += dest->bytes_per_line;
diff --git a/src/gui/image/qpixmap_raster.cpp b/src/gui/image/qpixmap_raster.cpp
index 0b1c18d..b183d0d 100644
--- a/src/gui/image/qpixmap_raster.cpp
+++ b/src/gui/image/qpixmap_raster.cpp
@@ -182,6 +182,7 @@ void QRasterPixmapData::fromImage(const QImage &sourceImage,
QImage::Format opaqueFormat = QNativeImage::systemFormat();
QImage::Format alphaFormat = QImage::Format_ARGB32_Premultiplied;
+#ifndef QT_HAVE_NEON
switch (opaqueFormat) {
case QImage::Format_RGB16:
alphaFormat = QImage::Format_ARGB8565_Premultiplied;
@@ -189,6 +190,7 @@ void QRasterPixmapData::fromImage(const QImage &sourceImage,
default: // We don't care about the others...
break;
}
+#endif
if (!sourceImage.hasAlphaChannel()
|| ((flags & Qt::NoOpaqueDetection) == 0
@@ -238,6 +240,7 @@ void QRasterPixmapData::fill(const QColor &color)
if (alpha != 255) {
if (!image.hasAlphaChannel()) {
QImage::Format toFormat;
+#ifndef QT_HAVE_NEON
if (image.format() == QImage::Format_RGB16)
toFormat = QImage::Format_ARGB8565_Premultiplied;
else if (image.format() == QImage::Format_RGB666)
@@ -247,6 +250,7 @@ void QRasterPixmapData::fill(const QColor &color)
else if (image.format() == QImage::Format_RGB444)
toFormat = QImage::Format_ARGB4444_Premultiplied;
else
+#endif
toFormat = QImage::Format_ARGB32_Premultiplied;
image = QImage(image.width(), image.height(), toFormat);
}
diff --git a/src/gui/itemviews/qitemdelegate.cpp b/src/gui/itemviews/qitemdelegate.cpp
index cba213b..d5f6fd2 100644
--- a/src/gui/itemviews/qitemdelegate.cpp
+++ b/src/gui/itemviews/qitemdelegate.cpp
@@ -69,6 +69,7 @@
#include <qdebug.h>
#include <qlocale.h>
#include <qdialog.h>
+#include <qmath.h>
#include <limits.h>
@@ -1148,7 +1149,8 @@ QRect QItemDelegate::textRectangle(QPainter * /*painter*/, const QRect &rect,
d->textLayout.setTextOption(d->textOption);
d->textLayout.setFont(font);
d->textLayout.setText(QItemDelegatePrivate::replaceNewLine(text));
- const QSize size = d->doTextLayout(rect.width()).toSize();
+ QSizeF fpSize = d->doTextLayout(rect.width());
+ const QSize size = QSize(qCeil(fpSize.width()), qCeil(fpSize.height()));
// ###: textRectangle should take style option as argument
const int textMargin = QApplication::style()->pixelMetric(QStyle::PM_FocusFrameHMargin) + 1;
return QRect(0, 0, size.width() + 2 * textMargin, size.height());
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index a6cc9c7..ed8ee76 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -91,6 +91,8 @@ SOURCES += \
HEADERS += \
painting/qpaintengine_raster_p.h \
+ painting/qdrawhelper_p.h \
+ painting/qblendfunctions_p.h \
painting/qrasterdefs_p.h \
painting/qgrayraster_p.h
@@ -379,11 +381,23 @@ symbian {
QMAKE_CXXFLAGS.ARMCC *= -O3
}
-neon {
+neon:*-g++* {
DEFINES += QT_HAVE_NEON
HEADERS += painting/qdrawhelper_neon_p.h
SOURCES += painting/qdrawhelper_neon.cpp
QMAKE_CXXFLAGS *= -mfpu=neon
+
+ DRAWHELPER_NEON_ASM_FILES = ../3rdparty/pixman/pixman-arm-neon-asm.S painting/qdrawhelper_neon_asm.S
+
+ neon_compiler.commands = $$QMAKE_CXX -c
+ neon_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+ neon_compiler.dependency_type = TYPE_C
+ neon_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+ neon_compiler.input = DRAWHELPER_NEON_ASM_FILES
+ neon_compiler.variable_out = OBJECTS
+ neon_compiler.name = compiling[neon] ${QMAKE_FILE_IN}
+ silent:neon_compiler.commands = @echo compiling[neon] ${QMAKE_FILE_IN} && $$neon_compiler.commands
+ QMAKE_EXTRA_COMPILERS += neon_compiler
}
contains(QT_CONFIG, zlib) {
diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp
index dc33896..24908ce 100644
--- a/src/gui/painting/qblendfunctions.cpp
+++ b/src/gui/painting/qblendfunctions.cpp
@@ -40,7 +40,7 @@
****************************************************************************/
#include <qmath.h>
-#include "qdrawhelper_p.h"
+#include "qblendfunctions_p.h"
QT_BEGIN_NAMESPACE
@@ -88,6 +88,8 @@ static inline quint16 convert_argb32_to_rgb16(quint32 spix)
struct Blend_RGB16_on_RGB16_NoAlpha {
inline void write(quint16 *dst, quint16 src) { *dst = src; }
+
+ inline void flush(void *) {}
};
struct Blend_RGB16_on_RGB16_ConstAlpha {
@@ -100,6 +102,8 @@ struct Blend_RGB16_on_RGB16_ConstAlpha {
*dst = BYTE_MUL_RGB16(src, m_alpha) + BYTE_MUL_RGB16(*dst, m_ialpha);
}
+ inline void flush(void *) {}
+
quint32 m_alpha;
quint32 m_ialpha;
};
@@ -114,6 +118,8 @@ struct Blend_ARGB24_on_RGB16_SourceAlpha {
*dst = s;
}
}
+
+ inline void flush(void *) {}
};
struct Blend_ARGB24_on_RGB16_SourceAndConstAlpha {
@@ -132,6 +138,8 @@ struct Blend_ARGB24_on_RGB16_SourceAndConstAlpha {
}
}
+ inline void flush(void *) {}
+
quint32 m_alpha;
};
@@ -145,6 +153,8 @@ struct Blend_ARGB32_on_RGB16_SourceAlpha {
*dst = s;
}
}
+
+ inline void flush(void *) {}
};
struct Blend_ARGB32_on_RGB16_SourceAndConstAlpha {
@@ -163,99 +173,11 @@ struct Blend_ARGB32_on_RGB16_SourceAndConstAlpha {
}
}
+ inline void flush(void *) {}
+
quint32 m_alpha;
};
-template <typename SRC, typename T>
-void qt_scale_image_16bit(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- const QRectF &targetRect,
- const QRectF &srcRect,
- const QRect &clip,
- T blender)
-{
- qreal sx = targetRect.width() / (qreal) srcRect.width();
- qreal sy = targetRect.height() / (qreal) srcRect.height();
-
- int ix = 0x00010000 / sx;
- int iy = 0x00010000 / sy;
-
-// qDebug() << "scale:" << endl
-// << " - target" << targetRect << endl
-// << " - source" << srcRect << endl
-// << " - clip" << clip << endl
-// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy;
-
- int cx1 = clip.x();
- int cx2 = clip.x() + clip.width();
- int cy1 = clip.top();
- int cy2 = clip.y() + clip.height();
-
- int tx1 = qRound(targetRect.left());
- int tx2 = qRound(targetRect.right());
- int ty1 = qRound(targetRect.top());
- int ty2 = qRound(targetRect.bottom());
-
- if (tx2 < tx1)
- qSwap(tx2, tx1);
-
- if (ty2 < ty1)
- qSwap(ty2, ty1);
-
- if (tx1 < cx1)
- tx1 = cx1;
-
- if (tx2 >= cx2)
- tx2 = cx2;
-
- if (tx1 >= tx2)
- return;
-
- if (ty1 < cy1)
- ty1 = cy1;
-
- if (ty2 >= cy2)
- ty2 = cy2;
-
- if (ty1 >= ty2)
- return;
-
- int h = ty2 - ty1;
- int w = tx2 - tx1;
-
-
- quint32 basex;
- quint32 srcy;
-
- if (sx < 0) {
- int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1;
- basex = quint32(srcRect.right() * 65536) + dstx;
- } else {
- int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1;
- basex = quint32(srcRect.left() * 65536) + dstx;
- }
- if (sy < 0) {
- int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1;
- srcy = quint32(srcRect.bottom() * 65536) + dsty;
- } else {
- int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1;
- srcy = quint32(srcRect.top() * 65536) + dsty;
- }
-
- quint16 *dst = ((quint16 *) (destPixels + ty1 * dbpl)) + tx1;
-
- while (h--) {
- const SRC *src = (const SRC *) (srcPixels + (srcy >> 16) * sbpl);
- int srcx = basex;
- for (int x=0; x<w; ++x) {
- blender.write(&dst[x], src[srcx >> 16]);
- srcx += ix;
- }
- dst = (quint16 *)(((uchar *) dst) + dbpl);
- srcy += iy;
- }
-}
-
void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
const QRectF &targetRect,
@@ -447,10 +369,10 @@ static void qt_blend_argb24_on_rgb16(uchar *destPixels, int dbpl,
-static void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
+void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
{
quint16 *dst = (quint16 *) destPixels;
const quint32 *src = (const quint32 *) srcPixels;
@@ -643,6 +565,8 @@ void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl,
struct Blend_RGB32_on_RGB32_NoAlpha {
inline void write(quint32 *dst, quint32 src) { *dst = src; }
+
+ inline void flush(void *) {}
};
struct Blend_RGB32_on_RGB32_ConstAlpha {
@@ -655,6 +579,8 @@ struct Blend_RGB32_on_RGB32_ConstAlpha {
*dst = BYTE_MUL(src, m_alpha) + BYTE_MUL(*dst, m_ialpha);
}
+ inline void flush(void *) {}
+
quint32 m_alpha;
quint32 m_ialpha;
};
@@ -663,6 +589,8 @@ struct Blend_ARGB32_on_ARGB32_SourceAlpha {
inline void write(quint32 *dst, quint32 src) {
*dst = src + BYTE_MUL(*dst, qAlpha(~src));
}
+
+ inline void flush(void *) {}
};
struct Blend_ARGB32_on_ARGB32_SourceAndConstAlpha {
@@ -676,98 +604,12 @@ struct Blend_ARGB32_on_ARGB32_SourceAndConstAlpha {
*dst = src + BYTE_MUL(*dst, qAlpha(~src));
}
+ inline void flush(void *) {}
+
quint32 m_alpha;
quint32 m_ialpha;
};
-template <typename T> void qt_scale_image_32bit(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- const QRectF &targetRect,
- const QRectF &srcRect,
- const QRect &clip,
- T blender)
-{
- qreal sx = targetRect.width() / (qreal) srcRect.width();
- qreal sy = targetRect.height() / (qreal) srcRect.height();
-
- int ix = 0x00010000 / sx;
- int iy = 0x00010000 / sy;
-
-// qDebug() << "scale:" << endl
-// << " - target" << targetRect << endl
-// << " - source" << srcRect << endl
-// << " - clip" << clip << endl
-// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy;
-
- int cx1 = clip.x();
- int cx2 = clip.x() + clip.width();
- int cy1 = clip.top();
- int cy2 = clip.y() + clip.height();
-
- int tx1 = qRound(targetRect.left());
- int tx2 = qRound(targetRect.right());
- int ty1 = qRound(targetRect.top());
- int ty2 = qRound(targetRect.bottom());
-
- if (tx2 < tx1)
- qSwap(tx2, tx1);
-
- if (ty2 < ty1)
- qSwap(ty2, ty1);
-
- if (tx1 < cx1)
- tx1 = cx1;
-
- if (tx2 >= cx2)
- tx2 = cx2;
-
- if (tx1 >= tx2)
- return;
-
- if (ty1 < cy1)
- ty1 = cy1;
-
- if (ty2 >= cy2)
- ty2 = cy2;
-
- if (ty1 >= ty2)
- return;
-
- int h = ty2 - ty1;
- int w = tx2 - tx1;
-
- quint32 basex;
- quint32 srcy;
-
- if (sx < 0) {
- int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1;
- basex = quint32(srcRect.right() * 65536) + dstx;
- } else {
- int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1;
- basex = quint32(srcRect.left() * 65536) + dstx;
- }
- if (sy < 0) {
- int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1;
- srcy = quint32(srcRect.bottom() * 65536) + dsty;
- } else {
- int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1;
- srcy = quint32(srcRect.top() * 65536) + dsty;
- }
-
- quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1;
-
- while (h--) {
- const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl);
- int srcx = basex;
- for (int x=0; x<w; ++x) {
- blender.write(&dst[x], src[srcx >> 16]);
- srcx += ix;
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- srcy += iy;
- }
-}
-
void qt_scale_image_rgb32_on_rgb32(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
const QRectF &targetRect,
@@ -818,244 +660,6 @@ void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl,
}
}
-struct QTransformImageVertex
-{
- qreal x, y, u, v; // destination coordinates (x, y) and source coordinates (u, v)
-};
-
-template <class SrcT, class DestT, class Blender>
-void qt_transform_image_rasterize(DestT *destPixels, int dbpl,
- const SrcT *srcPixels, int sbpl,
- const QTransformImageVertex &topLeft, const QTransformImageVertex &bottomLeft,
- const QTransformImageVertex &topRight, const QTransformImageVertex &bottomRight,
- const QRect &sourceRect,
- const QRect &clip,
- qreal topY, qreal bottomY,
- int dudx, int dvdx, int dudy, int dvdy, int u0, int v0,
- Blender blender)
-{
- int fromY = qMax(qRound(topY), clip.top());
- int toY = qMin(qRound(bottomY), clip.top() + clip.height());
- if (fromY >= toY)
- return;
-
- qreal leftSlope = (bottomLeft.x - topLeft.x) / (bottomLeft.y - topLeft.y);
- qreal rightSlope = (bottomRight.x - topRight.x) / (bottomRight.y - topRight.y);
- int dx_l = int(leftSlope * 0x10000);
- int dx_r = int(rightSlope * 0x10000);
- int x_l = int((topLeft.x + (0.5 + fromY - topLeft.y) * leftSlope + 0.5) * 0x10000);
- int x_r = int((topRight.x + (0.5 + fromY - topRight.y) * rightSlope + 0.5) * 0x10000);
-
- int fromX, toX, x1, x2, u, v, i, ii;
- DestT *line;
- for (int y = fromY; y < toY; ++y) {
- line = reinterpret_cast<DestT *>(reinterpret_cast<uchar *>(destPixels) + y * dbpl);
-
- fromX = qMax(x_l >> 16, clip.left());
- toX = qMin(x_r >> 16, clip.left() + clip.width());
- if (fromX < toX) {
- // Because of rounding, we can get source coordinates outside the source image.
- // Clamp these coordinates to the source rect to avoid segmentation fault and
- // garbage on the screen.
-
- // Find the first pixel on the current scan line where the source coordinates are within the source rect.
- x1 = fromX;
- u = x1 * dudx + y * dudy + u0;
- v = x1 * dvdx + y * dvdy + v0;
- for (; x1 < toX; ++x1) {
- int uu = u >> 16;
- int vv = v >> 16;
- if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width()
- && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) {
- break;
- }
- u += dudx;
- v += dvdx;
- }
-
- // Find the last pixel on the current scan line where the source coordinates are within the source rect.
- x2 = toX;
- u = (x2 - 1) * dudx + y * dudy + u0;
- v = (x2 - 1) * dvdx + y * dvdy + v0;
- for (; x2 > x1; --x2) {
- int uu = u >> 16;
- int vv = v >> 16;
- if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width()
- && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) {
- break;
- }
- u -= dudx;
- v -= dvdx;
- }
-
- // Set up values at the beginning of the scan line.
- u = fromX * dudx + y * dudy + u0;
- v = fromX * dvdx + y * dvdy + v0;
- line += fromX;
-
- // Beginning of the scan line, with per-pixel checks.
- i = x1 - fromX;
- while (i) {
- int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1);
- int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1);
- blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]);
- u += dudx;
- v += dvdx;
- ++line;
- --i;
- }
-
- // Middle of the scan line, without checks.
- // Manual loop unrolling.
- i = x2 - x1;
- ii = i >> 3;
- while (ii) {
- blender.write(&line[0], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[1], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[2], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[3], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[4], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[5], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[6], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- blender.write(&line[7], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
- line += 8;
- --ii;
- }
- switch (i & 7) {
- case 7: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 6: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 5: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 4: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 3: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 2: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- case 1: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
- }
-
- // End of the scan line, with per-pixel checks.
- i = toX - x2;
- while (i) {
- int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1);
- int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1);
- blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]);
- u += dudx;
- v += dvdx;
- ++line;
- --i;
- }
- }
- x_l += dx_l;
- x_r += dx_r;
- }
-}
-
-template <class SrcT, class DestT, class Blender>
-void qt_transform_image(DestT *destPixels, int dbpl,
- const SrcT *srcPixels, int sbpl,
- const QRectF &targetRect,
- const QRectF &sourceRect,
- const QRect &clip,
- const QTransform &targetRectTransform,
- Blender blender)
-{
- enum Corner
- {
- TopLeft,
- TopRight,
- BottomRight,
- BottomLeft
- };
-
- // map source rectangle to destination.
- QTransformImageVertex v[4];
- v[TopLeft].u = v[BottomLeft].u = sourceRect.left();
- v[TopLeft].v = v[TopRight].v = sourceRect.top();
- v[TopRight].u = v[BottomRight].u = sourceRect.right();
- v[BottomLeft].v = v[BottomRight].v = sourceRect.bottom();
- targetRectTransform.map(targetRect.left(), targetRect.top(), &v[TopLeft].x, &v[TopLeft].y);
- targetRectTransform.map(targetRect.right(), targetRect.top(), &v[TopRight].x, &v[TopRight].y);
- targetRectTransform.map(targetRect.left(), targetRect.bottom(), &v[BottomLeft].x, &v[BottomLeft].y);
- targetRectTransform.map(targetRect.right(), targetRect.bottom(), &v[BottomRight].x, &v[BottomRight].y);
-
- // find topmost vertex.
- int topmost = 0;
- for (int i = 1; i < 4; ++i) {
- if (v[i].y < v[topmost].y)
- topmost = i;
- }
- // rearrange array such that topmost vertex is at index 0.
- switch (topmost) {
- case 1:
- {
- QTransformImageVertex t = v[0];
- for (int i = 0; i < 3; ++i)
- v[i] = v[i+1];
- v[3] = t;
- }
- break;
- case 2:
- qSwap(v[0], v[2]);
- qSwap(v[1], v[3]);
- break;
- case 3:
- {
- QTransformImageVertex t = v[3];
- for (int i = 3; i > 0; --i)
- v[i] = v[i-1];
- v[0] = t;
- }
- break;
- }
-
- // if necessary, swap vertex 1 and 3 such that 1 is to the left of 3.
- qreal dx1 = v[1].x - v[0].x;
- qreal dy1 = v[1].y - v[0].y;
- qreal dx2 = v[3].x - v[0].x;
- qreal dy2 = v[3].y - v[0].y;
- if (dx1 * dy2 - dx2 * dy1 > 0)
- qSwap(v[1], v[3]);
-
- QTransformImageVertex u = {v[1].x - v[0].x, v[1].y - v[0].y, v[1].u - v[0].u, v[1].v - v[0].v};
- QTransformImageVertex w = {v[2].x - v[0].x, v[2].y - v[0].y, v[2].u - v[0].u, v[2].v - v[0].v};
-
- qreal det = u.x * w.y - u.y * w.x;
- if (det == 0)
- return;
-
- qreal invDet = 1.0 / det;
- qreal m11, m12, m21, m22, mdx, mdy;
-
- m11 = (u.u * w.y - u.y * w.u) * invDet;
- m12 = (u.x * w.u - u.u * w.x) * invDet;
- m21 = (u.v * w.y - u.y * w.v) * invDet;
- m22 = (u.x * w.v - u.v * w.x) * invDet;
- mdx = v[0].u - m11 * v[0].x - m12 * v[0].y;
- mdy = v[0].v - m21 * v[0].x - m22 * v[0].y;
-
- int dudx = int(m11 * 0x10000);
- int dvdx = int(m21 * 0x10000);
- int dudy = int(m12 * 0x10000);
- int dvdy = int(m22 * 0x10000);
- int u0 = qCeil((0.5 * m11 + 0.5 * m12 + mdx) * 0x10000) - 1;
- int v0 = qCeil((0.5 * m21 + 0.5 * m22 + mdy) * 0x10000) - 1;
-
- int x1 = qFloor(sourceRect.left());
- int y1 = qFloor(sourceRect.top());
- int x2 = qCeil(sourceRect.right());
- int y2 = qCeil(sourceRect.bottom());
- QRect sourceRectI(x1, y1, x2 - x1, y2 - y1);
-
- // rasterize trapezoids.
- if (v[1].y < v[3].y) {
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[0], v[3], sourceRectI, clip, v[1].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[3].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- } else {
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[3], v[2], sourceRectI, clip, v[3].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[1].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
- }
-}
-
void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
const QRectF &targetRect,
diff --git a/src/gui/painting/qblendfunctions_p.h b/src/gui/painting/qblendfunctions_p.h
new file mode 100644
index 0000000..ad754b0
--- /dev/null
+++ b/src/gui/painting/qblendfunctions_p.h
@@ -0,0 +1,497 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QBLENDFUNCTIONS_P_H
+#define QBLENDFUNCTIONS_P_H
+
+#include <qmath.h>
+#include "qdrawhelper_p.h"
+
+QT_BEGIN_NAMESPACE
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+template <typename SRC, typename T>
+void qt_scale_image_16bit(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &srcRect,
+ const QRect &clip,
+ T blender)
+{
+ qreal sx = targetRect.width() / (qreal) srcRect.width();
+ qreal sy = targetRect.height() / (qreal) srcRect.height();
+
+ int ix = 0x00010000 / sx;
+ int iy = 0x00010000 / sy;
+
+// qDebug() << "scale:" << endl
+// << " - target" << targetRect << endl
+// << " - source" << srcRect << endl
+// << " - clip" << clip << endl
+// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy;
+
+ int cx1 = clip.x();
+ int cx2 = clip.x() + clip.width();
+ int cy1 = clip.top();
+ int cy2 = clip.y() + clip.height();
+
+ int tx1 = qRound(targetRect.left());
+ int tx2 = qRound(targetRect.right());
+ int ty1 = qRound(targetRect.top());
+ int ty2 = qRound(targetRect.bottom());
+
+ if (tx2 < tx1)
+ qSwap(tx2, tx1);
+
+ if (ty2 < ty1)
+ qSwap(ty2, ty1);
+
+ if (tx1 < cx1)
+ tx1 = cx1;
+
+ if (tx2 >= cx2)
+ tx2 = cx2;
+
+ if (tx1 >= tx2)
+ return;
+
+ if (ty1 < cy1)
+ ty1 = cy1;
+
+ if (ty2 >= cy2)
+ ty2 = cy2;
+
+ if (ty1 >= ty2)
+ return;
+
+ int h = ty2 - ty1;
+ int w = tx2 - tx1;
+
+
+ quint32 basex;
+ quint32 srcy;
+
+ if (sx < 0) {
+ int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1;
+ basex = quint32(srcRect.right() * 65536) + dstx;
+ } else {
+ int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1;
+ basex = quint32(srcRect.left() * 65536) + dstx;
+ }
+ if (sy < 0) {
+ int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1;
+ srcy = quint32(srcRect.bottom() * 65536) + dsty;
+ } else {
+ int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1;
+ srcy = quint32(srcRect.top() * 65536) + dsty;
+ }
+
+ quint16 *dst = ((quint16 *) (destPixels + ty1 * dbpl)) + tx1;
+
+ while (h--) {
+ const SRC *src = (const SRC *) (srcPixels + (srcy >> 16) * sbpl);
+ int srcx = basex;
+ int x = 0;
+ for (; x<w-7; x+=8) {
+ blender.write(&dst[x], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+1], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+2], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+3], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+4], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+5], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+6], src[srcx >> 16]); srcx += ix;
+ blender.write(&dst[x+7], src[srcx >> 16]); srcx += ix;
+ }
+ for (; x<w; ++x) {
+ blender.write(&dst[x], src[srcx >> 16]);
+ srcx += ix;
+ }
+ blender.flush(&dst[x]);
+ dst = (quint16 *)(((uchar *) dst) + dbpl);
+ srcy += iy;
+ }
+}
+
+template <typename T> void qt_scale_image_32bit(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &srcRect,
+ const QRect &clip,
+ T blender)
+{
+ qreal sx = targetRect.width() / (qreal) srcRect.width();
+ qreal sy = targetRect.height() / (qreal) srcRect.height();
+
+ int ix = 0x00010000 / sx;
+ int iy = 0x00010000 / sy;
+
+// qDebug() << "scale:" << endl
+// << " - target" << targetRect << endl
+// << " - source" << srcRect << endl
+// << " - clip" << clip << endl
+// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy;
+
+ int cx1 = clip.x();
+ int cx2 = clip.x() + clip.width();
+ int cy1 = clip.top();
+ int cy2 = clip.y() + clip.height();
+
+ int tx1 = qRound(targetRect.left());
+ int tx2 = qRound(targetRect.right());
+ int ty1 = qRound(targetRect.top());
+ int ty2 = qRound(targetRect.bottom());
+
+ if (tx2 < tx1)
+ qSwap(tx2, tx1);
+
+ if (ty2 < ty1)
+ qSwap(ty2, ty1);
+
+ if (tx1 < cx1)
+ tx1 = cx1;
+
+ if (tx2 >= cx2)
+ tx2 = cx2;
+
+ if (tx1 >= tx2)
+ return;
+
+ if (ty1 < cy1)
+ ty1 = cy1;
+
+ if (ty2 >= cy2)
+ ty2 = cy2;
+
+ if (ty1 >= ty2)
+ return;
+
+ int h = ty2 - ty1;
+ int w = tx2 - tx1;
+
+ quint32 basex;
+ quint32 srcy;
+
+ if (sx < 0) {
+ int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1;
+ basex = quint32(srcRect.right() * 65536) + dstx;
+ } else {
+ int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1;
+ basex = quint32(srcRect.left() * 65536) + dstx;
+ }
+ if (sy < 0) {
+ int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1;
+ srcy = quint32(srcRect.bottom() * 65536) + dsty;
+ } else {
+ int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1;
+ srcy = quint32(srcRect.top() * 65536) + dsty;
+ }
+
+ quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1;
+
+ while (h--) {
+ const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl);
+ int srcx = basex;
+ int x = 0;
+ for (; x<w; ++x) {
+ blender.write(&dst[x], src[srcx >> 16]);
+ srcx += ix;
+ }
+ blender.flush(&dst[x]);
+ dst = (quint32 *)(((uchar *) dst) + dbpl);
+ srcy += iy;
+ }
+}
+
+struct QTransformImageVertex
+{
+ qreal x, y, u, v; // destination coordinates (x, y) and source coordinates (u, v)
+};
+
+template <class SrcT, class DestT, class Blender>
+void qt_transform_image_rasterize(DestT *destPixels, int dbpl,
+ const SrcT *srcPixels, int sbpl,
+ const QTransformImageVertex &topLeft, const QTransformImageVertex &bottomLeft,
+ const QTransformImageVertex &topRight, const QTransformImageVertex &bottomRight,
+ const QRect &sourceRect,
+ const QRect &clip,
+ qreal topY, qreal bottomY,
+ int dudx, int dvdx, int dudy, int dvdy, int u0, int v0,
+ Blender blender)
+{
+ int fromY = qMax(qRound(topY), clip.top());
+ int toY = qMin(qRound(bottomY), clip.top() + clip.height());
+ if (fromY >= toY)
+ return;
+
+ qreal leftSlope = (bottomLeft.x - topLeft.x) / (bottomLeft.y - topLeft.y);
+ qreal rightSlope = (bottomRight.x - topRight.x) / (bottomRight.y - topRight.y);
+ int dx_l = int(leftSlope * 0x10000);
+ int dx_r = int(rightSlope * 0x10000);
+ int x_l = int((topLeft.x + (0.5 + fromY - topLeft.y) * leftSlope + 0.5) * 0x10000);
+ int x_r = int((topRight.x + (0.5 + fromY - topRight.y) * rightSlope + 0.5) * 0x10000);
+
+ int fromX, toX, x1, x2, u, v, i, ii;
+ DestT *line;
+ for (int y = fromY; y < toY; ++y) {
+ line = reinterpret_cast<DestT *>(reinterpret_cast<uchar *>(destPixels) + y * dbpl);
+
+ fromX = qMax(x_l >> 16, clip.left());
+ toX = qMin(x_r >> 16, clip.left() + clip.width());
+ if (fromX < toX) {
+ // Because of rounding, we can get source coordinates outside the source image.
+ // Clamp these coordinates to the source rect to avoid segmentation fault and
+ // garbage on the screen.
+
+ // Find the first pixel on the current scan line where the source coordinates are within the source rect.
+ x1 = fromX;
+ u = x1 * dudx + y * dudy + u0;
+ v = x1 * dvdx + y * dvdy + v0;
+ for (; x1 < toX; ++x1) {
+ int uu = u >> 16;
+ int vv = v >> 16;
+ if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width()
+ && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) {
+ break;
+ }
+ u += dudx;
+ v += dvdx;
+ }
+
+ // Find the last pixel on the current scan line where the source coordinates are within the source rect.
+ x2 = toX;
+ u = (x2 - 1) * dudx + y * dudy + u0;
+ v = (x2 - 1) * dvdx + y * dvdy + v0;
+ for (; x2 > x1; --x2) {
+ int uu = u >> 16;
+ int vv = v >> 16;
+ if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width()
+ && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) {
+ break;
+ }
+ u -= dudx;
+ v -= dvdx;
+ }
+
+ // Set up values at the beginning of the scan line.
+ u = fromX * dudx + y * dudy + u0;
+ v = fromX * dvdx + y * dvdy + v0;
+ line += fromX;
+
+ // Beginning of the scan line, with per-pixel checks.
+ i = x1 - fromX;
+ while (i) {
+ int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1);
+ int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1);
+ blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]);
+ u += dudx;
+ v += dvdx;
+ ++line;
+ --i;
+ }
+
+ // Middle of the scan line, without checks.
+ // Manual loop unrolling.
+ i = x2 - x1;
+ ii = i >> 3;
+ while (ii) {
+ blender.write(&line[0], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[1], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[2], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[3], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[4], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[5], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[6], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+ blender.write(&line[7], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx;
+
+ line += 8;
+
+ --ii;
+ }
+ switch (i & 7) {
+ case 7: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 6: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 5: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 4: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 3: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 2: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ case 1: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line;
+ }
+
+ // End of the scan line, with per-pixel checks.
+ i = toX - x2;
+ while (i) {
+ int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1);
+ int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1);
+ blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]);
+ u += dudx;
+ v += dvdx;
+ ++line;
+ --i;
+ }
+
+ blender.flush(line);
+ }
+ x_l += dx_l;
+ x_r += dx_r;
+ }
+}
+
+template <class SrcT, class DestT, class Blender>
+void qt_transform_image(DestT *destPixels, int dbpl,
+ const SrcT *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ Blender blender)
+{
+ enum Corner
+ {
+ TopLeft,
+ TopRight,
+ BottomRight,
+ BottomLeft
+ };
+
+ // map source rectangle to destination.
+ QTransformImageVertex v[4];
+ v[TopLeft].u = v[BottomLeft].u = sourceRect.left();
+ v[TopLeft].v = v[TopRight].v = sourceRect.top();
+ v[TopRight].u = v[BottomRight].u = sourceRect.right();
+ v[BottomLeft].v = v[BottomRight].v = sourceRect.bottom();
+ targetRectTransform.map(targetRect.left(), targetRect.top(), &v[TopLeft].x, &v[TopLeft].y);
+ targetRectTransform.map(targetRect.right(), targetRect.top(), &v[TopRight].x, &v[TopRight].y);
+ targetRectTransform.map(targetRect.left(), targetRect.bottom(), &v[BottomLeft].x, &v[BottomLeft].y);
+ targetRectTransform.map(targetRect.right(), targetRect.bottom(), &v[BottomRight].x, &v[BottomRight].y);
+
+ // find topmost vertex.
+ int topmost = 0;
+ for (int i = 1; i < 4; ++i) {
+ if (v[i].y < v[topmost].y)
+ topmost = i;
+ }
+ // rearrange array such that topmost vertex is at index 0.
+ switch (topmost) {
+ case 1:
+ {
+ QTransformImageVertex t = v[0];
+ for (int i = 0; i < 3; ++i)
+ v[i] = v[i+1];
+ v[3] = t;
+ }
+ break;
+ case 2:
+ qSwap(v[0], v[2]);
+ qSwap(v[1], v[3]);
+ break;
+ case 3:
+ {
+ QTransformImageVertex t = v[3];
+ for (int i = 3; i > 0; --i)
+ v[i] = v[i-1];
+ v[0] = t;
+ }
+ break;
+ }
+
+ // if necessary, swap vertex 1 and 3 such that 1 is to the left of 3.
+ qreal dx1 = v[1].x - v[0].x;
+ qreal dy1 = v[1].y - v[0].y;
+ qreal dx2 = v[3].x - v[0].x;
+ qreal dy2 = v[3].y - v[0].y;
+ if (dx1 * dy2 - dx2 * dy1 > 0)
+ qSwap(v[1], v[3]);
+
+ QTransformImageVertex u = {v[1].x - v[0].x, v[1].y - v[0].y, v[1].u - v[0].u, v[1].v - v[0].v};
+ QTransformImageVertex w = {v[2].x - v[0].x, v[2].y - v[0].y, v[2].u - v[0].u, v[2].v - v[0].v};
+
+ qreal det = u.x * w.y - u.y * w.x;
+ if (det == 0)
+ return;
+
+ qreal invDet = 1.0 / det;
+ qreal m11, m12, m21, m22, mdx, mdy;
+
+ m11 = (u.u * w.y - u.y * w.u) * invDet;
+ m12 = (u.x * w.u - u.u * w.x) * invDet;
+ m21 = (u.v * w.y - u.y * w.v) * invDet;
+ m22 = (u.x * w.v - u.v * w.x) * invDet;
+ mdx = v[0].u - m11 * v[0].x - m12 * v[0].y;
+ mdy = v[0].v - m21 * v[0].x - m22 * v[0].y;
+
+ int dudx = int(m11 * 0x10000);
+ int dvdx = int(m21 * 0x10000);
+ int dudy = int(m12 * 0x10000);
+ int dvdy = int(m22 * 0x10000);
+ int u0 = qCeil((0.5 * m11 + 0.5 * m12 + mdx) * 0x10000) - 1;
+ int v0 = qCeil((0.5 * m21 + 0.5 * m22 + mdy) * 0x10000) - 1;
+
+ int x1 = qFloor(sourceRect.left());
+ int y1 = qFloor(sourceRect.top());
+ int x2 = qCeil(sourceRect.right());
+ int y2 = qCeil(sourceRect.bottom());
+ QRect sourceRectI(x1, y1, x2 - x1, y2 - y1);
+
+ // rasterize trapezoids.
+ if (v[1].y < v[3].y) {
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[0], v[3], sourceRectI, clip, v[1].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[3].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ } else {
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[3], v[2], sourceRectI, clip, v[3].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[1].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender);
+ }
+}
+
+QT_END_NAMESPACE
+
+#endif // QBLENDFUNCTIONS_P_H
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 1f75ec7..917b910 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -175,7 +175,7 @@ Q_STATIC_TEMPLATE_FUNCTION uint * QT_FASTCALL destFetch(uint *buffer, QRasterBuf
# define SPANFUNC_POINTER_DESTFETCH(Arg) destFetch<Arg>
-static const DestFetchProc destFetchProc[QImage::NImageFormats] =
+static DestFetchProc destFetchProc[QImage::NImageFormats] =
{
0, // Format_Invalid
destFetchMono, // Format_Mono,
@@ -323,7 +323,7 @@ Q_STATIC_TEMPLATE_FUNCTION void QT_FASTCALL destStore(QRasterBuffer *rasterBuffe
# define SPANFUNC_POINTER_DESTSTORE(DEST) destStore<DEST>
-static const DestStoreProc destStoreProc[QImage::NImageFormats] =
+static DestStoreProc destStoreProc[QImage::NImageFormats] =
{
0, // Format_Invalid
destStoreMono, // Format_Mono,
@@ -2827,7 +2827,7 @@ static void QT_FASTCALL rasterop_SourceAndNotDestination(uint *dest,
}
}
-static const CompositionFunctionSolid functionForModeSolid_C[] = {
+static CompositionFunctionSolid functionForModeSolid_C[] = {
comp_func_solid_SourceOver,
comp_func_solid_DestinationOver,
comp_func_solid_Clear,
@@ -2865,7 +2865,7 @@ static const CompositionFunctionSolid functionForModeSolid_C[] = {
static const CompositionFunctionSolid *functionForModeSolid = functionForModeSolid_C;
-static const CompositionFunction functionForMode_C[] = {
+static CompositionFunction functionForMode_C[] = {
comp_func_SourceOver,
comp_func_DestinationOver,
comp_func_Clear,
@@ -7961,6 +7961,20 @@ void qInitDrawhelperAsm()
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_neon;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_neon;
+ qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon;
+ qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon;
+
+ qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon;
+ qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon;
+
+ qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon;
+ qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_transform_image_rgb16_on_rgb16_neon;
+
+ qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
+
+ functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
+ destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
+ destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
}
#endif
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index 77c5202..ee5f24a 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -40,10 +40,13 @@
****************************************************************************/
#include <private/qdrawhelper_p.h>
+#include <private/qblendfunctions_p.h>
+#include <private/qmath_p.h>
#ifdef QT_HAVE_NEON
#include <private/qdrawhelper_neon_p.h>
+#include <private/qpaintengine_raster_p.h>
#include <arm_neon.h>
QT_BEGIN_NAMESPACE
@@ -87,60 +90,142 @@ static inline uint16x8_t qvsource_over_u16(uint16x8_t src16, uint16x8_t dst16, u
return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half));
}
-void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
- const uchar *srcPixels, int sbpl,
- int w, int h,
- int const_alpha)
+extern "C" void
+pixman_composite_over_8888_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint32_t *src,
+ int32_t src_stride);
+
+extern "C" void
+pixman_composite_over_8888_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint32_t *src,
+ int32_t src_stride);
+
+extern "C" void
+pixman_composite_src_0565_8888_asm_neon (int32_t w,
+ int32_t h,
+ uint32_t *dst,
+ int32_t dst_stride,
+ uint16_t *src,
+ int32_t src_stride);
+
+extern "C" void
+pixman_composite_over_n_8_0565_asm_neon (int32_t w,
+ int32_t h,
+ uint16_t *dst,
+ int32_t dst_stride,
+ uint32_t src,
+ int32_t unused,
+ uint8_t *mask,
+ int32_t mask_stride);
+
+extern "C" void
+pixman_composite_scanline_over_asm_neon (int32_t w,
+ const uint32_t *dst,
+ const uint32_t *src);
+
+// qblendfunctions.cpp
+void qt_blend_argb32_on_rgb16_const_alpha(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
{
- const uint *src = (const uint *) srcPixels;
- uint *dst = (uint *) destPixels;
- uint16x8_t half = vdupq_n_u16(0x80);
- uint16x8_t full = vdupq_n_u16(0xff);
- if (const_alpha == 256) {
- for (int y = 0; y < h; ++y) {
- int x = 0;
- for (; x < w-3; x += 4) {
- uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]);
- if ((src[x] & src[x+1] & src[x+2] & src[x+3]) >= 0xff000000) {
- // all opaque
- vst1q_u32((uint32_t *)&dst[x], src32);
- } else if (src[x] | src[x+1] | src[x+2] | src[x+3]) {
- uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]);
+ dbpl /= 4;
+ sbpl /= 2;
- const uint8x16_t src8 = vreinterpretq_u8_u32(src32);
- const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32);
+ quint32 *dst = (quint32 *) destPixels;
+ quint16 *src = (quint16 *) srcPixels;
- const uint8x8_t src8_low = vget_low_u8(src8);
- const uint8x8_t dst8_low = vget_low_u8(dst8);
+ if (const_alpha != 256) {
+ quint8 a = (255 * const_alpha) >> 8;
+ quint8 ia = 255 - a;
+
+ while (h--) {
+ for (int x=0; x<w; ++x)
+ dst[x] = INTERPOLATE_PIXEL_255(qt_colorConvert(src[x], dst[x]), a, dst[x], ia);
+ dst += dbpl;
+ src += sbpl;
+ }
+ return;
+ }
- const uint8x8_t src8_high = vget_high_u8(src8);
- const uint8x8_t dst8_high = vget_high_u8(dst8);
+ pixman_composite_src_0565_8888_asm_neon(w, h, dst, dbpl, src, sbpl);
+}
- const uint16x8_t src16_low = vmovl_u8(src8_low);
- const uint16x8_t dst16_low = vmovl_u8(dst8_low);
+extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha);
- const uint16x8_t src16_high = vmovl_u8(src8_high);
- const uint16x8_t dst16_high = vmovl_u8(dst8_high);
+void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
+{
+ quint16 *dst = (quint16 *) destPixels;
+ quint32 *src = (quint32 *) srcPixels;
- const uint16x8_t result16_low = qvsource_over_u16(src16_low, dst16_low, half, full);
- const uint16x8_t result16_high = qvsource_over_u16(src16_high, dst16_high, half, full);
+ if (const_alpha != 256) {
+ for (int y=0; y<h; ++y) {
+ int i = 0;
+ for (; i < w-7; i += 8)
+ blend_8_pixels_argb32_on_rgb16_neon(&dst[i], &src[i], const_alpha);
- const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
- const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));
+ if (i < w) {
+ int tail = w - i;
- vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high));
+ quint16 dstBuffer[8];
+ quint32 srcBuffer[8];
+
+ for (int j = 0; j < tail; ++j) {
+ dstBuffer[j] = dst[i + j];
+ srcBuffer[j] = src[i + j];
+ }
+
+ blend_8_pixels_argb32_on_rgb16_neon(dstBuffer, srcBuffer, const_alpha);
+
+ for (int j = 0; j < tail; ++j) {
+ dst[i + j] = dstBuffer[j];
+ src[i + j] = srcBuffer[j];
}
}
- for (; x<w; ++x) {
- uint s = src[x];
- if (s >= 0xff000000)
- dst[x] = s;
- else if (s != 0)
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
- }
- dst = (quint32 *)(((uchar *) dst) + dbpl);
- src = (const quint32 *)(((const uchar *) src) + sbpl);
+
+ dst = (quint16 *)(((uchar *) dst) + dbpl);
+ src = (quint32 *)(((uchar *) src) + sbpl);
}
+ return;
+ }
+
+ pixman_composite_over_8888_0565_asm_neon(w, h, dst, dbpl / 2, src, sbpl / 4);
+}
+
+void qt_blend_argb32_on_argb32_scanline_neon(uint *dest, const uint *src, int length, uint const_alpha)
+{
+ if (const_alpha == 255) {
+ pixman_composite_scanline_over_asm_neon(length, dest, src);
+ } else {
+ qt_blend_argb32_on_argb32_neon((uchar *)dest, 4 * length, (uchar *)src, 4 * length, length, 1, (const_alpha * 256) / 255);
+ }
+}
+
+void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha)
+{
+ const uint *src = (const uint *) srcPixels;
+ uint *dst = (uint *) destPixels;
+ uint16x8_t half = vdupq_n_u16(0x80);
+ uint16x8_t full = vdupq_n_u16(0xff);
+ if (const_alpha == 256) {
+ pixman_composite_over_8888_8888_asm_neon(w, h, (uint32_t *)destPixels, dbpl / 4, (uint32_t *)srcPixels, sbpl / 4);
} else if (const_alpha != 0) {
const_alpha = (const_alpha * 255) >> 8;
uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha);
@@ -254,6 +339,246 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
}
}
+void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
+ int x, int y, quint32 color,
+ const uchar *bitmap,
+ int mapWidth, int mapHeight, int mapStride,
+ const QClipData *)
+{
+ quint16 *dest = reinterpret_cast<quint16*>(rasterBuffer->scanLine(y)) + x;
+ const int destStride = rasterBuffer->bytesPerLine() / sizeof(quint16);
+
+ uchar *mask = const_cast<uchar *>(bitmap);
+
+ pixman_composite_over_n_8_0565_asm_neon(mapWidth, mapHeight, dest, destStride, color, 0, mask, mapStride);
+}
+
+extern "C" void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha);
+
+template <typename SRC, typename BlendFunc>
+struct Blend_on_RGB16_SourceAndConstAlpha_Neon {
+ Blend_on_RGB16_SourceAndConstAlpha_Neon(BlendFunc blender, int const_alpha)
+ : m_index(0)
+ , m_blender(blender)
+ , m_const_alpha(const_alpha)
+ {
+ }
+
+ inline void write(quint16 *dst, quint32 src)
+ {
+ srcBuffer[m_index++] = src;
+
+ if (m_index == 8) {
+ m_blender(dst - 7, srcBuffer, m_const_alpha);
+ m_index = 0;
+ }
+ }
+
+ inline void flush(quint16 *dst)
+ {
+ if (m_index > 0) {
+ quint16 dstBuffer[8];
+ for (int i = 0; i < m_index; ++i)
+ dstBuffer[i] = dst[i - m_index];
+
+ m_blender(dstBuffer, srcBuffer, m_const_alpha);
+
+ for (int i = 0; i < m_index; ++i)
+ dst[i - m_index] = dstBuffer[i];
+
+ m_index = 0;
+ }
+ }
+
+ SRC srcBuffer[8];
+
+ int m_index;
+ BlendFunc m_blender;
+ int m_const_alpha;
+};
+
+template <typename SRC, typename BlendFunc>
+Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>
+Blend_on_RGB16_SourceAndConstAlpha_Neon_create(BlendFunc blender, int const_alpha)
+{
+ return Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>(blender, const_alpha);
+}
+
+void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ int const_alpha)
+{
+ if (const_alpha == 0)
+ return;
+
+ qt_scale_image_16bit<quint32>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip,
+ Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
+}
+
+void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ int const_alpha);
+
+void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ int const_alpha)
+{
+ if (const_alpha == 0)
+ return;
+
+ if (const_alpha == 256) {
+ qt_scale_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha);
+ return;
+ }
+
+ qt_scale_image_16bit<quint16>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip,
+ Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
+}
+
+extern void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ int const_alpha);
+
+void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ int const_alpha)
+{
+ if (const_alpha == 0)
+ return;
+
+ if (const_alpha == 256) {
+ qt_transform_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, targetRectTransform, const_alpha);
+ return;
+ }
+
+ qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
+ reinterpret_cast<const quint16 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
+ Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
+}
+
+void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ int const_alpha)
+{
+ if (const_alpha == 0)
+ return;
+
+ qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
+ reinterpret_cast<const quint32 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
+ Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
+}
+
+static inline void convert_8_pixels_rgb16_to_argb32(quint32 *dst, const quint16 *src)
+{
+ asm volatile (
+ "vld1.16 { d0, d1 }, [%[SRC]]\n\t"
+
+ /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
+ and put data into d4 - red, d3 - green, d2 - blue */
+ "vshrn.u16 d4, q0, #8\n\t"
+ "vshrn.u16 d3, q0, #3\n\t"
+ "vsli.u16 q0, q0, #5\n\t"
+ "vsri.u8 d4, d4, #5\n\t"
+ "vsri.u8 d3, d3, #6\n\t"
+ "vshrn.u16 d2, q0, #2\n\t"
+
+ /* fill d5 - alpha with 0xff */
+ "mov r2, #255\n\t"
+ "vdup.8 d5, r2\n\t"
+
+ "vst4.8 { d2, d3, d4, d5 }, [%[DST]]"
+ : : [DST]"r" (dst), [SRC]"r" (src)
+ : "memory", "r2", "d0", "d1", "d2", "d3", "d4", "d5"
+ );
+}
+
+uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length)
+{
+ const ushort *data = (const ushort *)rasterBuffer->scanLine(y) + x;
+
+ int i = 0;
+ for (; i < length - 7; i += 8)
+ convert_8_pixels_rgb16_to_argb32(&buffer[i], &data[i]);
+
+ if (i < length) {
+ quint16 srcBuffer[8];
+ quint32 dstBuffer[8];
+
+ int tail = length - i;
+ for (int j = 0; j < tail; ++j)
+ srcBuffer[j] = data[i + j];
+
+ convert_8_pixels_rgb16_to_argb32(dstBuffer, srcBuffer);
+
+ for (int j = 0; j < tail; ++j)
+ buffer[i + j] = dstBuffer[j];
+ }
+
+ return buffer;
+}
+
+static inline void convert_8_pixels_argb32_to_rgb16(quint16 *dst, const quint32 *src)
+{
+ asm volatile (
+ "vld4.8 { d0, d1, d2, d3 }, [%[SRC]]\n\t"
+
+ /* convert to r5g6b5 and store it into {d28, d29} */
+ "vshll.u8 q14, d2, #8\n\t"
+ "vshll.u8 q8, d1, #8\n\t"
+ "vshll.u8 q9, d0, #8\n\t"
+ "vsri.u16 q14, q8, #5\n\t"
+ "vsri.u16 q14, q9, #11\n\t"
+
+ "vst1.16 { d28, d29 }, [%[DST]]"
+ : : [DST]"r" (dst), [SRC]"r" (src)
+ : "memory", "d0", "d1", "d2", "d3", "d16", "d17", "d18", "d19", "d28", "d29"
+ );
+}
+
+void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length)
+{
+ quint16 *data = (quint16*)rasterBuffer->scanLine(y) + x;
+
+ int i = 0;
+ for (; i < length - 7; i += 8)
+ convert_8_pixels_argb32_to_rgb16(&data[i], &buffer[i]);
+
+ if (i < length) {
+ quint32 srcBuffer[8];
+ quint16 dstBuffer[8];
+
+ int tail = length - i;
+ for (int j = 0; j < tail; ++j)
+ srcBuffer[j] = buffer[i + j];
+
+ convert_8_pixels_argb32_to_rgb16(dstBuffer, srcBuffer);
+
+ for (int j = 0; j < tail; ++j)
+ data[i + j] = dstBuffer[j];
+ }
+}
+
QT_END_NAMESPACE
#endif // QT_HAVE_NEON
diff --git a/src/gui/painting/qdrawhelper_neon_asm.S b/src/gui/painting/qdrawhelper_neon_asm.S
new file mode 100644
index 0000000..9992817
--- /dev/null
+++ b/src/gui/painting/qdrawhelper_neon_asm.S
@@ -0,0 +1,192 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+/* Prevent the stack from becoming executable for no reason... */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+.text
+.fpu neon
+.arch armv7a
+.altmacro
+
+/* void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha) */
+
+ .func blend_8_pixels_argb32_on_rgb16_neon
+ .global blend_8_pixels_argb32_on_rgb16_neon
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden blend_8_pixels_argb32_on_rgb16_neon
+ .type blend_8_pixels_argb32_on_rgb16_neon, %function
+#endif
+blend_8_pixels_argb32_on_rgb16_neon:
+ vld4.8 { d0, d1, d2, d3 }, [r1]
+ vld1.16 { d4, d5 }, [r0]
+
+ cmp r2, #256
+ beq .blend_32_inner
+
+ vdup.8 d6, r2
+
+ /* multiply by const_alpha */
+ vmull.u8 q8, d6, d0
+ vmull.u8 q9, d6, d1
+ vmull.u8 q10, d6, d2
+ vmull.u8 q11, d6, d3
+
+ vshrn.u16 d0, q8, #8
+ vshrn.u16 d1, q9, #8
+ vshrn.u16 d2, q10, #8
+ vshrn.u16 d3, q11, #8
+
+.blend_32_inner:
+ /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q2, #8
+ vshrn.u16 d7, q2, #3
+ vsli.u16 q2, q2, #5
+ vsri.u8 d6, d6, #5
+ vmvn.8 d3, d3
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q2, #2
+
+ pld [r0, #128]
+
+ /* now do alpha blending, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vmull.u8 q10, d3, d6
+ vmull.u8 q11, d3, d7
+ vmull.u8 q12, d3, d30
+ vrshr.u16 q13, q10, #8
+ vrshr.u16 q3, q11, #8
+ vrshr.u16 q15, q12, #8
+ vraddhn.u16 d20, q10, q13
+ vraddhn.u16 d23, q11, q3
+ vraddhn.u16 d22, q12, q15
+ vqadd.u8 d16, d2, d20
+ vqadd.u8 q9, q0, q11
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+
+ vst1.16 { d28, d29 }, [r0]
+
+ bx lr
+
+ .endfunc
+
+/* void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha) */
+
+ .func blend_8_pixels_rgb16_on_rgb16_neon
+ .global blend_8_pixels_rgb16_on_rgb16_neon
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden blend_8_pixels_rgb16_on_rgb16_neon
+ .type blend_8_pixels_rgb16_on_rgb16_neon, %function
+#endif
+blend_8_pixels_rgb16_on_rgb16_neon:
+ vld1.16 { d0, d1 }, [r0]
+ vld1.16 { d2, d3 }, [r1]
+
+ rsb r3, r2, #256
+ vdup.8 d4, r2
+ vdup.8 d5, r3
+
+ /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
+ and put data into d6 - red, d7 - green, d30 - blue */
+ vshrn.u16 d6, q0, #8
+ vshrn.u16 d7, q0, #3
+ vsli.u16 q0, q0, #5
+ vsri.u8 d6, d6, #5
+ vsri.u8 d7, d7, #6
+ vshrn.u16 d30, q0, #2
+
+ /* same from {d2, d3} into {d26, d27, d28} */
+ vshrn.u16 d26, q1, #8
+ vshrn.u16 d27, q1, #3
+ vsli.u16 q1, q1, #5
+ vsri.u8 d26, d26, #5
+ vsri.u8 d27, d27, #6
+ vshrn.u16 d28, q1, #2
+
+ /* multiply dst by inv const_alpha */
+ vmull.u8 q10, d5, d6
+ vmull.u8 q11, d5, d7
+ vmull.u8 q12, d5, d30
+
+ vshrn.u16 d6, q10, #8
+ vshrn.u16 d7, q11, #8
+ vshrn.u16 d30, q12, #8
+
+ /* multiply src by const_alpha */
+ vmull.u8 q10, d4, d26
+ vmull.u8 q11, d4, d27
+ vmull.u8 q12, d4, d28
+
+ vshrn.u16 d26, q10, #8
+ vshrn.u16 d27, q11, #8
+ vshrn.u16 d28, q12, #8
+
+ /* preload dst + 128 */
+ pld [r0, #128]
+
+ /* add components, storing results in 8-bit planar format
+ into d16 - red, d19 - green, d18 - blue */
+ vadd.u8 d16, d26, d6
+ vadd.u8 d19, d27, d7
+ vadd.u8 d18, d28, d30
+
+ /* convert the result to r5g6b5 and store it into {d28, d29} */
+ vshll.u8 q14, d16, #8
+ vshll.u8 q8, d19, #8
+ vshll.u8 q9, d18, #8
+ vsri.u16 q14, q8, #5
+ vsri.u16 q14, q9, #11
+
+ vst1.16 { d28, d29 }, [r0]
+
+ bx lr
+
+ .endfunc
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index 1994441..d6a4509 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -69,6 +69,64 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl,
int w, int h,
int const_alpha);
+void qt_blend_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_blend_argb32_on_argb32_scanline_neon(uint *dest,
+ const uint *src,
+ int length,
+ uint const_alpha);
+
+void qt_blend_rgb16_on_argb32_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ int w, int h,
+ int const_alpha);
+
+void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer,
+ int x, int y, quint32 color,
+ const uchar *bitmap,
+ int mapWidth, int mapHeight, int mapStride,
+ const QClipData *clip);
+
+void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ int const_alpha);
+
+void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ int const_alpha);
+
+void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ int const_alpha);
+
+void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+ const uchar *srcPixels, int sbpl,
+ const QRectF &targetRect,
+ const QRectF &sourceRect,
+ const QRect &clip,
+ const QTransform &targetRectTransform,
+ int const_alpha);
+
+uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer,
+ QRasterBuffer *rasterBuffer,
+ int x, int y, int length);
+
+void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer,
+ int x, int y, const uint *buffer, int length);
+
#endif // QT_HAVE_NEON
QT_END_NAMESPACE
diff --git a/src/gui/painting/qpaintbuffer.cpp b/src/gui/painting/qpaintbuffer.cpp
index 39b76c8..ca2077f 100644
--- a/src/gui/painting/qpaintbuffer.cpp
+++ b/src/gui/painting/qpaintbuffer.cpp
@@ -269,24 +269,304 @@ void QPaintBuffer::draw(QPainter *painter, int frame) const
printf("\n");
#endif
- if (painter && !painter->isActive())
- return;
+ processCommands(painter, frameStartIndex(frame), frameEndIndex(frame));
+
+#ifdef QPAINTBUFFER_DEBUG_DRAW
+ qDebug() << "QPaintBuffer::draw() -------------------------------- DONE!";
+#endif
+}
+
+int QPaintBuffer::frameStartIndex(int frame) const
+{
+ return (frame == 0) ? 0 : d_ptr->frames.at(frame - 1);
+}
+
+int QPaintBuffer::frameEndIndex(int frame) const
+{
+ return (frame == d_ptr->frames.size()) ? d_ptr->commands.size() : d_ptr->frames.at(frame);
+}
+
+int QPaintBuffer::processCommands(QPainter *painter, int begin, int end) const
+{
+ if (!painter || !painter->isActive())
+ return 0;
QPaintEngineEx *xengine = painter->paintEngine()->isExtended()
? (QPaintEngineEx *) painter->paintEngine() : 0;
if (xengine) {
QPaintEngineExReplayer player;
- player.draw(*this, painter, frame);
+ player.processCommands(*this, painter, begin, end);
} else {
QPainterReplayer player;
- player.draw(*this, painter, frame);
+ player.processCommands(*this, painter, begin, end);
}
-#ifdef QPAINTBUFFER_DEBUG_DRAW
- qDebug() << "QPaintBuffer::draw() -------------------------------- DONE!";
-#endif
+ int depth = 0;
+ for (int i = begin; i < end; ++i) {
+ const QPaintBufferCommand &cmd = d_ptr->commands.at(i);
+ if (cmd.id == QPaintBufferPrivate::Cmd_Save)
+ ++depth;
+ else if (cmd.id == QPaintBufferPrivate::Cmd_Restore)
+ --depth;
+ }
+ return depth;
}
+QString QPaintBuffer::commandDescription(int command) const
+{
+ QString desc;
+ QDebug debug(&desc);
+
+ const QPaintBufferCommand &cmd = d_ptr->commands.at(command);
+
+ switch (cmd.id) {
+ case QPaintBufferPrivate::Cmd_Save: {
+ debug << "Cmd_Save";
+ break; }
+
+ case QPaintBufferPrivate::Cmd_Restore: {
+ debug << "Cmd_Restore";
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetBrush: {
+ QBrush brush = qVariantValue<QBrush>(d_ptr->variants.at(cmd.offset));
+ debug << "Cmd_SetBrush: " << brush;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetBrushOrigin: {
+ debug << "Cmd_SetBrushOrigin: " << d_ptr->variants.at(cmd.offset).toPointF();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetCompositionMode: {
+ QPainter::CompositionMode mode = (QPainter::CompositionMode) cmd.extra;
+ debug << "ExCmd_SetCompositionMode, mode: " << mode;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetOpacity: {
+ debug << "ExCmd_SetOpacity: " << d_ptr->variants.at(cmd.offset).toDouble();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawVectorPath: {
+ debug << "ExCmd_DrawVectorPath: size: " << cmd.size
+// << ", hints:" << d->ints[cmd.offset2+cmd.size]
+ << "pts/elms:" << cmd.offset << cmd.offset2;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_StrokeVectorPath: {
+ QPen pen = qVariantValue<QPen>(d_ptr->variants.at(cmd.extra));
+ debug << "ExCmd_StrokeVectorPath: size: " << cmd.size
+// << ", hints:" << d->ints[cmd.offset2+cmd.size]
+ << "pts/elms:" << cmd.offset << cmd.offset2 << pen;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_FillVectorPath: {
+ QBrush brush = qVariantValue<QBrush>(d_ptr->variants.at(cmd.extra));
+ debug << "ExCmd_FillVectorPath: size: " << cmd.size
+// << ", hints:" << d->ints[cmd.offset2+cmd.size]
+ << "pts/elms:" << cmd.offset << cmd.offset2 << brush;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_FillRectBrush: {
+ QBrush brush = qVariantValue<QBrush>(d_ptr->variants.at(cmd.extra));
+ QRectF *rect = (QRectF *)(d_ptr->floats.constData() + cmd.offset);
+ debug << "ExCmd_FillRectBrush, offset: " << cmd.offset << " rect: " << *rect << " brush: " << brush;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_FillRectColor: {
+ QColor color = qVariantValue<QColor>(d_ptr->variants.at(cmd.extra));
+ QRectF *rect = (QRectF *)(d_ptr->floats.constData() + cmd.offset);
+ debug << "ExCmd_FillRectBrush, offset: " << cmd.offset << " rect: " << *rect << " color: " << color;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPolygonF: {
+ debug << "ExCmd_DrawPolygonF, offset: " << cmd.offset << " size: " << cmd.size
+ << " mode: " << cmd.extra
+ << d_ptr->floats.at(cmd.offset)
+ << d_ptr->floats.at(cmd.offset+1);
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPolygonI: {
+ debug << "ExCmd_DrawPolygonI, offset: " << cmd.offset << " size: " << cmd.size
+ << " mode: " << cmd.extra
+ << d_ptr->ints.at(cmd.offset)
+ << d_ptr->ints.at(cmd.offset+1);
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawEllipseF: {
+ debug << "ExCmd_DrawEllipseF, offset: " << cmd.offset;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawLineF: {
+ debug << "ExCmd_DrawLineF, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawLineI: {
+ debug << "ExCmd_DrawLineI, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPointsF: {
+ debug << "ExCmd_DrawPointsF, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPointsI: {
+ debug << "ExCmd_DrawPointsI, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPolylineF: {
+ debug << "ExCmd_DrawPolylineF, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPolylineI: {
+ debug << "ExCmd_DrawPolylineI, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawRectF: {
+ debug << "ExCmd_DrawRectF, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawRectI: {
+ debug << "ExCmd_DrawRectI, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetClipEnabled: {
+ bool clipEnabled = d_ptr->variants.at(cmd.offset).toBool();
+ debug << "ExCmd_SetClipEnabled:" << clipEnabled;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_ClipVectorPath: {
+ QVectorPathCmd path(d_ptr, cmd);
+ debug << "ExCmd_ClipVectorPath:" << path().elementCount();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_ClipRect: {
+ QRect rect(QPoint(d_ptr->ints.at(cmd.offset), d_ptr->ints.at(cmd.offset + 1)),
+ QPoint(d_ptr->ints.at(cmd.offset + 2), d_ptr->ints.at(cmd.offset + 3)));
+ debug << "ExCmd_ClipRect:" << rect << cmd.extra;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_ClipRegion: {
+ QRegion region(d_ptr->variants.at(cmd.offset).value<QRegion>());
+ debug << "ExCmd_ClipRegion:" << region.boundingRect() << cmd.extra;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetPen: {
+ QPen pen = qVariantValue<QPen>(d_ptr->variants.at(cmd.offset));
+ debug << "Cmd_SetPen: " << pen;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetTransform: {
+ QTransform xform = qVariantValue<QTransform>(d_ptr->variants.at(cmd.offset));
+ debug << "Cmd_SetTransform, offset: " << cmd.offset << xform;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetRenderHints: {
+ debug << "Cmd_SetRenderHints, hints: " << cmd.extra;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_SetBackgroundMode: {
+ debug << "Cmd_SetBackgroundMode: " << cmd.extra;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawConvexPolygonF: {
+ debug << "Cmd_DrawConvexPolygonF, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawConvexPolygonI: {
+ debug << "Cmd_DrawConvexPolygonI, offset: " << cmd.offset << " size: " << cmd.size;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawEllipseI: {
+ debug << "Cmd_DrawEllipseI, offset: " << cmd.offset;
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPixmapRect: {
+ QPixmap pm(d_ptr->variants.at(cmd.offset).value<QPixmap>());
+ QRectF r(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1),
+ d_ptr->floats.at(cmd.extra+2), d_ptr->floats.at(cmd.extra+3));
+
+ QRectF sr(d_ptr->floats.at(cmd.extra+4), d_ptr->floats.at(cmd.extra+5),
+ d_ptr->floats.at(cmd.extra+6), d_ptr->floats.at(cmd.extra+7));
+ debug << "Cmd_DrawPixmapRect:" << r << sr << pm.size();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawPixmapPos: {
+ QPixmap pm(d_ptr->variants.at(cmd.offset).value<QPixmap>());
+ QPointF pos(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ debug << "Cmd_DrawPixmapPos:" << pos << pm.size();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawTiledPixmap: {
+ QPixmap pm(d_ptr->variants.at(cmd.offset).value<QPixmap>());
+ QRectF r(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1),
+ d_ptr->floats.at(cmd.extra+2), d_ptr->floats.at(cmd.extra+3));
+
+ QPointF offset(d_ptr->floats.at(cmd.extra+4), d_ptr->floats.at(cmd.extra+5));
+ debug << "Cmd_DrawTiledPixmap:" << r << offset << pm.size();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawImageRect: {
+ QImage image(d_ptr->variants.at(cmd.offset).value<QImage>());
+ QRectF r(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1),
+ d_ptr->floats.at(cmd.extra+2), d_ptr->floats.at(cmd.extra+3));
+ QRectF sr(d_ptr->floats.at(cmd.extra+4), d_ptr->floats.at(cmd.extra+5),
+ d_ptr->floats.at(cmd.extra+6), d_ptr->floats.at(cmd.extra+7));
+ debug << "Cmd_DrawImageRect:" << r << sr << image.size();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawImagePos: {
+ QImage image(d_ptr->variants.at(cmd.offset).value<QImage>());
+ QPointF pos(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ debug << "Cmd_DrawImagePos:" << pos << image.size();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawText: {
+ QPointF pos(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ QList<QVariant> variants(d_ptr->variants.at(cmd.offset).value<QList<QVariant> >());
+
+ QFont font(variants.at(0).value<QFont>());
+ QString text(variants.at(1).value<QString>());
+
+ debug << "Cmd_DrawText:" << pos << text << font.family();
+ break; }
+
+ case QPaintBufferPrivate::Cmd_DrawTextItem: {
+ QPointF pos(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ QTextItemIntCopy *tiCopy = reinterpret_cast<QTextItemIntCopy *>(qVariantValue<void *>(d_ptr->variants.at(cmd.offset)));
+ QTextItemInt &ti = (*tiCopy)();
+ QString text(ti.text());
+
+ QFont font(ti.font());
+ font.setUnderline(false);
+ font.setStrikeOut(false);
+ font.setOverline(false);
+
+ const QTextItemInt &si = static_cast<const QTextItemInt &>(ti);
+ qreal justificationWidth = 0;
+ if (si.justified)
+ justificationWidth = si.width.toReal();
+
+ debug << "Cmd_DrawTextItem:" << pos << " " << text;
+ break; }
+ case QPaintBufferPrivate::Cmd_SystemStateChanged: {
+ QRegion systemClip(d_ptr->variants.at(cmd.offset).value<QRegion>());
+
+ debug << "Cmd_SystemStateChanged:" << systemClip;
+ break; }
+ case QPaintBufferPrivate::Cmd_Translate: {
+ QPointF delta(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ debug << "Cmd_Translate:" << delta;
+ break; }
+ case QPaintBufferPrivate::Cmd_DrawStaticText: {
+ QPointF delta(d_ptr->floats.at(cmd.extra), d_ptr->floats.at(cmd.extra+1));
+ QVariantList variants(d_ptr->variants.at(cmd.offset).value<QVariantList>());
+
+ QStaticText text(variants.at(0).value<QStaticText>());
+ debug << "Cmd_DrawStaticText:" << text.text();
+ break; }
+ }
+
+ return desc;
+}
QRectF QPaintBuffer::boundingRect() const
{
@@ -1110,15 +1390,12 @@ void QPainterReplayer::setupTransform(QPainter *_painter)
painter->setTransform(m_world_matrix);
}
-void QPainterReplayer::draw(const QPaintBuffer &buffer, QPainter *_painter, int frame)
+void QPainterReplayer::processCommands(const QPaintBuffer &buffer, QPainter *p, int begin, int end)
{
d = buffer.d_ptr;
- setupTransform(_painter);
-
- int frameStart = (frame == 0) ? 0 : d->frames.at(frame-1);
- int frameEnd = (frame == d->frames.size()) ? d->commands.size() : d->frames.at(frame);
+ painter = p;
- for (int cmdIndex=frameStart; cmdIndex<frameEnd; ++cmdIndex) {
+ for (int cmdIndex = begin; cmdIndex < end; ++cmdIndex) {
const QPaintBufferCommand &cmd = d->commands.at(cmdIndex);
process(cmd);
}
diff --git a/src/gui/painting/qpaintbuffer_p.h b/src/gui/painting/qpaintbuffer_p.h
index 0fde290..4576947 100644
--- a/src/gui/painting/qpaintbuffer_p.h
+++ b/src/gui/painting/qpaintbuffer_p.h
@@ -78,6 +78,12 @@ public:
int numFrames() const;
void draw(QPainter *painter, int frame = 0) const;
+
+ int frameStartIndex(int frame) const;
+ int frameEndIndex(int frame) const;
+ int processCommands(QPainter *painter, int begin, int end) const;
+ QString commandDescription(int command) const;
+
void setBoundingRect(const QRectF &rect);
QRectF boundingRect() const;
@@ -317,7 +323,7 @@ public:
void setupTransform(QPainter *painter);
virtual void process(const QPaintBufferCommand &cmd);
- void draw(const QPaintBuffer &buffer, QPainter *painter, int frame);
+ void processCommands(const QPaintBuffer &buffer, QPainter *painter, int begin, int end);
protected:
QPaintBufferPrivate *d;
diff --git a/src/gui/painting/qtransform.cpp b/src/gui/painting/qtransform.cpp
index 4f42a58..988d678 100644
--- a/src/gui/painting/qtransform.cpp
+++ b/src/gui/painting/qtransform.cpp
@@ -1037,8 +1037,18 @@ QDataStream & operator>>(QDataStream &s, QTransform &t)
#ifndef QT_NO_DEBUG_STREAM
QDebug operator<<(QDebug dbg, const QTransform &m)
{
- dbg.nospace() << "QTransform("
- << "11=" << m.m11()
+ static const char *typeStr[] =
+ {
+ "TxNone",
+ "TxTranslate",
+ "TxScale",
+ "TxRotate",
+ "TxShear",
+ "TxProject"
+ };
+
+ dbg.nospace() << "QTransform(type=" << typeStr[m.type()] << ','
+ << " 11=" << m.m11()
<< " 12=" << m.m12()
<< " 13=" << m.m13()
<< " 21=" << m.m21()
@@ -1048,6 +1058,7 @@ QDebug operator<<(QDebug dbg, const QTransform &m)
<< " 32=" << m.m32()
<< " 33=" << m.m33()
<< ')';
+
return dbg.space();
}
#endif
diff --git a/src/plugins/bearer/corewlan/qcorewlanengine.h b/src/plugins/bearer/corewlan/qcorewlanengine.h
index 5e93193..11f5d96 100644
--- a/src/plugins/bearer/corewlan/qcorewlanengine.h
+++ b/src/plugins/bearer/corewlan/qcorewlanengine.h
@@ -102,7 +102,7 @@ protected:
void getUserConfigurations();
QString getNetworkNameFromSsid(const QString &ssid);
QString getSsidFromNetworkName(const QString &name);
- QStringList foundNetwork(const QString &id, const QString &ssid, const QNetworkConfiguration::StateFlags state, const QString &interfaceName);
+ QStringList foundNetwork(const QString &id, const QString &ssid, const QNetworkConfiguration::StateFlags state, const QString &interfaceName, const QNetworkConfiguration::Purpose purpose);
};
QT_END_NAMESPACE
diff --git a/src/plugins/bearer/corewlan/qcorewlanengine.mm b/src/plugins/bearer/corewlan/qcorewlanengine.mm
index a366d00..b59ccee 100644
--- a/src/plugins/bearer/corewlan/qcorewlanengine.mm
+++ b/src/plugins/bearer/corewlan/qcorewlanengine.mm
@@ -271,7 +271,7 @@ void QCoreWlanEngine::connectToId(const QString &id)
SecKeychainAttributeList attributeList = {3,attributes};
SecKeychainSearchRef searchRef;
- OSErr result = SecKeychainSearchCreateFromAttributes(NULL, kSecGenericPasswordItemClass, &attributeList, &searchRef);
+ SecKeychainSearchCreateFromAttributes(NULL, kSecGenericPasswordItemClass, &attributeList, &searchRef);
NSString *password = @"";
SecKeychainItemRef searchItem;
@@ -429,7 +429,14 @@ QStringList QCoreWlanEngine::scanForSsids(const QString &interfaceName)
state = QNetworkConfiguration::Undefined;
}
}
- found.append(foundNetwork(id, networkSsid, state, interfaceName));
+ QNetworkConfiguration::Purpose purpose = QNetworkConfiguration::UnknownPurpose;
+ if([[apNetwork securityMode] intValue] == kCWSecurityModeOpen) {
+ purpose = QNetworkConfiguration::PublicPurpose;
+ } else {
+ purpose = QNetworkConfiguration::PrivatePurpose;
+ }
+
+ found.append(foundNetwork(id, networkSsid, state, interfaceName, purpose));
} //end row
} //end error
@@ -470,13 +477,13 @@ QStringList QCoreWlanEngine::scanForSsids(const QString &interfaceName)
state = QNetworkConfiguration::Defined;
}
- found.append(foundNetwork(id, networkName, state, interfaceName));
+ found.append(foundNetwork(id, networkName, state, interfaceName, QNetworkConfiguration::UnknownPurpose));
}
}
return found;
}
-QStringList QCoreWlanEngine::foundNetwork(const QString &id, const QString &name, const QNetworkConfiguration::StateFlags state, const QString &interfaceName)
+QStringList QCoreWlanEngine::foundNetwork(const QString &id, const QString &name, const QNetworkConfiguration::StateFlags state, const QString &interfaceName, const QNetworkConfiguration::Purpose purpose)
{
QStringList found;
QMutexLocker locker(&mutex);
@@ -507,6 +514,10 @@ QStringList QCoreWlanEngine::foundNetwork(const QString &id, const QString &name
changed = true;
}
+ if (ptr->purpose != purpose) {
+ ptr->purpose = purpose;
+ changed = true;
+ }
ptr->mutex.unlock();
if (changed) {
@@ -524,6 +535,7 @@ QStringList QCoreWlanEngine::foundNetwork(const QString &id, const QString &name
ptr->state = state;
ptr->type = QNetworkConfiguration::InternetAccessPoint;
ptr->bearer = QLatin1String("WLAN");
+ ptr->purpose = purpose;
accessPointConfigurations.insert(ptr->id, ptr);
configurationInterface.insert(ptr->id, interfaceName);
diff --git a/src/plugins/bearer/networkmanager/qnetworkmanagerengine.cpp b/src/plugins/bearer/networkmanager/qnetworkmanagerengine.cpp
index 28ee38e..72d6838 100644
--- a/src/plugins/bearer/networkmanager/qnetworkmanagerengine.cpp
+++ b/src/plugins/bearer/networkmanager/qnetworkmanagerengine.cpp
@@ -602,7 +602,11 @@ void QNetworkManagerEngine::newAccessPoint(const QString &path, const QDBusObjec
ptr->isValid = true;
ptr->id = QString::number(qHash(objectPath.path()));
ptr->type = QNetworkConfiguration::InternetAccessPoint;
- ptr->purpose = QNetworkConfiguration::PublicPurpose;
+ if(accessPoint->flags() == NM_802_11_AP_FLAGS_PRIVACY) {
+ ptr->purpose = QNetworkConfiguration::PrivatePurpose;
+ } else {
+ ptr->purpose = QNetworkConfiguration::PublicPurpose;
+ }
ptr->state = QNetworkConfiguration::Undefined;
ptr->bearer = QLatin1String("WLAN");
@@ -718,6 +722,7 @@ QNetworkConfigurationPrivate *QNetworkManagerEngine::parseConnection(const QStri
if (connectionType == QLatin1String("802-3-ethernet")) {
cpPriv->bearer = QLatin1String("Ethernet");
+ cpPriv->purpose = QNetworkConfiguration::PublicPurpose;
foreach (const QDBusObjectPath &devicePath, interface->getDevices()) {
QNetworkManagerInterfaceDevice device(devicePath.path());
@@ -734,7 +739,12 @@ QNetworkConfigurationPrivate *QNetworkManagerEngine::parseConnection(const QStri
cpPriv->bearer = QLatin1String("WLAN");
const QString connectionSsid = map.value("802-11-wireless").value("ssid").toString();
-
+ const QString connectionSecurity = map.value("802-11-wireless").value("security").toString();
+ if(!connectionSecurity.isEmpty()) {
+ cpPriv->purpose = QNetworkConfiguration::PrivatePurpose;
+ } else {
+ cpPriv->purpose = QNetworkConfiguration::PublicPurpose;
+ }
for (int i = 0; i < accessPoints.count(); ++i) {
if (connectionSsid == accessPoints.at(i)->ssid()) {
cpPriv->state |= QNetworkConfiguration::Discovered;
diff --git a/src/plugins/mediaservices/directshow/mediaplayer/directshowmediatype.cpp b/src/plugins/mediaservices/directshow/mediaplayer/directshowmediatype.cpp
index cf6d45b..f8f519d 100644
--- a/src/plugins/mediaservices/directshow/mediaplayer/directshowmediatype.cpp
+++ b/src/plugins/mediaservices/directshow/mediaplayer/directshowmediatype.cpp
@@ -130,9 +130,17 @@ QVideoSurfaceFormat DirectShowMediaType::formatFromType(const AM_MEDIA_TYPE &typ
if (header->AvgTimePerFrame > 0)
format.setFrameRate(10000 /header->AvgTimePerFrame);
- format.setScanLineDirection(header->bmiHeader.biHeight < 0
- ? QVideoSurfaceFormat::TopToBottom
- : QVideoSurfaceFormat::BottomToTop);
+ switch (qt_typeLookup[i].pixelFormat) {
+ case QVideoFrame::Format_RGB32:
+ case QVideoFrame::Format_BGR24:
+ case QVideoFrame::Format_RGB565:
+ case QVideoFrame::Format_RGB555:
+ if (header->bmiHeader.biHeight >= 0)
+ format.setScanLineDirection(QVideoSurfaceFormat::BottomToTop);
+ break;
+ default:
+ break;
+ }
return format;
} else if (IsEqualGUID(type.formattype, FORMAT_VideoInfo2)) {
@@ -145,9 +153,17 @@ QVideoSurfaceFormat DirectShowMediaType::formatFromType(const AM_MEDIA_TYPE &typ
if (header->AvgTimePerFrame > 0)
format.setFrameRate(10000 / header->AvgTimePerFrame);
- format.setScanLineDirection(header->bmiHeader.biHeight < 0
- ? QVideoSurfaceFormat::TopToBottom
- : QVideoSurfaceFormat::BottomToTop);
+ switch (qt_typeLookup[i].pixelFormat) {
+ case QVideoFrame::Format_RGB32:
+ case QVideoFrame::Format_BGR24:
+ case QVideoFrame::Format_RGB565:
+ case QVideoFrame::Format_RGB555:
+ if (header->bmiHeader.biHeight >= 0)
+ format.setScanLineDirection(QVideoSurfaceFormat::BottomToTop);
+ break;
+ default:
+ break;
+ }
return format;
}
diff --git a/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.cpp b/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.cpp
index d54d188..a5f143f 100644
--- a/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.cpp
+++ b/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.cpp
@@ -191,6 +191,8 @@ void DirectShowPlayerService::load(const QMediaContent &media, QIODevice *stream
m_atEnd = false;
m_metaDataControl->updateGraph(0, 0);
+ QCoreApplication::postEvent(this, new QEvent(QEvent::Type(VideoOutputChange)));
+
if (m_resources.isEmpty() && !stream) {
m_pendingTasks = 0;
m_graphStatus = NoMedia;
@@ -464,6 +466,8 @@ void DirectShowPlayerService::doRender(QMutexLocker *locker)
QCoreApplication::postEvent(this, new QEvent(QEvent::Type(Error)));
}
+ QCoreApplication::postEvent(this, new QEvent(QEvent::Type(VideoOutputChange)));
+
m_executedTasks |= Render;
}
}
@@ -1144,6 +1148,9 @@ void DirectShowPlayerService::customEvent(QEvent *event)
QMutexLocker locker(&m_mutex);
m_playerControl->updatePosition(m_position);
+ } else if (event->type() == QEvent::Type(VideoOutputChange)) {
+ if (m_videoWindowControl)
+ m_videoWindowControl->updateNativeSize();
} else {
QMediaService::customEvent(event);
}
diff --git a/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.h b/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.h
index 23515d0..d3ef809 100644
--- a/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.h
+++ b/src/plugins/mediaservices/directshow/mediaplayer/directshowplayerservice.h
@@ -164,7 +164,8 @@ private:
DurationChange,
StatusChange,
EndOfMedia,
- PositionChange
+ PositionChange,
+ VideoOutputChange
};
enum GraphStatus
diff --git a/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.cpp b/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.cpp
index 4b9aeb8..e25dd99 100644
--- a/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.cpp
+++ b/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.cpp
@@ -51,6 +51,7 @@ Vmr9VideoWindowControl::Vmr9VideoWindowControl(QObject *parent)
, m_filter(com_new<IBaseFilter>(CLSID_VideoMixingRenderer9, IID_IBaseFilter))
, m_windowId(0)
, m_dirtyValues(0)
+ , m_aspectRatioMode(Qt::KeepAspectRatio)
, m_brightness(0)
, m_contrast(0)
, m_hue(0)
@@ -90,33 +91,30 @@ void Vmr9VideoWindowControl::setWinId(WId id)
QRect Vmr9VideoWindowControl::displayRect() const
{
- QRect rect;
-
- if (IVMRWindowlessControl9 *control = com_cast<IVMRWindowlessControl9>(
- m_filter, IID_IVMRWindowlessControl9)) {
- RECT sourceRect;
- RECT displayRect;
-
- if (control->GetVideoPosition(&sourceRect, &displayRect) == S_OK) {
- rect = QRect(
- displayRect.left,
- displayRect.bottom,
- displayRect.right - displayRect.left,
- displayRect.bottom - displayRect.top);
- }
- control->Release();
- }
- return rect;
+ return m_displayRect;
}
void Vmr9VideoWindowControl::setDisplayRect(const QRect &rect)
{
+ m_displayRect = rect;
+
if (IVMRWindowlessControl9 *control = com_cast<IVMRWindowlessControl9>(
m_filter, IID_IVMRWindowlessControl9)) {
RECT sourceRect = { 0, 0, 0, 0 };
RECT displayRect = { rect.left(), rect.top(), rect.right(), rect.bottom() };
control->GetNativeVideoSize(&sourceRect.right, &sourceRect.bottom, 0, 0);
+
+ if (m_aspectRatioMode == Qt::KeepAspectRatioByExpanding) {
+ QSize clippedSize = rect.size();
+ clippedSize.scale(sourceRect.right, sourceRect.bottom, Qt::KeepAspectRatio);
+
+ sourceRect.left = (sourceRect.right - clippedSize.width()) / 2;
+ sourceRect.top = (sourceRect.bottom - clippedSize.height()) / 2;
+ sourceRect.right = sourceRect.left + clippedSize.width();
+ sourceRect.bottom = sourceRect.top + clippedSize.height();
+ }
+
control->SetVideoPosition(&sourceRect, &displayRect);
control->Release();
}
@@ -134,7 +132,6 @@ void Vmr9VideoWindowControl::setFullScreen(bool fullScreen)
void Vmr9VideoWindowControl::repaint()
{
-
if (QWidget *widget = QWidget::find(m_windowId)) {
HDC dc = widget->getDC();
if (IVMRWindowlessControl9 *control = com_cast<IVMRWindowlessControl9>(
@@ -164,21 +161,13 @@ QSize Vmr9VideoWindowControl::nativeSize() const
Qt::AspectRatioMode Vmr9VideoWindowControl::aspectRatioMode() const
{
- Qt::AspectRatioMode mode = Qt::KeepAspectRatio;
-
- if (IVMRWindowlessControl9 *control = com_cast<IVMRWindowlessControl9>(
- m_filter, IID_IVMRWindowlessControl9)) {
- DWORD arMode;
-
- if (control->GetAspectRatioMode(&arMode) == S_OK && arMode == VMR9ARMode_None)
- mode = Qt::IgnoreAspectRatio;
- control->Release();
- }
- return mode;
+ return m_aspectRatioMode;
}
void Vmr9VideoWindowControl::setAspectRatioMode(Qt::AspectRatioMode mode)
{
+ m_aspectRatioMode = mode;
+
if (IVMRWindowlessControl9 *control = com_cast<IVMRWindowlessControl9>(
m_filter, IID_IVMRWindowlessControl9)) {
switch (mode) {
@@ -188,10 +177,15 @@ void Vmr9VideoWindowControl::setAspectRatioMode(Qt::AspectRatioMode mode)
case Qt::KeepAspectRatio:
control->SetAspectRatioMode(VMR9ARMode_LetterBox);
break;
+ case Qt::KeepAspectRatioByExpanding:
+ control->SetAspectRatioMode(VMR9ARMode_LetterBox);
+ break;
default:
break;
}
control->Release();
+
+ setDisplayRect(m_displayRect);
}
}
@@ -259,6 +253,13 @@ void Vmr9VideoWindowControl::setSaturation(int saturation)
emit saturationChanged(saturation);
}
+void Vmr9VideoWindowControl::updateNativeSize()
+{
+ setDisplayRect(m_displayRect);
+
+ emit nativeSizeChanged();
+}
+
void Vmr9VideoWindowControl::setProcAmpValues()
{
if (IVMRMixerControl9 *control = com_cast<IVMRMixerControl9>(m_filter, IID_IVMRMixerControl9)) {
diff --git a/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.h b/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.h
index bf4fb42..beac433 100644
--- a/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.h
+++ b/src/plugins/mediaservices/directshow/mediaplayer/vmr9videowindowcontrol.h
@@ -90,6 +90,8 @@ public:
int saturation() const;
void setSaturation(int saturation);
+ void updateNativeSize();
+
private:
void setProcAmpValues();
float scaleProcAmpValue(
@@ -98,6 +100,8 @@ private:
IBaseFilter *m_filter;
WId m_windowId;
DWORD m_dirtyValues;
+ Qt::AspectRatioMode m_aspectRatioMode;
+ QRect m_displayRect;
int m_brightness;
int m_contrast;
int m_hue;
diff --git a/src/plugins/mediaservices/gstreamer/qgstreamervideooverlay.cpp b/src/plugins/mediaservices/gstreamer/qgstreamervideooverlay.cpp
index 427d514..f381f7f 100644
--- a/src/plugins/mediaservices/gstreamer/qgstreamervideooverlay.cpp
+++ b/src/plugins/mediaservices/gstreamer/qgstreamervideooverlay.cpp
@@ -192,21 +192,36 @@ void QGstreamerVideoOverlay::surfaceFormatChanged()
void QGstreamerVideoOverlay::setScaledDisplayRect()
{
+ QRect formatViewport = m_surface->surfaceFormat().viewport();
+
switch (m_aspectRatioMode) {
case Qt::KeepAspectRatio:
{
- QSize size = m_surface->surfaceFormat().viewport().size();
-
+ QSize size = m_surface->surfaceFormat().sizeHint();
size.scale(m_displayRect.size(), Qt::KeepAspectRatio);
QRect rect(QPoint(0, 0), size);
rect.moveCenter(m_displayRect.center());
m_surface->setDisplayRect(rect);
+ m_surface->setViewport(formatViewport);
}
break;
case Qt::IgnoreAspectRatio:
m_surface->setDisplayRect(m_displayRect);
+ m_surface->setViewport(formatViewport);
+ break;
+ case Qt::KeepAspectRatioByExpanding:
+ {
+ QSize size = m_displayRect.size();
+ size.scale(m_surface->surfaceFormat().sizeHint(), Qt::KeepAspectRatio);
+
+ QRect viewport(QPoint(0, 0), size);
+ viewport.moveCenter(formatViewport.center());
+
+ m_surface->setDisplayRect(m_displayRect);
+ m_surface->setViewport(viewport);
+ }
break;
};
}
diff --git a/src/plugins/mediaservices/gstreamer/qx11videosurface.cpp b/src/plugins/mediaservices/gstreamer/qx11videosurface.cpp
index cbd5a76..70b8527 100644
--- a/src/plugins/mediaservices/gstreamer/qx11videosurface.cpp
+++ b/src/plugins/mediaservices/gstreamer/qx11videosurface.cpp
@@ -213,6 +213,16 @@ void QX11VideoSurface::setDisplayRect(const QRect &rect)
m_displayRect = rect;
}
+QRect QX11VideoSurface::viewport() const
+{
+ return m_viewport;
+}
+
+void QX11VideoSurface::setViewport(const QRect &rect)
+{
+ m_viewport = rect;
+}
+
int QX11VideoSurface::brightness() const
{
return getAttribute("XV_BRIGHTNESS", m_brightnessRange.first, m_brightnessRange.second);
diff --git a/src/plugins/mediaservices/gstreamer/qx11videosurface.h b/src/plugins/mediaservices/gstreamer/qx11videosurface.h
index 1be963e..10f79a6 100644
--- a/src/plugins/mediaservices/gstreamer/qx11videosurface.h
+++ b/src/plugins/mediaservices/gstreamer/qx11videosurface.h
@@ -67,6 +67,9 @@ public:
QRect displayRect() const;
void setDisplayRect(const QRect &rect);
+ QRect viewport() const;
+ void setViewport(const QRect &rect);
+
int brightness() const;
void setBrightness(int brightness);
diff --git a/tools/qttracereplay/main.cpp b/tools/qttracereplay/main.cpp
index be7906b..101d512 100644
--- a/tools/qttracereplay/main.cpp
+++ b/tools/qttracereplay/main.cpp
@@ -49,7 +49,7 @@ class ReplayWidget : public QWidget
{
Q_OBJECT
public:
- ReplayWidget(const QString &filename, int from, int to, bool single);
+ ReplayWidget(const QString &filename, int from, int to, bool single, int frame);
void paintEvent(QPaintEvent *event);
void resizeEvent(QResizeEvent *event);
@@ -66,27 +66,96 @@ public:
QTime timer;
QList<uint> visibleUpdates;
- QList<uint> iterationTimes;
+
+ QVector<uint> iterationTimes;
QString filename;
int from;
int to;
bool single;
+
+ int frame;
+ int currentCommand;
};
void ReplayWidget::updateRect()
{
- if (!visibleUpdates.isEmpty())
+ if (frame >= 0 && !updates.isEmpty())
+ update(updates.at(frame));
+ else if (!visibleUpdates.isEmpty())
update(updates.at(visibleUpdates.at(currentFrame)));
}
+const int singleFrameRepeatsPerCommand = 100;
+const int singleFrameIterations = 4;
+
void ReplayWidget::paintEvent(QPaintEvent *)
{
QPainter p(this);
+ QTimer::singleShot(0, this, SLOT(updateRect()));
+
// p.setClipRegion(frames.at(currentFrame).updateRegion);
+ if (frame >= 0) {
+ int start = buffer.frameStartIndex(frame);
+ int end = buffer.frameEndIndex(frame);
+
+ iterationTimes.resize(end - start);
+
+ int saveRestoreStackDepth = buffer.processCommands(&p, start, start + currentCommand);
+
+ for (int i = 0; i < saveRestoreStackDepth; ++i)
+ p.restore();
+
+ const int repeats = currentIteration >= 3 ? singleFrameRepeatsPerCommand : 1;
+
+ ++currentFrame;
+ if (currentFrame == repeats) {
+ currentFrame = 0;
+ if (currentIteration >= 3) {
+ iterationTimes[currentCommand - 1] = qMin(iterationTimes[currentCommand - 1], uint(timer.elapsed()));
+ timer.restart();
+ }
+
+ if (currentIteration >= singleFrameIterations + 3) {
+ printf(" # | ms | description\n");
+ printf("------+---------+------------------------------------------------------------\n");
+
+ qSort(iterationTimes);
+
+ int sum = 0;
+ for (int i = 0; i < iterationTimes.size(); ++i) {
+ int delta = iterationTimes.at(i);
+ if (i > 0)
+ delta -= iterationTimes.at(i-1);
+ sum += delta;
+ qreal deltaF = delta / qreal(repeats);
+ printf("%.5d | %.5f | %s\n", i, deltaF, qPrintable(buffer.commandDescription(start + i)));
+ }
+ printf("Total | %.5f | Total frame time\n", sum / qreal(repeats));
+ deleteLater();
+ return;
+ }
+
+ if (start + currentCommand >= end) {
+ currentCommand = 1;
+ ++currentIteration;
+ if (currentIteration == 3) {
+ timer.start();
+ iterationTimes.fill(uint(-1));
+ }
+ if (currentIteration >= 3 && currentIteration < singleFrameIterations + 3)
+ printf("Profiling iteration %d of %d\n", currentIteration - 2, singleFrameIterations);
+ } else {
+ ++currentCommand;
+ }
+ }
+
+ return;
+ }
+
buffer.draw(&p, visibleUpdates.at(currentFrame));
++currentFrame;
@@ -138,11 +207,9 @@ void ReplayWidget::paintEvent(QPaintEvent *)
}
}
}
-
- QTimer::singleShot(0, this, SLOT(updateRect()));
}
-void ReplayWidget::resizeEvent(QResizeEvent *event)
+void ReplayWidget::resizeEvent(QResizeEvent *)
{
visibleUpdates.clear();
@@ -162,13 +229,15 @@ void ReplayWidget::resizeEvent(QResizeEvent *event)
}
-ReplayWidget::ReplayWidget(const QString &filename_, int from_, int to_, bool single_)
+ReplayWidget::ReplayWidget(const QString &filename_, int from_, int to_, bool single_, int frame_)
: currentFrame(0)
, currentIteration(0)
, filename(filename_)
, from(from_)
, to(to_)
, single(single_)
+ , frame(frame_)
+ , currentCommand(1)
{
setWindowTitle(filename);
QFile file(filename);
@@ -216,7 +285,8 @@ int main(int argc, char **argv)
printf("Usage:\n > %s [OPTIONS] [traceFile]\n", argv[0]);
printf("OPTIONS\n"
" --range=from-to to specify a frame range.\n"
- " --singlerun to do only one run (without statistics)\n");
+ " --singlerun to do only one run (without statistics)\n"
+ " --instrumentframe=frame to instrument a single frame\n");
return 1;
}
@@ -228,6 +298,8 @@ int main(int argc, char **argv)
bool single = false;
+ int frame = -1;
+
int from = 0;
int to = -1;
for (int i = 1; i < app.arguments().size() - 1; ++i) {
@@ -253,13 +325,22 @@ int main(int argc, char **argv)
}
} else if (arg == QLatin1String("--singlerun")) {
single = true;
+ } else if (arg.startsWith(QLatin1String("--instrumentframe="))) {
+ QString rest = arg.mid(18);
+ bool ok = false;
+ int frameCandidate = rest.toInt(&ok);
+ if (ok) {
+ frame = frameCandidate;
+ } else {
+ printf("ERROR: malformed syntax in argument %s\n", qPrintable(arg));
+ }
} else {
printf("Unrecognized argument: %s\n", qPrintable(arg));
return 1;
}
}
- ReplayWidget *widget = new ReplayWidget(app.arguments().last(), from, to, single);
+ ReplayWidget *widget = new ReplayWidget(app.arguments().last(), from, to, single, frame);
if (!widget->updates.isEmpty()) {
widget->show();