diff options
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 9 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 55 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 1 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 24 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 16 |
5 files changed, 83 insertions, 22 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 276da93..0b74fc0 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -1757,9 +1757,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Plus_impl(uint *dest, int for (int i = 0; i < length; ++i) { PRELOAD_COND(dest) uint d = dest[i]; -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - d = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX + d = comp_func_Plus_one_pixel(d, s); coverage.store(&dest[i], d); } } @@ -1781,9 +1779,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Plus_impl(uint *dest, const uin uint d = dest[i]; uint s = src[i]; -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - d = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX + d = comp_func_Plus_one_pixel(d, s); coverage.store(&dest[i], d); } @@ -7911,6 +7907,7 @@ void qInitDrawhelperAsm() functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; + functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index 03fe075..c1f815d 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -622,6 +622,61 @@ void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, u } } +void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha) +{ + if (const_alpha == 255) { + uint *const end = dst + length; + uint *const neonEnd = end - 3; + + while (dst < neonEnd) { + asm volatile ( + "vld2.8 { d0, d1 }, [%[SRC]] !\n\t" + "vld2.8 { d2, d3 }, [%[DST]]\n\t" + "vqadd.u8 q0, q0, q1\n\t" + "vst2.8 { d0, d1 }, [%[DST]] !\n\t" + : [DST]"+r" (dst), [SRC]"+r" (src) + : + : "memory", "d0", "d1", "d2", "d3", "q0", "q1" + ); + } + + while (dst != end) { + *dst = comp_func_Plus_one_pixel(*dst, *src); + ++dst; + ++src; + } + } else { + int x = 0; + const int one_minus_const_alpha = 255 - const_alpha; + const uint16x8_t constAlphaVector = vdupq_n_u16(const_alpha); + const uint16x8_t oneMinusconstAlphaVector = vdupq_n_u16(one_minus_const_alpha); + + const uint16x8_t half = vdupq_n_u16(0x80); + for (; x < length - 3; x += 4) { + const uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); + const uint8x16_t src8 = vreinterpretq_u8_u32(src32); + uint8x16_t dst8 = vld1q_u8((uint8_t *)&dst[x]); + uint8x16_t result = vqaddq_u8(dst8, src8); + + uint16x8_t result_low = vmovl_u8(vget_low_u8(result)); + uint16x8_t result_high = vmovl_u8(vget_high_u8(result)); + + uint16x8_t dst_low = vmovl_u8(vget_low_u8(dst8)); + uint16x8_t dst_high = vmovl_u8(vget_high_u8(dst8)); + + result_low = qvinterpolate_pixel_255(result_low, constAlphaVector, dst_low, oneMinusconstAlphaVector, half); + result_high = qvinterpolate_pixel_255(result_high, constAlphaVector, dst_high, oneMinusconstAlphaVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result_high)); + vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); + } + + for (; x < length; ++x) + dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); + } +} + static const int tileSize = 32; extern "C" void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index cd2dbfc..182c936 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -131,6 +131,7 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); +void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha); #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index d04c70d..75f42a0 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -1944,6 +1944,30 @@ const uint qt_bayer_matrix[16][16] = { ((((argb >> 24) * alpha) >> 8) << 24) | (argb & 0x00ffffff) +#if QT_POINTER_SIZE == 8 // 64-bit versions +#define AMIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#else // 32 bits +// The mask for alpha can overflow over 32 bits +#define AMIX(mask) quint32(qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#define MIX(mask) (qMin(((quint32(s)&mask) + (quint32(d)&mask)), quint32(mask))) +#endif + +inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha) +{ + const int result = (AMIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); + return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha); +} + +inline int comp_func_Plus_one_pixel(uint d, const uint s) +{ + const int result = (AMIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); + return result; +} + +#undef MIX +#undef AMIX + // prototypes of all the composition functions void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha); void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha); diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 22c0384..30454af 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -161,22 +161,6 @@ void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixe } } -inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha) -{ -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX - return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha); -} - -inline int comp_func_Plus_one_pixel(uint d, const uint s) -{ -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX - return result; -} - void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) { int x = 0; |