From 5cfa764466be6ec2b987e0694b99f1d343d55048 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Wed, 23 Jun 2010 22:22:56 +0200 Subject: Add an implementation of comp_func_solid_SourceOver_neon() with Neon. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function comp_func_solid_SourceOver_neon() is used extensively by WebKit via the calls to fillRect() of QPainter(). Implementing the function with Neon provides some performance improvement (around 175% of the previous speed). Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 1 + src/gui/painting/qdrawhelper_neon.cpp | 43 +++++++++++++++++++++++++++++++++++ src/gui/painting/qdrawhelper_neon_p.h | 2 ++ 3 files changed, 46 insertions(+) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index f08c090..5727b3c 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7898,6 +7898,7 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; + functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; } diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ee5f24a..3ce90d2 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -579,6 +579,49 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int } } +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + QT_MEMFILL_UINT(destPixels, length, color); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + 
const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + uint32_t *dst = (uint32_t *) destPixels; + const uint32x4_t colorVector = vdupq_n_u32(color); + uint16x8_t half = vdupq_n_u16(0x80); + const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + uint32x4_t dstVector = vld1q_u32(&dst[x]); + + const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector); + + const uint8x8_t dst8_low = vget_low_u8(dst8); + const uint8x8_t dst8_high = vget_high_u8(dst8); + + const uint16x8_t dst16_low = vmovl_u8(dst8_low); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); + + const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half); + const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); + + uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high); + uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels); + vst1q_u32(&dst[x], colorPlusBlendedPixels); + } + + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index d6a4509..c054a1e 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -127,6 +127,8 @@ uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); + #endif // QT_HAVE_NEON QT_END_NAMESPACE -- cgit v0.12