diff options
author | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-24 02:20:19 (GMT) |
---|---|---|
committer | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-25 09:07:12 (GMT) |
commit | d28f3ffadec209511d8d992e3b35fae155b3d1c5 (patch) | |
tree | ae6de5aa97c85c05136dff9547bc7089bbe8c7bb /src | |
parent | 9f5457110f86fc11995efd93729c563e0713bebc (diff) | |
download | Qt-d28f3ffadec209511d8d992e3b35fae155b3d1c5.zip Qt-d28f3ffadec209511d8d992e3b35fae155b3d1c5.tar.gz Qt-d28f3ffadec209511d8d992e3b35fae155b3d1c5.tar.bz2 |
Implement qt_memfill32 with Neon.
This patch introduce a implementation of qt_memfill32 with the Neon
instructions set from ARMv7.
The loop is unrolled 1 time to get better performance.
This implementation of memfill is 330% faster on the N900.
Reviewed-by: Samuel Rødal
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 1 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 38 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 1 |
3 files changed, 40 insertions, 0 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 0b74fc0..be4275c 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7913,6 +7913,7 @@ void qInitDrawhelperAsm() qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; + qt_memfill32 = qt_memfill32_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index c1f815d..ed15c5c 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -51,6 +51,44 @@ QT_BEGIN_NAMESPACE +void qt_memfill32_neon(quint32 *dest, quint32 value, int count) +{ + const int epilogueSize = count % 16; + if (count >= 16) { + quint32 *const neonEnd = dest + count - epilogueSize; + register uint32x4_t valueVector1 asm ("q0") = vdupq_n_u32(value); + register uint32x4_t valueVector2 asm ("q1") = valueVector1; + while (dest != neonEnd) { + asm volatile ( + "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" + "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" + : [DST]"+r" (dest) + : [VALUE1]"w"(valueVector1), [VALUE2]"w"(valueVector2) + : "memory" + ); + } + } + + switch (epilogueSize) + { + case 15: *dest++ = value; + case 14: *dest++ = value; + case 13: *dest++ = value; + case 12: *dest++ = value; + case 11: *dest++ = value; + case 10: *dest++ = value; + case 9: *dest++ = value; + case 8: *dest++ = value; + case 7: *dest++ = value; + case 6: *dest++ = value; + case 5: *dest++ = value; + case 4: *dest++ = value; + case 3: *dest++ = value; + case 2: *dest++ = value; + case 1: *dest++ = value; + } +} + static inline uint16x8_t qvdiv_255_u16(uint16x8_t x, uint16x8_t half) { // result = (x + (x >> 8) + 0x80) >> 8 diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 182c936..451edbc 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -120,6 +120,7 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +void qt_memfill32_neon(quint32 *dest, quint32 value, int count); void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); |