diff options
author | Andreas Kling <andreas.kling@nokia.com> | 2010-08-05 16:36:54 (GMT) |
---|---|---|
committer | Andreas Kling <andreas.kling@nokia.com> | 2010-08-11 12:30:08 (GMT) |
commit | 59f01a1efe818004fdc9265af6a0e9a05c43932a (patch) | |
tree | 8e47df2906d7a8de52a0629e2dbd599c6d1b05a4 | |
parent | c7fcd56bd3e150e57230ea1445bf4286b3f3fabe (diff) | |
download | Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.zip Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.gz Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.bz2 |
64-bit versions of PREMUL, BYTE_MUL and INTERPOLATE_PIXEL_256
Corrected and enabled the 64-bit versions of these three functions.
Speed improvements (tested on an i7):
BYTE_MUL : 104.7%
INTERPOLATE_PIXEL_256 : 13.2%
PREMUL : 13.1%
Reviewed-by: Samuel Rødal
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 140 |
1 file changed, 67 insertions, 73 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 1a87127..d04c70d 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -314,18 +314,61 @@ struct QSpanData void adjustSpanMethods(); }; +#if defined(Q_CC_RVCT) +# pragma push +# pragma arm +#endif +Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) { + uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b; + t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8; + t &= 0xff00ff; -Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) { - a += 1; - uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0; - t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f; - return t; + x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b; + x = (x + ((x >> 8) & 0xff00ff) + 0x800080); + x &= 0xff00ff00; + x |= t; + return x; } +#if defined(Q_CC_RVCT) +# pragma pop +#endif -Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) { - uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0; - t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f; - return t; +#if QT_POINTER_SIZE == 8 // 64-bit versions + +Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) { + quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a; + t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b; + t >>= 8; + t &= 0x00ff00ff00ff00ff; + return (uint(t)) | (uint(t >> 24)); +} + +Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) { + quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a; + t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8; + t &= 0x00ff00ff00ff00ff; + return (uint(t)) | (uint(t >> 24)); +} + +Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) { + uint a = x >> 24; + quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a; + t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8; + t &= 0x000000ff00ff00ff; + return (uint(t)) | (uint(t >> 
24)) | (a << 24); +} + +#else // 32-bit versions + +Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) { + uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b; + t >>= 8; + t &= 0xff00ff; + + x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b; + x &= 0xff00ff00; + x |= t; + return x; } #if defined(Q_CC_RVCT) @@ -359,6 +402,21 @@ Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) { x |= t | (a << 24); return x; } +#endif + + +Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) { + a += 1; + uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0; + t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f; + return t; +} + +Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) { + uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0; + t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f; + return t; +} #define INV_PREMUL(p) \ (qAlpha(p) == 0 ? 0 : \ @@ -1847,70 +1905,6 @@ inline int qBlue565(quint16 rgb) { return (b << 3) | (b >> 2); } -#if 1 -Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) { - uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b; - t >>= 8; - t &= 0xff00ff; - - x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b; - x &= 0xff00ff00; - x |= t; - return x; -} - -#if defined(Q_CC_RVCT) -# pragma push -# pragma arm -#endif -Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) { - uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b; - t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8; - t &= 0xff00ff; - - x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b; - x = (x + ((x >> 8) & 0xff00ff) + 0x800080); - x &= 0xff00ff00; - x |= t; - return x; -} -#if defined(Q_CC_RVCT) -# pragma pop -#endif -#else -// possible implementation for 64 bit -Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) { - ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a; - t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b; - t >>= 8; - 
t &= 0x00ff00ff00ff00ff; - return (uint(t)) | (uint(t >> 24)); -} - -Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) { - ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a; - t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b; - t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080); - t &= 0x00ff00ff00ff00ff; - return (uint(t)) | (uint(t >> 24)); -} - -Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) { - ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a; - t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080); - t &= 0x00ff00ff00ff00ff; - return (uint(t)) | (uint(t >> 24)); -} - -Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) { - uint a = x >> 24; - ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a; - t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080); - t &= 0x00ff00ff00ff00ff; - return (uint(t)) | (uint(t >> 24)) | 0xff000000; -} -#endif - const uint qt_bayer_matrix[16][16] = { { 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc, 0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff}, |