64-bit versions of PREMUL, BYTE_MUL and INTERPOLATE_PIXEL_256

Corrected and enabled the 64-bit versions of these three functions. Speed improvements (tested on an i7): BYTE_MUL: 104.7%; INTERPOLATE_PIXEL_256: 13.2%; PREMUL: 13.1%. Reviewed-by: Samuel Rødal
author: Andreas Kling <andreas.kling@nokia.com> 2010-08-05 16:36:54 (GMT)
committer: Andreas Kling <andreas.kling@nokia.com> 2010-08-11 12:30:08 (GMT)
commit: 59f01a1efe818004fdc9265af6a0e9a05c43932a (patch)
tree: 8e47df2906d7a8de52a0629e2dbd599c6d1b05a4
parent: c7fcd56bd3e150e57230ea1445bf4286b3f3fabe (diff)
download: Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.zip
Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.gz
Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.bz2
1 file changed, 67 insertions, 73 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 1a87127..d04c70d 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -314,18 +314,61 @@ struct QSpanData
     void adjustSpanMethods();
 };
 
+#if defined(Q_CC_RVCT)
+#  pragma push
+#  pragma arm
+#endif
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
+    uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
+    t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
+    t &= 0xff00ff;
 
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) {
-    a += 1;
-    uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
-    t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
-    return t;
+    x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
+    x = (x + ((x >> 8) & 0xff00ff) + 0x800080);
+    x &= 0xff00ff00;
+    x |= t;
+    return x;
 }
+#if defined(Q_CC_RVCT)
+#  pragma pop
+#endif
 
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) {
-    uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
-    t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
-    return t;
+#if QT_POINTER_SIZE == 8 // 64-bit versions
+
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+    t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
+    t >>= 8;
+    t &= 0x00ff00ff00ff00ff;
+    return (uint(t)) | (uint(t >> 24));
+}
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) {
+    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
+    t &= 0x00ff00ff00ff00ff;
+    return (uint(t)) | (uint(t >> 24));
+}
+
+Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
+    uint a = x >> 24;
+    quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
+    t &= 0x000000ff00ff00ff;
+    return (uint(t)) | (uint(t >> 24)) | (a << 24);
+}
+
+#else // 32-bit versions
+
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+    uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
+    t >>= 8;
+    t &= 0xff00ff;
+
+    x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
+    x &= 0xff00ff00;
+    x |= t;
+    return x;
 }
 
 #if defined(Q_CC_RVCT)
@@ -359,6 +402,21 @@ Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
     x |= t | (a << 24);
     return x;
 }
+#endif
+
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) {
+    a += 1;
+    uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
+    t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
+    return t;
+}
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) {
+    uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
+    t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
+    return t;
+}
 
 #define INV_PREMUL(p)                                   \
     (qAlpha(p) == 0 ? 0 :                               \
@@ -1847,70 +1905,6 @@ inline int qBlue565(quint16 rgb) {
     return (b << 3) | (b >> 2);
 }
 
-#if 1
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
-    uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
-    t >>= 8;
-    t &= 0xff00ff;
-
-    x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
-    x &= 0xff00ff00;
-    x |= t;
-    return x;
-}
-
-#if defined(Q_CC_RVCT)
-#  pragma push
-#  pragma arm
-#endif
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
-    uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
-    t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
-    t &= 0xff00ff;
-
-    x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
-    x = (x + ((x >> 8) & 0xff00ff) + 0x800080);
-    x &= 0xff00ff00;
-    x |= t;
-    return x;
-}
-#if defined(Q_CC_RVCT)
-#  pragma pop
-#endif
-#else
-// possible implementation for 64 bit
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
-    ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
-    t >>= 8;
-    t &= 0x00ff00ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
-    ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
-    t &= 0x00ff00ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) {
-    ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
-    t &= 0x00ff00ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
-    uint a = x >> 24;
-    ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
-    t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
-    t &= 0x00ff00ff00ff00ff;
-    return (uint(t)) | (uint(t >> 24)) | 0xff000000;
-}
-#endif
-
 const uint qt_bayer_matrix[16][16] = {
     { 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc,
       0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff},
author	Andreas Kling <andreas.kling@nokia.com>	2010-08-05 16:36:54 (GMT)
committer	Andreas Kling <andreas.kling@nokia.com>	2010-08-11 12:30:08 (GMT)
commit	59f01a1efe818004fdc9265af6a0e9a05c43932a (patch)
tree	8e47df2906d7a8de52a0629e2dbd599c6d1b05a4
parent	c7fcd56bd3e150e57230ea1445bf4286b3f3fabe (diff)
download	Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.zip Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.gz Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.bz2