summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Kling <andreas.kling@nokia.com>2010-08-05 16:36:54 (GMT)
committerAndreas Kling <andreas.kling@nokia.com>2010-08-11 12:30:08 (GMT)
commit59f01a1efe818004fdc9265af6a0e9a05c43932a (patch)
tree8e47df2906d7a8de52a0629e2dbd599c6d1b05a4
parentc7fcd56bd3e150e57230ea1445bf4286b3f3fabe (diff)
downloadQt-59f01a1efe818004fdc9265af6a0e9a05c43932a.zip
Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.gz
Qt-59f01a1efe818004fdc9265af6a0e9a05c43932a.tar.bz2
64-bit versions of PREMUL, BYTE_MUL and INTERPOLATE_PIXEL_256
Corrected and enabled the 64-bit versions of these three functions. Speed improvements (tested on an i7): BYTE_MUL : 104.7% INTERPOLATE_PIXEL_256 : 13.2% PREMUL : 13.1% Reviewed-by: Samuel Rødal
-rw-r--r--src/gui/painting/qdrawhelper_p.h140
1 files changed, 67 insertions, 73 deletions
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index 1a87127..d04c70d 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -314,18 +314,61 @@ struct QSpanData
void adjustSpanMethods();
};
+#if defined(Q_CC_RVCT)
+# pragma push
+# pragma arm
+#endif
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
+ uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
+ t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
+ t &= 0xff00ff;
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) {
- a += 1;
- uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
- t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
- return t;
+ x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
+ x = (x + ((x >> 8) & 0xff00ff) + 0x800080);
+ x &= 0xff00ff00;
+ x |= t;
+ return x;
}
+#if defined(Q_CC_RVCT)
+# pragma pop
+#endif
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) {
- uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
- t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
- return t;
+#if QT_POINTER_SIZE == 8 // 64-bit versions
+
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+ quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+ t += (((quint64(y)) | ((quint64(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
+ t >>= 8;
+ t &= 0x00ff00ff00ff00ff;
+ return (uint(t)) | (uint(t >> 24));
+}
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) {
+ quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+ t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
+ t &= 0x00ff00ff00ff00ff;
+ return (uint(t)) | (uint(t >> 24));
+}
+
+Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
+ uint a = x >> 24;
+ quint64 t = (((quint64(x)) | ((quint64(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
+ t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080) >> 8;
+ t &= 0x000000ff00ff00ff;
+ return (uint(t)) | (uint(t >> 24)) | (a << 24);
+}
+
+#else // 32-bit versions
+
+Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
+ uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
+ t >>= 8;
+ t &= 0xff00ff;
+
+ x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
+ x &= 0xff00ff00;
+ x |= t;
+ return x;
}
#if defined(Q_CC_RVCT)
@@ -359,6 +402,21 @@ Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
x |= t | (a << 24);
return x;
}
+#endif
+
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16(uint x, uint a) {
+ a += 1;
+ uint t = (((x & 0x07e0)*a) >> 8) & 0x07e0;
+ t |= (((x & 0xf81f)*(a>>2)) >> 6) & 0xf81f;
+ return t;
+}
+
+Q_STATIC_INLINE_FUNCTION uint BYTE_MUL_RGB16_32(uint x, uint a) {
+ uint t = (((x & 0xf81f07e0) >> 5)*a) & 0xf81f07e0;
+ t |= (((x & 0x07e0f81f)*a) >> 5) & 0x07e0f81f;
+ return t;
+}
#define INV_PREMUL(p) \
(qAlpha(p) == 0 ? 0 : \
@@ -1847,70 +1905,6 @@ inline int qBlue565(quint16 rgb) {
return (b << 3) | (b >> 2);
}
-#if 1
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
- uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
- t >>= 8;
- t &= 0xff00ff;
-
- x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
- x &= 0xff00ff00;
- x |= t;
- return x;
-}
-
-#if defined(Q_CC_RVCT)
-# pragma push
-# pragma arm
-#endif
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
- uint t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;
- t = (t + ((t >> 8) & 0xff00ff) + 0x800080) >> 8;
- t &= 0xff00ff;
-
- x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;
- x = (x + ((x >> 8) & 0xff00ff) + 0x800080);
- x &= 0xff00ff00;
- x |= t;
- return x;
-}
-#if defined(Q_CC_RVCT)
-# pragma pop
-#endif
-#else
-// possible implementation for 64 bit
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_256(uint x, uint a, uint y, uint b) {
- ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
- t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
- t >>= 8;
- t &= 0x00ff00ff00ff00ff;
- return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint INTERPOLATE_PIXEL_255(uint x, uint a, uint y, uint b) {
- ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
- t += (((ulong(y)) | ((ulong(y)) << 24)) & 0x00ff00ff00ff00ff) * b;
- t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
- t &= 0x00ff00ff00ff00ff;
- return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint BYTE_MUL(uint x, uint a) {
- ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
- t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
- t &= 0x00ff00ff00ff00ff;
- return (uint(t)) | (uint(t >> 24));
-}
-
-Q_STATIC_INLINE_FUNCTION uint PREMUL(uint x) {
- uint a = x >> 24;
- ulong t = (((ulong(x)) | ((ulong(x)) << 24)) & 0x00ff00ff00ff00ff) * a;
- t = (t + ((t >> 8) & 0xff00ff00ff00ff) + 0x80008000800080);
- t &= 0x00ff00ff00ff00ff;
- return (uint(t)) | (uint(t >> 24)) | 0xff000000;
-}
-#endif
-
const uint qt_bayer_matrix[16][16] = {
{ 0x1, 0xc0, 0x30, 0xf0, 0xc, 0xcc, 0x3c, 0xfc,
0x3, 0xc3, 0x33, 0xf3, 0xf, 0xcf, 0x3f, 0xff},