Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2 into 4.7-integration

* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2:
  Corrected BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2
  Used aligned load and store when possible for the blending of ARGB32
author: Qt Continuous Integration System <qt-info@nokia.com> 2010-07-18 23:33:45 (GMT)
committer: Qt Continuous Integration System <qt-info@nokia.com> 2010-07-18 23:33:45 (GMT)
commit: e7b78d2225c5338545852330c9160084f12ffe1f (patch)
tree: 663fa0923182e25b5ba613a831eb7c6a517ebaf7 /src/gui
parent: c79ad5443835a5be9057de5e1553786dde46f65d (diff)
parent: 23ea4340a622cbfed81eb7afb2e09ec64b0ebef8 (diff)
download: Qt-e7b78d2225c5338545852330c9160084f12ffe1f.zip
Qt-e7b78d2225c5338545852330c9160084f12ffe1f.tar.gz
Qt-e7b78d2225c5338545852330c9160084f12ffe1f.tar.bz2
1 files changed, 28 insertions, 5 deletions
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 3c96946..18355c2 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -141,12 +141,24 @@ QT_BEGIN_NAMESPACE
 // with shortcuts if fully opaque or fully transparent.
 #define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
     int x = 0; \
+\
+    /* First, get dst aligned. */ \
+    const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\
+    const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
+    for (; x < prologLength; ++x) { \
+        uint s = src[x]; \
+        if (s >= 0xff000000) \
+            dst[x] = s; \
+        else if (s != 0) \
+            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+    } \
+\
     for (; x < length-3; x += 4) { \
         const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
         const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
         if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
             /* all opaque */ \
-            _mm_storeu_si128((__m128i *)&dst[x], srcVector); \
+            _mm_store_si128((__m128i *)&dst[x], srcVector); \
         } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
             /* not fully transparent */ \
             /* extract the alpha channel on 2 x 16 bits */ \
@@ -157,13 +169,13 @@ QT_BEGIN_NAMESPACE
             alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
             alphaChannel = _mm_sub_epi16(one, alphaChannel); \
  \
-            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
             __m128i destMultipliedByOneMinusAlpha; \
             BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
  \
             /* result = s + d * (1-alpha) */\
             const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_storeu_si128((__m128i *)&dst[x], result); \
+            _mm_store_si128((__m128i *)&dst[x], result); \
         } \
     } \
     for (; x < length; ++x) { \
@@ -189,6 +201,17 @@ QT_BEGIN_NAMESPACE
 #define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
 { \
     int x = 0; \
+\
+    const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\
+    const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
+    for (; x < prologLength; ++x) { \
+        quint32 s = src[x]; \
+        if (s != 0) { \
+            s = BYTE_MUL(s, const_alpha); \
+            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+        } \
+    } \
+\
     for (; x < length-3; x += 4) { \
         __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
         if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
@@ -198,12 +221,12 @@ QT_BEGIN_NAMESPACE
             alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
             alphaChannel = _mm_sub_epi16(one, alphaChannel); \
  \
-            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+            const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
             __m128i destMultipliedByOneMinusAlpha; \
             BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
  \
             const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_storeu_si128((__m128i *)&dst[x], result); \
+            _mm_store_si128((__m128i *)&dst[x], result); \
         } \
     } \
     for (; x < length; ++x) { \
author	Qt Continuous Integration System <qt-info@nokia.com>	2010-07-18 23:33:45 (GMT)
committer	Qt Continuous Integration System <qt-info@nokia.com>	2010-07-18 23:33:45 (GMT)
commit	e7b78d2225c5338545852330c9160084f12ffe1f (patch)
tree	663fa0923182e25b5ba613a831eb7c6a517ebaf7 /src/gui
parent	c79ad5443835a5be9057de5e1553786dde46f65d (diff)
parent	23ea4340a622cbfed81eb7afb2e09ec64b0ebef8 (diff)
download	Qt-e7b78d2225c5338545852330c9160084f12ffe1f.zip Qt-e7b78d2225c5338545852330c9160084f12ffe1f.tar.gz Qt-e7b78d2225c5338545852330c9160084f12ffe1f.tar.bz2