summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gui/painting/qdrawingprimitive_sse2_p.h34
1 files changed, 5 insertions, 29 deletions
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 65292bc..3c96946 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -43,7 +43,6 @@
#define QDRAWINGPRIMITIVE_SSE2_P_H
#include <private/qsimd_p.h>
-#include <stdint.h>
#ifdef QT_HAVE_SSE2
@@ -142,24 +141,12 @@ QT_BEGIN_NAMESPACE
// with shortcuts if fully opaque or fully transparent.
#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
int x = 0; \
-\
- /* First, get dst aligned. */ \
- const int offsetToAlignOn16Bytes = (4 - (reinterpret_cast<uintptr_t>(dst) >> 2 & 0x3)) & 0x3;\
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
- for (; x < prologLength; ++x) { \
- uint s = src[x]; \
- if (s >= 0xff000000) \
- dst[x] = s; \
- else if (s != 0) \
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
- } \
-\
for (; x < length-3; x += 4) { \
const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
/* all opaque */ \
- _mm_store_si128((__m128i *)&dst[x], srcVector); \
+ _mm_storeu_si128((__m128i *)&dst[x], srcVector); \
} else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
/* not fully transparent */ \
/* extract the alpha channel on 2 x 16 bits */ \
@@ -170,13 +157,13 @@ QT_BEGIN_NAMESPACE
alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
alphaChannel = _mm_sub_epi16(one, alphaChannel); \
\
- const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
+ const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
__m128i destMultipliedByOneMinusAlpha; \
BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
\
/* result = s + d * (1-alpha) */\
const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
- _mm_store_si128((__m128i *)&dst[x], result); \
+ _mm_storeu_si128((__m128i *)&dst[x], result); \
} \
} \
for (; x < length; ++x) { \
@@ -202,17 +189,6 @@ QT_BEGIN_NAMESPACE
#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
{ \
int x = 0; \
-\
- const int offsetToAlignOn16Bytes = (4 - (reinterpret_cast<uintptr_t>(dst) >> 2 & 0x3)) & 0x3;\
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
- for (; x < prologLength; ++x) { \
- uint s = src[x]; \
- if (s >= 0xff000000) \
- dst[x] = s; \
- else if (s != 0) \
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
- } \
-\
for (; x < length-3; x += 4) { \
__m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
@@ -222,12 +198,12 @@ QT_BEGIN_NAMESPACE
alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
alphaChannel = _mm_sub_epi16(one, alphaChannel); \
\
- const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \
+ const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
__m128i destMultipliedByOneMinusAlpha; \
BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
\
const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
- _mm_store_si128((__m128i *)&dst[x], result); \
+ _mm_storeu_si128((__m128i *)&dst[x], result); \
} \
} \
for (; x < length; ++x) { \