summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-08-05 11:43:19 (GMT)
committer: Benjamin Poulain <benjamin.poulain@nokia.com> 2010-08-05 11:56:48 (GMT)
commit: 15505360dc8e8b8f1343bba6fd3e5a9c95718d30 (patch)
tree: 34feda5557418bd094b8acfd295c2eb30fef294a
parent: 637d207e397c13c09a8dcbd718ee85bce2548e90 (diff)
download: Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.zip
Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.tar.gz
Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.tar.bz2
Replace the SSE prologues by a macro
Replace the code of the SSE prologue by a macro to avoid copying the prologue everywhere.

Reviewed-by: Andreas Kling
-rw-r--r-- src/corelib/tools/qsimd_p.h 4
-rw-r--r-- src/gui/painting/qdrawhelper_sse2.cpp 14
-rw-r--r-- src/gui/painting/qdrawingprimitive_sse2_p.h 8
3 files changed, 9 insertions, 17 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 5ff0f97..a3148fb 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -150,7 +150,9 @@ enum CPUFeatures {
Q_CORE_EXPORT uint qDetectCPUFeatures();
-Q_CORE_EXPORT uint qDetectCPUFeatures();
+
+#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
+ for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)
QT_END_NAMESPACE
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index 7ab9eda..22c0384 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -112,9 +112,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
int x = 0;
// First, align dest to 16 bytes:
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
- const int prologLength = qMin(w, offsetToAlignOn16Bytes);
- for (; x < prologLength; ++x) {
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) {
quint32 s = src[x];
s = BYTE_MUL(s, const_alpha);
dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha);
@@ -182,12 +180,10 @@ inline int comp_func_Plus_one_pixel(uint d, const uint s)
void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha)
{
int x = 0;
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);
if (const_alpha == 255) {
// 1) Prologue: align destination on 16 bytes
- for (; x < prologLength; ++x)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]);
// 2) composition with SSE2
@@ -208,7 +204,7 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin
const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha);
// 1) Prologue: align destination on 16 bytes
- for (; x < prologLength; ++x)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha);
const __m128i half = _mm_set1_epi16(0x80);
@@ -239,9 +235,7 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u
int x = 0;
// 1) prologue, align on 16 bytes
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);
- for (; x < prologLength; ++x)
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length)
dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha);
// 2) interpolate pixels with SSE2
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 18355c2..d8f6bf5 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -143,9 +143,7 @@ QT_BEGIN_NAMESPACE
int x = 0; \
\
/* First, get dst aligned. */ \
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
- for (; x < prologLength; ++x) { \
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
uint s = src[x]; \
if (s >= 0xff000000) \
dst[x] = s; \
@@ -202,9 +200,7 @@ QT_BEGIN_NAMESPACE
{ \
int x = 0; \
\
- const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\
- const int prologLength = qMin(length, offsetToAlignOn16Bytes);\
- for (; x < prologLength; ++x) { \
+ ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \
quint32 s = src[x]; \
if (s != 0) { \
s = BYTE_MUL(s, const_alpha); \