diff options
author | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-05 11:43:19 (GMT) |
---|---|---|
committer | Benjamin Poulain <benjamin.poulain@nokia.com> | 2010-08-05 11:56:48 (GMT) |
commit | 15505360dc8e8b8f1343bba6fd3e5a9c95718d30 (patch) | |
tree | 34feda5557418bd094b8acfd295c2eb30fef294a | |
parent | 637d207e397c13c09a8dcbd718ee85bce2548e90 (diff) | |
download | Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.zip Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.tar.gz Qt-15505360dc8e8b8f1343bba6fd3e5a9c95718d30.tar.bz2 |
Replace the SSE prologues by a macro
Replace the code of the SSE prologue by a macro to avoid copying the
prologue everywhere.
Reviewed-by: Andreas Kling
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 4 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 14 | ||||
-rw-r--r-- | src/gui/painting/qdrawingprimitive_sse2_p.h | 8 |
3 files changed, 9 insertions, 17 deletions
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 5ff0f97..a3148fb 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -150,7 +150,9 @@ enum CPUFeatures { Q_CORE_EXPORT uint qDetectCPUFeatures(); -Q_CORE_EXPORT uint qDetectCPUFeatures(); + +#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ + for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 7ab9eda..22c0384 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -112,9 +112,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, int x = 0; // First, align dest to 16 bytes: - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(w, offsetToAlignOn16Bytes); - for (; x < prologLength; ++x) { + ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { quint32 s = src[x]; s = BYTE_MUL(s, const_alpha); dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); @@ -182,12 +180,10 @@ inline int comp_func_Plus_one_pixel(uint d, const uint s) void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) { int x = 0; - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(length, offsetToAlignOn16Bytes); if (const_alpha == 255) { // 1) Prologue: align destination on 16 bytes - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); // 2) composition with SSE2 @@ -208,7 +204,7 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); // 1) Prologue: align destination on 16 bytes - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); const __m128i half = _mm_set1_epi16(0x80); @@ -239,9 +235,7 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u int x = 0; // 1) prologue, align on 16 bytes - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(length, offsetToAlignOn16Bytes); - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); // 2) interpolate pixels with SSE2 diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 18355c2..d8f6bf5 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -143,9 +143,7 @@ QT_BEGIN_NAMESPACE int x = 0; \ \ /* First, get dst aligned. */ \ - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\ - const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ - for (; x < prologLength; ++x) { \ + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ uint s = src[x]; \ if (s >= 0xff000000) \ dst[x] = s; \ @@ -202,9 +200,7 @@ QT_BEGIN_NAMESPACE { \ int x = 0; \ \ - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\ - const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ - for (; x < prologLength; ++x) { \ + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ quint32 s = src[x]; \ if (s != 0) { \ s = BYTE_MUL(s, const_alpha); \ |