From 15505360dc8e8b8f1343bba6fd3e5a9c95718d30 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Thu, 5 Aug 2010 13:43:19 +0200 Subject: Replace the SSE prologues by a macro Replace the code of the SSE prologue by a macro to avoid copying the prologue everywhere. Reviewed-by: Andreas Kling --- src/corelib/tools/qsimd_p.h | 4 +++- src/gui/painting/qdrawhelper_sse2.cpp | 14 ++++---------- src/gui/painting/qdrawingprimitive_sse2_p.h | 8 ++------ 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 5ff0f97..a3148fb 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -150,7 +150,9 @@ enum CPUFeatures { Q_CORE_EXPORT uint qDetectCPUFeatures(); -Q_CORE_EXPORT uint qDetectCPUFeatures(); + +#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \ + for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i) QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 7ab9eda..22c0384 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -112,9 +112,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, int x = 0; // First, align dest to 16 bytes: - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(w, offsetToAlignOn16Bytes); - for (; x < prologLength; ++x) { + ALIGNMENT_PROLOGUE_16BYTES(dst, x, w) { quint32 s = src[x]; s = BYTE_MUL(s, const_alpha); dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); @@ -182,12 +180,10 @@ inline int comp_func_Plus_one_pixel(uint d, const uint s) void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) { int x = 0; - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(length,
offsetToAlignOn16Bytes); if (const_alpha == 255) { // 1) Prologue: align destination on 16 bytes - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = comp_func_Plus_one_pixel(dst[x], src[x]); // 2) composition with SSE2 @@ -208,7 +204,7 @@ void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uin const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); // 1) Prologue: align destination on 16 bytes - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); const __m128i half = _mm_set1_epi16(0x80); @@ -239,9 +235,7 @@ void QT_FASTCALL comp_func_Source_sse2(uint *dst, const uint *src, int length, u int x = 0; // 1) prologue, align on 16 bytes - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3; - const int prologLength = qMin(length, offsetToAlignOn16Bytes); - for (; x < prologLength; ++x) + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], ialpha); // 2) interpolate pixels with SSE2 diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 18355c2..d8f6bf5 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -143,9 +143,7 @@ QT_BEGIN_NAMESPACE int x = 0; \ \ /* First, get dst aligned.
*/ \ - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\ - const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ - for (; x < prologLength; ++x) { \ + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ uint s = src[x]; \ if (s >= 0xff000000) \ dst[x] = s; \ @@ -202,9 +200,7 @@ QT_BEGIN_NAMESPACE { \ int x = 0; \ \ - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\ - const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ - for (; x < prologLength; ++x) { \ + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) { \ quint32 s = src[x]; \ if (s != 0) { \ s = BYTE_MUL(s, const_alpha); \ -- cgit v0.12