From 5b74a70ac630073582be56f8a0539624a1080185 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B8dal?= Date: Wed, 13 Apr 2011 10:15:06 +0200 Subject: Improved gradient table generation performance for two-stop gradients. Two stops is a fairly common case so we gain quite a bit by special casing it. Improves performance by 10 % in parcycle benchmark, and by 90 % in a synthetic benchmark. Reviewed-by: Andreas Kling --- src/gui/painting/qdrawhelper.cpp | 5 ++ src/gui/painting/qdrawhelper_neon.cpp | 38 ++++++++++++ src/gui/painting/qdrawhelper_p.h | 73 ++++++++++++++++++++++ src/gui/painting/qdrawhelper_sse2.cpp | 100 +++++++++---------------------- src/gui/painting/qpaintengine_raster.cpp | 78 ++++++++++++++++++++++++ tests/auto/qpainter/tst_qpainter.cpp | 39 ++++++++++-- 6 files changed, 256 insertions(+), 77 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index a676fe9..fd26676 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7834,6 +7834,11 @@ void qInitDrawhelperAsm() qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; qt_memfill32 = qt_memfill32_neon; + + extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data, + int y, int x, int length); + + qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index debca37..7eb2f09 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -955,6 +955,44 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, } } +class QSimdNeon +{ +public: + typedef int32x4_t Int32x4; + typedef float32x4_t Float32x4; + + union Vect_buffer_i { Int32x4 v; int i[4]; }; + union Vect_buffer_f { Float32x4 v; float f[4]; }; + + static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); } + static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); } + static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); } + + static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); } + static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); } + + static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); } + static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); } + static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); } + + static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); } + + static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); } + static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); } + + static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); } + + static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); } + + static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); } +}; + +const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data, + int y, int x, int length) +{ + return qt_fetch_radial_gradient_template >(buffer, op, data, y, x, length); +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index db5ec70..a92f686 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -465,6 +465,79 @@ const uint * QT_FASTCALL qt_fetch_radial_gradient_template(uint *buffer, const O return b; } +template +class QRadialFetchSimd +{ +public: + static inline void fetch(uint *buffer, uint *end, const QSpanData *data, qreal det, qreal delta_det, + qreal delta_delta_det, qreal b, qreal delta_b) + { + typename Simd::Vect_buffer_f det_vec; + typename Simd::Vect_buffer_f delta_det4_vec; + typename Simd::Vect_buffer_f b_vec; + + for (int i = 0; i < 4; ++i) { + det_vec.f[i] = det; + delta_det4_vec.f[i] = 4 * delta_det; + b_vec.f[i] = b; + + det += delta_det; + delta_det += delta_delta_det; + b += delta_b; + } + + const typename Simd::Float32x4 v_delta_delta_det16 = Simd::v_dup(16 * delta_delta_det); + const typename Simd::Float32x4 v_delta_delta_det6 = Simd::v_dup(6 * delta_delta_det); + const typename Simd::Float32x4 v_delta_b4 = Simd::v_dup(4 * delta_b); + + const typename Simd::Float32x4 v_min = Simd::v_dup(0.0f); + const typename Simd::Float32x4 v_max = Simd::v_dup(GRADIENT_STOPTABLE_SIZE-1.5f); + const typename Simd::Float32x4 v_half = Simd::v_dup(0.5f); + + const typename Simd::Float32x4 v_table_size_minus_one = Simd::v_dup(float(GRADIENT_STOPTABLE_SIZE-1)); + + const typename Simd::Int32x4 v_repeat_mask = Simd::v_dup(~(uint(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); + const typename Simd::Int32x4 v_reflect_mask = Simd::v_dup(~(uint(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1))); + + const typename Simd::Int32x4 v_reflect_limit = Simd::v_dup(2 * GRADIENT_STOPTABLE_SIZE - 1); + +#define FETCH_RADIAL_LOOP_PROLOGUE \ + while (buffer < end) { \ + const typename Simd::Float32x4 v_index_local = Simd::v_sub(Simd::v_sqrt(Simd::v_max(v_min, det_vec.v)), b_vec.v); \ + const typename Simd::Float32x4 v_index = Simd::v_add(Simd::v_mul(v_index_local, v_table_size_minus_one), v_half); \ + typename Simd::Vect_buffer_i index_vec; +#define FETCH_RADIAL_LOOP_CLAMP_REPEAT \ + index_vec.v = Simd::v_and(v_repeat_mask, Simd::v_toInt(v_index)); +#define FETCH_RADIAL_LOOP_CLAMP_REFLECT \ + const typename Simd::Int32x4 v_index_i = Simd::v_and(v_reflect_mask, Simd::v_toInt(v_index)); \ + const typename Simd::Int32x4 v_index_i_inv = Simd::v_sub(v_reflect_limit, v_index_i); \ + index_vec.v = Simd::v_min_16(v_index_i, v_index_i_inv); +#define FETCH_RADIAL_LOOP_CLAMP_PAD \ + index_vec.v = Simd::v_toInt(Simd::v_min(v_max, Simd::v_max(v_min, v_index))); +#define FETCH_RADIAL_LOOP_EPILOGUE \ + det_vec.v = Simd::v_add(Simd::v_add(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \ + delta_det4_vec.v = Simd::v_add(delta_det4_vec.v, v_delta_delta_det16); \ + b_vec.v = Simd::v_add(b_vec.v, v_delta_b4); \ + for (int i = 0; i < 4; ++i) \ + *buffer++ = data->gradient.colorTable[index_vec.i[i]]; \ + } + + if (data->gradient.spread == QGradient::RepeatSpread) { + FETCH_RADIAL_LOOP_PROLOGUE + FETCH_RADIAL_LOOP_CLAMP_REPEAT + FETCH_RADIAL_LOOP_EPILOGUE + } else if (data->gradient.spread == QGradient::ReflectSpread) { + FETCH_RADIAL_LOOP_PROLOGUE + FETCH_RADIAL_LOOP_CLAMP_REFLECT + FETCH_RADIAL_LOOP_EPILOGUE + } else { + FETCH_RADIAL_LOOP_PROLOGUE + FETCH_RADIAL_LOOP_CLAMP_PAD + FETCH_RADIAL_LOOP_EPILOGUE + } + } +}; + #if defined(Q_CC_RVCT) # pragma push # pragma arm diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index eef4cda..542d845 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -491,87 +491,43 @@ void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, } } -extern const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data, - int y, int x, int length); -class RadialFetchSse2 +class QSimdSse2 { public: - static inline void fetch(uint *buffer, uint *end, const QSpanData *data, qreal det, qreal delta_det, - qreal delta_delta_det, qreal b, qreal delta_b) - { - union Vect_buffer_f { __m128 v; float f[4]; }; - union Vect_buffer_i { __m128i v; int i[4]; }; - - Vect_buffer_f det_vec; - Vect_buffer_f delta_det4_vec; - Vect_buffer_f b_vec; - - for (int i = 0; i < 4; ++i) { - det_vec.f[i] = det; - delta_det4_vec.f[i] = 4 * delta_det; - b_vec.f[i] = b; - - det += delta_det; - delta_det += delta_delta_det; - b += delta_b; - } + typedef __m128i Int32x4; + typedef __m128 Float32x4; - const __m128 v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det); - const __m128 v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det); - const __m128 v_delta_b4 = _mm_set1_ps(4 * delta_b); - - const __m128 v_min = _mm_set1_ps(0.0f); - const __m128 v_max = _mm_set1_ps(GRADIENT_STOPTABLE_SIZE-1.5f); - const __m128 v_half = _mm_set1_ps(0.5f); - - const __m128 v_table_size_minus_one = _mm_set1_ps(float(GRADIENT_STOPTABLE_SIZE-1)); - - const __m128i v_repeat_mask = _mm_set1_epi32(uint(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT); - const __m128i v_reflect_mask = _mm_set1_epi32(uint(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1)); - - const __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1); - -#define FETCH_RADIAL_LOOP_PROLOGUE \ - while (buffer < end) { \ - const __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(_mm_max_ps(v_min, det_vec.v)), b_vec.v); \ - const __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_table_size_minus_one), v_half); \ - Vect_buffer_i index_vec; -#define FETCH_RADIAL_LOOP_CLAMP_REPEAT \ - index_vec.v = _mm_andnot_si128(v_repeat_mask, _mm_cvttps_epi32(v_index)); -#define FETCH_RADIAL_LOOP_CLAMP_REFLECT \ - const __m128i v_index_i = _mm_andnot_si128(v_reflect_mask, _mm_cvttps_epi32(v_index)); \ - const __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \ - index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv); -#define FETCH_RADIAL_LOOP_CLAMP_PAD \ - index_vec.v = _mm_cvttps_epi32(_mm_min_ps(v_max, _mm_max_ps(v_min, v_index))); -#define FETCH_RADIAL_LOOP_EPILOGUE \ - det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \ - delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \ - b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4); \ - for (int i = 0; i < 4; ++i) \ - *buffer++ = data->gradient.colorTable[index_vec.i[i]]; \ - } + union Vect_buffer_i { Int32x4 v; int i[4]; }; + union Vect_buffer_f { Float32x4 v; float f[4]; }; - if (data->gradient.spread == QGradient::RepeatSpread) { - FETCH_RADIAL_LOOP_PROLOGUE - FETCH_RADIAL_LOOP_CLAMP_REPEAT - FETCH_RADIAL_LOOP_EPILOGUE - } else if (data->gradient.spread == QGradient::ReflectSpread) { - FETCH_RADIAL_LOOP_PROLOGUE - FETCH_RADIAL_LOOP_CLAMP_REFLECT - FETCH_RADIAL_LOOP_EPILOGUE - } else { - FETCH_RADIAL_LOOP_PROLOGUE - FETCH_RADIAL_LOOP_CLAMP_PAD - FETCH_RADIAL_LOOP_EPILOGUE - } - } + static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); } + static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); } + static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); } + static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); } + + static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); } + static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); } + + static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); } + static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); } + static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); } + + static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); } + + static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); } + static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); } + + static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); } + + static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); } + + static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); } }; const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data, int y, int x, int length) { - return qt_fetch_radial_gradient_template(buffer, op, data, y, x, length); + return qt_fetch_radial_gradient_template >(buffer, op, data, y, x, length); } diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index 6902543..8486adb 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -5033,6 +5033,84 @@ void QGradientCache::generateGradientColorTable(const QGradient& gradient, uint bool colorInterpolation = (gradient.interpolationMode() == QGradient::ColorInterpolation); + if (stopCount == 2) { + uint first_color = ARGB_COMBINE_ALPHA(stops[0].second.rgba(), opacity); + uint second_color = ARGB_COMBINE_ALPHA(stops[1].second.rgba(), opacity); + + qreal first_stop = stops[0].first; + qreal second_stop = stops[1].first; + + if (second_stop < first_stop) { + qSwap(first_color, second_color); + qSwap(first_stop, second_stop); + } + + if (colorInterpolation) { + first_color = PREMUL(first_color); + second_color = PREMUL(second_color); + } + + int first_index = qRound(first_stop * (GRADIENT_STOPTABLE_SIZE-1)); + int second_index = qRound(second_stop * (GRADIENT_STOPTABLE_SIZE-1)); + + uint red_first = qRed(first_color) << 16; + uint green_first = qGreen(first_color) << 16; + uint blue_first = qBlue(first_color) << 16; + uint alpha_first = qAlpha(first_color) << 16; + + uint red_second = qRed(second_color) << 16; + uint green_second = qGreen(second_color) << 16; + uint blue_second = qBlue(second_color) << 16; + uint alpha_second = qAlpha(second_color) << 16; + + int i = 0; + for (; i <= qMin(GRADIENT_STOPTABLE_SIZE, first_index); ++i) { + if (colorInterpolation) + colorTable[i] = first_color; + else + colorTable[i] = PREMUL(first_color); + } + + if (i < second_index) { + qreal reciprocal = qreal(1) / (second_index - first_index); + + int red_delta = qRound(int(red_second - red_first) * reciprocal); + int green_delta = qRound(int(green_second - green_first) * reciprocal); + int blue_delta = qRound(int(blue_second - blue_first) * reciprocal); + int alpha_delta = qRound(int(alpha_second - alpha_first) * reciprocal); + + // rounding + red_first += 1 << 15; + green_first += 1 << 15; + blue_first += 1 << 15; + alpha_first += 1 << 15; + + for (; i < qMin(GRADIENT_STOPTABLE_SIZE, second_index); ++i) { + red_first += red_delta; + green_first += green_delta; + blue_first += blue_delta; + alpha_first += alpha_delta; + + const uint color = ((alpha_first << 8) & 0xff000000) | (red_first & 0xff0000) + | ((green_first >> 8) & 0xff00) | (blue_first >> 16); + + if (colorInterpolation) + colorTable[i] = color; + else + colorTable[i] = PREMUL(color); + } + } + + for (; i < GRADIENT_STOPTABLE_SIZE; ++i) { + if (colorInterpolation) + colorTable[i] = second_color; + else + colorTable[i] = PREMUL(second_color); + } + + return; + } + uint current_color = ARGB_COMBINE_ALPHA(stops[0].second.rgba(), opacity); if (stopCount == 1) { current_color = PREMUL(current_color); diff --git a/tests/auto/qpainter/tst_qpainter.cpp b/tests/auto/qpainter/tst_qpainter.cpp index c21514b..fa80635 100644 --- a/tests/auto/qpainter/tst_qpainter.cpp +++ b/tests/auto/qpainter/tst_qpainter.cpp @@ -77,6 +77,7 @@ # define SRCDIR "." #endif +Q_DECLARE_METATYPE(QGradientStops) Q_DECLARE_METATYPE(QLine) Q_DECLARE_METATYPE(QRect) Q_DECLARE_METATYPE(QSize) @@ -189,6 +190,7 @@ private slots: void fillRect_stretchToDeviceMode(); void monoImages(); + void linearGradientSymmetry_data(); void linearGradientSymmetry(); void gradientInterpolation(); @@ -3983,8 +3985,39 @@ static QLinearGradient inverseGradient(QLinearGradient g) return g2; } +void tst_QPainter::linearGradientSymmetry_data() +{ + QTest::addColumn("stops"); + + { + QGradientStops stops; + stops << qMakePair(qreal(0.0), QColor(Qt::blue)); + stops << qMakePair(qreal(0.2), QColor(220, 220, 220, 0)); + stops << qMakePair(qreal(0.6), QColor(Qt::red)); + stops << qMakePair(qreal(0.9), QColor(220, 220, 220, 255)); + stops << qMakePair(qreal(1.0), QColor(Qt::black)); + QTest::newRow("multiple stops") << stops; + } + + { + QGradientStops stops; + stops << qMakePair(qreal(0.0), QColor(Qt::blue)); + stops << qMakePair(qreal(1.0), QColor(Qt::black)); + QTest::newRow("two stops") << stops; + } + + { + QGradientStops stops; + stops << qMakePair(qreal(0.3), QColor(Qt::blue)); + stops << qMakePair(qreal(0.6), QColor(Qt::black)); + QTest::newRow("two stops 2") << stops; + } +} + void tst_QPainter::linearGradientSymmetry() { + QFETCH(QGradientStops, stops); + QImage a(64, 8, QImage::Format_ARGB32_Premultiplied); QImage b(64, 8, QImage::Format_ARGB32_Premultiplied); @@ -3992,11 +4025,7 @@ void tst_QPainter::linearGradientSymmetry() b.fill(0); QLinearGradient gradient(QRectF(b.rect()).topLeft(), QRectF(b.rect()).topRight()); - gradient.setColorAt(0.0, Qt::blue); - gradient.setColorAt(0.2, QColor(220, 220, 220, 0)); - gradient.setColorAt(0.6, Qt::red); - gradient.setColorAt(0.9, QColor(220, 220, 220, 255)); - gradient.setColorAt(1.0, Qt::black); + gradient.setStops(stops); QPainter pa(&a); pa.fillRect(a.rect(), gradient); -- cgit v0.12