summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorSamuel Rødal <samuel.rodal@nokia.com>2011-04-13 08:15:06 (GMT)
committerSamuel Rødal <samuel.rodal@nokia.com>2011-04-13 08:17:18 (GMT)
commit5b74a70ac630073582be56f8a0539624a1080185 (patch)
tree14a73dd71ac328cf27d20e4fc9553fa692952180 /src
parent26bd3dccdee8c6a8f1cf9d254a2a6be7d403aa8d (diff)
downloadQt-5b74a70ac630073582be56f8a0539624a1080185.zip
Qt-5b74a70ac630073582be56f8a0539624a1080185.tar.gz
Qt-5b74a70ac630073582be56f8a0539624a1080185.tar.bz2
Improved gradient table generation performance for two-stop gradients.
Two stops is a fairly common case so we gain quite a bit by special casing it. Improves performance by 10 % in parcycle benchmark, and by 90 % in a synthetic benchmark. Reviewed-by: Andreas Kling
Diffstat (limited to 'src')
-rw-r--r--src/gui/painting/qdrawhelper.cpp5
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp38
-rw-r--r--src/gui/painting/qdrawhelper_p.h73
-rw-r--r--src/gui/painting/qdrawhelper_sse2.cpp100
-rw-r--r--src/gui/painting/qpaintengine_raster.cpp78
5 files changed, 222 insertions, 72 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index a676fe9..fd26676 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7834,6 +7834,11 @@ void qInitDrawhelperAsm()
qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon;
qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon;
qt_memfill32 = qt_memfill32_neon;
+
+ extern const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
+ int y, int x, int length);
+
+ qt_fetch_radial_gradient = qt_fetch_radial_gradient_neon;
}
#endif
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index debca37..7eb2f09 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -955,6 +955,44 @@ void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h,
}
}
+class QSimdNeon // NEON-backed set of vector types and operations, used as the Simd parameter of QRadialFetchSimd
+{
+public:
+ typedef int32x4_t Int32x4; // vector of four 32-bit signed integers
+ typedef float32x4_t Float32x4; // vector of four 32-bit floats
+
+ union Vect_buffer_i { Int32x4 v; int i[4]; }; // gives per-lane access to an integer vector
+ union Vect_buffer_f { Float32x4 v; float f[4]; }; // gives per-lane access to a float vector
+
+ static inline Float32x4 v_dup(float x) { return vdupq_n_f32(x); } // broadcast x to all lanes; NOTE(review): no v_dup(double) overload here (QSimdSse2 has one) - presumably qreal is float on NEON targets, confirm
+ static inline Int32x4 v_dup(int x) { return vdupq_n_s32(x); }
+ static inline Int32x4 v_dup(uint x) { return vdupq_n_s32(x); } // uint argument is handed to the signed intrinsic
+
+ static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return vaddq_f32(a, b); }
+ static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return vaddq_s32(a, b); }
+
+ static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return vmaxq_f32(a, b); }
+ static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return vminq_f32(a, b); }
+ static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return vminq_s32(a, b); } // despite the _16 name this is a 32-bit lane minimum (the SSE2 counterpart uses _mm_min_epi16)
+
+ static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return vandq_s32(a, b); }
+
+ static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return vsubq_f32(a, b); }
+ static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return vsubq_s32(a, b); }
+
+ static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return vmulq_f32(a, b); }
+
+ static inline Float32x4 v_sqrt(Float32x4 x) { Float32x4 y = vrsqrteq_f32(x); y = vmulq_f32(y, vrsqrtsq_f32(x, vmulq_f32(y, y))); return vmulq_f32(x, y); } // approximate sqrt computed as x * rsqrt(x): reciprocal-sqrt estimate refined by one vrsqrtsq step; NOTE(review): result for x == 0 (estimate is infinite) should be confirmed
+
+ static inline Int32x4 v_toInt(Float32x4 x) { return vcvtq_s32_f32(x); } // converts each float lane to a signed 32-bit integer
+};
+
+const uint * QT_FASTCALL qt_fetch_radial_gradient_neon(uint *buffer, const Operator *op, const QSpanData *data,
+ int y, int x, int length)
+{
+ return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdNeon> >(buffer, op, data, y, x, length); // forwards every argument unchanged to the shared template, instantiated with the NEON-backed fetcher
+}
+
QT_END_NAMESPACE
#endif // QT_HAVE_NEON
diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h
index db5ec70..a92f686 100644
--- a/src/gui/painting/qdrawhelper_p.h
+++ b/src/gui/painting/qdrawhelper_p.h
@@ -465,6 +465,79 @@ const uint * QT_FASTCALL qt_fetch_radial_gradient_template(uint *buffer, const O
return b;
}
+template <class Simd> // Simd must provide Int32x4, Float32x4, Vect_buffer_i, Vect_buffer_f and the v_* operations used below
+class QRadialFetchSimd // radial gradient fetcher that computes four color table indices per loop iteration
+{
+public:
+ static inline void fetch(uint *buffer, uint *end, const QSpanData *data, qreal det, qreal delta_det,
+ qreal delta_delta_det, qreal b, qreal delta_b) // fills buffer up to end with entries of data->gradient.colorTable; det/b are the values for the first pixel, the delta_* arguments their per-pixel increments
+ {
+ typename Simd::Vect_buffer_f det_vec;
+ typename Simd::Vect_buffer_f delta_det4_vec;
+ typename Simd::Vect_buffer_f b_vec;
+
+ for (int i = 0; i < 4; ++i) { // seed lane i with the scalar values belonging to pixel i
+ det_vec.f[i] = det;
+ delta_det4_vec.f[i] = 4 * delta_det;
+ b_vec.f[i] = b;
+
+ det += delta_det; // advance the scalar recurrence by one pixel
+ delta_det += delta_delta_det;
+ b += delta_b;
+ }
+
+ const typename Simd::Float32x4 v_delta_delta_det16 = Simd::v_dup(16 * delta_delta_det); // growth of 4 * delta_det over a four-pixel step
+ const typename Simd::Float32x4 v_delta_delta_det6 = Simd::v_dup(6 * delta_delta_det); // second-order part of det over a four-pixel step: det(k+4) = det(k) + 4 * delta_det(k) + 6 * delta_delta_det
+ const typename Simd::Float32x4 v_delta_b4 = Simd::v_dup(4 * delta_b); // b advances linearly, so four pixels add 4 * delta_b
+
+ const typename Simd::Float32x4 v_min = Simd::v_dup(0.0f); // lower clamp bound, also keeps the sqrt argument non-negative
+ const typename Simd::Float32x4 v_max = Simd::v_dup(GRADIENT_STOPTABLE_SIZE-1.5f); // upper clamp bound used by the pad spread
+ const typename Simd::Float32x4 v_half = Simd::v_dup(0.5f); // added to the scaled index before v_toInt
+
+ const typename Simd::Float32x4 v_table_size_minus_one = Simd::v_dup(float(GRADIENT_STOPTABLE_SIZE-1));
+
+ const typename Simd::Int32x4 v_repeat_mask = Simd::v_dup(~(uint(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT)); // clears the 24 bits starting at bit GRADIENT_STOPTABLE_SIZE_SHIFT, keeping the low index bits
+ const typename Simd::Int32x4 v_reflect_mask = Simd::v_dup(~(uint(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1))); // same, but keeps one more low bit so the index covers two table lengths
+
+ const typename Simd::Int32x4 v_reflect_limit = Simd::v_dup(2 * GRADIENT_STOPTABLE_SIZE - 1); // limit - i mirrors an index from the second table length back into the first
+
+#define FETCH_RADIAL_LOOP_PROLOGUE \
+ while (buffer < end) { \
+ const typename Simd::Float32x4 v_index_local = Simd::v_sub(Simd::v_sqrt(Simd::v_max(v_min, det_vec.v)), b_vec.v); \
+ const typename Simd::Float32x4 v_index = Simd::v_add(Simd::v_mul(v_index_local, v_table_size_minus_one), v_half); \
+ typename Simd::Vect_buffer_i index_vec; // receives the four table indices produced by one of the CLAMP macros
+#define FETCH_RADIAL_LOOP_CLAMP_REPEAT \
+ index_vec.v = Simd::v_and(v_repeat_mask, Simd::v_toInt(v_index)); // repeat spread: wrap the index by masking
+#define FETCH_RADIAL_LOOP_CLAMP_REFLECT \
+ const typename Simd::Int32x4 v_index_i = Simd::v_and(v_reflect_mask, Simd::v_toInt(v_index)); \
+ const typename Simd::Int32x4 v_index_i_inv = Simd::v_sub(v_reflect_limit, v_index_i); \
+ index_vec.v = Simd::v_min_16(v_index_i, v_index_i_inv); // reflect spread: min(i, limit - i) folds the mirrored half back onto the table
+#define FETCH_RADIAL_LOOP_CLAMP_PAD \
+ index_vec.v = Simd::v_toInt(Simd::v_min(v_max, Simd::v_max(v_min, v_index))); // pad spread: clamp to [0, GRADIENT_STOPTABLE_SIZE-1.5] before converting
+#define FETCH_RADIAL_LOOP_EPILOGUE \
+ det_vec.v = Simd::v_add(Simd::v_add(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
+ delta_det4_vec.v = Simd::v_add(delta_det4_vec.v, v_delta_delta_det16); \
+ b_vec.v = Simd::v_add(b_vec.v, v_delta_b4); \
+ for (int i = 0; i < 4; ++i) \
+ *buffer++ = data->gradient.colorTable[index_vec.i[i]]; \
+ } // closes the while loop opened by FETCH_RADIAL_LOOP_PROLOGUE; NOTE(review): four entries are stored per iteration, so this can write past end unless the caller keeps end - buffer a multiple of four - confirm
+
+ if (data->gradient.spread == QGradient::RepeatSpread) { // the spread mode is tested once, outside the per-pixel loop
+ FETCH_RADIAL_LOOP_PROLOGUE
+ FETCH_RADIAL_LOOP_CLAMP_REPEAT
+ FETCH_RADIAL_LOOP_EPILOGUE
+ } else if (data->gradient.spread == QGradient::ReflectSpread) {
+ FETCH_RADIAL_LOOP_PROLOGUE
+ FETCH_RADIAL_LOOP_CLAMP_REFLECT
+ FETCH_RADIAL_LOOP_EPILOGUE
+ } else { // any other spread value is handled as pad
+ FETCH_RADIAL_LOOP_PROLOGUE
+ FETCH_RADIAL_LOOP_CLAMP_PAD
+ FETCH_RADIAL_LOOP_EPILOGUE
+ }
+ } // NOTE(review): the FETCH_RADIAL_LOOP_* macros are not #undef'd in the visible code
+};
+
#if defined(Q_CC_RVCT)
# pragma push
# pragma arm
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index eef4cda..542d845 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -491,87 +491,43 @@ void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y,
}
}
-extern const uint * QT_FASTCALL qt_fetch_radial_gradient_plain(uint *buffer, const Operator *op, const QSpanData *data,
- int y, int x, int length);
-class RadialFetchSse2
+class QSimdSse2 // SSE2-backed set of vector types and operations, used as the Simd parameter of QRadialFetchSimd
{
public:
- static inline void fetch(uint *buffer, uint *end, const QSpanData *data, qreal det, qreal delta_det,
- qreal delta_delta_det, qreal b, qreal delta_b)
- {
- union Vect_buffer_f { __m128 v; float f[4]; };
- union Vect_buffer_i { __m128i v; int i[4]; };
-
- Vect_buffer_f det_vec;
- Vect_buffer_f delta_det4_vec;
- Vect_buffer_f b_vec;
-
- for (int i = 0; i < 4; ++i) {
- det_vec.f[i] = det;
- delta_det4_vec.f[i] = 4 * delta_det;
- b_vec.f[i] = b;
-
- det += delta_det;
- delta_det += delta_delta_det;
- b += delta_b;
- }
+ typedef __m128i Int32x4; // used here as four 32-bit integer lanes
+ typedef __m128 Float32x4; // four 32-bit float lanes
- const __m128 v_delta_delta_det16 = _mm_set1_ps(16 * delta_delta_det);
- const __m128 v_delta_delta_det6 = _mm_set1_ps(6 * delta_delta_det);
- const __m128 v_delta_b4 = _mm_set1_ps(4 * delta_b);
-
- const __m128 v_min = _mm_set1_ps(0.0f);
- const __m128 v_max = _mm_set1_ps(GRADIENT_STOPTABLE_SIZE-1.5f);
- const __m128 v_half = _mm_set1_ps(0.5f);
-
- const __m128 v_table_size_minus_one = _mm_set1_ps(float(GRADIENT_STOPTABLE_SIZE-1));
-
- const __m128i v_repeat_mask = _mm_set1_epi32(uint(0xffffff) << GRADIENT_STOPTABLE_SIZE_SHIFT);
- const __m128i v_reflect_mask = _mm_set1_epi32(uint(0xffffff) << (GRADIENT_STOPTABLE_SIZE_SHIFT+1));
-
- const __m128i v_reflect_limit = _mm_set1_epi32(2 * GRADIENT_STOPTABLE_SIZE - 1);
-
-#define FETCH_RADIAL_LOOP_PROLOGUE \
- while (buffer < end) { \
- const __m128 v_index_local = _mm_sub_ps(_mm_sqrt_ps(_mm_max_ps(v_min, det_vec.v)), b_vec.v); \
- const __m128 v_index = _mm_add_ps(_mm_mul_ps(v_index_local, v_table_size_minus_one), v_half); \
- Vect_buffer_i index_vec;
-#define FETCH_RADIAL_LOOP_CLAMP_REPEAT \
- index_vec.v = _mm_andnot_si128(v_repeat_mask, _mm_cvttps_epi32(v_index));
-#define FETCH_RADIAL_LOOP_CLAMP_REFLECT \
- const __m128i v_index_i = _mm_andnot_si128(v_reflect_mask, _mm_cvttps_epi32(v_index)); \
- const __m128i v_index_i_inv = _mm_sub_epi32(v_reflect_limit, v_index_i); \
- index_vec.v = _mm_min_epi16(v_index_i, v_index_i_inv);
-#define FETCH_RADIAL_LOOP_CLAMP_PAD \
- index_vec.v = _mm_cvttps_epi32(_mm_min_ps(v_max, _mm_max_ps(v_min, v_index)));
-#define FETCH_RADIAL_LOOP_EPILOGUE \
- det_vec.v = _mm_add_ps(_mm_add_ps(det_vec.v, delta_det4_vec.v), v_delta_delta_det6); \
- delta_det4_vec.v = _mm_add_ps(delta_det4_vec.v, v_delta_delta_det16); \
- b_vec.v = _mm_add_ps(b_vec.v, v_delta_b4); \
- for (int i = 0; i < 4; ++i) \
- *buffer++ = data->gradient.colorTable[index_vec.i[i]]; \
- }
+ union Vect_buffer_i { Int32x4 v; int i[4]; }; // gives per-lane access to an integer vector
+ union Vect_buffer_f { Float32x4 v; float f[4]; }; // gives per-lane access to a float vector
- if (data->gradient.spread == QGradient::RepeatSpread) {
- FETCH_RADIAL_LOOP_PROLOGUE
- FETCH_RADIAL_LOOP_CLAMP_REPEAT
- FETCH_RADIAL_LOOP_EPILOGUE
- } else if (data->gradient.spread == QGradient::ReflectSpread) {
- FETCH_RADIAL_LOOP_PROLOGUE
- FETCH_RADIAL_LOOP_CLAMP_REFLECT
- FETCH_RADIAL_LOOP_EPILOGUE
- } else {
- FETCH_RADIAL_LOOP_PROLOGUE
- FETCH_RADIAL_LOOP_CLAMP_PAD
- FETCH_RADIAL_LOOP_EPILOGUE
- }
- }
+ static inline Float32x4 v_dup(float x) { return _mm_set1_ps(x); } // broadcast x to all lanes
+ static inline Float32x4 v_dup(double x) { return _mm_set1_ps(x); } // double is narrowed to float; presumably present because qreal may be double - confirm
+ static inline Int32x4 v_dup(int x) { return _mm_set1_epi32(x); }
+ static inline Int32x4 v_dup(uint x) { return _mm_set1_epi32(x); }
+
+ static inline Float32x4 v_add(Float32x4 a, Float32x4 b) { return _mm_add_ps(a, b); }
+ static inline Int32x4 v_add(Int32x4 a, Int32x4 b) { return _mm_add_epi32(a, b); }
+
+ static inline Float32x4 v_max(Float32x4 a, Float32x4 b) { return _mm_max_ps(a, b); }
+ static inline Float32x4 v_min(Float32x4 a, Float32x4 b) { return _mm_min_ps(a, b); }
+ static inline Int32x4 v_min_16(Int32x4 a, Int32x4 b) { return _mm_min_epi16(a, b); } // minimum taken on signed 16-bit lanes; NOTE(review): only equals a 32-bit minimum when the values fit in the low 16 bits as non-negative numbers - confirm against the table size
+
+ static inline Int32x4 v_and(Int32x4 a, Int32x4 b) { return _mm_and_si128(a, b); } // plain AND: the shared template passes already inverted masks, where the removed code used _mm_andnot_si128 with the un-inverted ones
+
+ static inline Float32x4 v_sub(Float32x4 a, Float32x4 b) { return _mm_sub_ps(a, b); }
+ static inline Int32x4 v_sub(Int32x4 a, Int32x4 b) { return _mm_sub_epi32(a, b); }
+
+ static inline Float32x4 v_mul(Float32x4 a, Float32x4 b) { return _mm_mul_ps(a, b); }
+
+ static inline Float32x4 v_sqrt(Float32x4 x) { return _mm_sqrt_ps(x); }
+
+ static inline Int32x4 v_toInt(Float32x4 x) { return _mm_cvttps_epi32(x); } // converts each float lane to a 32-bit integer with truncation
};
const uint * QT_FASTCALL qt_fetch_radial_gradient_sse2(uint *buffer, const Operator *op, const QSpanData *data,
int y, int x, int length)
{
- return qt_fetch_radial_gradient_template<RadialFetchSse2>(buffer, op, data, y, x, length);
+ return qt_fetch_radial_gradient_template<QRadialFetchSimd<QSimdSse2> >(buffer, op, data, y, x, length); // forwards every argument unchanged to the shared template, instantiated with the SSE2-backed fetcher
}
diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp
index 6902543..8486adb 100644
--- a/src/gui/painting/qpaintengine_raster.cpp
+++ b/src/gui/painting/qpaintengine_raster.cpp
@@ -5033,6 +5033,84 @@ void QGradientCache::generateGradientColorTable(const QGradient& gradient, uint
bool colorInterpolation = (gradient.interpolationMode() == QGradient::ColorInterpolation);
+ if (stopCount == 2) {
+ uint first_color = ARGB_COMBINE_ALPHA(stops[0].second.rgba(), opacity);
+ uint second_color = ARGB_COMBINE_ALPHA(stops[1].second.rgba(), opacity);
+
+ qreal first_stop = stops[0].first;
+ qreal second_stop = stops[1].first;
+
+ if (second_stop < first_stop) {
+ qSwap(first_color, second_color);
+ qSwap(first_stop, second_stop);
+ }
+
+ if (colorInterpolation) {
+ first_color = PREMUL(first_color);
+ second_color = PREMUL(second_color);
+ }
+
+ int first_index = qRound(first_stop * (GRADIENT_STOPTABLE_SIZE-1));
+ int second_index = qRound(second_stop * (GRADIENT_STOPTABLE_SIZE-1));
+
+ uint red_first = qRed(first_color) << 16;
+ uint green_first = qGreen(first_color) << 16;
+ uint blue_first = qBlue(first_color) << 16;
+ uint alpha_first = qAlpha(first_color) << 16;
+
+ uint red_second = qRed(second_color) << 16;
+ uint green_second = qGreen(second_color) << 16;
+ uint blue_second = qBlue(second_color) << 16;
+ uint alpha_second = qAlpha(second_color) << 16;
+
+ int i = 0;
+ for (; i <= qMin(GRADIENT_STOPTABLE_SIZE, first_index); ++i) {
+ if (colorInterpolation)
+ colorTable[i] = first_color;
+ else
+ colorTable[i] = PREMUL(first_color);
+ }
+
+ if (i < second_index) {
+ qreal reciprocal = qreal(1) / (second_index - first_index);
+
+ int red_delta = qRound(int(red_second - red_first) * reciprocal);
+ int green_delta = qRound(int(green_second - green_first) * reciprocal);
+ int blue_delta = qRound(int(blue_second - blue_first) * reciprocal);
+ int alpha_delta = qRound(int(alpha_second - alpha_first) * reciprocal);
+
+ // rounding
+ red_first += 1 << 15;
+ green_first += 1 << 15;
+ blue_first += 1 << 15;
+ alpha_first += 1 << 15;
+
+ for (; i < qMin(GRADIENT_STOPTABLE_SIZE, second_index); ++i) {
+ red_first += red_delta;
+ green_first += green_delta;
+ blue_first += blue_delta;
+ alpha_first += alpha_delta;
+
+ const uint color = ((alpha_first << 8) & 0xff000000) | (red_first & 0xff0000)
+ | ((green_first >> 8) & 0xff00) | (blue_first >> 16);
+
+ if (colorInterpolation)
+ colorTable[i] = color;
+ else
+ colorTable[i] = PREMUL(color);
+ }
+ }
+
+ for (; i < GRADIENT_STOPTABLE_SIZE; ++i) {
+ if (colorInterpolation)
+ colorTable[i] = second_color;
+ else
+ colorTable[i] = PREMUL(second_color);
+ }
+
+ return;
+ }
+
uint current_color = ARGB_COMBINE_ALPHA(stops[0].second.rgba(), opacity);
if (stopCount == 1) {
current_color = PREMUL(current_color);