diff options
author | Qt Continuous Integration System <qt-info@nokia.com> | 2010-12-22 23:03:48 (GMT) |
---|---|---|
committer | Qt Continuous Integration System <qt-info@nokia.com> | 2010-12-22 23:03:48 (GMT) |
commit | 8e8f4881aa6086f109ad5fe81d74b7e7daa9c914 (patch) | |
tree | b7e26b73dca82687509ba873dbebfd7c205b5646 /src/gui/painting/qdrawhelper.cpp | |
parent | 91bc1f39788a4a097e65f542b8515c68313ec17f (diff) | |
parent | 8565d6d123227615312a791ae7acc5b252db264a (diff) | |
download | Qt-8e8f4881aa6086f109ad5fe81d74b7e7daa9c914.zip Qt-8e8f4881aa6086f109ad5fe81d74b7e7daa9c914.tar.gz Qt-8e8f4881aa6086f109ad5fe81d74b7e7daa9c914.tar.bz2 |
Merge branch '4.7' of git://scm.dev.nokia.troll.no/qt/qt-water-team
* '4.7' of git://scm.dev.nokia.troll.no/qt/qt-water-team:
Fix infinite loop in qmake when reading malformed .ts files.
Fix libinfixed usage in Symbian when def files are used
Made documentation of QPixmap::createMaskFromColor match behaviour.
Improve performance of bilinear downscaling of images with neon.
Document the 'variant' basic type
Add double type to QML Basic Types docs
Remove redundant docs
Diffstat (limited to 'src/gui/painting/qdrawhelper.cpp')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 133 |
1 files changed, 107 insertions, 26 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 62af212..fdb686d 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -712,6 +712,38 @@ static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, i } #endif +#if defined(QT_ALWAYS_HAVE_NEON) +#define interpolate_4_pixels_16_neon(tl, tr, bl, br, distx, disty, disty_, colorMask, invColorMask, v_256, b) \ +{ \ + const int16x8_t dxdy = vmulq_s16(distx, disty); \ + const int16x8_t distx_ = vshlq_n_s16(distx, 4); \ + const int16x8_t idxidy = vaddq_s16(dxdy, vsubq_s16(v_256, vaddq_s16(distx_, disty_))); \ + const int16x8_t dxidy = vsubq_s16(distx_, dxdy); \ + const int16x8_t idxdy = vsubq_s16(disty_, dxdy); \ + \ + int16x8_t tlAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tl), 8)); \ + int16x8_t tlRB = vandq_s16(tl, colorMask); \ + int16x8_t trAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(tr), 8)); \ + int16x8_t trRB = vandq_s16(tr, colorMask); \ + int16x8_t blAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(bl), 8)); \ + int16x8_t blRB = vandq_s16(bl, colorMask); \ + int16x8_t brAG = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(br), 8)); \ + int16x8_t brRB = vandq_s16(br, colorMask); \ + \ + int16x8_t rAG = vmulq_s16(tlAG, idxidy); \ + int16x8_t rRB = vmulq_s16(tlRB, idxidy); \ + rAG = vmlaq_s16(rAG, trAG, dxidy); \ + rRB = vmlaq_s16(rRB, trRB, dxidy); \ + rAG = vmlaq_s16(rAG, blAG, idxdy); \ + rRB = vmlaq_s16(rRB, blRB, idxdy); \ + rAG = vmlaq_s16(rAG, brAG, dxdy); \ + rRB = vmlaq_s16(rRB, brRB, dxdy); \ + \ + rAG = vandq_s16(invColorMask, rAG); \ + rRB = vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(rRB), 8)); \ + vst1q_s16((int16_t*)(b), vorrq_s16(rAG, rRB)); \ +} +#endif template<TextureBlendType blendType> Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2) @@ -920,35 +952,36 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * const uchar *s2 = data->texture.scanLine(y2); int disty = (fy & 0x0000ffff) >> 12; -#if defined(QT_ALWAYS_HAVE_SSE2) if (blendType != BlendTransformedBilinearTiled && (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) { - //prolog to get into the bounds - while (b < end) { - int x1 = (fx >> 16); - int x2; - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - if (x1 != x2) //break if we are insided the bounds. - break; - uint tl = fetch(s1, x1, data->texture.colorTable); - uint tr = fetch(s1, x2, data->texture.colorTable); - uint bl = fetch(s2, x1, data->texture.colorTable); - uint br = fetch(s2, x2, data->texture.colorTable); - int distx = (fx & 0x0000ffff) >> 12; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); - fx += fdx; - ++b; - } - uint *boundedEnd; - if (fdx > 0) - boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); - else - boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); +#define BILINEAR_DOWNSCALE_BOUNDS_PROLOG \ + while (b < end) { \ + int x1 = (fx >> 16); \ + int x2; \ + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); \ + if (x1 != x2) \ + break; \ + uint tl = fetch(s1, x1, data->texture.colorTable); \ + uint tr = fetch(s1, x2, data->texture.colorTable); \ + uint bl = fetch(s2, x1, data->texture.colorTable); \ + uint br = fetch(s2, x2, data->texture.colorTable); \ + int distx = (fx & 0x0000ffff) >> 12; \ + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); \ + fx += fdx; \ + ++b; \ + } \ + uint *boundedEnd; \ + if (fdx > 0) \ + boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); \ + else \ + boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); \ boundedEnd -= 3; +#if defined(QT_ALWAYS_HAVE_SSE2) + BILINEAR_DOWNSCALE_BOUNDS_PROLOG + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - //const __m128i distShuffleMask = _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0); const __m128i v_256 = _mm_set1_epi16(256); const __m128i v_disty = _mm_set1_epi16(disty); __m128i v_fdx = _mm_set1_epi32(fdx*4); @@ -976,8 +1009,7 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * bl.i[i] = *(addr_tl+secondLine); br.i[i] = *(addr_tr+secondLine); } - __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); //distx = (fx & 0x0000ffff) >> 12; - //v_distx = _mm_shuffle_epi8(v_disty, distShuffleMask); //distx |= distx << 16; + __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); @@ -986,8 +1018,57 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); } fx = v_fx.i[0]; - } +#elif defined(QT_ALWAYS_HAVE_NEON) + BILINEAR_DOWNSCALE_BOUNDS_PROLOG + + const int16x8_t colorMask = vdupq_n_s16(0x00ff); + const int16x8_t invColorMask = vmvnq_s16(colorMask); + const int16x8_t v_256 = vdupq_n_s16(256); + const int16x8_t v_disty = vdupq_n_s16(disty); + const int16x8_t v_disty_ = vshlq_n_s16(v_disty, 4); + int32x4_t v_fdx = vdupq_n_s32(fdx*4); + + ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1); + + union Vect_buffer { int32x4_t vect; quint32 i[4]; }; + Vect_buffer v_fx; + + for (int i = 0; i < 4; i++) { + v_fx.i[i] = fx; + fx += fdx; + } + + const int32x4_t v_ffff_mask = vdupq_n_s32(0x0000ffff); + + while (b < boundedEnd) { + + Vect_buffer tl, tr, bl, br; + + Vect_buffer v_fx_shifted; + v_fx_shifted.vect = vshrq_n_s32(v_fx.vect, 16); + + int32x4_t v_distx = vshrq_n_s32(vandq_s32(v_fx.vect, v_ffff_mask), 12); + + for (int i = 0; i < 4; i++) { + int x1 = v_fx_shifted.i[i]; + const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1; + const uint *addr_tr = addr_tl + 1; + tl.i[i] = *addr_tl; + tr.i[i] = *addr_tr; + bl.i[i] = *(addr_tl+secondLine); + br.i[i] = *(addr_tr+secondLine); + } + + v_distx = vorrq_s32(v_distx, vshlq_n_s32(v_distx, 16)); + + interpolate_4_pixels_16_neon(vreinterpretq_s16_s32(tl.vect), vreinterpretq_s16_s32(tr.vect), vreinterpretq_s16_s32(bl.vect), vreinterpretq_s16_s32(br.vect), vreinterpretq_s16_s32(v_distx), v_disty, v_disty_, colorMask, invColorMask, v_256, b); + b+=4; + v_fx.vect = vaddq_s32(v_fx.vect, v_fdx); + } + fx = v_fx.i[0]; #endif + } + while (b < end) { int x1 = (fx >> 16); int x2; |