diff options
author | Olivier Goffart <olivier.goffart@nokia.com> | 2010-09-08 09:26:31 (GMT) |
---|---|---|
committer | Olivier Goffart <olivier.goffart@nokia.com> | 2010-09-08 13:27:01 (GMT) |
commit | 46cb697b5e2eadfdad37aa6fcbdb58abf305de45 (patch) | |
tree | 10b15c4f70f6681c8c00d9e08b2fc54ad43adb56 | |
parent | 320ab7a9cfc484025f703dfdd39349a5f94fbe69 (diff) | |
download | Qt-46cb697b5e2eadfdad37aa6fcbdb58abf305de45.zip Qt-46cb697b5e2eadfdad37aa6fcbdb58abf305de45.tar.gz Qt-46cb697b5e2eadfdad37aa6fcbdb58abf305de45.tar.bz2 |
qdrawhelper: Optimisations in fetchTransformedBilinear
Scaling down, no rotation, with SSE2.
Specialize for ARGB32 not tiled, so the pixelbound can be simplified
Reviewed-by: Samuel
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 89 |
1 files changed, 59 insertions, 30 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index a302a83..bd5b0bd 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -761,7 +761,7 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * const qreal cx = x + 0.5; const qreal cy = y + 0.5; - const uint *end = buffer + length; + uint *end = buffer + length; uint *b = buffer; if (data->fast_matrix) { // The increment pr x in the scanline @@ -921,43 +921,72 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * int disty = (fy & 0x0000ffff) >> 12; #if defined(QT_ALWAYS_HAVE_SSE2) - const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - //const __m128i distShuffleMask = _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0); - const __m128i v_256 = _mm_set1_epi16(256); - const __m128i v_disty = _mm_set1_epi16(disty); - __m128i v_fdx = _mm_set1_epi32(fdx*4); + if (blendType != BlendTransformedBilinearTiled && + (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) { - union Vect_buffer { __m128i vect; quint32 i[4]; }; - Vect_buffer v_fx; + //prolog to get into the bounds + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + if (x1 != x2) //break if we are insided the bounds. + break; + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); + int distx = (fx & 0x0000ffff) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); + fx += fdx; + ++b; + } + uint *boundedEnd; + if (fdx > 0) + boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); + else + boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); + boundedEnd -= 3; - for (int i = 0; i < 4; i++) { - v_fx.i[i] = fx; - fx += fdx; - } + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + //const __m128i distShuffleMask = _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0); + const __m128i v_256 = _mm_set1_epi16(256); + const __m128i v_disty = _mm_set1_epi16(disty); + __m128i v_fdx = _mm_set1_epi32(fdx*4); - while (b < end-3) { + ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1); - Vect_buffer tl, tr, bl, br; + union Vect_buffer { __m128i vect; quint32 i[4]; }; + Vect_buffer v_fx; for (int i = 0; i < 4; i++) { - int x1 = v_fx.i[i] >> 16; - int x2; - fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); - tl.i[i] = fetch(s1, x1, data->texture.colorTable); - tr.i[i] = fetch(s1, x2, data->texture.colorTable); - bl.i[i] = fetch(s2, x1, data->texture.colorTable); - br.i[i] = fetch(s2, x2, data->texture.colorTable); + v_fx.i[i] = fx; + fx += fdx; + } + + while (b < boundedEnd) { + + Vect_buffer tl, tr, bl, br; + + for (int i = 0; i < 4; i++) { + int x1 = v_fx.i[i] >> 16; + const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1; + const uint *addr_tr = addr_tl + 1; + tl.i[i] = *addr_tl; + tr.i[i] = *addr_tr; + bl.i[i] = *(addr_tl+secondLine); + br.i[i] = *(addr_tr+secondLine); + } + __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); //distx = (fx & 0x0000ffff) >> 12; + //v_distx = _mm_shuffle_epi8(v_disty, distShuffleMask); //distx |= distx << 16; + v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + + interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); + b+=4; + v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); } - __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); //distx = (fx & 0x0000ffff) >> 12; - //v_distx = _mm_shuffle_epi8(v_disty, distShuffleMask); //distx |= distx << 16; - v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); - - interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); - b+=4; - v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); + fx = v_fx.i[0]; } - fx = v_fx.i[0]; #endif while (b < end) { int x1 = (fx >> 16); |