diff options
author | David Boddie <david.boddie@nokia.com> | 2010-09-15 17:21:39 (GMT) |
---|---|---|
committer | David Boddie <david.boddie@nokia.com> | 2010-09-15 17:21:39 (GMT) |
commit | f23bce37a8a3536bfedce7cc2d57f0761b2d1e31 (patch) | |
tree | e7dd9c7f7724df3f12fe819321e44c63f4eb0c2c /src/gui/painting | |
parent | f41eac269d354ebeb797b9cb173b09fc996564cf (diff) | |
parent | b832dffc1eb85181aa2d99afd1a6c764b634091d (diff) | |
download | Qt-f23bce37a8a3536bfedce7cc2d57f0761b2d1e31.zip Qt-f23bce37a8a3536bfedce7cc2d57f0761b2d1e31.tar.gz Qt-f23bce37a8a3536bfedce7cc2d57f0761b2d1e31.tar.bz2 |
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2 into 4.7
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 139 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 15 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_ssse3.cpp | 75 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime.cpp | 6 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime_p.h | 1 | ||||
-rw-r--r-- | src/gui/painting/qpaintengineex.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qstroker.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qtransform.h | 1 |
8 files changed, 141 insertions, 100 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 03ed597..bd5b0bd 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -659,19 +659,59 @@ const uint * QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const interpolate 4 argb pixels with the distx and disty factor. distx and disty bust be between 0 and 16 */ -static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty, int idistx, int idisty) -{ - uint tlrb = ((tl & 0x00ff00ff) * idistx * idisty); - uint tlag = (((tl & 0xff00ff00) >> 8) * idistx * idisty); - uint trrb = ((tr & 0x00ff00ff) * distx * idisty); - uint trag = (((tr & 0xff00ff00) >> 8) * distx * idisty); - uint blrb = ((bl & 0x00ff00ff) * idistx * disty); - uint blag = (((bl & 0xff00ff00) >> 8) * idistx * disty); - uint brrb = ((br & 0x00ff00ff) * distx * disty); - uint brag = (((br & 0xff00ff00) >> 8) * distx * disty); +static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty) +{ + uint distxy = distx * disty; + //idistx * disty = (16-distx) * disty = 16*disty - distxy + //idistx * idisty = (16-distx) * (16-disty) = 16*16 - 16*distx -16*dity + distxy + uint tlrb = (tl & 0x00ff00ff) * (16*16 - 16*distx - 16*disty + distxy); + uint tlag = ((tl & 0xff00ff00) >> 8) * (16*16 - 16*distx - 16*disty + distxy); + uint trrb = ((tr & 0x00ff00ff) * (distx*16 - distxy)); + uint trag = (((tr & 0xff00ff00) >> 8) * (distx*16 - distxy)); + uint blrb = ((bl & 0x00ff00ff) * (disty*16 - distxy)); + uint blag = (((bl & 0xff00ff00) >> 8) * (disty*16 - distxy)); + uint brrb = ((br & 0x00ff00ff) * (distxy)); + uint brag = (((br & 0xff00ff00) >> 8) * (distxy)); return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00); } +#if defined(QT_ALWAYS_HAVE_SSE2) +#define interpolate_4_pixels_16_sse2(tl, tr, bl, br, distx, disty, colorMask, v_256, b) \ +{ \ + const __m128i dxdy = _mm_mullo_epi16 (distx, disty); \ + const __m128i distx_ = _mm_slli_epi16(distx, 4); \ + const __m128i disty_ = _mm_slli_epi16(disty, 4); \ + const __m128i idxidy = _mm_add_epi16(dxdy, _mm_sub_epi16(v_256, _mm_add_epi16(distx_, disty_))); \ + const __m128i dxidy = _mm_sub_epi16(distx_, dxdy); \ + const __m128i idxdy = _mm_sub_epi16(disty_, dxdy); \ + \ + __m128i tlAG = _mm_srli_epi16(tl, 8); \ + __m128i tlRB = _mm_and_si128(tl, colorMask); \ + __m128i trAG = _mm_srli_epi16(tr, 8); \ + __m128i trRB = _mm_and_si128(tr, colorMask); \ + __m128i blAG = _mm_srli_epi16(bl, 8); \ + __m128i blRB = _mm_and_si128(bl, colorMask); \ + __m128i brAG = _mm_srli_epi16(br, 8); \ + __m128i brRB = _mm_and_si128(br, colorMask); \ + \ + tlAG = _mm_mullo_epi16(tlAG, idxidy); \ + tlRB = _mm_mullo_epi16(tlRB, idxidy); \ + trAG = _mm_mullo_epi16(trAG, dxidy); \ + trRB = _mm_mullo_epi16(trRB, dxidy); \ + blAG = _mm_mullo_epi16(blAG, idxdy); \ + blRB = _mm_mullo_epi16(blRB, idxdy); \ + brAG = _mm_mullo_epi16(brAG, dxdy); \ + brRB = _mm_mullo_epi16(brRB, dxdy); \ + \ + /* Add the values, and shift to only keep 8 significant bits per colors */ \ + __m128i rAG =_mm_add_epi16(_mm_add_epi16(tlAG, trAG), _mm_add_epi16(blAG, brAG)); \ + __m128i rRB =_mm_add_epi16(_mm_add_epi16(tlRB, trRB), _mm_add_epi16(blRB, brRB)); \ + rAG = _mm_andnot_si128(colorMask, rAG); \ + rRB = _mm_srli_epi16(rRB, 8); \ + _mm_storeu_si128((__m128i*)(b), _mm_or_si128(rAG, rRB)); \ +} +#endif + template<TextureBlendType blendType> Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2) @@ -721,7 +761,7 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * const qreal cx = x + 0.5; const qreal cy = y + 0.5; - const uint *end = buffer + length; + uint *end = buffer + length; uint *b = buffer; if (data->fast_matrix) { // The increment pr x in the scanline @@ -879,7 +919,75 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * const uchar *s1 = data->texture.scanLine(y1); const uchar *s2 = data->texture.scanLine(y2); int disty = (fy & 0x0000ffff) >> 12; - int idisty = 16 - disty; + +#if defined(QT_ALWAYS_HAVE_SSE2) + if (blendType != BlendTransformedBilinearTiled && + (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) { + + //prolog to get into the bounds + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + if (x1 != x2) //break if we are insided the bounds. + break; + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); + int distx = (fx & 0x0000ffff) >> 12; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); + fx += fdx; + ++b; + } + uint *boundedEnd; + if (fdx > 0) + boundedEnd = qMin(end, buffer + uint((image_x2 - (fx >> 16)) / data->m11)); + else + boundedEnd = qMin(end, buffer + uint((image_x1 - (fx >> 16)) / data->m11)); + boundedEnd -= 3; + + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + //const __m128i distShuffleMask = _mm_set_epi8(13, 12, 13, 12, 9, 8, 9, 8, 5, 4, 5, 4, 1, 0, 1, 0); + const __m128i v_256 = _mm_set1_epi16(256); + const __m128i v_disty = _mm_set1_epi16(disty); + __m128i v_fdx = _mm_set1_epi32(fdx*4); + + ptrdiff_t secondLine = reinterpret_cast<const uint *>(s2) - reinterpret_cast<const uint *>(s1); + + union Vect_buffer { __m128i vect; quint32 i[4]; }; + Vect_buffer v_fx; + + for (int i = 0; i < 4; i++) { + v_fx.i[i] = fx; + fx += fdx; + } + + while (b < boundedEnd) { + + Vect_buffer tl, tr, bl, br; + + for (int i = 0; i < 4; i++) { + int x1 = v_fx.i[i] >> 16; + const uint *addr_tl = reinterpret_cast<const uint *>(s1) + x1; + const uint *addr_tr = addr_tl + 1; + tl.i[i] = *addr_tl; + tr.i[i] = *addr_tr; + bl.i[i] = *(addr_tl+secondLine); + br.i[i] = *(addr_tr+secondLine); + } + __m128i v_distx = _mm_srli_epi16(v_fx.vect, 12); //distx = (fx & 0x0000ffff) >> 12; + //v_distx = _mm_shuffle_epi8(v_disty, distShuffleMask); //distx |= distx << 16; + v_distx = _mm_shufflehi_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + v_distx = _mm_shufflelo_epi16(v_distx, _MM_SHUFFLE(2,2,0,0)); + + interpolate_4_pixels_16_sse2(tl.vect, tr.vect, bl.vect, br.vect, v_distx, v_disty, colorMask, v_256, b); + b+=4; + v_fx.vect = _mm_add_epi32(v_fx.vect, v_fdx); + } + fx = v_fx.i[0]; + } +#endif while (b < end) { int x1 = (fx >> 16); int x2; @@ -889,8 +997,7 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * uint bl = fetch(s2, x1, data->texture.colorTable); uint br = fetch(s2, x2, data->texture.colorTable); int distx = (fx & 0x0000ffff) >> 12; - int idistx = 16 - distx; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty, idistx, idisty); + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); fx += fdx; ++b; } @@ -949,10 +1056,8 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * int distx = (fx & 0x0000ffff) >> 12; int disty = (fy & 0x0000ffff) >> 12; - int idistx = 16 - distx; - int idisty = 16 - disty; - *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty, idistx, idisty); + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty); fx += fdx; fy += fdy; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index ba2ba32..f97d865 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -266,10 +266,10 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) int n = (count128 + 3) / 4; switch (count128 & 0x3) { - case 0: do { _mm_store_si128(dst128++, value128); - case 3: _mm_store_si128(dst128++, value128); - case 2: _mm_store_si128(dst128++, value128); - case 1: _mm_store_si128(dst128++, value128); + case 0: do { _mm_stream_si128(dst128++, value128); + case 3: _mm_stream_si128(dst128++, value128); + case 2: _mm_stream_si128(dst128++, value128); + case 1: _mm_stream_si128(dst128++, value128); } while (--n > 0); } @@ -300,11 +300,14 @@ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, u const __m128i half = _mm_set1_epi16(0x80); const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); + ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + for (; x < length-3; x += 4) { - __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); dstVector = _mm_add_epi8(colorVector, dstVector); - _mm_storeu_si128((__m128i *)&dst[x], dstVector); + _mm_store_si128((__m128i *)&dst[x], dstVector); } for (;x < length; ++x) destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); diff --git a/src/gui/painting/qdrawhelper_ssse3.cpp b/src/gui/painting/qdrawhelper_ssse3.cpp index 4cb4089..fb5602e 100644 --- a/src/gui/painting/qdrawhelper_ssse3.cpp +++ b/src/gui/painting/qdrawhelper_ssse3.cpp @@ -79,69 +79,6 @@ inline static void blend_pixel(quint32 &dst, const quint32 src) } -#define BLEND_SOURCE_OVER_ARGB32_FIRST_ROW_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ - int x = 0; \ -\ - /* First, get dst aligned. */ \ - const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast<quintptr>(dst) >> 2) & 0x3)) & 0x3;\ - const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ -\ - for (; x < prologLength; ++x) {\ - blend_pixel(dst[x], src[x]); \ - } \ -\ - const int minusOffsetToAlignSrcOn16Bytes = (reinterpret_cast<quintptr>(&(src[x])) >> 2) & 0x3;\ -\ - if (!minusOffsetToAlignSrcOn16Bytes) {\ - /* src is aligned, usual algorithm but with aligned operations.\ - See the SSE2 version for more documentation on the algorithm itself. */\ - const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ - for (; x < length-3; x += 4) { \ - const __m128i srcVector = _mm_load_si128((__m128i *)&src[x]); \ - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ - _mm_store_si128((__m128i *)&dst[x], srcVector); \ - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ - __m128i alphaChannel = _mm_shuffle_epi8(srcVector, alphaShuffleMask); \ - alphaChannel = _mm_sub_epi16(one, alphaChannel); \ - const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ - __m128i destMultipliedByOneMinusAlpha; \ - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ - _mm_store_si128((__m128i *)&dst[x], result); \ - } \ - } /* end for() */\ - } else if ((length - x) >= 8) {\ - /* We are at the first line, so "x - minusOffsetToAlignSrcOn16Bytes" could go before src, and\ - generate an invalid access. */\ -\ - /* We use two vectors to extract the src: prevLoaded for the first pixels, lastLoaded for the current pixels. */\ - __m128i srcVectorPrevLoaded;\ - if (minusOffsetToAlignSrcOn16Bytes > prologLength) {\ - /* We go forward 4 pixels to avoid reading before src. */\ - for (; x < prologLength + 4; ++x)\ - blend_pixel(dst[x], src[x]); \ - }\ - srcVectorPrevLoaded = _mm_load_si128((__m128i *)&src[x - minusOffsetToAlignSrcOn16Bytes]);\ - const int palignrOffset = minusOffsetToAlignSrcOn16Bytes << 2;\ -\ - const __m128i alphaShuffleMask = _mm_set_epi8(0xff,15,0xff,15,0xff,11,0xff,11,0xff,7,0xff,7,0xff,3,0xff,3);\ - switch (palignrOffset) {\ - case 4:\ - BLENDING_LOOP(4, length)\ - break;\ - case 8:\ - BLENDING_LOOP(8, length)\ - break;\ - case 12:\ - BLENDING_LOOP(12, length)\ - break;\ - }\ - }\ - for (; x < length; ++x) \ - blend_pixel(dst[x], src[x]); \ -} - // Basically blend src over dst with the const alpha defined as constAlphaVector. // nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as: //const __m128i nullVector = _mm_set1_epi32(0); @@ -153,7 +90,7 @@ inline static void blend_pixel(quint32 &dst, const quint32 src) // The computation being done is: // result = s + d * (1-alpha) // with shortcuts if fully opaque or fully transparent. -#define BLEND_SOURCE_OVER_ARGB32_MAIN_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ +#define BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ int x = 0; \ \ /* First, get dst aligned. */ \ @@ -218,14 +155,8 @@ void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl, const __m128i one = _mm_set1_epi16(0xff); const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); - // We have to unrol the first row in order to deal with the load on unaligned data - // prior to the src pointer. - BLEND_SOURCE_OVER_ARGB32_FIRST_ROW_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); - dst = (quint32 *)(((uchar *) dst) + dbpl); - src = (const quint32 *)(((const uchar *) src) + sbpl); - - for (int y = 1; y < h; ++y) { - BLEND_SOURCE_OVER_ARGB32_MAIN_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); + for (int y = 0; y < h; ++y) { + BLEND_SOURCE_OVER_ARGB32_SSSE3(dst, src, w, nullVector, half, one, colorMask, alphaMask); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } diff --git a/src/gui/painting/qgraphicssystem_runtime.cpp b/src/gui/painting/qgraphicssystem_runtime.cpp index 2828e9d..a9fbbee 100644 --- a/src/gui/painting/qgraphicssystem_runtime.cpp +++ b/src/gui/painting/qgraphicssystem_runtime.cpp @@ -94,7 +94,8 @@ QRuntimePixmapData::QRuntimePixmapData(const QRuntimeGraphicsSystem *gs, PixelTy QRuntimePixmapData::~QRuntimePixmapData() { - m_graphicsSystem->removePixmapData(this); + if (QApplicationPrivate::graphics_system) + m_graphicsSystem->removePixmapData(this); delete m_data; } @@ -258,7 +259,8 @@ QRuntimeWindowSurface::QRuntimeWindowSurface(const QRuntimeGraphicsSystem *gs, Q QRuntimeWindowSurface::~QRuntimeWindowSurface() { - m_graphicsSystem->removeWindowSurface(this); + if (QApplicationPrivate::graphics_system) + m_graphicsSystem->removeWindowSurface(this); } QPaintDevice *QRuntimeWindowSurface::paintDevice() diff --git a/src/gui/painting/qgraphicssystem_runtime_p.h b/src/gui/painting/qgraphicssystem_runtime_p.h index 0232241..421fbeb 100644 --- a/src/gui/painting/qgraphicssystem_runtime_p.h +++ b/src/gui/painting/qgraphicssystem_runtime_p.h @@ -177,6 +177,7 @@ private: friend class QRuntimePixmapData; friend class QRuntimeWindowSurface; + friend class QMeeGoGraphicsSystem; }; QT_END_NAMESPACE diff --git a/src/gui/painting/qpaintengineex.cpp b/src/gui/painting/qpaintengineex.cpp index 881bd6e..1e857e4 100644 --- a/src/gui/painting/qpaintengineex.cpp +++ b/src/gui/painting/qpaintengineex.cpp @@ -768,7 +768,7 @@ void QPaintEngineEx::drawRoundedRect(const QRectF &rect, qreal xRadius, qreal yR x1, y2 - (1 - KAPPA) * yRadius, x1, y2 - yRadius, x1, y1 + yRadius, // LineTo - x1, y1 + KAPPA * yRadius, // CurveTo + x1, y1 + (1 - KAPPA) * yRadius, // CurveTo x1 + (1 - KAPPA) * xRadius, y1, x1 + xRadius, y1 }; diff --git a/src/gui/painting/qstroker.cpp b/src/gui/painting/qstroker.cpp index eabbd8a..9cff339 100644 --- a/src/gui/painting/qstroker.cpp +++ b/src/gui/painting/qstroker.cpp @@ -609,7 +609,7 @@ void QStroker::joinPoints(qfixed focal_x, qfixed focal_y, const QLineF &nextLine } QLineF miterLine(QPointF(qt_fixed_to_real(focal_x), qt_fixed_to_real(focal_y)), isect); - if (miterLine.length() > qt_fixed_to_real(m_strokeWidth * m_miterLimit) / 2) { + if (type == QLineF::NoIntersection || miterLine.length() > qt_fixed_to_real(m_strokeWidth * m_miterLimit) / 2) { emitLineTo(qt_real_to_fixed(nextLine.x1()), qt_real_to_fixed(nextLine.y1())); } else { diff --git a/src/gui/painting/qtransform.h b/src/gui/painting/qtransform.h index 212a582..11f2673 100644 --- a/src/gui/painting/qtransform.h +++ b/src/gui/painting/qtransform.h @@ -64,7 +64,6 @@ class QVariant; class Q_GUI_EXPORT QTransform { - Q_ENUMS(TransformationType) public: enum TransformationType { TxNone = 0x00, |