diff options
author | Alan Alpert <alan.alpert@nokia.com> | 2010-09-02 01:00:14 (GMT) |
---|---|---|
committer | Alan Alpert <alan.alpert@nokia.com> | 2010-09-02 01:00:14 (GMT) |
commit | 63eab5ef720936dbb5ee76686c6558e18a79e587 (patch) | |
tree | 920a29827e6fb83431e90c160616ca73924b455a /src/gui/painting | |
parent | 661e237be622a5f7886cee7c06f9daa73d4a07d7 (diff) | |
parent | e3a76875d1eea19c29fd6c8dec4e0db9252c2d29 (diff) | |
download | Qt-63eab5ef720936dbb5ee76686c6558e18a79e587.zip Qt-63eab5ef720936dbb5ee76686c6558e18a79e587.tar.gz Qt-63eab5ef720936dbb5ee76686c6558e18a79e587.tar.bz2 |
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/qt-qml into 4.7
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 406 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 93 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 2 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 32 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 17 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_ssse3.cpp | 3 | ||||
-rw-r--r-- | src/gui/painting/qpainter.cpp | 11 |
7 files changed, 388 insertions, 176 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 276da93..5223458 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -46,7 +46,6 @@ #include <private/qdrawhelper_armv6_p.h> #include <private/qdrawhelper_neon_p.h> #include <private/qmath_p.h> -#include <private/qsimd_p.h> #include <qmath.h> QT_BEGIN_NAMESPACE @@ -656,6 +655,46 @@ const uint * QT_FASTCALL fetchTransformed(uint *buffer, const Operator *, const return buffer; } +/** \internal + interpolate 4 argb pixels with the distx and disty factor. + distx and disty bust be between 0 and 16 + */ +static inline uint interpolate_4_pixels_16(uint tl, uint tr, uint bl, uint br, int distx, int disty, int idistx, int idisty) +{ + uint tlrb = ((tl & 0x00ff00ff) * idistx * idisty); + uint tlag = (((tl & 0xff00ff00) >> 8) * idistx * idisty); + uint trrb = ((tr & 0x00ff00ff) * distx * idisty); + uint trag = (((tr & 0xff00ff00) >> 8) * distx * idisty); + uint blrb = ((bl & 0x00ff00ff) * idistx * disty); + uint blag = (((bl & 0xff00ff00) >> 8) * idistx * disty); + uint brrb = ((br & 0x00ff00ff) * distx * disty); + uint brag = (((br & 0xff00ff00) >> 8) * distx * disty); + return (((tlrb + trrb + blrb + brrb) >> 8) & 0x00ff00ff) | ((tlag + trag + blag + brag) & 0xff00ff00); +} + + +template<TextureBlendType blendType> +Q_STATIC_TEMPLATE_FUNCTION inline void fetchTransformedBilinear_pixelBounds(int max, int l1, int l2, int &v1, int &v2) +{ + if (blendType == BlendTransformedBilinearTiled) { + v1 %= max; + if (v1 < 0) v1 += max; + v2 = v1 + 1; + v2 %= max; + } else { + if (v1 < l1) { + v2 = v1 = l1; + } else if (v1 >= l2 - 1) { + v2 = v1 = l2 - 1; + } else { + v2 = v1 + 1; + } + } + + Q_ASSERT(v1 >= 0 && v1 < max); + Q_ASSERT(v2 >= 0 && v2 < max); +} + template<TextureBlendType blendType, QImage::Format format> /* blendType = BlendTransformedBilinear or BlendTransformedBilinearTiled */ Q_STATIC_TEMPLATE_FUNCTION const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator *, const QSpanData *data, @@ -696,64 +735,230 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * fx -= half_point; fy -= half_point; - while (b < end) { - int x1 = (fx >> 16); - int x2; + + if (fdy == 0) { //simple scale, no rotation int y1 = (fy >> 16); int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); + const uchar *s1 = data->texture.scanLine(y1); + const uchar *s2 = data->texture.scanLine(y2); - if (blendType == BlendTransformedBilinearTiled) { - x1 %= image_width; - if (x1 < 0) x1 += image_width; - x2 = x1 + 1; - x2 %= image_width; - - y1 %= image_height; - if (y1 < 0) y1 += image_height; - y2 = y1 + 1; - y2 %= image_height; - } else { - if (x1 < image_x1) { - x2 = x1 = image_x1; - } else if (x1 >= image_x2 - 1) { - x2 = x1 = image_x2 - 1; + if (fdx <= fixed_scale && fdx > 0) { // scale up on X + int disty = (fy & 0x0000ffff) >> 8; + int idisty = 256 - disty; + int x = fx >> 16; + + // The idea is first to do the interpolation between the row s1 and the row s2 + // into an intermediate buffer, then we interpolate between two pixel of this buffer. + + // intermediate_buffer[0] is a buffer of red-blue component of the pixel, in the form 0x00RR00BB + // intermediate_buffer[1] is the alpha-green component of the pixel, in the form 0x00AA00GG + quint32 intermediate_buffer[2][buffer_size + 2]; + // count is the size used in the intermediate_buffer. + int count = qCeil(length * data->m11) + 2; //+1 for the last pixel to interpolate with, and +1 for rounding errors. + Q_ASSERT(count <= buffer_size + 2); //length is supposed to be <= buffer_size and data->m11 < 1 in this case + int f = 0; + int lim = count; + if (blendType == BlendTransformedBilinearTiled) { + x %= image_width; + if (x < 0) x += image_width; } else { - x2 = x1 + 1; + lim = qMin(count, image_x2-x); + if (x < image_x1) { + Q_ASSERT(x < image_x2); + uint t = fetch(s1, image_x1, data->texture.colorTable); + uint b = fetch(s2, image_x1, data->texture.colorTable); + quint32 rb = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + quint32 ag = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + do { + intermediate_buffer[0][f] = rb; + intermediate_buffer[1][f] = ag; + f++; + x++; + } while (x < image_x1 && f < lim); + } } - if (y1 < image_y1) { - y2 = y1 = image_y1; - } else if (y1 >= image_y2 - 1) { - y2 = y1 = image_y2 - 1; - } else { - y2 = y1 + 1; + +#if defined(QT_ALWAYS_HAVE_SSE2) + if (blendType != BlendTransformedBilinearTiled && + (format == QImage::Format_ARGB32_Premultiplied || format == QImage::Format_RGB32)) { + + const __m128i disty_ = _mm_set1_epi16(disty); + const __m128i idisty_ = _mm_set1_epi16(idisty); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + + lim -= 3; + for (; f < lim; x += 4, f += 4) { + // Load 4 pixels from s1, and split the alpha-green and red-blue component + __m128i top = _mm_loadu_si128((__m128i*)((const uint *)(s1)+x)); + __m128i topAG = _mm_srli_epi16(top, 8); + __m128i topRB = _mm_and_si128(top, colorMask); + // Multiplies each colour component by idisty + topAG = _mm_mullo_epi16 (topAG, idisty_); + topRB = _mm_mullo_epi16 (topRB, idisty_); + + // Same for the s2 vector + __m128i bottom = _mm_loadu_si128((__m128i*)((const uint *)(s2)+x)); + __m128i bottomAG = _mm_srli_epi16(bottom, 8); + __m128i bottomRB = _mm_and_si128(bottom, colorMask); + bottomAG = _mm_mullo_epi16 (bottomAG, disty_); + bottomRB = _mm_mullo_epi16 (bottomRB, disty_); + + // Add the values, and shift to only keep 8 significant bits per colors + __m128i rAG =_mm_add_epi16(topAG, bottomAG); + rAG = _mm_srli_epi16(rAG, 8); + _mm_storeu_si128((__m128i*)(&intermediate_buffer[1][f]), rAG); + __m128i rRB =_mm_add_epi16(topRB, bottomRB); + rRB = _mm_srli_epi16(rRB, 8); + _mm_storeu_si128((__m128i*)(&intermediate_buffer[0][f]), rRB); + } + } +#endif + for (; f < count; f++) { // Same as above but without sse2 + if (blendType == BlendTransformedBilinearTiled) { + if (x >= image_width) x -= image_width; + } else { + x = qMin(x, image_x2 - 1); + } + + uint t = fetch(s1, x, data->texture.colorTable); + uint b = fetch(s2, x, data->texture.colorTable); + + intermediate_buffer[0][f] = (((t & 0xff00ff) * idisty + (b & 0xff00ff) * disty) >> 8) & 0xff00ff; + intermediate_buffer[1][f] = ((((t>>8) & 0xff00ff) * idisty + ((b>>8) & 0xff00ff) * disty) >> 8) & 0xff00ff; + x++; + } + // Now interpolate the values from the intermediate_buffer to get the final result. + fx &= fixed_scale - 1; + Q_ASSERT((fx >> 16) == 0); + while (b < end) { + register int x1 = (fx >> 16); + register int x2 = x1 + 1; + Q_ASSERT(x1 >= 0); + Q_ASSERT(x2 < count); + + register int distx = (fx & 0x0000ffff) >> 8; + register int idistx = 256 - distx; + int rb = ((intermediate_buffer[0][x1] * idistx + intermediate_buffer[0][x2] * distx) >> 8) & 0xff00ff; + int ag = (intermediate_buffer[1][x1] * idistx + intermediate_buffer[1][x2] * distx) & 0xff00ff00; + *b = rb | ag; + b++; + fx += fdx; + } + } else if ((fdx < 0 && fdx > -(fixed_scale / 8)) || fabs(data->m22) < (1./8.)) { // scale up more than 8x + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); + const uchar *s1 = data->texture.scanLine(y1); + const uchar *s2 = data->texture.scanLine(y2); + int disty = (fy & 0x0000ffff) >> 8; + int idisty = 256 - disty; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); + + int distx = (fx & 0x0000ffff) >> 8; + int idistx = 256 - distx; + + uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx); + uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx); + *b = INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty); + + fx += fdx; + ++b; + } + } else { //scale down + int y1 = (fy >> 16); + int y2; + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); + const uchar *s1 = data->texture.scanLine(y1); + const uchar *s2 = data->texture.scanLine(y2); + int disty = (fy & 0x0000ffff) >> 12; + int idisty = 16 - disty; + while (b < end) { + int x1 = (fx >> 16); + int x2; + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); + int distx = (fx & 0x0000ffff) >> 12; + int idistx = 16 - distx; + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty, idistx, idisty); + fx += fdx; + ++b; } } + } else { //rotation + if (fabs(data->m11) > 8 || fabs(data->m22) > 8) { + //if we are zooming more than 8 times, we use 8bit precision for the position. + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; - Q_ASSERT(x1 >= 0 && x1 < image_width); - Q_ASSERT(x2 >= 0 && x2 < image_width); - Q_ASSERT(y1 >= 0 && y1 < image_height); - Q_ASSERT(y2 >= 0 && y2 < image_height); + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); - const uchar *s1 = data->texture.scanLine(y1); - const uchar *s2 = data->texture.scanLine(y2); + const uchar *s1 = data->texture.scanLine(y1); + const uchar *s2 = data->texture.scanLine(y2); - uint tl = fetch(s1, x1, data->texture.colorTable); - uint tr = fetch(s1, x2, data->texture.colorTable); - uint bl = fetch(s2, x1, data->texture.colorTable); - uint br = fetch(s2, x2, data->texture.colorTable); + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); - int distx = (fx & 0x0000ffff) >> 8; - int disty = (fy & 0x0000ffff) >> 8; - int idistx = 256 - distx; - int idisty = 256 - disty; + int distx = (fx & 0x0000ffff) >> 8; + int disty = (fy & 0x0000ffff) >> 8; + int idistx = 256 - distx; + int idisty = 256 - disty; - uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx); - uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx); - *b = INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty); + uint xtop = INTERPOLATE_PIXEL_256(tl, idistx, tr, distx); + uint xbot = INTERPOLATE_PIXEL_256(bl, idistx, br, distx); + *b = INTERPOLATE_PIXEL_256(xtop, idisty, xbot, disty); - fx += fdx; - fy += fdy; - ++b; + fx += fdx; + fy += fdy; + ++b; + } + } else { + //we are zooming less than 8x, use 4bit precision + while (b < end) { + int x1 = (fx >> 16); + int x2; + int y1 = (fy >> 16); + int y2; + + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); + + const uchar *s1 = data->texture.scanLine(y1); + const uchar *s2 = data->texture.scanLine(y2); + + uint tl = fetch(s1, x1, data->texture.colorTable); + uint tr = fetch(s1, x2, data->texture.colorTable); + uint bl = fetch(s2, x1, data->texture.colorTable); + uint br = fetch(s2, x2, data->texture.colorTable); + + int distx = (fx & 0x0000ffff) >> 12; + int disty = (fy & 0x0000ffff) >> 12; + int idistx = 16 - distx; + int idisty = 16 - disty; + + *b = interpolate_4_pixels_16(tl, tr, bl, br, distx, disty, idistx, idisty); + + fx += fdx; + fy += fdy; + ++b; + } + } } } else { const qreal fdx = data->m11; @@ -779,37 +984,8 @@ const uint * QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Operator * int idistx = 256 - distx; int idisty = 256 - disty; - if (blendType == BlendTransformedBilinearTiled) { - x1 %= image_width; - if (x1 < 0) x1 += image_width; - x2 = x1 + 1; - x2 %= image_width; - - y1 %= image_height; - if (y1 < 0) y1 += image_height; - y2 = y1 + 1; - y2 %= image_height; - } else { - if (x1 < 0) { - x2 = x1 = 0; - } else if (x1 >= image_width - 1) { - x2 = x1 = image_width - 1; - } else { - x2 = x1 + 1; - } - if (y1 < 0) { - y2 = y1 = 0; - } else if (y1 >= image_height - 1) { - y2 = y1 = image_height - 1; - } else { - y2 = y1 + 1; - } - } - - Q_ASSERT(x1 >= 0 && x1 < image_width); - Q_ASSERT(x2 >= 0 && x2 < image_width); - Q_ASSERT(y1 >= 0 && y1 < image_height); - Q_ASSERT(y2 >= 0 && y2 < image_height); + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); const uchar *s1 = data->texture.scanLine(y1); const uchar *s2 = data->texture.scanLine(y2); @@ -1757,9 +1933,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_solid_Plus_impl(uint *dest, int for (int i = 0; i < length; ++i) { PRELOAD_COND(dest) uint d = dest[i]; -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - d = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX + d = comp_func_Plus_one_pixel(d, s); coverage.store(&dest[i], d); } } @@ -1781,9 +1955,7 @@ Q_STATIC_TEMPLATE_FUNCTION inline void comp_func_Plus_impl(uint *dest, const uin uint d = dest[i]; uint s = src[i]; -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - d = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX + d = comp_func_Plus_one_pixel(d, s); coverage.store(&dest[i], d); } @@ -5216,37 +5388,8 @@ Q_STATIC_TEMPLATE_FUNCTION void blend_transformed_bilinear_argb(int count, const int y1 = (y >> 16); int y2; - if (blendType == BlendTransformedBilinearTiled) { - x1 %= image_width; - if (x1 < 0) x1 += image_width; - x2 = x1 + 1; - x2 %= image_width; - - y1 %= image_height; - if (y1 < 0) y1 += image_height; - y2 = y1 + 1; - y2 %= image_height; - - Q_ASSERT(x1 >= 0 && x1 < image_width); - Q_ASSERT(x2 >= 0 && x2 < image_width); - Q_ASSERT(y1 >= 0 && y1 < image_height); - Q_ASSERT(y2 >= 0 && y2 < image_height); - } else { - if (x1 < image_x1) { - x2 = x1 = image_x1; - } else if (x1 >= image_x2 - 1) { - x2 = x1 = image_x2 - 1; - } else { - x2 = x1 + 1; - } - if (y1 < image_y1) { - y2 = y1 = image_y1; - } else if (y1 >= image_y2 - 1) { - y2 = y1 = image_y2 - 1; - } else { - y2 = y1 + 1; - } - } + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); int y1_offset = y1 * scanline_offset; int y2_offset = y2 * scanline_offset; @@ -5326,37 +5469,8 @@ Q_STATIC_TEMPLATE_FUNCTION void blend_transformed_bilinear_argb(int count, const int idistx = 256 - distx; int idisty = 256 - disty; - if (blendType == BlendTransformedBilinearTiled) { - x1 %= image_width; - if (x1 < 0) x1 += image_width; - x2 = x1 + 1; - x2 %= image_width; - - y1 %= image_height; - if (y1 < 0) y1 += image_height; - y2 = y1 + 1; - y2 %= image_height; - - Q_ASSERT(x1 >= 0 && x1 < image_width); - Q_ASSERT(x2 >= 0 && x2 < image_width); - Q_ASSERT(y1 >= 0 && y1 < image_height); - Q_ASSERT(y2 >= 0 && y2 < image_height); - } else { - if (x1 < image_x1) { - x2 = x1 = image_x1; - } else if (x1 >= image_x2 - 1) { - x2 = x1 = image_x2 - 1; - } else { - x2 = x1 + 1; - } - if (y1 < image_y1) { - y2 = y1 = image_y1; - } else if (y1 >= image_y2 - 1) { - y2 = y1 = image_y2 - 1; - } else { - y2 = y1 + 1; - } - } + fetchTransformedBilinear_pixelBounds<blendType>(image_width, image_x1, image_x2, x1, x2); + fetchTransformedBilinear_pixelBounds<blendType>(image_height, image_y1, image_y2, y1, y2); int y1_offset = y1 * scanline_offset; int y2_offset = y2 * scanline_offset; @@ -7911,11 +8025,13 @@ void qInitDrawhelperAsm() functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; + functionForMode_C[QPainter::CompositionMode_Plus] = comp_func_Plus_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; + qt_memfill32 = qt_memfill32_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index 03fe075..ed15c5c 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -51,6 +51,44 @@ QT_BEGIN_NAMESPACE +void qt_memfill32_neon(quint32 *dest, quint32 value, int count) +{ + const int epilogueSize = count % 16; + if (count >= 16) { + quint32 *const neonEnd = dest + count - epilogueSize; + register uint32x4_t valueVector1 asm ("q0") = vdupq_n_u32(value); + register uint32x4_t valueVector2 asm ("q1") = valueVector1; + while (dest != neonEnd) { + asm volatile ( + "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" + "vst2.32 { d0, d1, d2, d3 }, [%[DST]] !\n\t" + : [DST]"+r" (dest) + : [VALUE1]"w"(valueVector1), [VALUE2]"w"(valueVector2) + : "memory" + ); + } + } + + switch (epilogueSize) + { + case 15: *dest++ = value; + case 14: *dest++ = value; + case 13: *dest++ = value; + case 12: *dest++ = value; + case 11: *dest++ = value; + case 10: *dest++ = value; + case 9: *dest++ = value; + case 8: *dest++ = value; + case 7: *dest++ = value; + case 6: *dest++ = value; + case 5: *dest++ = value; + case 4: *dest++ = value; + case 3: *dest++ = value; + case 2: *dest++ = value; + case 1: *dest++ = value; + } +} + static inline uint16x8_t qvdiv_255_u16(uint16x8_t x, uint16x8_t half) { // result = (x + (x >> 8) + 0x80) >> 8 @@ -622,6 +660,61 @@ void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, u } } +void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha) +{ + if (const_alpha == 255) { + uint *const end = dst + length; + uint *const neonEnd = end - 3; + + while (dst < neonEnd) { + asm volatile ( + "vld2.8 { d0, d1 }, [%[SRC]] !\n\t" + "vld2.8 { d2, d3 }, [%[DST]]\n\t" + "vqadd.u8 q0, q0, q1\n\t" + "vst2.8 { d0, d1 }, [%[DST]] !\n\t" + : [DST]"+r" (dst), [SRC]"+r" (src) + : + : "memory", "d0", "d1", "d2", "d3", "q0", "q1" + ); + } + + while (dst != end) { + *dst = comp_func_Plus_one_pixel(*dst, *src); + ++dst; + ++src; + } + } else { + int x = 0; + const int one_minus_const_alpha = 255 - const_alpha; + const uint16x8_t constAlphaVector = vdupq_n_u16(const_alpha); + const uint16x8_t oneMinusconstAlphaVector = vdupq_n_u16(one_minus_const_alpha); + + const uint16x8_t half = vdupq_n_u16(0x80); + for (; x < length - 3; x += 4) { + const uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); + const uint8x16_t src8 = vreinterpretq_u8_u32(src32); + uint8x16_t dst8 = vld1q_u8((uint8_t *)&dst[x]); + uint8x16_t result = vqaddq_u8(dst8, src8); + + uint16x8_t result_low = vmovl_u8(vget_low_u8(result)); + uint16x8_t result_high = vmovl_u8(vget_high_u8(result)); + + uint16x8_t dst_low = vmovl_u8(vget_low_u8(dst8)); + uint16x8_t dst_high = vmovl_u8(vget_high_u8(dst8)); + + result_low = qvinterpolate_pixel_255(result_low, constAlphaVector, dst_low, oneMinusconstAlphaVector, half); + result_high = qvinterpolate_pixel_255(result_high, constAlphaVector, dst_high, oneMinusconstAlphaVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result_high)); + vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); + } + + for (; x < length; ++x) + dst[x] = comp_func_Plus_one_pixel_const_alpha(dst[x], src[x], const_alpha, one_minus_const_alpha); + } +} + static const int tileSize = 32; extern "C" void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index cd2dbfc..451edbc 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -120,6 +120,7 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +void qt_memfill32_neon(quint32 *dest, quint32 value, int count); void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); @@ -131,6 +132,7 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); +void QT_FASTCALL comp_func_Plus_neon(uint *dst, const uint *src, int length, uint const_alpha); #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index d04c70d..5747da5 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -62,6 +62,7 @@ #define QT_FT_END_HEADER #endif #include "private/qrasterdefs_p.h" +#include <private/qsimd_p.h> #ifdef Q_WS_QWS #include "QtGui/qscreen_qws.h" @@ -69,13 +70,6 @@ QT_BEGIN_NAMESPACE -#if defined(Q_OS_MAC) && (defined(__ppc__) || defined(__ppc64__)) -#undef QT_HAVE_MMX -#undef QT_HAVE_SSE -#undef QT_HAVE_SSE2 -#undef QT_HAVE_3DNOW -#endif - #if defined(Q_CC_MSVC) && _MSCVER <= 1300 && !defined(Q_CC_INTEL) #define Q_STATIC_TEMPLATE_SPECIALIZATION static #else @@ -1944,6 +1938,30 @@ const uint qt_bayer_matrix[16][16] = { ((((argb >> 24) * alpha) >> 8) << 24) | (argb & 0x00ffffff) +#if QT_POINTER_SIZE == 8 // 64-bit versions +#define AMIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#else // 32 bits +// The mask for alpha can overflow over 32 bits +#define AMIX(mask) quint32(qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) +#define MIX(mask) (qMin(((quint32(s)&mask) + (quint32(d)&mask)), quint32(mask))) +#endif + +inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha) +{ + const int result = (AMIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); + return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha); +} + +inline int comp_func_Plus_one_pixel(uint d, const uint s) +{ + const int result = (AMIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); + return result; +} + +#undef MIX +#undef AMIX + // prototypes of all the composition functions void QT_FASTCALL comp_func_SourceOver(uint *dest, const uint *src, int length, uint const_alpha); void QT_FASTCALL comp_func_DestinationOver(uint *dest, const uint *src, int length, uint const_alpha); diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 22c0384..5b674b5 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -43,7 +43,6 @@ #ifdef QT_HAVE_SSE2 -#include <private/qsimd_p.h> #include <private/qdrawingprimitive_sse2_p.h> #include <private/qpaintengine_raster_p.h> @@ -161,22 +160,6 @@ void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixe } } -inline int comp_func_Plus_one_pixel_const_alpha(uint d, const uint s, const uint const_alpha, const uint one_minus_const_alpha) -{ -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX - return INTERPOLATE_PIXEL_255(result, const_alpha, d, one_minus_const_alpha); -} - -inline int comp_func_Plus_one_pixel(uint d, const uint s) -{ -#define MIX(mask) (qMin(((qint64(s)&mask) + (qint64(d)&mask)), qint64(mask))) - const int result = (MIX(AMASK) | MIX(RMASK) | MIX(GMASK) | MIX(BMASK)); -#undef MIX - return result; -} - void QT_FASTCALL comp_func_Plus_sse2(uint *dst, const uint *src, int length, uint const_alpha) { int x = 0; diff --git a/src/gui/painting/qdrawhelper_ssse3.cpp b/src/gui/painting/qdrawhelper_ssse3.cpp index 9c02009..4cb4089 100644 --- a/src/gui/painting/qdrawhelper_ssse3.cpp +++ b/src/gui/painting/qdrawhelper_ssse3.cpp @@ -39,11 +39,10 @@ ** ****************************************************************************/ +#include <private/qdrawhelper_x86_p.h> #ifdef QT_HAVE_SSSE3 -#include <private/qsimd_p.h> -#include <private/qdrawhelper_x86_p.h> #include <private/qdrawingprimitive_sse2_p.h> QT_BEGIN_NAMESPACE diff --git a/src/gui/painting/qpainter.cpp b/src/gui/painting/qpainter.cpp index be90006..4e10671 100644 --- a/src/gui/painting/qpainter.cpp +++ b/src/gui/painting/qpainter.cpp @@ -696,9 +696,9 @@ void QPainterPrivate::updateEmulationSpecifier(QPainterState *s) skip = false; - QBrush penBrush = s->pen.brush(); - Qt::BrushStyle brushStyle = s->brush.style(); - Qt::BrushStyle penBrushStyle = penBrush.style(); + QBrush penBrush = (qpen_style(s->pen) == Qt::NoPen) ? QBrush(Qt::NoBrush) : qpen_brush(s->pen); + Qt::BrushStyle brushStyle = qbrush_style(s->brush); + Qt::BrushStyle penBrushStyle = qbrush_style(penBrush); alpha = (penBrushStyle != Qt::NoBrush && (penBrushStyle < Qt::LinearGradientPattern && penBrush.color().alpha() != 255) && !penBrush.isOpaque()) @@ -5863,7 +5863,8 @@ void QPainter::drawStaticText(const QPointF &topLeftPosition, const QStaticText } bool paintEngineSupportsTransformations = d->extended->type() == QPaintEngine::OpenGL2 - || d->extended->type() == QPaintEngine::OpenVG; + || d->extended->type() == QPaintEngine::OpenVG + || d->extended->type() == QPaintEngine::OpenGL; if (paintEngineSupportsTransformations && !staticText_d->untransformedCoordinates) { staticText_d->untransformedCoordinates = true; @@ -5907,7 +5908,7 @@ void QPainter::drawStaticText(const QPointF &topLeftPosition, const QStaticText // Recreate the layout of the static text because the matrix or font has changed if (staticTextNeedsReinit) - staticText_d->init(); + staticText_d->init(); if (transformedPosition != staticText_d->position) { // Translate to actual position QFixed fx = QFixed::fromReal(transformedPosition.x()); |