diff options
author | mread <qt-info@nokia.com> | 2010-07-02 10:50:23 (GMT) |
---|---|---|
committer | mread <qt-info@nokia.com> | 2010-07-02 10:50:23 (GMT) |
commit | 65316c0fcf9f0a406fd714622407e2b0e767fe7b (patch) | |
tree | b22c5f1d6d314a972302b8a996bde43dc3ddd2af /src/gui/painting | |
parent | 36f4d17a139c58cf00d3d9222dd2d35603ac09e8 (diff) | |
parent | 1636e03a2fda5108cb4389689a327e65c47dfe0e (diff) | |
download | Qt-65316c0fcf9f0a406fd714622407e2b0e767fe7b.zip Qt-65316c0fcf9f0a406fd714622407e2b0e767fe7b.tar.gz Qt-65316c0fcf9f0a406fd714622407e2b0e767fe7b.tar.bz2 |
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/qt into 4.7
Conflicts fixed:
src/s60installs/bwins/QtGuiu.def
src/s60installs/eabi/QtGuiu.def
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 17 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 187 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_asm.S | 105 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 5 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 2 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 202 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem.cpp | 4 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_p.h | 1 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime_p.h | 2 | ||||
-rw-r--r-- | src/gui/painting/qmemrotate.cpp | 51 | ||||
-rw-r--r-- | src/gui/painting/qpaintengine_raster.cpp | 38 | ||||
-rw-r--r-- | src/gui/painting/qpainter.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qpathclipper.cpp | 48 | ||||
-rw-r--r-- | src/gui/painting/qpathclipper_p.h | 4 |
15 files changed, 543 insertions, 127 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index bfa1136..ca9556b 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -5014,7 +5014,8 @@ Q_STATIC_TEMPLATE_FUNCTION void blendTiled(int count, const QSpan *spans, void * length -= copy_image_width; copy_image_width *= 2; } - qt_memconvert(dest, src, length); + if (length > 0) + qt_memconvert(dest, src, length); } else { while (length) { int l = qMin(image_width - sx, length); @@ -7817,6 +7818,15 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { + extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, + const uint *srcPixels, + int length, + uint const_alpha); + extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + + functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; + extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -7826,7 +7836,6 @@ void qInitDrawhelperAsm() int w, int h, int const_alpha); - qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2; @@ -7890,8 +7899,12 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; + functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; + + qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; + qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ee5f24a..03fe075 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -579,6 +579,193 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int } } +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + QT_MEMFILL_UINT(destPixels, length, color); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + uint32_t *dst = (uint32_t *) destPixels; + const uint32x4_t colorVector = vdupq_n_u32(color); + uint16x8_t half = vdupq_n_u16(0x80); + const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + uint32x4_t dstVector = vld1q_u32(&dst[x]); + + const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector); + + const uint8x8_t dst8_low = vget_low_u8(dst8); + const uint8x8_t dst8_high = vget_high_u8(dst8); + + const uint16x8_t dst16_low = vmovl_u8(dst8_low); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); + + const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half); + const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); + + uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high); + uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels); + vst1q_u32(&dst[x], colorPlusBlendedPixels); + } + + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + +static const int tileSize = 32; + +extern "C" void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); + +void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sstride, uchar *destPixels, int dstride) +{ + const ushort *src = (const ushort *)srcPixels; + ushort *dest = (ushort *)destPixels; + + sstride /= sizeof(ushort); + dstride /= sizeof(ushort); + + const int pack = sizeof(quint32) / sizeof(ushort); + const int unaligned = + qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h)); + const int restX = w % tileSize; + const int restY = (h - unaligned) % tileSize; + const int unoptimizedY = restY % pack; + const int numTilesX = w / tileSize + (restX > 0); + const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); + + for (int tx = 0; tx < numTilesX; ++tx) { + const int startx = w - tx * tileSize - 1; + const int stopx = qMax(startx - tileSize, 0); + + if (unaligned) { + for (int x = startx; x >= stopx; --x) { + ushort *d = dest + (w - x - 1) * dstride; + for (int y = 0; y < unaligned; ++y) { + *d++ = src[y * sstride + x]; + } + } + } + + for (int ty = 0; ty < numTilesY; ++ty) { + const int starty = ty * tileSize + unaligned; + const int stopy = qMin(starty + tileSize, h - unoptimizedY); + + int x = startx; + // qt_rotate90_16_neon writes to eight rows, four pixels at a time + for (; x >= stopx + 7; x -= 8) { + ushort *d = dest + (w - x - 1) * dstride + starty; + const ushort *s = &src[starty * sstride + x - 7]; + qt_rotate90_16_neon(d, s, sstride * 2, dstride * 2, stopy - starty); + } + + for (; x >= stopx; --x) { + quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride + starty); + for (int y = starty; y < stopy; y += pack) { + quint32 c = src[y * sstride + x]; + for (int i = 1; i < pack; ++i) { + const int shift = (sizeof(int) * 8 / pack * i); + const ushort color = src[(y + i) * sstride + x]; + c |= color << shift; + } + *d++ = c; + } + } + } + + if (unoptimizedY) { + const int starty = h - unoptimizedY; + for (int x = startx; x >= stopx; --x) { + ushort *d = dest + (w - x - 1) * dstride + starty; + for (int y = starty; y < h; ++y) { + *d++ = src[y * sstride + x]; + } + } + } + } +} + +extern "C" void qt_rotate270_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); + +void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, + int sstride, + uchar *destPixels, int dstride) +{ + const ushort *src = (const ushort *)srcPixels; + ushort *dest = (ushort *)destPixels; + + sstride /= sizeof(ushort); + dstride /= sizeof(ushort); + + const int pack = sizeof(quint32) / sizeof(ushort); + const int unaligned = + qMin(uint((long(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h)); + const int restX = w % tileSize; + const int restY = (h - unaligned) % tileSize; + const int unoptimizedY = restY % pack; + const int numTilesX = w / tileSize + (restX > 0); + const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); + + for (int tx = 0; tx < numTilesX; ++tx) { + const int startx = tx * tileSize; + const int stopx = qMin(startx + tileSize, w); + + if (unaligned) { + for (int x = startx; x < stopx; ++x) { + ushort *d = dest + x * dstride; + for (int y = h - 1; y >= h - unaligned; --y) { + *d++ = src[y * sstride + x]; + } + } + } + + for (int ty = 0; ty < numTilesY; ++ty) { + const int starty = h - 1 - unaligned - ty * tileSize; + const int stopy = qMax(starty - tileSize, unoptimizedY); + + int x = startx; + // qt_rotate90_16_neon writes to eight rows, four pixels at a time + for (; x < stopx - 7; x += 8) { + ushort *d = dest + x * dstride + h - 1 - starty; + const ushort *s = &src[starty * sstride + x]; + qt_rotate90_16_neon(d + 7 * dstride, s, -sstride * 2, -dstride * 2, starty - stopy); + } + + for (; x < stopx; ++x) { + quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride + + h - 1 - starty); + for (int y = starty; y > stopy; y -= pack) { + quint32 c = src[y * sstride + x]; + for (int i = 1; i < pack; ++i) { + const int shift = (sizeof(int) * 8 / pack * i); + const ushort color = src[(y - i) * sstride + x]; + c |= color << shift; + } + *d++ = c; + } + } + } + if (unoptimizedY) { + const int starty = unoptimizedY - 1; + for (int x = startx; x < stopx; ++x) { + ushort *d = dest + x * dstride + h - 1 - starty; + for (int y = starty; y >= 0; --y) { + *d++ = src[y * sstride + x]; + } + } + } + } +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_asm.S b/src/gui/painting/qdrawhelper_neon_asm.S index 9992817..d9cdc36 100644 --- a/src/gui/painting/qdrawhelper_neon_asm.S +++ b/src/gui/painting/qdrawhelper_neon_asm.S @@ -190,3 +190,108 @@ blend_8_pixels_rgb16_on_rgb16_neon: bx lr .endfunc + +/* void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count) */ + .func qt_rotate90_16_neon + .global qt_rotate90_16_neon + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden qt_rotate90_16_neon + .type qt_rotate90_16_neon, %function +#endif +qt_rotate90_16_neon: + push { r4-r11, lr } + ldr r5, [sp, #(9*4)] + + /* The preloads are the key to getting good performance */ + pld [r1] + + mov r4, r5, asr #2 + add r6, r0, r3 + add r7, r6, r3 + + add r8, r7, r3 + add r9, r8, r3 + + pld [r1, r2] + + add r10, r9, r3 + add r11, r10, r3 + + add r3, r3, r11 + and r5, r5, #3 + + pld [r1, r2, lsl #1] + + cmp r4, #0 + beq .rotate90_16_tail + +.rotate90_16_loop: + vld1.16 { q8 }, [r1], r2 + + pld [r1, r2, lsl #1] + + vld1.16 { q9 }, [r1], r2 + vld1.16 { q10 }, [r1], r2 + vld1.16 { q11 }, [r1], r2 + + pld [r1] + + /* Could have used four quad-word zips instead, + but those take three cycles as opposed to one. */ + vzip.16 d16, d20 + vzip.16 d17, d21 + + vzip.16 d18, d22 + + pld [r1, r2] + + vzip.16 d19, d23 + + vzip.16 d16, d18 + vzip.16 d17, d19 + + pld [r1, r2, lsl #1] + + vzip.16 d20, d22 + vzip.16 d21, d23 + + vst1.16 { d23 }, [r0]! + vst1.16 { d21 }, [r6]! + vst1.16 { d19 }, [r7]! + vst1.16 { d17 }, [r8]! + vst1.16 { d22 }, [r9]! + vst1.16 { d20 }, [r10]! + vst1.16 { d18 }, [r11]! + vst1.16 { d16 }, [r3]! + + sub r4, r4, #1 + cmp r4, #0 + bne .rotate90_16_loop + b .rotate90_16_tail + +.rotate90_16_tail_loop: + sub r5, r5, #2 + + vld1.16 { q8 }, [r1], r2 + vld1.16 { q9 }, [r1], r2 + + vzip.16 d16, d18 + vzip.16 d17, d19 + + vst1.32 { d19[1] }, [r0]! + vst1.32 { d19[0] }, [r6]! + vst1.32 { d17[1] }, [r7]! + vst1.32 { d17[0] }, [r8]! + vst1.32 { d18[1] }, [r9]! + vst1.32 { d18[0] }, [r10]! + vst1.32 { d16[1] }, [r11]! + vst1.32 { d16[0] }, [r3]! + +.rotate90_16_tail: + cmp r5, #0 + bgt .rotate90_16_tail_loop + + pop { r4-r11, pc } + + .endfunc diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index d6a4509..cd2dbfc 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -120,6 +120,9 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); +void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); + uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length); @@ -127,6 +130,8 @@ uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); + #endif // QT_HAVE_NEON QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index acf765c..97c78bb 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -152,6 +152,7 @@ typedef void (*SrcOverTransformFunc)(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +typedef void (*MemRotateFunc)(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); struct DrawHelper { ProcessSpans blendColor; @@ -165,6 +166,7 @@ struct DrawHelper { extern SrcOverBlendFunc qBlendFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverScaleFunc qScaleFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverTransformFunc qTransformFunctions[QImage::NImageFormats][QImage::NImageFormats]; +extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3]; extern DrawHelper qDrawHelper[QImage::NImageFormats]; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 6ac64d3..6cd8688 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -126,13 +126,100 @@ QT_BEGIN_NAMESPACE result = _mm_or_si128(finalAG, finalRB); \ } +// Basically blend src over dst with the const alpha defined as constAlphaVector. +// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +//const __m128i alphaMask = _mm_set1_epi32(0xff000000); +// +// The computation being done is: +// result = s + d * (1-alpha) +// with shortcuts if fully opaque or fully transparent. +#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ + /* all opaque */ \ + _mm_storeu_si128((__m128i *)&dst[x], srcVector); \ + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ + /* not fully transparent */ \ + /* extract the alpha channel on 2 x 16 bits */ \ + /* so we have room for the multiplication */ \ + /* each 32 bits will be in the form 0x00AA00AA */ \ + /* with A being the 1 - alpha */ \ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + /* result = s + d * (1-alpha) */\ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + uint s = src[x]; \ + if (s >= 0xff000000) \ + dst[x] = s; \ + else if (s != 0) \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ +} + +// Basically blend src over dst with the const alpha defined as constAlphaVector. +// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +// +// The computation being done is: +// dest = (s + d * sia) * ca + d * cia +// = s * ca + d * (sia * ca + cia) +// = s * ca + d * (1 - sa*ca) +#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \ +{ \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \ + BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \ +\ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + quint32 s = src[x]; \ + if (s != 0) { \ + s = BYTE_MUL(s, const_alpha); \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ + } \ +} + void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha == 256) { const __m128i alphaMask = _mm_set1_epi32(0xff000000); const __m128i nullVector = _mm_set1_epi32(0); @@ -140,41 +227,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i one = _mm_set1_epi16(0xff); const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); for (int y = 0; y < h; ++y) { - int x = 0; - for (; x < w-3; x += 4) { - const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { - // all opaque - _mm_storeu_si128((__m128i *)&dst[x], srcVector); - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { - // not fully transparent - // result = s + d * (1-alpha) - - // extract the alpha channel on 2 x 16 bits - // so we have room for the multiplication - // each 32 bits will be in the form 0x00AA00AA - // with A being the 1 - alpha - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - // result = s + d * (1-alpha) - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x<w; ++x) { - uint s = src[x]; - if (s >= 0xff000000) - dst[x] = s; - else if (s != 0) - dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); - } + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -189,31 +242,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); for (int y = 0; y < h; ++y) { - int x = 0; - for (; x < w-3; x += 4) { - __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { - BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); - - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x<w; ++x) { - quint32 s = src[x]; - if (s != 0) { - s = BYTE_MUL(s, const_alpha); - dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); - } - } + BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -232,7 +261,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha != 256) { if (const_alpha != 0) { const __m128i nullVector = _mm_set1_epi32(0); @@ -268,6 +297,27 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, } } +void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) +{ + Q_ASSERT(const_alpha >= 0); + Q_ASSERT(const_alpha < 256); + + const quint32 *src = (const quint32 *) srcPixels; + quint32 *dst = (quint32 *) destPixels; + + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + if (const_alpha == 255) { + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask); + } else { + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { @@ -312,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) } } +void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + qt_memfill32_sse2(destPixels, color, length); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + quint32 *dst = (quint32 *) destPixels; + const __m128i colorVector = _mm_set1_epi32(color); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); + dstVector = _mm_add_epi8(colorVector, dstVector); + _mm_storeu_si128((__m128i *)&dst[x], dstVector); + } + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) { if (count < 3) { diff --git a/src/gui/painting/qgraphicssystem.cpp b/src/gui/painting/qgraphicssystem.cpp index bd9e7fc..f06e309 100644 --- a/src/gui/painting/qgraphicssystem.cpp +++ b/src/gui/painting/qgraphicssystem.cpp @@ -79,5 +79,9 @@ QPixmapData *QGraphicsSystem::createDefaultPixmapData(QPixmapData::PixelType typ return 0; } +QPixmapData *QGraphicsSystem::createPixmapData(QPixmapData *origin) +{ + return createPixmapData(origin->pixelType()); +} QT_END_NAMESPACE diff --git a/src/gui/painting/qgraphicssystem_p.h b/src/gui/painting/qgraphicssystem_p.h index ddca788..1211505 100644 --- a/src/gui/painting/qgraphicssystem_p.h +++ b/src/gui/painting/qgraphicssystem_p.h @@ -64,6 +64,7 @@ class Q_GUI_EXPORT QGraphicsSystem { public: virtual QPixmapData *createPixmapData(QPixmapData::PixelType type) const = 0; + virtual QPixmapData *createPixmapData(QPixmapData *origin); virtual QWindowSurface *createWindowSurface(QWidget *widget) const = 0; virtual ~QGraphicsSystem() = 0; diff --git a/src/gui/painting/qgraphicssystem_runtime.cpp b/src/gui/painting/qgraphicssystem_runtime.cpp index 32a8578..1c3ae10 100644 --- a/src/gui/painting/qgraphicssystem_runtime.cpp +++ b/src/gui/painting/qgraphicssystem_runtime.cpp @@ -416,7 +416,7 @@ void QRuntimeGraphicsSystem::setGraphicsSystem(const QString &name) for (int i = 0; i < m_pixmapDatas.size(); ++i) { QRuntimePixmapData *proxy = m_pixmapDatas.at(i); - QPixmapData *newData = m_graphicsSystem->createPixmapData(proxy->m_data->pixelType()); + QPixmapData *newData = m_graphicsSystem->createPixmapData(proxy->m_data); // ### TODO Optimize. Openvg and s60raster graphics systems could switch internal ARGB32_PRE QImage buffers. newData->fromImage(proxy->m_data->toImage(), Qt::AutoColor | Qt::OrderedAlphaDither); delete proxy->m_data; diff --git a/src/gui/painting/qgraphicssystem_runtime_p.h b/src/gui/painting/qgraphicssystem_runtime_p.h index 101a8e7..7aab89c 100644 --- a/src/gui/painting/qgraphicssystem_runtime_p.h +++ b/src/gui/painting/qgraphicssystem_runtime_p.h @@ -61,7 +61,7 @@ QT_BEGIN_NAMESPACE class QRuntimeGraphicsSystem; -class QRuntimePixmapData : public QPixmapData { +class Q_GUI_EXPORT QRuntimePixmapData : public QPixmapData { public: QRuntimePixmapData(const QRuntimeGraphicsSystem *gs, PixelType type); ~QRuntimePixmapData(); diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp index c37aa51..6888bb0 100644 --- a/src/gui/painting/qmemrotate.cpp +++ b/src/gui/painting/qmemrotate.cpp @@ -594,4 +594,55 @@ void Q_GUI_EXPORT qt_memrotate90_gl(const quint32 *src, int srcWidth, int srcHei qt_memrotate90_template(src, srcWidth, srcHeight, srcStride, reinterpret_cast<qrgb_gl_rgba *>(dest), dstStride); } +void qt_memrotate90_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate90((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate180_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate180((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +void qt_memrotate180_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate180((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3] = +// 90, 180, 270 +{ + { 0, 0, 0 }, // Format_Invalid, + { 0, 0, 0 }, // Format_Mono, + { 0, 0, 0 }, // Format_MonoLSB, + { 0, 0, 0 }, // Format_Indexed8, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB32, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32_Premultiplied, + { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // Format_RGB16, + { 0, 0, 0 }, // Format_ARGB8565_Premultiplied, + { 0, 0, 0 }, // Format_RGB666, + { 0, 0, 0 }, // Format_ARGB6666_Premultiplied, + { 0, 0, 0 }, // Format_RGB555, + { 0, 0, 0 }, // Format_ARGB8555_Premultiplied, + { 0, 0, 0 }, // Format_RGB888, + { 0, 0, 0 }, // Format_RGB444, + { 0, 0, 0 } // Format_ARGB4444_Premultiplied, +}; + QT_END_NAMESPACE diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index a212718..09a87aa 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -2419,7 +2419,9 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawImage(r, image, sr); } } else { - const QImage image = pixmap.toImage(); + QRect clippedSource = sr.toAlignedRect().intersected(pixmap.rect()); + const QImage image = pd->toImage(clippedSource); + QRectF translatedSource = sr.translated(-clippedSource.topLeft()); if (image.depth() == 1) { Q_D(QRasterPaintEngine); QRasterPaintEngineState *s = state(); @@ -2430,10 +2432,10 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawBitmap(r.topLeft() + QPointF(s->matrix.dx(), s->matrix.dy()), image, &s->penData); return; } else { - drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), sr); + drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), translatedSource); } } else { - drawImage(r, image, sr); + drawImage(r, image, translatedSource); } } } @@ -2551,23 +2553,6 @@ namespace { return NoRotation; } - template <typename T> void memRotate(RotationType type, const T *srcBase, int w, int h, int sbpl, T *dstBase, int dbpl) - { - switch (type) { - case Rotation90: - qt_memrotate90(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case Rotation180: - qt_memrotate180(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case Rotation270: - qt_memrotate270(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case NoRotation: - break; - } - } - inline bool isPixelAligned(const QRectF &rect) { return QRectF(rect.toRect()) == rect; } @@ -2648,7 +2633,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe { RotationType rotationType = qRotationType(s->matrix); - if (rotationType != NoRotation && img.rect().contains(sr.toAlignedRect())) { + if (rotationType != NoRotation && qMemRotateFunctions[d->rasterBuffer->format][rotationType] && img.rect().contains(sr.toAlignedRect())) { QRectF transformedTargetRect = s->matrix.mapRect(r); if ((!(s->renderHints & QPainter::SmoothPixmapTransform) && !(s->renderHints & QPainter::Antialiasing)) @@ -2676,10 +2661,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe uint cw = clippedSourceRect.width(); uint ch = clippedSourceRect.height(); - if (d->rasterBuffer->format == QImage::Format_RGB16) - memRotate(rotationType, (quint16 *)srcBase, cw, ch, sbpl, (quint16 *)dstBase, dbpl); - else - memRotate(rotationType, (quint32 *)srcBase, cw, ch, sbpl, (quint32 *)dstBase, dbpl); + qMemRotateFunctions[d->rasterBuffer->format][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl); return; } @@ -2688,7 +2670,11 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe if (s->matrix.type() > QTransform::TxTranslate || stretch_sr) { - if (s->flags.fast_images) { + QRectF targetBounds = s->matrix.mapRect(r); + bool exceedsPrecision = targetBounds.width() > 0xffff + || targetBounds.height() > 0xffff; + + if (s->flags.fast_images && !exceedsPrecision) { if (s->matrix.type() > QTransform::TxScale) { SrcOverTransformFunc func = qTransformFunctions[d->rasterBuffer->format][img.format()]; if (func && (!clip || clip->hasRectClip)) { diff --git a/src/gui/painting/qpainter.cpp b/src/gui/painting/qpainter.cpp index 71bc990..2ea6673 100644 --- a/src/gui/painting/qpainter.cpp +++ b/src/gui/painting/qpainter.cpp @@ -5958,7 +5958,7 @@ void QPainter::drawText(const QPointF &p, const QString &str, int tf, int justif Q_ASSERT_X(false, Q_FUNC_INFO, "stringToCMap shouldn't fail twice"); } - QTextItemInt gf(glyphs, &d->state->font, fontEngine); + QTextItemInt gf(glyphs, &d->state->font, str.data(), len, fontEngine); drawTextItem(p, gf); return; } diff --git a/src/gui/painting/qpathclipper.cpp b/src/gui/painting/qpathclipper.cpp index 78553c9..a17b7c1 100644 --- a/src/gui/painting/qpathclipper.cpp +++ b/src/gui/painting/qpathclipper.cpp @@ -86,9 +86,11 @@ static qreal dot(const QPointF &a, const QPointF &b) return a.x() * b.x() + a.y() * b.y(); } -static QPointF normalize(const QPointF &p) +static void normalize(double &x, double &y) { - return p / qSqrt(p.x() * p.x() + p.y() * p.y()); + double reciprocal = 1 / qSqrt(x * x + y * y); + x *= reciprocal; + y *= reciprocal; } struct QIntersection @@ -1017,8 +1019,8 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const const QPathEdge *ap = edge(a); const QPathEdge *bp = edge(b); - qreal a_angle = ap->angle; - qreal b_angle = bp->angle; + double a_angle = ap->angle; + double b_angle = bp->angle; if (vertex == ap->second) a_angle = ap->invAngle; @@ -1026,7 +1028,7 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const if (vertex == bp->second) b_angle = bp->invAngle; - qreal result = b_angle - a_angle; + double result = b_angle - a_angle; if (result >= 128.) return result - 128.; @@ -1036,26 +1038,6 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const return result; } -static inline QPointF tangentAt(const QWingedEdge &list, int vi, int ei) -{ - const QPathEdge *ep = list.edge(ei); - Q_ASSERT(ep); - - qreal sign; - - if (ep->first == vi) { - sign = 1; - } else { - sign = -1; - } - - const QPointF a = *list.vertex(ep->first); - const QPointF b = *list.vertex(ep->second); - QPointF normal = b - a; - - return normalize(sign * normal); -} - static inline QPointF midPoint(const QWingedEdge &list, int ei) { const QPathEdge *ep = list.edge(ei); @@ -1191,7 +1173,7 @@ static int commonEdge(const QWingedEdge &list, int a, int b) return -1; } -static qreal computeAngle(const QPointF &v) +static double computeAngle(const QPointF &v) { #if 1 if (v.x() == 0) { @@ -1200,15 +1182,17 @@ static qreal computeAngle(const QPointF &v) return v.x() <= 0 ? 32. : 96.; } - QPointF nv = normalize(v); - if (nv.y() < 0) { - if (nv.x() < 0) { // 0 - 32 - return -32. * nv.x(); + double vx = v.x(); + double vy = v.y(); + normalize(vx, vy); + if (vy < 0) { + if (vx < 0) { // 0 - 32 + return -32. * vx; } else { // 96 - 128 - return 128. - 32. * nv.x(); + return 128. - 32. * vx; } } else { // 32 - 96 - return 64. + 32 * nv.x(); + return 64. + 32. * vx; } #else // doesn't seem to be robust enough diff --git a/src/gui/painting/qpathclipper_p.h b/src/gui/painting/qpathclipper_p.h index fab618d..bdad4e1 100644 --- a/src/gui/painting/qpathclipper_p.h +++ b/src/gui/painting/qpathclipper_p.h @@ -148,8 +148,8 @@ public: int first; int second; - qreal angle; - qreal invAngle; + double angle; + double invAngle; int next(Traversal traversal, Direction direction) const; |