diff options
author | Qt Continuous Integration System <qt-info@nokia.com> | 2010-07-01 15:43:42 (GMT) |
---|---|---|
committer | Qt Continuous Integration System <qt-info@nokia.com> | 2010-07-01 15:43:42 (GMT) |
commit | e95d8b9bc027dee5cfd8a13afa0c9204607ad7f1 (patch) | |
tree | ac59ce25d31fe953355dca66421842af15b32b94 /src/gui/painting | |
parent | 405ae4ef5382cd8ee0adfbe45c59c8d92e3ffdf6 (diff) | |
parent | 71547238ea4391636e37f5cf89905433faeb32d1 (diff) | |
download | Qt-e95d8b9bc027dee5cfd8a13afa0c9204607ad7f1.zip Qt-e95d8b9bc027dee5cfd8a13afa0c9204607ad7f1.tar.gz Qt-e95d8b9bc027dee5cfd8a13afa0c9204607ad7f1.tar.bz2 |
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2 into 4.7-integration
* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2: (51 commits)
Symbian on Linux: $QTDIR/bin is not necessarily in the path.
Updated Symbian def files with new EGL exports.
Added another missing EGL stub.
Fixed compilation on Symbian.
Added missing EGL stub function.
Got rid of unused variable compiler warning.
QGLWindowSurface support for partial updates via EGL_NOK_swap_region2
Adding func prototypes for EGL_NOK_swap_region2 extension.
Check for EGLSurface leak only when paint device is a QGLWidget.
Fixed bug in drawTiledPixmap when width of pixmap matches target rect.
Update def files for symbian
Improved performance of 16 bit memrotates using NEON instructions.
Use built-in iconv on Solaris if available
Export various symbols needed to make a custom GL graphicssystem.
Moc: fix compilation when templated types with multiple arguments are used.
Fixes the documentation of QGraphicsEffect::update().
support BGRA textures on SGX
Avoid calling time.elapsed() twice in abstract animation
Adds slowdownFactor to UnifiedTimer in abstract animation
Add the conversion in-place for QPixmap::fromImageReader() on raster.
...
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 17 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 187 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_asm.S | 105 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 5 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_p.h | 2 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 202 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem.cpp | 4 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_p.h | 1 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qgraphicssystem_runtime_p.h | 2 | ||||
-rw-r--r-- | src/gui/painting/qmemrotate.cpp | 51 | ||||
-rw-r--r-- | src/gui/painting/qpaintengine_raster.cpp | 38 | ||||
-rw-r--r-- | src/gui/painting/qpainter.cpp | 2 | ||||
-rw-r--r-- | src/gui/painting/qpathclipper.cpp | 48 | ||||
-rw-r--r-- | src/gui/painting/qpathclipper_p.h | 4 |
15 files changed, 543 insertions, 127 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index bfa1136..ca9556b 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -5014,7 +5014,8 @@ Q_STATIC_TEMPLATE_FUNCTION void blendTiled(int count, const QSpan *spans, void * length -= copy_image_width; copy_image_width *= 2; } - qt_memconvert(dest, src, length); + if (length > 0) + qt_memconvert(dest, src, length); } else { while (length) { int l = qMin(image_width - sx, length); @@ -7817,6 +7818,15 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { + extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, + const uint *srcPixels, + int length, + uint const_alpha); + extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + + functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; + extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -7826,7 +7836,6 @@ void qInitDrawhelperAsm() int w, int h, int const_alpha); - qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2; @@ -7890,8 +7899,12 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; + functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; + + qMemRotateFunctions[QImage::Format_RGB16][0] = qt_memrotate90_16_neon; + 
qMemRotateFunctions[QImage::Format_RGB16][2] = qt_memrotate270_16_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ee5f24a..03fe075 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -579,6 +579,193 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int } } +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + QT_MEMFILL_UINT(destPixels, length, color); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + uint32_t *dst = (uint32_t *) destPixels; + const uint32x4_t colorVector = vdupq_n_u32(color); + uint16x8_t half = vdupq_n_u16(0x80); + const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + uint32x4_t dstVector = vld1q_u32(&dst[x]); + + const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector); + + const uint8x8_t dst8_low = vget_low_u8(dst8); + const uint8x8_t dst8_high = vget_high_u8(dst8); + + const uint16x8_t dst16_low = vmovl_u8(dst8_low); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); + + const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half); + const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); + + uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high); + uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels); + vst1q_u32(&dst[x], colorPlusBlendedPixels); + } + + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + +static const int tileSize = 32; + 
+extern "C" void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); + +void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sstride, uchar *destPixels, int dstride) +{ + const ushort *src = (const ushort *)srcPixels; + ushort *dest = (ushort *)destPixels; + + sstride /= sizeof(ushort); + dstride /= sizeof(ushort); + + const int pack = sizeof(quint32) / sizeof(ushort); + const int unaligned = + qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h)); + const int restX = w % tileSize; + const int restY = (h - unaligned) % tileSize; + const int unoptimizedY = restY % pack; + const int numTilesX = w / tileSize + (restX > 0); + const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); + + for (int tx = 0; tx < numTilesX; ++tx) { + const int startx = w - tx * tileSize - 1; + const int stopx = qMax(startx - tileSize, 0); + + if (unaligned) { + for (int x = startx; x >= stopx; --x) { + ushort *d = dest + (w - x - 1) * dstride; + for (int y = 0; y < unaligned; ++y) { + *d++ = src[y * sstride + x]; + } + } + } + + for (int ty = 0; ty < numTilesY; ++ty) { + const int starty = ty * tileSize + unaligned; + const int stopy = qMin(starty + tileSize, h - unoptimizedY); + + int x = startx; + // qt_rotate90_16_neon writes to eight rows, four pixels at a time + for (; x >= stopx + 7; x -= 8) { + ushort *d = dest + (w - x - 1) * dstride + starty; + const ushort *s = &src[starty * sstride + x - 7]; + qt_rotate90_16_neon(d, s, sstride * 2, dstride * 2, stopy - starty); + } + + for (; x >= stopx; --x) { + quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride + starty); + for (int y = starty; y < stopy; y += pack) { + quint32 c = src[y * sstride + x]; + for (int i = 1; i < pack; ++i) { + const int shift = (sizeof(int) * 8 / pack * i); + const ushort color = src[(y + i) * sstride + x]; + c |= color << shift; + } + *d++ = c; + } + } + } + + if (unoptimizedY) { + const int starty = h - 
unoptimizedY; + for (int x = startx; x >= stopx; --x) { + ushort *d = dest + (w - x - 1) * dstride + starty; + for (int y = starty; y < h; ++y) { + *d++ = src[y * sstride + x]; + } + } + } + } +} + +extern "C" void qt_rotate270_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count); + +void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, + int sstride, + uchar *destPixels, int dstride) +{ + const ushort *src = (const ushort *)srcPixels; + ushort *dest = (ushort *)destPixels; + + sstride /= sizeof(ushort); + dstride /= sizeof(ushort); + + const int pack = sizeof(quint32) / sizeof(ushort); + const int unaligned = + qMin(uint((long(dest) & (sizeof(quint32)-1)) / sizeof(ushort)), uint(h)); + const int restX = w % tileSize; + const int restY = (h - unaligned) % tileSize; + const int unoptimizedY = restY % pack; + const int numTilesX = w / tileSize + (restX > 0); + const int numTilesY = (h - unaligned) / tileSize + (restY >= pack); + + for (int tx = 0; tx < numTilesX; ++tx) { + const int startx = tx * tileSize; + const int stopx = qMin(startx + tileSize, w); + + if (unaligned) { + for (int x = startx; x < stopx; ++x) { + ushort *d = dest + x * dstride; + for (int y = h - 1; y >= h - unaligned; --y) { + *d++ = src[y * sstride + x]; + } + } + } + + for (int ty = 0; ty < numTilesY; ++ty) { + const int starty = h - 1 - unaligned - ty * tileSize; + const int stopy = qMax(starty - tileSize, unoptimizedY); + + int x = startx; + // qt_rotate90_16_neon writes to eight rows, four pixels at a time + for (; x < stopx - 7; x += 8) { + ushort *d = dest + x * dstride + h - 1 - starty; + const ushort *s = &src[starty * sstride + x]; + qt_rotate90_16_neon(d + 7 * dstride, s, -sstride * 2, -dstride * 2, starty - stopy); + } + + for (; x < stopx; ++x) { + quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride + + h - 1 - starty); + for (int y = starty; y > stopy; y -= pack) { + quint32 c = src[y * sstride + x]; + for (int i = 1; i < pack; ++i) { + 
const int shift = (sizeof(int) * 8 / pack * i); + const ushort color = src[(y - i) * sstride + x]; + c |= color << shift; + } + *d++ = c; + } + } + } + if (unoptimizedY) { + const int starty = unoptimizedY - 1; + for (int x = startx; x < stopx; ++x) { + ushort *d = dest + x * dstride + h - 1 - starty; + for (int y = starty; y >= 0; --y) { + *d++ = src[y * sstride + x]; + } + } + } + } +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_asm.S b/src/gui/painting/qdrawhelper_neon_asm.S index 9992817..d9cdc36 100644 --- a/src/gui/painting/qdrawhelper_neon_asm.S +++ b/src/gui/painting/qdrawhelper_neon_asm.S @@ -190,3 +190,108 @@ blend_8_pixels_rgb16_on_rgb16_neon: bx lr .endfunc + +/* void qt_rotate90_16_neon(quint16 *dst, const quint16 *src, int sstride, int dstride, int count) */ + .func qt_rotate90_16_neon + .global qt_rotate90_16_neon + /* For ELF format also set function visibility to hidden */ +#ifdef __ELF__ + .hidden qt_rotate90_16_neon + .type qt_rotate90_16_neon, %function +#endif +qt_rotate90_16_neon: + push { r4-r11, lr } + ldr r5, [sp, #(9*4)] + + /* The preloads are the key to getting good performance */ + pld [r1] + + mov r4, r5, asr #2 + add r6, r0, r3 + add r7, r6, r3 + + add r8, r7, r3 + add r9, r8, r3 + + pld [r1, r2] + + add r10, r9, r3 + add r11, r10, r3 + + add r3, r3, r11 + and r5, r5, #3 + + pld [r1, r2, lsl #1] + + cmp r4, #0 + beq .rotate90_16_tail + +.rotate90_16_loop: + vld1.16 { q8 }, [r1], r2 + + pld [r1, r2, lsl #1] + + vld1.16 { q9 }, [r1], r2 + vld1.16 { q10 }, [r1], r2 + vld1.16 { q11 }, [r1], r2 + + pld [r1] + + /* Could have used four quad-word zips instead, + but those take three cycles as opposed to one. */ + vzip.16 d16, d20 + vzip.16 d17, d21 + + vzip.16 d18, d22 + + pld [r1, r2] + + vzip.16 d19, d23 + + vzip.16 d16, d18 + vzip.16 d17, d19 + + pld [r1, r2, lsl #1] + + vzip.16 d20, d22 + vzip.16 d21, d23 + + vst1.16 { d23 }, [r0]! + vst1.16 { d21 }, [r6]! + vst1.16 { d19 }, [r7]! 
+ vst1.16 { d17 }, [r8]! + vst1.16 { d22 }, [r9]! + vst1.16 { d20 }, [r10]! + vst1.16 { d18 }, [r11]! + vst1.16 { d16 }, [r3]! + + sub r4, r4, #1 + cmp r4, #0 + bne .rotate90_16_loop + b .rotate90_16_tail + +.rotate90_16_tail_loop: + sub r5, r5, #2 + + vld1.16 { q8 }, [r1], r2 + vld1.16 { q9 }, [r1], r2 + + vzip.16 d16, d18 + vzip.16 d17, d19 + + vst1.32 { d19[1] }, [r0]! + vst1.32 { d19[0] }, [r6]! + vst1.32 { d17[1] }, [r7]! + vst1.32 { d17[0] }, [r8]! + vst1.32 { d18[1] }, [r9]! + vst1.32 { d18[0] }, [r10]! + vst1.32 { d16[1] }, [r11]! + vst1.32 { d16[0] }, [r3]! + +.rotate90_16_tail: + cmp r5, #0 + bgt .rotate90_16_tail_loop + + pop { r4-r11, pc } + + .endfunc diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index d6a4509..cd2dbfc 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -120,6 +120,9 @@ void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl, const QTransform &targetRectTransform, int const_alpha); +void qt_memrotate90_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); +void qt_memrotate270_16_neon(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); + uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, QRasterBuffer *rasterBuffer, int x, int y, int length); @@ -127,6 +130,8 @@ uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); + #endif // QT_HAVE_NEON QT_END_NAMESPACE diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index acf765c..97c78bb 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -152,6 +152,7 @@ typedef void (*SrcOverTransformFunc)(uchar *destPixels, int dbpl, const QTransform 
&targetRectTransform, int const_alpha); +typedef void (*MemRotateFunc)(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl); struct DrawHelper { ProcessSpans blendColor; @@ -165,6 +166,7 @@ struct DrawHelper { extern SrcOverBlendFunc qBlendFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverScaleFunc qScaleFunctions[QImage::NImageFormats][QImage::NImageFormats]; extern SrcOverTransformFunc qTransformFunctions[QImage::NImageFormats][QImage::NImageFormats]; +extern MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3]; extern DrawHelper qDrawHelper[QImage::NImageFormats]; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 6ac64d3..6cd8688 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -126,13 +126,100 @@ QT_BEGIN_NAMESPACE result = _mm_or_si128(finalAG, finalRB); \ } +// Basically blend src over dst with the const alpha defined as constAlphaVector. +// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +//const __m128i alphaMask = _mm_set1_epi32(0xff000000); +// +// The computation being done is: +// result = s + d * (1-alpha) +// with shortcuts if fully opaque or fully transparent. 
+#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ + /* all opaque */ \ + _mm_storeu_si128((__m128i *)&dst[x], srcVector); \ + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ + /* not fully transparent */ \ + /* extract the alpha channel on 2 x 16 bits */ \ + /* so we have room for the multiplication */ \ + /* each 32 bits will be in the form 0x00AA00AA */ \ + /* with A being the 1 - alpha */ \ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + /* result = s + d * (1-alpha) */\ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + uint s = src[x]; \ + if (s >= 0xff000000) \ + dst[x] = s; \ + else if (s != 0) \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ +} + +// Basically blend src over dst with the const alpha defined as constAlphaVector. 
+// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +// +// The computation being done is: +// dest = (s + d * sia) * ca + d * cia +// = s * ca + d * (sia * ca + cia) +// = s * ca + d * (1 - sa*ca) +#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \ +{ \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \ + BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \ +\ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + quint32 s = src[x]; \ + if (s != 0) { \ + s = BYTE_MUL(s, const_alpha); \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ + } \ +} + void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha == 256) { const __m128i alphaMask = _mm_set1_epi32(0xff000000); const __m128i nullVector = _mm_set1_epi32(0); @@ -140,41 +227,7 @@ void 
qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i one = _mm_set1_epi16(0xff); const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); for (int y = 0; y < h; ++y) { - int x = 0; - for (; x < w-3; x += 4) { - const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { - // all opaque - _mm_storeu_si128((__m128i *)&dst[x], srcVector); - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { - // not fully transparent - // result = s + d * (1-alpha) - - // extract the alpha channel on 2 x 16 bits - // so we have room for the multiplication - // each 32 bits will be in the form 0x00AA00AA - // with A being the 1 - alpha - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - // result = s + d * (1-alpha) - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x<w; ++x) { - uint s = src[x]; - if (s >= 0xff000000) - dst[x] = s; - else if (s != 0) - dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); - } + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -189,31 +242,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); for (int y = 0; y < h; ++y) { - int x = 0; - for (; x < w-3; x += 4) { - 
__m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { - BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); - - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x<w; ++x) { - quint32 s = src[x]; - if (s != 0) { - s = BYTE_MUL(s, const_alpha); - dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); - } - } + BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector) dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -232,7 +261,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha != 256) { if (const_alpha != 0) { const __m128i nullVector = _mm_set1_epi32(0); @@ -268,6 +297,27 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, } } +void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) +{ + Q_ASSERT(const_alpha >= 0); + Q_ASSERT(const_alpha < 256); + + const quint32 *src = (const quint32 *) srcPixels; + quint32 *dst = (quint32 *) destPixels; + + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + if (const_alpha == 
255) { + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask); + } else { + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { @@ -312,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) } } +void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + qt_memfill32_sse2(destPixels, color, length); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + quint32 *dst = (quint32 *) destPixels; + const __m128i colorVector = _mm_set1_epi32(color); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); + dstVector = _mm_add_epi8(colorVector, dstVector); + _mm_storeu_si128((__m128i *)&dst[x], dstVector); + } + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) { if (count < 3) { diff --git a/src/gui/painting/qgraphicssystem.cpp b/src/gui/painting/qgraphicssystem.cpp index 2ea3d33..69ce47e 100644 --- a/src/gui/painting/qgraphicssystem.cpp +++ b/src/gui/painting/qgraphicssystem.cpp @@ -79,5 +79,9 @@ QPixmapData *QGraphicsSystem::createDefaultPixmapData(QPixmapData::PixelType typ return 0; } +QPixmapData 
*QGraphicsSystem::createPixmapData(QPixmapData *origin) +{ + return createPixmapData(origin->pixelType()); +} QT_END_NAMESPACE diff --git a/src/gui/painting/qgraphicssystem_p.h b/src/gui/painting/qgraphicssystem_p.h index ddca788..1211505 100644 --- a/src/gui/painting/qgraphicssystem_p.h +++ b/src/gui/painting/qgraphicssystem_p.h @@ -64,6 +64,7 @@ class Q_GUI_EXPORT QGraphicsSystem { public: virtual QPixmapData *createPixmapData(QPixmapData::PixelType type) const = 0; + virtual QPixmapData *createPixmapData(QPixmapData *origin); virtual QWindowSurface *createWindowSurface(QWidget *widget) const = 0; virtual ~QGraphicsSystem() = 0; diff --git a/src/gui/painting/qgraphicssystem_runtime.cpp b/src/gui/painting/qgraphicssystem_runtime.cpp index 32a8578..1c3ae10 100644 --- a/src/gui/painting/qgraphicssystem_runtime.cpp +++ b/src/gui/painting/qgraphicssystem_runtime.cpp @@ -416,7 +416,7 @@ void QRuntimeGraphicsSystem::setGraphicsSystem(const QString &name) for (int i = 0; i < m_pixmapDatas.size(); ++i) { QRuntimePixmapData *proxy = m_pixmapDatas.at(i); - QPixmapData *newData = m_graphicsSystem->createPixmapData(proxy->m_data->pixelType()); + QPixmapData *newData = m_graphicsSystem->createPixmapData(proxy->m_data); // ### TODO Optimize. Openvg and s60raster graphics systems could switch internal ARGB32_PRE QImage buffers. 
newData->fromImage(proxy->m_data->toImage(), Qt::AutoColor | Qt::OrderedAlphaDither); delete proxy->m_data; diff --git a/src/gui/painting/qgraphicssystem_runtime_p.h b/src/gui/painting/qgraphicssystem_runtime_p.h index 101a8e7..7aab89c 100644 --- a/src/gui/painting/qgraphicssystem_runtime_p.h +++ b/src/gui/painting/qgraphicssystem_runtime_p.h @@ -61,7 +61,7 @@ QT_BEGIN_NAMESPACE class QRuntimeGraphicsSystem; -class QRuntimePixmapData : public QPixmapData { +class Q_GUI_EXPORT QRuntimePixmapData : public QPixmapData { public: QRuntimePixmapData(const QRuntimeGraphicsSystem *gs, PixelType type); ~QRuntimePixmapData(); diff --git a/src/gui/painting/qmemrotate.cpp b/src/gui/painting/qmemrotate.cpp index c37aa51..6888bb0 100644 --- a/src/gui/painting/qmemrotate.cpp +++ b/src/gui/painting/qmemrotate.cpp @@ -594,4 +594,55 @@ void Q_GUI_EXPORT qt_memrotate90_gl(const quint32 *src, int srcWidth, int srcHei qt_memrotate90_template(src, srcWidth, srcHeight, srcStride, reinterpret_cast<qrgb_gl_rgba *>(dest), dstStride); } +void qt_memrotate90_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate90((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate180_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate180((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl); +} + +void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +void qt_memrotate180_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate180((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +void 
qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl) +{ + qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl); +} + +MemRotateFunc qMemRotateFunctions[QImage::NImageFormats][3] = +// 90, 180, 270 +{ + { 0, 0, 0 }, // Format_Invalid, + { 0, 0, 0 }, // Format_Mono, + { 0, 0, 0 }, // Format_MonoLSB, + { 0, 0, 0 }, // Format_Indexed8, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_RGB32, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32, + { qt_memrotate90_32, qt_memrotate180_32, qt_memrotate270_32 }, // Format_ARGB32_Premultiplied, + { qt_memrotate90_16, qt_memrotate180_16, qt_memrotate270_16 }, // Format_RGB16, + { 0, 0, 0 }, // Format_ARGB8565_Premultiplied, + { 0, 0, 0 }, // Format_RGB666, + { 0, 0, 0 }, // Format_ARGB6666_Premultiplied, + { 0, 0, 0 }, // Format_RGB555, + { 0, 0, 0 }, // Format_ARGB8555_Premultiplied, + { 0, 0, 0 }, // Format_RGB888, + { 0, 0, 0 }, // Format_RGB444, + { 0, 0, 0 } // Format_ARGB4444_Premultiplied, +}; + QT_END_NAMESPACE diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index a212718..09a87aa 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -2419,7 +2419,9 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawImage(r, image, sr); } } else { - const QImage image = pixmap.toImage(); + QRect clippedSource = sr.toAlignedRect().intersected(pixmap.rect()); + const QImage image = pd->toImage(clippedSource); + QRectF translatedSource = sr.translated(-clippedSource.topLeft()); if (image.depth() == 1) { Q_D(QRasterPaintEngine); QRasterPaintEngineState *s = state(); @@ -2430,10 +2432,10 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawBitmap(r.topLeft() + QPointF(s->matrix.dx(), s->matrix.dy()), image, &s->penData); return; } else { - 
drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), sr); + drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), translatedSource); } } else { - drawImage(r, image, sr); + drawImage(r, image, translatedSource); } } } @@ -2551,23 +2553,6 @@ namespace { return NoRotation; } - template <typename T> void memRotate(RotationType type, const T *srcBase, int w, int h, int sbpl, T *dstBase, int dbpl) - { - switch (type) { - case Rotation90: - qt_memrotate90(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case Rotation180: - qt_memrotate180(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case Rotation270: - qt_memrotate270(srcBase, w, h, sbpl, dstBase, dbpl); - break; - case NoRotation: - break; - } - } - inline bool isPixelAligned(const QRectF &rect) { return QRectF(rect.toRect()) == rect; } @@ -2648,7 +2633,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe { RotationType rotationType = qRotationType(s->matrix); - if (rotationType != NoRotation && img.rect().contains(sr.toAlignedRect())) { + if (rotationType != NoRotation && qMemRotateFunctions[d->rasterBuffer->format][rotationType] && img.rect().contains(sr.toAlignedRect())) { QRectF transformedTargetRect = s->matrix.mapRect(r); if ((!(s->renderHints & QPainter::SmoothPixmapTransform) && !(s->renderHints & QPainter::Antialiasing)) @@ -2676,10 +2661,7 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe uint cw = clippedSourceRect.width(); uint ch = clippedSourceRect.height(); - if (d->rasterBuffer->format == QImage::Format_RGB16) - memRotate(rotationType, (quint16 *)srcBase, cw, ch, sbpl, (quint16 *)dstBase, dbpl); - else - memRotate(rotationType, (quint32 *)srcBase, cw, ch, sbpl, (quint32 *)dstBase, dbpl); + qMemRotateFunctions[d->rasterBuffer->format][rotationType](srcBase, cw, ch, sbpl, dstBase, dbpl); return; } @@ -2688,7 +2670,11 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe if 
(s->matrix.type() > QTransform::TxTranslate || stretch_sr) { - if (s->flags.fast_images) { + QRectF targetBounds = s->matrix.mapRect(r); + bool exceedsPrecision = targetBounds.width() > 0xffff + || targetBounds.height() > 0xffff; + + if (s->flags.fast_images && !exceedsPrecision) { if (s->matrix.type() > QTransform::TxScale) { SrcOverTransformFunc func = qTransformFunctions[d->rasterBuffer->format][img.format()]; if (func && (!clip || clip->hasRectClip)) { diff --git a/src/gui/painting/qpainter.cpp b/src/gui/painting/qpainter.cpp index 71bc990..2ea6673 100644 --- a/src/gui/painting/qpainter.cpp +++ b/src/gui/painting/qpainter.cpp @@ -5958,7 +5958,7 @@ void QPainter::drawText(const QPointF &p, const QString &str, int tf, int justif Q_ASSERT_X(false, Q_FUNC_INFO, "stringToCMap shouldn't fail twice"); } - QTextItemInt gf(glyphs, &d->state->font, fontEngine); + QTextItemInt gf(glyphs, &d->state->font, str.data(), len, fontEngine); drawTextItem(p, gf); return; } diff --git a/src/gui/painting/qpathclipper.cpp b/src/gui/painting/qpathclipper.cpp index 78553c9..a17b7c1 100644 --- a/src/gui/painting/qpathclipper.cpp +++ b/src/gui/painting/qpathclipper.cpp @@ -86,9 +86,11 @@ static qreal dot(const QPointF &a, const QPointF &b) return a.x() * b.x() + a.y() * b.y(); } -static QPointF normalize(const QPointF &p) +static void normalize(double &x, double &y) { - return p / qSqrt(p.x() * p.x() + p.y() * p.y()); + double reciprocal = 1 / qSqrt(x * x + y * y); + x *= reciprocal; + y *= reciprocal; } struct QIntersection @@ -1017,8 +1019,8 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const const QPathEdge *ap = edge(a); const QPathEdge *bp = edge(b); - qreal a_angle = ap->angle; - qreal b_angle = bp->angle; + double a_angle = ap->angle; + double b_angle = bp->angle; if (vertex == ap->second) a_angle = ap->invAngle; @@ -1026,7 +1028,7 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const if (vertex == bp->second) b_angle = bp->invAngle; - qreal result = b_angle - 
a_angle; + double result = b_angle - a_angle; if (result >= 128.) return result - 128.; @@ -1036,26 +1038,6 @@ qreal QWingedEdge::delta(int vertex, int a, int b) const return result; } -static inline QPointF tangentAt(const QWingedEdge &list, int vi, int ei) -{ - const QPathEdge *ep = list.edge(ei); - Q_ASSERT(ep); - - qreal sign; - - if (ep->first == vi) { - sign = 1; - } else { - sign = -1; - } - - const QPointF a = *list.vertex(ep->first); - const QPointF b = *list.vertex(ep->second); - QPointF normal = b - a; - - return normalize(sign * normal); -} - static inline QPointF midPoint(const QWingedEdge &list, int ei) { const QPathEdge *ep = list.edge(ei); @@ -1191,7 +1173,7 @@ static int commonEdge(const QWingedEdge &list, int a, int b) return -1; } -static qreal computeAngle(const QPointF &v) +static double computeAngle(const QPointF &v) { #if 1 if (v.x() == 0) { @@ -1200,15 +1182,17 @@ static qreal computeAngle(const QPointF &v) return v.x() <= 0 ? 32. : 96.; } - QPointF nv = normalize(v); - if (nv.y() < 0) { - if (nv.x() < 0) { // 0 - 32 - return -32. * nv.x(); + double vx = v.x(); + double vy = v.y(); + normalize(vx, vy); + if (vy < 0) { + if (vx < 0) { // 0 - 32 + return -32. * vx; } else { // 96 - 128 - return 128. - 32. * nv.x(); + return 128. - 32. * vx; } } else { // 32 - 96 - return 64. + 32 * nv.x(); + return 64. + 32. * vx; } #else // doesn't seem to be robust enough diff --git a/src/gui/painting/qpathclipper_p.h b/src/gui/painting/qpathclipper_p.h index fab618d..bdad4e1 100644 --- a/src/gui/painting/qpathclipper_p.h +++ b/src/gui/painting/qpathclipper_p.h @@ -148,8 +148,8 @@ public: int first; int second; - qreal angle; - qreal invAngle; + double angle; + double invAngle; int next(Traversal traversal, Direction direction) const; |