diff options
author | Qt Continuous Integration System <qt-info@nokia.com> | 2010-02-20 13:41:40 (GMT) |
---|---|---|
committer | Qt Continuous Integration System <qt-info@nokia.com> | 2010-02-20 13:41:40 (GMT) |
commit | 5c4234aa7499d879f50d1475aff8cce7d3ff6c11 (patch) | |
tree | 10c930f9d1526cb2bc779469e7b97cebc21619a0 /src/gui/painting | |
parent | d05f83919949fd9604e6d96858a8404c9a580def (diff) | |
parent | c9d73831910d4959e17899497335bd7fc4e08935 (diff) | |
download | Qt-5c4234aa7499d879f50d1475aff8cce7d3ff6c11.zip Qt-5c4234aa7499d879f50d1475aff8cce7d3ff6c11.tar.gz Qt-5c4234aa7499d879f50d1475aff8cce7d3ff6c11.tar.bz2 |
Merge branch 'qt-master-from-4.6' of scm.dev.nokia.troll.no:qt/qt-integration into master-integration
* 'qt-master-from-4.6' of scm.dev.nokia.troll.no:qt/qt-integration: (323 commits)
Doc: these files are NOT part of the test suite of the Qt toolkit
Autotest: make the licenseCheck test also check .qdoc files
use egl properties when creating surfaces on symbian
work around the current include file structure on Symbian^3
Get rid of the dependency on the Symbian app layer
Fix incorrect license headers
Fix incorrect license headers
tst_qnetworkreply: Check if TCP/HTTP connection got re-used
Fix bad conflict resolution from 4baa9dfb5273d7b501dcb3f456983262c53cc8d1
Fix building in a namespace on Windows
Supressed Icon sizes on QPushButton in QS60Style
Ensure that posted events are sent on Windows
Fixed off-by-one blending errors in the NEON drawhelper code.
Fixed libstdcpp.dll version autodetection for Symbian
Fixed linkage failure when building qmake on Unix platforms
Factored epocRoot implementation out of qmake
Factored readRegistryKey implementation out of qmake
Cetest extensions for Windows Mobile device power operations.
Remote lib extensions for Windows Mobile device power operations.
ColorDialog is always shown as stripped-down version (for QVGA)
...
Diffstat (limited to 'src/gui/painting')
-rw-r--r-- | src/gui/painting/qcups.cpp | 4 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 49 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 130 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_sse2.cpp | 211 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_x86_p.h | 8 | ||||
-rw-r--r-- | src/gui/painting/qpaintengine_raster.cpp | 18 | ||||
-rw-r--r-- | src/gui/painting/qpainter.cpp | 7 |
7 files changed, 327 insertions, 100 deletions
diff --git a/src/gui/painting/qcups.cpp b/src/gui/painting/qcups.cpp index 7903762..ac41692 100644 --- a/src/gui/painting/qcups.cpp +++ b/src/gui/painting/qcups.cpp @@ -342,7 +342,9 @@ bool QCUPSSupport::printerHasPPD(const char *printerName) { if (!isAvailable()) return false; - return _cupsGetPPD(printerName) != 0; + const char *ppdFile = _cupsGetPPD(printerName); + unlink(ppdFile); + return (ppdFile != 0); } QString QCUPSSupport::unicodeString(const char *s) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 7a3da20..070491d 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -8092,20 +8092,43 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_ARGB32_Premultiplied].blendColor = qt_blend_color_argb_sse3dnow; } #endif // 3DNOW - extern void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha); - extern void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - int w, int h, - int const_alpha); - qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse; - qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse; - qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse; - qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse; - } + +#ifdef QT_HAVE_SSE2 + if (features & SSE2) { + extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + extern void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + + + qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; + qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; + qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2; + qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2; + } else +#endif + { + extern void qt_blend_rgb32_on_rgb32_sse(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + extern void qt_blend_argb32_on_argb32_sse(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + + + qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse; + qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse; + qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse; + qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse; + } +} #endif // SSE #ifdef QT_HAVE_IWMMXT diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index 25860a0..77c5202 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -48,43 +48,43 @@ QT_BEGIN_NAMESPACE -static inline int16x8_t qvdiv_255_s16(int16x8_t x, int16x8_t half) +static inline uint16x8_t qvdiv_255_u16(uint16x8_t x, uint16x8_t half) { // result = (x + (x >> 8) + 0x80) >> 8 - const int16x8_t temp = vshrq_n_s16(x, 8); // x >> 8 - const int16x8_t sum_part = vaddq_s16(x, half); // x + 0x80 - const int16x8_t sum = vaddq_s16(temp, sum_part); + const uint16x8_t temp = vshrq_n_u16(x, 8); // x >> 8 + const uint16x8_t sum_part = vaddq_u16(x, half); // x + 0x80 + const uint16x8_t sum = vaddq_u16(temp, sum_part); - return vreinterpretq_s16_u16(vshrq_n_u16(vreinterpretq_u16_s16(sum), 8)); + return vshrq_n_u16(sum, 8); } -static inline int16x8_t qvbyte_mul_s16(int16x8_t x, int16x8_t alpha, int16x8_t half) +static inline uint16x8_t qvbyte_mul_u16(uint16x8_t x, uint16x8_t alpha, uint16x8_t half) { // t = qRound(x * alpha / 255.0) - const int16x8_t t = vmulq_s16(x, alpha); // t - return qvdiv_255_s16(t, half); + const uint16x8_t t = vmulq_u16(x, alpha); // t + return qvdiv_255_u16(t, half); } -static inline int16x8_t qvinterpolate_pixel_255(int16x8_t x, int16x8_t a, int16x8_t y, int16x8_t b, int16x8_t half) +static inline uint16x8_t qvinterpolate_pixel_255(uint16x8_t x, uint16x8_t a, uint16x8_t y, uint16x8_t b, uint16x8_t half) { // t = x * a + y * b - const int16x8_t ta = vmulq_s16(x, a); - const int16x8_t tb = vmulq_s16(y, b); + const uint16x8_t ta = vmulq_u16(x, a); + const uint16x8_t tb = vmulq_u16(y, b); - return qvdiv_255_s16(vaddq_s16(ta, tb), half); + return qvdiv_255_u16(vaddq_u16(ta, tb), half); } -static inline int16x8_t qvsource_over_s16(int16x8_t src16, int16x8_t dst16, int16x8_t half, int16x8_t full) +static inline uint16x8_t qvsource_over_u16(uint16x8_t src16, uint16x8_t dst16, uint16x8_t half, uint16x8_t full) { - const int16x4_t alpha16_high = vdup_lane_s16(vget_high_s16(src16), 3); - const int16x4_t alpha16_low = vdup_lane_s16(vget_low_s16(src16), 3); + const uint16x4_t alpha16_high = vdup_lane_u16(vget_high_u16(src16), 3); + const uint16x4_t alpha16_low = vdup_lane_u16(vget_low_u16(src16), 3); - const int16x8_t alpha16 = vsubq_s16(full, vcombine_s16(alpha16_low, alpha16_high)); + const uint16x8_t alpha16 = vsubq_u16(full, vcombine_u16(alpha16_low, alpha16_high)); - return vaddq_s16(src16, qvbyte_mul_s16(dst16, alpha16, half)); + return vaddq_u16(src16, qvbyte_mul_u16(dst16, alpha16, half)); } void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, @@ -94,21 +94,21 @@ void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, { const uint *src = (const uint *) srcPixels; uint *dst = (uint *) destPixels; - int16x8_t half = vdupq_n_s16(0x80); - int16x8_t full = vdupq_n_s16(0xff); + uint16x8_t half = vdupq_n_u16(0x80); + uint16x8_t full = vdupq_n_u16(0xff); if (const_alpha == 256) { for (int y = 0; y < h; ++y) { int x = 0; for (; x < w-3; x += 4) { - int32x4_t src32 = vld1q_s32((int32_t *)&src[x]); + uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); if ((src[x] & src[x+1] & src[x+2] & src[x+3]) >= 0xff000000) { // all opaque - vst1q_s32((int32_t *)&dst[x], src32); + vst1q_u32((uint32_t *)&dst[x], src32); } else if (src[x] | src[x+1] | src[x+2] | src[x+3]) { - int32x4_t dst32 = vld1q_s32((int32_t *)&dst[x]); + uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]); - const uint8x16_t src8 = vreinterpretq_u8_s32(src32); - const uint8x16_t dst8 = vreinterpretq_u8_s32(dst32); + const uint8x16_t src8 = vreinterpretq_u8_u32(src32); + const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32); const uint8x8_t src8_low = vget_low_u8(src8); const uint8x8_t dst8_low = vget_low_u8(dst8); @@ -116,19 +116,19 @@ void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, const uint8x8_t src8_high = vget_high_u8(src8); const uint8x8_t dst8_high = vget_high_u8(dst8); - const int16x8_t src16_low = vreinterpretq_s16_u16(vmovl_u8(src8_low)); - const int16x8_t dst16_low = vreinterpretq_s16_u16(vmovl_u8(dst8_low)); + const uint16x8_t src16_low = vmovl_u8(src8_low); + const uint16x8_t dst16_low = vmovl_u8(dst8_low); - const int16x8_t src16_high = vreinterpretq_s16_u16(vmovl_u8(src8_high)); - const int16x8_t dst16_high = vreinterpretq_s16_u16(vmovl_u8(dst8_high)); + const uint16x8_t src16_high = vmovl_u8(src8_high); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); - const int16x8_t result16_low = qvsource_over_s16(src16_low, dst16_low, half, full); - const int16x8_t result16_high = qvsource_over_s16(src16_high, dst16_high, half, full); + const uint16x8_t result16_low = qvsource_over_u16(src16_low, dst16_low, half, full); + const uint16x8_t result16_high = qvsource_over_u16(src16_high, dst16_high, half, full); - const int32x2_t result32_low = vreinterpret_s32_s8(vmovn_s16(result16_low)); - const int32x2_t result32_high = vreinterpret_s32_s8(vmovn_s16(result16_high)); + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); - vst1q_s32((int32_t *)&dst[x], vcombine_s32(result32_low, result32_high)); + vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); } } for (; x<w; ++x) { @@ -143,16 +143,16 @@ void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, } } else if (const_alpha != 0) { const_alpha = (const_alpha * 255) >> 8; - int16x8_t const_alpha16 = vdupq_n_s16(const_alpha); + uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha); for (int y = 0; y < h; ++y) { int x = 0; for (; x < w-3; x += 4) { if (src[x] | src[x+1] | src[x+2] | src[x+3]) { - int32x4_t src32 = vld1q_s32((int32_t *)&src[x]); - int32x4_t dst32 = vld1q_s32((int32_t *)&dst[x]); + uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); + uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]); - const uint8x16_t src8 = vreinterpretq_u8_s32(src32); - const uint8x16_t dst8 = vreinterpretq_u8_s32(dst32); + const uint8x16_t src8 = vreinterpretq_u8_u32(src32); + const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32); const uint8x8_t src8_low = vget_low_u8(src8); const uint8x8_t dst8_low = vget_low_u8(dst8); @@ -160,22 +160,22 @@ void qt_blend_argb32_on_argb32_neon(uchar *destPixels, int dbpl, const uint8x8_t src8_high = vget_high_u8(src8); const uint8x8_t dst8_high = vget_high_u8(dst8); - const int16x8_t src16_low = vreinterpretq_s16_u16(vmovl_u8(src8_low)); - const int16x8_t dst16_low = vreinterpretq_s16_u16(vmovl_u8(dst8_low)); + const uint16x8_t src16_low = vmovl_u8(src8_low); + const uint16x8_t dst16_low = vmovl_u8(dst8_low); - const int16x8_t src16_high = vreinterpretq_s16_u16(vmovl_u8(src8_high)); - const int16x8_t dst16_high = vreinterpretq_s16_u16(vmovl_u8(dst8_high)); + const uint16x8_t src16_high = vmovl_u8(src8_high); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); - const int16x8_t srcalpha16_low = qvbyte_mul_s16(src16_low, const_alpha16, half); - const int16x8_t srcalpha16_high = qvbyte_mul_s16(src16_high, const_alpha16, half); + const uint16x8_t srcalpha16_low = qvbyte_mul_u16(src16_low, const_alpha16, half); + const uint16x8_t srcalpha16_high = qvbyte_mul_u16(src16_high, const_alpha16, half); - const int16x8_t result16_low = qvsource_over_s16(srcalpha16_low, dst16_low, half, full); - const int16x8_t result16_high = qvsource_over_s16(srcalpha16_high, dst16_high, half, full); + const uint16x8_t result16_low = qvsource_over_u16(srcalpha16_low, dst16_low, half, full); + const uint16x8_t result16_high = qvsource_over_u16(srcalpha16_high, dst16_high, half, full); - const int32x2_t result32_low = vreinterpret_s32_s8(vmovn_s16(result16_low)); - const int32x2_t result32_high = vreinterpret_s32_s8(vmovn_s16(result16_high)); + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); - vst1q_s32((int32_t *)&dst[x], vcombine_s32(result32_low, result32_high)); + vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); } } for (; x<w; ++x) { @@ -206,19 +206,19 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl, if (const_alpha != 0) { const uint *src = (const uint *) srcPixels; uint *dst = (uint *) destPixels; - int16x8_t half = vdupq_n_s16(0x80); + uint16x8_t half = vdupq_n_u16(0x80); const_alpha = (const_alpha * 255) >> 8; int one_minus_const_alpha = 255 - const_alpha; - int16x8_t const_alpha16 = vdupq_n_s16(const_alpha); - int16x8_t one_minus_const_alpha16 = vdupq_n_s16(255 - const_alpha); + uint16x8_t const_alpha16 = vdupq_n_u16(const_alpha); + uint16x8_t one_minus_const_alpha16 = vdupq_n_u16(255 - const_alpha); for (int y = 0; y < h; ++y) { int x = 0; for (; x < w-3; x += 4) { - int32x4_t src32 = vld1q_s32((int32_t *)&src[x]); - int32x4_t dst32 = vld1q_s32((int32_t *)&dst[x]); + uint32x4_t src32 = vld1q_u32((uint32_t *)&src[x]); + uint32x4_t dst32 = vld1q_u32((uint32_t *)&dst[x]); - const uint8x16_t src8 = vreinterpretq_u8_s32(src32); - const uint8x16_t dst8 = vreinterpretq_u8_s32(dst32); + const uint8x16_t src8 = vreinterpretq_u8_u32(src32); + const uint8x16_t dst8 = vreinterpretq_u8_u32(dst32); const uint8x8_t src8_low = vget_low_u8(src8); const uint8x8_t dst8_low = vget_low_u8(dst8); @@ -226,19 +226,19 @@ void qt_blend_rgb32_on_rgb32_neon(uchar *destPixels, int dbpl, const uint8x8_t src8_high = vget_high_u8(src8); const uint8x8_t dst8_high = vget_high_u8(dst8); - const int16x8_t src16_low = vreinterpretq_s16_u16(vmovl_u8(src8_low)); - const int16x8_t dst16_low = vreinterpretq_s16_u16(vmovl_u8(dst8_low)); + const uint16x8_t src16_low = vmovl_u8(src8_low); + const uint16x8_t dst16_low = vmovl_u8(dst8_low); - const int16x8_t src16_high = vreinterpretq_s16_u16(vmovl_u8(src8_high)); - const int16x8_t dst16_high = vreinterpretq_s16_u16(vmovl_u8(dst8_high)); + const uint16x8_t src16_high = vmovl_u8(src8_high); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); - const int16x8_t result16_low = qvinterpolate_pixel_255(src16_low, const_alpha16, dst16_low, one_minus_const_alpha16, half); - const int16x8_t result16_high = qvinterpolate_pixel_255(src16_high, const_alpha16, dst16_high, one_minus_const_alpha16, half); + const uint16x8_t result16_low = qvinterpolate_pixel_255(src16_low, const_alpha16, dst16_low, one_minus_const_alpha16, half); + const uint16x8_t result16_high = qvinterpolate_pixel_255(src16_high, const_alpha16, dst16_high, one_minus_const_alpha16, half); - const int32x2_t result32_low = vreinterpret_s32_s8(vmovn_s16(result16_low)); - const int32x2_t result32_high = vreinterpret_s32_s8(vmovn_s16(result16_high)); + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); - vst1q_s32((int32_t *)&dst[x], vcombine_s32(result32_low, result32_high)); + vst1q_u32((uint32_t *)&dst[x], vcombine_u32(result32_low, result32_high)); } for (; x<w; ++x) { uint s = src[x]; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index dd6fa1b..6ac64d3 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -57,6 +57,217 @@ QT_BEGIN_NAMESPACE +/* + * Multiply the components of pixelVector by alphaChannel + * Each 32bits components of alphaChannel must be in the form 0x00AA00AA + * colorMask must have 0x00ff00ff on each 32 bits component + * half must have the value 128 (0x80) for each 32 bits compnent + */ +#define BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half) \ +{ \ + /* 1. separate the colors in 2 vectors so each color is on 16 bits \ + (in order to be multiplied by the alpha \ + each 32 bit of dstVectorAG are in the form 0x00AA00GG \ + each 32 bit of dstVectorRB are in the form 0x00RR00BB */\ + __m128i pixelVectorAG = _mm_srli_epi16(pixelVector, 8); \ + __m128i pixelVectorRB = _mm_and_si128(pixelVector, colorMask); \ + \ + /* 2. multiply the vectors by the alpha channel */\ + pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); \ + pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); \ + \ + /* 3. devide by 255, that's the tricky part. \ + we do it like for BYTE_MUL(), with bit shift: X/255 ~= (X + X/256 + rounding)/256 */ \ + /** so first (X + X/256 + rounding) */\ + pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); \ + pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); \ + pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); \ + pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); \ + \ + /** second devide by 256 */\ + pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); \ + /** for AG, we could >> 8 to divide followed by << 8 to put the \ + bytes in the correct position. By masking instead, we execute \ + only one instruction */\ + pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); \ + \ + /* 4. combine the 2 pairs of colors */ \ + result = _mm_or_si128(pixelVectorAG, pixelVectorRB); \ +} + +/* + * Each 32bits components of alphaChannel must be in the form 0x00AA00AA + * oneMinusAlphaChannel must be 255 - alpha for each 32 bits component + * colorMask must have 0x00ff00ff on each 32 bits component + * half must have the value 128 (0x80) for each 32 bits compnent + */ +#define INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, alphaChannel, oneMinusAlphaChannel, colorMask, half) { \ + /* interpolate AG */\ + __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); \ + __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); \ + __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, alphaChannel); \ + __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusAlphaChannel); \ + __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); \ + finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); \ + finalAG = _mm_add_epi16(finalAG, half); \ + finalAG = _mm_andnot_si128(colorMask, finalAG); \ + \ + /* interpolate RB */\ + __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); \ + __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); \ + __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, alphaChannel); \ + __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusAlphaChannel); \ + __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); \ + finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); \ + finalRB = _mm_add_epi16(finalRB, half); \ + finalRB = _mm_srli_epi16(finalRB, 8); \ + \ + /* combine */\ + result = _mm_or_si128(finalAG, finalRB); \ +} + +void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha) +{ + const quint32 *src = (const quint32 *) srcPixels; + quint32 *dst = (uint *) destPixels; + if (const_alpha == 256) { + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + for (int y = 0; y < h; ++y) { + int x = 0; + for (; x < w-3; x += 4) { + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { + // all opaque + _mm_storeu_si128((__m128i *)&dst[x], srcVector); + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { + // not fully transparent + // result = s + d * (1-alpha) + + // extract the alpha channel on 2 x 16 bits + // so we have room for the multiplication + // each 32 bits will be in the form 0x00AA00AA + // with A being the 1 - alpha + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); + alphaChannel = _mm_sub_epi16(one, alphaChannel); + + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + __m128i destMultipliedByOneMinusAlpha; + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); + + // result = s + d * (1-alpha) + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); + _mm_storeu_si128((__m128i *)&dst[x], result); + } + } + for (; x<w; ++x) { + uint s = src[x]; + if (s >= 0xff000000) + dst[x] = s; + else if (s != 0) + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + dst = (quint32 *)(((uchar *) dst) + dbpl); + src = (const quint32 *)(((const uchar *) src) + sbpl); + } + } else if (const_alpha != 0) { + // dest = (s + d * sia) * ca + d * cia + // = s * ca + d * (sia * ca + cia) + // = s * ca + d * (1 - sa*ca) + const_alpha = (const_alpha * 255) >> 8; + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + for (int y = 0; y < h; ++y) { + int x = 0; + for (; x < w-3; x += 4) { + __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { + BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); + + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); + alphaChannel = _mm_sub_epi16(one, alphaChannel); + + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + __m128i destMultipliedByOneMinusAlpha; + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); + + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); + _mm_storeu_si128((__m128i *)&dst[x], result); + } + } + for (; x<w; ++x) { + quint32 s = src[x]; + if (s != 0) { + s = BYTE_MUL(s, const_alpha); + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); + } + } + dst = (quint32 *)(((uchar *) dst) + dbpl); + src = (const quint32 *)(((const uchar *) src) + sbpl); + } + } +} + +// qblendfunctions.cpp +void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); + +void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha) +{ + const quint32 *src = (const quint32 *) srcPixels; + quint32 *dst = (uint *) destPixels; + if (const_alpha != 256) { + if (const_alpha != 0) { + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + + const_alpha = (const_alpha * 255) >> 8; + int one_minus_const_alpha = 255 - const_alpha; + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + const __m128i oneMinusConstAlpha = _mm_set1_epi16(one_minus_const_alpha); + for (int y = 0; y < h; ++y) { + int x = 0; + for (; x < w-3; x += 4) { + __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + __m128i result; + INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, constAlphaVector, oneMinusConstAlpha, colorMask, half); + _mm_storeu_si128((__m128i *)&dst[x], result); + } + } + for (; x<w; ++x) { + quint32 s = src[x]; + s = BYTE_MUL(s, const_alpha); + dst[x] = INTERPOLATE_PIXEL_255(src[x], const_alpha, dst[x], one_minus_const_alpha); + } + dst = (quint32 *)(((uchar *) dst) + dbpl); + src = (const quint32 *)(((const uchar *) src) + sbpl); + } + } + } else { + qt_blend_rgb32_on_rgb32(destPixels, dbpl, srcPixels, sbpl, w, h, const_alpha); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { diff --git a/src/gui/painting/qdrawhelper_x86_p.h b/src/gui/painting/qdrawhelper_x86_p.h index 30aadd0..d7282a7 100644 --- a/src/gui/painting/qdrawhelper_x86_p.h +++ b/src/gui/painting/qdrawhelper_x86_p.h @@ -114,6 +114,14 @@ void qt_bitmapblit32_sse2(QRasterBuffer *rasterBuffer, int x, int y, void qt_bitmapblit16_sse2(QRasterBuffer *rasterBuffer, int x, int y, quint32 color, const uchar *src, int width, int height, int stride); +void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); +void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + int w, int h, + int const_alpha); #endif // QT_HAVE_SSE2 #ifdef QT_HAVE_IWMMXT diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index 41c4f14..60265c5 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -101,10 +101,6 @@ #endif #include <limits.h> -#if defined(QT_NO_FPU) || (_MSC_VER >= 1300 && _MSC_VER < 1400) -# define FLOATING_POINT_BUGGY_OR_NO_FPU -#endif - QT_BEGIN_NAMESPACE extern bool qt_scaleForTransform(const QTransform &transform, qreal *scale); // qtransform.cpp @@ -3694,9 +3690,6 @@ void QRasterPaintEngine::drawEllipse(const QRectF &rect) if (((qpen_style(s->lastPen) == Qt::SolidLine && s->flags.fast_pen) || (qpen_style(s->lastPen) == Qt::NoPen && !s->flags.antialiased)) && qMax(rect.width(), rect.height()) < QT_RASTER_COORD_LIMIT -#ifdef FLOATING_POINT_BUGGY_OR_NO_FPU - && qMax(rect.width(), rect.height()) < 128 // integer math breakdown -#endif && s->matrix.type() <= QTransform::TxScale) // no shear { ensureBrush(); @@ -6069,15 +6062,9 @@ static void drawEllipse_midpoint_i(const QRect &rect, const QRect &clip, ProcessSpans pen_func, ProcessSpans brush_func, QSpanData *pen_data, QSpanData *brush_data) { -#ifdef FLOATING_POINT_BUGGY_OR_NO_FPU // no fpu, so use fixed point - const QFixed a = QFixed(rect.width()) >> 1; - const QFixed b = QFixed(rect.height()) >> 1; - QFixed d = b*b - (a*a*b) + ((a*a) >> 2); -#else const qreal a = qreal(rect.width()) / 2; const qreal b = qreal(rect.height()) / 2; qreal d = b*b - (a*a*b) + 0.25*a*a; -#endif int x = 0; int y = (rect.height() + 1) / 2; @@ -6100,12 +6087,7 @@ static void drawEllipse_midpoint_i(const QRect &rect, const QRect &clip, pen_func, brush_func, pen_data, brush_data); // region 2 -#ifdef FLOATING_POINT_BUGGY_OR_NO_FPU - d = b*b*(x + (QFixed(1) >> 1))*(x + (QFixed(1) >> 1)) - + a*a*((y - 1)*(y - 1) - b*b); -#else d = b*b*(x + 0.5)*(x + 0.5) + a*a*((y - 1)*(y - 1) - b*b); -#endif const int miny = rect.height() & 0x1; while (y > miny) { if (d < 0) { // select SE diff --git a/src/gui/painting/qpainter.cpp b/src/gui/painting/qpainter.cpp index 1b8b718..e69512d 100644 --- a/src/gui/painting/qpainter.cpp +++ b/src/gui/painting/qpainter.cpp @@ -710,13 +710,14 @@ void QPainterPrivate::updateEmulationSpecifier(QPainterState *s) bool penTextureAlpha = false; if (penBrush.style() == Qt::TexturePattern) penTextureAlpha = qHasPixmapTexture(penBrush) - ? penBrush.texture().hasAlpha() + ? (penBrush.texture().depth() > 1) && penBrush.texture().hasAlpha() : penBrush.textureImage().hasAlphaChannel(); bool brushTextureAlpha = false; - if (s->brush.style() == Qt::TexturePattern) + if (s->brush.style() == Qt::TexturePattern) { brushTextureAlpha = qHasPixmapTexture(s->brush) - ? s->brush.texture().hasAlpha() + ? (s->brush.texture().depth() > 1) && s->brush.texture().hasAlpha() : s->brush.textureImage().hasAlphaChannel(); + } if (((penBrush.style() == Qt::TexturePattern && penTextureAlpha) || (s->brush.style() == Qt::TexturePattern && brushTextureAlpha)) && !engine->hasFeature(QPaintEngine::MaskedBrush)) |