summaryrefslogtreecommitdiffstats
path: root/src/gui/painting
diff options
context:
space:
mode:
authorQt Continuous Integration System <qt-info@nokia.com>2010-06-24 04:32:39 (GMT)
committerQt Continuous Integration System <qt-info@nokia.com>2010-06-24 04:32:39 (GMT)
commit62a6660979b036d5ed32917d7b9ce86018150df7 (patch)
treeadd0e11490022e5222921bfc1fdb3c54a348205d /src/gui/painting
parente5722f539888913b9bea4f91db95f5e2c5fceed1 (diff)
parent9de452bba5b592402ced6f20fbdc6d0b5c075416 (diff)
downloadQt-62a6660979b036d5ed32917d7b9ce86018150df7.zip
Qt-62a6660979b036d5ed32917d7b9ce86018150df7.tar.gz
Qt-62a6660979b036d5ed32917d7b9ce86018150df7.tar.bz2
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2 into 4.7-integration
* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-2: Normalize integers when calling glVertexAttribPointer() Add an implementation of comp_func_solid_SourceOver_neon() with Neon. Fix the casts of qdrawhelper_sse2 Add a SSE2 implementation of comp_func_solid_SourceOver() Add a SSE2 version of comp_func_SourceOver() Fixed missing copy of raster pixmap data after change fb76a872e20bd. Fixed QPixmap::toImage() bug introduced in fb76a872e20bd. Optimized sub-rect copying / painting of QPixmaps. Fixed crash in the fast blend functions for raster
Diffstat (limited to 'src/gui/painting')
-rw-r--r--src/gui/painting/qdrawhelper.cpp11
-rw-r--r--src/gui/painting/qdrawhelper_neon.cpp43
-rw-r--r--src/gui/painting/qdrawhelper_neon_p.h2
-rw-r--r--src/gui/painting/qdrawhelper_sse2.cpp202
-rw-r--r--src/gui/painting/qpaintengine_raster.cpp14
5 files changed, 205 insertions, 67 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index bfa1136..5727b3c 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -7817,6 +7817,15 @@ void qInitDrawhelperAsm()
#ifdef QT_HAVE_SSE2
if (features & SSE2) {
+ extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels,
+ const uint *srcPixels,
+ int length,
+ uint const_alpha);
+ extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha);
+
+ functionForModeAsm[0] = comp_func_SourceOver_sse2;
+ functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2;
+
extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@@ -7826,7 +7835,6 @@ void qInitDrawhelperAsm()
int w, int h,
int const_alpha);
-
qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2;
qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2;
@@ -7890,6 +7898,7 @@ void qInitDrawhelperAsm()
qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon;
functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon;
+ functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon;
destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon;
destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon;
}
diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp
index ee5f24a..3ce90d2 100644
--- a/src/gui/painting/qdrawhelper_neon.cpp
+++ b/src/gui/painting/qdrawhelper_neon.cpp
@@ -579,6 +579,49 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int
}
}
+void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha)
+{
+ if ((const_alpha & qAlpha(color)) == 255) {
+ QT_MEMFILL_UINT(destPixels, length, color);
+ } else {
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha);
+
+ const quint32 minusAlphaOfColor = qAlpha(~color);
+ int x = 0;
+
+ uint32_t *dst = (uint32_t *) destPixels;
+ const uint32x4_t colorVector = vdupq_n_u32(color);
+ uint16x8_t half = vdupq_n_u16(0x80);
+ const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor);
+
+ for (; x < length-3; x += 4) {
+ uint32x4_t dstVector = vld1q_u32(&dst[x]);
+
+ const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector);
+
+ const uint8x8_t dst8_low = vget_low_u8(dst8);
+ const uint8x8_t dst8_high = vget_high_u8(dst8);
+
+ const uint16x8_t dst16_low = vmovl_u8(dst8_low);
+ const uint16x8_t dst16_high = vmovl_u8(dst8_high);
+
+ const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half);
+ const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half);
+
+ const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low));
+ const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high));
+
+ uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high);
+ uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels);
+ vst1q_u32(&dst[x], colorPlusBlendedPixels);
+ }
+
+ for (;x < length; ++x)
+ destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
+ }
+}
+
QT_END_NAMESPACE
#endif // QT_HAVE_NEON
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h
index d6a4509..c054a1e 100644
--- a/src/gui/painting/qdrawhelper_neon_p.h
+++ b/src/gui/painting/qdrawhelper_neon_p.h
@@ -127,6 +127,8 @@ uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer,
void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer,
int x, int y, const uint *buffer, int length);
+void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha);
+
#endif // QT_HAVE_NEON
QT_END_NAMESPACE
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index 6ac64d3..e2a69ec 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -126,13 +126,100 @@ QT_BEGIN_NAMESPACE
result = _mm_or_si128(finalAG, finalRB); \
}
+// Basically blend src over dst with the const alpha defined as constAlphaVector.
+// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as:
+//const __m128i nullVector = _mm_set1_epi32(0);
+//const __m128i half = _mm_set1_epi16(0x80);
+//const __m128i one = _mm_set1_epi16(0xff);
+//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+//
+// The computation being done is:
+// result = s + d * (1-alpha)
+// with shortcuts if fully opaque or fully transparent.
+#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
+ int x = 0; \
+ for (; x < length-3; x += 4) { \
+ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
+ const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
+ if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
+ /* all opaque */ \
+ _mm_storeu_si128((__m128i *)&dst[x], srcVector); \
+ } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
+ /* not fully transparent */ \
+ /* extract the alpha channel on 2 x 16 bits */ \
+ /* so we have room for the multiplication */ \
+ /* each 32 bits will be in the form 0x00AA00AA */ \
+ /* with A being the 1 - alpha */ \
+ __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+ alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+ alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+ const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+ __m128i destMultipliedByOneMinusAlpha; \
+ BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+ /* result = s + d * (1-alpha) */\
+ const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+ _mm_storeu_si128((__m128i *)&dst[x], result); \
+ } \
+ } \
+ for (; x < length; ++x) { \
+ uint s = src[x]; \
+ if (s >= 0xff000000) \
+ dst[x] = s; \
+ else if (s != 0) \
+ dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+ } \
+}
+
+// Basically blend src over dst with the const alpha defined as constAlphaVector.
+// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as:
+//const __m128i nullVector = _mm_set1_epi32(0);
+//const __m128i half = _mm_set1_epi16(0x80);
+//const __m128i one = _mm_set1_epi16(0xff);
+//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+//
+// The computation being done is:
+// dest = (s + d * sia) * ca + d * cia
+// = s * ca + d * (sia * ca + cia)
+// = s * ca + d * (1 - sa*ca)
+#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
+{ \
+ int x = 0; \
+ for (; x < length-3; x += 4) { \
+ __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
+ if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
+ BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \
+\
+ __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+ alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+ alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+ const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+ __m128i destMultipliedByOneMinusAlpha; \
+ BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+ const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+ _mm_storeu_si128((__m128i *)&dst[x], result); \
+ } \
+ } \
+ for (; x < length; ++x) { \
+ quint32 s = src[x]; \
+ if (s != 0) { \
+ s = BYTE_MUL(s, const_alpha); \
+ dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+ } \
+ } \
+}
+
void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha)
{
const quint32 *src = (const quint32 *) srcPixels;
- quint32 *dst = (uint *) destPixels;
+ quint32 *dst = (quint32 *) destPixels;
if (const_alpha == 256) {
const __m128i alphaMask = _mm_set1_epi32(0xff000000);
const __m128i nullVector = _mm_set1_epi32(0);
@@ -140,41 +227,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
const __m128i one = _mm_set1_epi16(0xff);
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
for (int y = 0; y < h; ++y) {
- int x = 0;
- for (; x < w-3; x += 4) {
- const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
- const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
- if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
- // all opaque
- _mm_storeu_si128((__m128i *)&dst[x], srcVector);
- } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) {
- // not fully transparent
- // result = s + d * (1-alpha)
-
- // extract the alpha channel on 2 x 16 bits
- // so we have room for the multiplication
- // each 32 bits will be in the form 0x00AA00AA
- // with A being the 1 - alpha
- __m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
- alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
- alphaChannel = _mm_sub_epi16(one, alphaChannel);
-
- const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
- __m128i destMultipliedByOneMinusAlpha;
- BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half);
-
- // result = s + d * (1-alpha)
- const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
- _mm_storeu_si128((__m128i *)&dst[x], result);
- }
- }
- for (; x<w; ++x) {
- uint s = src[x];
- if (s >= 0xff000000)
- dst[x] = s;
- else if (s != 0)
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
- }
+ BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask);
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
@@ -189,31 +242,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
for (int y = 0; y < h; ++y) {
- int x = 0;
- for (; x < w-3; x += 4) {
- __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]);
- if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) {
- BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half);
-
- __m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
- alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
- alphaChannel = _mm_sub_epi16(one, alphaChannel);
-
- const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
- __m128i destMultipliedByOneMinusAlpha;
- BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half);
-
- const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha);
- _mm_storeu_si128((__m128i *)&dst[x], result);
- }
- }
- for (; x<w; ++x) {
- quint32 s = src[x];
- if (s != 0) {
- s = BYTE_MUL(s, const_alpha);
- dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s));
- }
- }
+ BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, w, nullVector, half, one, colorMask, constAlphaVector)
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
@@ -232,7 +261,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
int const_alpha)
{
const quint32 *src = (const quint32 *) srcPixels;
- quint32 *dst = (uint *) destPixels;
+ quint32 *dst = (quint32 *) destPixels;
if (const_alpha != 256) {
if (const_alpha != 0) {
const __m128i nullVector = _mm_set1_epi32(0);
@@ -268,6 +297,27 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl,
}
}
+void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha)
+{
+ Q_ASSERT(const_alpha > 0); // if const_alpha == 0, this should never be called
+ Q_ASSERT(const_alpha < 256);
+
+ const quint32 *src = (const quint32 *) srcPixels;
+ quint32 *dst = (quint32 *) destPixels;
+
+ const __m128i nullVector = _mm_set1_epi32(0);
+ const __m128i half = _mm_set1_epi16(0x80);
+ const __m128i one = _mm_set1_epi16(0xff);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ if (const_alpha == 255) {
+ const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+ BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask);
+ } else {
+ const __m128i constAlphaVector = _mm_set1_epi16(const_alpha);
+ BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector);
+ }
+}
+
void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
{
if (count < 7) {
@@ -312,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count)
}
}
+void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha)
+{
+ if ((const_alpha & qAlpha(color)) == 255) {
+ qt_memfill32_sse2(destPixels, length, color);
+ } else {
+ if (const_alpha != 255)
+ color = BYTE_MUL(color, const_alpha);
+
+ const quint32 minusAlphaOfColor = qAlpha(~color);
+ int x = 0;
+
+ quint32 *dst = (quint32 *) destPixels;
+ const __m128i colorVector = _mm_set1_epi32(color);
+ const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+ const __m128i half = _mm_set1_epi16(0x80);
+ const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor);
+
+ for (; x < length-3; x += 4) {
+ __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]);
+ BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half);
+ dstVector = _mm_add_epi8(colorVector, dstVector);
+ _mm_storeu_si128((__m128i *)&dst[x], dstVector);
+ }
+ for (;x < length; ++x)
+ destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor);
+ }
+}
+
void qt_memfill16_sse2(quint16 *dest, quint16 value, int count)
{
if (count < 3) {
diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp
index a212718..84b36c7 100644
--- a/src/gui/painting/qpaintengine_raster.cpp
+++ b/src/gui/painting/qpaintengine_raster.cpp
@@ -2419,7 +2419,9 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons
drawImage(r, image, sr);
}
} else {
- const QImage image = pixmap.toImage();
+ QRect clippedSource = sr.toAlignedRect().intersected(pixmap.rect());
+ const QImage image = pd->toImage(clippedSource);
+ QRectF translatedSource = sr.translated(-clippedSource.topLeft());
if (image.depth() == 1) {
Q_D(QRasterPaintEngine);
QRasterPaintEngineState *s = state();
@@ -2430,10 +2432,10 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons
drawBitmap(r.topLeft() + QPointF(s->matrix.dx(), s->matrix.dy()), image, &s->penData);
return;
} else {
- drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), sr);
+ drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), translatedSource);
}
} else {
- drawImage(r, image, sr);
+ drawImage(r, image, translatedSource);
}
}
}
@@ -2688,7 +2690,11 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe
if (s->matrix.type() > QTransform::TxTranslate || stretch_sr) {
- if (s->flags.fast_images) {
+ QRectF targetBounds = s->matrix.mapRect(r);
+ bool exceedsPrecision = targetBounds.width() > 0xffff
+ || targetBounds.height() > 0xffff;
+
+ if (s->flags.fast_images && !exceedsPrecision) {
if (s->matrix.type() > QTransform::TxScale) {
SrcOverTransformFunc func = qTransformFunctions[d->rasterBuffer->format][img.format()];
if (func && (!clip || clip->hasRectClip)) {