From 08e2216a5157c5de30a65674051b08df6f2a1bf6 Mon Sep 17 00:00:00 2001 From: Gunnar Sletta Date: Tue, 22 Jun 2010 15:26:24 +0200 Subject: Fixed crash in the fast blend functions for raster The blend functions don't work when the scaling factor goes beyond 65536, so abort early. Strictly speaking the scale factor comes from targetWidth / sourceWidth, so this catches a bit more cases. Reviewed-by: Kim Task: http://bugreports.qt.nokia.com/browse/QTBUG-9437 --- src/gui/painting/qpaintengine_raster.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index a212718..94e5cbc 100644 --- a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -2688,7 +2688,11 @@ void QRasterPaintEngine::drawImage(const QRectF &r, const QImage &img, const QRe if (s->matrix.type() > QTransform::TxTranslate || stretch_sr) { - if (s->flags.fast_images) { + QRectF targetBounds = s->matrix.mapRect(r); + bool exceedsPrecision = targetBounds.width() > 0xffff + || targetBounds.height() > 0xffff; + + if (s->flags.fast_images && !exceedsPrecision) { if (s->matrix.type() > QTransform::TxScale) { SrcOverTransformFunc func = qTransformFunctions[d->rasterBuffer->format][img.format()]; if (func && (!clip || clip->hasRectClip)) { -- cgit v0.12 From fb76a872e20bd0df8e7bbe9c039b7f20423c6f12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B8dal?= Date: Tue, 22 Jun 2010 13:24:26 +0200 Subject: Optimized sub-rect copying / painting of QPixmaps. Prevented downloading of the whole XImage by introducing new QPixmapData::toImage() overload taking a sub-rect. Also avoid an additional copy by simply taking ownership of the XImage data when the XImage format matches the QImage format. 
Reviewed-by: Trond --- src/gui/image/qpixmap_raster.cpp | 14 +++ src/gui/image/qpixmap_raster_p.h | 1 + src/gui/image/qpixmap_x11.cpp | 172 ++++++++++++++++++++----------- src/gui/image/qpixmap_x11_p.h | 8 ++ src/gui/image/qpixmapdata.cpp | 10 +- src/gui/image/qpixmapdata_p.h | 1 + src/gui/painting/qpaintengine_raster.cpp | 8 +- 7 files changed, 152 insertions(+), 62 deletions(-) diff --git a/src/gui/image/qpixmap_raster.cpp b/src/gui/image/qpixmap_raster.cpp index 13c03a1..13e95c7 100644 --- a/src/gui/image/qpixmap_raster.cpp +++ b/src/gui/image/qpixmap_raster.cpp @@ -289,6 +289,20 @@ QImage QRasterPixmapData::toImage() const return image; } +QImage QRasterPixmapData::toImage(const QRect &rect) const +{ + if (rect.isNull()) + return image; + + QRect clipped = rect.intersected(QRect(0, 0, w, h)); + if (d % 8 == 0) + return QImage(image.scanLine(clipped.y()) + clipped.x() * (d / 8), + clipped.width(), clipped.height(), + image.bytesPerLine(), image.format()); + else + return image.copy(clipped); +} + void QRasterPixmapData::setAlphaChannel(const QPixmap &alphaChannel) { image.setAlphaChannel(alphaChannel.toImage()); diff --git a/src/gui/image/qpixmap_raster_p.h b/src/gui/image/qpixmap_raster_p.h index d7e3f85..42ceeca 100644 --- a/src/gui/image/qpixmap_raster_p.h +++ b/src/gui/image/qpixmap_raster_p.h @@ -81,6 +81,7 @@ public: bool hasAlphaChannel() const; void setAlphaChannel(const QPixmap &alphaChannel); QImage toImage() const; + QImage toImage(const QRect &rect) const; QPaintEngine* paintEngine() const; QImage* buffer(); diff --git a/src/gui/image/qpixmap_x11.cpp b/src/gui/image/qpixmap_x11.cpp index e8dc5ae..604dbf5 100644 --- a/src/gui/image/qpixmap_x11.cpp +++ b/src/gui/image/qpixmap_x11.cpp @@ -1458,6 +1458,95 @@ int QX11PixmapData::metric(QPaintDevice::PaintDeviceMetric metric) const } } +struct QXImageWrapper +{ + XImage *xi; +}; + +bool QX11PixmapData::canTakeQImageFromXImage(const QXImageWrapper &xiWrapper) const +{ + XImage *xi = xiWrapper.xi; + + // 
ARGB32_Premultiplied + if (picture && depth() == 32) + return true; + + Visual *visual = (Visual *)xinfo.visual(); + + // RGB32 + if (depth() == 24 && xi->bits_per_pixel == 32 && visual->red_mask == 0xff0000 + && visual->green_mask == 0xff00 && visual->blue_mask == 0xff) + return true; + + // RGB16 + if (depth() == 16 && xi->bits_per_pixel == 16 && visual->red_mask == 0xf800 + && visual->green_mask == 0x7e0 && visual->blue_mask == 0x1f) + return true; + + return false; +} + +QImage QX11PixmapData::takeQImageFromXImage(const QXImageWrapper &xiWrapper) const +{ + XImage *xi = xiWrapper.xi; + + QImage::Format format = QImage::Format_ARGB32_Premultiplied; + if (depth() == 24) + format = QImage::Format_RGB32; + else if (depth() == 16) + format = QImage::Format_RGB16; + + QImage image((uchar *)xi->data, xi->width, xi->height, xi->bytes_per_line, format); + // take ownership + image.data_ptr()->own_data = true; + xi->data = 0; + + // we may have to swap the byte order + if ((QSysInfo::ByteOrder == QSysInfo::LittleEndian && xi->byte_order == MSBFirst) + || (QSysInfo::ByteOrder == QSysInfo::BigEndian && xi->byte_order == LSBFirst)) + { + for (int i=0; i < image.height(); i++) { + if (depth() == 16) { + ushort *p = (ushort*)image.scanLine(i); + ushort *end = p + image.width(); + while (p < end) { + *p = ((*p << 8) & 0xff00) | ((*p >> 8) & 0x00ff); + p++; + } + } else { + uint *p = (uint*)image.scanLine(i); + uint *end = p + image.width(); + while (p < end) { + *p = ((*p << 24) & 0xff000000) | ((*p << 8) & 0x00ff0000) + | ((*p >> 8) & 0x0000ff00) | ((*p >> 24) & 0x000000ff); + p++; + } + } + } + } + + XDestroyImage(xi); + return image; +} + +QImage QX11PixmapData::toImage(const QRect &rect) const +{ + QXImageWrapper xiWrapper; + xiWrapper.xi = XGetImage(X11->display, hd, rect.x(), rect.y(), rect.width(), rect.height(), + AllPlanes, (depth() == 1) ? 
XYPixmap : ZPixmap); + + Q_CHECK_PTR(xiWrapper.xi); + if (!xiWrapper.xi) + return QImage(); + + if (canTakeQImageFromXImage(xiWrapper)) + return takeQImageFromXImage(xiWrapper); + + QImage image = toImage(xiWrapper, rect); + qSafeXDestroyImage(xiWrapper.xi); + return image; +} + /*! Converts the pixmap to a QImage. Returns a null image if the conversion fails. @@ -1475,6 +1564,13 @@ int QX11PixmapData::metric(QPaintDevice::PaintDeviceMetric metric) const QImage QX11PixmapData::toImage() const { + return toImage(QRect(0, 0, w, h)); +} + +QImage QX11PixmapData::toImage(const QXImageWrapper &xiWrapper, const QRect &rect) const +{ + XImage *xi = xiWrapper.xi; + int d = depth(); Visual *visual = (Visual *)xinfo.visual(); bool trucol = (visual->c_class >= TrueColor) && d > 1; @@ -1492,59 +1588,21 @@ QImage QX11PixmapData::toImage() const format = QImage::Format_RGB32; } - XImage *xi = XGetImage(X11->display, hd, 0, 0, w, h, AllPlanes, - (d == 1) ? XYPixmap : ZPixmap); - - Q_CHECK_PTR(xi); - if (!xi) - return QImage(); - - if (picture && depth() == 32) { - QImage image(w, h, QImage::Format_ARGB32_Premultiplied); - memcpy(image.bits(), xi->data, xi->bytes_per_line * xi->height); - - // we may have to swap the byte order - if ((QSysInfo::ByteOrder == QSysInfo::LittleEndian && xi->byte_order == MSBFirst) - || (QSysInfo::ByteOrder == QSysInfo::BigEndian && xi->byte_order == LSBFirst)) - { - for (int i=0; i < image.height(); i++) { - uint *p = (uint*)image.scanLine(i); - uint *end = p + image.width(); - if ((xi->byte_order == LSBFirst && QSysInfo::ByteOrder == QSysInfo::BigEndian) - || (xi->byte_order == MSBFirst && QSysInfo::ByteOrder == QSysInfo::LittleEndian)) { - while (p < end) { - *p = ((*p << 24) & 0xff000000) | ((*p << 8) & 0x00ff0000) - | ((*p >> 8) & 0x0000ff00) | ((*p >> 24) & 0x000000ff); - p++; - } - } else if (xi->byte_order == MSBFirst && QSysInfo::ByteOrder == QSysInfo::BigEndian) { - while (p < end) { - *p = ((*p << 16) & 0x00ff0000) | ((*p >> 16) & 
0x000000ff) - | ((*p ) & 0xff00ff00); - p++; - } - } - } - } - - // throw away image data - qSafeXDestroyImage(xi); - - return image; - } - if (d == 1 && xi->bitmap_bit_order == LSBFirst) format = QImage::Format_MonoLSB; if (x11_mask && format == QImage::Format_RGB32) format = QImage::Format_ARGB32; - QImage image(w, h, format); + QImage image(xi->width, xi->height, format); if (image.isNull()) // could not create image return image; QImage alpha; if (x11_mask) { - alpha = mask().toImage(); + if (rect.contains(QRect(0, 0, w, h))) + alpha = mask().toImage(); + else + alpha = mask().toImage().copy(rect); } bool ale = alpha.format() == QImage::Format_MonoLSB; @@ -1587,11 +1645,11 @@ QImage QX11PixmapData::toImage() const if (bppc > 8 && xi->byte_order == LSBFirst) bppc++; - for (int y = 0; y < h; ++y) { + for (int y = 0; y < xi->height; ++y) { uchar* asrc = x11_mask ? alpha.scanLine(y) : 0; dst = (QRgb *)image.scanLine(y); src = (uchar *)xi->data + xi->bytes_per_line*y; - for (int x = 0; x < w; x++) { + for (int x = 0; x < xi->width; x++) { switch (bppc) { case 8: pixel = *src++; @@ -1621,8 +1679,8 @@ QImage QX11PixmapData::toImage() const src += 4; break; default: // should not really happen - x = w; // leave loop - y = h; + x = xi->width; // leave loop + y = xi->height; pixel = 0; // eliminate compiler warning qWarning("QPixmap::convertToImage: Invalid depth %d", bppc); } @@ -1660,7 +1718,7 @@ QImage QX11PixmapData::toImage() const } else if (xi->bits_per_pixel == d) { // compatible depth char *xidata = xi->data; // copy each scanline int bpl = qMin(image.bytesPerLine(),xi->bytes_per_line); - for (int y=0; y<h; y++) { + for (int y=0; y<xi->height; y++) { memcpy(image.scanLine(y), xidata, bpl); xidata += xi->bytes_per_line; } @@ -1686,17 +1744,17 @@ QImage QX11PixmapData::toImage() const bpl = image.bytesPerLine(); if (x11_mask) { // which pixels are used? 
- for (int i = 0; i < h; i++) { + for (int i = 0; i < xi->height; i++) { uchar* asrc = alpha.scanLine(i); p = image.scanLine(i); if (ale) { - for (int x = 0; x < w; x++) { + for (int x = 0; x < xi->width; x++) { if (asrc[x >> 3] & (1 << (x & 7))) use[*p] = 1; ++p; } } else { - for (int x = 0; x < w; x++) { + for (int x = 0; x < xi->width; x++) { if (asrc[x >> 3] & (0x80 >> (x & 7))) use[*p] = 1; ++p; @@ -1704,7 +1762,7 @@ QImage QX11PixmapData::toImage() const } } } else { - for (int i = 0; i < h; i++) { + for (int i = 0; i < xi->height; i++) { p = image.scanLine(i); end = p + bpl; while (p < end) @@ -1716,7 +1774,7 @@ QImage QX11PixmapData::toImage() const if (use[i]) pix[i] = ncols++; } - for (int i = 0; i < h; i++) { // translate pixels + for (int i = 0; i < xi->height; i++) { // translate pixels p = image.scanLine(i); end = p + bpl; while (p < end) { @@ -1736,17 +1794,17 @@ QImage QX11PixmapData::toImage() const // use first pixel in image (as good as any). trans = image.scanLine(0)[0]; } - for (int i = 0; i < h; i++) { + for (int i = 0; i < xi->height; i++) { uchar* asrc = alpha.scanLine(i); p = image.scanLine(i); if (ale) { - for (int x = 0; x < w; x++) { + for (int x = 0; x < xi->width; x++) { if (!(asrc[x >> 3] & (1 << (x & 7)))) *p = trans; ++p; } } else { - for (int x = 0; x < w; x++) { + for (int x = 0; x < xi->width; x++) { if (!(asrc[x >> 3] & (1 << (7 -(x & 7))))) *p = trans; ++p; @@ -1764,8 +1822,6 @@ QImage QX11PixmapData::toImage() const } } - qSafeXDestroyImage(xi); - return image; } diff --git a/src/gui/image/qpixmap_x11_p.h b/src/gui/image/qpixmap_x11_p.h index 7575838..821fb69 100644 --- a/src/gui/image/qpixmap_x11_p.h +++ b/src/gui/image/qpixmap_x11_p.h @@ -62,6 +62,8 @@ QT_BEGIN_NAMESPACE class QX11PaintEngine; +struct QXImageWrapper; + class Q_GUI_EXPORT QX11PixmapData : public QPixmapData { public: @@ -87,6 +89,7 @@ public: QPixmap transformed(const QTransform &transform, Qt::TransformationMode mode) const; QImage toImage() const; + QImage 
toImage(const QRect &rect) const; QPaintEngine* paintEngine() const; Qt::HANDLE handle() const { return hd; } @@ -116,10 +119,15 @@ private: void release(); + QImage toImage(const QXImageWrapper &xi, const QRect &rect) const; + QBitmap mask_to_bitmap(int screen) const; static Qt::HANDLE bitmap_to_mask(const QBitmap &, int screen); void bitmapFromImage(const QImage &image); + bool canTakeQImageFromXImage(const QXImageWrapper &xi) const; + QImage takeQImageFromXImage(const QXImageWrapper &xi) const; + Qt::HANDLE hd; enum Flag { diff --git a/src/gui/image/qpixmapdata.cpp b/src/gui/image/qpixmapdata.cpp index 31ca909..345e3cf 100644 --- a/src/gui/image/qpixmapdata.cpp +++ b/src/gui/image/qpixmapdata.cpp @@ -146,7 +146,7 @@ bool QPixmapData::fromData(const uchar *buf, uint len, const char *format, Qt::I void QPixmapData::copy(const QPixmapData *data, const QRect &rect) { - fromImage(data->toImage().copy(rect), Qt::NoOpaqueDetection); + fromImage(data->toImage(rect), Qt::NoOpaqueDetection); } bool QPixmapData::scroll(int dx, int dy, const QRect &rect) @@ -255,6 +255,14 @@ void QPixmapData::setSerialNumber(int serNo) ser_no = serNo; } +QImage QPixmapData::toImage(const QRect &rect) const +{ + if (rect.contains(QRect(0, 0, w, h))) + return toImage(); + else + return toImage().copy(rect); +} + QImage* QPixmapData::buffer() { return 0; diff --git a/src/gui/image/qpixmapdata_p.h b/src/gui/image/qpixmapdata_p.h index 60ed26a..9a1505a 100644 --- a/src/gui/image/qpixmapdata_p.h +++ b/src/gui/image/qpixmapdata_p.h @@ -102,6 +102,7 @@ public: virtual void setAlphaChannel(const QPixmap &alphaChannel); virtual QPixmap alphaChannel() const; virtual QImage toImage() const = 0; + virtual QImage toImage(const QRect &rect) const; virtual QPaintEngine* paintEngine() const = 0; inline int serialNumber() const { return ser_no; } diff --git a/src/gui/painting/qpaintengine_raster.cpp b/src/gui/painting/qpaintengine_raster.cpp index 94e5cbc..84b36c7 100644 --- 
a/src/gui/painting/qpaintengine_raster.cpp +++ b/src/gui/painting/qpaintengine_raster.cpp @@ -2419,7 +2419,9 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawImage(r, image, sr); } } else { - const QImage image = pixmap.toImage(); + QRect clippedSource = sr.toAlignedRect().intersected(pixmap.rect()); + const QImage image = pd->toImage(clippedSource); + QRectF translatedSource = sr.translated(-clippedSource.topLeft()); if (image.depth() == 1) { Q_D(QRasterPaintEngine); QRasterPaintEngineState *s = state(); @@ -2430,10 +2432,10 @@ void QRasterPaintEngine::drawPixmap(const QRectF &r, const QPixmap &pixmap, cons drawBitmap(r.topLeft() + QPointF(s->matrix.dx(), s->matrix.dy()), image, &s->penData); return; } else { - drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), sr); + drawImage(r, d->rasterBuffer->colorizeBitmap(image, s->pen.color()), translatedSource); } } else { - drawImage(r, image, sr); + drawImage(r, image, translatedSource); } } } -- cgit v0.12 From 0c0eac000ecbe8c0bdcb8c6d914854b1a09a720b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B8dal?= Date: Wed, 23 Jun 2010 13:33:20 +0200 Subject: Fixed QPixmap::toImage() bug introduced in fb76a872e20bd. The alpha channel we get from XGetImage might not be saturated for opaque pixmaps. 
Reviewed-by: Trond --- src/gui/image/qpixmap_x11.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/gui/image/qpixmap_x11.cpp b/src/gui/image/qpixmap_x11.cpp index 604dbf5..3d9c363 100644 --- a/src/gui/image/qpixmap_x11.cpp +++ b/src/gui/image/qpixmap_x11.cpp @@ -1525,6 +1525,16 @@ QImage QX11PixmapData::takeQImageFromXImage(const QXImageWrapper &xiWrapper) con } } + // fix-up alpha channel + if (format == QImage::Format_RGB32) { + QRgb *p = (QRgb *)image.bits(); + for (int y = 0; y < xi->height; ++y) { + for (int x = 0; x < xi->width; ++x) + p[x] |= 0xff000000; + p += xi->bytes_per_line / 4; + } + } + XDestroyImage(xi); return image; } -- cgit v0.12 From 5817f04060f5a545a71a1f9e9dfa167f92a4ab93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B8dal?= Date: Wed, 23 Jun 2010 13:55:20 +0200 Subject: Fixed missing copy of raster pixmap data after change fb76a872e20bd. It's expected that copy actually does a deep copy of the image data. Because QRasterPixmapData::fromImage() will just use the source image as is in this case we need to do a deep copy of the QImage. 
Reviewed-by: Trond --- src/gui/image/qpixmap_raster.cpp | 5 +++++ src/gui/image/qpixmap_raster_p.h | 1 + 2 files changed, 6 insertions(+) diff --git a/src/gui/image/qpixmap_raster.cpp b/src/gui/image/qpixmap_raster.cpp index 13e95c7..e188745 100644 --- a/src/gui/image/qpixmap_raster.cpp +++ b/src/gui/image/qpixmap_raster.cpp @@ -155,6 +155,11 @@ void QRasterPixmapData::fromImage(const QImage &sourceImage, // from qwindowsurface.cpp extern void qt_scrollRectInImage(QImage &img, const QRect &rect, const QPoint &offset); +void QRasterPixmapData::copy(const QPixmapData *data, const QRect &rect) +{ + fromImage(data->toImage(rect).copy(), Qt::NoOpaqueDetection); +} + bool QRasterPixmapData::scroll(int dx, int dy, const QRect &rect) { if (!image.isNull()) diff --git a/src/gui/image/qpixmap_raster_p.h b/src/gui/image/qpixmap_raster_p.h index 42ceeca..a46e054 100644 --- a/src/gui/image/qpixmap_raster_p.h +++ b/src/gui/image/qpixmap_raster_p.h @@ -75,6 +75,7 @@ public: bool fromData(const uchar *buffer, uint len, const char *format, Qt::ImageConversionFlags flags); void fromImage(const QImage &image, Qt::ImageConversionFlags flags); + void copy(const QPixmapData *data, const QRect &rect); bool scroll(int dx, int dy, const QRect &rect); void fill(const QColor &color); void setMask(const QBitmap &mask); -- cgit v0.12 From 98d9083f87feb3d78af509db43685a148aa0766e Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 22 Jun 2010 18:18:45 +0200 Subject: Add a SSE2 version of comp_func_SourceOver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a version of comp_func_SourceOver() with SSE2. This gives a performance boost of 11% on some WebKit animations. 
Two new macros were added to simplify the implementation of the different blending primitives: BLEND_SOURCE_OVER_ARGB32_SSE2() and BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2() Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 7 +- src/gui/painting/qdrawhelper_sse2.cpp | 170 ++++++++++++++++++++++------------ 2 files changed, 116 insertions(+), 61 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index bfa1136..d088499 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7817,6 +7817,12 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { + extern void comp_func_SourceOver_sse2(uint *destPixels, + const uint *srcPixels, + int length, + uint const_alpha); + functionForModeAsm[0] = comp_func_SourceOver_sse2; + extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -7826,7 +7832,6 @@ void qInitDrawhelperAsm() int w, int h, int const_alpha); - qBlendFunctions[QImage::Format_RGB32][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB32] = qt_blend_rgb32_on_rgb32_sse2; qBlendFunctions[QImage::Format_RGB32][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_argb32_sse2; diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 6ac64d3..b650aac 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -126,6 +126,93 @@ QT_BEGIN_NAMESPACE result = _mm_or_si128(finalAG, finalRB); \ } +// Basically blend src over dst with the const alpha defined as constAlphaVector. 
+// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +//const __m128i alphaMask = _mm_set1_epi32(0xff000000); +// +// The computation being done is: +// result = s + d * (1-alpha) +// with shortcuts if fully opaque or fully transparent. +#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ + /* all opaque */ \ + _mm_storeu_si128((__m128i *)&dst[x], srcVector); \ + } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ + /* not fully transparent */ \ + /* extract the alpha channel on 2 x 16 bits */ \ + /* so we have room for the multiplication */ \ + /* each 32 bits will be in the form 0x00AA00AA */ \ + /* with A being the 1 - alpha */ \ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + /* result = s + d * (1-alpha) */\ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + uint s = src[x]; \ + if (s >= 0xff000000) \ + dst[x] = s; \ + else if (s != 0) \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ 
+ } \ +} + +// Basically blend src over dst with the const alpha defined as constAlphaVector. +// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as: +//const __m128i nullVector = _mm_set1_epi32(0); +//const __m128i half = _mm_set1_epi16(0x80); +//const __m128i one = _mm_set1_epi16(0xff); +//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); +// +// The computation being done is: +// dest = (s + d * sia) * ca + d * cia +// = s * ca + d * (sia * ca + cia) +// = s * ca + d * (1 - sa*ca) +#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \ +{ \ + int x = 0; \ + for (; x < length-3; x += 4) { \ + __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ + if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \ + BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \ +\ + __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \ + alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ + alphaChannel = _mm_sub_epi16(one, alphaChannel); \ + \ + const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + __m128i destMultipliedByOneMinusAlpha; \ + BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ + \ + const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ + _mm_storeu_si128((__m128i *)&dst[x], result); \ + } \ + } \ + for (; x < length; ++x) { \ + quint32 s = src[x]; \ + if (s != 0) { \ + s = BYTE_MUL(s, const_alpha); \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ + } \ +} + void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, int w, int h, @@ -140,41 +227,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i one = _mm_set1_epi16(0xff); const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); for (int y = 0; y < h; ++y) { - int x = 
0; - for (; x < w-3; x += 4) { - const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { - // all opaque - _mm_storeu_si128((__m128i *)&dst[x], srcVector); - } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { - // not fully transparent - // result = s + d * (1-alpha) - - // extract the alpha channel on 2 x 16 bits - // so we have room for the multiplication - // each 32 bits will be in the form 0x00AA00AA - // with A being the 1 - alpha - __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - // result = s + d * (1-alpha) - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x<w; ++x) { - uint s = src[x]; - if (s >= 0xff000000) - dst[x] = s; - else if (s != 0) - dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); - } + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, w, nullVector, half, one, colorMask, alphaMask); dst = (quint32 *)(((uchar *) dst) + dbpl); src = (const quint32 *)(((const uchar *) src) + sbpl); } @@ -189,31 +242,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); for (int y = 0; y < h; ++y) { - int x = 0; - for (; x < w-3; x += 4) { - __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); - if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { - BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); - - __m128i alphaChannel = 
_mm_srli_epi32(srcVector, 24); - alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); - alphaChannel = _mm_sub_epi16(one, alphaChannel); - - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); - __m128i destMultipliedByOneMinusAlpha; - BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); - - const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); - _mm_storeu_si128((__m128i *)&dst[x], result); - } - } - for (; x 0); // if const_alpha == 0, this should never be called + Q_ASSERT(const_alpha < 256); + + const quint32 *src = (const quint32 *) srcPixels; + quint32 *dst = (uint *) destPixels; + + const __m128i nullVector = _mm_set1_epi32(0); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i one = _mm_set1_epi16(0xff); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + if (const_alpha == 255) { + const __m128i alphaMask = _mm_set1_epi32(0xff000000); + BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask); + } else { + const __m128i constAlphaVector = _mm_set1_epi16(const_alpha); + BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector); + } +} + void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) { if (count < 7) { -- cgit v0.12 From a922a304ae9115d04f3bbcb3bd13c8e374bb16f1 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 22 Jun 2010 22:06:02 +0200 Subject: Add a SSE2 implementation of comp_func_solid_SourceOver() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function is used quite a lot by WebKit animations, the SSE2 implementation is twice as fast in those uses cases. 
Reviewed-by: Andreas Kling Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 5 ++++- src/gui/painting/qdrawhelper_sse2.cpp | 30 +++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index d088499..f08c090 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7817,11 +7817,14 @@ void qInitDrawhelperAsm() #ifdef QT_HAVE_SSE2 if (features & SSE2) { - extern void comp_func_SourceOver_sse2(uint *destPixels, + extern void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha); + extern void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha); + functionForModeAsm[0] = comp_func_SourceOver_sse2; + functionForModeSolidAsm[0] = comp_func_solid_SourceOver_sse2; extern void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index b650aac..7d542d6 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -297,7 +297,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, } } -void comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) +void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixels, int length, uint const_alpha) { Q_ASSERT(const_alpha > 0); // if const_alpha == 0, this should never be called Q_ASSERT(const_alpha < 256); @@ -362,6 +362,34 @@ void qt_memfill32_sse2(quint32 *dest, quint32 value, int count) } } +void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + qt_memfill32_sse2(destPixels, color, length); + } else { + if (const_alpha != 255) + color = 
BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + quint32 *dst = (uint *) destPixels; + const __m128i colorVector = _mm_set1_epi32(color); + const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); + const __m128i half = _mm_set1_epi16(0x80); + const __m128i minusAlphaOfColorVector = _mm_set1_epi16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); + BYTE_MUL_SSE2(dstVector, dstVector, minusAlphaOfColorVector, colorMask, half); + dstVector = _mm_add_epi8(colorVector, dstVector); + _mm_storeu_si128((__m128i *)&dst[x], dstVector); + } + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + void qt_memfill16_sse2(quint16 *dest, quint16 value, int count) { if (count < 3) { -- cgit v0.12 From 86f375e91c2b1162a75f86c9b8f041ae1d4747de Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 22 Jun 2010 23:48:54 +0200 Subject: Fix the casts of qdrawhelper_sse2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I did erroneous cast by mistake, the code should ensure the pointer are on 32 bits integers. 
Reviewed-by: Andreas Kling Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper_sse2.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp index 7d542d6..e2a69ec 100644 --- a/src/gui/painting/qdrawhelper_sse2.cpp +++ b/src/gui/painting/qdrawhelper_sse2.cpp @@ -219,7 +219,7 @@ void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha == 256) { const __m128i alphaMask = _mm_set1_epi32(0xff000000); const __m128i nullVector = _mm_set1_epi32(0); @@ -261,7 +261,7 @@ void qt_blend_rgb32_on_rgb32_sse2(uchar *destPixels, int dbpl, int const_alpha) { const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; if (const_alpha != 256) { if (const_alpha != 0) { const __m128i nullVector = _mm_set1_epi32(0); @@ -303,7 +303,7 @@ void QT_FASTCALL comp_func_SourceOver_sse2(uint *destPixels, const uint *srcPixe Q_ASSERT(const_alpha < 256); const quint32 *src = (const quint32 *) srcPixels; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; const __m128i nullVector = _mm_set1_epi32(0); const __m128i half = _mm_set1_epi16(0x80); @@ -373,7 +373,7 @@ void QT_FASTCALL comp_func_solid_SourceOver_sse2(uint *destPixels, int length, u const quint32 minusAlphaOfColor = qAlpha(~color); int x = 0; - quint32 *dst = (uint *) destPixels; + quint32 *dst = (quint32 *) destPixels; const __m128i colorVector = _mm_set1_epi32(color); const __m128i colorMask = _mm_set1_epi32(0x00ff00ff); const __m128i half = _mm_set1_epi16(0x80); -- cgit v0.12 From 5cfa764466be6ec2b987e0694b99f1d343d55048 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Wed, 23 Jun 2010 22:22:56 +0200 Subject: Add an implementation of 
comp_func_solid_SourceOver_neon() with Neon. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function comp_func_solid_SourceOver_neon() is used extensively by WebKit via the calls to fillRect() of QPainter(). Implementing the function with Neon provides some performance improvement (around 175% of the previous speed). Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 1 + src/gui/painting/qdrawhelper_neon.cpp | 43 +++++++++++++++++++++++++++++++++++ src/gui/painting/qdrawhelper_neon_p.h | 2 ++ 3 files changed, 46 insertions(+) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index f08c090..5727b3c 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7898,6 +7898,7 @@ void qInitDrawhelperAsm() qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; functionForMode_C[QPainter::CompositionMode_SourceOver] = qt_blend_argb32_on_argb32_scanline_neon; + functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_neon; destFetchProc[QImage::Format_RGB16] = qt_destFetchRGB16_neon; destStoreProc[QImage::Format_RGB16] = qt_destStoreRGB16_neon; } diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ee5f24a..3ce90d2 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -579,6 +579,49 @@ void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int } } +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha) +{ + if ((const_alpha & qAlpha(color)) == 255) { + QT_MEMFILL_UINT(destPixels, length, color); + } else { + if (const_alpha != 255) + color = BYTE_MUL(color, const_alpha); + + const quint32 minusAlphaOfColor = qAlpha(~color); + int x = 0; + + uint32_t *dst = (uint32_t *) destPixels; + const uint32x4_t colorVector = vdupq_n_u32(color); +
uint16x8_t half = vdupq_n_u16(0x80); + const uint16x8_t minusAlphaOfColorVector = vdupq_n_u16(minusAlphaOfColor); + + for (; x < length-3; x += 4) { + uint32x4_t dstVector = vld1q_u32(&dst[x]); + + const uint8x16_t dst8 = vreinterpretq_u8_u32(dstVector); + + const uint8x8_t dst8_low = vget_low_u8(dst8); + const uint8x8_t dst8_high = vget_high_u8(dst8); + + const uint16x8_t dst16_low = vmovl_u8(dst8_low); + const uint16x8_t dst16_high = vmovl_u8(dst8_high); + + const uint16x8_t result16_low = qvbyte_mul_u16(dst16_low, minusAlphaOfColorVector, half); + const uint16x8_t result16_high = qvbyte_mul_u16(dst16_high, minusAlphaOfColorVector, half); + + const uint32x2_t result32_low = vreinterpret_u32_u8(vmovn_u16(result16_low)); + const uint32x2_t result32_high = vreinterpret_u32_u8(vmovn_u16(result16_high)); + + uint32x4_t blendedPixels = vcombine_u32(result32_low, result32_high); + uint32x4_t colorPlusBlendedPixels = vaddq_u32(colorVector, blendedPixels); + vst1q_u32(&dst[x], colorPlusBlendedPixels); + } + + for (;x < length; ++x) + destPixels[x] = color + BYTE_MUL(destPixels[x], minusAlphaOfColor); + } +} + QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index d6a4509..c054a1e 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -127,6 +127,8 @@ uint * QT_FASTCALL qt_destFetchRGB16_neon(uint *buffer, void QT_FASTCALL qt_destStoreRGB16_neon(QRasterBuffer *rasterBuffer, int x, int y, const uint *buffer, int length); +void QT_FASTCALL comp_func_solid_SourceOver_neon(uint *destPixels, int length, uint color, uint const_alpha); + #endif // QT_HAVE_NEON QT_END_NAMESPACE -- cgit v0.12 From 9de452bba5b592402ced6f20fbdc6d0b5c075416 Mon Sep 17 00:00:00 2001 From: Rhys Weatherley Date: Thu, 24 Jun 2010 11:23:57 +1000 Subject: Normalize integers when calling glVertexAttribPointer() When QGLShaderProgram::setAttributeArray() is used with a type like 
GL_UNSIGNED_BYTE, it is normally going to be a value that should be normalized to the range 0..1. But the function wasn't normalizing, which led to errors in programs that used per-vertex colors with the 4ub representation. Reviewed-by: Sarah Smith --- src/opengl/qglshaderprogram.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/opengl/qglshaderprogram.cpp b/src/opengl/qglshaderprogram.cpp index 83b4b21..c7689b8 100644 --- a/src/opengl/qglshaderprogram.cpp +++ b/src/opengl/qglshaderprogram.cpp @@ -1490,7 +1490,7 @@ void QGLShaderProgram::setAttributeArray Q_D(QGLShaderProgram); Q_UNUSED(d); if (location != -1) { - glVertexAttribPointer(location, tupleSize, type, GL_FALSE, + glVertexAttribPointer(location, tupleSize, type, GL_TRUE, stride, values); } } @@ -1634,7 +1634,7 @@ void QGLShaderProgram::setAttributeBuffer Q_D(QGLShaderProgram); Q_UNUSED(d); if (location != -1) { - glVertexAttribPointer(location, tupleSize, type, GL_FALSE, stride, + glVertexAttribPointer(location, tupleSize, type, GL_TRUE, stride, reinterpret_cast(offset)); } } -- cgit v0.12