diff options
author | Samuel Rødal <sroedal@trolltech.com> | 2010-03-24 18:03:47 (GMT) |
---|---|---|
committer | Samuel Rødal <sroedal@trolltech.com> | 2010-03-26 09:49:03 (GMT) |
commit | 0ad22e6cd1cb353e2e1244c1eb7257cb3af9def4 (patch) | |
tree | b362551160a25356a12697192fe8bef7e392048c /src | |
parent | 348d22c37611066dc7efc9aac820d77bcf3bbbab (diff) | |
download | Qt-0ad22e6cd1cb353e2e1244c1eb7257cb3af9def4.zip Qt-0ad22e6cd1cb353e2e1244c1eb7257cb3af9def4.tar.gz Qt-0ad22e6cd1cb353e2e1244c1eb7257cb3af9def4.tar.bz2 |
Optimized scaled/transformed image blending for ARGB32PM and RGB16 on RGB16.
Before:
:/traces/qmlphoneconcept.trace, iterations: 5, frames: 48, min(ms):
1207, median(ms): 1212, stddev: 0,165153 %, max(fps): 39,768020
After:
traces/qmlphoneconcept.trace, iterations: 3, frames: 48, min(ms): 884,
median(ms): 886, stddev: 0,383097 %, max(fps): 54,298643
Task-number: QTBUG-6684
Reviewed-by: Gunnar Sletta
Diffstat (limited to 'src')
-rw-r--r-- | src/gui/painting/painting.pri | 6 | ||||
-rw-r--r-- | src/gui/painting/qblendfunctions.cpp | 438 | ||||
-rw-r--r-- | src/gui/painting/qblendfunctions_p.h | 497 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper.cpp | 6 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon.cpp | 139 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_asm.S | 192 | ||||
-rw-r--r-- | src/gui/painting/qdrawhelper_neon_p.h | 31 |
7 files changed, 890 insertions, 419 deletions
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri index e61e07c..ed8ee76 100644 --- a/src/gui/painting/painting.pri +++ b/src/gui/painting/painting.pri @@ -91,6 +91,8 @@ SOURCES += \ HEADERS += \ painting/qpaintengine_raster_p.h \ + painting/qdrawhelper_p.h \ + painting/qblendfunctions_p.h \ painting/qrasterdefs_p.h \ painting/qgrayraster_p.h @@ -385,13 +387,13 @@ neon:*-g++* { SOURCES += painting/qdrawhelper_neon.cpp QMAKE_CXXFLAGS *= -mfpu=neon - PIXMAN_NEON_ASM_FILES = ../3rdparty/pixman/pixman-arm-neon-asm.S + DRAWHELPER_NEON_ASM_FILES = ../3rdparty/pixman/pixman-arm-neon-asm.S painting/qdrawhelper_neon_asm.S neon_compiler.commands = $$QMAKE_CXX -c neon_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT} neon_compiler.dependency_type = TYPE_C neon_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)} - neon_compiler.input = PIXMAN_NEON_ASM_FILES + neon_compiler.input = DRAWHELPER_NEON_ASM_FILES neon_compiler.variable_out = OBJECTS neon_compiler.name = compiling[neon] ${QMAKE_FILE_IN} silent:neon_compiler.commands = @echo compiling[neon] ${QMAKE_FILE_IN} && $$neon_compiler.commands diff --git a/src/gui/painting/qblendfunctions.cpp b/src/gui/painting/qblendfunctions.cpp index 13bb260..24908ce 100644 --- a/src/gui/painting/qblendfunctions.cpp +++ b/src/gui/painting/qblendfunctions.cpp @@ -40,7 +40,7 @@ ****************************************************************************/ #include <qmath.h> -#include "qdrawhelper_p.h" +#include "qblendfunctions_p.h" QT_BEGIN_NAMESPACE @@ -88,6 +88,8 @@ static inline quint16 convert_argb32_to_rgb16(quint32 spix) struct Blend_RGB16_on_RGB16_NoAlpha { inline void write(quint16 *dst, quint16 src) { *dst = src; } + + inline void flush(void *) {} }; struct Blend_RGB16_on_RGB16_ConstAlpha { @@ -100,6 +102,8 @@ struct Blend_RGB16_on_RGB16_ConstAlpha { *dst = BYTE_MUL_RGB16(src, m_alpha) + BYTE_MUL_RGB16(*dst, m_ialpha); } + inline void flush(void 
*) {} + quint32 m_alpha; quint32 m_ialpha; }; @@ -114,6 +118,8 @@ struct Blend_ARGB24_on_RGB16_SourceAlpha { *dst = s; } } + + inline void flush(void *) {} }; struct Blend_ARGB24_on_RGB16_SourceAndConstAlpha { @@ -132,6 +138,8 @@ struct Blend_ARGB24_on_RGB16_SourceAndConstAlpha { } } + inline void flush(void *) {} + quint32 m_alpha; }; @@ -145,6 +153,8 @@ struct Blend_ARGB32_on_RGB16_SourceAlpha { *dst = s; } } + + inline void flush(void *) {} }; struct Blend_ARGB32_on_RGB16_SourceAndConstAlpha { @@ -163,99 +173,11 @@ struct Blend_ARGB32_on_RGB16_SourceAndConstAlpha { } } + inline void flush(void *) {} + quint32 m_alpha; }; -template <typename SRC, typename T> -void qt_scale_image_16bit(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - const QRectF &targetRect, - const QRectF &srcRect, - const QRect &clip, - T blender) -{ - qreal sx = targetRect.width() / (qreal) srcRect.width(); - qreal sy = targetRect.height() / (qreal) srcRect.height(); - - int ix = 0x00010000 / sx; - int iy = 0x00010000 / sy; - -// qDebug() << "scale:" << endl -// << " - target" << targetRect << endl -// << " - source" << srcRect << endl -// << " - clip" << clip << endl -// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy; - - int cx1 = clip.x(); - int cx2 = clip.x() + clip.width(); - int cy1 = clip.top(); - int cy2 = clip.y() + clip.height(); - - int tx1 = qRound(targetRect.left()); - int tx2 = qRound(targetRect.right()); - int ty1 = qRound(targetRect.top()); - int ty2 = qRound(targetRect.bottom()); - - if (tx2 < tx1) - qSwap(tx2, tx1); - - if (ty2 < ty1) - qSwap(ty2, ty1); - - if (tx1 < cx1) - tx1 = cx1; - - if (tx2 >= cx2) - tx2 = cx2; - - if (tx1 >= tx2) - return; - - if (ty1 < cy1) - ty1 = cy1; - - if (ty2 >= cy2) - ty2 = cy2; - - if (ty1 >= ty2) - return; - - int h = ty2 - ty1; - int w = tx2 - tx1; - - - quint32 basex; - quint32 srcy; - - if (sx < 0) { - int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; - basex = quint32(srcRect.right() 
* 65536) + dstx; - } else { - int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; - basex = quint32(srcRect.left() * 65536) + dstx; - } - if (sy < 0) { - int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; - srcy = quint32(srcRect.bottom() * 65536) + dsty; - } else { - int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; - srcy = quint32(srcRect.top() * 65536) + dsty; - } - - quint16 *dst = ((quint16 *) (destPixels + ty1 * dbpl)) + tx1; - - while (h--) { - const SRC *src = (const SRC *) (srcPixels + (srcy >> 16) * sbpl); - int srcx = basex; - for (int x=0; x<w; ++x) { - blender.write(&dst[x], src[srcx >> 16]); - srcx += ix; - } - dst = (quint16 *)(((uchar *) dst) + dbpl); - srcy += iy; - } -} - void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, const QRectF &targetRect, @@ -643,6 +565,8 @@ void qt_blend_rgb32_on_rgb32(uchar *destPixels, int dbpl, struct Blend_RGB32_on_RGB32_NoAlpha { inline void write(quint32 *dst, quint32 src) { *dst = src; } + + inline void flush(void *) {} }; struct Blend_RGB32_on_RGB32_ConstAlpha { @@ -655,6 +579,8 @@ struct Blend_RGB32_on_RGB32_ConstAlpha { *dst = BYTE_MUL(src, m_alpha) + BYTE_MUL(*dst, m_ialpha); } + inline void flush(void *) {} + quint32 m_alpha; quint32 m_ialpha; }; @@ -663,6 +589,8 @@ struct Blend_ARGB32_on_ARGB32_SourceAlpha { inline void write(quint32 *dst, quint32 src) { *dst = src + BYTE_MUL(*dst, qAlpha(~src)); } + + inline void flush(void *) {} }; struct Blend_ARGB32_on_ARGB32_SourceAndConstAlpha { @@ -676,98 +604,12 @@ struct Blend_ARGB32_on_ARGB32_SourceAndConstAlpha { *dst = src + BYTE_MUL(*dst, qAlpha(~src)); } + inline void flush(void *) {} + quint32 m_alpha; quint32 m_ialpha; }; -template <typename T> void qt_scale_image_32bit(uchar *destPixels, int dbpl, - const uchar *srcPixels, int sbpl, - const QRectF &targetRect, - const QRectF &srcRect, - const QRect &clip, - T blender) -{ - qreal sx = targetRect.width() / (qreal) 
srcRect.width(); - qreal sy = targetRect.height() / (qreal) srcRect.height(); - - int ix = 0x00010000 / sx; - int iy = 0x00010000 / sy; - -// qDebug() << "scale:" << endl -// << " - target" << targetRect << endl -// << " - source" << srcRect << endl -// << " - clip" << clip << endl -// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy; - - int cx1 = clip.x(); - int cx2 = clip.x() + clip.width(); - int cy1 = clip.top(); - int cy2 = clip.y() + clip.height(); - - int tx1 = qRound(targetRect.left()); - int tx2 = qRound(targetRect.right()); - int ty1 = qRound(targetRect.top()); - int ty2 = qRound(targetRect.bottom()); - - if (tx2 < tx1) - qSwap(tx2, tx1); - - if (ty2 < ty1) - qSwap(ty2, ty1); - - if (tx1 < cx1) - tx1 = cx1; - - if (tx2 >= cx2) - tx2 = cx2; - - if (tx1 >= tx2) - return; - - if (ty1 < cy1) - ty1 = cy1; - - if (ty2 >= cy2) - ty2 = cy2; - - if (ty1 >= ty2) - return; - - int h = ty2 - ty1; - int w = tx2 - tx1; - - quint32 basex; - quint32 srcy; - - if (sx < 0) { - int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; - basex = quint32(srcRect.right() * 65536) + dstx; - } else { - int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; - basex = quint32(srcRect.left() * 65536) + dstx; - } - if (sy < 0) { - int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; - srcy = quint32(srcRect.bottom() * 65536) + dsty; - } else { - int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; - srcy = quint32(srcRect.top() * 65536) + dsty; - } - - quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; - - while (h--) { - const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); - int srcx = basex; - for (int x=0; x<w; ++x) { - blender.write(&dst[x], src[srcx >> 16]); - srcx += ix; - } - dst = (quint32 *)(((uchar *) dst) + dbpl); - srcy += iy; - } -} - void qt_scale_image_rgb32_on_rgb32(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, const QRectF &targetRect, @@ -818,244 
+660,6 @@ void qt_scale_image_argb32_on_argb32(uchar *destPixels, int dbpl, } } -struct QTransformImageVertex -{ - qreal x, y, u, v; // destination coordinates (x, y) and source coordinates (u, v) -}; - -template <class SrcT, class DestT, class Blender> -void qt_transform_image_rasterize(DestT *destPixels, int dbpl, - const SrcT *srcPixels, int sbpl, - const QTransformImageVertex &topLeft, const QTransformImageVertex &bottomLeft, - const QTransformImageVertex &topRight, const QTransformImageVertex &bottomRight, - const QRect &sourceRect, - const QRect &clip, - qreal topY, qreal bottomY, - int dudx, int dvdx, int dudy, int dvdy, int u0, int v0, - Blender blender) -{ - int fromY = qMax(qRound(topY), clip.top()); - int toY = qMin(qRound(bottomY), clip.top() + clip.height()); - if (fromY >= toY) - return; - - qreal leftSlope = (bottomLeft.x - topLeft.x) / (bottomLeft.y - topLeft.y); - qreal rightSlope = (bottomRight.x - topRight.x) / (bottomRight.y - topRight.y); - int dx_l = int(leftSlope * 0x10000); - int dx_r = int(rightSlope * 0x10000); - int x_l = int((topLeft.x + (0.5 + fromY - topLeft.y) * leftSlope + 0.5) * 0x10000); - int x_r = int((topRight.x + (0.5 + fromY - topRight.y) * rightSlope + 0.5) * 0x10000); - - int fromX, toX, x1, x2, u, v, i, ii; - DestT *line; - for (int y = fromY; y < toY; ++y) { - line = reinterpret_cast<DestT *>(reinterpret_cast<uchar *>(destPixels) + y * dbpl); - - fromX = qMax(x_l >> 16, clip.left()); - toX = qMin(x_r >> 16, clip.left() + clip.width()); - if (fromX < toX) { - // Because of rounding, we can get source coordinates outside the source image. - // Clamp these coordinates to the source rect to avoid segmentation fault and - // garbage on the screen. - - // Find the first pixel on the current scan line where the source coordinates are within the source rect. 
- x1 = fromX; - u = x1 * dudx + y * dudy + u0; - v = x1 * dvdx + y * dvdy + v0; - for (; x1 < toX; ++x1) { - int uu = u >> 16; - int vv = v >> 16; - if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width() - && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) { - break; - } - u += dudx; - v += dvdx; - } - - // Find the last pixel on the current scan line where the source coordinates are within the source rect. - x2 = toX; - u = (x2 - 1) * dudx + y * dudy + u0; - v = (x2 - 1) * dvdx + y * dvdy + v0; - for (; x2 > x1; --x2) { - int uu = u >> 16; - int vv = v >> 16; - if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width() - && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) { - break; - } - u -= dudx; - v -= dvdx; - } - - // Set up values at the beginning of the scan line. - u = fromX * dudx + y * dudy + u0; - v = fromX * dvdx + y * dvdy + v0; - line += fromX; - - // Beginning of the scan line, with per-pixel checks. - i = x1 - fromX; - while (i) { - int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1); - int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1); - blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]); - u += dudx; - v += dvdx; - ++line; - --i; - } - - // Middle of the scan line, without checks. - // Manual loop unrolling. 
- i = x2 - x1; - ii = i >> 3; - while (ii) { - blender.write(&line[0], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[1], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[2], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[3], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[4], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[5], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[6], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - blender.write(&line[7], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; - line += 8; - --ii; - } - switch (i & 7) { - case 7: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 6: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 5: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 4: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 3: 
blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 2: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - case 1: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; - } - - // End of the scan line, with per-pixel checks. - i = toX - x2; - while (i) { - int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1); - int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1); - blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]); - u += dudx; - v += dvdx; - ++line; - --i; - } - } - x_l += dx_l; - x_r += dx_r; - } -} - -template <class SrcT, class DestT, class Blender> -void qt_transform_image(DestT *destPixels, int dbpl, - const SrcT *srcPixels, int sbpl, - const QRectF &targetRect, - const QRectF &sourceRect, - const QRect &clip, - const QTransform &targetRectTransform, - Blender blender) -{ - enum Corner - { - TopLeft, - TopRight, - BottomRight, - BottomLeft - }; - - // map source rectangle to destination. 
- QTransformImageVertex v[4]; - v[TopLeft].u = v[BottomLeft].u = sourceRect.left(); - v[TopLeft].v = v[TopRight].v = sourceRect.top(); - v[TopRight].u = v[BottomRight].u = sourceRect.right(); - v[BottomLeft].v = v[BottomRight].v = sourceRect.bottom(); - targetRectTransform.map(targetRect.left(), targetRect.top(), &v[TopLeft].x, &v[TopLeft].y); - targetRectTransform.map(targetRect.right(), targetRect.top(), &v[TopRight].x, &v[TopRight].y); - targetRectTransform.map(targetRect.left(), targetRect.bottom(), &v[BottomLeft].x, &v[BottomLeft].y); - targetRectTransform.map(targetRect.right(), targetRect.bottom(), &v[BottomRight].x, &v[BottomRight].y); - - // find topmost vertex. - int topmost = 0; - for (int i = 1; i < 4; ++i) { - if (v[i].y < v[topmost].y) - topmost = i; - } - // rearrange array such that topmost vertex is at index 0. - switch (topmost) { - case 1: - { - QTransformImageVertex t = v[0]; - for (int i = 0; i < 3; ++i) - v[i] = v[i+1]; - v[3] = t; - } - break; - case 2: - qSwap(v[0], v[2]); - qSwap(v[1], v[3]); - break; - case 3: - { - QTransformImageVertex t = v[3]; - for (int i = 3; i > 0; --i) - v[i] = v[i-1]; - v[0] = t; - } - break; - } - - // if necessary, swap vertex 1 and 3 such that 1 is to the left of 3. 
- qreal dx1 = v[1].x - v[0].x; - qreal dy1 = v[1].y - v[0].y; - qreal dx2 = v[3].x - v[0].x; - qreal dy2 = v[3].y - v[0].y; - if (dx1 * dy2 - dx2 * dy1 > 0) - qSwap(v[1], v[3]); - - QTransformImageVertex u = {v[1].x - v[0].x, v[1].y - v[0].y, v[1].u - v[0].u, v[1].v - v[0].v}; - QTransformImageVertex w = {v[2].x - v[0].x, v[2].y - v[0].y, v[2].u - v[0].u, v[2].v - v[0].v}; - - qreal det = u.x * w.y - u.y * w.x; - if (det == 0) - return; - - qreal invDet = 1.0 / det; - qreal m11, m12, m21, m22, mdx, mdy; - - m11 = (u.u * w.y - u.y * w.u) * invDet; - m12 = (u.x * w.u - u.u * w.x) * invDet; - m21 = (u.v * w.y - u.y * w.v) * invDet; - m22 = (u.x * w.v - u.v * w.x) * invDet; - mdx = v[0].u - m11 * v[0].x - m12 * v[0].y; - mdy = v[0].v - m21 * v[0].x - m22 * v[0].y; - - int dudx = int(m11 * 0x10000); - int dvdx = int(m21 * 0x10000); - int dudy = int(m12 * 0x10000); - int dvdy = int(m22 * 0x10000); - int u0 = qCeil((0.5 * m11 + 0.5 * m12 + mdx) * 0x10000) - 1; - int v0 = qCeil((0.5 * m21 + 0.5 * m22 + mdy) * 0x10000) - 1; - - int x1 = qFloor(sourceRect.left()); - int y1 = qFloor(sourceRect.top()); - int x2 = qCeil(sourceRect.right()); - int y2 = qCeil(sourceRect.bottom()); - QRect sourceRectI(x1, y1, x2 - x1, y2 - y1); - - // rasterize trapezoids. 
- if (v[1].y < v[3].y) { - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[0], v[3], sourceRectI, clip, v[1].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[3].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - } else { - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[3], v[2], sourceRectI, clip, v[3].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[1].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); - } -} - void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl, const uchar *srcPixels, int sbpl, const QRectF &targetRect, diff --git a/src/gui/painting/qblendfunctions_p.h b/src/gui/painting/qblendfunctions_p.h new file mode 100644 index 0000000..ad754b0 --- /dev/null +++ b/src/gui/painting/qblendfunctions_p.h @@ -0,0 +1,497 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. 
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QBLENDFUNCTIONS_P_H +#define QBLENDFUNCTIONS_P_H + +#include <qmath.h> +#include "qdrawhelper_p.h" + +QT_BEGIN_NAMESPACE + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. 
+// + +template <typename SRC, typename T> +void qt_scale_image_16bit(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &srcRect, + const QRect &clip, + T blender) +{ + qreal sx = targetRect.width() / (qreal) srcRect.width(); + qreal sy = targetRect.height() / (qreal) srcRect.height(); + + int ix = 0x00010000 / sx; + int iy = 0x00010000 / sy; + +// qDebug() << "scale:" << endl +// << " - target" << targetRect << endl +// << " - source" << srcRect << endl +// << " - clip" << clip << endl +// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy; + + int cx1 = clip.x(); + int cx2 = clip.x() + clip.width(); + int cy1 = clip.top(); + int cy2 = clip.y() + clip.height(); + + int tx1 = qRound(targetRect.left()); + int tx2 = qRound(targetRect.right()); + int ty1 = qRound(targetRect.top()); + int ty2 = qRound(targetRect.bottom()); + + if (tx2 < tx1) + qSwap(tx2, tx1); + + if (ty2 < ty1) + qSwap(ty2, ty1); + + if (tx1 < cx1) + tx1 = cx1; + + if (tx2 >= cx2) + tx2 = cx2; + + if (tx1 >= tx2) + return; + + if (ty1 < cy1) + ty1 = cy1; + + if (ty2 >= cy2) + ty2 = cy2; + + if (ty1 >= ty2) + return; + + int h = ty2 - ty1; + int w = tx2 - tx1; + + + quint32 basex; + quint32 srcy; + + if (sx < 0) { + int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; + basex = quint32(srcRect.right() * 65536) + dstx; + } else { + int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; + basex = quint32(srcRect.left() * 65536) + dstx; + } + if (sy < 0) { + int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; + srcy = quint32(srcRect.bottom() * 65536) + dsty; + } else { + int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; + srcy = quint32(srcRect.top() * 65536) + dsty; + } + + quint16 *dst = ((quint16 *) (destPixels + ty1 * dbpl)) + tx1; + + while (h--) { + const SRC *src = (const SRC *) (srcPixels + (srcy >> 16) * sbpl); + int srcx = basex; + int x = 0; + for (; x<w-7; 
x+=8) { + blender.write(&dst[x], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+1], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+2], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+3], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+4], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+5], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+6], src[srcx >> 16]); srcx += ix; + blender.write(&dst[x+7], src[srcx >> 16]); srcx += ix; + } + for (; x<w; ++x) { + blender.write(&dst[x], src[srcx >> 16]); + srcx += ix; + } + blender.flush(&dst[x]); + dst = (quint16 *)(((uchar *) dst) + dbpl); + srcy += iy; + } +} + +template <typename T> void qt_scale_image_32bit(uchar *destPixels, int dbpl, + const uchar *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &srcRect, + const QRect &clip, + T blender) +{ + qreal sx = targetRect.width() / (qreal) srcRect.width(); + qreal sy = targetRect.height() / (qreal) srcRect.height(); + + int ix = 0x00010000 / sx; + int iy = 0x00010000 / sy; + +// qDebug() << "scale:" << endl +// << " - target" << targetRect << endl +// << " - source" << srcRect << endl +// << " - clip" << clip << endl +// << " - sx=" << sx << " sy=" << sy << " ix=" << ix << " iy=" << iy; + + int cx1 = clip.x(); + int cx2 = clip.x() + clip.width(); + int cy1 = clip.top(); + int cy2 = clip.y() + clip.height(); + + int tx1 = qRound(targetRect.left()); + int tx2 = qRound(targetRect.right()); + int ty1 = qRound(targetRect.top()); + int ty2 = qRound(targetRect.bottom()); + + if (tx2 < tx1) + qSwap(tx2, tx1); + + if (ty2 < ty1) + qSwap(ty2, ty1); + + if (tx1 < cx1) + tx1 = cx1; + + if (tx2 >= cx2) + tx2 = cx2; + + if (tx1 >= tx2) + return; + + if (ty1 < cy1) + ty1 = cy1; + + if (ty2 >= cy2) + ty2 = cy2; + + if (ty1 >= ty2) + return; + + int h = ty2 - ty1; + int w = tx2 - tx1; + + quint32 basex; + quint32 srcy; + + if (sx < 0) { + int dstx = qFloor((tx1 + qreal(0.5) - targetRect.right()) * ix) + 1; + basex = quint32(srcRect.right() * 
65536) + dstx; + } else { + int dstx = qCeil((tx1 + qreal(0.5) - targetRect.left()) * ix) - 1; + basex = quint32(srcRect.left() * 65536) + dstx; + } + if (sy < 0) { + int dsty = qFloor((ty1 + qreal(0.5) - targetRect.bottom()) * iy) + 1; + srcy = quint32(srcRect.bottom() * 65536) + dsty; + } else { + int dsty = qCeil((ty1 + qreal(0.5) - targetRect.top()) * iy) - 1; + srcy = quint32(srcRect.top() * 65536) + dsty; + } + + quint32 *dst = ((quint32 *) (destPixels + ty1 * dbpl)) + tx1; + + while (h--) { + const uint *src = (const quint32 *) (srcPixels + (srcy >> 16) * sbpl); + int srcx = basex; + int x = 0; + for (; x<w; ++x) { + blender.write(&dst[x], src[srcx >> 16]); + srcx += ix; + } + blender.flush(&dst[x]); + dst = (quint32 *)(((uchar *) dst) + dbpl); + srcy += iy; + } +} + +struct QTransformImageVertex +{ + qreal x, y, u, v; // destination coordinates (x, y) and source coordinates (u, v) +}; + +template <class SrcT, class DestT, class Blender> +void qt_transform_image_rasterize(DestT *destPixels, int dbpl, + const SrcT *srcPixels, int sbpl, + const QTransformImageVertex &topLeft, const QTransformImageVertex &bottomLeft, + const QTransformImageVertex &topRight, const QTransformImageVertex &bottomRight, + const QRect &sourceRect, + const QRect &clip, + qreal topY, qreal bottomY, + int dudx, int dvdx, int dudy, int dvdy, int u0, int v0, + Blender blender) +{ + int fromY = qMax(qRound(topY), clip.top()); + int toY = qMin(qRound(bottomY), clip.top() + clip.height()); + if (fromY >= toY) + return; + + qreal leftSlope = (bottomLeft.x - topLeft.x) / (bottomLeft.y - topLeft.y); + qreal rightSlope = (bottomRight.x - topRight.x) / (bottomRight.y - topRight.y); + int dx_l = int(leftSlope * 0x10000); + int dx_r = int(rightSlope * 0x10000); + int x_l = int((topLeft.x + (0.5 + fromY - topLeft.y) * leftSlope + 0.5) * 0x10000); + int x_r = int((topRight.x + (0.5 + fromY - topRight.y) * rightSlope + 0.5) * 0x10000); + + int fromX, toX, x1, x2, u, v, i, ii; + DestT *line; + for (int 
y = fromY; y < toY; ++y) { + line = reinterpret_cast<DestT *>(reinterpret_cast<uchar *>(destPixels) + y * dbpl); + + fromX = qMax(x_l >> 16, clip.left()); + toX = qMin(x_r >> 16, clip.left() + clip.width()); + if (fromX < toX) { + // Because of rounding, we can get source coordinates outside the source image. + // Clamp these coordinates to the source rect to avoid segmentation fault and + // garbage on the screen. + + // Find the first pixel on the current scan line where the source coordinates are within the source rect. + x1 = fromX; + u = x1 * dudx + y * dudy + u0; + v = x1 * dvdx + y * dvdy + v0; + for (; x1 < toX; ++x1) { + int uu = u >> 16; + int vv = v >> 16; + if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width() + && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) { + break; + } + u += dudx; + v += dvdx; + } + + // Find the last pixel on the current scan line where the source coordinates are within the source rect. + x2 = toX; + u = (x2 - 1) * dudx + y * dudy + u0; + v = (x2 - 1) * dvdx + y * dvdy + v0; + for (; x2 > x1; --x2) { + int uu = u >> 16; + int vv = v >> 16; + if (uu >= sourceRect.left() && uu < sourceRect.left() + sourceRect.width() + && vv >= sourceRect.top() && vv < sourceRect.top() + sourceRect.height()) { + break; + } + u -= dudx; + v -= dvdx; + } + + // Set up values at the beginning of the scan line. + u = fromX * dudx + y * dudy + u0; + v = fromX * dvdx + y * dvdy + v0; + line += fromX; + + // Beginning of the scan line, with per-pixel checks. + i = x1 - fromX; + while (i) { + int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1); + int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1); + blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]); + u += dudx; + v += dvdx; + ++line; + --i; + } + + // Middle of the scan line, without checks. + // Manual loop unrolling. 
+ i = x2 - x1; + ii = i >> 3; + while (ii) { + blender.write(&line[0], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[1], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[2], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[3], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[4], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[5], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[6], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + blender.write(&line[7], reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; + + line += 8; + + --ii; + } + switch (i & 7) { + case 7: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 6: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 5: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 4: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 3: 
blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 2: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + case 1: blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + (v >> 16) * sbpl)[u >> 16]); u += dudx; v += dvdx; ++line; + } + + // End of the scan line, with per-pixel checks. + i = toX - x2; + while (i) { + int uu = qBound(sourceRect.left(), u >> 16, sourceRect.left() + sourceRect.width() - 1); + int vv = qBound(sourceRect.top(), v >> 16, sourceRect.top() + sourceRect.height() - 1); + blender.write(line, reinterpret_cast<const SrcT *>(reinterpret_cast<const uchar *>(srcPixels) + vv * sbpl)[uu]); + u += dudx; + v += dvdx; + ++line; + --i; + } + + blender.flush(line); + } + x_l += dx_l; + x_r += dx_r; + } +} + +template <class SrcT, class DestT, class Blender> +void qt_transform_image(DestT *destPixels, int dbpl, + const SrcT *srcPixels, int sbpl, + const QRectF &targetRect, + const QRectF &sourceRect, + const QRect &clip, + const QTransform &targetRectTransform, + Blender blender) +{ + enum Corner + { + TopLeft, + TopRight, + BottomRight, + BottomLeft + }; + + // map source rectangle to destination. 
+ QTransformImageVertex v[4]; + v[TopLeft].u = v[BottomLeft].u = sourceRect.left(); + v[TopLeft].v = v[TopRight].v = sourceRect.top(); + v[TopRight].u = v[BottomRight].u = sourceRect.right(); + v[BottomLeft].v = v[BottomRight].v = sourceRect.bottom(); + targetRectTransform.map(targetRect.left(), targetRect.top(), &v[TopLeft].x, &v[TopLeft].y); + targetRectTransform.map(targetRect.right(), targetRect.top(), &v[TopRight].x, &v[TopRight].y); + targetRectTransform.map(targetRect.left(), targetRect.bottom(), &v[BottomLeft].x, &v[BottomLeft].y); + targetRectTransform.map(targetRect.right(), targetRect.bottom(), &v[BottomRight].x, &v[BottomRight].y); + + // find topmost vertex. + int topmost = 0; + for (int i = 1; i < 4; ++i) { + if (v[i].y < v[topmost].y) + topmost = i; + } + // rearrange array such that topmost vertex is at index 0. + switch (topmost) { + case 1: + { + QTransformImageVertex t = v[0]; + for (int i = 0; i < 3; ++i) + v[i] = v[i+1]; + v[3] = t; + } + break; + case 2: + qSwap(v[0], v[2]); + qSwap(v[1], v[3]); + break; + case 3: + { + QTransformImageVertex t = v[3]; + for (int i = 3; i > 0; --i) + v[i] = v[i-1]; + v[0] = t; + } + break; + } + + // if necessary, swap vertex 1 and 3 such that 1 is to the left of 3. 
+ qreal dx1 = v[1].x - v[0].x; + qreal dy1 = v[1].y - v[0].y; + qreal dx2 = v[3].x - v[0].x; + qreal dy2 = v[3].y - v[0].y; + if (dx1 * dy2 - dx2 * dy1 > 0) + qSwap(v[1], v[3]); + + QTransformImageVertex u = {v[1].x - v[0].x, v[1].y - v[0].y, v[1].u - v[0].u, v[1].v - v[0].v}; + QTransformImageVertex w = {v[2].x - v[0].x, v[2].y - v[0].y, v[2].u - v[0].u, v[2].v - v[0].v}; + + qreal det = u.x * w.y - u.y * w.x; + if (det == 0) + return; + + qreal invDet = 1.0 / det; + qreal m11, m12, m21, m22, mdx, mdy; + + m11 = (u.u * w.y - u.y * w.u) * invDet; + m12 = (u.x * w.u - u.u * w.x) * invDet; + m21 = (u.v * w.y - u.y * w.v) * invDet; + m22 = (u.x * w.v - u.v * w.x) * invDet; + mdx = v[0].u - m11 * v[0].x - m12 * v[0].y; + mdy = v[0].v - m21 * v[0].x - m22 * v[0].y; + + int dudx = int(m11 * 0x10000); + int dvdx = int(m21 * 0x10000); + int dudy = int(m12 * 0x10000); + int dvdy = int(m22 * 0x10000); + int u0 = qCeil((0.5 * m11 + 0.5 * m12 + mdx) * 0x10000) - 1; + int v0 = qCeil((0.5 * m21 + 0.5 * m22 + mdy) * 0x10000) - 1; + + int x1 = qFloor(sourceRect.left()); + int y1 = qFloor(sourceRect.top()); + int x2 = qCeil(sourceRect.right()); + int y2 = qCeil(sourceRect.bottom()); + QRect sourceRectI(x1, y1, x2 - x1, y2 - y1); + + // rasterize trapezoids. 
+ if (v[1].y < v[3].y) { + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[0], v[3], sourceRectI, clip, v[1].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[3].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + } else { + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[0], v[3], sourceRectI, clip, v[0].y, v[3].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[0], v[1], v[3], v[2], sourceRectI, clip, v[3].y, v[1].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + qt_transform_image_rasterize(destPixels, dbpl, srcPixels, sbpl, v[1], v[2], v[3], v[2], sourceRectI, clip, v[1].y, v[2].y, dudx, dvdx, dudy, dvdy, u0, v0, blender); + } +} + +QT_END_NAMESPACE + +#endif // QBLENDFUNCTIONS_P_H diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 130270b..dc3b79b 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7964,6 +7964,12 @@ void qInitDrawhelperAsm() qBlendFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_blend_argb32_on_rgb16_neon; qBlendFunctions[QImage::Format_ARGB32_Premultiplied][QImage::Format_RGB16] = qt_blend_rgb16_on_argb32_neon; + qScaleFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_scale_image_argb32_on_rgb16_neon; + qScaleFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_scale_image_rgb16_on_rgb16_neon; + + qTransformFunctions[QImage::Format_RGB16][QImage::Format_ARGB32_Premultiplied] = qt_transform_image_argb32_on_rgb16_neon; + qTransformFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = 
qt_transform_image_rgb16_on_rgb16_neon; + qDrawHelper[QImage::Format_RGB16].alphamapBlit = qt_alphamapblit_quint16_neon; } #endif diff --git a/src/gui/painting/qdrawhelper_neon.cpp b/src/gui/painting/qdrawhelper_neon.cpp index ef1b85c..ca1d85f 100644 --- a/src/gui/painting/qdrawhelper_neon.cpp +++ b/src/gui/painting/qdrawhelper_neon.cpp @@ -40,6 +40,8 @@ ****************************************************************************/ #include <private/qdrawhelper_p.h> +#include <private/qblendfunctions_p.h> +#include <private/qmath_p.h> #ifdef QT_HAVE_NEON @@ -310,6 +312,143 @@ void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, pixman_composite_over_n_8_0565_asm_neon(mapWidth, mapHeight, dest, destStride, color, 0, mask, mapStride); } +
+// 8-pixel blend routines implemented in qdrawhelper_neon_asm.S. Both read and
+// write exactly eight destination pixels and read eight source pixels.
+extern "C" void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha);
+extern "C" void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha);
+
+/*
+    Blender policy that batches pixels for an 8-pixel NEON blend routine.
+
+    write() stores each source pixel; once eight have been collected they are
+    blended onto the eight destination pixels ending at the one just written.
+    flush() blends a remaining partial batch through a temporary destination
+    buffer so that only the pixels that were actually written are touched.
+
+    SRC is the source pixel type, BlendFunc a callable with the signature
+    (quint16 *dst, const SRC *src, int const_alpha).
+*/
+template <typename SRC, typename BlendFunc>
+struct Blend_on_RGB16_SourceAndConstAlpha_Neon {
+    Blend_on_RGB16_SourceAndConstAlpha_Neon(BlendFunc blender, int const_alpha)
+        : m_index(0)
+        , m_blender(blender)
+        , m_const_alpha(const_alpha)
+    {
+        // The blend routine always reads all eight source slots, also for a
+        // partial batch, so give every slot a defined value up front.
+        for (int i = 0; i < 8; ++i)
+            srcBuffer[i] = 0;
+    }
+
+    // Queues src for the pixel at dst; dst must advance by one pixel per call.
+    inline void write(quint16 *dst, SRC src)
+    {
+        srcBuffer[m_index++] = src;
+
+        if (m_index == 8) {
+            // dst is the last pixel of the batch, which starts 7 pixels earlier.
+            m_blender(dst - 7, srcBuffer, m_const_alpha);
+            m_index = 0;
+        }
+    }
+
+    // Blends the pending pixels; dst points one past the last written pixel.
+    inline void flush(quint16 *dst)
+    {
+        if (m_index <= 0)
+            return;
+
+        // Zero-filled so that the unused lanes read by the blend routine are
+        // defined; their results are discarded below.
+        quint16 dstBuffer[8] = { 0 };
+        for (int i = 0; i < m_index; ++i)
+            dstBuffer[i] = dst[i - m_index];
+
+        m_blender(dstBuffer, srcBuffer, m_const_alpha);
+
+        for (int i = 0; i < m_index; ++i)
+            dst[i - m_index] = dstBuffer[i];
+
+        m_index = 0;
+    }
+
+    SRC srcBuffer[8];
+
+    int m_index;
+    BlendFunc m_blender;
+    int m_const_alpha;
+};
+
+// Deduces BlendFunc so that callers only have to name the source pixel type.
+template <typename SRC, typename BlendFunc>
+Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>
+Blend_on_RGB16_SourceAndConstAlpha_Neon_create(BlendFunc blender, int const_alpha)
+{
+    return Blend_on_RGB16_SourceAndConstAlpha_Neon<SRC, BlendFunc>(blender, const_alpha);
+}
+
+// Scaled blend of an ARGB32 premultiplied source onto RGB16. Draws nothing
+// when const_alpha is 0.
+void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                         const uchar *srcPixels, int sbpl,
+                                         const QRectF &targetRect,
+                                         const QRectF &sourceRect,
+                                         const QRect &clip,
+                                         int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    qt_scale_image_16bit<quint32>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip,
+        Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
+}
+
+// Generic implementation, defined outside this file.
+void qt_scale_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
+                                   const uchar *srcPixels, int sbpl,
+                                   const QRectF &targetRect,
+                                   const QRectF &sourceRect,
+                                   const QRect &clip,
+                                   int const_alpha);
+
+// Scaled blend of an RGB16 source onto RGB16. Draws nothing when const_alpha
+// is 0; const_alpha == 256 is delegated to the generic implementation, so the
+// NEON routine only ever sees 1..255.
+void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                        const uchar *srcPixels, int sbpl,
+                                        const QRectF &targetRect,
+                                        const QRectF &sourceRect,
+                                        const QRect &clip,
+                                        int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    if (const_alpha == 256) {
+        qt_scale_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, const_alpha);
+        return;
+    }
+
+    qt_scale_image_16bit<quint16>(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip,
+        Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
+}
+
+// Generic implementation, defined outside this file.
+extern void qt_transform_image_rgb16_on_rgb16(uchar *destPixels, int dbpl,
+                                              const uchar *srcPixels, int sbpl,
+                                              const QRectF &targetRect,
+                                              const QRectF &sourceRect,
+                                              const QRect &clip,
+                                              const QTransform &targetRectTransform,
+                                              int const_alpha);
+
+// Transformed blend of an RGB16 source onto RGB16. Draws nothing when
+// const_alpha is 0; const_alpha == 256 is delegated to the generic
+// implementation, so the NEON routine only ever sees 1..255.
+void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                            const uchar *srcPixels, int sbpl,
+                                            const QRectF &targetRect,
+                                            const QRectF &sourceRect,
+                                            const QRect &clip,
+                                            const QTransform &targetRectTransform,
+                                            int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    if (const_alpha == 256) {
+        qt_transform_image_rgb16_on_rgb16(destPixels, dbpl, srcPixels, sbpl, targetRect, sourceRect, clip, targetRectTransform, const_alpha);
+        return;
+    }
+
+    qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
+        reinterpret_cast<const quint16 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
+        Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint16>(blend_8_pixels_rgb16_on_rgb16_neon, const_alpha));
+}
+
+// Transformed blend of an ARGB32 premultiplied source onto RGB16. Draws
+// nothing when const_alpha is 0.
+void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                             const uchar *srcPixels, int sbpl,
+                                             const QRectF &targetRect,
+                                             const QRectF &sourceRect,
+                                             const QRect &clip,
+                                             const QTransform &targetRectTransform,
+                                             int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    qt_transform_image(reinterpret_cast<quint16 *>(destPixels), dbpl,
+        reinterpret_cast<const quint32 *>(srcPixels), sbpl, targetRect, sourceRect, clip, targetRectTransform,
+        Blend_on_RGB16_SourceAndConstAlpha_Neon_create<quint32>(blend_8_pixels_argb32_on_rgb16_neon, const_alpha));
+}
+
QT_END_NAMESPACE #endif // QT_HAVE_NEON diff --git a/src/gui/painting/qdrawhelper_neon_asm.S b/src/gui/painting/qdrawhelper_neon_asm.S new file mode 100644 index 0000000..9992817 --- /dev/null +++ b/src/gui/painting/qdrawhelper_neon_asm.S @@ -0,0 +1,192 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtGui module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package.
+** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +/* Prevent the stack from becoming executable for no reason... 
*/
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+.text
+.fpu neon
+.arch armv7a
+.altmacro
+
+/* void blend_8_pixels_argb32_on_rgb16_neon(quint16 *dst, const quint32 *src, int const_alpha)
+ *
+ * Blends 8 source pixels from [r1] over the 8 r5g6b5 pixels at [r0] and
+ * stores the result back to [r0]; r2 holds const_alpha. The source channels
+ * are first scaled by const_alpha (skipped when it is 256), the destination
+ * is scaled by the inverted source alpha, and the two are added with
+ * saturation.
+ */
+
+    .func blend_8_pixels_argb32_on_rgb16_neon
+    .global blend_8_pixels_argb32_on_rgb16_neon
+    /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+    .hidden blend_8_pixels_argb32_on_rgb16_neon
+    .type blend_8_pixels_argb32_on_rgb16_neon, %function
+#endif
+blend_8_pixels_argb32_on_rgb16_neon:
+    vld4.8 { d0, d1, d2, d3 }, [r1]     /* de-interleave src: d0 - blue, d1 - green, d2 - red, d3 - alpha */
+    vld1.16 { d4, d5 }, [r0]            /* q2 = 8 dst pixels */
+
+    cmp r2, #256                        /* const_alpha == 256: use src unscaled */
+    beq .blend_32_inner
+
+    vdup.8 d6, r2                       /* low byte of const_alpha in every lane */
+
+    /* multiply by const_alpha: channel = (channel * const_alpha) >> 8 */
+    vmull.u8 q8, d6, d0
+    vmull.u8 q9, d6, d1
+    vmull.u8 q10, d6, d2
+    vmull.u8 q11, d6, d3
+
+    vshrn.u16 d0, q8, #8
+    vshrn.u16 d1, q9, #8
+    vshrn.u16 d2, q10, #8
+    vshrn.u16 d3, q11, #8
+
+.blend_32_inner:
+    /* convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+       and put data into d6 - red, d7 - green, d30 - blue; each channel is
+       widened to 8 bits by re-inserting its top bits below it (vsri/vsli) */
+    vshrn.u16 d6, q2, #8
+    vshrn.u16 d7, q2, #3
+    vsli.u16 q2, q2, #5
+    vsri.u8 d6, d6, #5
+    vmvn.8 d3, d3                       /* d3 = inverted source alpha */
+    vsri.u8 d7, d7, #6
+    vshrn.u16 d30, q2, #2
+
+    pld [r0, #128]                      /* preload dst + 128 */
+
+    /* now do alpha blending, storing results in 8-bit planar format
+       into d16 - red, d19 - green, d18 - blue; with t = inverted alpha * dst
+       the vrshr/vraddhn pairs compute (t + ((t + 128) >> 8) + 128) >> 8,
+       i.e. t / 255 with rounding */
+    vmull.u8 q10, d3, d6
+    vmull.u8 q11, d3, d7
+    vmull.u8 q12, d3, d30
+    vrshr.u16 q13, q10, #8
+    vrshr.u16 q3, q11, #8
+    vrshr.u16 q15, q12, #8
+    vraddhn.u16 d20, q10, q13
+    vraddhn.u16 d23, q11, q3
+    vraddhn.u16 d22, q12, q15
+    vqadd.u8 d16, d2, d20               /* red = src red + scaled dst red, saturating */
+    vqadd.u8 q9, q0, q11                /* d18 = blue, d19 = green in one 128-bit add */
+    /* convert the result to r5g6b5 and store it into {d28, d29} */
+    vshll.u8 q14, d16, #8
+    vshll.u8 q8, d19, #8
+    vshll.u8 q9, d18, #8
+    vsri.u16 q14, q8, #5
+    vsri.u16 q14, q9, #11
+
+    vst1.16 { d28, d29 }, [r0]
+
+    bx lr
+
+    .endfunc
+
+/* void blend_8_pixels_rgb16_on_rgb16_neon(quint16 *dst, const quint16 *src, int const_alpha)
+ *
+ * Mixes the 8 r5g6b5 source pixels at [r1] into the 8 r5g6b5 pixels at [r0]:
+ * per channel, result = ((src * const_alpha) >> 8) + ((dst * (256 - const_alpha)) >> 8).
+ * Only the low byte of const_alpha and of (256 - const_alpha) is used, so
+ * this presumably expects const_alpha in 1..255; the C++ callers in the same
+ * commit handle 0 and 256 before calling.
+ */
+
+    .func blend_8_pixels_rgb16_on_rgb16_neon
+    .global blend_8_pixels_rgb16_on_rgb16_neon
+    /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+    .hidden blend_8_pixels_rgb16_on_rgb16_neon
+    .type blend_8_pixels_rgb16_on_rgb16_neon, %function
+#endif
+blend_8_pixels_rgb16_on_rgb16_neon:
+    vld1.16 { d0, d1 }, [r0]            /* q0 = 8 dst pixels */
+    vld1.16 { d2, d3 }, [r1]            /* q1 = 8 src pixels */
+
+    rsb r3, r2, #256                    /* r3 = 256 - const_alpha */
+    vdup.8 d4, r2                       /* d4 = const_alpha in every lane */
+    vdup.8 d5, r3                       /* d5 = inverse const_alpha in every lane */
+
+    /* convert 8 r5g6b5 pixel data from {d0, d1} to planar 8-bit format
+       and put data into d6 - red, d7 - green, d30 - blue */
+    vshrn.u16 d6, q0, #8
+    vshrn.u16 d7, q0, #3
+    vsli.u16 q0, q0, #5
+    vsri.u8 d6, d6, #5
+    vsri.u8 d7, d7, #6
+    vshrn.u16 d30, q0, #2
+
+    /* same from {d2, d3} into {d26, d27, d28} */
+    vshrn.u16 d26, q1, #8
+    vshrn.u16 d27, q1, #3
+    vsli.u16 q1, q1, #5
+    vsri.u8 d26, d26, #5
+    vsri.u8 d27, d27, #6
+    vshrn.u16 d28, q1, #2
+
+    /* multiply dst by inv const_alpha */
+    vmull.u8 q10, d5, d6
+    vmull.u8 q11, d5, d7
+    vmull.u8 q12, d5, d30
+
+    vshrn.u16 d6, q10, #8
+    vshrn.u16 d7, q11, #8
+    vshrn.u16 d30, q12, #8
+
+    /* multiply src by const_alpha */
+    vmull.u8 q10, d4, d26
+    vmull.u8 q11, d4, d27
+    vmull.u8 q12, d4, d28
+
+    vshrn.u16 d26, q10, #8
+    vshrn.u16 d27, q11, #8
+    vshrn.u16 d28, q12, #8
+
+    /* preload dst + 128 */
+    pld [r0, #128]
+
+    /* add components, storing results in 8-bit planar format
+       into d16 - red, d19 - green, d18 - blue */
+    vadd.u8 d16, d26, d6
+    vadd.u8 d19, d27, d7
+    vadd.u8 d18, d28, d30
+
+    /* convert the result to r5g6b5 and store it into {d28, d29} */
+    vshll.u8 q14, d16, #8
+    vshll.u8 q8, d19, #8
+    vshll.u8 q9, d18, #8
+    vsri.u16 q14, q8, #5
+    vsri.u16 q14, q9, #11
+
+    vst1.16 { d28, d29 }, [r0]
+
+    bx lr
+
+    .endfunc
diff --git a/src/gui/painting/qdrawhelper_neon_p.h b/src/gui/painting/qdrawhelper_neon_p.h index 6f9604f..6f25243 100644 --- a/src/gui/painting/qdrawhelper_neon_p.h +++ b/src/gui/painting/qdrawhelper_neon_p.h @@ -84,6 +84,37 @@ void qt_alphamapblit_quint16_neon(QRasterBuffer *rasterBuffer, const uchar *bitmap, int mapWidth, int mapHeight, int
mapStride, const QClipData *clip);
+/* NEON-accelerated scaled blend of an ARGB32 premultiplied source onto RGB16.
+   The definition in qdrawhelper_neon.cpp draws nothing when const_alpha is 0. */
+void qt_scale_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                         const uchar *srcPixels, int sbpl,
+                                         const QRectF &targetRect,
+                                         const QRectF &sourceRect,
+                                         const QRect &clip,
+                                         int const_alpha);
+/* NEON-accelerated scaled blend of an RGB16 source onto RGB16. The definition
+   draws nothing when const_alpha is 0 and hands const_alpha == 256 to
+   qt_scale_image_rgb16_on_rgb16(). */
+void qt_scale_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                        const uchar *srcPixels, int sbpl,
+                                        const QRectF &targetRect,
+                                        const QRectF &sourceRect,
+                                        const QRect &clip,
+                                        int const_alpha);
+/* NEON-accelerated transformed blend of an ARGB32 premultiplied source onto
+   RGB16; targetRect is mapped through targetRectTransform. The definition
+   draws nothing when const_alpha is 0. */
+void qt_transform_image_argb32_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                             const uchar *srcPixels, int sbpl,
+                                             const QRectF &targetRect,
+                                             const QRectF &sourceRect,
+                                             const QRect &clip,
+                                             const QTransform &targetRectTransform,
+                                             int const_alpha);
+/* NEON-accelerated transformed blend of an RGB16 source onto RGB16. The
+   definition draws nothing when const_alpha is 0 and hands const_alpha == 256
+   to qt_transform_image_rgb16_on_rgb16(). */
+void qt_transform_image_rgb16_on_rgb16_neon(uchar *destPixels, int dbpl,
+                                            const uchar *srcPixels, int sbpl,
+                                            const QRectF &targetRect,
+                                            const QRectF &sourceRect,
+                                            const QRect &clip,
+                                            const QTransform &targetRectTransform,
+                                            int const_alpha);
+
#endif // QT_HAVE_NEON QT_END_NAMESPACE |