From e447e8a5356f5de4a6a62a28c0d21785b09b4cba Mon Sep 17 00:00:00 2001 From: Benjamin C Meyer Date: Thu, 30 Jul 2009 15:12:27 +0200 Subject: When there is no alpha channel significantly improve the speed of blendTiled. Copy the first line from the texture to the destination and from then on reference that copy reducing cache misses. Also progressively copy larger sizes, reducing the amount of time spend figuring out what to copy. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merge-request: 371 Reviewed-by: Samuel Rødal --- src/gui/painting/qdrawhelper.cpp | 72 ++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 979390a..b6603e4 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -4794,30 +4794,60 @@ Q_STATIC_TEMPLATE_FUNCTION void blendTiled(int count, const QSpan *spans, void * if (sy < 0) sy += image_height; - while (length) { - int l = qMin(image_width - sx, length); - if (buffer_size < l) - l = buffer_size; - - DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x; - const SRC *src = (SRC*)data->texture.scanLine(sy) + sx; - if (modeSource && coverage == 255) { + if (modeSource && coverage == 255) { + // Copy the first texture block + length = image_width; + while (length) { + int l = qMin(image_width - sx, length); + if (buffer_size < l) + l = buffer_size; + DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x; + const SRC *src = (SRC*)data->texture.scanLine(sy) + sx; qt_memconvert(dest, src, l); - } else if (sizeof(DST) == 3 && sizeof(SRC) == 3 && l >= 4 && - (quintptr(dest) & 3) == (quintptr(src) & 3)) - { - blendUntransformed_dest24(dest, src, coverage, l); - } else if (sizeof(DST) == 2 && sizeof(SRC) == 2 && l >= 2 && - (quintptr(dest) & 3) == (quintptr(src) & 3)) - { - blendUntransformed_dest16(dest, src, coverage, l); - } else { - blendUntransformed_unaligned(dest, src, coverage, l); + length -= l; + sx = 0; } - x += l; - length -= l; - sx = 0; + // Now use the rasterBuffer as the source of the texture, + // We can now progressively copy larger blocks + // - Less cpu time in code figuring out what to copy + // We are dealing with one block of data + // - More likely to fit in the cache + // - can use memcpy + int copy_image_width = image_width; + length = spans->len - image_width; + DST *src = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x; + DST *dest = src + copy_image_width; + while (copy_image_width < length) { + qt_memconvert(dest, src, copy_image_width); + dest += copy_image_width; + length -= copy_image_width; + copy_image_width *= 2; + } + qt_memconvert(dest, src, length); + } else { + while (length) { + int l = qMin(image_width - sx, length); + if (buffer_size < l) + l = buffer_size; + DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x; + const SRC *src = (SRC*)data->texture.scanLine(sy) + sx; + if (sizeof(DST) == 3 && sizeof(SRC) == 3 && l >= 4 && + (quintptr(dest) & 3) == (quintptr(src) & 3)) + { + blendUntransformed_dest24(dest, src, coverage, l); + } else if (sizeof(DST) == 2 && sizeof(SRC) == 2 && l >= 2 && + (quintptr(dest) & 3) == (quintptr(src) & 3)) + { + blendUntransformed_dest16(dest, src, coverage, l); + } else { + blendUntransformed_unaligned(dest, src, coverage, l); + } + + x += l; + length -= l; + sx = 0; + } } ++spans; } -- cgit v0.12