Significantly improve the speed of blendTiled when there is no alpha channel.

Copy the first line from the texture to the destination and, from then on, reference that copy, reducing cache misses. Also progressively copy larger sizes, reducing the amount of time spent figuring out what to copy.

Merge-request: 371
Reviewed-by: Samuel Rødal <sroedal@trolltech.com>
author: Benjamin C Meyer <benjamin.meyer@torchmobile.com> 2009-07-30 13:12:27 (GMT)
committer: Samuel Rødal <sroedal@trolltech.com> 2009-07-30 13:12:27 (GMT)
commit: e447e8a5356f5de4a6a62a28c0d21785b09b4cba (patch)
tree: e98c20aaf8c1cec7bfc908ea688c726aa8530de5
parent: 91fe038eb280ef64f92a029f9247e657361c90f7 (diff)
download: Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.zip
Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.tar.gz
Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.tar.bz2
1 files changed, 51 insertions, 21 deletions
diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp
index 979390a..b6603e4 100644
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@@ -4794,30 +4794,60 @@ Q_STATIC_TEMPLATE_FUNCTION void blendTiled(int count, const QSpan *spans, void *
         if (sy < 0)
             sy += image_height;
 
-        while (length) {
-            int l = qMin(image_width - sx, length);
-            if (buffer_size < l)
-                l = buffer_size;
-
-            DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x;
-            const SRC *src = (SRC*)data->texture.scanLine(sy) + sx;
-            if (modeSource && coverage == 255) {
+        if (modeSource && coverage == 255) {
+            // Copy the first texture block
+            length = image_width;
+            while (length) {
+                int l = qMin(image_width - sx, length);
+                if (buffer_size < l)
+                    l = buffer_size;
+                DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x;
+                const SRC *src = (SRC*)data->texture.scanLine(sy) + sx;
                 qt_memconvert<DST, SRC>(dest, src, l);
-            } else if (sizeof(DST) == 3 && sizeof(SRC) == 3 && l >= 4 &&
-                       (quintptr(dest) & 3) == (quintptr(src) & 3))
-            {
-                blendUntransformed_dest24(dest, src, coverage, l);
-            } else if (sizeof(DST) == 2 && sizeof(SRC) == 2 && l >= 2 &&
-                       (quintptr(dest) & 3) == (quintptr(src) & 3))
-            {
-                blendUntransformed_dest16(dest, src, coverage, l);
-            } else {
-                blendUntransformed_unaligned(dest, src, coverage, l);
+                length -= l;
+                sx = 0;
             }
 
-            x += l;
-            length -= l;
-            sx = 0;
+            // Now use the rasterBuffer as the source of the texture,
+            // We can now progressively copy larger blocks
+            // - Less cpu time in code figuring out what to copy
+            // We are dealing with one block of data
+            // - More likely to fit in the cache
+            // - can use memcpy
+            int copy_image_width = image_width;
+            length = spans->len - image_width;
+            DST *src = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x;
+            DST *dest = src + copy_image_width;
+            while (copy_image_width < length) {
+                qt_memconvert(dest, src, copy_image_width);
+                dest += copy_image_width;
+                length -= copy_image_width;
+                copy_image_width *= 2;
+            }
+            qt_memconvert(dest, src, length);
+        } else {
+            while (length) {
+                int l = qMin(image_width - sx, length);
+                if (buffer_size < l)
+                    l = buffer_size;
+                DST *dest = ((DST*)data->rasterBuffer->scanLine(spans->y)) + x;
+                const SRC *src = (SRC*)data->texture.scanLine(sy) + sx;
+                if (sizeof(DST) == 3 && sizeof(SRC) == 3 && l >= 4 &&
+                           (quintptr(dest) & 3) == (quintptr(src) & 3))
+                {
+                    blendUntransformed_dest24(dest, src, coverage, l);
+                } else if (sizeof(DST) == 2 && sizeof(SRC) == 2 && l >= 2 &&
+                           (quintptr(dest) & 3) == (quintptr(src) & 3))
+                {
+                    blendUntransformed_dest16(dest, src, coverage, l);
+                } else {
+                    blendUntransformed_unaligned(dest, src, coverage, l);
+                }
+
+                x += l;
+                length -= l;
+                sx = 0;
+            }
         }
         ++spans;
     }
author	Benjamin C Meyer <benjamin.meyer@torchmobile.com>	2009-07-30 13:12:27 (GMT)
committer	Samuel Rødal <sroedal@trolltech.com>	2009-07-30 13:12:27 (GMT)
commit	e447e8a5356f5de4a6a62a28c0d21785b09b4cba (patch)
tree	e98c20aaf8c1cec7bfc908ea688c726aa8530de5
parent	91fe038eb280ef64f92a029f9247e657361c90f7 (diff)
download	Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.zip Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.tar.gz Qt-e447e8a5356f5de4a6a62a28c0d21785b09b4cba.tar.bz2