Performance: reduce overhead of updateMatrix() in GL2 paint engine

The original code was performing 40 floating-point multiplications, 40 additions, and 2 divisions every time the matrix was changed. Because most of the components in the orthographic projection matrix are trivial, we can implement the same transformation with only 6 multiplications, 6 additions, and 2 divisions.

Reviewed-by: Sarah Smith
author: Rhys Weatherley <rhys.weatherley@nokia.com> 2009-09-21 04:23:49 (GMT)
committer: Rhys Weatherley <rhys.weatherley@nokia.com> 2009-09-21 04:49:02 (GMT)
commit: 1e284a2970efdbf32b61db3cfb207eebf7f33d14 (patch)
tree: 88cbd7a042dae8e2a8647ab2b491bd430fe9d207 /src/opengl
parent: 8db9f834e604c3a9eda8f76eacad2a9af20dbd33 (diff)
download: Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.zip
Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.tar.gz
Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.tar.bz2
1 files changed, 45 insertions, 32 deletions
diff --git a/src/opengl/gl2paintengineex/qpaintengineex_opengl2.cpp b/src/opengl/gl2paintengineex/qpaintengineex_opengl2.cpp
index 837d055..930d181 100644
--- a/src/opengl/gl2paintengineex/qpaintengineex_opengl2.cpp
+++ b/src/opengl/gl2paintengineex/qpaintengineex_opengl2.cpp
@@ -543,46 +543,59 @@ void QGL2PaintEngineExPrivate::updateMatrix()
 {
 //     qDebug("QGL2PaintEngineExPrivate::updateMatrix()");
 
-    // We setup the Projection matrix to be the equivilant of glOrtho(0, w, h, 0, -1, 1):
-    GLfloat P[4][4] = {
-        {2.0/width,  0.0,        0.0, -1.0},
-        {0.0,       -2.0/height, 0.0,  1.0},
-        {0.0,        0.0,       -1.0,  0.0},
-        {0.0,        0.0,        0.0,  1.0}
-    };
+    // We set up the 4x4 transformation matrix on the vertex shaders to
+    // be the equivalent of glOrtho(0, w, h, 0, -1, 1) * transform:
+    //
+    // | 2/width     0     0 -1 |   | m11  m21  0   dx |
+    // |   0    -2/height  0  1 |   | m12  m22  0   dy |
+    // |   0         0    -1  0 | * |  0    0   1   0  |
+    // |   0         0     0  1 |   | m13  m23  0  m33 |
+    //
+    // We expand out the multiplication to save the cost of a full 4x4
+    // matrix multiplication as most of the components are trivial.
 
     const QTransform& transform = q->state()->matrix;
 
     if (mode == TextDrawingMode) {
         // Text drawing mode is only used for non-scaling transforms
-        for (int row = 0; row < 4; ++row)
-            for (int col = 0; col < 4; ++col)
-                pmvMatrix[col][row] = P[row][col];
-
-        pmvMatrix[3][0] += P[0][0] * qRound(transform.dx());
-        pmvMatrix[3][1] += P[1][1] * qRound(transform.dy());
+        pmvMatrix[0][0] = 2.0 / width;
+        pmvMatrix[0][1] = 0.0;
+        pmvMatrix[0][2] = 0.0;
+        pmvMatrix[0][3] = 0.0;
+        pmvMatrix[1][0] = 0.0;
+        pmvMatrix[1][1] = -2.0 / height;
+        pmvMatrix[1][2] = 0.0;
+        pmvMatrix[1][3] = 0.0;
+        pmvMatrix[2][0] = 0.0;
+        pmvMatrix[2][1] = 0.0;
+        pmvMatrix[2][2] = -1.0;
+        pmvMatrix[2][3] = 0.0;
+        pmvMatrix[3][0] = pmvMatrix[0][0] * qRound(transform.dx()) - 1.0;
+        pmvMatrix[3][1] = pmvMatrix[1][1] * qRound(transform.dy()) + 1.0;
+        pmvMatrix[3][2] = 0.0;
+        pmvMatrix[3][3] = 1.0;
 
         inverseScale = 1;
     } else {
-        // Use the (3x3) transform for the Model~View matrix:
-        GLfloat MV[4][4] = {
-            {transform.m11(), transform.m21(), 0.0, transform.dx()},
-            {transform.m12(), transform.m22(), 0.0, transform.dy()},
-            {0.0,             0.0,             1.0, 0.0},
-            {transform.m13(), transform.m23(), 0.0, transform.m33()}
-        };
-
-        // NOTE: OpenGL ES works with column-major matrices, so when we multiply the matrices,
-        //       we also transpose them ready for GL.
-        for (int row = 0; row < 4; ++row) {
-            for (int col = 0; col < 4; ++col) {
-                pmvMatrix[col][row] = 0.0;
-
-                // P[row][n] is 0.0 for n < row
-                for (int n = row; n < 4; ++n)
-                    pmvMatrix[col][row] += P[row][n] * MV[n][col];
-            }
-        }
+        qreal wfactor = 2.0 / width;
+        qreal hfactor = -2.0 / height;
+
+        pmvMatrix[0][0] = wfactor * transform.m11() - transform.m13();
+        pmvMatrix[0][1] = hfactor * transform.m12() + transform.m13();
+        pmvMatrix[0][2] = 0.0;
+        pmvMatrix[0][3] = transform.m13();
+        pmvMatrix[1][0] = wfactor * transform.m21() - transform.m23();
+        pmvMatrix[1][1] = hfactor * transform.m22() + transform.m23();
+        pmvMatrix[1][2] = 0.0;
+        pmvMatrix[1][3] = transform.m23();
+        pmvMatrix[2][0] = 0.0;
+        pmvMatrix[2][1] = 0.0;
+        pmvMatrix[2][2] = -1.0;
+        pmvMatrix[2][3] = 0.0;
+        pmvMatrix[3][0] = wfactor * transform.dx() - transform.m33();
+        pmvMatrix[3][1] = hfactor * transform.dy() + transform.m33();
+        pmvMatrix[3][2] = 0.0;
+        pmvMatrix[3][3] = transform.m33();
 
         // 1/10000 == 0.0001, so we have good enough res to cover curves
         // that span the entire widget...
author	Rhys Weatherley <rhys.weatherley@nokia.com>	2009-09-21 04:23:49 (GMT)
committer	Rhys Weatherley <rhys.weatherley@nokia.com>	2009-09-21 04:49:02 (GMT)
commit	1e284a2970efdbf32b61db3cfb207eebf7f33d14 (patch)
tree	88cbd7a042dae8e2a8647ab2b491bd430fe9d207 /src/opengl
parent	8db9f834e604c3a9eda8f76eacad2a9af20dbd33 (diff)
download	Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.zip Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.tar.gz Qt-1e284a2970efdbf32b61db3cfb207eebf7f33d14.tar.bz2