From 4e915e3942c1523ffdda01e36c019f842062b794 Mon Sep 17 00:00:00 2001
From: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@nokia.com>
Date: Wed, 7 Jul 2010 11:04:33 +0200
Subject: Fix text drawing into alpha pixmap with opengl engine

The merge 03dc74984749adf5b11482bf871a47086217845c mistakenly merged
the glyphMargin() (which had been removed in 4.7 because
QGLTextureGlyphCache now inherits from the image glyph cache) with the
glyphPadding() which had been introduced in separate commits in both
branches (probably backported.) This broke text drawing into a pixmap
with an alpha with the GL engine, because we'd assume a margin of 1,
but the alphaMapForGlyph() function doesn't support margins.

Task-number: QTBUG-11987
Reviewed-by: Gunnar
---
 src/opengl/gl2paintengineex/qtextureglyphcache_gl.cpp | 5 -----
 src/opengl/gl2paintengineex/qtextureglyphcache_gl_p.h | 1 -
 2 files changed, 6 deletions(-)

diff --git a/src/opengl/gl2paintengineex/qtextureglyphcache_gl.cpp b/src/opengl/gl2paintengineex/qtextureglyphcache_gl.cpp
index 5371c5e..410cf21 100644
--- a/src/opengl/gl2paintengineex/qtextureglyphcache_gl.cpp
+++ b/src/opengl/gl2paintengineex/qtextureglyphcache_gl.cpp
@@ -276,11 +276,6 @@ void QGLTextureGlyphCache::fillTexture(const Coord &c, glyph_t glyph)
     }
 }
 
-int QGLTextureGlyphCache::glyphMargin() const
-{
-    return 1;
-}
-
 int QGLTextureGlyphCache::glyphPadding() const
 {
     return 1;
diff --git a/src/opengl/gl2paintengineex/qtextureglyphcache_gl_p.h b/src/opengl/gl2paintengineex/qtextureglyphcache_gl_p.h
index 84e9021..6bcd655 100644
--- a/src/opengl/gl2paintengineex/qtextureglyphcache_gl_p.h
+++ b/src/opengl/gl2paintengineex/qtextureglyphcache_gl_p.h
@@ -72,7 +72,6 @@ public:
     virtual void createTextureData(int width, int height);
     virtual void resizeTextureData(int width, int height);
     virtual void fillTexture(const Coord &c, glyph_t glyph);
-    virtual int glyphMargin() const;
     virtual int glyphPadding() const;
 
     inline GLuint texture() const { return m_texture; }
-- 
cgit v0.12


From 2769d4b72675e62c441fa181609adca25922715a Mon Sep 17 00:00:00 2001
From: John Brooks <special@dereferenced.net>
Date: Mon, 5 Jul 2010 21:17:48 +0200
Subject: Move logic for building SIMD extensions to gui.pro

Enables SIMD files to be built outside of painting.pri by appending
files to SSE_SOURCES etc.

Merge-request: 725
Reviewed-by: Benjamin Poulain <benjamin.poulain@nokia.com>
---
 src/gui/gui.pro               | 126 ++++++++++++++++++++++++++++++++++
 src/gui/painting/painting.pri | 152 ++----------------------------------------
 2 files changed, 133 insertions(+), 145 deletions(-)

diff --git a/src/gui/gui.pro b/src/gui/gui.pro
index dede9d0..41f1904 100644
--- a/src/gui/gui.pro
+++ b/src/gui/gui.pro
@@ -77,3 +77,129 @@ symbian {
     DEPLOYMENT = partial_upgrade $$DEPLOYMENT
 }
 
+contains(QMAKE_MAC_XARCH, no) {
+    DEFINES += QT_NO_MAC_XARCH
+} else {
+    mmx:DEFINES += QT_HAVE_MMX
+    3dnow:DEFINES += QT_HAVE_3DNOW
+    sse:DEFINES += QT_HAVE_SSE QT_HAVE_MMXEXT
+    sse2:DEFINES += QT_HAVE_SSE2
+    iwmmxt:DEFINES += QT_HAVE_IWMMXT
+
+    win32-g++*|!win32:!*-icc* {
+        mmx {
+            mmx_compiler.commands = $$QMAKE_CXX -c -Winline
+
+            mac {
+                mmx_compiler.commands += -Xarch_i386 -mmmx
+                mmx_compiler.commands += -Xarch_x86_64 -mmmx
+            } else {
+                mmx_compiler.commands += -mmmx
+            }
+
+            mmx_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+            mmx_compiler.dependency_type = TYPE_C
+            mmx_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+            mmx_compiler.input = MMX_SOURCES
+            mmx_compiler.variable_out = OBJECTS
+            mmx_compiler.name = compiling[mmx] ${QMAKE_FILE_IN}
+            silent:mmx_compiler.commands = @echo compiling[mmx] ${QMAKE_FILE_IN} && $$mmx_compiler.commands
+            QMAKE_EXTRA_COMPILERS += mmx_compiler
+        }
+        3dnow {
+            mmx3dnow_compiler.commands = $$QMAKE_CXX -c -Winline
+
+            mac {
+                mmx3dnow_compiler.commands += -Xarch_i386 -m3dnow -Xarch_i386 -mmmx
+                mmx3dnow_compiler.commands += -Xarch_x86_64 -m3dnow -Xarch_x86_64 -mmmx
+            } else {
+                mmx3dnow_compiler.commands += -m3dnow -mmmx
+            }
+
+            mmx3dnow_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+            mmx3dnow_compiler.dependency_type = TYPE_C
+            mmx3dnow_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+            mmx3dnow_compiler.input = MMX3DNOW_SOURCES
+            mmx3dnow_compiler.variable_out = OBJECTS
+            mmx3dnow_compiler.name = compiling[mmx3dnow] ${QMAKE_FILE_IN}
+            silent:mmx3dnow_compiler.commands = @echo compiling[mmx3dnow] ${QMAKE_FILE_IN} && $$mmx3dnow_compiler.commands
+            QMAKE_EXTRA_COMPILERS += mmx3dnow_compiler
+            sse {
+                sse3dnow_compiler.commands = $$QMAKE_CXX -c -Winline
+
+                mac {
+                    sse3dnow_compiler.commands += -Xarch_i386 -m3dnow -Xarch_i386 -msse
+                    sse3dnow_compiler.commands += -Xarch_x86_64 -m3dnow -Xarch_x86_64 -msse
+                } else {
+                    sse3dnow_compiler.commands += -m3dnow -msse
+                }
+
+                sse3dnow_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+                sse3dnow_compiler.dependency_type = TYPE_C
+                sse3dnow_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+                sse3dnow_compiler.input = SSE3DNOW_SOURCES
+                sse3dnow_compiler.variable_out = OBJECTS
+                sse3dnow_compiler.name = compiling[sse3dnow] ${QMAKE_FILE_IN}
+                silent:sse3dnow_compiler.commands = @echo compiling[sse3dnow] ${QMAKE_FILE_IN} && $$sse3dnow_compiler.commands
+                QMAKE_EXTRA_COMPILERS += sse3dnow_compiler
+            }
+        }
+        sse {
+            sse_compiler.commands = $$QMAKE_CXX -c -Winline
+
+            mac {
+                sse_compiler.commands += -Xarch_i386 -msse
+                sse_compiler.commands += -Xarch_x86_64 -msse
+            } else {
+                sse_compiler.commands += -msse
+            }
+
+            sse_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+            sse_compiler.dependency_type = TYPE_C
+            sse_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+            sse_compiler.input = SSE_SOURCES
+            sse_compiler.variable_out = OBJECTS
+            sse_compiler.name = compiling[sse] ${QMAKE_FILE_IN}
+            silent:sse_compiler.commands = @echo compiling[sse] ${QMAKE_FILE_IN} && $$sse_compiler.commands
+            QMAKE_EXTRA_COMPILERS += sse_compiler
+        }
+        sse2 {
+            sse2_compiler.commands = $$QMAKE_CXX -c -Winline
+
+            mac {
+                sse2_compiler.commands += -Xarch_i386 -msse2
+                sse2_compiler.commands += -Xarch_x86_64 -msse2
+            } else {
+                sse2_compiler.commands += -msse2
+            }
+
+            sse2_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+            sse2_compiler.dependency_type = TYPE_C
+            sse2_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+            sse2_compiler.input = SSE2_SOURCES
+            sse2_compiler.variable_out = OBJECTS
+            sse2_compiler.name = compiling[sse2] ${QMAKE_FILE_IN}
+            silent:sse2_compiler.commands = @echo compiling[sse2] ${QMAKE_FILE_IN} && $$sse2_compiler.commands
+            QMAKE_EXTRA_COMPILERS += sse2_compiler
+        }
+        iwmmxt {
+            iwmmxt_compiler.commands = $$QMAKE_CXX -c -Winline
+            iwmmxt_compiler.commands += -mcpu=iwmmxt
+            iwmmxt_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+            iwmmxt_compiler.dependency_type = TYPE_C
+            iwmmxt_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+            iwmmxt_compiler.input = IWMMXT_SOURCES
+            iwmmxt_compiler.variable_out = OBJECTS
+            iwmmxt_compiler.name = compiling[iwmmxt] ${QMAKE_FILE_IN}
+            silent:iwmmxt_compiler.commands = @echo compiling[iwmmxt] ${QMAKE_FILE_IN} && $$iwmmxt_compiler.commands
+            QMAKE_EXTRA_COMPILERS += iwmmxt_compiler
+        }
+    } else {
+        mmx: SOURCES += $$MMX_SOURCES
+        3dnow: SOURCES += $$MMX3DNOW_SOURCES
+        3dnow:sse: SOURCES += $$SSE3DNOW_SOURCES
+        sse: SOURCES += $$SSE_SOURCES
+        sse2: SOURCES += $$SSE2_SOURCES
+        iwmmxt: SOURCES += $$IWMMXT_SOURCES
+    }
+}
diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index 07aabc9..c207c9d 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -202,154 +202,16 @@ x11|embedded {
         DEFINES += QT_NO_CUPS QT_NO_LPR
 }
 
-contains(QMAKE_MAC_XARCH, no) {
-    DEFINES += QT_NO_MAC_XARCH
-} else:if(mmx|3dnow|sse|sse2|iwmmxt) {
+if(mmx|3dnow|sse|sse2|iwmmxt) {
     HEADERS += painting/qdrawhelper_x86_p.h \
                painting/qdrawhelper_mmx_p.h \
                painting/qdrawhelper_sse_p.h
-    mmx {
-        DEFINES += QT_HAVE_MMX
-        MMX_SOURCES += painting/qdrawhelper_mmx.cpp
-    }
-    3dnow {
-        DEFINES += QT_HAVE_3DNOW
-        MMX3DNOW_SOURCES += painting/qdrawhelper_mmx3dnow.cpp
-        sse {
-            SSE3DNOW_SOURCES += painting/qdrawhelper_sse3dnow.cpp
-        }
-    }
-    sse {
-        DEFINES += QT_HAVE_SSE
-        SSE_SOURCES += painting/qdrawhelper_sse.cpp
-
-        DEFINES += QT_HAVE_MMXEXT
-    }
-    sse2 {
-        DEFINES += QT_HAVE_SSE2
-        SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
-    }
-    iwmmxt {
-        DEFINES += QT_HAVE_IWMMXT
-        IWMMXT_SOURCES += painting/qdrawhelper_iwmmxt.cpp
-    }
-
-    win32-g++*|!win32:!*-icc* {
-        mmx {
-            mmx_compiler.commands = $$QMAKE_CXX -c -Winline
-
-            mac {
-                mmx_compiler.commands += -Xarch_i386 -mmmx
-                mmx_compiler.commands += -Xarch_x86_64 -mmmx
-            } else {
-                mmx_compiler.commands += -mmmx
-            }
-
-            mmx_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-            mmx_compiler.dependency_type = TYPE_C
-            mmx_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-            mmx_compiler.input = MMX_SOURCES
-            mmx_compiler.variable_out = OBJECTS
-            mmx_compiler.name = compiling[mmx] ${QMAKE_FILE_IN}
-            silent:mmx_compiler.commands = @echo compiling[mmx] ${QMAKE_FILE_IN} && $$mmx_compiler.commands
-            QMAKE_EXTRA_COMPILERS += mmx_compiler
-        }
-        3dnow {
-            mmx3dnow_compiler.commands = $$QMAKE_CXX -c -Winline
-
-            mac {
-                mmx3dnow_compiler.commands += -Xarch_i386 -m3dnow -Xarch_i386 -mmmx
-                mmx3dnow_compiler.commands += -Xarch_x86_64 -m3dnow -Xarch_x86_64 -mmmx
-            } else {
-                mmx3dnow_compiler.commands += -m3dnow -mmmx
-            }
-
-            mmx3dnow_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-            mmx3dnow_compiler.dependency_type = TYPE_C
-            mmx3dnow_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-            mmx3dnow_compiler.input = MMX3DNOW_SOURCES
-            mmx3dnow_compiler.variable_out = OBJECTS
-            mmx3dnow_compiler.name = compiling[mmx3dnow] ${QMAKE_FILE_IN}
-            silent:mmx3dnow_compiler.commands = @echo compiling[mmx3dnow] ${QMAKE_FILE_IN} && $$mmx3dnow_compiler.commands
-            QMAKE_EXTRA_COMPILERS += mmx3dnow_compiler
-            sse {
-                sse3dnow_compiler.commands = $$QMAKE_CXX -c -Winline
-
-                mac {
-                    sse3dnow_compiler.commands += -Xarch_i386 -m3dnow -Xarch_i386 -msse
-                    sse3dnow_compiler.commands += -Xarch_x86_64 -m3dnow -Xarch_x86_64 -msse
-                } else {
-                    sse3dnow_compiler.commands += -m3dnow -msse
-                }
-
-                sse3dnow_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-                sse3dnow_compiler.dependency_type = TYPE_C
-                sse3dnow_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-                sse3dnow_compiler.input = SSE3DNOW_SOURCES
-                sse3dnow_compiler.variable_out = OBJECTS
-                sse3dnow_compiler.name = compiling[sse3dnow] ${QMAKE_FILE_IN}
-                silent:sse3dnow_compiler.commands = @echo compiling[sse3dnow] ${QMAKE_FILE_IN} && $$sse3dnow_compiler.commands
-                QMAKE_EXTRA_COMPILERS += sse3dnow_compiler
-            }
-        }
-        sse {
-            sse_compiler.commands = $$QMAKE_CXX -c -Winline
-
-            mac {
-                sse_compiler.commands += -Xarch_i386 -msse
-                sse_compiler.commands += -Xarch_x86_64 -msse
-            } else {
-                sse_compiler.commands += -msse
-            }
-
-            sse_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-            sse_compiler.dependency_type = TYPE_C
-            sse_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-            sse_compiler.input = SSE_SOURCES
-            sse_compiler.variable_out = OBJECTS
-            sse_compiler.name = compiling[sse] ${QMAKE_FILE_IN}
-            silent:sse_compiler.commands = @echo compiling[sse] ${QMAKE_FILE_IN} && $$sse_compiler.commands
-            QMAKE_EXTRA_COMPILERS += sse_compiler
-        }
-        sse2 {
-            sse2_compiler.commands = $$QMAKE_CXX -c -Winline
-
-            mac {
-                sse2_compiler.commands += -Xarch_i386 -msse2
-                sse2_compiler.commands += -Xarch_x86_64 -msse2
-            } else {
-                sse2_compiler.commands += -msse2
-            }
-
-            sse2_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-            sse2_compiler.dependency_type = TYPE_C
-            sse2_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-            sse2_compiler.input = SSE2_SOURCES
-            sse2_compiler.variable_out = OBJECTS
-            sse2_compiler.name = compiling[sse2] ${QMAKE_FILE_IN}
-            silent:sse2_compiler.commands = @echo compiling[sse2] ${QMAKE_FILE_IN} && $$sse2_compiler.commands
-            QMAKE_EXTRA_COMPILERS += sse2_compiler
-        }
-        iwmmxt {
-            iwmmxt_compiler.commands = $$QMAKE_CXX -c -Winline
-            iwmmxt_compiler.commands += -mcpu=iwmmxt
-            iwmmxt_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
-            iwmmxt_compiler.dependency_type = TYPE_C
-            iwmmxt_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
-            iwmmxt_compiler.input = IWMMXT_SOURCES
-            iwmmxt_compiler.variable_out = OBJECTS
-            iwmmxt_compiler.name = compiling[iwmmxt] ${QMAKE_FILE_IN}
-            silent:iwmmxt_compiler.commands = @echo compiling[iwmmxt] ${QMAKE_FILE_IN} && $$iwmmxt_compiler.commands
-            QMAKE_EXTRA_COMPILERS += iwmmxt_compiler
-        }
-    } else {
-        mmx: SOURCES += $$MMX_SOURCES
-        3dnow: SOURCES += $$MMX3DNOW_SOURCES
-        3dnow:sse: SOURCES += $$SSE3DNOW_SOURCES
-        sse: SOURCES += $$SSE_SOURCES
-        sse2: SOURCES += $$SSE2_SOURCES
-        iwmmxt: SOURCES += $$IWMMXT_SOURCES
-    }
+    MMX_SOURCES += painting/qdrawhelper_mmx.cpp
+    MMX3DNOW_SOURCES += painting/qdrawhelper_mmx3dnow.cpp
+    SSE3DNOW_SOURCES += painting/qdrawhelper_sse3dnow.cpp
+    SSE_SOURCES += painting/qdrawhelper_sse.cpp
+    SSE2_SOURCES += painting/qdrawhelper_sse2.cpp
+    IWMMXT_SOURCES += painting/qdrawhelper_iwmmxt.cpp
 }
 
 x11 {
-- 
cgit v0.12


From 93bcbe213e947843184a75f4b237c8dff45ca866 Mon Sep 17 00:00:00 2001
From: John Brooks <special@dereferenced.net>
Date: Mon, 5 Jul 2010 21:17:49 +0200
Subject: Moved primitive SSE2 painting utilities to qdrawingprimitive_sse2_p.h

Merge-request: 725
Reviewed-by: Benjamin Poulain <benjamin.poulain@nokia.com>
---
 src/gui/painting/painting.pri               |   3 +-
 src/gui/painting/qdrawhelper_sse2.cpp       | 156 --------------------
 src/gui/painting/qdrawingprimitive_sse2_p.h | 216 ++++++++++++++++++++++++++++
 3 files changed, 218 insertions(+), 157 deletions(-)
 create mode 100644 src/gui/painting/qdrawingprimitive_sse2_p.h

diff --git a/src/gui/painting/painting.pri b/src/gui/painting/painting.pri
index c207c9d..4023f65 100644
--- a/src/gui/painting/painting.pri
+++ b/src/gui/painting/painting.pri
@@ -205,7 +205,8 @@ x11|embedded {
 if(mmx|3dnow|sse|sse2|iwmmxt) {
     HEADERS += painting/qdrawhelper_x86_p.h \
                painting/qdrawhelper_mmx_p.h \
-               painting/qdrawhelper_sse_p.h
+               painting/qdrawhelper_sse_p.h \
+               painting/qdrawingprimitive_sse2_p.h
     MMX_SOURCES += painting/qdrawhelper_mmx.cpp
     MMX3DNOW_SOURCES += painting/qdrawhelper_mmx3dnow.cpp
     SSE3DNOW_SOURCES += painting/qdrawhelper_sse3dnow.cpp
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index 6cd8688..ae16fed 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -57,162 +57,6 @@
 
 QT_BEGIN_NAMESPACE
 
-/*
- * Multiply the components of pixelVector by alphaChannel
- * Each 32bits components of alphaChannel must be in the form 0x00AA00AA
- * colorMask must have 0x00ff00ff on each 32 bits component
- * half must have the value 128 (0x80) for each 32 bits compnent
- */
-#define BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half) \
-{ \
-    /* 1. separate the colors in 2 vectors so each color is on 16 bits \
-       (in order to be multiplied by the alpha \
-       each 32 bit of dstVectorAG are in the form 0x00AA00GG \
-       each 32 bit of dstVectorRB are in the form 0x00RR00BB */\
-    __m128i pixelVectorAG = _mm_srli_epi16(pixelVector, 8); \
-    __m128i pixelVectorRB = _mm_and_si128(pixelVector, colorMask); \
- \
-    /* 2. multiply the vectors by the alpha channel */\
-    pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); \
-    pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); \
- \
-    /* 3. devide by 255, that's the tricky part. \
-       we do it like for BYTE_MUL(), with bit shift: X/255 ~= (X + X/256 + rounding)/256 */ \
-    /** so first (X + X/256 + rounding) */\
-    pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); \
-    pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); \
-    pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); \
-    pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); \
- \
-    /** second devide by 256 */\
-    pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); \
-    /** for AG, we could >> 8 to divide followed by << 8 to put the \
-        bytes in the correct position. By masking instead, we execute \
-        only one instruction */\
-    pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); \
- \
-    /* 4. combine the 2 pairs of colors */ \
-    result = _mm_or_si128(pixelVectorAG, pixelVectorRB); \
-}
-
-/*
- * Each 32bits components of alphaChannel must be in the form 0x00AA00AA
- * oneMinusAlphaChannel must be 255 - alpha for each 32 bits component
- * colorMask must have 0x00ff00ff on each 32 bits component
- * half must have the value 128 (0x80) for each 32 bits compnent
- */
-#define INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, alphaChannel, oneMinusAlphaChannel, colorMask, half) { \
-    /* interpolate AG */\
-    __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); \
-    __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); \
-    __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, alphaChannel); \
-    __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusAlphaChannel); \
-    __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); \
-    finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); \
-    finalAG = _mm_add_epi16(finalAG, half); \
-    finalAG = _mm_andnot_si128(colorMask, finalAG); \
- \
-    /* interpolate RB */\
-    __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); \
-    __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); \
-    __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, alphaChannel); \
-    __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusAlphaChannel); \
-    __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); \
-    finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); \
-    finalRB = _mm_add_epi16(finalRB, half); \
-    finalRB = _mm_srli_epi16(finalRB, 8); \
- \
-    /* combine */\
-    result = _mm_or_si128(finalAG, finalRB); \
-}
-
-// Basically blend src over dst with the const alpha defined as constAlphaVector.
-// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as:
-//const __m128i nullVector = _mm_set1_epi32(0);
-//const __m128i half = _mm_set1_epi16(0x80);
-//const __m128i one = _mm_set1_epi16(0xff);
-//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
-//
-// The computation being done is:
-// result = s + d * (1-alpha)
-// with shortcuts if fully opaque or fully transparent.
-#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
-    int x = 0; \
-    for (; x < length-3; x += 4) { \
-        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
-        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
-        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
-            /* all opaque */ \
-            _mm_storeu_si128((__m128i *)&dst[x], srcVector); \
-        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
-            /* not fully transparent */ \
-            /* extract the alpha channel on 2 x 16 bits */ \
-            /* so we have room for the multiplication */ \
-            /* each 32 bits will be in the form 0x00AA00AA */ \
-            /* with A being the 1 - alpha */ \
-            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
-            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
-            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
- \
-            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
-            __m128i destMultipliedByOneMinusAlpha; \
-            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
- \
-            /* result = s + d * (1-alpha) */\
-            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_storeu_si128((__m128i *)&dst[x], result); \
-        } \
-    } \
-    for (; x < length; ++x) { \
-        uint s = src[x]; \
-        if (s >= 0xff000000) \
-            dst[x] = s; \
-        else if (s != 0) \
-            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
-    } \
-}
-
-// Basically blend src over dst with the const alpha defined as constAlphaVector.
-// nullVector, half, one, colorMask are constant accross the whole image/texture, and should be defined as:
-//const __m128i nullVector = _mm_set1_epi32(0);
-//const __m128i half = _mm_set1_epi16(0x80);
-//const __m128i one = _mm_set1_epi16(0xff);
-//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
-//
-// The computation being done is:
-// dest = (s + d * sia) * ca + d * cia
-//      = s * ca + d * (sia * ca + cia)
-//      = s * ca + d * (1 - sa*ca)
-#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
-{ \
-    int x = 0; \
-    for (; x < length-3; x += 4) { \
-        __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
-        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
-            BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \
-\
-            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
-            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
-            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
- \
-            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
-            __m128i destMultipliedByOneMinusAlpha; \
-            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
- \
-            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
-            _mm_storeu_si128((__m128i *)&dst[x], result); \
-        } \
-    } \
-    for (; x < length; ++x) { \
-        quint32 s = src[x]; \
-        if (s != 0) { \
-            s = BYTE_MUL(s, const_alpha); \
-            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
-        } \
-    } \
-}
-
 void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
                                     const uchar *srcPixels, int sbpl,
                                     int w, int h,
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
new file mode 100644
index 0000000..2b595c5
--- /dev/null
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -0,0 +1,216 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QDRAWINGPRIMITIVE_SSE2_P_H
+#define QDRAWINGPRIMITIVE_SSE2_P_H
+
+//
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the Qt API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+QT_BEGIN_NAMESPACE
+
+/*
+ * Multiply the components of pixelVector by alphaChannel
+ * Each 32-bit component of alphaChannel must be in the form 0x00AA00AA
+ * colorMask must have 0x00ff00ff on each 32-bit component
+ * half must have the value 128 (0x80) on each 16-bit component
+ */
+#define BYTE_MUL_SSE2(result, pixelVector, alphaChannel, colorMask, half) \
+{ \
+    /* 1. separate the colors in 2 vectors so each color is on 16 bits \
+       (in order to be multiplied by the alpha) \
+       each 32 bit of pixelVectorAG are in the form 0x00AA00GG \
+       each 32 bit of pixelVectorRB are in the form 0x00RR00BB */\
+    __m128i pixelVectorAG = _mm_srli_epi16(pixelVector, 8); \
+    __m128i pixelVectorRB = _mm_and_si128(pixelVector, colorMask); \
+ \
+    /* 2. multiply the vectors by the alpha channel */\
+    pixelVectorAG = _mm_mullo_epi16(pixelVectorAG, alphaChannel); \
+    pixelVectorRB = _mm_mullo_epi16(pixelVectorRB, alphaChannel); \
+ \
+    /* 3. divide by 255, that's the tricky part. \
+       we do it like for BYTE_MUL(), with bit shift: X/255 ~= (X + X/256 + rounding)/256 */ \
+    /** so first (X + X/256 + rounding) */\
+    pixelVectorRB = _mm_add_epi16(pixelVectorRB, _mm_srli_epi16(pixelVectorRB, 8)); \
+    pixelVectorRB = _mm_add_epi16(pixelVectorRB, half); \
+    pixelVectorAG = _mm_add_epi16(pixelVectorAG, _mm_srli_epi16(pixelVectorAG, 8)); \
+    pixelVectorAG = _mm_add_epi16(pixelVectorAG, half); \
+ \
+    /** second divide by 256 */\
+    pixelVectorRB = _mm_srli_epi16(pixelVectorRB, 8); \
+    /** for AG, we could >> 8 to divide followed by << 8 to put the \
+        bytes in the correct position. By masking instead, we execute \
+        only one instruction */\
+    pixelVectorAG = _mm_andnot_si128(colorMask, pixelVectorAG); \
+ \
+    /* 4. combine the 2 pairs of colors */ \
+    result = _mm_or_si128(pixelVectorAG, pixelVectorRB); \
+}
+
+/*
+ * Each 32-bit component of alphaChannel must be in the form 0x00AA00AA
+ * oneMinusAlphaChannel must be 255 - alpha for each 32-bit component
+ * colorMask must have 0x00ff00ff on each 32-bit component
+ * half must have the value 128 (0x80) on each 16-bit component
+ */
+#define INTERPOLATE_PIXEL_255_SSE2(result, srcVector, dstVector, alphaChannel, oneMinusAlphaChannel, colorMask, half) { \
+    /* interpolate AG */\
+    __m128i srcVectorAG = _mm_srli_epi16(srcVector, 8); \
+    __m128i dstVectorAG = _mm_srli_epi16(dstVector, 8); \
+    __m128i srcVectorAGalpha = _mm_mullo_epi16(srcVectorAG, alphaChannel); \
+    __m128i dstVectorAGoneMinusAlphalpha = _mm_mullo_epi16(dstVectorAG, oneMinusAlphaChannel); \
+    __m128i finalAG = _mm_add_epi16(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha); \
+    finalAG = _mm_add_epi16(finalAG, _mm_srli_epi16(finalAG, 8)); \
+    finalAG = _mm_add_epi16(finalAG, half); \
+    finalAG = _mm_andnot_si128(colorMask, finalAG); \
+ \
+    /* interpolate RB */\
+    __m128i srcVectorRB = _mm_and_si128(srcVector, colorMask); \
+    __m128i dstVectorRB = _mm_and_si128(dstVector, colorMask); \
+    __m128i srcVectorRBalpha = _mm_mullo_epi16(srcVectorRB, alphaChannel); \
+    __m128i dstVectorRBoneMinusAlphalpha = _mm_mullo_epi16(dstVectorRB, oneMinusAlphaChannel); \
+    __m128i finalRB = _mm_add_epi16(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha); \
+    finalRB = _mm_add_epi16(finalRB, _mm_srli_epi16(finalRB, 8)); \
+    finalRB = _mm_add_epi16(finalRB, half); \
+    finalRB = _mm_srli_epi16(finalRB, 8); \
+ \
+    /* combine */\
+    result = _mm_or_si128(finalAG, finalRB); \
+}
+
+// Basically blend src over dst using the per-pixel alpha of src (no constant alpha is applied).
+// nullVector, half, one, colorMask, alphaMask are constant across the whole image/texture, and should be defined as:
+//const __m128i nullVector = _mm_set1_epi32(0);
+//const __m128i half = _mm_set1_epi16(0x80);
+//const __m128i one = _mm_set1_epi16(0xff);
+//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+//const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+//
+// The computation being done is:
+// result = s + d * (1-alpha)
+// with shortcuts if fully opaque or fully transparent.
+#define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \
+    int x = 0; \
+    for (; x < length-3; x += 4) { \
+        const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
+        const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \
+        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \
+            /* all opaque */ \
+            _mm_storeu_si128((__m128i *)&dst[x], srcVector); \
+        } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \
+            /* not fully transparent */ \
+            /* extract the alpha channel on 2 x 16 bits */ \
+            /* so we have room for the multiplication */ \
+            /* each 32 bits will be in the form 0x00AA00AA */ \
+            /* with A being the 1 - alpha */ \
+            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+            __m128i destMultipliedByOneMinusAlpha; \
+            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+            /* result = s + d * (1-alpha) */\
+            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+            _mm_storeu_si128((__m128i *)&dst[x], result); \
+        } \
+    } \
+    for (; x < length; ++x) { \
+        uint s = src[x]; \
+        if (s >= 0xff000000) \
+            dst[x] = s; \
+        else if (s != 0) \
+            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+    } \
+}
+
+// Basically blend src over dst with the const alpha defined as constAlphaVector.
+// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
+//const __m128i nullVector = _mm_set1_epi32(0);
+//const __m128i half = _mm_set1_epi16(0x80);
+//const __m128i one = _mm_set1_epi16(0xff);
+//const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+//
+// The computation being done is:
+// dest = (s + d * sia) * ca + d * cia
+//      = s * ca + d * (sia * ca + cia)
+//      = s * ca + d * (1 - sa*ca)
+#define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \
+{ \
+    int x = 0; \
+    for (; x < length-3; x += 4) { \
+        __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \
+        if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \
+            BYTE_MUL_SSE2(srcVector, srcVector, constAlphaVector, colorMask, half); \
+\
+            __m128i alphaChannel = _mm_srli_epi32(srcVector, 24); \
+            alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \
+            alphaChannel = _mm_sub_epi16(one, alphaChannel); \
+ \
+            const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \
+            __m128i destMultipliedByOneMinusAlpha; \
+            BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \
+ \
+            const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \
+            _mm_storeu_si128((__m128i *)&dst[x], result); \
+        } \
+    } \
+    for (; x < length; ++x) { \
+        quint32 s = src[x]; \
+        if (s != 0) { \
+            s = BYTE_MUL(s, const_alpha); \
+            dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \
+        } \
+    } \
+}
+
+QT_END_NAMESPACE
+
+#endif // QDRAWINGPRIMITIVE_SSE2_P_H
-- 
cgit v0.12


From dad8e33546a209820da1f3d07a0e331e001bc23e Mon Sep 17 00:00:00 2001
From: John Brooks <special@dereferenced.net>
Date: Mon, 5 Jul 2010 21:17:49 +0200
Subject: SSE2 implementation of convert_ARGB_to_ARGB_PM_inplace for QImage

Merge-request: 725
Reviewed-by: Benjamin Poulain <benjamin.poulain@nokia.com>
---
 src/corelib/tools/qsimd_p.h                 |   5 +-
 src/gui/image/image.pri                     |   3 +
 src/gui/image/qimage.cpp                    |  18 ++++-
 src/gui/image/qimage_p.h                    |   2 +
 src/gui/image/qimage_sse2.cpp               | 109 ++++++++++++++++++++++++++++
 src/gui/kernel/qapplication.cpp             |   3 +
 src/gui/painting/qdrawhelper_sse2.cpp       |  12 +--
 src/gui/painting/qdrawingprimitive_sse2_p.h |   6 ++
 8 files changed, 144 insertions(+), 14 deletions(-)
 create mode 100644 src/gui/image/qimage_sse2.cpp

diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 58d2dcb..0ed9d5d 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -58,8 +58,7 @@ QT_BEGIN_HEADER
 #endif
 
 // SSE intrinsics
-#if defined(QT_HAVE_SSE2) && !defined(QT_BOOTSTRAPPED) && (defined(__SSE2__) \
-    || (defined(Q_CC_MSVC) && (defined(_M_X64) || _M_IX86_FP == 2)))
+#if defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
 #if defined(QT_LINUXBASE)
 /// this is an evil hack - the posix_memalign declaration in LSB
 /// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
@@ -73,8 +72,10 @@ QT_BEGIN_HEADER
 #  include <emmintrin.h>
 #endif
 
+#if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2))
 #define QT_ALWAYS_HAVE_SSE2
 #endif
+#endif // defined(QT_HAVE_SSE2) && (defined(__SSE2__) || defined(Q_CC_MSVC))
 
 // NEON intrinsics
 #if defined(QT_HAVE_NEON)
diff --git a/src/gui/image/image.pri b/src/gui/image/image.pri
index 9f0c87e..3a02d56 100644
--- a/src/gui/image/image.pri
+++ b/src/gui/image/image.pri
@@ -92,3 +92,6 @@ contains(QT_CONFIG, jpeg):include($$PWD/qjpeghandler.pri)
 contains(QT_CONFIG, mng):include($$PWD/qmnghandler.pri)
 contains(QT_CONFIG, tiff):include($$PWD/qtiffhandler.pri)
 contains(QT_CONFIG, gif):include($$PWD/qgifhandler.pri)
+
+# SIMD
+SSE2_SOURCES += image/qimage_sse2.cpp
diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 79f266d..88a0366 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -58,6 +58,7 @@
 #include <private/qmemrotate_p.h>
 #include <private/qpixmapdata_p.h>
 #include <private/qimagescale_p.h>
+#include <private/qsimd_p.h>
 
 #include <qhash.h>
 
@@ -209,7 +210,7 @@ QImageData * QImageData::create(const QSize &size, QImage::Format format, int nu
         break;
     }
 
-    const int bytes_per_line = ((width * depth + 31) >> 5) << 2; // bytes per scanline (must be multiple of 8)
+    const int bytes_per_line = ((width * depth + 31) >> 5) << 2; // bytes per scanline (must be multiple of 4)
 
     // sanity check for potential overflows
     if (INT_MAX/depth < width
@@ -3630,7 +3631,7 @@ static const Image_Converter converter_map[QImage::NImageFormats][QImage::NImage
     } // Format_ARGB4444_Premultiplied
 };
 
-static const InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QImage::NImageFormats] =
+static InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats][QImage::NImageFormats] =
 {
     {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
@@ -3727,6 +3728,19 @@ static const InPlace_Image_Converter inplace_converter_map[QImage::NImageFormats
     } // Format_ARGB4444_Premultiplied
 };
 
+void qInitImageConversions()
+{
+    const uint features = qDetectCPUFeatures();
+    Q_UNUSED(features);
+
+#ifdef QT_HAVE_SSE2
+    if (features & SSE2) {
+        extern bool convert_ARGB_to_ARGB_PM_inplace_SSE2(QImageData *data, Qt::ImageConversionFlags);
+        inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_SSE2;
+    }
+#endif
+}
+
 /*!
     Returns a copy of the image in the given \a format.
 
diff --git a/src/gui/image/qimage_p.h b/src/gui/image/qimage_p.h
index f1a0c47..5272848 100644
--- a/src/gui/image/qimage_p.h
+++ b/src/gui/image/qimage_p.h
@@ -108,6 +108,8 @@ struct Q_GUI_EXPORT QImageData {        // internal image data
     QPaintEngine *paintEngine;
 };
 
+void qInitImageConversions();
+
 QT_END_NAMESPACE
 
 #endif
diff --git a/src/gui/image/qimage_sse2.cpp b/src/gui/image/qimage_sse2.cpp
new file mode 100644
index 0000000..e2b89b9
--- /dev/null
+++ b/src/gui/image/qimage_sse2.cpp
@@ -0,0 +1,109 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights.  These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qimage.h"
+#include <private/qimage_p.h>
+#include <private/qsimd_p.h>
+#include <private/qdrawhelper_p.h>
+#include <private/qdrawingprimitive_sse2_p.h>
+
+#ifdef QT_HAVE_SSE2
+
+QT_BEGIN_NAMESPACE
+
+bool convert_ARGB_to_ARGB_PM_inplace_SSE2(QImageData *data, Qt::ImageConversionFlags)
+{
+    Q_ASSERT(data->format == QImage::Format_ARGB32);
+
+    // extra pixels on each line
+    const int spare = data->width & 3;
+    // width in pixels of the pad at the end of each line
+    const int pad = (data->bytes_per_line >> 2) - data->width;
+    const int iter = data->width >> 2;
+    int height = data->height;
+
+    const __m128i alphaMask = _mm_set1_epi32(0xff000000);
+    const __m128i nullVector = _mm_setzero_si128();
+    const __m128i half = _mm_set1_epi16(0x80);
+    const __m128i colorMask = _mm_set1_epi32(0x00ff00ff);
+
+    __m128i *d = reinterpret_cast<__m128i*>(data->data);
+    while (height--) {
+        const __m128i *end = d + iter;
+
+        for (; d != end; ++d) {
+            const __m128i srcVector = _mm_loadu_si128(d);
+            const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask);
+            if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) {
+                // opaque, data is unchanged
+            } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) == 0xffff) {
+                // fully transparent
+                _mm_storeu_si128(d, nullVector);
+            } else {
+                __m128i alphaChannel = _mm_srli_epi32(srcVector, 24);
+                alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16));
+
+                __m128i result;
+                BYTE_MUL_SSE2(result, srcVector, alphaChannel, colorMask, half);
+                result = _mm_or_si128(_mm_andnot_si128(alphaMask, result), srcVectorAlpha);
+                _mm_storeu_si128(d, result);
+            }
+        }
+
+        QRgb *p = reinterpret_cast<QRgb*>(d);
+        QRgb *pe = p+spare;
+        for (; p != pe; ++p) {
+            if (*p < 0x00ffffff)
+                *p = 0;
+            else if (*p < 0xff000000)
+                *p = PREMUL(*p);
+        }
+
+        d = reinterpret_cast<__m128i*>(p+pad);
+    }
+
+    data->format = QImage::Format_ARGB32_Premultiplied;
+    return true;
+}
+
+QT_END_NAMESPACE
+
+#endif // QT_HAVE_SSE2
diff --git a/src/gui/kernel/qapplication.cpp b/src/gui/kernel/qapplication.cpp
index ccfe88c..94211fd 100644
--- a/src/gui/kernel/qapplication.cpp
+++ b/src/gui/kernel/qapplication.cpp
@@ -902,6 +902,7 @@ QApplication::QApplication(Display *dpy, int &argc, char **argv,
 #endif // Q_WS_X11
 
 extern void qInitDrawhelperAsm();
+extern void qInitImageConversions();
 extern int qRegisterGuiVariant();
 extern int qUnregisterGuiVariant();
 #ifndef QT_NO_STATEMACHINE
@@ -959,6 +960,8 @@ void QApplicationPrivate::initialize()
 
     // Set up which span functions should be used in raster engine...
     qInitDrawhelperAsm();
+    // and QImage conversion functions
+    qInitImageConversions();
 
 #ifndef QT_NO_WHEELEVENT
     QApplicationPrivate::wheel_scroll_lines = 3;
diff --git a/src/gui/painting/qdrawhelper_sse2.cpp b/src/gui/painting/qdrawhelper_sse2.cpp
index ae16fed..346e177 100644
--- a/src/gui/painting/qdrawhelper_sse2.cpp
+++ b/src/gui/painting/qdrawhelper_sse2.cpp
@@ -43,18 +43,10 @@
 
 #ifdef QT_HAVE_SSE2
 
+#include <private/qsimd_p.h>
+#include <private/qdrawingprimitive_sse2_p.h>
 #include <private/qpaintengine_raster_p.h>
 
-#ifdef QT_LINUXBASE
-// this is an evil hack - the posix_memalign declaration in LSB
-// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
-#  define posix_memalign _lsb_hack_posix_memalign
-#  include <emmintrin.h>
-#  undef posix_memalign
-#else
-#  include <emmintrin.h>
-#endif
-
 QT_BEGIN_NAMESPACE
 
 void qt_blend_argb32_on_argb32_sse2(uchar *destPixels, int dbpl,
diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h
index 2b595c5..3c96946 100644
--- a/src/gui/painting/qdrawingprimitive_sse2_p.h
+++ b/src/gui/painting/qdrawingprimitive_sse2_p.h
@@ -42,6 +42,10 @@
 #ifndef QDRAWINGPRIMITIVE_SSE2_P_H
 #define QDRAWINGPRIMITIVE_SSE2_P_H
 
+#include <private/qsimd_p.h>
+
+#ifdef QT_HAVE_SSE2
+
 //
 //  W A R N I N G
 //  -------------
@@ -213,4 +217,6 @@ QT_BEGIN_NAMESPACE
 
 QT_END_NAMESPACE
 
+#endif // QT_HAVE_SSE2
+
 #endif // QDRAWINGPRIMITIVE_SSE2_P_H
-- 
cgit v0.12


From 03f325217ab8d896327cb8a31f07df4633e9a485 Mon Sep 17 00:00:00 2001
From: Benjamin Poulain <benjamin.poulain@nokia.com>
Date: Tue, 6 Jul 2010 13:22:10 +0200
Subject: Build fix, the header of QImageData did not declare QImageWriter.

Including this header in the SSE2 file broke the build because
QImageWriter was not declared.
---
 src/gui/image/qimage_p.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gui/image/qimage_p.h b/src/gui/image/qimage_p.h
index 5272848..da535aa 100644
--- a/src/gui/image/qimage_p.h
+++ b/src/gui/image/qimage_p.h
@@ -61,6 +61,8 @@
 #include <QMap>
 #endif
 
+class QImageWriter;
+
 QT_BEGIN_NAMESPACE
 
 struct Q_GUI_EXPORT QImageData {        // internal image data
-- 
cgit v0.12


From 51509e8df8caf9c84312255a0dae41615fda1168 Mon Sep 17 00:00:00 2001
From: Benjamin Poulain <benjamin.poulain@nokia.com>
Date: Wed, 7 Jul 2010 11:53:45 +0200
Subject: Add test and fix style for the SSE2 implementation of ARGB32
 conversion

The commit beba018814b35c4bd032e6b9fa948e4bac34c59a introduced conversion
from ARGB32 to ARGB32_PM with SSE2.

This patch adds a benchmark for this type of usage, and changes the
name to use lowercase for SSE2 (style convention).
---
 src/gui/image/qimage.cpp                           |  4 +-
 src/gui/image/qimage_sse2.cpp                      |  2 +-
 tests/benchmarks/gui/image/qpixmap/tst_qpixmap.cpp | 64 +++++++++++++++++++++-
 3 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/src/gui/image/qimage.cpp b/src/gui/image/qimage.cpp
index 88a0366..e5930ac 100644
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@@ -3735,8 +3735,8 @@ void qInitImageConversions()
 
 #ifdef QT_HAVE_SSE2
     if (features & SSE2) {
-        extern bool convert_ARGB_to_ARGB_PM_inplace_SSE2(QImageData *data, Qt::ImageConversionFlags);
-        inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_SSE2;
+        extern bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags);
+        inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_sse2;
     }
 #endif
 }
diff --git a/src/gui/image/qimage_sse2.cpp b/src/gui/image/qimage_sse2.cpp
index e2b89b9..82d49a6 100644
--- a/src/gui/image/qimage_sse2.cpp
+++ b/src/gui/image/qimage_sse2.cpp
@@ -49,7 +49,7 @@
 
 QT_BEGIN_NAMESPACE
 
-bool convert_ARGB_to_ARGB_PM_inplace_SSE2(QImageData *data, Qt::ImageConversionFlags)
+bool convert_ARGB_to_ARGB_PM_inplace_sse2(QImageData *data, Qt::ImageConversionFlags)
 {
     Q_ASSERT(data->format == QImage::Format_ARGB32);
 
diff --git a/tests/benchmarks/gui/image/qpixmap/tst_qpixmap.cpp b/tests/benchmarks/gui/image/qpixmap/tst_qpixmap.cpp
index 8e9de4a..27e5025 100644
--- a/tests/benchmarks/gui/image/qpixmap/tst_qpixmap.cpp
+++ b/tests/benchmarks/gui/image/qpixmap/tst_qpixmap.cpp
@@ -40,9 +40,12 @@
 ****************************************************************************/
 
 #include <qtest.h>
-#include <QPixmap>
 #include <QBitmap>
+#include <QDir>
+#include <QImage>
+#include <QImageReader>
 #include <QPainter>
+#include <QPixmap>
 #include <private/qpixmap_raster_p.h>
 
 class tst_QPixmap : public QObject
@@ -62,6 +65,9 @@ private slots:
     void transformed();
     void mask_data();
     void mask();
+
+    void fromImageReader_data();
+    void fromImageReader();
 };
 
 Q_DECLARE_METATYPE(QImage::Format)
@@ -248,6 +254,62 @@ void tst_QPixmap::mask()
     }
 }
 
+void tst_QPixmap::fromImageReader_data()
+{
+    const QString tempDir = QDir::tempPath();
+    QTest::addColumn<QString>("filename");
+
+    QImage image(2000, 2000, QImage::Format_ARGB32);
+    image.fill(0);
+    {
+        // Generate an image with opaque and transparent pixels
+        // with an interesting distribution.
+        QPainter painter(&image);
+
+        QRadialGradient radialGrad(QPointF(1000, 1000), 1000);
+        radialGrad.setColorAt(0, QColor(255, 0, 0, 255));
+        radialGrad.setColorAt(0.5, QColor(0, 255, 0, 255));
+        radialGrad.setColorAt(0.9, QColor(0, 0, 255, 100));
+        radialGrad.setColorAt(1, QColor(0, 0, 0, 0));
+
+        painter.fillRect(image.rect(), radialGrad);
+    }
+    image.save("test.png");
+
+    // RGB32
+    const QString rgb32Path = tempDir + QString::fromLatin1("/rgb32.jpg");
+    image.save(rgb32Path);
+    QTest::newRow("gradient RGB32") << rgb32Path;
+
+    // ARGB32
+    const QString argb32Path = tempDir + QString::fromLatin1("/argb32.png");
+    image.save(argb32Path);
+    QTest::newRow("gradient ARGB32") << argb32Path;
+
+    // Indexed 8
+    const QString indexed8Path = tempDir + QString::fromLatin1("/indexed8.gif");
+    image.save(indexed8Path);
+    QTest::newRow("gradient indexed8") << indexed8Path;
+
+}
+
+void tst_QPixmap::fromImageReader()
+{
+    QFETCH(QString, filename);
+    // warmup
+    {
+        QImageReader imageReader(filename);
+        QPixmap::fromImageReader(&imageReader);
+    }
+
+    QBENCHMARK {
+        QImageReader imageReader(filename);
+        QPixmap::fromImageReader(&imageReader);
+    }
+    QFile::remove(filename);
+}
+
+
 QTEST_MAIN(tst_QPixmap)
 
 #include "tst_qpixmap.moc"
-- 
cgit v0.12


From e231da119daec62c0aa2cee055c539154a0f935d Mon Sep 17 00:00:00 2001
From: Charles Yin <charles.yin@nokia.com>
Date: Thu, 8 Jul 2010 14:25:53 +1000
Subject: Fixes the sqlite driver bug QTBUG-11904 (pointer aliasing)

Applying upstream sqlite patch from: http://www.sqlite.org/src/info/d6ae275122

Task-number: QTBUG-11904
Reviewed-by: Michael Goddard
---
 src/3rdparty/sqlite/sqlite3.c | 47 +++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/3rdparty/sqlite/sqlite3.c b/src/3rdparty/sqlite/sqlite3.c
index 46d3dfc..27a8d18 100644
--- a/src/3rdparty/sqlite/sqlite3.c
+++ b/src/3rdparty/sqlite/sqlite3.c
@@ -48449,11 +48449,15 @@ SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe *p){
 #endif /* !SQLITE_OMIT_TRACE && SQLITE_ENABLE_IOTRACE */
 
 /*
-** Allocate space from a fixed size buffer.  Make *pp point to the
-** allocated space.  (Note:  pp is a char* rather than a void** to
-** work around the pointer aliasing rules of C.)  *pp should initially
-** be zero.  If *pp is not zero, that means that the space has already
-** been allocated and this routine is a noop.
+** Allocate space from a fixed size buffer and return a pointer to
+** that space.  If insufficient space is available, return NULL.
+**
+** The pBuf parameter is the initial value of a pointer which will
+** receive the new memory.  pBuf is normally NULL.  If pBuf is not
+** NULL, it means that memory space has already been allocated and that
+** this routine should not allocate any new memory.  When pBuf is not
+** NULL simply return pBuf.  Only allocate new memory space when pBuf
+** is NULL.
 **
 ** nByte is the number of bytes of space needed.
 **
@@ -48464,23 +48468,23 @@ SQLITE_PRIVATE void sqlite3VdbeIOTraceSql(Vdbe *p){
 ** to allocate.  If there is insufficient space in *ppFrom to satisfy the
 ** request, then increment *pnByte by the amount of the request.
 */
-static void allocSpace(
-  char *pp,            /* IN/OUT: Set *pp to point to allocated buffer */
+static void *allocSpace(
+  void *pBuf,          /* Where return pointer will be stored */
   int nByte,           /* Number of bytes to allocate */
   u8 **ppFrom,         /* IN/OUT: Allocate from *ppFrom */
   u8 *pEnd,            /* Pointer to 1 byte past the end of *ppFrom buffer */
   int *pnByte          /* If allocation cannot be made, increment *pnByte */
 ){
   assert( EIGHT_BYTE_ALIGNMENT(*ppFrom) );
-  if( (*(void**)pp)==0 ){
-    nByte = ROUND8(nByte);
-    if( &(*ppFrom)[nByte] <= pEnd ){
-      *(void**)pp = (void *)*ppFrom;
-      *ppFrom += nByte;
-    }else{
-      *pnByte += nByte;
-    }
+  if( pBuf ) return pBuf;
+  nByte = ROUND8(nByte);
+  if( &(*ppFrom)[nByte] <= pEnd ){
+    pBuf = (void*)*ppFrom;
+    *ppFrom += nByte;
+  }else{
+    *pnByte += nByte;
   }
+  return pBuf;
 }
 
 /*
@@ -48553,13 +48557,12 @@ SQLITE_PRIVATE void sqlite3VdbeMakeReady(
 
     do {
       nByte = 0;
-      allocSpace((char*)&p->aMem, nMem*sizeof(Mem), &zCsr, zEnd, &nByte);
-      allocSpace((char*)&p->aVar, nVar*sizeof(Mem), &zCsr, zEnd, &nByte);
-      allocSpace((char*)&p->apArg, nArg*sizeof(Mem*), &zCsr, zEnd, &nByte);
-      allocSpace((char*)&p->azVar, nVar*sizeof(char*), &zCsr, zEnd, &nByte);
-      allocSpace((char*)&p->apCsr, 
-                 nCursor*sizeof(VdbeCursor*), &zCsr, zEnd, &nByte
-      );
+      p->aMem = allocSpace(p->aMem, nMem*sizeof(Mem), &zCsr, zEnd, &nByte);
+      p->aVar = allocSpace(p->aVar, nVar*sizeof(Mem), &zCsr, zEnd, &nByte);
+      p->apArg = allocSpace(p->apArg, nArg*sizeof(Mem*), &zCsr, zEnd, &nByte);
+      p->azVar = allocSpace(p->azVar, nVar*sizeof(char*), &zCsr, zEnd, &nByte);
+      p->apCsr = allocSpace(p->apCsr, nCursor*sizeof(VdbeCursor*),
+                            &zCsr, zEnd, &nByte);
       if( nByte ){
         p->pFree = sqlite3DbMallocZero(db, nByte);
       }
-- 
cgit v0.12