From 9427b4c8f3b557524cda3f72cf81f68940cb7246 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Fri, 16 Jul 2010 20:32:38 +0200 Subject: Used aligned load and store when possible for the blending of ARGB32 Unaligned load and store can be costly. This patch mitigate the problem by aligning the destination before using SSE2. The destination is aligned because it is used by load and store, while the source is only use by load. On Atom, the blending test is 7% faster for ARGB32. Re-pushing that patch, thanks to awesome policies... Reviewed-by: Andreas Kling --- src/gui/painting/qdrawingprimitive_sse2_p.h | 33 ++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index 3c96946..b1f8306 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -141,12 +141,24 @@ QT_BEGIN_NAMESPACE // with shortcuts if fully opaque or fully transparent. #define BLEND_SOURCE_OVER_ARGB32_SSE2(dst, src, length, nullVector, half, one, colorMask, alphaMask) { \ int x = 0; \ +\ + /* First, get dst aligned. */ \ + const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast(dst) >> 2) & 0x3)) & 0x3;\ + const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ + for (; x < prologLength; ++x) { \ + uint s = src[x]; \ + if (s >= 0xff000000) \ + dst[x] = s; \ + else if (s != 0) \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ +\ for (; x < length-3; x += 4) { \ const __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ const __m128i srcVectorAlpha = _mm_and_si128(srcVector, alphaMask); \ if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, alphaMask)) == 0xffff) { \ /* all opaque */ \ - _mm_storeu_si128((__m128i *)&dst[x], srcVector); \ + _mm_store_si128((__m128i *)&dst[x], srcVector); \ } else if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVectorAlpha, nullVector)) != 0xffff) { \ /* not fully transparent */ \ /* extract the alpha channel on 2 x 16 bits */ \ @@ -157,13 +169,13 @@ QT_BEGIN_NAMESPACE alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ alphaChannel = _mm_sub_epi16(one, alphaChannel); \ \ - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ __m128i destMultipliedByOneMinusAlpha; \ BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ \ /* result = s + d * (1-alpha) */\ const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ - _mm_storeu_si128((__m128i *)&dst[x], result); \ + _mm_store_si128((__m128i *)&dst[x], result); \ } \ } \ for (; x < length; ++x) { \ @@ -189,6 +201,17 @@ QT_BEGIN_NAMESPACE #define BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2(dst, src, length, nullVector, half, one, colorMask, constAlphaVector) \ { \ int x = 0; \ +\ + const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast(dst) >> 2) & 0x3)) & 0x3;\ + const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ + for (; x < prologLength; ++x) { \ + uint s = src[x]; \ + if (s >= 0xff000000) \ + dst[x] = s; \ + else if (s != 0) \ + dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ +\ for (; x < length-3; x += 4) { \ __m128i srcVector = _mm_loadu_si128((__m128i *)&src[x]); \ if (_mm_movemask_epi8(_mm_cmpeq_epi32(srcVector, nullVector)) != 0xffff) { \ @@ -198,12 +221,12 @@ QT_BEGIN_NAMESPACE alphaChannel = _mm_or_si128(alphaChannel, _mm_slli_epi32(alphaChannel, 16)); \ alphaChannel = _mm_sub_epi16(one, alphaChannel); \ \ - const __m128i dstVector = _mm_loadu_si128((__m128i *)&dst[x]); \ + const __m128i dstVector = _mm_load_si128((__m128i *)&dst[x]); \ __m128i destMultipliedByOneMinusAlpha; \ BYTE_MUL_SSE2(destMultipliedByOneMinusAlpha, dstVector, alphaChannel, colorMask, half); \ \ const __m128i result = _mm_add_epi8(srcVector, destMultipliedByOneMinusAlpha); \ - _mm_storeu_si128((__m128i *)&dst[x], result); \ + _mm_store_si128((__m128i *)&dst[x], result); \ } \ } \ for (; x < length; ++x) { \ -- cgit v0.12 From 23ea4340a622cbfed81eb7afb2e09ec64b0ebef8 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sun, 18 Jul 2010 07:30:35 +0200 Subject: Corrected BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_SSE2 The unaligned prologue was processed without using the const alpha. Regressed with 9427b4c8f3b5. --- src/gui/painting/qdrawingprimitive_sse2_p.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gui/painting/qdrawingprimitive_sse2_p.h b/src/gui/painting/qdrawingprimitive_sse2_p.h index b1f8306..18355c2 100644 --- a/src/gui/painting/qdrawingprimitive_sse2_p.h +++ b/src/gui/painting/qdrawingprimitive_sse2_p.h @@ -205,11 +205,11 @@ QT_BEGIN_NAMESPACE const int offsetToAlignOn16Bytes = (4 - ((reinterpret_cast(dst) >> 2) & 0x3)) & 0x3;\ const int prologLength = qMin(length, offsetToAlignOn16Bytes);\ for (; x < prologLength; ++x) { \ - uint s = src[x]; \ - if (s >= 0xff000000) \ - dst[x] = s; \ - else if (s != 0) \ + quint32 s = src[x]; \ + if (s != 0) { \ + s = BYTE_MUL(s, const_alpha); \ dst[x] = s + BYTE_MUL(dst[x], qAlpha(~s)); \ + } \ } \ \ for (; x < length-3; x += 4) { \ -- cgit v0.12 From c6812138900b05012831e0f94d7d00aeac86cc2f Mon Sep 17 00:00:00 2001 From: Robin Burchell Date: Sun, 18 Jul 2010 23:05:44 +0200 Subject: Remove qMemCopy() usage from all .cpp files of Qt itself. This is (supposedly) more efficient as the compiler can optimise it to a builtin, per Thiago. Merge-request: 2430 Reviewed-by: Andreas Kling --- src/corelib/tools/qbytearray.cpp | 6 +++--- src/corelib/tools/qbytearraymatcher.cpp | 2 +- src/corelib/tools/qstring.cpp | 6 +++--- src/corelib/tools/qstringmatcher.cpp | 2 +- src/gui/text/qfontengine_s60.cpp | 4 ++-- src/gui/text/qstatictext.cpp | 12 ++++++------ src/gui/text/qtextdocument_p.cpp | 2 +- tests/auto/compiler/tst_compiler.cpp | 2 +- tools/assistant/tools/assistant/helpviewer_qwv.cpp | 2 +- tools/makeqpf/qpf2.cpp | 2 +- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/corelib/tools/qbytearray.cpp b/src/corelib/tools/qbytearray.cpp index b46af1f..a5cb16a 100644 --- a/src/corelib/tools/qbytearray.cpp +++ b/src/corelib/tools/qbytearray.cpp @@ -2152,18 +2152,18 @@ QByteArray QByteArray::repeated(int times) const if (result.d->alloc != resultSize) return QByteArray(); // not enough memory - qMemCopy(result.d->data, d->data, d->size); + memcpy(result.d->data, d->data, d->size); int sizeSoFar = d->size; char *end = result.d->data + sizeSoFar; const int halfResultSize = resultSize >> 1; while (sizeSoFar <= halfResultSize) { - qMemCopy(end, result.d->data, sizeSoFar); + memcpy(end, result.d->data, sizeSoFar); end += sizeSoFar; sizeSoFar <<= 1; } - qMemCopy(end, result.d->data, resultSize - sizeSoFar); + memcpy(end, result.d->data, resultSize - sizeSoFar); result.d->data[resultSize] = '\0'; result.d->size = resultSize; return result; diff --git a/src/corelib/tools/qbytearraymatcher.cpp b/src/corelib/tools/qbytearraymatcher.cpp index d5a59c9..f8504f0 100644 --- a/src/corelib/tools/qbytearraymatcher.cpp +++ b/src/corelib/tools/qbytearraymatcher.cpp @@ -171,7 +171,7 @@ QByteArrayMatcher::~QByteArrayMatcher() QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other) { q_pattern = other.q_pattern; - qMemCopy(&p, &other.p, sizeof(p)); + memcpy(&p, &other.p, sizeof(p)); return *this; } diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index b2cf49b..b5651de 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -6156,18 +6156,18 @@ QString QString::repeated(int times) const if (result.d->alloc != resultSize) return QString(); // not enough memory - qMemCopy(result.d->data, d->data, d->size * sizeof(ushort)); + memcpy(result.d->data, d->data, d->size * sizeof(ushort)); int sizeSoFar = d->size; ushort *end = result.d->data + sizeSoFar; const int halfResultSize = resultSize >> 1; while (sizeSoFar <= halfResultSize) { - qMemCopy(end, result.d->data, sizeSoFar * sizeof(ushort)); + memcpy(end, result.d->data, sizeSoFar * sizeof(ushort)); end += sizeSoFar; sizeSoFar <<= 1; } - qMemCopy(end, result.d->data, (resultSize - sizeSoFar) * sizeof(ushort)); + memcpy(end, result.d->data, (resultSize - sizeSoFar) * sizeof(ushort)); result.d->data[resultSize] = '\0'; result.d->size = resultSize; return result; diff --git a/src/corelib/tools/qstringmatcher.cpp b/src/corelib/tools/qstringmatcher.cpp index ff13624..80a8457 100644 --- a/src/corelib/tools/qstringmatcher.cpp +++ b/src/corelib/tools/qstringmatcher.cpp @@ -208,7 +208,7 @@ QStringMatcher &QStringMatcher::operator=(const QStringMatcher &other) if (this != &other) { q_pattern = other.q_pattern; q_cs = other.q_cs; - qMemCopy(q_data, other.q_data, sizeof(q_data)); + memcpy(q_data, other.q_data, sizeof(q_data)); } return *this; } diff --git a/src/gui/text/qfontengine_s60.cpp b/src/gui/text/qfontengine_s60.cpp index f691413..74ef646 100644 --- a/src/gui/text/qfontengine_s60.cpp +++ b/src/gui/text/qfontengine_s60.cpp @@ -94,7 +94,7 @@ bool QSymbianTypeFaceExtras::getSfntTableData(uint tag, uchar *buffer, uint *len } else { *length = tableByteLength; if (buffer) - qMemCopy(buffer, fontTable.TableContent(), tableByteLength); + memcpy(buffer, fontTable.TableContent(), tableByteLength); } fontTable.Close(); @@ -146,7 +146,7 @@ bool QSymbianTypeFaceExtras::getSfntTableData(uint tag, uchar *buffer, uint *len } else { *length = tableByteLength; if (buffer) - qMemCopy(buffer, table, tableByteLength); + memcpy(buffer, table, tableByteLength); } m_trueTypeExtension->ReleaseTrueTypeTable(table); diff --git a/src/gui/text/qstatictext.cpp b/src/gui/text/qstatictext.cpp index 91a6612..ab518d0 100644 --- a/src/gui/text/qstatictext.cpp +++ b/src/gui/text/qstatictext.cpp @@ -465,13 +465,13 @@ namespace { m_chars.resize(m_chars.size() + ti.num_chars); glyph_t *glyphsDestination = m_glyphs.data() + currentItem.glyphOffset; - qMemCopy(glyphsDestination, glyphs.constData(), sizeof(glyph_t) * currentItem.numGlyphs); + memcpy(glyphsDestination, glyphs.constData(), sizeof(glyph_t) * currentItem.numGlyphs); QFixedPoint *positionsDestination = m_positions.data() + currentItem.positionOffset; - qMemCopy(positionsDestination, positions.constData(), sizeof(QFixedPoint) * currentItem.numGlyphs); + memcpy(positionsDestination, positions.constData(), sizeof(QFixedPoint) * currentItem.numGlyphs); QChar *charsDestination = m_chars.data() + currentItem.charOffset; - qMemCopy(charsDestination, ti.chars, sizeof(QChar) * currentItem.numChars); + memcpy(charsDestination, ti.chars, sizeof(QChar) * currentItem.numChars); m_items.append(currentItem); } @@ -681,13 +681,13 @@ void QStaticTextPrivate::init() items = new QStaticTextItem[itemCount]; glyphPool = new glyph_t[glyphs.size()]; - qMemCopy(glyphPool, glyphs.constData(), glyphs.size() * sizeof(glyph_t)); + memcpy(glyphPool, glyphs.constData(), glyphs.size() * sizeof(glyph_t)); positionPool = new QFixedPoint[positions.size()]; - qMemCopy(positionPool, positions.constData(), positions.size() * sizeof(QFixedPoint)); + memcpy(positionPool, positions.constData(), positions.size() * sizeof(QFixedPoint)); charPool = new QChar[chars.size()]; - qMemCopy(charPool, chars.constData(), chars.size() * sizeof(QChar)); + memcpy(charPool, chars.constData(), chars.size() * sizeof(QChar)); for (int i=0; istringPosition, it->size_array[0] * sizeof(QChar)); + memcpy(newTextPtr, text.constData() + it->stringPosition, it->size_array[0] * sizeof(QChar)); it->stringPosition = newLen; newTextPtr += it->size_array[0]; newLen += it->size_array[0]; diff --git a/tests/auto/compiler/tst_compiler.cpp b/tests/auto/compiler/tst_compiler.cpp index 368620d..90eb357 100644 --- a/tests/auto/compiler/tst_compiler.cpp +++ b/tests/auto/compiler/tst_compiler.cpp @@ -640,7 +640,7 @@ static inline double qt_inf() #endif union { uchar c[8]; double d; } returnValue; - qMemCopy(returnValue.c, bytes, sizeof(returnValue.c)); + memcpy(returnValue.c, bytes, sizeof(returnValue.c)); return returnValue.d; } diff --git a/tools/assistant/tools/assistant/helpviewer_qwv.cpp b/tools/assistant/tools/assistant/helpviewer_qwv.cpp index dcbbf2c..221e1b8 100644 --- a/tools/assistant/tools/assistant/helpviewer_qwv.cpp +++ b/tools/assistant/tools/assistant/helpviewer_qwv.cpp @@ -101,7 +101,7 @@ qint64 HelpNetworkReply::readData(char *buffer, qint64 maxlen) TRACE_OBJ qint64 len = qMin(qint64(data.length()), maxlen); if (len) { - qMemCopy(buffer, data.constData(), len); + memcpy(buffer, data.constData(), len); data.remove(0, len); } if (!data.length()) diff --git a/tools/makeqpf/qpf2.cpp b/tools/makeqpf/qpf2.cpp index ab57cea..cafdb79 100644 --- a/tools/makeqpf/qpf2.cpp +++ b/tools/makeqpf/qpf2.cpp @@ -543,7 +543,7 @@ void QPF::addGlyphs(QFontEngine *fe, const QList &ranges) ; } - qMemCopy(data, img.bits(), img.byteCount()); + memcpy(data, img.bits(), img.byteCount()); } } } -- cgit v0.12 From 9e4fb690b864f8d0c05d70687a3498b9eb9b2ed5 Mon Sep 17 00:00:00 2001 From: Alan Alpert Date: Mon, 19 Jul 2010 13:07:07 +1000 Subject: Fix Samegame Change to Behavior from Follow means that some of the previous assumptions are now incorrect. Script logic has been fixed. Task-number: QTBUG-12246 --- demos/declarative/samegame/SamegameCore/samegame.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demos/declarative/samegame/SamegameCore/samegame.js b/demos/declarative/samegame/SamegameCore/samegame.js index 6e1b24d..aa1b359 100755 --- a/demos/declarative/samegame/SamegameCore/samegame.js +++ b/demos/declarative/samegame/SamegameCore/samegame.js @@ -110,7 +110,7 @@ function shuffleDown() }else{ if(fallDist > 0){ var obj = board[index(column,row)]; - obj.y += fallDist * gameCanvas.blockSize; + obj.y = (row+fallDist) * gameCanvas.blockSize; board[index(column,row+fallDist)] = obj; board[index(column,row)] = null; } @@ -128,7 +128,7 @@ function shuffleDown() obj = board[index(column,row)]; if(obj == null) continue; - obj.x -= fallDist * gameCanvas.blockSize; + obj.x = (column-fallDist) * gameCanvas.blockSize; board[index(column-fallDist,row)] = obj; board[index(column,row)] = null; } -- cgit v0.12