From bdad106358ae177d1345f5ff85c0e38cfeb5ca90 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 22 Dec 2010 14:42:33 +0100 Subject: Improve toLatin1 x86 SIMD by using a new SSE4.1 instruction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new instruction is PBLENDVB, which creates a result by selecting bytes from one of two registers, depending on whether the mask contains a 1 (0xff) or a zero. The SSE2 code requires three instructions (and, andnot, or). The equivalent Neon instruction is VBSL (bit select). Reviewed-by: Samuel Rødal --- src/corelib/tools/qstring.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 8ad4e70..7cbef98 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3579,6 +3579,10 @@ static QByteArray toLatin1_helper(const QChar *data, int length) const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset); const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); +#ifdef __SSE4_1__ + chunk1 = _mm_blendv_epi8(chunk1, questionMark, offLimitMask); +#else + // offLimitQuestionMark contains '?' 
for each 16 bits that was off-limit // the 16 bits that were correct contains zeros const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); @@ -3589,6 +3593,7 @@ static QByteArray toLatin1_helper(const QChar *data, int length) // merge offLimitQuestionMark and correctBytes to have the result chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark); +#endif } __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load @@ -3597,9 +3602,13 @@ static QByteArray toLatin1_helper(const QChar *data, int length) // exactly the same operations as for the previous chunk of data const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset); const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); +#ifdef __SSE4_1__ + chunk2 = _mm_blendv_epi8(chunk2, questionMark, offLimitMask); +#else const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2); chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark); +#endif } // pack the two vector to 16 x 8bits elements -- cgit v0.12 From bb3bd601560132df769c32808ae0b36c56d1caab Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Wed, 22 Dec 2010 19:52:18 +0100 Subject: Create a function that merges the SSE common code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Samuel Rødal --- src/corelib/tools/qstring.cpp | 73 +++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 7cbef98..0edf291 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -3556,6 +3556,38 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const Use toLocal8Bit() instead. 
*/ +#if defined(QT_ALWAYS_HAVE_SSE2) +static inline __m128i mergeQuestionMarks(__m128i chunk) +{ + const __m128i questionMark = _mm_set1_epi16('?'); + + // SSE has no compare instruction for unsigned comparison. + // The variables must be shifted + 0x8000 to be compared + const __m128i signedBitOffset = _mm_set1_epi16(0x8000); + const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + + const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + +# ifdef __SSE4_1__ + // replace the non-Latin 1 characters in the chunk with question marks + chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask); +# else + // offLimitQuestionMark contains '?' for each 16 bits that was off-limit + // the 16 bits that were correct contains zeros + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + + // correctBytes contains the bytes that were in limit + // the 16 bits that were off limits contains zeros + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk); + + // merge offLimitQuestionMark and correctBytes to have the result + chunk = _mm_or_si128(correctBytes, offLimitQuestionMark); +# endif + return chunk; +} +#endif + static QByteArray toLatin1_helper(const QChar *data, int length) { QByteArray ba; @@ -3566,50 +3598,15 @@ static QByteArray toLatin1_helper(const QChar *data, int length) #if defined(QT_ALWAYS_HAVE_SSE2) if (length >= 16) { const int chunkCount = length >> 4; // divided by 16 - const __m128i questionMark = _mm_set1_epi16('?'); - // SSE has no compare instruction for unsigned comparison. 
- // The variables must be shiffted + 0x8000 to be compared - const __m128i signedBitOffset = _mm_set1_epi16(0x8000); - const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + for (int i = 0; i < chunkCount; ++i) { __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load + chunk1 = mergeQuestionMarks(chunk1); src += 8; - { - // each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff) - const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); - -#ifdef __SSE4_1__ - chunk1 = _mm_blendv_epi8(chunk1, questionMark, offLimitMask); -#else - - // offLimitQuestionMark contains '?' for each 16 bits that was off-limit - // the 16 bits that were correct contains zeros - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - - // correctBytes contains the bytes that were in limit - // the 16 bits that were off limits contains zeros - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1); - - // merge offLimitQuestionMark and correctBytes to have the result - chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark); -#endif - } __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load + chunk2 = mergeQuestionMarks(chunk2); src += 8; - { - // exactly the same operations as for the previous chunk of data - const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset); - const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); -#ifdef __SSE4_1__ - chunk2 = _mm_blendv_epi8(chunk2, questionMark, offLimitMask); -#else - const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); - const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2); - chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark); -#endif - } // pack the two vector to 16 x 8bits elements const __m128i result = _mm_packus_epi16(chunk1, chunk2); -- cgit v0.12 From 4746bad937a4abfc413aa56f316fc25115fe0525 Mon Sep 17 
00:00:00 2001 From: Thiago Macieira Date: Fri, 29 Apr 2011 18:49:11 +0200 Subject: Silence the callgrind warnings in our source code when using gcc 4.6 --- src/testlib/qbenchmarkvalgrind.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/testlib/qbenchmarkvalgrind.cpp b/src/testlib/qbenchmarkvalgrind.cpp index 4b260e8..22c7c37 100644 --- a/src/testlib/qbenchmarkvalgrind.cpp +++ b/src/testlib/qbenchmarkvalgrind.cpp @@ -225,6 +225,12 @@ bool QBenchmarkValgrindUtils::runCallgrindSubProcess(const QStringList &origAppA return finishedOk; } +#if defined(Q_CC_GNU) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) +// the callgrind macros below generate warnings +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif + void QBenchmarkCallgrindMeasurer::start() { CALLGRIND_ZERO_STATS; @@ -237,6 +243,11 @@ qint64 QBenchmarkCallgrindMeasurer::checkpoint() return result; } +#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) +// the callgrind macros above generate warnings +# pragma GCC diagnostic pop +#endif + qint64 QBenchmarkCallgrindMeasurer::stop() { return checkpoint(); -- cgit v0.12 From c169eeaf3886955d74b41f150e1035ee93c8c5c4 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Mon, 16 May 2011 13:21:18 +0200 Subject: Silence the "array out of bounds" warning in GCC 4.6. I can't find anything wrong with this code, so tell GCC simply to shut up. 
Reviewed-by: Trust Me --- src/opengl/qgl.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/opengl/qgl.cpp b/src/opengl/qgl.cpp index 2b0c8f8..4e941c6 100644 --- a/src/opengl/qgl.cpp +++ b/src/opengl/qgl.cpp @@ -366,6 +366,10 @@ void QGL::setPreferredPaintEngine(QPaintEngine::Type engineType) static inline void transform_point(GLdouble out[4], const GLdouble m[16], const GLdouble in[4]) { +#if defined(Q_CC_GNU) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Warray-bounds" +#endif #define M(row,col) m[col*4+row] out[0] = M(0, 0) * in[0] + M(0, 1) * in[1] + M(0, 2) * in[2] + M(0, 3) * in[3]; @@ -376,6 +380,9 @@ static inline void transform_point(GLdouble out[4], const GLdouble m[16], const out[3] = M(3, 0) * in[0] + M(3, 1) * in[1] + M(3, 2) * in[2] + M(3, 3) * in[3]; #undef M +#if defined(Q_CC_GNU) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) +# pragma GCC diagnostic pop +#endif } static inline GLint qgluProject(GLdouble objx, GLdouble objy, GLdouble objz, -- cgit v0.12 From 524dc28aa9172964622fe1764d1cc97a457b6ed3 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 14 Jun 2011 10:41:36 +0200 Subject: Silence a compiler warning about unhandled enum in switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value is impossible due to the construct, but the compiler complains nonetheless. So just add a value handled in the switch and a comment letting the reader know that it can't happen. 
Reviewed-By: Samuel Rødal --- src/gui/painting/qtextureglyphcache.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gui/painting/qtextureglyphcache.cpp b/src/gui/painting/qtextureglyphcache.cpp index 4bb4759..fdba9c9 100644 --- a/src/gui/painting/qtextureglyphcache.cpp +++ b/src/gui/painting/qtextureglyphcache.cpp @@ -308,6 +308,10 @@ QImage QTextureGlyphCache::textureMapForGlyph(glyph_t g, QFixed subPixelPosition format = QFontEngineFT::Format_Mono; imageFormat = QImage::Format_Mono; break; + case Raster_RGBMask: + // impossible condition (see the if-clause above) + // this option is here only to silence a compiler warning + break; }; QFontEngineFT *ft = static_cast (m_current_fontengine); -- cgit v0.12 From 82f32ef0e4e744276a011e30ce99aafa341ea816 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Fri, 29 Apr 2011 18:54:16 +0200 Subject: Fix alignment value not handled in ODF We're currently getting this error: src/gui/text/qtextodfwriter.cpp:592:16: error: enumeration value 'AlignBaseline' not handled in switch [-Werror=switch] Solve by making AlignBaseline operate like AlignNormal. Patch suggested by: Thomas Zander Reviewed-By: Thiago Macieira --- src/gui/text/qtextodfwriter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gui/text/qtextodfwriter.cpp b/src/gui/text/qtextodfwriter.cpp index 1619c9c..2addc0f 100644 --- a/src/gui/text/qtextodfwriter.cpp +++ b/src/gui/text/qtextodfwriter.cpp @@ -591,6 +591,7 @@ void QTextOdfWriter::writeCharacterFormat(QXmlStreamWriter &writer, QTextCharFor QString value; switch (format.verticalAlignment()) { case QTextCharFormat::AlignMiddle: + case QTextCharFormat::AlignBaseline: case QTextCharFormat::AlignNormal: value = QString::fromLatin1("0%"); break; case QTextCharFormat::AlignSuperScript: value = QString::fromLatin1("super"); break; case QTextCharFormat::AlignSubScript: value = QString::fromLatin1("sub"); break; -- cgit v0.12