From 69abc957bca39bce4ed954b0469eb6abce8173fa Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 23 Feb 2010 11:32:43 +0100 Subject: Move the SIMD detection from QtGui to QtCore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SIMD instructions are useful outside painting code, the common functions are moved to QtCore Reviewed-by: Samuel Rødal --- src/corelib/corelib.pro | 22 ++++ src/corelib/tools/qsimd.cpp | 227 +++++++++++++++++++++++++++++++++++++++ src/corelib/tools/qsimd_p.h | 122 +++++++++++++++++++++ src/corelib/tools/tools.pri | 2 + src/gui/painting/qdrawhelper.cpp | 196 +-------------------------------- src/gui/painting/qdrawhelper_p.h | 9 -- 6 files changed, 375 insertions(+), 203 deletions(-) create mode 100644 src/corelib/tools/qsimd.cpp create mode 100644 src/corelib/tools/qsimd_p.h diff --git a/src/corelib/corelib.pro b/src/corelib/corelib.pro index 7f33791..4bffd0f 100644 --- a/src/corelib/corelib.pro +++ b/src/corelib/corelib.pro @@ -38,3 +38,25 @@ symbian: { # Timezone server LIBS += -ltzclient } + +mmx { + DEFINES += QT_HAVE_MMX +} +3dnow { + DEFINES += QT_HAVE_3DNOW +} +sse { + DEFINES += QT_HAVE_SSE + DEFINES += QT_HAVE_MMXEXT +} +sse2 { + DEFINES += QT_HAVE_SSE2 +} +iwmmxt { + DEFINES += QT_HAVE_IWMMXT +} +neon { + DEFINES += QT_HAVE_NEON + QMAKE_CXXFLAGS *= -mfpu=neon +} + diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp new file mode 100644 index 0000000..331a699 --- /dev/null +++ b/src/corelib/tools/qsimd.cpp @@ -0,0 +1,227 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. 
+** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qsimd_p.h" +#include + +QT_BEGIN_NAMESPACE + +uint qDetectCPUFeatures() +{ +#if defined (Q_OS_WINCE) +#if defined (ARM) + if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) + return IWMMXT; +#elif defined(_X86_) + uint features = 0; +#if defined QT_HAVE_MMX + if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) + features |= MMX; +#endif +#if defined QT_HAVE_3DNOW + if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE)) + features |= MMX3DNOW; +#endif + return features; +#endif + return 0; +#elif defined(QT_HAVE_IWMMXT) + // runtime detection only available when running as a previlegied process + static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); + return doIWMMXT ? IWMMXT : 0; +#elif defined(QT_HAVE_NEON) + static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); + return doNEON ? 
NEON : 0; +#else + uint features = 0; +#if defined(__x86_64__) || defined(Q_OS_WIN64) + features = MMX|SSE|SSE2|CMOV; +#elif defined(__ia64__) + features = MMX|SSE|SSE2; +#elif defined(__i386__) || defined(_M_IX86) + unsigned int extended_result = 0; + uint result = 0; + /* see p. 118 of amd64 instruction set manual Vol3 */ +#if defined(Q_CC_GNU) + asm ("push %%ebx\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "xor %%edx, %%edx\n" + "xor %%ebx, %%eax\n" + "jz 1f\n" + + "mov $0x00000001, %%eax\n" + "cpuid\n" + "1:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); + + asm ("push %%ebx\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "xor %%edx, %%edx\n" + "xor %%ebx, %%eax\n" + "jz 2f\n" + + "mov $0x80000000, %%eax\n" + "cpuid\n" + "cmp $0x80000000, %%eax\n" + "jbe 2f\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "2:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (extended_result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined (Q_OS_WIN) + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip + + mov eax, 1 + cpuid + mov result, edx + skip: + pop edx + pop ecx + pop ebx + pop eax + } + + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip2 + + mov eax, 80000000h + cpuid + cmp eax, 80000000h + jbe skip2 + mov eax, 80000001h + cpuid + mov extended_result, edx + skip2: + pop edx + pop ecx + pop ebx + pop eax + } +#endif + + // result now contains the standard feature bits + if (result & (1u << 15)) + features |= CMOV; + if (result & (1u << 23)) + features |= MMX; + if (extended_result & 
(1u << 22)) + features |= MMXEXT; + if (extended_result & (1u << 31)) + features |= MMX3DNOW; + if (extended_result & (1u << 30)) + features |= MMX3DNOWEXT; + if (result & (1u << 25)) + features |= SSE; + if (result & (1u << 26)) + features |= SSE2; +#endif // i386 + + if (qgetenv("QT_NO_MMX").toInt()) + features ^= MMX; + if (qgetenv("QT_NO_MMXEXT").toInt()) + features ^= MMXEXT; + if (qgetenv("QT_NO_3DNOW").toInt()) + features ^= MMX3DNOW; + if (qgetenv("QT_NO_3DNOWEXT").toInt()) + features ^= MMX3DNOWEXT; + if (qgetenv("QT_NO_SSE").toInt()) + features ^= SSE; + if (qgetenv("QT_NO_SSE2").toInt()) + features ^= SSE2; + + return features; +#endif +} + +QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h new file mode 100644 index 0000000..d535762 --- /dev/null +++ b/src/corelib/tools/qsimd_p.h @@ -0,0 +1,122 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. 
+** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QSIMD_P_H +#define QSIMD_P_H + +#include + + +QT_BEGIN_HEADER + +QT_BEGIN_NAMESPACE + +QT_MODULE(Core) + +#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__))) +// Disable MMX and SSE on Mac/PPC builds, or if the compiler +// does not support -Xarch argument passing +#undef QT_HAVE_SSE2 +#undef QT_HAVE_SSE +#undef QT_HAVE_3DNOW +#undef QT_HAVE_MMX +#endif + +// SSE intrinsics +#if defined(QT_HAVE_SSE2) || defined(QT_HAVE_SSE) || defined(QT_HAVE_MMX) +#if defined(QT_LINUXBASE) +/// this is an evil hack - the posix_memalign declaration in LSB +/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431 +# define posix_memalign _lsb_hack_posix_memalign +# include +# undef posix_memalign +#else +# include +#endif +#endif + +// NEON intrinsics +#if defined(QT_HAVE_NEON) +#include +#endif + + +// IWMMXT intrinsics +#if defined(QT_HAVE_IWMMXT) +#include +#if defined(Q_OS_WINCE) +# include "qplatformdefs.h" +#endif +#endif + +#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE) +# include +#elif defined(Q_OS_WINCE_STD) && defined(_X86_) +# pragma warning(disable: 4391) +# include +#endif + +// 3D now intrinsics +#if defined(QT_HAVE_3DNOW) +#include +#endif + +enum CPUFeatures { + None = 0, + MMX = 0x1, + MMXEXT = 0x2, + MMX3DNOW = 0x4, + MMX3DNOWEXT = 0x8, + SSE = 0x10, + SSE2 = 0x20, + CMOV = 0x40, + IWMMXT = 0x80, + NEON = 0x100 +}; + + +QT_END_NAMESPACE + +QT_END_HEADER + +Q_CORE_EXPORT uint qDetectCPUFeatures(); + +#endif // QSIMD_P_H diff --git 
a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri index 3406e41..6d64915 100644 --- a/src/corelib/tools/tools.pri +++ b/src/corelib/tools/tools.pri @@ -33,6 +33,7 @@ HEADERS += \ tools/qsharedpointer.h \ tools/qsharedpointer_impl.h \ tools/qset.h \ + tools/qsimd_p.h \ tools/qsize.h \ tools/qstack.h \ tools/qstring.h \ @@ -68,6 +69,7 @@ SOURCES += \ tools/qregexp.cpp \ tools/qshareddata.cpp \ tools/qsharedpointer.cpp \ + tools/qsimd.cpp \ tools/qsize.cpp \ tools/qstring.cpp \ tools/qstringbuilder.cpp \ diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index 194dda3..b9aa9ce 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include QT_BEGIN_NAMESPACE @@ -7720,199 +7721,6 @@ static void qt_memfill16_setup(quint16 *dest, quint16 value, int count); qt_memfill32_func qt_memfill32 = qt_memfill32_setup; qt_memfill16_func qt_memfill16 = qt_memfill16_setup; -enum CPUFeatures { - None = 0, - MMX = 0x1, - MMXEXT = 0x2, - MMX3DNOW = 0x4, - MMX3DNOWEXT = 0x8, - SSE = 0x10, - SSE2 = 0x20, - CMOV = 0x40, - IWMMXT = 0x80, - NEON = 0x100 -}; - -static uint detectCPUFeatures() -{ -#if defined (Q_OS_WINCE) -#if defined (ARM) - if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) - return IWMMXT; -#elif defined(_X86_) - uint features = 0; -#if defined QT_HAVE_MMX - if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) - features |= MMX; -#endif -#if defined QT_HAVE_3DNOW - if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE)) - features |= MMX3DNOW; -#endif - return features; -#endif - return 0; -#elif defined(QT_HAVE_IWMMXT) - // runtime detection only available when running as a previlegied process - static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - return doIWMMXT ? IWMMXT : 0; -#elif defined(QT_HAVE_NEON) - static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - return doNEON ? 
NEON : 0; -#else - uint features = 0; -#if defined(__x86_64__) || defined(Q_OS_WIN64) - features = MMX|SSE|SSE2|CMOV; -#elif defined(__ia64__) - features = MMX|SSE|SSE2; -#elif defined(__i386__) || defined(_M_IX86) - unsigned int extended_result = 0; - uint result = 0; - /* see p. 118 of amd64 instruction set manual Vol3 */ -#if defined(Q_CC_GNU) - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 1f\n" - - "mov $0x00000001, %%eax\n" - "cpuid\n" - "1:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); - - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 2f\n" - - "mov $0x80000000, %%eax\n" - "cpuid\n" - "cmp $0x80000000, %%eax\n" - "jbe 2f\n" - "mov $0x80000001, %%eax\n" - "cpuid\n" - "2:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (extended_result) - : - : "%eax", "%ecx", "%edx" - ); -#elif defined (Q_OS_WIN) - _asm { - push eax - push ebx - push ecx - push edx - pushfd - pop eax - mov ebx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ebx - jz skip - - mov eax, 1 - cpuid - mov result, edx - skip: - pop edx - pop ecx - pop ebx - pop eax - } - - _asm { - push eax - push ebx - push ecx - push edx - pushfd - pop eax - mov ebx, eax - xor eax, 00200000h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ebx - jz skip2 - - mov eax, 80000000h - cpuid - cmp eax, 80000000h - jbe skip2 - mov eax, 80000001h - cpuid - mov extended_result, edx - skip2: - pop edx - pop ecx - pop ebx - pop eax - } -#endif - - // result now contains the standard feature bits - if (result & (1u << 15)) - features |= CMOV; - if (result & (1u << 23)) - features |= MMX; - if (extended_result & 
(1u << 22)) - features |= MMXEXT; - if (extended_result & (1u << 31)) - features |= MMX3DNOW; - if (extended_result & (1u << 30)) - features |= MMX3DNOWEXT; - if (result & (1u << 25)) - features |= SSE; - if (result & (1u << 26)) - features |= SSE2; -#endif // i386 - - if (qgetenv("QT_NO_MMX").toInt()) - features ^= MMX; - if (qgetenv("QT_NO_MMXEXT").toInt()) - features ^= MMXEXT; - if (qgetenv("QT_NO_3DNOW").toInt()) - features ^= MMX3DNOW; - if (qgetenv("QT_NO_3DNOWEXT").toInt()) - features ^= MMX3DNOWEXT; - if (qgetenv("QT_NO_SSE").toInt()) - features ^= SSE; - if (qgetenv("QT_NO_SSE2").toInt()) - features ^= SSE2; - - return features; -#endif -} - #if defined(Q_CC_RVCT) && defined(QT_HAVE_ARMV6) // Move these to qdrawhelper_arm.c when all // functions are implemented using arm assembly. @@ -8008,7 +7816,7 @@ void qInitDrawhelperAsm() static uint features = 0xffffffff; if (features != 0xffffffff) return; - features = detectCPUFeatures(); + features = qDetectCPUFeatures(); qt_memfill32 = qt_memfill_template; qt_memfill16 = qt_memfill_quint16; //qt_memfill_template; diff --git a/src/gui/painting/qdrawhelper_p.h b/src/gui/painting/qdrawhelper_p.h index 6c47aac..474ebcf 100644 --- a/src/gui/painting/qdrawhelper_p.h +++ b/src/gui/painting/qdrawhelper_p.h @@ -67,15 +67,6 @@ #include "QtGui/qscreen_qws.h" #endif -// Disable MMX and SSE on Mac/PPC builds, or if the compiler -// does not support -Xarch argument passing -#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__))) -#undef QT_HAVE_SSE2 -#undef QT_HAVE_SSE -#undef QT_HAVE_3DNOW -#undef QT_HAVE_MMX -#endif - QT_BEGIN_NAMESPACE #if defined(Q_CC_MSVC) && _MSCVER <= 1300 && !defined(Q_CC_INTEL) -- cgit v0.12 From d7181ae996f9add10bf4e956ddbedff99cb19378 Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 23 Feb 2010 16:05:38 +0100 Subject: Cache the result of qDetectCPUFeatures() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Move the caching of the result from drawhelper to qsimd.cpp. Avoid getting the environment variables when not necessary Reviewed-by: Samuel Rødal --- src/corelib/tools/qsimd.cpp | 33 ++++++++++++++++++++++++++------- src/gui/painting/qdrawhelper.cpp | 5 +---- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 331a699..4ba8901 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -46,12 +46,18 @@ QT_BEGIN_NAMESPACE uint qDetectCPUFeatures() { + static uint features = 0xffffffff; + if (features != 0xffffffff) + return features; + #if defined (Q_OS_WINCE) #if defined (ARM) - if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) - return IWMMXT; + if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { + features = IWMMXT; + return features; + } #elif defined(_X86_) - uint features = 0; + features = 0; #if defined QT_HAVE_MMX if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) features |= MMX; @@ -62,16 +68,19 @@ uint qDetectCPUFeatures() #endif return features; #endif - return 0; + features = 0; + return features; #elif defined(QT_HAVE_IWMMXT) // runtime detection only available when running as a previlegied process static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - return doIWMMXT ? IWMMXT : 0; + features = doIWMMXT ? IWMMXT : 0 + return features; #elif defined(QT_HAVE_NEON) static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - return doNEON ? NEON : 0; + features = doNEON ? 
NEON : 0 + return features; #else - uint features = 0; + features = 0; #if defined(__x86_64__) || defined(Q_OS_WIN64) features = MMX|SSE|SSE2|CMOV; #elif defined(__ia64__) @@ -207,18 +216,28 @@ uint qDetectCPUFeatures() features |= SSE2; #endif // i386 +#if defined(QT_HAVE_MMX) if (qgetenv("QT_NO_MMX").toInt()) features ^= MMX; +#endif if (qgetenv("QT_NO_MMXEXT").toInt()) features ^= MMXEXT; + +#if defined(QT_HAVE_3DNOW) if (qgetenv("QT_NO_3DNOW").toInt()) features ^= MMX3DNOW; +#endif if (qgetenv("QT_NO_3DNOWEXT").toInt()) features ^= MMX3DNOWEXT; + +#if defined(QT_HAVE_SSE) if (qgetenv("QT_NO_SSE").toInt()) features ^= SSE; +#endif +#if defined(QT_HAVE_SSE2) if (qgetenv("QT_NO_SSE2").toInt()) features ^= SSE2; +#endif return features; #endif diff --git a/src/gui/painting/qdrawhelper.cpp b/src/gui/painting/qdrawhelper.cpp index b9aa9ce..40fe499 100644 --- a/src/gui/painting/qdrawhelper.cpp +++ b/src/gui/painting/qdrawhelper.cpp @@ -7813,10 +7813,7 @@ static void qt_blend_color_argb_armv6(int count, const QSpan *spans, void *userD void qInitDrawhelperAsm() { - static uint features = 0xffffffff; - if (features != 0xffffffff) - return; - features = qDetectCPUFeatures(); + const uint features = qDetectCPUFeatures(); qt_memfill32 = qt_memfill_template; qt_memfill16 = qt_memfill_quint16; //qt_memfill_template; -- cgit v0.12 From 60fd302e8d88b92ade59d68872c99310128c3a6c Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 23 Feb 2010 17:06:41 +0100 Subject: Implement QString::fromLatin1_helper() with SSE2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are at least 16 characters, use SSE2 to convert from 8 bits to 16 bits. 
Reviewed-by: Samuel Rødal Reviewed-by: Thiago Macieira --- src/corelib/tools/qsimd_p.h | 9 +++++++++ src/corelib/tools/qstring.cpp | 24 ++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index d535762..d0a057e 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -60,6 +60,15 @@ QT_MODULE(Core) #undef QT_HAVE_MMX #endif + +#if defined(__x86_64__) || defined(Q_OS_WIN64) || defined(__ia64__) || defined(__SSE2__) +#if defined(QT_HAVE_SSE2) +// Defined for small fast functions that can take advantages of SSE2 intrinsics +#define QT_ALWAYS_HAVE_SSE2 +#endif +#endif + + // SSE intrinsics #if defined(QT_HAVE_SSE2) || defined(QT_HAVE_SSE) || defined(QT_HAVE_MMX) #if defined(QT_LINUXBASE) diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index dec59b7..571a015 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -46,6 +46,7 @@ #include #endif #include +#include "qsimd_p.h" #include #include #include "qlocale.h" @@ -3612,10 +3613,29 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) d->alloc = d->size = size; d->clean = d->asciiCache = d->simpletext = d->righttoleft = d->capacity = 0; d->data = d->array; - ushort *i = d->data; d->array[size] = '\0'; + ushort *dst = d->data; +#if defined(QT_ALWAYS_HAVE_SSE2) + if (size >= 16) { + int chunkCount = size >> 4; // divided by 16 + const __m128i nullMask = _mm_set1_epi32(0); + for (int i = 0; i < chunkCount; ++i) { + const __m128i chunk = _mm_loadu_si128((__m128i*)str); + str += 16; + + const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); + _mm_storeu_si128((__m128i*)dst, firstHalf); + dst += 8; + + const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); + _mm_storeu_si128((__m128i*)dst, secondHalf); + dst += 8; + } + size = size % 16; + } +#endif while (size--) - *i++ = (uchar)*str++; + *dst++ = (uchar)*str++; } return d; } 
-- cgit v0.12 From 7fb1434479001890b073405d53c7814143e9929a Mon Sep 17 00:00:00 2001 From: Benjamin Poulain Date: Tue, 23 Feb 2010 22:04:09 +0100 Subject: Build fixes for the SIMD functions Two semicolons were missing for arm. The MMX header was included for all non-x86 platforms. --- src/corelib/tools/qsimd.cpp | 4 ++-- src/corelib/tools/qsimd_p.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 4ba8901..52d2cea 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -73,11 +73,11 @@ uint qDetectCPUFeatures() #elif defined(QT_HAVE_IWMMXT) // runtime detection only available when running as a previlegied process static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - features = doIWMMXT ? IWMMXT : 0 + features = doIWMMXT ? IWMMXT : 0; return features; #elif defined(QT_HAVE_NEON) static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - features = doNEON ? NEON : 0 + features = doNEON ? NEON : 0; return features; #else features = 0; diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index d0a057e..07fae97 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -96,12 +96,14 @@ QT_MODULE(Core) #endif #endif +#if defined(QT_HAVE_IWMMXT) #if !defined(__IWMMXT__) && !defined(Q_OS_WINCE) # include #elif defined(Q_OS_WINCE_STD) && defined(_X86_) # pragma warning(disable: 4391) # include #endif +#endif // 3D now intrinsics #if defined(QT_HAVE_3DNOW) -- cgit v0.12