diff options
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- | src/corelib/tools/qregexp.cpp | 11 | ||||
-rw-r--r-- | src/corelib/tools/qsharedpointer_impl.h | 1 | ||||
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 246 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 126 | ||||
-rw-r--r-- | src/corelib/tools/qstring.cpp | 112 | ||||
-rw-r--r-- | src/corelib/tools/tools.pri | 2 |
6 files changed, 487 insertions, 11 deletions
diff --git a/src/corelib/tools/qregexp.cpp b/src/corelib/tools/qregexp.cpp index 25255f9..20ad444 100644 --- a/src/corelib/tools/qregexp.cpp +++ b/src/corelib/tools/qregexp.cpp @@ -1466,9 +1466,14 @@ void QRegExpMatchState::match(const QChar *str0, int len0, int pos0, #ifndef QT_NO_REGEXP_CAPTURE for (int i = 0; i < numCaptures; ++i) { int j = eng->captureForOfficialCapture.at(i); - int len = capEnd[j] - capBegin[j]; - *c++ = (len > 0) ? pos + capBegin[j] : 0; - *c++ = len; + if (capBegin[j] != EmptyCapture) { + int len = capEnd[j] - capBegin[j]; + *c++ = (len > 0) ? pos + capBegin[j] : 0; + *c++ = len; + } else { + *c++ = -1; + *c++ = -1; + } } #endif } else { diff --git a/src/corelib/tools/qsharedpointer_impl.h b/src/corelib/tools/qsharedpointer_impl.h index d85c248..bfc65bc 100644 --- a/src/corelib/tools/qsharedpointer_impl.h +++ b/src/corelib/tools/qsharedpointer_impl.h @@ -209,6 +209,7 @@ namespace QtSharedPointer { inline bool destroy() { destroyer(this); return true; } inline void operator delete(void *ptr) { ::operator delete(ptr); } + inline void operator delete(void *, void *) { } }; // sizeof(ExternalRefCountWithDestroyFn) = 16 (32-bit) / 24 (64-bit) diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp new file mode 100644 index 0000000..52d2cea --- /dev/null +++ b/src/corelib/tools/qsimd.cpp @@ -0,0 +1,246 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include "qsimd_p.h" +#include <QByteArray> + +QT_BEGIN_NAMESPACE + +uint qDetectCPUFeatures() +{ + static uint features = 0xffffffff; + if (features != 0xffffffff) + return features; + +#if defined (Q_OS_WINCE) +#if defined (ARM) + if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { + features = IWMMXT; + return features; + } +#elif defined(_X86_) + features = 0; +#if defined QT_HAVE_MMX + if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE)) + features |= MMX; +#endif +#if defined QT_HAVE_3DNOW + if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE)) + features |= MMX3DNOW; +#endif + return features; +#endif + features = 0; + return features; +#elif defined(QT_HAVE_IWMMXT) + // runtime detection only available when running as a previlegied process + static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); + features = doIWMMXT ? IWMMXT : 0; + return features; +#elif defined(QT_HAVE_NEON) + static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); + features = doNEON ? NEON : 0; + return features; +#else + features = 0; +#if defined(__x86_64__) || defined(Q_OS_WIN64) + features = MMX|SSE|SSE2|CMOV; +#elif defined(__ia64__) + features = MMX|SSE|SSE2; +#elif defined(__i386__) || defined(_M_IX86) + unsigned int extended_result = 0; + uint result = 0; + /* see p. 118 of amd64 instruction set manual Vol3 */ +#if defined(Q_CC_GNU) + asm ("push %%ebx\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "xor %%edx, %%edx\n" + "xor %%ebx, %%eax\n" + "jz 1f\n" + + "mov $0x00000001, %%eax\n" + "cpuid\n" + "1:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); + + asm ("push %%ebx\n" + "pushf\n" + "pop %%eax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "xor %%edx, %%edx\n" + "xor %%ebx, %%eax\n" + "jz 2f\n" + + "mov $0x80000000, %%eax\n" + "cpuid\n" + "cmp $0x80000000, %%eax\n" + "jbe 2f\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "2:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (extended_result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined (Q_OS_WIN) + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip + + mov eax, 1 + cpuid + mov result, edx + skip: + pop edx + pop ecx + pop ebx + pop eax + } + + _asm { + push eax + push ebx + push ecx + push edx + pushfd + pop eax + mov ebx, eax + xor eax, 00200000h + push eax + popfd + pushfd + pop eax + mov edx, 0 + xor eax, ebx + jz skip2 + + mov eax, 80000000h + cpuid + cmp eax, 80000000h + jbe skip2 + mov eax, 80000001h + cpuid + mov extended_result, edx + skip2: + pop edx + pop ecx + pop ebx + pop eax + } +#endif + + // result now contains the standard feature bits + if (result & (1u << 15)) + features |= CMOV; + if (result & (1u << 23)) + features |= MMX; + if (extended_result & (1u << 22)) + features |= MMXEXT; + if (extended_result & (1u << 31)) + features |= MMX3DNOW; + if (extended_result & (1u << 30)) + features |= MMX3DNOWEXT; + if (result & (1u << 25)) + features |= SSE; + if (result & (1u << 26)) + features |= SSE2; +#endif // i386 + +#if defined(QT_HAVE_MMX) + if (qgetenv("QT_NO_MMX").toInt()) + features ^= MMX; +#endif + if (qgetenv("QT_NO_MMXEXT").toInt()) + features ^= MMXEXT; + +#if defined(QT_HAVE_3DNOW) + if (qgetenv("QT_NO_3DNOW").toInt()) + features ^= MMX3DNOW; +#endif + if (qgetenv("QT_NO_3DNOWEXT").toInt()) + features ^= MMX3DNOWEXT; + +#if defined(QT_HAVE_SSE) + if (qgetenv("QT_NO_SSE").toInt()) + features ^= SSE; +#endif +#if defined(QT_HAVE_SSE2) + if (qgetenv("QT_NO_SSE2").toInt()) + features ^= SSE2; +#endif + + return features; +#endif +} + +QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h new file mode 100644 index 0000000..556db14 --- /dev/null +++ b/src/corelib/tools/qsimd_p.h @@ -0,0 +1,126 @@ +/**************************************************************************** +** +** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** No Commercial Usage +** This file contains pre-release code and may not be distributed. +** You may use this file in accordance with the terms and conditions +** contained in the Technology Preview License Agreement accompanying +** this package. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 2.1 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 2.1 requirements +** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** If you have questions regarding the use of this file, please contact +** Nokia at qt-info@nokia.com. +** +** +** +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef QSIMD_P_H +#define QSIMD_P_H + +#include <qglobal.h> + + +QT_BEGIN_HEADER + +QT_BEGIN_NAMESPACE + +QT_MODULE(Core) + +#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__))) +// Disable MMX and SSE on Mac/PPC builds, or if the compiler +// does not support -Xarch argument passing +#undef QT_HAVE_SSE2 +#undef QT_HAVE_SSE +#undef QT_HAVE_3DNOW +#undef QT_HAVE_MMX +#endif + +// SSE intrinsics +#if defined(__SSE2__) +#if defined(QT_LINUXBASE) +/// this is an evil hack - the posix_memalign declaration in LSB +/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431 +# define posix_memalign _lsb_hack_posix_memalign +# include <emmintrin.h> +# undef posix_memalign +#else +# include <emmintrin.h> +#endif + +#define QT_ALWAYS_HAVE_SSE2 +#endif + +// NEON intrinsics +#if defined(QT_HAVE_NEON) +#include <arm_neon.h> +#endif + + +// IWMMXT intrinsics +#if defined(QT_HAVE_IWMMXT) +#include <mmintrin.h> +#if defined(Q_OS_WINCE) +# include "qplatformdefs.h" +#endif +#endif + +#if defined(QT_HAVE_IWMMXT) +#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE) +# include <xmmintrin.h> +#elif defined(Q_OS_WINCE_STD) && defined(_X86_) +# pragma warning(disable: 4391) +# include <xmmintrin.h> +#endif +#endif + +// 3D now intrinsics +#if defined(QT_HAVE_3DNOW) +#include <mm3dnow.h> +#endif + +enum CPUFeatures { + None = 0, + MMX = 0x1, + MMXEXT = 0x2, + MMX3DNOW = 0x4, + MMX3DNOWEXT = 0x8, + SSE = 0x10, + SSE2 = 0x20, + CMOV = 0x40, + IWMMXT = 0x80, + NEON = 0x100 +}; + + +QT_END_NAMESPACE + +QT_END_HEADER + +Q_CORE_EXPORT uint qDetectCPUFeatures(); + +#endif // QSIMD_P_H diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index ac1bee7..e9b7b9a 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -46,6 +46,7 @@ #include <qtextcodec.h> #endif #include <private/qutfcodec_p.h> +#include "qsimd_p.h" #include <qdatastream.h> #include <qlist.h> #include "qlocale.h" @@ -3479,12 +3480,82 @@ static QByteArray toLatin1_helper(const QChar *data, int length) QByteArray ba; if (length) { ba.resize(length); - const ushort *i = reinterpret_cast<const ushort *>(data); - const ushort *e = i + length; - uchar *s = (uchar*) ba.data(); - while (i != e) { - *s++ = (*i>0xff) ? '?' : (uchar) *i; - ++i; + const ushort *src = reinterpret_cast<const ushort *>(data); + uchar *dst = (uchar*) ba.data(); +#if defined(QT_ALWAYS_HAVE_SSE2) + if (length >= 16) { + const int chunkCount = length >> 4; // divided by 16 + const __m128i questionMark = _mm_set1_epi16('?'); + // SSE has no compare instruction for unsigned comparison. + // The variables must be shiffted + 0x8000 to be compared + const __m128i signedBitOffset = _mm_set1_epi16(0x8000); + const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000); + for (int i = 0; i < chunkCount; ++i) { + __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load + src += 8; + { + // each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff) + const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + + // offLimitQuestionMark contains '?' for each 16 bits that was off-limit + // the 16 bits that were correct contains zeros + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + + // correctBytes contains the bytes that were in limit + // the 16 bits that were off limits contains zeros + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1); + + // merge offLimitQuestionMark and correctBytes to have the result + chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark); + } + + __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load + src += 8; + { + // exactly the same operations as for the previous chunk of data + const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset); + const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask); + const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark); + const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2); + chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark); + } + + // pack the two vector to 16 x 8bits elements + const __m128i result = _mm_packus_epi16(chunk1, chunk2); + + _mm_storeu_si128((__m128i*)dst, result); // store + dst += 16; + } + length = length % 16; + } +#elif QT_HAVE_NEON + // Refer to the documentation of the SSE2 implementation + // this use eactly the same method as for SSE except: + // 1) neon has unsigned comparison + // 2) packing is done to 64 bits (8 x 8bits component). + if (length >= 16) { + const int chunkCount = length >> 3; // divided by 8 + const uint16x8_t questionMark = vdupq_n_u16('?'); // set + const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set + for (int i = 0; i < chunkCount; ++i) { + uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load + src += 8; + + const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask + const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark + const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk + chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark + const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing + vst1_u8(dst, result); // store + dst += 8; + } + length = length % 8; + } +#endif + while (length--) { + *dst++ = (*src>0xff) ? '?' : (uchar) *src; + ++src; } } return ba; @@ -3647,10 +3718,35 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size) d->alloc = d->size = size; d->clean = d->asciiCache = d->simpletext = d->righttoleft = d->capacity = 0; d->data = d->array; - ushort *i = d->data; d->array[size] = '\0'; + ushort *dst = d->data; + /* SIMD: + * Unpacking with SSE has been shown to improve performance on recent CPUs + * The same method gives no improvement with NEON. + */ +#if defined(QT_ALWAYS_HAVE_SSE2) + if (size >= 16) { + int chunkCount = size >> 4; // divided by 16 + const __m128i nullMask = _mm_set1_epi32(0); + for (int i = 0; i < chunkCount; ++i) { + const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load + str += 16; + + // unpack the first 8 bytes, padding with zeros + const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask); + _mm_storeu_si128((__m128i*)dst, firstHalf); // store + dst += 8; + + // unpack the last 8 bytes, padding with zeros + const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask); + _mm_storeu_si128((__m128i*)dst, secondHalf); // store + dst += 8; + } + size = size % 16; + } +#endif while (size--) - *i++ = (uchar)*str++; + *dst++ = (uchar)*str++; } return d; } diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri index 3406e41..6d64915 100644 --- a/src/corelib/tools/tools.pri +++ b/src/corelib/tools/tools.pri @@ -33,6 +33,7 @@ HEADERS += \ tools/qsharedpointer.h \ tools/qsharedpointer_impl.h \ tools/qset.h \ + tools/qsimd_p.h \ tools/qsize.h \ tools/qstack.h \ tools/qstring.h \ @@ -68,6 +69,7 @@ SOURCES += \ tools/qregexp.cpp \ tools/qshareddata.cpp \ tools/qsharedpointer.cpp \ + tools/qsimd.cpp \ tools/qsize.cpp \ tools/qstring.cpp \ tools/qstringbuilder.cpp \ |