summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools')
-rw-r--r--src/corelib/tools/qregexp.cpp11
-rw-r--r--src/corelib/tools/qsharedpointer_impl.h1
-rw-r--r--src/corelib/tools/qsimd.cpp246
-rw-r--r--src/corelib/tools/qsimd_p.h126
-rw-r--r--src/corelib/tools/qstring.cpp112
-rw-r--r--src/corelib/tools/tools.pri2
6 files changed, 487 insertions, 11 deletions
diff --git a/src/corelib/tools/qregexp.cpp b/src/corelib/tools/qregexp.cpp
index 25255f9..20ad444 100644
--- a/src/corelib/tools/qregexp.cpp
+++ b/src/corelib/tools/qregexp.cpp
@@ -1466,9 +1466,14 @@ void QRegExpMatchState::match(const QChar *str0, int len0, int pos0,
#ifndef QT_NO_REGEXP_CAPTURE
for (int i = 0; i < numCaptures; ++i) {
int j = eng->captureForOfficialCapture.at(i);
- int len = capEnd[j] - capBegin[j];
- *c++ = (len > 0) ? pos + capBegin[j] : 0;
- *c++ = len;
+ if (capBegin[j] != EmptyCapture) {
+ int len = capEnd[j] - capBegin[j];
+ *c++ = (len > 0) ? pos + capBegin[j] : 0;
+ *c++ = len;
+ } else {
+ *c++ = -1;
+ *c++ = -1;
+ }
}
#endif
} else {
diff --git a/src/corelib/tools/qsharedpointer_impl.h b/src/corelib/tools/qsharedpointer_impl.h
index d85c248..bfc65bc 100644
--- a/src/corelib/tools/qsharedpointer_impl.h
+++ b/src/corelib/tools/qsharedpointer_impl.h
@@ -209,6 +209,7 @@ namespace QtSharedPointer {
inline bool destroy() { destroyer(this); return true; }
inline void operator delete(void *ptr) { ::operator delete(ptr); }
+ inline void operator delete(void *, void *) { }
};
// sizeof(ExternalRefCountWithDestroyFn) = 16 (32-bit) / 24 (64-bit)
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
new file mode 100644
index 0000000..52d2cea
--- /dev/null
+++ b/src/corelib/tools/qsimd.cpp
@@ -0,0 +1,246 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qsimd_p.h"
+#include <QByteArray>
+
+QT_BEGIN_NAMESPACE
+
+uint qDetectCPUFeatures()
+{
+ static uint features = 0xffffffff;
+ if (features != 0xffffffff)
+ return features;
+
+#if defined (Q_OS_WINCE)
+#if defined (ARM)
+ if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
+ features = IWMMXT;
+ return features;
+ }
+#elif defined(_X86_)
+ features = 0;
+#if defined QT_HAVE_MMX
+ if (IsProcessorFeaturePresent(PF_MMX_INSTRUCTIONS_AVAILABLE))
+ features |= MMX;
+#endif
+#if defined QT_HAVE_3DNOW
+ if (IsProcessorFeaturePresent(PF_3DNOW_INSTRUCTIONS_AVAILABLE))
+ features |= MMX3DNOW;
+#endif
+ return features;
+#endif
+ features = 0;
+ return features;
+#elif defined(QT_HAVE_IWMMXT)
+ // runtime detection only available when running as a previlegied process
+ static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt();
+ features = doIWMMXT ? IWMMXT : 0;
+ return features;
+#elif defined(QT_HAVE_NEON)
+ static const bool doNEON = !qgetenv("QT_NO_NEON").toInt();
+ features = doNEON ? NEON : 0;
+ return features;
+#else
+ features = 0;
+#if defined(__x86_64__) || defined(Q_OS_WIN64)
+ features = MMX|SSE|SSE2|CMOV;
+#elif defined(__ia64__)
+ features = MMX|SSE|SSE2;
+#elif defined(__i386__) || defined(_M_IX86)
+ unsigned int extended_result = 0;
+ uint result = 0;
+ /* see p. 118 of amd64 instruction set manual Vol3 */
+#if defined(Q_CC_GNU)
+ asm ("push %%ebx\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%ebx\n"
+ "xor $0x00200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "xor %%edx, %%edx\n"
+ "xor %%ebx, %%eax\n"
+ "jz 1f\n"
+
+ "mov $0x00000001, %%eax\n"
+ "cpuid\n"
+ "1:\n"
+ "pop %%ebx\n"
+ "mov %%edx, %0\n"
+ : "=r" (result)
+ :
+ : "%eax", "%ecx", "%edx"
+ );
+
+ asm ("push %%ebx\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "mov %%eax, %%ebx\n"
+ "xor $0x00200000, %%eax\n"
+ "push %%eax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%eax\n"
+ "xor %%edx, %%edx\n"
+ "xor %%ebx, %%eax\n"
+ "jz 2f\n"
+
+ "mov $0x80000000, %%eax\n"
+ "cpuid\n"
+ "cmp $0x80000000, %%eax\n"
+ "jbe 2f\n"
+ "mov $0x80000001, %%eax\n"
+ "cpuid\n"
+ "2:\n"
+ "pop %%ebx\n"
+ "mov %%edx, %0\n"
+ : "=r" (extended_result)
+ :
+ : "%eax", "%ecx", "%edx"
+ );
+#elif defined (Q_OS_WIN)
+ _asm {
+ push eax
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ pop eax
+ mov ebx, eax
+ xor eax, 00200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov edx, 0
+ xor eax, ebx
+ jz skip
+
+ mov eax, 1
+ cpuid
+ mov result, edx
+ skip:
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ }
+
+ _asm {
+ push eax
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ pop eax
+ mov ebx, eax
+ xor eax, 00200000h
+ push eax
+ popfd
+ pushfd
+ pop eax
+ mov edx, 0
+ xor eax, ebx
+ jz skip2
+
+ mov eax, 80000000h
+ cpuid
+ cmp eax, 80000000h
+ jbe skip2
+ mov eax, 80000001h
+ cpuid
+ mov extended_result, edx
+ skip2:
+ pop edx
+ pop ecx
+ pop ebx
+ pop eax
+ }
+#endif
+
+ // result now contains the standard feature bits
+ if (result & (1u << 15))
+ features |= CMOV;
+ if (result & (1u << 23))
+ features |= MMX;
+ if (extended_result & (1u << 22))
+ features |= MMXEXT;
+ if (extended_result & (1u << 31))
+ features |= MMX3DNOW;
+ if (extended_result & (1u << 30))
+ features |= MMX3DNOWEXT;
+ if (result & (1u << 25))
+ features |= SSE;
+ if (result & (1u << 26))
+ features |= SSE2;
+#endif // i386
+
+#if defined(QT_HAVE_MMX)
+ if (qgetenv("QT_NO_MMX").toInt())
+ features ^= MMX;
+#endif
+ if (qgetenv("QT_NO_MMXEXT").toInt())
+ features ^= MMXEXT;
+
+#if defined(QT_HAVE_3DNOW)
+ if (qgetenv("QT_NO_3DNOW").toInt())
+ features ^= MMX3DNOW;
+#endif
+ if (qgetenv("QT_NO_3DNOWEXT").toInt())
+ features ^= MMX3DNOWEXT;
+
+#if defined(QT_HAVE_SSE)
+ if (qgetenv("QT_NO_SSE").toInt())
+ features ^= SSE;
+#endif
+#if defined(QT_HAVE_SSE2)
+ if (qgetenv("QT_NO_SSE2").toInt())
+ features ^= SSE2;
+#endif
+
+ return features;
+#endif
+}
+
+QT_END_NAMESPACE
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
new file mode 100644
index 0000000..556db14
--- /dev/null
+++ b/src/corelib/tools/qsimd_p.h
@@ -0,0 +1,126 @@
+/****************************************************************************
+**
+** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QSIMD_P_H
+#define QSIMD_P_H
+
+#include <qglobal.h>
+
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+QT_MODULE(Core)
+
+#if defined(QT_NO_MAC_XARCH) || (defined(Q_OS_DARWIN) && (defined(__ppc__) || defined(__ppc64__)))
+// Disable MMX and SSE on Mac/PPC builds, or if the compiler
+// does not support -Xarch argument passing
+#undef QT_HAVE_SSE2
+#undef QT_HAVE_SSE
+#undef QT_HAVE_3DNOW
+#undef QT_HAVE_MMX
+#endif
+
+// SSE intrinsics
+#if defined(__SSE2__)
+#if defined(QT_LINUXBASE)
+/// this is an evil hack - the posix_memalign declaration in LSB
+/// is wrong - see http://bugs.linuxbase.org/show_bug.cgi?id=2431
+# define posix_memalign _lsb_hack_posix_memalign
+# include <emmintrin.h>
+# undef posix_memalign
+#else
+# include <emmintrin.h>
+#endif
+
+#define QT_ALWAYS_HAVE_SSE2
+#endif
+
+// NEON intrinsics
+#if defined(QT_HAVE_NEON)
+#include <arm_neon.h>
+#endif
+
+
+// IWMMXT intrinsics
+#if defined(QT_HAVE_IWMMXT)
+#include <mmintrin.h>
+#if defined(Q_OS_WINCE)
+# include "qplatformdefs.h"
+#endif
+#endif
+
+#if defined(QT_HAVE_IWMMXT)
+#if !defined(__IWMMXT__) && !defined(Q_OS_WINCE)
+# include <xmmintrin.h>
+#elif defined(Q_OS_WINCE_STD) && defined(_X86_)
+# pragma warning(disable: 4391)
+# include <xmmintrin.h>
+#endif
+#endif
+
+// 3D now intrinsics
+#if defined(QT_HAVE_3DNOW)
+#include <mm3dnow.h>
+#endif
+
+enum CPUFeatures {
+ None = 0,
+ MMX = 0x1,
+ MMXEXT = 0x2,
+ MMX3DNOW = 0x4,
+ MMX3DNOWEXT = 0x8,
+ SSE = 0x10,
+ SSE2 = 0x20,
+ CMOV = 0x40,
+ IWMMXT = 0x80,
+ NEON = 0x100
+};
+
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+Q_CORE_EXPORT uint qDetectCPUFeatures();
+
+#endif // QSIMD_P_H
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index ac1bee7..e9b7b9a 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -46,6 +46,7 @@
#include <qtextcodec.h>
#endif
#include <private/qutfcodec_p.h>
+#include "qsimd_p.h"
#include <qdatastream.h>
#include <qlist.h>
#include "qlocale.h"
@@ -3479,12 +3480,82 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
QByteArray ba;
if (length) {
ba.resize(length);
- const ushort *i = reinterpret_cast<const ushort *>(data);
- const ushort *e = i + length;
- uchar *s = (uchar*) ba.data();
- while (i != e) {
- *s++ = (*i>0xff) ? '?' : (uchar) *i;
- ++i;
+ const ushort *src = reinterpret_cast<const ushort *>(data);
+ uchar *dst = (uchar*) ba.data();
+#if defined(QT_ALWAYS_HAVE_SSE2)
+ if (length >= 16) {
+ const int chunkCount = length >> 4; // divided by 16
+ const __m128i questionMark = _mm_set1_epi16('?');
+ // SSE has no compare instruction for unsigned comparison.
+ // The variables must be shiffted + 0x8000 to be compared
+ const __m128i signedBitOffset = _mm_set1_epi16(0x8000);
+ const __m128i thresholdMask = _mm_set1_epi16(0xff + 0x8000);
+ for (int i = 0; i < chunkCount; ++i) {
+ __m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
+ src += 8;
+ {
+ // each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff)
+ const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset);
+ const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
+
+ // offLimitQuestionMark contains '?' for each 16 bits that was off-limit
+ // the 16 bits that were correct contains zeros
+ const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
+
+ // correctBytes contains the bytes that were in limit
+ // the 16 bits that were off limits contains zeros
+ const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1);
+
+ // merge offLimitQuestionMark and correctBytes to have the result
+ chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark);
+ }
+
+ __m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
+ src += 8;
+ {
+ // exactly the same operations as for the previous chunk of data
+ const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset);
+ const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
+ const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
+ const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2);
+ chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark);
+ }
+
+ // pack the two vector to 16 x 8bits elements
+ const __m128i result = _mm_packus_epi16(chunk1, chunk2);
+
+ _mm_storeu_si128((__m128i*)dst, result); // store
+ dst += 16;
+ }
+ length = length % 16;
+ }
+#elif QT_HAVE_NEON
+ // Refer to the documentation of the SSE2 implementation
+ // this use eactly the same method as for SSE except:
+ // 1) neon has unsigned comparison
+ // 2) packing is done to 64 bits (8 x 8bits component).
+ if (length >= 16) {
+ const int chunkCount = length >> 3; // divided by 8
+ const uint16x8_t questionMark = vdupq_n_u16('?'); // set
+ const uint16x8_t thresholdMask = vdupq_n_u16(0xff); // set
+ for (int i = 0; i < chunkCount; ++i) {
+ uint16x8_t chunk = vld1q_u16((uint16_t *)src); // load
+ src += 8;
+
+ const uint16x8_t offLimitMask = vcgtq_u16(chunk, thresholdMask); // chunk > thresholdMask
+ const uint16x8_t offLimitQuestionMark = vandq_u16(offLimitMask, questionMark); // offLimitMask & questionMark
+ const uint16x8_t correctBytes = vbicq_u16(chunk, offLimitMask); // !offLimitMask & chunk
+ chunk = vorrq_u16(correctBytes, offLimitQuestionMark); // correctBytes | offLimitQuestionMark
+ const uint8x8_t result = vmovn_u16(chunk); // narrowing move->packing
+ vst1_u8(dst, result); // store
+ dst += 8;
+ }
+ length = length % 8;
+ }
+#endif
+ while (length--) {
+ *dst++ = (*src>0xff) ? '?' : (uchar) *src;
+ ++src;
}
}
return ba;
@@ -3647,10 +3718,35 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
d->alloc = d->size = size;
d->clean = d->asciiCache = d->simpletext = d->righttoleft = d->capacity = 0;
d->data = d->array;
- ushort *i = d->data;
d->array[size] = '\0';
+ ushort *dst = d->data;
+ /* SIMD:
+ * Unpacking with SSE has been shown to improve performance on recent CPUs
+ * The same method gives no improvement with NEON.
+ */
+#if defined(QT_ALWAYS_HAVE_SSE2)
+ if (size >= 16) {
+ int chunkCount = size >> 4; // divided by 16
+ const __m128i nullMask = _mm_set1_epi32(0);
+ for (int i = 0; i < chunkCount; ++i) {
+ const __m128i chunk = _mm_loadu_si128((__m128i*)str); // load
+ str += 16;
+
+ // unpack the first 8 bytes, padding with zeros
+ const __m128i firstHalf = _mm_unpacklo_epi8(chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, firstHalf); // store
+ dst += 8;
+
+ // unpack the last 8 bytes, padding with zeros
+ const __m128i secondHalf = _mm_unpackhi_epi8 (chunk, nullMask);
+ _mm_storeu_si128((__m128i*)dst, secondHalf); // store
+ dst += 8;
+ }
+ size = size % 16;
+ }
+#endif
while (size--)
- *i++ = (uchar)*str++;
+ *dst++ = (uchar)*str++;
}
return d;
}
diff --git a/src/corelib/tools/tools.pri b/src/corelib/tools/tools.pri
index 3406e41..6d64915 100644
--- a/src/corelib/tools/tools.pri
+++ b/src/corelib/tools/tools.pri
@@ -33,6 +33,7 @@ HEADERS += \
tools/qsharedpointer.h \
tools/qsharedpointer_impl.h \
tools/qset.h \
+ tools/qsimd_p.h \
tools/qsize.h \
tools/qstack.h \
tools/qstring.h \
@@ -68,6 +69,7 @@ SOURCES += \
tools/qregexp.cpp \
tools/qshareddata.cpp \
tools/qsharedpointer.cpp \
+ tools/qsimd.cpp \
tools/qsize.cpp \
tools/qstring.cpp \
tools/qstringbuilder.cpp \