summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
authorQt Continuous Integration System <qt-info@nokia.com>2010-08-26 21:37:46 (GMT)
committerQt Continuous Integration System <qt-info@nokia.com>2010-08-26 21:37:46 (GMT)
commita5ac66200f77fd09d4c4ee5af3a9380efce871ad (patch)
tree46429b3c1c7835d120e4653f863e57079c3fc47e /src/corelib
parentef4c65fc364a0262389c0497092a0fa5e1f7d4c3 (diff)
parentfe8e118c41c6243ace972647934c90b7f937f60e (diff)
downloadQt-a5ac66200f77fd09d4c4ee5af3a9380efce871ad.zip
Qt-a5ac66200f77fd09d4c4ee5af3a9380efce871ad.tar.gz
Qt-a5ac66200f77fd09d4c4ee5af3a9380efce871ad.tar.bz2
Merge branch '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1 into 4.7-integration
* '4.7' of scm.dev.nokia.troll.no:qt/oslo-staging-1: (61 commits) Autotest: don't use Q_FUNC_INFO for testing which method got called fix generated makefile dependencies Cocoa: Demo browser can get stuck after closing modal dialog Restore default if to system default on session close. tst_qmake doesn't need QtGui Use the full path to qmake in the qmake unit test qdoc: Fixed erroneous links to QML basic types. Fixed item view background color in Gtk style scope fixes and clutter reduction for sql driver projects I don't know why some linkers can't call this function, so comment it out. QNetworkSession::close() method now send closed() signal while faking disconnection. Add the missing license headers to the QString benchmark data Fix building of qsimd.cpp on Windows CE Use QElapsedTimer for the benchlib tests. Properly implement the CPU feature disabling in qsimd.cpp. Report the detected CPU features in the corelib boilerplate Detect CPU features on ARM by reading the ELF auxvec. Split the CPU-detection code into multiple functions for readability Fixed delivering gestures to a toplevel widget. Unroll the SSSE3 code even more to avoid the need to keep an extra variable for inverting the result ...
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/global/global.pri2
-rw-r--r--src/corelib/global/qlibraryinfo.cpp11
-rw-r--r--src/corelib/plugin/qlibrary.cpp5
-rw-r--r--src/corelib/tools/qsimd.cpp191
-rw-r--r--src/corelib/tools/qsimd_p.h1
-rw-r--r--src/corelib/tools/qstring.cpp63
-rw-r--r--src/corelib/tools/qstring.h1
7 files changed, 217 insertions, 57 deletions
diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri
index b916b4d..2505e72 100644
--- a/src/corelib/global/global.pri
+++ b/src/corelib/global/global.pri
@@ -19,7 +19,7 @@ INCLUDEPATH += $$QT_BUILD_TREE/src/corelib/global
# Only used on platforms with CONFIG += precompile_header
PRECOMPILED_HEADER = global/qt_pch.h
-linux*-g++*:!static {
+linux*:!static {
QMAKE_LFLAGS += -Wl,-e,qt_core_boilerplate
prog=$$quote(if (/program interpreter: (.*)]/) { print $1; })
DEFINES += ELF_INTERPRETER=\\\"$$system(readelf -l /bin/ls | perl -n -e \'$$prog\')\\\"
diff --git a/src/corelib/global/qlibraryinfo.cpp b/src/corelib/global/qlibraryinfo.cpp
index ea7e2e4..a0779c9 100644
--- a/src/corelib/global/qlibraryinfo.cpp
+++ b/src/corelib/global/qlibraryinfo.cpp
@@ -500,6 +500,7 @@ QT_END_NAMESPACE
extern const char qt_core_interpreter[] __attribute__((section(".interp")))
= ELF_INTERPRETER;
+extern void qDumpCPUFeatures(); // in qsimd.cpp
extern "C" void qt_core_boilerplate();
void qt_core_boilerplate()
{
@@ -508,6 +509,14 @@ void qt_core_boilerplate()
"Contact: Nokia Corporation (qt-info@nokia.com)\n"
"\n"
"Build key: " QT_BUILD_KEY "\n"
+ "Compat build key: "
+#ifdef QT_BUILD_KEY_COMPAT
+ "| " QT_BUILD_KEY_COMPAT " "
+#endif
+#ifdef QT_BUILD_KEY_COMPAT2
+ "| " QT_BUILD_KEY_COMPAT2 " "
+#endif
+ "|\n"
"Build date: %s\n"
"Installation prefix: %s\n"
"Library path: %s\n"
@@ -517,6 +526,8 @@ void qt_core_boilerplate()
qt_configure_libraries_path_str + 12,
qt_configure_headers_path_str + 12);
+// qDumpCPUFeatures();
+
#ifdef QT_EVAL
extern void qt_core_eval_init(uint);
qt_core_eval_init(1);
diff --git a/src/corelib/plugin/qlibrary.cpp b/src/corelib/plugin/qlibrary.cpp
index 0f99948..a9ae2ab 100644
--- a/src/corelib/plugin/qlibrary.cpp
+++ b/src/corelib/plugin/qlibrary.cpp
@@ -790,10 +790,13 @@ bool QLibraryPrivate::isPlugin(QSettings *settings)
.arg(qt_version&0xff)
.arg(debug ? QLatin1String("debug") : QLatin1String("release"));
} else if (key != QT_BUILD_KEY
+ // we may have some compatibility keys, try them too:
#ifdef QT_BUILD_KEY_COMPAT
- // be sure to load plugins using an older but compatible build key
&& key != QT_BUILD_KEY_COMPAT
#endif
+#ifdef QT_BUILD_KEY_COMPAT2
+ && key != QT_BUILD_KEY_COMPAT2
+#endif
) {
if (qt_debug_component()) {
qWarning("In %s:\n"
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index 8005d7d..68ab033 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -41,6 +41,7 @@
#include "qsimd_p.h"
#include <QByteArray>
+#include <stdio.h>
#if defined(Q_OS_WINCE)
#include <windows.h>
@@ -50,15 +51,20 @@
#include <intrin.h>
#endif
+#if defined(Q_OS_LINUX) && defined(__arm__)
+#include "private/qcore_unix_p.h"
+
+#include <asm/hwcap.h>
+#include <linux/auxvec.h>
+#endif
+
QT_BEGIN_NAMESPACE
-uint qDetectCPUFeatures()
+#if defined (Q_OS_WINCE)
+static inline uint detectProcessorFeatures()
{
- static uint features = 0xffffffff;
- if (features != 0xffffffff)
- return features;
+ uint features = 0;
-#if defined (Q_OS_WINCE)
#if defined (ARM)
if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) {
features = IWMMXT;
@@ -78,22 +84,57 @@ uint qDetectCPUFeatures()
#endif
features = 0;
return features;
-#elif defined(QT_HAVE_IWMMXT)
+}
+
+#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON)
+static inline uint detectProcessorFeatures()
+{
+ uint features = 0;
+
+#if defined(Q_OS_LINUX)
+ int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY);
+ if (auxv != -1) {
+ unsigned long vector[64];
+ int nread;
+ while (features == 0) {
+ nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector);
+ if (nread <= 0) {
+ // EOF or error
+ break;
+ }
+
+ int max = nread / (sizeof vector[0]);
+ for (int i = 0; i < max; i += 2)
+ if (vector[i] == AT_HWCAP) {
+ if (vector[i+1] & HWCAP_IWMMXT)
+ features |= IWMMXT;
+ if (vector[i+1] & HWCAP_NEON)
+ features |= NEON;
+ break;
+ }
+ }
+
+ ::qt_safe_close(auxv);
+ return features;
+ }
+ // fall back if /proc/self/auxv wasn't found
+#endif
+
+#if defined(QT_HAVE_IWMMXT)
// runtime detection only available when running as a previlegied process
- static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt();
- features = doIWMMXT ? IWMMXT : 0;
- return features;
+ features = IWMMXT;
#elif defined(QT_HAVE_NEON)
- static const bool doNEON = !qgetenv("QT_NO_NEON").toInt();
- features = doNEON ? NEON : 0;
+ features = NEON;
+#endif
+
return features;
-#else
- features = 0;
-#if defined(__x86_64__) || defined(Q_OS_WIN64)
- features = MMX|SSE|SSE2|CMOV;
-#elif defined(__ia64__)
- features = MMX|SSE|SSE2;
+}
+
#elif defined(__i386__) || defined(_M_IX86)
+static inline uint detectProcessorFeatures()
+{
+ uint features = 0;
+
unsigned int extended_result = 0;
unsigned int feature_result = 0;
uint result = 0;
@@ -149,6 +190,7 @@ uint qDetectCPUFeatures()
:
: "%eax", "%ecx", "%edx"
);
+
#elif defined (Q_OS_WIN)
_asm {
push eax
@@ -210,6 +252,7 @@ uint qDetectCPUFeatures()
}
#endif
+
// result now contains the standard feature bits
if (result & (1u << 15))
features |= CMOV;
@@ -236,9 +279,13 @@ uint qDetectCPUFeatures()
if (feature_result & (1u << 28))
features |= AVX;
-#endif // i386
+ return features;
+}
-#if defined(__x86_64__) || defined(Q_OS_WIN64)
+#elif defined(__x86_64) || defined(Q_OS_WIN64)
+static inline uint detectProcessorFeatures()
+{
+ uint features = MMX|SSE|SSE2|CMOV;
uint feature_result = 0;
#if defined(Q_CC_GNU)
@@ -282,33 +329,97 @@ uint qDetectCPUFeatures()
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
-#endif // x86_64
-#if defined(QT_HAVE_MMX)
- if (qgetenv("QT_NO_MMX").toInt())
- features ^= MMX;
-#endif
- if (qgetenv("QT_NO_MMXEXT").toInt())
- features ^= MMXEXT;
+ return features;
+}
-#if defined(QT_HAVE_3DNOW)
- if (qgetenv("QT_NO_3DNOW").toInt())
- features ^= MMX3DNOW;
-#endif
- if (qgetenv("QT_NO_3DNOWEXT").toInt())
- features ^= MMX3DNOWEXT;
+#elif defined(__ia64__)
+static inline uint detectProcessorFeatures()
+{
+ return MMX|SSE|SSE2;
+}
-#if defined(QT_HAVE_SSE)
- if (qgetenv("QT_NO_SSE").toInt())
- features ^= SSE;
-#endif
-#if defined(QT_HAVE_SSE2)
- if (qgetenv("QT_NO_SSE2").toInt())
- features ^= SSE2;
+#else
+static inline uint detectProcessorFeatures()
+{
+ return 0;
+}
#endif
+/*
+ * Use kdesdk/scripts/generate_string_table.pl to update the table below.
+ * Here's the data (don't forget the ONE leading space):
+ mmx
+ mmxext
+ mmx3dnow
+ mmx3dnowext
+ sse
+ sse2
+ cmov
+ iwmmxt
+ neon
+ sse3
+ ssse3
+ sse4.1
+ sse4.2
+ avx
+ */
+
+// begin generated
+static const char features_string[] =
+ " mmx\0"
+ " mmxext\0"
+ " mmx3dnow\0"
+ " mmx3dnowext\0"
+ " sse\0"
+ " sse2\0"
+ " cmov\0"
+ " iwmmxt\0"
+ " neon\0"
+ " sse3\0"
+ " ssse3\0"
+ " sse4.1\0"
+ " sse4.2\0"
+ " avx\0"
+ "\0";
+
+static const int features_indices[] = {
+ 0, 5, 13, 23, 36, 41, 47, 53,
+ 61, 67, 73, 80, 88, 96, -1
+};
+// end generated
+
+const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
+
+uint qDetectCPUFeatures()
+{
+ static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1);
+ if (features != -1)
+ return features;
+
+ uint f = detectProcessorFeatures();
+ QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
+ if (!disable.isEmpty()) {
+ disable.prepend(' ');
+ for (int i = 0; i < features_count; ++i) {
+ if (disable.contains(features_string + features_indices[i]))
+ f &= ~(1 << i);
+ }
+ }
+
+ features = f;
return features;
-#endif
+}
+
+void qDumpCPUFeatures()
+{
+ uint features = qDetectCPUFeatures();
+ printf("Processor features: ");
+ for (int i = 0; i < features_count; ++i) {
+ if (features & (1 << i))
+ printf("%s", features_string + features_indices[i]);
+ }
+ puts("");
}
QT_END_NAMESPACE
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index a3148fb..2626657 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -85,6 +85,7 @@ QT_BEGIN_HEADER
// SSE4.1 and SSE4.2 intrinsics
#if (defined(QT_HAVE_SSE4_1) || defined(QT_HAVE_SSE4_2)) && (defined(__SSE4_1__) || defined(Q_CC_MSVC))
#include <smmintrin.h>
+#include <nmmintrin.h>
#endif
// AVX intrinsics
diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp
index 2fd9a0b..d940bf8 100644
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@@ -173,7 +173,7 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b)
return 1;
}
-// Unicode case-insensitive comparison
+// Unicode case-sensitive comparison
static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen)
{
if (a == b && alen == blen)
@@ -202,22 +202,55 @@ static int ucstrnicmp(const ushort *a, const ushort *b, int l)
return ucstricmp(a, a + l, b, b + l);
}
+// Benchmarking indicates that doing memcmp is much slower than
+// executing the comparison ourselves.
+//
+// The profiling was done on a population of calls to qMemEquals, generated
+// during a run of the demo browser. The profile of the data (32-bit x86
+// Linux) was:
+//
+// total number of comparisons: 21353
+// longest string compared: 95
+// average comparison length: 14.8786
+// cache-line crosses: 5661 (13.3%)
+// alignment histogram:
+// 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned
+// 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned
+// 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned
+// 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned
+// 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned
+// 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned
+// 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned
+// 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned
+// total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned
+//
+// 92% of the strings have alignment of 2 or 10, which is due to malloc on
+// 32-bit Linux returning values aligned to 8 bytes, and offsetof(array, QString::Data) == 18.
+//
+// The profile on 64-bit will be different since offsetof(array, QString::Data) == 26.
+//
+// The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops:
+// 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp]
+// 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6]
+// SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks
+// SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks
+// SSSE3 "palignr" corrections: 852,147 CPU ticks
+// SSE4.2 "pcmpestrm": 738,702 CPU ticks
+//
+// The same benchmark on an Atom N450 @ 1.66 GHz, is:
+// 16-bit loads only: 2,185,882 CPU ticks
+// 32- and 16-bit loads: 1,805,060 CPU ticks
+// SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks
+// SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks
+// SSSE3 "palignr" corrections: 2,160,325 CPU ticks
+// SSE4.2 not available
+//
+// The conclusion we reach is that alignment the SSE2 unaligned code can gain
+// 20% improvement in performance in some systems, but suffers a penalty due
+// to the unaligned loads on others.
+
static bool qMemEquals(const quint16 *a, const quint16 *b, int length)
{
- // Benchmarking indicates that doing memcmp is much slower than
- // executing the comparison ourselves.
- // To make it even faster, we do a 32-bit comparison, comparing
- // twice the amount of data as a normal word-by-word comparison.
- //
- // Benchmarking results on a 2.33 GHz Core2 Duo, with a 64-QChar
- // block of data, with 4194304 iterations (per iteration):
- // operation usec cpu ticks
- // memcmp 330 710
- // 16-bit 79 167-171
- // 32-bit aligned 49 105-109
- //
- // Testing also indicates that unaligned 32-bit loads are as
- // performant as 32-bit aligned.
if (a == b || !length)
return true;
diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h
index e52f59f..06e4d47 100644
--- a/src/corelib/tools/qstring.h
+++ b/src/corelib/tools/qstring.h
@@ -614,6 +614,7 @@ private:
ushort asciiCache : 1;
ushort capacity : 1;
ushort reserved : 11;
+ // ### Qt5: try to ensure that "array" is aligned to 16 bytes on both 32- and 64-bit
ushort array[1];
};
static Data shared_null;