diff options
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/global/global.pri | 2 | ||||
-rw-r--r-- | src/corelib/global/qlibraryinfo.cpp | 11 | ||||
-rw-r--r-- | src/corelib/plugin/qlibrary.cpp | 5 | ||||
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 191 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 1 | ||||
-rw-r--r-- | src/corelib/tools/qstring.cpp | 63 | ||||
-rw-r--r-- | src/corelib/tools/qstring.h | 1 |
7 files changed, 217 insertions, 57 deletions
diff --git a/src/corelib/global/global.pri b/src/corelib/global/global.pri index b916b4d..2505e72 100644 --- a/src/corelib/global/global.pri +++ b/src/corelib/global/global.pri @@ -19,7 +19,7 @@ INCLUDEPATH += $$QT_BUILD_TREE/src/corelib/global # Only used on platforms with CONFIG += precompile_header PRECOMPILED_HEADER = global/qt_pch.h -linux*-g++*:!static { +linux*:!static { QMAKE_LFLAGS += -Wl,-e,qt_core_boilerplate prog=$$quote(if (/program interpreter: (.*)]/) { print $1; }) DEFINES += ELF_INTERPRETER=\\\"$$system(readelf -l /bin/ls | perl -n -e \'$$prog\')\\\" diff --git a/src/corelib/global/qlibraryinfo.cpp b/src/corelib/global/qlibraryinfo.cpp index ea7e2e4..a0779c9 100644 --- a/src/corelib/global/qlibraryinfo.cpp +++ b/src/corelib/global/qlibraryinfo.cpp @@ -500,6 +500,7 @@ QT_END_NAMESPACE extern const char qt_core_interpreter[] __attribute__((section(".interp"))) = ELF_INTERPRETER; +extern void qDumpCPUFeatures(); // in qsimd.cpp extern "C" void qt_core_boilerplate(); void qt_core_boilerplate() { @@ -508,6 +509,14 @@ void qt_core_boilerplate() "Contact: Nokia Corporation (qt-info@nokia.com)\n" "\n" "Build key: " QT_BUILD_KEY "\n" + "Compat build key: " +#ifdef QT_BUILD_KEY_COMPAT + "| " QT_BUILD_KEY_COMPAT " " +#endif +#ifdef QT_BUILD_KEY_COMPAT2 + "| " QT_BUILD_KEY_COMPAT2 " " +#endif + "|\n" "Build date: %s\n" "Installation prefix: %s\n" "Library path: %s\n" @@ -517,6 +526,8 @@ void qt_core_boilerplate() qt_configure_libraries_path_str + 12, qt_configure_headers_path_str + 12); +// qDumpCPUFeatures(); + #ifdef QT_EVAL extern void qt_core_eval_init(uint); qt_core_eval_init(1); diff --git a/src/corelib/plugin/qlibrary.cpp b/src/corelib/plugin/qlibrary.cpp index 0f99948..a9ae2ab 100644 --- a/src/corelib/plugin/qlibrary.cpp +++ b/src/corelib/plugin/qlibrary.cpp @@ -790,10 +790,13 @@ bool QLibraryPrivate::isPlugin(QSettings *settings) .arg(qt_version&0xff) .arg(debug ? QLatin1String("debug") : QLatin1String("release")); } else if (key != QT_BUILD_KEY + // we may have some compatibility keys, try them too: #ifdef QT_BUILD_KEY_COMPAT - // be sure to load plugins using an older but compatible build key && key != QT_BUILD_KEY_COMPAT #endif +#ifdef QT_BUILD_KEY_COMPAT2 + && key != QT_BUILD_KEY_COMPAT2 +#endif ) { if (qt_debug_component()) { qWarning("In %s:\n" diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 8005d7d..68ab033 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -41,6 +41,7 @@ #include "qsimd_p.h" #include <QByteArray> +#include <stdio.h> #if defined(Q_OS_WINCE) #include <windows.h> @@ -50,15 +51,20 @@ #include <intrin.h> #endif +#if defined(Q_OS_LINUX) && defined(__arm__) +#include "private/qcore_unix_p.h" + +#include <asm/hwcap.h> +#include <linux/auxvec.h> +#endif + QT_BEGIN_NAMESPACE -uint qDetectCPUFeatures() +#if defined (Q_OS_WINCE) +static inline uint detectProcessorFeatures() { - static uint features = 0xffffffff; - if (features != 0xffffffff) - return features; + uint features = 0; -#if defined (Q_OS_WINCE) #if defined (ARM) if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { features = IWMMXT; @@ -78,22 +84,57 @@ uint qDetectCPUFeatures() #endif features = 0; return features; -#elif defined(QT_HAVE_IWMMXT) +} + +#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + +#if defined(Q_OS_LINUX) + int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY); + if (auxv != -1) { + unsigned long vector[64]; + int nread; + while (features == 0) { + nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector); + if (nread <= 0) { + // EOF or error + break; + } + + int max = nread / (sizeof vector[0]); + for (int i = 0; i < max; i += 2) + if (vector[i] == AT_HWCAP) { + if (vector[i+1] & HWCAP_IWMMXT) + features |= IWMMXT; + if (vector[i+1] & HWCAP_NEON) + features |= NEON; + break; + } + } + + ::qt_safe_close(auxv); + return features; + } + // fall back if /proc/self/auxv wasn't found +#endif + +#if defined(QT_HAVE_IWMMXT) // runtime detection only available when running as a previlegied process - static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - features = doIWMMXT ? IWMMXT : 0; - return features; + features = IWMMXT; #elif defined(QT_HAVE_NEON) - static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - features = doNEON ? NEON : 0; + features = NEON; +#endif + return features; -#else - features = 0; -#if defined(__x86_64__) || defined(Q_OS_WIN64) - features = MMX|SSE|SSE2|CMOV; -#elif defined(__ia64__) - features = MMX|SSE|SSE2; +} + #elif defined(__i386__) || defined(_M_IX86) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + unsigned int extended_result = 0; unsigned int feature_result = 0; uint result = 0; @@ -149,6 +190,7 @@ uint qDetectCPUFeatures() : : "%eax", "%ecx", "%edx" ); + #elif defined (Q_OS_WIN) _asm { push eax @@ -210,6 +252,7 @@ uint qDetectCPUFeatures() } #endif + // result now contains the standard feature bits if (result & (1u << 15)) features |= CMOV; @@ -236,9 +279,13 @@ uint qDetectCPUFeatures() if (feature_result & (1u << 28)) features |= AVX; -#endif // i386 + return features; +} -#if defined(__x86_64__) || defined(Q_OS_WIN64) +#elif defined(__x86_64) || defined(Q_OS_WIN64) +static inline uint detectProcessorFeatures() +{ + uint features = MMX|SSE|SSE2|CMOV; uint feature_result = 0; #if defined(Q_CC_GNU) @@ -282,33 +329,97 @@ uint qDetectCPUFeatures() features |= SSE4_2; if (feature_result & (1u << 28)) features |= AVX; -#endif // x86_64 -#if defined(QT_HAVE_MMX) - if (qgetenv("QT_NO_MMX").toInt()) - features ^= MMX; -#endif - if (qgetenv("QT_NO_MMXEXT").toInt()) - features ^= MMXEXT; + return features; +} -#if defined(QT_HAVE_3DNOW) - if (qgetenv("QT_NO_3DNOW").toInt()) - features ^= MMX3DNOW; -#endif - if (qgetenv("QT_NO_3DNOWEXT").toInt()) - features ^= MMX3DNOWEXT; +#elif defined(__ia64__) +static inline uint detectProcessorFeatures() +{ + return MMX|SSE|SSE2; +} -#if defined(QT_HAVE_SSE) - if (qgetenv("QT_NO_SSE").toInt()) - features ^= SSE; -#endif -#if defined(QT_HAVE_SSE2) - if (qgetenv("QT_NO_SSE2").toInt()) - features ^= SSE2; +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} #endif +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. + * Here's the data (don't forget the ONE leading space): + mmx + mmxext + mmx3dnow + mmx3dnowext + sse + sse2 + cmov + iwmmxt + neon + sse3 + ssse3 + sse4.1 + sse4.2 + avx + */ + +// begin generated +static const char features_string[] = + " mmx\0" + " mmxext\0" + " mmx3dnow\0" + " mmx3dnowext\0" + " sse\0" + " sse2\0" + " cmov\0" + " iwmmxt\0" + " neon\0" + " sse3\0" + " ssse3\0" + " sse4.1\0" + " sse4.2\0" + " avx\0" + "\0"; + +static const int features_indices[] = { + 0, 5, 13, 23, 36, 41, 47, 53, + 61, 67, 73, 80, 88, 96, -1 +}; +// end generated + +const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]); + +uint qDetectCPUFeatures() +{ + static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); + if (features != -1) + return features; + + uint f = detectProcessorFeatures(); + QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); + if (!disable.isEmpty()) { + disable.prepend(' '); + for (int i = 0; i < features_count; ++i) { + if (disable.contains(features_string + features_indices[i])) + f &= ~(1 << i); + } + } + + features = f; return features; -#endif +} + +void qDumpCPUFeatures() +{ + uint features = qDetectCPUFeatures(); + printf("Processor features: "); + for (int i = 0; i < features_count; ++i) { + if (features & (1 << i)) + printf("%s", features_string + features_indices[i]); + } + puts(""); } QT_END_NAMESPACE diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index a3148fb..2626657 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -85,6 +85,7 @@ QT_BEGIN_HEADER // SSE4.1 and SSE4.2 intrinsics #if (defined(QT_HAVE_SSE4_1) || defined(QT_HAVE_SSE4_2)) && (defined(__SSE4_1__) || defined(Q_CC_MSVC)) #include <smmintrin.h> +#include <nmmintrin.h> #endif // AVX intrinsics diff --git a/src/corelib/tools/qstring.cpp b/src/corelib/tools/qstring.cpp index 2fd9a0b..d940bf8 100644 --- a/src/corelib/tools/qstring.cpp +++ b/src/corelib/tools/qstring.cpp @@ -173,7 +173,7 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b) return 1; } -// Unicode case-insensitive comparison +// Unicode case-sensitive comparison static int ucstrcmp(const QChar *a, int alen, const QChar *b, int blen) { if (a == b && alen == blen) @@ -202,22 +202,55 @@ static int ucstrnicmp(const ushort *a, const ushort *b, int l) return ucstricmp(a, a + l, b, b + l); } +// Benchmarking indicates that doing memcmp is much slower than +// executing the comparison ourselves. +// +// The profiling was done on a population of calls to qMemEquals, generated +// during a run of the demo browser. The profile of the data (32-bit x86 +// Linux) was: +// +// total number of comparisons: 21353 +// longest string compared: 95 +// average comparison length: 14.8786 +// cache-line crosses: 5661 (13.3%) +// alignment histogram: +// 0xXXX0 = 512 (1.2%) strings, 0 (0.0%) of which same-aligned +// 0xXXX2 = 15087 (35.3%) strings, 5145 (34.1%) of which same-aligned +// 0xXXX4 = 525 (1.2%) strings, 0 (0.0%) of which same-aligned +// 0xXXX6 = 557 (1.3%) strings, 6 (1.1%) of which same-aligned +// 0xXXX8 = 509 (1.2%) strings, 0 (0.0%) of which same-aligned +// 0xXXXa = 24358 (57.0%) strings, 9901 (40.6%) of which same-aligned +// 0xXXXc = 557 (1.3%) strings, 0 (0.0%) of which same-aligned +// 0xXXXe = 601 (1.4%) strings, 15 (2.5%) of which same-aligned +// total = 42706 (100%) strings, 15067 (35.3%) of which same-aligned +// +// 92% of the strings have alignment of 2 or 10, which is due to malloc on +// 32-bit Linux returning values aligned to 8 bytes, and offsetof(array, QString::Data) == 18. +// +// The profile on 64-bit will be different since offsetof(array, QString::Data) == 26. +// +// The benchmark results were, for a Core-i7 @ 2.67 GHz 32-bit, compiled with -O3 -funroll-loops: +// 16-bit loads only: 872,301 CPU ticks [Qt 4.5 / memcmp] +// 32- and 16-bit loads: 773,362 CPU ticks [Qt 4.6] +// SSE2 "movdqu" 128-bit loads: 618,736 CPU ticks +// SSE3 "lddqu" 128-bit loads: 619,954 CPU ticks +// SSSE3 "palignr" corrections: 852,147 CPU ticks +// SSE4.2 "pcmpestrm": 738,702 CPU ticks +// +// The same benchmark on an Atom N450 @ 1.66 GHz, is: +// 16-bit loads only: 2,185,882 CPU ticks +// 32- and 16-bit loads: 1,805,060 CPU ticks +// SSE2 "movdqu" 128-bit loads: 2,529,843 CPU ticks +// SSE3 "lddqu" 128-bit loads: 2,514,858 CPU ticks +// SSSE3 "palignr" corrections: 2,160,325 CPU ticks +// SSE4.2 not available +// +// The conclusion we reach is that alignment the SSE2 unaligned code can gain +// 20% improvement in performance in some systems, but suffers a penalty due +// to the unaligned loads on others. + static bool qMemEquals(const quint16 *a, const quint16 *b, int length) { - // Benchmarking indicates that doing memcmp is much slower than - // executing the comparison ourselves. - // To make it even faster, we do a 32-bit comparison, comparing - // twice the amount of data as a normal word-by-word comparison. - // - // Benchmarking results on a 2.33 GHz Core2 Duo, with a 64-QChar - // block of data, with 4194304 iterations (per iteration): - // operation usec cpu ticks - // memcmp 330 710 - // 16-bit 79 167-171 - // 32-bit aligned 49 105-109 - // - // Testing also indicates that unaligned 32-bit loads are as - // performant as 32-bit aligned. if (a == b || !length) return true; diff --git a/src/corelib/tools/qstring.h b/src/corelib/tools/qstring.h index e52f59f..06e4d47 100644 --- a/src/corelib/tools/qstring.h +++ b/src/corelib/tools/qstring.h @@ -614,6 +614,7 @@ private: ushort asciiCache : 1; ushort capacity : 1; ushort reserved : 11; + // ### Qt5: try to ensure that "array" is aligned to 16 bytes on both 32- and 64-bit ushort array[1]; }; static Data shared_null; |