diff options
Diffstat (limited to 'src/corelib/tools/qsimd.cpp')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 306 |
1 files changed, 200 insertions, 106 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index 8005d7d..7babf3a 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -41,6 +41,7 @@ #include "qsimd_p.h" #include <QByteArray> +#include <stdio.h> #if defined(Q_OS_WINCE) #include <windows.h> @@ -50,15 +51,32 @@ #include <intrin.h> #endif +#if defined(Q_OS_LINUX) && defined(__arm__) +#include "private/qcore_unix_p.h" + +// the kernel header definitions for HWCAP_* +// (the ones we need/may need anyway) + +// copied from <asm/hwcap.h> (ARM) +#define HWCAP_IWMMXT 512 +#define HWCAP_CRUNCH 1024 +#define HWCAP_THUMBEE 2048 +#define HWCAP_NEON 4096 +#define HWCAP_VFPv3 8192 +#define HWCAP_VFPv3D16 16384 + +// copied from <linux/auxvec.h> +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ + +#endif + QT_BEGIN_NAMESPACE -uint qDetectCPUFeatures() +#if defined (Q_OS_WINCE) +static inline uint detectProcessorFeatures() { - static uint features = 0xffffffff; - if (features != 0xffffffff) - return features; + uint features = 0; -#if defined (Q_OS_WINCE) #if defined (ARM) if (IsProcessorFeaturePresent(PF_ARM_INTEL_WMMX)) { features = IWMMXT; @@ -78,77 +96,98 @@ uint qDetectCPUFeatures() #endif features = 0; return features; -#elif defined(QT_HAVE_IWMMXT) +} + +#elif defined(__arm__) || defined(__arm) || defined(QT_HAVE_IWMMXT) || defined(QT_HAVE_NEON) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + +#if defined(Q_OS_LINUX) + int auxv = ::qt_safe_open("/proc/self/auxv", O_RDONLY); + if (auxv != -1) { + unsigned long vector[64]; + int nread; + while (features == 0) { + nread = ::qt_safe_read(auxv, (char *)vector, sizeof vector); + if (nread <= 0) { + // EOF or error + break; + } + + int max = nread / (sizeof vector[0]); + for (int i = 0; i < max; i += 2) + if (vector[i] == AT_HWCAP) { + if (vector[i+1] & HWCAP_IWMMXT) + features |= IWMMXT; + if (vector[i+1] & HWCAP_NEON) + features |= NEON; + break; + } + } + + ::qt_safe_close(auxv); + return features; + } + // fall back if /proc/self/auxv wasn't found +#endif + +#if defined(QT_HAVE_IWMMXT) // runtime detection only available when running as a previlegied process - static const bool doIWMMXT = !qgetenv("QT_NO_IWMMXT").toInt(); - features = doIWMMXT ? IWMMXT : 0; - return features; + features = IWMMXT; #elif defined(QT_HAVE_NEON) - static const bool doNEON = !qgetenv("QT_NO_NEON").toInt(); - features = doNEON ? NEON : 0; + features = NEON; +#endif + return features; -#else - features = 0; -#if defined(__x86_64__) || defined(Q_OS_WIN64) - features = MMX|SSE|SSE2|CMOV; -#elif defined(__ia64__) - features = MMX|SSE|SSE2; +} + #elif defined(__i386__) || defined(_M_IX86) +static inline uint detectProcessorFeatures() +{ + uint features = 0; + unsigned int extended_result = 0; unsigned int feature_result = 0; uint result = 0; /* see p. 118 of amd64 instruction set manual Vol3 */ #if defined(Q_CC_GNU) - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" + long cpuid_supported, tmp1; + asm ("pushf\n" + "pop %0\n" + "mov %0, %1\n" + "xor $0x00200000, %0\n" + "push %0\n" "popf\n" "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 1f\n" - - "mov $0x00000001, %%eax\n" - "cpuid\n" - "1:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - "mov %%ecx, %1\n" - : "=r" (result), "=r" (feature_result) - : - : "%eax", "%ecx", "%edx" - ); + "pop %0\n" + "xor %1, %0\n" // %eax is now 0 if CPUID is not supported + : "=a" (cpuid_supported), "=r" (tmp1) + ); + if (cpuid_supported) { + asm ("xchg %%ebx, %2\n" + "cpuid\n" + "xchg %%ebx, %2\n" + : "=c" (feature_result), "=d" (result), "=&r" (tmp1) + : "a" (1)); - asm ("push %%ebx\n" - "pushf\n" - "pop %%eax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 2f\n" + asm ("xchg %%ebx, %1\n" + "cpuid\n" + "cmp $0x80000000, %%eax\n" + "jnbe 1f\n" + "xor %0, %0\n" + "jmp 2f\n" + "1:\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "2:\n" + "xchg %%ebx, %1\n" + : "=d" (extended_result), "=&r" (tmp1) + : "a" (0x80000000) + : "%ecx" + ); + } - "mov $0x80000000, %%eax\n" - "cpuid\n" - "cmp $0x80000000, %%eax\n" - "jbe 2f\n" - "mov $0x80000001, %%eax\n" - "cpuid\n" - "2:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (extended_result) - : - : "%eax", "%ecx", "%edx" - ); #elif defined (Q_OS_WIN) _asm { push eax @@ -210,6 +249,7 @@ uint qDetectCPUFeatures() } #endif + // result now contains the standard feature bits if (result & (1u << 15)) features |= CMOV; @@ -236,33 +276,23 @@ uint qDetectCPUFeatures() if (feature_result & (1u << 28)) features |= AVX; -#endif // i386 + return features; +} -#if defined(__x86_64__) || defined(Q_OS_WIN64) +#elif defined(__x86_64) || defined(Q_OS_WIN64) +static inline uint detectProcessorFeatures() +{ + uint features = MMX|SSE|SSE2|CMOV; uint feature_result = 0; #if defined(Q_CC_GNU) - asm ("push %%rbx\n" - "pushf\n" - "pop %%rax\n" - "mov %%eax, %%ebx\n" - "xor $0x00200000, %%eax\n" - "push %%rax\n" - "popf\n" - "pushf\n" - "pop %%rax\n" - "xor %%edx, %%edx\n" - "xor %%ebx, %%eax\n" - "jz 1f\n" - - "mov $0x00000001, %%eax\n" + long tmp; + asm ("xchg %%rbx, %1\n" "cpuid\n" - "1:\n" - "pop %%rbx\n" - "mov %%ecx, %0\n" - : "=r" (feature_result) - : - : "%eax", "%ecx", "%edx" + "xchg %%rbx, %1\n" + : "=c" (feature_result), "=&r" (tmp) + : "a" (1) + : "%edx" ); #elif defined (Q_OS_WIN64) { @@ -282,33 +312,97 @@ uint qDetectCPUFeatures() features |= SSE4_2; if (feature_result & (1u << 28)) features |= AVX; -#endif // x86_64 -#if defined(QT_HAVE_MMX) - if (qgetenv("QT_NO_MMX").toInt()) - features ^= MMX; -#endif - if (qgetenv("QT_NO_MMXEXT").toInt()) - features ^= MMXEXT; + return features; +} -#if defined(QT_HAVE_3DNOW) - if (qgetenv("QT_NO_3DNOW").toInt()) - features ^= MMX3DNOW; -#endif - if (qgetenv("QT_NO_3DNOWEXT").toInt()) - features ^= MMX3DNOWEXT; +#elif defined(__ia64__) +static inline uint detectProcessorFeatures() +{ + return MMX|SSE|SSE2; +} -#if defined(QT_HAVE_SSE) - if (qgetenv("QT_NO_SSE").toInt()) - features ^= SSE; -#endif -#if defined(QT_HAVE_SSE2) - if (qgetenv("QT_NO_SSE2").toInt()) - features ^= SSE2; +#else +static inline uint detectProcessorFeatures() +{ + return 0; +} #endif +/* + * Use kdesdk/scripts/generate_string_table.pl to update the table below. + * Here's the data (don't forget the ONE leading space): + mmx + mmxext + mmx3dnow + mmx3dnowext + sse + sse2 + cmov + iwmmxt + neon + sse3 + ssse3 + sse4.1 + sse4.2 + avx + */ + +// begin generated +static const char features_string[] = + " mmx\0" + " mmxext\0" + " mmx3dnow\0" + " mmx3dnowext\0" + " sse\0" + " sse2\0" + " cmov\0" + " iwmmxt\0" + " neon\0" + " sse3\0" + " ssse3\0" + " sse4.1\0" + " sse4.2\0" + " avx\0" + "\0"; + +static const int features_indices[] = { + 0, 5, 13, 23, 36, 41, 47, 53, + 61, 67, 73, 80, 88, 96, -1 +}; +// end generated + +const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]); + +uint qDetectCPUFeatures() +{ + static QBasicAtomicInt features = Q_BASIC_ATOMIC_INITIALIZER(-1); + if (features != -1) + return features; + + uint f = detectProcessorFeatures(); + QByteArray disable = qgetenv("QT_NO_CPU_FEATURE"); + if (!disable.isEmpty()) { + disable.prepend(' '); + for (int i = 0; i < features_count; ++i) { + if (disable.contains(features_string + features_indices[i])) + f &= ~(1 << i); + } + } + + features = f; return features; -#endif +} + +void qDumpCPUFeatures() +{ + uint features = qDetectCPUFeatures(); + printf("Processor features: "); + for (int i = 0; i < features_count; ++i) { + if (features & (1 << i)) + printf("%s", features_string + features_indices[i]); + } + puts(""); } QT_END_NAMESPACE |