diff options
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- | src/corelib/tools/qsimd.cpp | 82 | ||||
-rw-r--r-- | src/corelib/tools/qsimd_p.h | 21 |
2 files changed, 97 insertions, 6 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp index aa2ee47..5aa7217 100644 --- a/src/corelib/tools/qsimd.cpp +++ b/src/corelib/tools/qsimd.cpp @@ -91,6 +91,7 @@ uint qDetectCPUFeatures() features = MMX|SSE|SSE2; #elif defined(__i386__) || defined(_M_IX86) unsigned int extended_result = 0; + unsigned int feature_result = 0; uint result = 0; /* see p. 118 of amd64 instruction set manual Vol3 */ #if defined(Q_CC_GNU) @@ -112,7 +113,8 @@ uint qDetectCPUFeatures() "1:\n" "pop %%ebx\n" "mov %%edx, %0\n" - : "=r" (result) + "mov %%ecx, %1\n" + : "=r" (result), "=r" (feature_result) : : "%eax", "%ecx", "%edx" ); @@ -164,6 +166,7 @@ uint qDetectCPUFeatures() mov eax, 1 cpuid mov result, edx + mov feature_result, ecx skip: pop edx pop ecx @@ -218,19 +221,86 @@ uint qDetectCPUFeatures() features |= SSE; if (result & (1u << 26)) features |= SSE2; - if (extended_result & (1u)) + if (feature_result & (1u)) features |= SSE3; - if (extended_result & (1u << 9)) + if (feature_result & (1u << 9)) features |= SSSE3; - if (extended_result & (1u << 19)) + if (feature_result & (1u << 19)) features |= SSE4_1; - if (extended_result & (1u << 20)) + if (feature_result & (1u << 20)) features |= SSE4_2; - if (extended_result & (1u << 28)) + if (feature_result & (1u << 28)) features |= AVX; #endif // i386 +#if defined(__x86_64__) || defined(Q_OS_WIN64) + uint feature_result = 0; + +#if defined(Q_CC_GNU) + asm ("push %%rbx\n" + "pushf\n" + "pop %%rax\n" + "mov %%eax, %%ebx\n" + "xor $0x00200000, %%eax\n" + "push %%rax\n" + "popf\n" + "pushf\n" + "pop %%rax\n" + "xor %%edx, %%edx\n" + "xor %%ebx, %%eax\n" + "jz 1f\n" + + "mov $0x00000001, %%eax\n" + "cpuid\n" + "1:\n" + "pop %%rbx\n" + "mov %%ecx, %0\n" + : "=r" (feature_result) + : + : "%eax", "%ecx", "%edx" + ); +#elif defined (Q_OS_WIN64) + _asm { + push rax + push rbx + push rcx + push rdx + pushfd + pop rax + mov ebx, eax + xor eax, 00200000h + push rax + popfd + pushfd + pop rax + mov edx, 0 + xor eax, ebx + jz skip + + mov eax, 1 + cpuid + mov feature_result, ecx + skip: + pop rdx + pop rcx + pop rbx + pop rax + } +#endif + + if (feature_result & (1u)) + features |= SSE3; + if (feature_result & (1u << 9)) + features |= SSSE3; + if (feature_result & (1u << 19)) + features |= SSE4_1; + if (feature_result & (1u << 20)) + features |= SSE4_2; + if (feature_result & (1u << 28)) + features |= AVX; +#endif // x86_64 + #if defined(QT_HAVE_MMX) if (qgetenv("QT_NO_MMX").toInt()) features ^= MMX; diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h index 18394853..5ff0f97 100644 --- a/src/corelib/tools/qsimd_p.h +++ b/src/corelib/tools/qsimd_p.h @@ -72,6 +72,27 @@ QT_BEGIN_HEADER # include <emmintrin.h> #endif +// SSE3 intrinsics +#if defined(QT_HAVE_SSE3) && (defined(__SSE3__) || defined(Q_CC_MSVC)) +#include <pmmintrin.h> +#endif + +// SSSE3 intrinsics +#if defined(QT_HAVE_SSSE3) && (defined(__SSSE3__) || defined(Q_CC_MSVC)) +#include <tmmintrin.h> +#endif + +// SSE4.1 and SSE4.2 intrinsics +#if (defined(QT_HAVE_SSE4_1) || defined(QT_HAVE_SSE4_2)) && (defined(__SSE4_1__) || defined(Q_CC_MSVC)) +#include <smmintrin.h> +#endif + +// AVX intrinsics +#if defined(QT_HAVE_AVX) && (defined(__AVX__) || defined(Q_CC_MSVC)) +#include <immintrin.h> +#endif + + #if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2)) #define QT_ALWAYS_HAVE_SSE2 #endif |