summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/tools/qsimd.cpp82
-rw-r--r--src/corelib/tools/qsimd_p.h21
2 files changed, 97 insertions, 6 deletions
diff --git a/src/corelib/tools/qsimd.cpp b/src/corelib/tools/qsimd.cpp
index aa2ee47..5aa7217 100644
--- a/src/corelib/tools/qsimd.cpp
+++ b/src/corelib/tools/qsimd.cpp
@@ -91,6 +91,7 @@ uint qDetectCPUFeatures()
features = MMX|SSE|SSE2;
#elif defined(__i386__) || defined(_M_IX86)
unsigned int extended_result = 0;
+ unsigned int feature_result = 0;
uint result = 0;
/* see p. 118 of amd64 instruction set manual Vol3 */
#if defined(Q_CC_GNU)
@@ -112,7 +113,8 @@ uint qDetectCPUFeatures()
"1:\n"
"pop %%ebx\n"
"mov %%edx, %0\n"
- : "=r" (result)
+ "mov %%ecx, %1\n"
+ : "=r" (result), "=r" (feature_result)
:
: "%eax", "%ecx", "%edx"
);
@@ -164,6 +166,7 @@ uint qDetectCPUFeatures()
mov eax, 1
cpuid
mov result, edx
+ mov feature_result, ecx
skip:
pop edx
pop ecx
@@ -218,19 +221,86 @@ uint qDetectCPUFeatures()
features |= SSE;
if (result & (1u << 26))
features |= SSE2;
- if (extended_result & (1u))
+ if (feature_result & (1u))
features |= SSE3;
- if (extended_result & (1u << 9))
+ if (feature_result & (1u << 9))
features |= SSSE3;
- if (extended_result & (1u << 19))
+ if (feature_result & (1u << 19))
features |= SSE4_1;
- if (extended_result & (1u << 20))
+ if (feature_result & (1u << 20))
features |= SSE4_2;
- if (extended_result & (1u << 28))
+ if (feature_result & (1u << 28))
features |= AVX;
#endif // i386
+#if defined(__x86_64__) || defined(Q_OS_WIN64)
+ uint feature_result = 0;
+
+#if defined(Q_CC_GNU)
+ asm ("push %%rbx\n"
+ "pushf\n"
+ "pop %%rax\n"
+ "mov %%eax, %%ebx\n"
+ "xor $0x00200000, %%eax\n"
+ "push %%rax\n"
+ "popf\n"
+ "pushf\n"
+ "pop %%rax\n"
+ "xor %%edx, %%edx\n"
+ "xor %%ebx, %%eax\n"
+ "jz 1f\n"
+
+ "mov $0x00000001, %%eax\n"
+ "cpuid\n"
+ "1:\n"
+ "pop %%rbx\n"
+ "mov %%ecx, %0\n"
+ : "=r" (feature_result)
+ :
+ : "%eax", "%ecx", "%edx"
+ );
+#elif defined (Q_OS_WIN64)
+ _asm {
+ push rax
+ push rbx
+ push rcx
+ push rdx
+ pushfd
+ pop rax
+ mov ebx, eax
+ xor eax, 00200000h
+ push rax
+ popfd
+ pushfd
+ pop rax
+ mov edx, 0
+ xor eax, ebx
+ jz skip
+
+ mov eax, 1
+ cpuid
+ mov feature_result, ecx
+ skip:
+ pop rdx
+ pop rcx
+ pop rbx
+ pop rax
+ }
+#endif
+
+ if (feature_result & (1u))
+ features |= SSE3;
+ if (feature_result & (1u << 9))
+ features |= SSSE3;
+ if (feature_result & (1u << 19))
+ features |= SSE4_1;
+ if (feature_result & (1u << 20))
+ features |= SSE4_2;
+ if (feature_result & (1u << 28))
+ features |= AVX;
+#endif // x86_64
+
#if defined(QT_HAVE_MMX)
if (qgetenv("QT_NO_MMX").toInt())
features ^= MMX;
diff --git a/src/corelib/tools/qsimd_p.h b/src/corelib/tools/qsimd_p.h
index 18394853..5ff0f97 100644
--- a/src/corelib/tools/qsimd_p.h
+++ b/src/corelib/tools/qsimd_p.h
@@ -72,6 +72,27 @@ QT_BEGIN_HEADER
# include <emmintrin.h>
#endif
+// SSE3 intrinsics
+#if defined(QT_HAVE_SSE3) && (defined(__SSE3__) || defined(Q_CC_MSVC))
+#include <pmmintrin.h>
+#endif
+
+// SSSE3 intrinsics
+#if defined(QT_HAVE_SSSE3) && (defined(__SSSE3__) || defined(Q_CC_MSVC))
+#include <tmmintrin.h>
+#endif
+
+// SSE4.1 and SSE4.2 intrinsics
+#if (defined(QT_HAVE_SSE4_1) || defined(QT_HAVE_SSE4_2)) && (defined(__SSE4_1__) || defined(Q_CC_MSVC))
+#include <smmintrin.h>
+#endif
+
+// AVX intrinsics
+#if defined(QT_HAVE_AVX) && (defined(__AVX__) || defined(Q_CC_MSVC))
+#include <immintrin.h>
+#endif
+
+
#if !defined(QT_BOOTSTRAPPED) && (!defined(Q_CC_MSVC) || (defined(_M_X64) || _M_IX86_FP == 2))
#define QT_ALWAYS_HAVE_SSE2
#endif