Update the x86 feature-detection code, unifying x86 and x86-64
Most of it is the same for both of them, so let's avoid doing everything twice. Or more, since we may support x32 soon. For Windows, use the intrinsics. For GCC, we'd like to use cpuid.h, but it only exists since GCC 4.3, so we can't. And properly detect AVX support: it's not enough to detect that the processor supports them, we also need to check that the OS enabled support for the 256-bit registers. Change-Id: Ibb4872cdb774de5701b18c40f4e612330a266214 Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com> Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
parent
01ee098531
commit
dc6d1c6c3f
@ -1,6 +1,7 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
|
||||
** Copyright (C) 2012 Intel Corporation.
|
||||
** Contact: http://www.qt-project.org/
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
|
||||
return features;
|
||||
}
|
||||
|
||||
#elif defined(Q_PROCESSOR_X86_32)
|
||||
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
uint features = 0;
|
||||
#elif defined(Q_PROCESSOR_X86)
|
||||
|
||||
unsigned int extended_result = 0;
|
||||
unsigned int feature_result = 0;
|
||||
uint result = 0;
|
||||
/* see p. 118 of amd64 instruction set manual Vol3 */
|
||||
#if defined(Q_CC_GNU)
|
||||
long cpuid_supported, tmp1;
|
||||
#ifdef Q_PROCESSOR_X86_32
|
||||
# define PICreg "%%ebx"
|
||||
#else
|
||||
# define PICreg "%%rbx"
|
||||
#endif
|
||||
|
||||
static int maxBasicCpuidSupported()
|
||||
{
|
||||
#ifdef Q_OS_WIN
|
||||
// Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
|
||||
int info[4];
|
||||
__cpuid(info, 0);
|
||||
return info[0];
|
||||
#elif defined(Q_CC_GNU)
|
||||
long tmp1;
|
||||
|
||||
# ifdef Q_PROCESSOR_X86_32
|
||||
// check if the CPUID instruction is supported
|
||||
long cpuid_supported;
|
||||
asm ("pushf\n"
|
||||
"pop %0\n"
|
||||
"mov %0, %1\n"
|
||||
@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
|
||||
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
|
||||
: "=a" (cpuid_supported), "=r" (tmp1)
|
||||
);
|
||||
if (cpuid_supported) {
|
||||
asm ("xchg %%ebx, %2\n"
|
||||
"cpuid\n"
|
||||
"xchg %%ebx, %2\n"
|
||||
: "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
|
||||
: "a" (1));
|
||||
if (!cpuid_supported)
|
||||
return 0;
|
||||
# endif
|
||||
|
||||
asm ("xchg %%ebx, %1\n"
|
||||
"cpuid\n"
|
||||
"cmp $0x80000000, %%eax\n"
|
||||
"jnbe 1f\n"
|
||||
"xor %0, %0\n"
|
||||
"jmp 2f\n"
|
||||
"1:\n"
|
||||
"mov $0x80000001, %%eax\n"
|
||||
"cpuid\n"
|
||||
"2:\n"
|
||||
"xchg %%ebx, %1\n"
|
||||
: "=&d" (extended_result), "=&r" (tmp1)
|
||||
: "a" (0x80000000)
|
||||
: "%ecx"
|
||||
);
|
||||
}
|
||||
int result;
|
||||
asm ("xchg "PICreg", %1\n"
|
||||
"cpuid\n"
|
||||
"xchg "PICreg", %1\n"
|
||||
: "=&a" (result), "=&r" (tmp1)
|
||||
: "0" (0)
|
||||
: "ecx", "edx");
|
||||
return result;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
#elif defined (Q_OS_WIN)
|
||||
_asm {
|
||||
push eax
|
||||
push ebx
|
||||
push ecx
|
||||
push edx
|
||||
pushfd
|
||||
pop eax
|
||||
mov ebx, eax
|
||||
xor eax, 00200000h
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
mov edx, 0
|
||||
xor eax, ebx
|
||||
jz skip
|
||||
static void cpuidFeatures01(uint &ecx, uint &edx)
|
||||
{
|
||||
#ifdef Q_OS_WIN
|
||||
int info[4];
|
||||
__cpuid(info, 1);
|
||||
ecx = info[2];
|
||||
edx = info[3];
|
||||
#elif defined(Q_CC_GNU)
|
||||
long tmp1;
|
||||
asm ("xchg "PICreg", %2\n"
|
||||
"cpuid\n"
|
||||
"xchg "PICreg", %2\n"
|
||||
: "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
|
||||
: "a" (1));
|
||||
#endif
|
||||
}
|
||||
|
||||
mov eax, 1
|
||||
cpuid
|
||||
mov result, edx
|
||||
mov feature_result, ecx
|
||||
skip:
|
||||
pop edx
|
||||
pop ecx
|
||||
pop ebx
|
||||
pop eax
|
||||
}
|
||||
static void cpuidFeatures07_00(uint &ebx)
|
||||
{
|
||||
#ifdef Q_OS_WIN
|
||||
int info[4];
|
||||
__cpuidex(info, 7, 0);
|
||||
ebx = info[1];
|
||||
#elif defined(Q_CC_GNU)
|
||||
unsigned long rbx; // in case it's 64-bit
|
||||
asm ("xchg "PICreg", %0\n"
|
||||
"cpuid\n"
|
||||
"xchg "PICreg", %0\n"
|
||||
: "=&r" (rbx)
|
||||
: "a" (7), "c" (0)
|
||||
: "%edx");
|
||||
ebx = rbx;
|
||||
#endif
|
||||
}
|
||||
|
||||
_asm {
|
||||
push eax
|
||||
push ebx
|
||||
push ecx
|
||||
push edx
|
||||
pushfd
|
||||
pop eax
|
||||
mov ebx, eax
|
||||
xor eax, 00200000h
|
||||
push eax
|
||||
popfd
|
||||
pushfd
|
||||
pop eax
|
||||
mov edx, 0
|
||||
xor eax, ebx
|
||||
jz skip2
|
||||
|
||||
mov eax, 80000000h
|
||||
cpuid
|
||||
cmp eax, 80000000h
|
||||
jbe skip2
|
||||
mov eax, 80000001h
|
||||
cpuid
|
||||
mov extended_result, edx
|
||||
skip2:
|
||||
pop edx
|
||||
pop ecx
|
||||
pop ebx
|
||||
pop eax
|
||||
}
|
||||
#ifdef Q_OS_WIN
|
||||
namespace QtXgetbvHack {
|
||||
inline quint64 _xgetbv(int) { return 0; }
|
||||
}
|
||||
using namespace QtXgetbvHack;
|
||||
#endif
|
||||
|
||||
static void xgetbv(int in, uint &eax, uint &edx)
|
||||
{
|
||||
#ifdef Q_OS_WIN
|
||||
quint64 result = _xgetbv(in);
|
||||
eax = result;
|
||||
edx = result >> 32;
|
||||
#elif defined(Q_CC_GNU)
|
||||
asm ("xgetbv"
|
||||
: "=a" (eax), "=d" (edx)
|
||||
: "c" (in));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
uint features = 0;
|
||||
if (maxBasicCpuidSupported() < 1)
|
||||
return 0;
|
||||
|
||||
#if defined(Q_PROCESSOR_X86_32)
|
||||
unsigned int feature_result = 0;
|
||||
uint result = 0;
|
||||
cpuidFeatures01(feature_result, result);
|
||||
|
||||
// result now contains the standard feature bits
|
||||
if (result & (1u << 26))
|
||||
features |= SSE2;
|
||||
if (feature_result & (1u))
|
||||
features |= SSE3;
|
||||
if (feature_result & (1u << 9))
|
||||
features |= SSSE3;
|
||||
if (feature_result & (1u << 19))
|
||||
features |= SSE4_1;
|
||||
if (feature_result & (1u << 20))
|
||||
features |= SSE4_2;
|
||||
if (feature_result & (1u << 28))
|
||||
features |= AVX;
|
||||
|
||||
return features;
|
||||
}
|
||||
|
||||
#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
|
||||
static inline uint detectProcessorFeatures()
|
||||
{
|
||||
uint features = SSE2;
|
||||
uint feature_result = 0;
|
||||
|
||||
#if defined (Q_OS_WIN64)
|
||||
{
|
||||
int info[4];
|
||||
__cpuid(info, 1);
|
||||
feature_result = info[2];
|
||||
}
|
||||
#elif defined(Q_CC_GNU)
|
||||
quint64 tmp;
|
||||
asm ("xchg %%rbx, %1\n"
|
||||
"cpuid\n"
|
||||
"xchg %%rbx, %1\n"
|
||||
: "=&c" (feature_result), "=&r" (tmp)
|
||||
: "a" (1)
|
||||
: "%edx"
|
||||
);
|
||||
#else
|
||||
// x86-64 or x32
|
||||
features = SSE2;
|
||||
uint feature_result = 0, tmp;
|
||||
cpuidFeatures01(feature_result, tmp);
|
||||
#endif
|
||||
|
||||
// common part between 32- and 64-bit
|
||||
if (feature_result & (1u))
|
||||
features |= SSE3;
|
||||
if (feature_result & (1u << 9))
|
||||
@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
|
||||
features |= SSE4_1;
|
||||
if (feature_result & (1u << 20))
|
||||
features |= SSE4_2;
|
||||
if (feature_result & (1u << 28))
|
||||
features |= AVX;
|
||||
uint xgetbvA = 0, xgetbvD = 0;
|
||||
if (feature_result & (1u << 27)) {
|
||||
// XGETBV enabled
|
||||
xgetbv(0, xgetbvA, xgetbvD);
|
||||
}
|
||||
|
||||
if ((xgetbvA & 6) == 6) {
|
||||
// support for YMM and XMM registers is enabled
|
||||
if (feature_result & (1u << 28))
|
||||
features |= AVX;
|
||||
}
|
||||
|
||||
return features;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user