Update the x86 feature-detection code, unifying x86 and x86-64
Most of it is the same for both of them, so let's avoid doing everything twice — or more, since we may support x32 soon. For Windows, use the intrinsics. For GCC, we'd like to use cpuid.h, but it has only existed since GCC 4.3, so we can't. Also, properly detect AVX support: it's not enough to detect that the processor supports AVX; we also need to check that the OS has enabled support for the 256-bit registers. Change-Id: Ibb4872cdb774de5701b18c40f4e612330a266214 Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com> Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
parent
01ee098531
commit
dc6d1c6c3f
@ -1,6 +1,7 @@
|
|||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
**
|
**
|
||||||
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
|
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
|
||||||
|
** Copyright (C) 2012 Intel Corporation.
|
||||||
** Contact: http://www.qt-project.org/
|
** Contact: http://www.qt-project.org/
|
||||||
**
|
**
|
||||||
** This file is part of the QtCore module of the Qt Toolkit.
|
** This file is part of the QtCore module of the Qt Toolkit.
|
||||||
@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
|
|||||||
return features;
|
return features;
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif defined(Q_PROCESSOR_X86_32)
|
#elif defined(Q_PROCESSOR_X86)
|
||||||
static inline uint detectProcessorFeatures()
|
|
||||||
{
|
|
||||||
uint features = 0;
|
|
||||||
|
|
||||||
unsigned int extended_result = 0;
|
#ifdef Q_PROCESSOR_X86_32
|
||||||
unsigned int feature_result = 0;
|
# define PICreg "%%ebx"
|
||||||
uint result = 0;
|
#else
|
||||||
/* see p. 118 of amd64 instruction set manual Vol3 */
|
# define PICreg "%%rbx"
|
||||||
#if defined(Q_CC_GNU)
|
#endif
|
||||||
long cpuid_supported, tmp1;
|
|
||||||
|
static int maxBasicCpuidSupported()
|
||||||
|
{
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
// Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
|
||||||
|
int info[4];
|
||||||
|
__cpuid(info, 0);
|
||||||
|
return info[0];
|
||||||
|
#elif defined(Q_CC_GNU)
|
||||||
|
long tmp1;
|
||||||
|
|
||||||
|
# ifdef Q_PROCESSOR_X86_32
|
||||||
|
// check if the CPUID instruction is supported
|
||||||
|
long cpuid_supported;
|
||||||
asm ("pushf\n"
|
asm ("pushf\n"
|
||||||
"pop %0\n"
|
"pop %0\n"
|
||||||
"mov %0, %1\n"
|
"mov %0, %1\n"
|
||||||
@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
|
|||||||
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
|
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
|
||||||
: "=a" (cpuid_supported), "=r" (tmp1)
|
: "=a" (cpuid_supported), "=r" (tmp1)
|
||||||
);
|
);
|
||||||
if (cpuid_supported) {
|
if (!cpuid_supported)
|
||||||
asm ("xchg %%ebx, %2\n"
|
return 0;
|
||||||
"cpuid\n"
|
# endif
|
||||||
"xchg %%ebx, %2\n"
|
|
||||||
: "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
|
|
||||||
: "a" (1));
|
|
||||||
|
|
||||||
asm ("xchg %%ebx, %1\n"
|
int result;
|
||||||
"cpuid\n"
|
asm ("xchg "PICreg", %1\n"
|
||||||
"cmp $0x80000000, %%eax\n"
|
"cpuid\n"
|
||||||
"jnbe 1f\n"
|
"xchg "PICreg", %1\n"
|
||||||
"xor %0, %0\n"
|
: "=&a" (result), "=&r" (tmp1)
|
||||||
"jmp 2f\n"
|
: "0" (0)
|
||||||
"1:\n"
|
: "ecx", "edx");
|
||||||
"mov $0x80000001, %%eax\n"
|
return result;
|
||||||
"cpuid\n"
|
#else
|
||||||
"2:\n"
|
return 0;
|
||||||
"xchg %%ebx, %1\n"
|
#endif
|
||||||
: "=&d" (extended_result), "=&r" (tmp1)
|
}
|
||||||
: "a" (0x80000000)
|
|
||||||
: "%ecx"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined (Q_OS_WIN)
|
static void cpuidFeatures01(uint &ecx, uint &edx)
|
||||||
_asm {
|
{
|
||||||
push eax
|
#ifdef Q_OS_WIN
|
||||||
push ebx
|
int info[4];
|
||||||
push ecx
|
__cpuid(info, 1);
|
||||||
push edx
|
ecx = info[2];
|
||||||
pushfd
|
edx = info[3];
|
||||||
pop eax
|
#elif defined(Q_CC_GNU)
|
||||||
mov ebx, eax
|
long tmp1;
|
||||||
xor eax, 00200000h
|
asm ("xchg "PICreg", %2\n"
|
||||||
push eax
|
"cpuid\n"
|
||||||
popfd
|
"xchg "PICreg", %2\n"
|
||||||
pushfd
|
: "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
|
||||||
pop eax
|
: "a" (1));
|
||||||
mov edx, 0
|
#endif
|
||||||
xor eax, ebx
|
}
|
||||||
jz skip
|
|
||||||
|
|
||||||
mov eax, 1
|
static void cpuidFeatures07_00(uint &ebx)
|
||||||
cpuid
|
{
|
||||||
mov result, edx
|
#ifdef Q_OS_WIN
|
||||||
mov feature_result, ecx
|
int info[4];
|
||||||
skip:
|
__cpuidex(info, 7, 0);
|
||||||
pop edx
|
ebx = info[1];
|
||||||
pop ecx
|
#elif defined(Q_CC_GNU)
|
||||||
pop ebx
|
unsigned long rbx; // in case it's 64-bit
|
||||||
pop eax
|
asm ("xchg "PICreg", %0\n"
|
||||||
}
|
"cpuid\n"
|
||||||
|
"xchg "PICreg", %0\n"
|
||||||
|
: "=&r" (rbx)
|
||||||
|
: "a" (7), "c" (0)
|
||||||
|
: "%edx");
|
||||||
|
ebx = rbx;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
_asm {
|
#ifdef Q_OS_WIN
|
||||||
push eax
|
namespace QtXgetbvHack {
|
||||||
push ebx
|
inline quint64 _xgetbv(int) { return 0; }
|
||||||
push ecx
|
}
|
||||||
push edx
|
using namespace QtXgetbvHack;
|
||||||
pushfd
|
|
||||||
pop eax
|
|
||||||
mov ebx, eax
|
|
||||||
xor eax, 00200000h
|
|
||||||
push eax
|
|
||||||
popfd
|
|
||||||
pushfd
|
|
||||||
pop eax
|
|
||||||
mov edx, 0
|
|
||||||
xor eax, ebx
|
|
||||||
jz skip2
|
|
||||||
|
|
||||||
mov eax, 80000000h
|
|
||||||
cpuid
|
|
||||||
cmp eax, 80000000h
|
|
||||||
jbe skip2
|
|
||||||
mov eax, 80000001h
|
|
||||||
cpuid
|
|
||||||
mov extended_result, edx
|
|
||||||
skip2:
|
|
||||||
pop edx
|
|
||||||
pop ecx
|
|
||||||
pop ebx
|
|
||||||
pop eax
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void xgetbv(int in, uint &eax, uint &edx)
|
||||||
|
{
|
||||||
|
#ifdef Q_OS_WIN
|
||||||
|
quint64 result = _xgetbv(in);
|
||||||
|
eax = result;
|
||||||
|
edx = result >> 32;
|
||||||
|
#elif defined(Q_CC_GNU)
|
||||||
|
asm ("xgetbv"
|
||||||
|
: "=a" (eax), "=d" (edx)
|
||||||
|
: "c" (in));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint detectProcessorFeatures()
|
||||||
|
{
|
||||||
|
uint features = 0;
|
||||||
|
if (maxBasicCpuidSupported() < 1)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#if defined(Q_PROCESSOR_X86_32)
|
||||||
|
unsigned int feature_result = 0;
|
||||||
|
uint result = 0;
|
||||||
|
cpuidFeatures01(feature_result, result);
|
||||||
|
|
||||||
// result now contains the standard feature bits
|
// result now contains the standard feature bits
|
||||||
if (result & (1u << 26))
|
if (result & (1u << 26))
|
||||||
features |= SSE2;
|
features |= SSE2;
|
||||||
if (feature_result & (1u))
|
#else
|
||||||
features |= SSE3;
|
// x86-64 or x32
|
||||||
if (feature_result & (1u << 9))
|
features = SSE2;
|
||||||
features |= SSSE3;
|
uint feature_result = 0, tmp;
|
||||||
if (feature_result & (1u << 19))
|
cpuidFeatures01(feature_result, tmp);
|
||||||
features |= SSE4_1;
|
|
||||||
if (feature_result & (1u << 20))
|
|
||||||
features |= SSE4_2;
|
|
||||||
if (feature_result & (1u << 28))
|
|
||||||
features |= AVX;
|
|
||||||
|
|
||||||
return features;
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
|
|
||||||
static inline uint detectProcessorFeatures()
|
|
||||||
{
|
|
||||||
uint features = SSE2;
|
|
||||||
uint feature_result = 0;
|
|
||||||
|
|
||||||
#if defined (Q_OS_WIN64)
|
|
||||||
{
|
|
||||||
int info[4];
|
|
||||||
__cpuid(info, 1);
|
|
||||||
feature_result = info[2];
|
|
||||||
}
|
|
||||||
#elif defined(Q_CC_GNU)
|
|
||||||
quint64 tmp;
|
|
||||||
asm ("xchg %%rbx, %1\n"
|
|
||||||
"cpuid\n"
|
|
||||||
"xchg %%rbx, %1\n"
|
|
||||||
: "=&c" (feature_result), "=&r" (tmp)
|
|
||||||
: "a" (1)
|
|
||||||
: "%edx"
|
|
||||||
);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// common part between 32- and 64-bit
|
||||||
if (feature_result & (1u))
|
if (feature_result & (1u))
|
||||||
features |= SSE3;
|
features |= SSE3;
|
||||||
if (feature_result & (1u << 9))
|
if (feature_result & (1u << 9))
|
||||||
@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
|
|||||||
features |= SSE4_1;
|
features |= SSE4_1;
|
||||||
if (feature_result & (1u << 20))
|
if (feature_result & (1u << 20))
|
||||||
features |= SSE4_2;
|
features |= SSE4_2;
|
||||||
if (feature_result & (1u << 28))
|
uint xgetbvA = 0, xgetbvD = 0;
|
||||||
features |= AVX;
|
if (feature_result & (1u << 27)) {
|
||||||
|
// XGETBV enabled
|
||||||
|
xgetbv(0, xgetbvA, xgetbvD);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((xgetbvA & 6) == 6) {
|
||||||
|
// support for YMM and XMM registers is enabled
|
||||||
|
if (feature_result & (1u << 28))
|
||||||
|
features |= AVX;
|
||||||
|
}
|
||||||
|
|
||||||
return features;
|
return features;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user