Update the x86 feature-detection code, unifying x86 and x86-64

Most of it is the same for both of them, so let's avoid doing
everything twice. Or more, since we may support x32 soon. For Windows,
use the intrinsics. For GCC, we'd like to use cpuid.h, but it only
exists since GCC 4.3, so we can't.

And properly detect AVX support: it's not enough to detect that the
processor supports it; we also need to check that the OS has enabled
support for the 256-bit registers.

Change-Id: Ibb4872cdb774de5701b18c40f4e612330a266214
Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
Thiago Macieira 2011-12-25 20:36:14 -02:00 committed by Qt by Nokia
parent 01ee098531
commit dc6d1c6c3f

View File

@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/
**
** This file is part of the QtCore module of the Qt Toolkit.
@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
return features;
}
#elif defined(Q_PROCESSOR_X86_32)
static inline uint detectProcessorFeatures()
{
uint features = 0;
#elif defined(Q_PROCESSOR_X86)
unsigned int extended_result = 0;
unsigned int feature_result = 0;
uint result = 0;
/* see p. 118 of amd64 instruction set manual Vol3 */
#if defined(Q_CC_GNU)
long cpuid_supported, tmp1;
#ifdef Q_PROCESSOR_X86_32
# define PICreg "%%ebx"
#else
# define PICreg "%%rbx"
#endif
static int maxBasicCpuidSupported()
{
#ifdef Q_OS_WIN
// Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
int info[4];
__cpuid(info, 0);
return info[0];
#elif defined(Q_CC_GNU)
long tmp1;
# ifdef Q_PROCESSOR_X86_32
// check if the CPUID instruction is supported
long cpuid_supported;
asm ("pushf\n"
"pop %0\n"
"mov %0, %1\n"
@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1)
);
if (cpuid_supported) {
asm ("xchg %%ebx, %2\n"
"cpuid\n"
"xchg %%ebx, %2\n"
: "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
: "a" (1));
if (!cpuid_supported)
return 0;
# endif
asm ("xchg %%ebx, %1\n"
"cpuid\n"
"cmp $0x80000000, %%eax\n"
"jnbe 1f\n"
"xor %0, %0\n"
"jmp 2f\n"
"1:\n"
"mov $0x80000001, %%eax\n"
"cpuid\n"
"2:\n"
"xchg %%ebx, %1\n"
: "=&d" (extended_result), "=&r" (tmp1)
: "a" (0x80000000)
: "%ecx"
);
}
int result;
asm ("xchg "PICreg", %1\n"
"cpuid\n"
"xchg "PICreg", %1\n"
: "=&a" (result), "=&r" (tmp1)
: "0" (0)
: "ecx", "edx");
return result;
#else
return 0;
#endif
}
#elif defined (Q_OS_WIN)
_asm {
push eax
push ebx
push ecx
push edx
pushfd
pop eax
mov ebx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ebx
jz skip
// Executes CPUID with EAX = 1 and stores the resulting ECX and EDX register
// values in \a ecx and \a edx respectively.
//
// If neither Q_OS_WIN nor Q_CC_GNU is defined, this function does nothing and
// both output parameters are left untouched.
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#ifdef Q_OS_WIN
    // __cpuid fills info[] with EAX, EBX, ECX, EDX (in that order)
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GNU)
    // NOTE(review): tmp1 must be as wide as the register named by PICreg;
    // "long" is presumably narrower than %rbx on an ILP32 x86-64 ABI (x32) -
    // confirm before enabling such a target.
    long tmp1;

    // CPUID overwrites EAX, so it has to be declared as a read-write operand:
    // an input-only operand promises the compiler that the register still
    // holds its original value after the asm statement.
    uint leaf = 1;

    // CPUID also overwrites EBX/RBX, which we may not clobber (it can be the
    // PIC register), so swap it out into a scratch register around the
    // instruction.
    // Keep whitespace between the string literals and PICreg: without it,
    // C++11 parses the macro name as a user-defined literal suffix.
    asm ("xchg " PICreg ", %2\n"
         "cpuid\n"
         "xchg " PICreg ", %2\n"
        : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1), "+a" (leaf));
#endif
}
mov eax, 1
cpuid
mov result, edx
mov feature_result, ecx
skip:
pop edx
pop ecx
pop ebx
pop eax
}
// Executes CPUID with EAX = 7 and ECX = 0 and stores the resulting EBX
// register value in \a ebx.
//
// If neither Q_OS_WIN nor Q_CC_GNU is defined, this function does nothing and
// \a ebx is left untouched.
static void cpuidFeatures07_00(uint &ebx)
{
#ifdef Q_OS_WIN
    // __cpuidex fills info[] with EAX, EBX, ECX, EDX (in that order)
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
#elif defined(Q_CC_GNU)
    unsigned long rbx; // in case it's 64-bit

    // CPUID overwrites EAX and ECX, so both are declared as read-write
    // operands: an input-only operand promises the compiler that the register
    // still holds its original value after the asm statement.
    uint leaf = 7;
    uint subleaf = 0;

    // EBX/RBX may not be clobbered (it can be the PIC register), so swap it
    // with a scratch register around the instruction; after the second xchg
    // the scratch register holds the EBX value produced by CPUID.
    // Keep whitespace between the string literals and PICreg: without it,
    // C++11 parses the macro name as a user-defined literal suffix.
    asm ("xchg " PICreg ", %0\n"
         "cpuid\n"
         "xchg " PICreg ", %0\n"
        : "=&r" (rbx), "+a" (leaf), "+c" (subleaf)
        :
        : "%edx");
    ebx = rbx;
#endif
}
_asm {
push eax
push ebx
push ecx
push edx
pushfd
pop eax
mov ebx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ebx
jz skip2
mov eax, 80000000h
cpuid
cmp eax, 80000000h
jbe skip2
mov eax, 80000001h
cpuid
mov extended_result, edx
skip2:
pop edx
pop ecx
pop ebx
pop eax
}
#ifdef Q_OS_WIN
// Windows only: declares a stub _xgetbv(int) overload that always returns 0
// and makes it visible to unqualified lookup through the using-directive.
// NOTE(review): presumably this is a fallback so that calls to _xgetbv()
// still compile when the compiler does not provide the intrinsic - confirm.
// Because the stub takes an int, a call with an int argument is an exact
// match for it.
namespace QtXgetbvHack {
inline quint64 _xgetbv(int) { return 0; }
}
using namespace QtXgetbvHack;
#endif
// Executes XGETBV with ECX = \a in and stores the low 32 bits of the result
// (EAX) in \a eax and the high 32 bits (EDX) in \a edx.
//
// If neither Q_OS_WIN nor Q_CC_GNU is defined, this function does nothing and
// both output parameters are left untouched.
static void xgetbv(int in, uint &eax, uint &edx)
{
#ifdef Q_OS_WIN
    // Pass the index as unsigned: with an int argument, the stub
    // QtXgetbvHack::_xgetbv(int) (which always returns 0) is an exact match
    // and would be preferred over a compiler-provided _xgetbv(unsigned int).
    // With an unsigned argument the stub is selected only if nothing better
    // is declared.
    quint64 result = _xgetbv(static_cast<unsigned int>(in));
    eax = uint(result);
    edx = uint(result >> 32);
#elif defined(Q_CC_GNU)
    // Emit the opcode bytes directly: older assemblers do not know the
    // "xgetbv" mnemonic.
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#endif
}
static inline uint detectProcessorFeatures()
{
uint features = 0;
if (maxBasicCpuidSupported() < 1)
return 0;
#if defined(Q_PROCESSOR_X86_32)
unsigned int feature_result = 0;
uint result = 0;
cpuidFeatures01(feature_result, result);
// result now contains the standard feature bits
if (result & (1u << 26))
features |= SSE2;
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
features |= SSSE3;
if (feature_result & (1u << 19))
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
return features;
}
#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
static inline uint detectProcessorFeatures()
{
uint features = SSE2;
uint feature_result = 0;
#if defined (Q_OS_WIN64)
{
int info[4];
__cpuid(info, 1);
feature_result = info[2];
}
#elif defined(Q_CC_GNU)
quint64 tmp;
asm ("xchg %%rbx, %1\n"
"cpuid\n"
"xchg %%rbx, %1\n"
: "=&c" (feature_result), "=&r" (tmp)
: "a" (1)
: "%edx"
);
#else
// x86-64 or x32
features = SSE2;
uint feature_result = 0, tmp;
cpuidFeatures01(feature_result, tmp);
#endif
// common part between 32- and 64-bit
if (feature_result & (1u))
features |= SSE3;
if (feature_result & (1u << 9))
@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
uint xgetbvA = 0, xgetbvD = 0;
if (feature_result & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
if ((xgetbvA & 6) == 6) {
// support for YMM and XMM registers is enabled
if (feature_result & (1u << 28))
features |= AVX;
}
return features;
}