Update the x86 feature-detection code, unifying x86 and x86-64

Most of it is the same for both of them, so let's avoid doing
everything twice. Or more, since we may support x32 soon. For Windows,
use the intrinsics. For GCC, we'd like to use cpuid.h, but it has only
existed since GCC 4.3, so we can't.

And properly detect AVX support: it's not enough to detect that the
processor supports it; we also need to check that the OS has enabled
support for the 256-bit registers.

Change-Id: Ibb4872cdb774de5701b18c40f4e612330a266214
Reviewed-by: Bradley T. Hughes <bradley.hughes@nokia.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
Thiago Macieira 2011-12-25 20:36:14 -02:00 committed by Qt by Nokia
parent 01ee098531
commit dc6d1c6c3f

View File

@ -1,6 +1,7 @@
/**************************************************************************** /****************************************************************************
** **
** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
** Copyright (C) 2012 Intel Corporation.
** Contact: http://www.qt-project.org/ ** Contact: http://www.qt-project.org/
** **
** This file is part of the QtCore module of the Qt Toolkit. ** This file is part of the QtCore module of the Qt Toolkit.
@ -145,17 +146,27 @@ static inline uint detectProcessorFeatures()
return features; return features;
} }
#elif defined(Q_PROCESSOR_X86_32) #elif defined(Q_PROCESSOR_X86)
static inline uint detectProcessorFeatures()
{
uint features = 0;
unsigned int extended_result = 0; #ifdef Q_PROCESSOR_X86_32
unsigned int feature_result = 0; # define PICreg "%%ebx"
uint result = 0; #else
/* see p. 118 of amd64 instruction set manual Vol3 */ # define PICreg "%%rbx"
#if defined(Q_CC_GNU) #endif
long cpuid_supported, tmp1;
static int maxBasicCpuidSupported()
{
#ifdef Q_OS_WIN
// Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
int info[4];
__cpuid(info, 0);
return info[0];
#elif defined(Q_CC_GNU)
long tmp1;
# ifdef Q_PROCESSOR_X86_32
// check if the CPUID instruction is supported
long cpuid_supported;
asm ("pushf\n" asm ("pushf\n"
"pop %0\n" "pop %0\n"
"mov %0, %1\n" "mov %0, %1\n"
@ -167,132 +178,100 @@ static inline uint detectProcessorFeatures()
"xor %1, %0\n" // %eax is now 0 if CPUID is not supported "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
: "=a" (cpuid_supported), "=r" (tmp1) : "=a" (cpuid_supported), "=r" (tmp1)
); );
if (cpuid_supported) { if (!cpuid_supported)
asm ("xchg %%ebx, %2\n" return 0;
"cpuid\n" # endif
"xchg %%ebx, %2\n"
: "=&c" (feature_result), "=d" (result), "=&r" (tmp1)
: "a" (1));
asm ("xchg %%ebx, %1\n" int result;
"cpuid\n" asm ("xchg "PICreg", %1\n"
"cmp $0x80000000, %%eax\n" "cpuid\n"
"jnbe 1f\n" "xchg "PICreg", %1\n"
"xor %0, %0\n" : "=&a" (result), "=&r" (tmp1)
"jmp 2f\n" : "0" (0)
"1:\n" : "ecx", "edx");
"mov $0x80000001, %%eax\n" return result;
"cpuid\n" #else
"2:\n" return 0;
"xchg %%ebx, %1\n" #endif
: "=&d" (extended_result), "=&r" (tmp1) }
: "a" (0x80000000)
: "%ecx"
);
}
#elif defined (Q_OS_WIN) static void cpuidFeatures01(uint &ecx, uint &edx)
_asm { {
push eax #ifdef Q_OS_WIN
push ebx int info[4];
push ecx __cpuid(info, 1);
push edx ecx = info[2];
pushfd edx = info[3];
pop eax #elif defined(Q_CC_GNU)
mov ebx, eax long tmp1;
xor eax, 00200000h asm ("xchg "PICreg", %2\n"
push eax "cpuid\n"
popfd "xchg "PICreg", %2\n"
pushfd : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
pop eax : "a" (1));
mov edx, 0 #endif
xor eax, ebx }
jz skip
mov eax, 1 static void cpuidFeatures07_00(uint &ebx)
cpuid {
mov result, edx #ifdef Q_OS_WIN
mov feature_result, ecx int info[4];
skip: __cpuidex(info, 7, 0);
pop edx ebx = info[1];
pop ecx #elif defined(Q_CC_GNU)
pop ebx unsigned long rbx; // in case it's 64-bit
pop eax asm ("xchg "PICreg", %0\n"
} "cpuid\n"
"xchg "PICreg", %0\n"
: "=&r" (rbx)
: "a" (7), "c" (0)
: "%edx");
ebx = rbx;
#endif
}
_asm { #ifdef Q_OS_WIN
push eax namespace QtXgetbvHack {
push ebx inline quint64 _xgetbv(int) { return 0; }
push ecx }
push edx using namespace QtXgetbvHack;
pushfd
pop eax
mov ebx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
mov edx, 0
xor eax, ebx
jz skip2
mov eax, 80000000h
cpuid
cmp eax, 80000000h
jbe skip2
mov eax, 80000001h
cpuid
mov extended_result, edx
skip2:
pop edx
pop ecx
pop ebx
pop eax
}
#endif #endif
// Reads the extended control register selected by \a in using XGETBV and
// stores the low 32 bits of the result in \a eax and the high 32 bits in
// \a edx. On toolchains covered by neither branch below, both outputs are
// set to 0.
static void xgetbv(int in, uint &eax, uint &edx)
{
#ifdef Q_OS_WIN
    // The QtXgetbvHack fallback declared earlier in this file takes an int and
    // always returns 0. Passing an unsigned value keeps that fallback from
    // being an exact match, so a compiler-provided _xgetbv(unsigned int), when
    // one is declared, wins overload resolution; the fallback remains callable
    // through an integral conversion when no such intrinsic exists.
    const quint64 result = _xgetbv(static_cast<unsigned int>(in));
    eax = uint(result);
    edx = uint(result >> 32);
#elif defined(Q_CC_GNU)
    // Emit the XGETBV opcode bytes (0F 01 D0) directly so that assemblers
    // which do not know the mnemonic can still build this file.
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#else
    // No known way to execute XGETBV here: report that no state is enabled.
    (void)in;
    eax = 0;
    edx = 0;
#endif
}
static inline uint detectProcessorFeatures()
{
uint features = 0;
if (maxBasicCpuidSupported() < 1)
return 0;
#if defined(Q_PROCESSOR_X86_32)
unsigned int feature_result = 0;
uint result = 0;
cpuidFeatures01(feature_result, result);
// result now contains the standard feature bits // result now contains the standard feature bits
if (result & (1u << 26)) if (result & (1u << 26))
features |= SSE2; features |= SSE2;
if (feature_result & (1u)) #else
features |= SSE3; // x86-64 or x32
if (feature_result & (1u << 9)) features = SSE2;
features |= SSSE3; uint feature_result = 0, tmp;
if (feature_result & (1u << 19)) cpuidFeatures01(feature_result, tmp);
features |= SSE4_1;
if (feature_result & (1u << 20))
features |= SSE4_2;
if (feature_result & (1u << 28))
features |= AVX;
return features;
}
#elif defined(Q_PROCESSOR_X86_64) || defined(Q_OS_WIN64)
static inline uint detectProcessorFeatures()
{
uint features = SSE2;
uint feature_result = 0;
#if defined (Q_OS_WIN64)
{
int info[4];
__cpuid(info, 1);
feature_result = info[2];
}
#elif defined(Q_CC_GNU)
quint64 tmp;
asm ("xchg %%rbx, %1\n"
"cpuid\n"
"xchg %%rbx, %1\n"
: "=&c" (feature_result), "=&r" (tmp)
: "a" (1)
: "%edx"
);
#endif #endif
// common part between 32- and 64-bit
if (feature_result & (1u)) if (feature_result & (1u))
features |= SSE3; features |= SSE3;
if (feature_result & (1u << 9)) if (feature_result & (1u << 9))
@ -301,8 +280,17 @@ static inline uint detectProcessorFeatures()
features |= SSE4_1; features |= SSE4_1;
if (feature_result & (1u << 20)) if (feature_result & (1u << 20))
features |= SSE4_2; features |= SSE4_2;
if (feature_result & (1u << 28)) uint xgetbvA = 0, xgetbvD = 0;
features |= AVX; if (feature_result & (1u << 27)) {
// XGETBV enabled
xgetbv(0, xgetbvA, xgetbvD);
}
if ((xgetbvA & 6) == 6) {
// support for YMM and XMM registers is enabled
if (feature_result & (1u << 28))
features |= AVX;
}
return features; return features;
} }