Reorganize the bits for the CPU feature detection
Instead of detecting one bit and then setting a different one, just use the bit positions reported by the CPUID instruction directly on x86. This now makes use of the full 64-bit space.

Since MSVC doesn't accept enum values wider than 32 bits, each constant has to store the bit number instead of the actual bit value. For that reason, the constants were also renamed, to catch anyone who was using them directly instead of through qCpuHasFeature.

Change-Id: Ib306f8f647014b399b87ffff13f1d587692d827a
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
09aeda21b9
commit
6a8251a89b
@ -78,28 +78,28 @@ static inline uint detectProcessorFeatures()
|
||||
return 0;
|
||||
}
|
||||
#elif defined (Q_OS_WINCE)
|
||||
static inline uint detectProcessorFeatures()
|
||||
static inline quint64 detectProcessorFeatures()
|
||||
{
|
||||
uint features = 0;
|
||||
quint64 features = 0;
|
||||
|
||||
#if defined (ARM)
|
||||
# ifdef PF_ARM_NEON
|
||||
if (IsProcessorFeaturePresent(PF_ARM_NEON))
|
||||
features |= ARM_NEON;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureNEON;
|
||||
# endif
|
||||
#elif defined(_X86_)
|
||||
if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE))
|
||||
features |= SSE2;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureSSE2;
|
||||
if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE))
|
||||
features |= SSE3;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureSSE3;
|
||||
#endif
|
||||
return features;
|
||||
}
|
||||
|
||||
#elif defined(Q_PROCESSOR_ARM)
|
||||
static inline uint detectProcessorFeatures()
|
||||
static inline quint64 detectProcessorFeatures()
|
||||
{
|
||||
uint features = 0;
|
||||
quint64 features = 0;
|
||||
|
||||
#if defined(Q_OS_LINUX)
|
||||
int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
|
||||
@ -117,7 +117,7 @@ static inline uint detectProcessorFeatures()
|
||||
for (int i = 0; i < max; i += 2)
|
||||
if (vector[i] == AT_HWCAP) {
|
||||
if (vector[i+1] & HWCAP_NEON)
|
||||
features |= NEON;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureNEON;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -129,7 +129,7 @@ static inline uint detectProcessorFeatures()
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_NEON__)
|
||||
features = NEON;
|
||||
features = Q_UINT64_C(1) << CpuFeatureNEON;
|
||||
#endif
|
||||
|
||||
return features;
|
||||
@ -257,6 +257,8 @@ static quint64 detectProcessorFeatures()
|
||||
AVXState = XMM0_15 | YMM0_15Hi128,
|
||||
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
|
||||
};
|
||||
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2);
|
||||
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
|
||||
|
||||
quint64 features = 0;
|
||||
int cpuidLevel = maxBasicCpuidSupported();
|
||||
@ -269,28 +271,23 @@ static quint64 detectProcessorFeatures()
|
||||
|
||||
uint cpuid01ECX = 0, cpuid01EDX = 0;
|
||||
cpuidFeatures01(cpuid01ECX, cpuid01EDX);
|
||||
|
||||
// the low 32-bits of features is cpuid01ECX
|
||||
// note: we need to check OS support for saving the AVX register state
|
||||
features = cpuid01ECX;
|
||||
|
||||
#if defined(Q_PROCESSOR_X86_32)
|
||||
// x86 might not have SSE2 support
|
||||
if (cpuid01EDX & (1u << 26))
|
||||
features |= SSE2;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureSSE2;
|
||||
else
|
||||
features &= ~(Q_UINT64_C(1) << CpuFeatureSSE2);
|
||||
// we should verify that the OS enabled saving of the SSE state...
|
||||
#else
|
||||
// x86-64 or x32
|
||||
features = SSE2;
|
||||
features |= Q_UINT64_C(1) << CpuFeatureSSE2;
|
||||
#endif
|
||||
|
||||
// common part between 32- and 64-bit
|
||||
if (cpuid01ECX & (1u))
|
||||
features |= SSE3;
|
||||
if (cpuid01ECX & (1u << 9))
|
||||
features |= SSSE3;
|
||||
if (cpuid01ECX & (1u << 19))
|
||||
features |= SSE4_1;
|
||||
if (cpuid01ECX & (1u << 20))
|
||||
features |= SSE4_2;
|
||||
if (cpuid01ECX & (1u << 25))
|
||||
features |= 0; // AES, enable if needed
|
||||
|
||||
uint xgetbvA = 0, xgetbvD = 0;
|
||||
if (cpuid01ECX & (1u << 27)) {
|
||||
// XGETBV enabled
|
||||
@ -298,22 +295,17 @@ static quint64 detectProcessorFeatures()
|
||||
}
|
||||
|
||||
uint cpuid0700EBX = 0;
|
||||
if (cpuidLevel >= 7)
|
||||
if (cpuidLevel >= 7) {
|
||||
cpuidFeatures07_00(cpuid0700EBX);
|
||||
|
||||
if ((xgetbvA & AVXState) == AVXState) {
|
||||
// support for YMM and XMM registers is enabled
|
||||
if (cpuid01ECX & (1u << 28))
|
||||
features |= AVX;
|
||||
|
||||
if (cpuid0700EBX & (1u << 5))
|
||||
features |= AVX2;
|
||||
// the high 32-bits of features is cpuid0700EBX
|
||||
features |= quint64(cpuid0700EBX) << 32;
|
||||
}
|
||||
|
||||
if (cpuid0700EBX & (1u << 4))
|
||||
features |= HLE; // Hardware Lock Ellision
|
||||
if (cpuid0700EBX & (1u << 11))
|
||||
features |= RTM; // Restricted Transactional Memory
|
||||
if ((xgetbvA & AVXState) != AVXState) {
|
||||
// support for YMM registers is disabled, disable all AVX
|
||||
features &= ~AllAVX;
|
||||
}
|
||||
|
||||
return features;
|
||||
}
|
||||
@ -430,24 +422,24 @@ static bool procCpuinfoContains(const char *prefix, const char *string)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline uint detectProcessorFeatures()
|
||||
static inline quint64 detectProcessorFeatures()
|
||||
{
|
||||
// NOTE: MIPS 74K cores are the only ones supporting DSPr2.
|
||||
uint flags = 0;
|
||||
quint64 flags = 0;
|
||||
|
||||
#if defined __mips_dsp
|
||||
flags |= DSP;
|
||||
flags |= Q_UINT64_C(1) << CpuFeatureDSP;
|
||||
# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
|
||||
flags |= DSPR2;
|
||||
flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
|
||||
# elif defined(Q_OS_LINUX)
|
||||
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
|
||||
flags |= DSPR2;
|
||||
flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
|
||||
# endif
|
||||
#elif defined(Q_OS_LINUX)
|
||||
if (procCpuinfoContains("ASEs implemented", "dsp")) {
|
||||
flags |= DSP;
|
||||
flags |= Q_UINT64_C(1) << CpuFeatureDSP;
|
||||
if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
|
||||
flags |= DSPR2;
|
||||
flags |= Q_UINT64_C(1) << CpuFeatureDSPR2;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -462,47 +454,115 @@ static inline uint detectProcessorFeatures()
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Use kdesdk/scripts/generate_string_table.pl to update the table below.
|
||||
* Here's the data (don't forget the ONE leading space):
|
||||
|
||||
neon
|
||||
sse2
|
||||
sse3
|
||||
ssse3
|
||||
sse4.1
|
||||
sse4.2
|
||||
avx
|
||||
avx2
|
||||
hle
|
||||
rtm
|
||||
dsp
|
||||
dspr2
|
||||
*/
|
||||
* Use kdesdk/scripts/generate_string_table.pl to update the table below. Note
|
||||
* that the x86 version has a lot of blanks that must be kept and that the
|
||||
* offset table's type is changed to make the table smaller. We also remove the
|
||||
* terminating -1 that the script adds.
|
||||
*/
|
||||
|
||||
// begin generated
|
||||
#if defined(Q_PROCESSOR_ARM)
|
||||
/* Data:
|
||||
neon
|
||||
*/
|
||||
static const char features_string[] = " neon\0";
|
||||
static const int features_indices[] = { 0 };
|
||||
#elif defined(Q_PROCESSOR_MIPS)
|
||||
/* Data:
|
||||
dsp
|
||||
dspr2
|
||||
*/
|
||||
static const char features_string[] =
|
||||
"\0"
|
||||
" neon\0"
|
||||
" sse2\0"
|
||||
" sse3\0"
|
||||
" ssse3\0"
|
||||
" sse4.1\0"
|
||||
" sse4.2\0"
|
||||
" avx\0"
|
||||
" avx2\0"
|
||||
" hle\0"
|
||||
" rtm\0"
|
||||
" dsp\0"
|
||||
" dspr2\0"
|
||||
"\0";
|
||||
|
||||
static const int features_indices[] = {
|
||||
0, 1, 7, 13, 19, 26, 34, 42,
|
||||
47, 53, 58, 63, 68, -1
|
||||
0, 5
|
||||
};
|
||||
#elif defined(Q_PROCESSOR_X86)
|
||||
/* Data:
|
||||
sse3
|
||||
sse2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ssse3
|
||||
|
||||
|
||||
fma
|
||||
cmpxchg16b
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
sse4.1
|
||||
sse4.2
|
||||
|
||||
movbe
|
||||
popcnt
|
||||
|
||||
aes
|
||||
|
||||
|
||||
avx
|
||||
f16c
|
||||
rdrand
|
||||
|
||||
|
||||
|
||||
|
||||
bmi
|
||||
hle
|
||||
avx2
|
||||
|
||||
|
||||
bmi2
|
||||
|
||||
|
||||
rtm
|
||||
*/
|
||||
static const char features_string[] =
|
||||
" sse3\0"
|
||||
" sse2\0"
|
||||
" ssse3\0"
|
||||
" fma\0"
|
||||
" cmpxchg16b\0"
|
||||
" sse4.1\0"
|
||||
" sse4.2\0"
|
||||
" movbe\0"
|
||||
" popcnt\0"
|
||||
" aes\0"
|
||||
" avx\0"
|
||||
" f16c\0"
|
||||
" rdrand\0"
|
||||
" bmi\0"
|
||||
" hle\0"
|
||||
" avx2\0"
|
||||
" bmi2\0"
|
||||
" rtm\0"
|
||||
"\0";
|
||||
|
||||
static const quint8 features_indices[] = {
|
||||
0, 6, 5, 5, 5, 5, 5, 5,
|
||||
5, 12, 5, 5, 19, 24, 5, 5,
|
||||
5, 5, 5, 36, 44, 5, 52, 59,
|
||||
5, 67, 5, 5, 72, 77, 83, 5,
|
||||
5, 5, 5, 91, 96, 101, 5, 5,
|
||||
107, 5, 5, 113
|
||||
};
|
||||
#else
|
||||
static const char features_string[] = "";
|
||||
static const int features_indices[] = { };
|
||||
#endif
|
||||
// end generated
|
||||
|
||||
static const int features_count = (sizeof features_indices - 1) / (sizeof features_indices[0]);
|
||||
static const int features_count = (sizeof features_indices) / (sizeof features_indices[0]);
|
||||
|
||||
// record what CPU features were enabled by default in this Qt build
|
||||
static const quint64 minFeature = qCompilerCpuFeatures;
|
||||
|
@ -245,18 +245,29 @@ QT_BEGIN_NAMESPACE
|
||||
|
||||
|
||||
enum CPUFeatures {
|
||||
NEON = 0x2, ARM_NEON = NEON,
|
||||
SSE2 = 0x4,
|
||||
SSE3 = 0x8,
|
||||
SSSE3 = 0x10,
|
||||
SSE4_1 = 0x20,
|
||||
SSE4_2 = 0x40,
|
||||
AVX = 0x80,
|
||||
AVX2 = 0x100,
|
||||
HLE = 0x200,
|
||||
RTM = 0x400,
|
||||
DSP = 0x800,
|
||||
DSPR2 = 0x1000,
|
||||
#if defined(Q_PROCESSOR_ARM)
|
||||
CpuFeatureNEON = 0,
|
||||
CpuFeatureARM_NEON = CpuFeatureNEON,
|
||||
#elif defined(Q_PROCESSOR_MIPS)
|
||||
CpuFeatureDSP = 0,
|
||||
CpuFeatureDSPR2 = 1,
|
||||
#elif defined(Q_PROCESSOR_X86)
|
||||
// The order of the flags is jumbled so it matches most closely the bits in CPUID
|
||||
// Out of order:
|
||||
CpuFeatureSSE2 = 1, // uses the bit for PCLMULQDQ
|
||||
// in level 1, ECX
|
||||
CpuFeatureSSE3 = (0 + 0),
|
||||
CpuFeatureSSSE3 = (0 + 9),
|
||||
CpuFeatureSSE4_1 = (0 + 19),
|
||||
CpuFeatureSSE4_2 = (0 + 20),
|
||||
CpuFeatureAES = (0 + 25),
|
||||
CpuFeatureAVX = (0 + 28),
|
||||
|
||||
// in level 7, leaf 0, EBX
|
||||
CpuFeatureHLE = (32 + 4),
|
||||
CpuFeatureAVX2 = (32 + 5),
|
||||
CpuFeatureRTM = (32 + 11),
|
||||
#endif
|
||||
|
||||
// used only to indicate that the CPU detection was initialised
|
||||
QSimdInitialized = 0x80000000
|
||||
@ -264,37 +275,37 @@ enum CPUFeatures {
|
||||
|
||||
static const uint qCompilerCpuFeatures = 0
|
||||
#if defined __RTM__
|
||||
| RTM
|
||||
| (Q_UINT64_C(1) << CpuFeatureRTM)
|
||||
#endif
|
||||
#if defined __AVX2__
|
||||
| AVX2
|
||||
| (Q_UINT64_C(1) << CpuFeatureAVX2)
|
||||
#endif
|
||||
#if defined __AVX__
|
||||
| AVX
|
||||
| (Q_UINT64_C(1) << CpuFeatureAVX)
|
||||
#endif
|
||||
#if defined __SSE4_2__
|
||||
| SSE4_2
|
||||
| (Q_UINT64_C(1) << CpuFeatureSSE4_2)
|
||||
#endif
|
||||
#if defined __SSE4_1__
|
||||
| SSE4_1
|
||||
| (Q_UINT64_C(1) << CpuFeatureSSE4_1)
|
||||
#endif
|
||||
#if defined __SSSE3__
|
||||
| SSSE3
|
||||
| (Q_UINT64_C(1) << CpuFeatureSSSE3)
|
||||
#endif
|
||||
#if defined __SSE3__
|
||||
| SSE3
|
||||
| (Q_UINT64_C(1) << CpuFeatureSSE3)
|
||||
#endif
|
||||
#if defined __SSE2__
|
||||
| SSE2
|
||||
| (Q_UINT64_C(1) << CpuFeatureSSE2)
|
||||
#endif
|
||||
#if defined __ARM_NEON__
|
||||
| NEON
|
||||
| (Q_UINT64_C(1) << CpuFeatureNEON)
|
||||
#endif
|
||||
#if defined __mips_dsp
|
||||
| DSP
|
||||
| (Q_UINT64_C(1) << CpuFeatureDSP)
|
||||
#endif
|
||||
#if defined __mips_dspr2
|
||||
| DSPR2
|
||||
| (Q_UINT64_C(1) << CpuFeatureDSPR2)
|
||||
#endif
|
||||
;
|
||||
|
||||
@ -322,7 +333,8 @@ static inline quint64 qCpuFeatures()
|
||||
return features;
|
||||
}
|
||||
|
||||
#define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (feature)) || (qCpuFeatures() & (feature)))
|
||||
#define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \
|
||||
|| (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature)))
|
||||
|
||||
#ifdef Q_PROCESSOR_X86
|
||||
// Bit scan functions for x86
|
||||
|
@ -6326,8 +6326,6 @@ template<QtPixelOrder> const uint *QT_FASTCALL convertA2RGB30PMFromARGB32PM_sse4
|
||||
|
||||
void qInitDrawhelperAsm()
|
||||
{
|
||||
const uint features = qCpuFeatures();
|
||||
Q_UNUSED(features);
|
||||
#ifdef __SSE2__
|
||||
qDrawHelper[QImage::Format_RGB32].bitmapBlit = qt_bitmapblit32_sse2;
|
||||
qDrawHelper[QImage::Format_ARGB32].bitmapBlit = qt_bitmapblit32_sse2;
|
||||
@ -6372,7 +6370,7 @@ void qInitDrawhelperAsm()
|
||||
qt_fetch_radial_gradient = qt_fetch_radial_gradient_sse2;
|
||||
|
||||
#ifdef QT_COMPILER_SUPPORTS_SSSE3
|
||||
if (features & SSSE3) {
|
||||
if (qCpuHasFeature(SSSE3)) {
|
||||
extern void qt_blend_argb32_on_argb32_ssse3(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
@ -6466,7 +6464,7 @@ void qInitDrawhelperAsm()
|
||||
#endif // Q_PROCESSOR_MIPS_32
|
||||
|
||||
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP) || defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
|
||||
if (features & (DSP | DSPR2)) {
|
||||
if (qCpuHasFeature(DSP) && qCpuHasFeature(DSPR2)) {
|
||||
// Composition functions are all DSP r1
|
||||
qt_functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
|
||||
qt_functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
|
||||
|
Loading…
Reference in New Issue
Block a user