SIMD: Refactor the constants to be actual bit values

... instead of being the bit numbers. This allows us to test more than
one feature at a time with qCpuHasFeature (see commit about the Haswell
architecture features). The drawback is that we won't be able to handle
more than 63 different CPU features (the mask is a 64-bit integer and one
bit is reserved for QSimdInitialized), though we're likely quite far from
that limit (x86 currently has only 36 features).

Change-Id: Iff4151c519c144d580c4fffd153a0acbfd74c2c6
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@qt.io>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
Thiago Macieira 2018-06-20 19:08:14 -07:00
parent 74ce4ffff6
commit f6d9cc5b79
5 changed files with 134 additions and 116 deletions

View File

@ -322,16 +322,8 @@ static quint64 detectProcessorFeatures()
AVXState = XMM0_15 | YMM0_15Hi128,
AVX512State = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
};
static const quint64 AllAVX512 = (Q_UINT64_C(1) << CpuFeatureAVX512F) | (Q_UINT64_C(1) << CpuFeatureAVX512CD) |
(Q_UINT64_C(1) << CpuFeatureAVX512ER) | (Q_UINT64_C(1) << CpuFeatureAVX512PF) |
(Q_UINT64_C(1) << CpuFeatureAVX512BW) | (Q_UINT64_C(1) << CpuFeatureAVX512DQ) |
(Q_UINT64_C(1) << CpuFeatureAVX512VL) | (Q_UINT64_C(1) << CpuFeatureAVX512IFMA) |
(Q_UINT64_C(1) << CpuFeatureAVX512VBMI) | (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2) |
(Q_UINT64_C(1) << CpuFeatureAVX512VNNI) | (Q_UINT64_C(1) << CpuFeatureAVX512BITALG) |
(Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ) |
(Q_UINT64_C(1) << CpuFeatureAVX5124NNIW) | (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS);
static const quint64 AllAVX2 = (Q_UINT64_C(1) << CpuFeatureAVX2) | AllAVX512;
static const quint64 AllAVX = (Q_UINT64_C(1) << CpuFeatureAVX) | AllAVX2;
static const quint64 AllAVX2 = CpuFeatureAVX2 | AllAVX512;
static const quint64 AllAVX = CpuFeatureAVX | AllAVX2;
quint64 features = 0;
int cpuidLevel = maxBasicCpuidSupported();

View File

@ -270,30 +270,30 @@ QT_BEGIN_NAMESPACE
#ifndef Q_PROCESSOR_X86
enum CPUFeatures {
#if defined(Q_PROCESSOR_ARM)
CpuFeatureNEON = 0,
CpuFeatureNEON = 2,
CpuFeatureARM_NEON = CpuFeatureNEON,
CpuFeatureCRC32 = 1,
CpuFeatureCRC32 = 4,
#elif defined(Q_PROCESSOR_MIPS)
CpuFeatureDSP = 0,
CpuFeatureDSPR2 = 1,
CpuFeatureDSP = 2,
CpuFeatureDSPR2 = 4,
#endif
// used only to indicate that the CPU detection was initialised
QSimdInitialized = 0x80000000
QSimdInitialized = 1
};
static const quint64 qCompilerCpuFeatures = 0
#if defined __ARM_NEON__
| (Q_UINT64_C(1) << CpuFeatureNEON)
| CpuFeatureNEON
#endif
#if defined __ARM_FEATURE_CRC32
| (Q_UINT64_C(1) << CpuFeatureCRC32)
| CpuFeatureCRC32
#endif
#if defined __mips_dsp
| (Q_UINT64_C(1) << CpuFeatureDSP)
| CpuFeatureDSP
#endif
#if defined __mips_dspr2
| (Q_UINT64_C(1) << CpuFeatureDSPR2)
| CpuFeatureDSPR2
#endif
;
#endif
@ -322,8 +322,8 @@ static inline quint64 qCpuFeatures()
return features;
}
#define qCpuHasFeature(feature) ((qCompilerCpuFeatures & (Q_UINT64_C(1) << CpuFeature ## feature)) \
|| (qCpuFeatures() & (Q_UINT64_C(1) << CpuFeature ## feature)))
// qCpuHasFeature(feature): `feature` is the name without the "CpuFeature"
// prefix; it is token-pasted to form the CpuFeature<feature> bit mask.
// The expression is true when every bit of that mask is set in
// qCompilerCpuFeatures, or else when every bit of it is set in the value
// returned by qCpuFeatures(). Because each side compares "== mask" instead
// of "!= 0", a mask made of several bits only matches when all of them are
// present in one of the two sets.
// NOTE(review): the two sets are tested independently, so a multi-bit mask
// split between them would not match unless qCpuFeatures() also reports the
// compile-time bits -- confirm against the qCpuFeatures() implementation.
#define qCpuHasFeature(feature) (((qCompilerCpuFeatures & CpuFeature ## feature) == CpuFeature ## feature) \
|| ((qCpuFeatures() & CpuFeature ## feature) == CpuFeature ## feature))
#define ALIGNMENT_PROLOGUE_16BYTES(ptr, i, length) \
for (; i < static_cast<int>(qMin(static_cast<quintptr>(length), ((4 - ((reinterpret_cast<quintptr>(ptr) >> 2) & 0x3)) & 0x3))); ++i)

View File

@ -1,6 +1,6 @@
// This is a generated file. DO NOT EDIT.
// Please see util/x86simdgen/generate.pl
#include <qglobal.h>
#include "qsimd_p.h"
static const char features_string[] =
" sse2\0"
@ -96,3 +96,21 @@ static const quint8 x86_locators[] = {
Leaf7_0EDX*32 + 2, // avx5124nniw
Leaf7_0EDX*32 + 3 // avx5124fmaps
};
// List of AVX512 features (see detectProcessorFeatures())
// Bitwise OR of every CpuFeatureAVX512* bit value; detectProcessorFeatures()
// combines it with CpuFeatureAVX2 and CpuFeatureAVX to build its AllAVX2 and
// AllAVX masks. This file is generated: lasting changes to this list belong
// in util/x86simdgen/generate.pl, not here.
static const quint64 AllAVX512 = 0
| CpuFeatureAVX512F
| CpuFeatureAVX512DQ
| CpuFeatureAVX512IFMA
| CpuFeatureAVX512PF
| CpuFeatureAVX512ER
| CpuFeatureAVX512CD
| CpuFeatureAVX512BW
| CpuFeatureAVX512VL
| CpuFeatureAVX512VBMI
| CpuFeatureAVX512VBMI2
| CpuFeatureAVX512VNNI
| CpuFeatureAVX512BITALG
| CpuFeatureAVX512VPOPCNTDQ
| CpuFeatureAVX5124NNIW
| CpuFeatureAVX5124FMAPS;

View File

@ -59,169 +59,166 @@ QT_BEGIN_NAMESPACE
#define QT_FUNCTION_TARGET_STRING_AVX5124NNIW "avx5124nniw"
#define QT_FUNCTION_TARGET_STRING_AVX5124FMAPS "avx5124fmaps"
enum CPUFeatures {
// in CPUID Leaf 1, EDX:
CpuFeatureSSE2 = 1,
// used only to indicate that the CPU detection was initialized
static const quint64 QSimdInitialized = Q_UINT64_C(1) << 0;
// in CPUID Leaf 1, ECX:
CpuFeatureSSE3 = 2,
CpuFeatureSSSE3 = 3,
CpuFeatureFMA = 4,
CpuFeatureSSE4_1 = 5,
CpuFeatureSSE4_2 = 6,
CpuFeatureMOVBE = 7,
CpuFeaturePOPCNT = 8,
CpuFeatureAES = 9,
CpuFeatureAVX = 10,
CpuFeatureF16C = 11,
CpuFeatureRDRND = 12,
// in CPUID Leaf 1, EDX:
static const quint64 CpuFeatureSSE2 = Q_UINT64_C(1) << 1;
// in CPUID Leaf 7, Sub-leaf 0, EBX:
CpuFeatureBMI = 13,
CpuFeatureHLE = 14,
CpuFeatureAVX2 = 15,
CpuFeatureBMI2 = 16,
CpuFeatureRTM = 17,
CpuFeatureAVX512F = 18,
CpuFeatureAVX512DQ = 19,
CpuFeatureRDSEED = 20,
CpuFeatureAVX512IFMA = 21,
CpuFeatureAVX512PF = 22,
CpuFeatureAVX512ER = 23,
CpuFeatureAVX512CD = 24,
CpuFeatureSHA = 25,
CpuFeatureAVX512BW = 26,
CpuFeatureAVX512VL = 27,
// in CPUID Leaf 1, ECX:
static const quint64 CpuFeatureSSE3 = Q_UINT64_C(1) << 2;
static const quint64 CpuFeatureSSSE3 = Q_UINT64_C(1) << 3;
static const quint64 CpuFeatureFMA = Q_UINT64_C(1) << 4;
static const quint64 CpuFeatureSSE4_1 = Q_UINT64_C(1) << 5;
static const quint64 CpuFeatureSSE4_2 = Q_UINT64_C(1) << 6;
static const quint64 CpuFeatureMOVBE = Q_UINT64_C(1) << 7;
static const quint64 CpuFeaturePOPCNT = Q_UINT64_C(1) << 8;
static const quint64 CpuFeatureAES = Q_UINT64_C(1) << 9;
static const quint64 CpuFeatureAVX = Q_UINT64_C(1) << 10;
static const quint64 CpuFeatureF16C = Q_UINT64_C(1) << 11;
static const quint64 CpuFeatureRDRND = Q_UINT64_C(1) << 12;
// in CPUID Leaf 7, Sub-leaf 0, ECX:
CpuFeatureAVX512VBMI = 28,
CpuFeatureAVX512VBMI2 = 29,
CpuFeatureGFNI = 30,
CpuFeatureVAES = 31,
CpuFeatureAVX512VNNI = 32,
CpuFeatureAVX512BITALG = 33,
CpuFeatureAVX512VPOPCNTDQ = 34,
// in CPUID Leaf 7, Sub-leaf 0, EBX:
static const quint64 CpuFeatureBMI = Q_UINT64_C(1) << 13;
static const quint64 CpuFeatureHLE = Q_UINT64_C(1) << 14;
static const quint64 CpuFeatureAVX2 = Q_UINT64_C(1) << 15;
static const quint64 CpuFeatureBMI2 = Q_UINT64_C(1) << 16;
static const quint64 CpuFeatureRTM = Q_UINT64_C(1) << 17;
static const quint64 CpuFeatureAVX512F = Q_UINT64_C(1) << 18;
static const quint64 CpuFeatureAVX512DQ = Q_UINT64_C(1) << 19;
static const quint64 CpuFeatureRDSEED = Q_UINT64_C(1) << 20;
static const quint64 CpuFeatureAVX512IFMA = Q_UINT64_C(1) << 21;
static const quint64 CpuFeatureAVX512PF = Q_UINT64_C(1) << 22;
static const quint64 CpuFeatureAVX512ER = Q_UINT64_C(1) << 23;
static const quint64 CpuFeatureAVX512CD = Q_UINT64_C(1) << 24;
static const quint64 CpuFeatureSHA = Q_UINT64_C(1) << 25;
static const quint64 CpuFeatureAVX512BW = Q_UINT64_C(1) << 26;
static const quint64 CpuFeatureAVX512VL = Q_UINT64_C(1) << 27;
// in CPUID Leaf 7, Sub-leaf 0, EDX:
CpuFeatureAVX5124NNIW = 35,
CpuFeatureAVX5124FMAPS = 36,
// in CPUID Leaf 7, Sub-leaf 0, ECX:
static const quint64 CpuFeatureAVX512VBMI = Q_UINT64_C(1) << 28;
static const quint64 CpuFeatureAVX512VBMI2 = Q_UINT64_C(1) << 29;
static const quint64 CpuFeatureGFNI = Q_UINT64_C(1) << 30;
static const quint64 CpuFeatureVAES = Q_UINT64_C(1) << 31;
static const quint64 CpuFeatureAVX512VNNI = Q_UINT64_C(1) << 32;
static const quint64 CpuFeatureAVX512BITALG = Q_UINT64_C(1) << 33;
static const quint64 CpuFeatureAVX512VPOPCNTDQ = Q_UINT64_C(1) << 34;
// used only to indicate that the CPU detection was initialized
QSimdInitialized = 1
};
// in CPUID Leaf 7, Sub-leaf 0, EDX:
static const quint64 CpuFeatureAVX5124NNIW = Q_UINT64_C(1) << 35;
static const quint64 CpuFeatureAVX5124FMAPS = Q_UINT64_C(1) << 36;
static const quint64 qCompilerCpuFeatures = 0
#ifdef __SSE2__
| (Q_UINT64_C(1) << CpuFeatureSSE2)
| (Q_UINT64_C(1) << 1) // CpuFeatureSSE2
#endif
#ifdef __SSE3__
| (Q_UINT64_C(1) << CpuFeatureSSE3)
| (Q_UINT64_C(1) << 2) // CpuFeatureSSE3
#endif
#ifdef __SSSE3__
| (Q_UINT64_C(1) << CpuFeatureSSSE3)
| (Q_UINT64_C(1) << 3) // CpuFeatureSSSE3
#endif
#ifdef __FMA__
| (Q_UINT64_C(1) << CpuFeatureFMA)
| (Q_UINT64_C(1) << 4) // CpuFeatureFMA
#endif
#ifdef __SSE4_1__
| (Q_UINT64_C(1) << CpuFeatureSSE4_1)
| (Q_UINT64_C(1) << 5) // CpuFeatureSSE4_1
#endif
#ifdef __SSE4_2__
| (Q_UINT64_C(1) << CpuFeatureSSE4_2)
| (Q_UINT64_C(1) << 6) // CpuFeatureSSE4_2
#endif
#ifdef __MOVBE__
| (Q_UINT64_C(1) << CpuFeatureMOVBE)
| (Q_UINT64_C(1) << 7) // CpuFeatureMOVBE
#endif
#ifdef __POPCNT__
| (Q_UINT64_C(1) << CpuFeaturePOPCNT)
| (Q_UINT64_C(1) << 8) // CpuFeaturePOPCNT
#endif
#ifdef __AES__
| (Q_UINT64_C(1) << CpuFeatureAES)
| (Q_UINT64_C(1) << 9) // CpuFeatureAES
#endif
#ifdef __AVX__
| (Q_UINT64_C(1) << CpuFeatureAVX)
| (Q_UINT64_C(1) << 10) // CpuFeatureAVX
#endif
#ifdef __F16C__
| (Q_UINT64_C(1) << CpuFeatureF16C)
| (Q_UINT64_C(1) << 11) // CpuFeatureF16C
#endif
#ifdef __RDRND__
| (Q_UINT64_C(1) << CpuFeatureRDRND)
| (Q_UINT64_C(1) << 12) // CpuFeatureRDRND
#endif
#ifdef __BMI__
| (Q_UINT64_C(1) << CpuFeatureBMI)
| (Q_UINT64_C(1) << 13) // CpuFeatureBMI
#endif
#ifdef __HLE__
| (Q_UINT64_C(1) << CpuFeatureHLE)
| (Q_UINT64_C(1) << 14) // CpuFeatureHLE
#endif
#ifdef __AVX2__
| (Q_UINT64_C(1) << CpuFeatureAVX2)
| (Q_UINT64_C(1) << 15) // CpuFeatureAVX2
#endif
#ifdef __BMI2__
| (Q_UINT64_C(1) << CpuFeatureBMI2)
| (Q_UINT64_C(1) << 16) // CpuFeatureBMI2
#endif
#ifdef __RTM__
| (Q_UINT64_C(1) << CpuFeatureRTM)
| (Q_UINT64_C(1) << 17) // CpuFeatureRTM
#endif
#ifdef __AVX512F__
| (Q_UINT64_C(1) << CpuFeatureAVX512F)
| (Q_UINT64_C(1) << 18) // CpuFeatureAVX512F
#endif
#ifdef __AVX512DQ__
| (Q_UINT64_C(1) << CpuFeatureAVX512DQ)
| (Q_UINT64_C(1) << 19) // CpuFeatureAVX512DQ
#endif
#ifdef __RDSEED__
| (Q_UINT64_C(1) << CpuFeatureRDSEED)
| (Q_UINT64_C(1) << 20) // CpuFeatureRDSEED
#endif
#ifdef __AVX512IFMA__
| (Q_UINT64_C(1) << CpuFeatureAVX512IFMA)
| (Q_UINT64_C(1) << 21) // CpuFeatureAVX512IFMA
#endif
#ifdef __AVX512PF__
| (Q_UINT64_C(1) << CpuFeatureAVX512PF)
| (Q_UINT64_C(1) << 22) // CpuFeatureAVX512PF
#endif
#ifdef __AVX512ER__
| (Q_UINT64_C(1) << CpuFeatureAVX512ER)
| (Q_UINT64_C(1) << 23) // CpuFeatureAVX512ER
#endif
#ifdef __AVX512CD__
| (Q_UINT64_C(1) << CpuFeatureAVX512CD)
| (Q_UINT64_C(1) << 24) // CpuFeatureAVX512CD
#endif
#ifdef __SHA__
| (Q_UINT64_C(1) << CpuFeatureSHA)
| (Q_UINT64_C(1) << 25) // CpuFeatureSHA
#endif
#ifdef __AVX512BW__
| (Q_UINT64_C(1) << CpuFeatureAVX512BW)
| (Q_UINT64_C(1) << 26) // CpuFeatureAVX512BW
#endif
#ifdef __AVX512VL__
| (Q_UINT64_C(1) << CpuFeatureAVX512VL)
| (Q_UINT64_C(1) << 27) // CpuFeatureAVX512VL
#endif
#ifdef __AVX512VBMI__
| (Q_UINT64_C(1) << CpuFeatureAVX512VBMI)
| (Q_UINT64_C(1) << 28) // CpuFeatureAVX512VBMI
#endif
#ifdef __AVX512VBMI2__
| (Q_UINT64_C(1) << CpuFeatureAVX512VBMI2)
| (Q_UINT64_C(1) << 29) // CpuFeatureAVX512VBMI2
#endif
#ifdef __GFNI__
| (Q_UINT64_C(1) << CpuFeatureGFNI)
| (Q_UINT64_C(1) << 30) // CpuFeatureGFNI
#endif
#ifdef __VAES__
| (Q_UINT64_C(1) << CpuFeatureVAES)
| (Q_UINT64_C(1) << 31) // CpuFeatureVAES
#endif
#ifdef __AVX512VNNI__
| (Q_UINT64_C(1) << CpuFeatureAVX512VNNI)
| (Q_UINT64_C(1) << 32) // CpuFeatureAVX512VNNI
#endif
#ifdef __AVX512BITALG__
| (Q_UINT64_C(1) << CpuFeatureAVX512BITALG)
| (Q_UINT64_C(1) << 33) // CpuFeatureAVX512BITALG
#endif
#ifdef __AVX512VPOPCNTDQ__
| (Q_UINT64_C(1) << CpuFeatureAVX512VPOPCNTDQ)
| (Q_UINT64_C(1) << 34) // CpuFeatureAVX512VPOPCNTDQ
#endif
#ifdef __AVX5124NNIW__
| (Q_UINT64_C(1) << CpuFeatureAVX5124NNIW)
| (Q_UINT64_C(1) << 35) // CpuFeatureAVX5124NNIW
#endif
#ifdef __AVX5124FMAPS__
| (Q_UINT64_C(1) << CpuFeatureAVX5124FMAPS)
| (Q_UINT64_C(1) << 36) // CpuFeatureAVX5124FMAPS
#endif
;
QT_END_NAMESPACE
#endif // QSIMD_X86_P_H

View File

@ -99,33 +99,32 @@ for my $feature (@features) {
}
# Print the enum
printf "\nenum CPUFeatures {";
print q{
// used only to indicate that the CPU detection was initialized
static const quint64 QSimdInitialized = Q_UINT64_C(1) << 0;};
my $lastleaf;
for (my $i = 0; $i < scalar @features; ++$i) {
my $feature = $features[$i];
# Leaf header:
printf "\n // in %s:\n", $leaves{$feature->{leaf}}
printf "\n// in %s:\n", $leaves{$feature->{leaf}}
if $feature->{leaf} ne $lastleaf;
$lastleaf = $feature->{leaf};
# Feature
printf " CpuFeature%-13s = %d,\n", $feature->{id}, $i + 1;
printf "static const quint64 CpuFeature%-13s = Q_UINT64_C(1) << %d;\n", $feature->{id}, $i + 1;
}
print q{
// used only to indicate that the CPU detection was initialized
QSimdInitialized = 1
\};
static const quint64 qCompilerCpuFeatures = 0};
# And print the compiler-enabled features part:
for my $feature (@features) {
for (my $i = 0; $i < scalar @features; ++$i) {
my $feature = $features[$i];
printf
"#ifdef __%s__\n" .
" | (Q_UINT64_C(1) << CpuFeature%s)\n" .
" | (Q_UINT64_C(1) << %d) \t// CpuFeature%s\n" .
"#endif\n",
$feature->{id}, $feature->{id};
$feature->{id}, $i + 1, $feature->{id};
}
print q{ ;
@ -149,7 +148,7 @@ if (my $cpp = shift @ARGV) {
print "// This is a generated file. DO NOT EDIT.";
print "// Please see util/x86simdgen/generate.pl";
print "#include <qglobal.h>";
print '#include "qsimd_p.h"';
print "";
# Now generate the string table and bit-location array
@ -189,4 +188,16 @@ for (my $j = 0; $j < scalar @features; ++$j) {
$feature->{leaf}, $feature->{bit};
$lastname = $feature->{name};
}
print " // $lastname\n};";
# Close the locator array and open the AllAVX512 definition.
# The text is passed as an argument to an explicit "%s" format, so a '%'
# inside $lastname can never be interpreted as a printf directive. printf is
# kept (as in the original statement) rather than switching to print.
printf "%s", qq{ // $lastname
\};
// List of AVX512 features (see detectProcessorFeatures())
static const quint64 AllAVX512 = 0};

# Print AVX512 features: one "| CpuFeature<ID>" term for each feature whose
# id contains "AVX512". A lexical variable holds the id so the global $_ is
# left untouched.
for my $feature (@features) {
    my $id = $feature->{id};
    printf "\n | CpuFeature%s", $id if $id =~ /AVX512/;
}

# Terminate the AllAVX512 definition.
print ";";