qsimd_p.h: let the generator script generate the ARCH target strings

__attribute__((target("arch=xxxx"))) does not work because the compilers
(GCC at least) don't test the CPU features that they are targeting, so
we keep getting "inline failed" compiler errors.

GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90129

Upstream patch: https://github.com/opendcdiag/opendcdiag/pull/59

Change-Id: I6fcda969a9e9427198bffffd16cea09fda4406d2
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
This commit is contained in:
Thiago Macieira 2022-01-28 19:38:42 -08:00
parent 8b78439980
commit b2298b7e94
5 changed files with 111 additions and 53 deletions

View File

@ -200,20 +200,13 @@ static inline bool hasFastF16()
}
#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
#define QT_FUNCTION_TARGET_STRING_AVX512VLBW \
QT_FUNCTION_TARGET_STRING_AVX512VL "," \
QT_FUNCTION_TARGET_STRING_AVX512BW "," \
QT_FUNCTION_TARGET_STRING_F16C "," \
QT_FUNCTION_TARGET_STRING_BMI2 /* BMI2 for BZHI */
static bool hasFastF16Avx256()
{
// 256-bit AVX512 instructions don't have a performance penalty (see qstring.cpp for more info)
constexpr quint64 CpuFeatureAVX512VLBW = CpuFeatureAVX512BW | CpuFeatureAVX512VL;
return qCpuHasFeature(AVX512VLBW);
return qCpuHasFeature(ArchSkylakeAvx512);
}
static QT_FUNCTION_TARGET(AVX512VLBW)
static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept
{
__mmask16 mask = _bzhi_u32(-1, len);
@ -222,7 +215,7 @@ void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) n
_mm_mask_storeu_epi16(out, mask, f16);
};
static QT_FUNCTION_TARGET(AVX512VLBW)
static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept
{
__mmask16 mask = _bzhi_u32(-1, len);

View File

@ -250,7 +250,6 @@ asm(
//
// macOS's fat binaries support the "x86_64h" sub-architecture and the GNU libc
// ELF loader also supports a "haswell/" subdir (e.g., /usr/lib/haswell).
# define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL "arch=haswell"
# define ARCH_HASWELL_MACROS (__AVX2__ + __BMI__ + __BMI2__ + __F16C__ + __FMA__ + __LZCNT__)
# if ARCH_HASWELL_MACROS != 0
# if ARCH_HASWELL_MACROS != 6

View File

@ -146,8 +146,8 @@ static const struct X86Architecture x86_architectures[] = {
{ cpu_icelake_server, "Ice Lake (Server)" },
{ cpu_icelake_client, "Ice Lake (Client)" },
{ cpu_alderlake, "Alder Lake" },
{ cpu_cooperlake, "Cooper Lake" },
{ cpu_cannonlake, "Cannon Lake" },
{ cpu_cooperlake, "Cooper Lake" },
{ cpu_cascadelake, "Cascade Lake" },
{ cpu_skylake_avx512, "Skylake (Avx512)" },
{ cpu_skylake, "Skylake" },

View File

@ -46,81 +46,47 @@
// in CPUID Leaf 1, EDX:
#define cpu_feature_sse2 (UINT64_C(1) << 0)
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
// in CPUID Leaf 1, ECX:
#define cpu_feature_sse3 (UINT64_C(1) << 1)
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
#define cpu_feature_ssse3 (UINT64_C(1) << 2)
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
#define cpu_feature_fma (UINT64_C(1) << 3)
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
#define cpu_feature_sse4_1 (UINT64_C(1) << 4)
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
#define cpu_feature_sse4_2 (UINT64_C(1) << 5)
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
#define cpu_feature_movbe (UINT64_C(1) << 6)
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
#define cpu_feature_popcnt (UINT64_C(1) << 7)
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
#define cpu_feature_aes (UINT64_C(1) << 8)
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
#define cpu_feature_avx (UINT64_C(1) << 9)
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
#define cpu_feature_f16c (UINT64_C(1) << 10)
#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
#define cpu_feature_rdrnd (UINT64_C(1) << 11)
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
// in CPUID Leaf 7, Sub-leaf 0, EBX:
#define cpu_feature_bmi (UINT64_C(1) << 12)
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
#define cpu_feature_avx2 (UINT64_C(1) << 13)
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx"
#define cpu_feature_bmi2 (UINT64_C(1) << 14)
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
#define cpu_feature_avx512f (UINT64_C(1) << 15)
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx"
#define cpu_feature_avx512dq (UINT64_C(1) << 16)
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f"
#define cpu_feature_rdseed (UINT64_C(1) << 17)
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
#define cpu_feature_avx512ifma (UINT64_C(1) << 18)
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f"
#define cpu_feature_avx512cd (UINT64_C(1) << 19)
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f"
#define cpu_feature_sha (UINT64_C(1) << 20)
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
#define cpu_feature_avx512bw (UINT64_C(1) << 21)
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f"
#define cpu_feature_avx512vl (UINT64_C(1) << 22)
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f"
// in CPUID Leaf 7, Sub-leaf 0, ECX:
#define cpu_feature_avx512vbmi (UINT64_C(1) << 23)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f"
#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24)
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f"
#define cpu_feature_shstk (UINT64_C(1) << 25)
#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk"
#define cpu_feature_gfni (UINT64_C(1) << 26)
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
#define cpu_feature_vaes (UINT64_C(1) << 27)
#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes"
#define cpu_feature_avx512vnni (UINT64_C(1) << 28)
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f"
#define cpu_feature_avx512bitalg (UINT64_C(1) << 29)
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f"
#define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30)
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f"
// in CPUID Leaf 7, Sub-leaf 0, EDX:
#define cpu_feature_hybrid (UINT64_C(1) << 31)
#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid"
#define cpu_feature_ibt (UINT64_C(1) << 32)
#define QT_FUNCTION_TARGET_STRING_IBT "ibt"
#define cpu_feature_avx512fp16 (UINT64_C(1) << 33)
#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c"
// CPU architectures
#define cpu_x86_64 (0 \
@ -204,6 +170,83 @@
#define cpu_goldmont (cpu_glm)
#define cpu_tremont (cpu_tnt)
// __attribute__ target strings for GCC and Clang
#define QT_FUNCTION_TARGET_STRING_SSE2 "sse2"
#define QT_FUNCTION_TARGET_STRING_SSE3 "sse3"
#define QT_FUNCTION_TARGET_STRING_SSSE3 "ssse3"
#define QT_FUNCTION_TARGET_STRING_FMA "fma"
#define QT_FUNCTION_TARGET_STRING_SSE4_1 "sse4.1"
#define QT_FUNCTION_TARGET_STRING_SSE4_2 "sse4.2"
#define QT_FUNCTION_TARGET_STRING_MOVBE "movbe"
#define QT_FUNCTION_TARGET_STRING_POPCNT "popcnt"
#define QT_FUNCTION_TARGET_STRING_AES "aes,sse4.2"
#define QT_FUNCTION_TARGET_STRING_AVX "avx"
#define QT_FUNCTION_TARGET_STRING_F16C "f16c,avx"
#define QT_FUNCTION_TARGET_STRING_RDRND "rdrnd"
#define QT_FUNCTION_TARGET_STRING_BMI "bmi"
#define QT_FUNCTION_TARGET_STRING_AVX2 "avx2,avx"
#define QT_FUNCTION_TARGET_STRING_BMI2 "bmi2"
#define QT_FUNCTION_TARGET_STRING_AVX512F "avx512f,avx"
#define QT_FUNCTION_TARGET_STRING_AVX512DQ "avx512dq,avx512f"
#define QT_FUNCTION_TARGET_STRING_RDSEED "rdseed"
#define QT_FUNCTION_TARGET_STRING_AVX512IFMA "avx512ifma,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512CD "avx512cd,avx512f"
#define QT_FUNCTION_TARGET_STRING_SHA "sha"
#define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f"
#define QT_FUNCTION_TARGET_STRING_SHSTK "shstk"
#define QT_FUNCTION_TARGET_STRING_GFNI "gfni"
#define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes"
#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f"
#define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f"
#define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid"
#define QT_FUNCTION_TARGET_STRING_IBT "ibt"
#define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c"
#define QT_FUNCTION_TARGET_STRING_ARCH_X86_64 "sse2"
#define QT_FUNCTION_TARGET_STRING_ARCH_CORE2 QT_FUNCTION_TARGET_STRING_ARCH_X86_64 ",sse3,ssse3,cx16"
#define QT_FUNCTION_TARGET_STRING_ARCH_NHM QT_FUNCTION_TARGET_STRING_ARCH_CORE2 ",sse4.1,sse4.2,popcnt"
#define QT_FUNCTION_TARGET_STRING_ARCH_WSM QT_FUNCTION_TARGET_STRING_ARCH_NHM
#define QT_FUNCTION_TARGET_STRING_ARCH_SNB QT_FUNCTION_TARGET_STRING_ARCH_WSM ",avx"
#define QT_FUNCTION_TARGET_STRING_ARCH_IVB QT_FUNCTION_TARGET_STRING_ARCH_SNB ",f16c,rdrnd,fsgsbase"
#define QT_FUNCTION_TARGET_STRING_ARCH_HSW QT_FUNCTION_TARGET_STRING_ARCH_IVB ",avx2,fma,bmi,bmi2,lzcnt,movbe"
#define QT_FUNCTION_TARGET_STRING_ARCH_BDW QT_FUNCTION_TARGET_STRING_ARCH_HSW ",adx,rdseed"
#define QT_FUNCTION_TARGET_STRING_ARCH_BDX QT_FUNCTION_TARGET_STRING_ARCH_BDW
#define QT_FUNCTION_TARGET_STRING_ARCH_SKL QT_FUNCTION_TARGET_STRING_ARCH_BDW ",xsavec,xsaves"
#define QT_FUNCTION_TARGET_STRING_ARCH_ADL QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker"
#define QT_FUNCTION_TARGET_STRING_ARCH_SKX QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avx512f,avx512dq,avx512cd,avx512bw,avx512vl"
#define QT_FUNCTION_TARGET_STRING_ARCH_CLX QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512vnni"
#define QT_FUNCTION_TARGET_STRING_ARCH_CPX QT_FUNCTION_TARGET_STRING_ARCH_CLX ",avx512bf16"
#define QT_FUNCTION_TARGET_STRING_ARCH_CNL QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512ifma,avx512vbmi"
#define QT_FUNCTION_TARGET_STRING_ARCH_ICL QT_FUNCTION_TARGET_STRING_ARCH_CNL ",avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq"
#define QT_FUNCTION_TARGET_STRING_ARCH_ICX QT_FUNCTION_TARGET_STRING_ARCH_ICL ",pconfig"
// Tiger Lake: everything in the ICL target string plus the features listed here.
// (The doubled comma after "shstk" was removed so the list has no empty entry.)
#define QT_FUNCTION_TARGET_STRING_ARCH_TGL QT_FUNCTION_TARGET_STRING_ARCH_ICL ",avx512vp2intersect,shstk,movdiri,movdir64b,ibt,keylocker"
#define QT_FUNCTION_TARGET_STRING_ARCH_SPR QT_FUNCTION_TARGET_STRING_ARCH_TGL ",avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr"
#define QT_FUNCTION_TARGET_STRING_ARCH_SLM QT_FUNCTION_TARGET_STRING_ARCH_WSM ",rdrnd,movbe"
#define QT_FUNCTION_TARGET_STRING_ARCH_GLM QT_FUNCTION_TARGET_STRING_ARCH_SLM ",fsgsbase,rdseed,lzcnt,xsavec,xsaves"
#define QT_FUNCTION_TARGET_STRING_ARCH_TNT QT_FUNCTION_TARGET_STRING_ARCH_GLM ",clwb,gfni,cldemote,waitpkg,movdiri,movdir64b"
#define QT_FUNCTION_TARGET_STRING_ARCH_NEHALEM QT_FUNCTION_TARGET_STRING_ARCH_NHM
#define QT_FUNCTION_TARGET_STRING_ARCH_WESTMERE QT_FUNCTION_TARGET_STRING_ARCH_WSM
#define QT_FUNCTION_TARGET_STRING_ARCH_SANDYBRIDGE QT_FUNCTION_TARGET_STRING_ARCH_SNB
#define QT_FUNCTION_TARGET_STRING_ARCH_IVYBRIDGE QT_FUNCTION_TARGET_STRING_ARCH_IVB
#define QT_FUNCTION_TARGET_STRING_ARCH_HASWELL QT_FUNCTION_TARGET_STRING_ARCH_HSW
#define QT_FUNCTION_TARGET_STRING_ARCH_BROADWELL QT_FUNCTION_TARGET_STRING_ARCH_BDW
#define QT_FUNCTION_TARGET_STRING_ARCH_SKYLAKE QT_FUNCTION_TARGET_STRING_ARCH_SKL
#define QT_FUNCTION_TARGET_STRING_ARCH_SKYLAKE_AVX512 QT_FUNCTION_TARGET_STRING_ARCH_SKX
#define QT_FUNCTION_TARGET_STRING_ARCH_CASCADELAKE QT_FUNCTION_TARGET_STRING_ARCH_CLX
#define QT_FUNCTION_TARGET_STRING_ARCH_COOPERLAKE QT_FUNCTION_TARGET_STRING_ARCH_CPX
#define QT_FUNCTION_TARGET_STRING_ARCH_CANNONLAKE QT_FUNCTION_TARGET_STRING_ARCH_CNL
#define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_CLIENT QT_FUNCTION_TARGET_STRING_ARCH_ICL
#define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_SERVER QT_FUNCTION_TARGET_STRING_ARCH_ICX
#define QT_FUNCTION_TARGET_STRING_ARCH_ALDERLAKE QT_FUNCTION_TARGET_STRING_ARCH_ADL
#define QT_FUNCTION_TARGET_STRING_ARCH_SAPPHIRERAPIDS QT_FUNCTION_TARGET_STRING_ARCH_SPR
#define QT_FUNCTION_TARGET_STRING_ARCH_TIGERLAKE QT_FUNCTION_TARGET_STRING_ARCH_TGL
#define QT_FUNCTION_TARGET_STRING_ARCH_SILVERMONT QT_FUNCTION_TARGET_STRING_ARCH_SLM
#define QT_FUNCTION_TARGET_STRING_ARCH_GOLDMONT QT_FUNCTION_TARGET_STRING_ARCH_GLM
#define QT_FUNCTION_TARGET_STRING_ARCH_TREMONT QT_FUNCTION_TARGET_STRING_ARCH_TNT
static const uint64_t _compilerCpuFeatures = 0
#ifdef __SSE2__
| cpu_feature_sse2

View File

@ -28,6 +28,7 @@ open(FH, '<', $input_conf_file) or die $!;
my $i = 0;
my @features;
my %feature_ids;
my @architecture_names;
my %architectures;
my @xsaveStates;
@ -58,7 +59,7 @@ while (<FH>) {
@basefeatures = @{$architectures{$based}->{allfeatures}} if $based ne "<>";
@extrafeatures = @{$architectures{$arch}{features}} if defined($architectures{$arch});
@extrafeatures = (@extrafeatures, split(',', $f));
my @allfeatures = sort (@basefeatures, @extrafeatures);
my @allfeatures = sort { $feature_ids{$a} <=> $feature_ids{$b} } (@basefeatures, @extrafeatures);
$architectures{$arch} = {
name => $arch,
@ -91,6 +92,7 @@ while (<FH>) {
$id =~ s/[^A-Z0-9_]/_/g;
push @features,
{ name => $name, depends => $depends, id => $id, bit => $bit, leaf => $function, comment => $comment };
$feature_ids{$name} = $i;
++$i;
die("Too many features to fit a 64-bit integer") if $i > 64;
}
@ -126,12 +128,6 @@ for (my $i = 0; $i < scalar @features; ++$i) {
# Feature
printf "#define cpu_feature_%-31s (UINT64_C(1) << %d)\n", lc($feature->{id}), $i;
# Feature string names for Clang and GCC
my $str = $feature->{name} . ',' . $feature->{depends};
$str =~ s/,$//;
printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
$feature->{id}, $str;
}
# Print the architecture list
@ -160,6 +156,33 @@ for (@architecture_names) {
print ")";
}
# Emit one QT_FUNCTION_TARGET_STRING_<ID> macro per CPU feature. The macro
# value is the feature name followed by the features it depends on, in the
# comma-separated form used by __attribute__((target(...))).
#
# NOTE(review): this print has no trailing newline while the printf below
# carries an explicit "\n"; presumably the output record separator is set
# elsewhere in the script -- confirm before changing either call.
print "\n// __attribute__ target strings for GCC and Clang";
for my $feature (@features) {
    my $target = join(',', $feature->{name}, $feature->{depends});

    # A feature without dependencies leaves a dangling comma; drop it.
    $target =~ s/,$//;

    printf "#define QT_FUNCTION_TARGET_STRING_%-17s \"%s\"\n",
        $feature->{id}, $target;
}
# Emit one QT_FUNCTION_TARGET_STRING_ARCH_<ID> macro per architecture.
#
# Each macro expands to the base architecture's macro (when the base is not
# "<>") followed by a string literal holding the features this architecture
# adds, so the compiler's string-literal concatenation builds the full
# comma-separated target list. Possible shapes of the emitted value:
#   base and extra features:  QT_FUNCTION_TARGET_STRING_ARCH_BASE ",a,b"
#   base only:                QT_FUNCTION_TARGET_STRING_ARCH_BASE
#   extra features only:      "a,b"
for my $arch_name (@architecture_names) {
    my $arch = $architectures{$arch_name};
    my $base = $arch->{base};

    # Features added on top of the base. Empty names are dropped so they
    # cannot show up as an empty ",," entry in the generated target string.
    # (Named @extra to avoid shadowing the file-level @features list.)
    my @extra = grep { defined($_) && length($_) } @{$arch->{features}};

    my $featurestr = "";
    if ($base ne "<>") {
        $featurestr = "QT_FUNCTION_TARGET_STRING_ARCH_" . uc($base);
    }

    if (scalar @extra) {
        # After a base macro the literal must start with a comma so that the
        # concatenated string stays a valid comma-separated list.
        $featurestr .= length($featurestr) ? ' ",' : '"';
        $featurestr .= join(',', @extra) . '"';
    }

    printf "#define QT_FUNCTION_TARGET_STRING_ARCH_%-12s %s\n", uc($arch->{id}), $featurestr;
}
print q{
static const uint64_t _compilerCpuFeatures = 0};