diff --git a/src/corelib/global/qsimd_x86.cpp b/src/corelib/global/qsimd_x86.cpp index f1a08e05e8..9a3bd80b39 100644 --- a/src/corelib/global/qsimd_x86.cpp +++ b/src/corelib/global/qsimd_x86.cpp @@ -1,8 +1,8 @@ // Copyright (C) 2022 Intel Corporation. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only - // This is a generated file. DO NOT EDIT. // Please see util/x86simdgen/README.md + #include "qsimd_x86_p.h" static const char features_string[] = @@ -30,24 +30,28 @@ static const char features_string[] = " avx512bw\0" " avx512vl\0" " avx512vbmi\0" + " waitpkg\0" " avx512vbmi2\0" " shstk\0" " gfni\0" " vaes\0" - " avx512vnni\0" " avx512bitalg\0" " avx512vpopcntdq\0" " hybrid\0" " ibt\0" " avx512fp16\0" + " raoint\0" + " cmpccxadd\0" + " avxifma\0" + " lam\0" "\0"; static const uint16_t features_indices[] = { 0, 6, 12, 19, 24, 32, 40, 47, 55, 60, 65, 71, 78, 83, 89, 95, 104, 114, 122, 134, 144, 149, 159, 169, - 181, 194, 201, 207, 213, 225, 239, 256, - 264, 269, + 181, 190, 203, 210, 216, 222, 236, 253, + 261, 266, 278, 286, 297, 306, }; enum X86CpuidLeaves { @@ -57,6 +61,7 @@ enum X86CpuidLeaves { Leaf07_00ECX, Leaf07_00EDX, Leaf07_01EAX, + Leaf07_01EDX, Leaf13_01EAX, Leaf80000001hECX, Leaf80000008hEBX, @@ -88,16 +93,20 @@ static const uint16_t x86_locators[] = { Leaf07_00EBX*32 + 30, // avx512bw Leaf07_00EBX*32 + 31, // avx512vl Leaf07_00ECX*32 + 1, // avx512vbmi + Leaf07_00ECX*32 + 5, // waitpkg Leaf07_00ECX*32 + 6, // avx512vbmi2 Leaf07_00ECX*32 + 7, // shstk Leaf07_00ECX*32 + 8, // gfni Leaf07_00ECX*32 + 9, // vaes - Leaf07_00ECX*32 + 11, // avx512vnni Leaf07_00ECX*32 + 12, // avx512bitalg Leaf07_00ECX*32 + 14, // avx512vpopcntdq Leaf07_00EDX*32 + 15, // hybrid Leaf07_00EDX*32 + 20, // ibt Leaf07_00EDX*32 + 23, // avx512fp16 + Leaf07_01EAX*32 + 3, // raoint + Leaf07_01EAX*32 + 6, // cmpccxadd + Leaf07_01EAX*32 + 23, // avxifma + Leaf07_01EAX*32 + 26, // lam }; struct X86Architecture @@ -107,25 +116,31 @@ struct X86Architecture }; static const struct X86Architecture x86_architectures[] = { - { cpu_sapphirerapids, "Sapphire Rapids" }, - { cpu_tigerlake, "Tiger Lake" }, - { cpu_icelake_server, "Ice Lake (Server)" }, - { cpu_icelake_client, "Ice Lake (Client)" }, - { cpu_alderlake, "Alder Lake" }, - { cpu_cannonlake, "Cannon Lake" }, - { cpu_cooperlake, "Cooper Lake" }, - { cpu_cascadelake, "Cascade Lake" }, - { cpu_skylake_avx512, "Skylake (Avx512)" }, - { cpu_skylake, "Skylake" }, - { cpu_tremont, "Tremont" }, - { cpu_broadwell, "Broadwell" }, - { cpu_haswell, "Haswell" }, - { cpu_goldmont, "Goldmont" }, - { cpu_ivybridge, "Ivy Bridge" }, - { cpu_silvermont, "Silvermont" }, - { cpu_sandybridge, "Sandy Bridge" }, - { cpu_westmere, "Westmere" }, { cpu_core2, "Core2" }, + { cpu_westmere, "Westmere" }, + { cpu_sandybridge, "Sandy Bridge" }, + { cpu_silvermont, "Silvermont" }, + { cpu_ivybridge, "Ivy Bridge" }, + { cpu_goldmont, "Goldmont" }, + { cpu_haswell, "Haswell" }, + { cpu_broadwell, "Broadwell" }, + { cpu_tremont, "Tremont" }, + { cpu_skylake, "Skylake" }, + { cpu_skylake_avx512, "Skylake (Avx512)" }, + { cpu_cascadelake, "Cascade Lake" }, + { cpu_cooperlake, "Cooper Lake" }, + { cpu_cannonlake, "Cannon Lake" }, + { cpu_gracemont, "Gracemont" }, + { cpu_icelake_client, "Ice Lake (Client)" }, + { cpu_icelake_server, "Ice Lake (Server)" }, + { cpu_crestmont, "Crestmont" }, + { cpu_tigerlake, "Tiger Lake" }, + { cpu_clearwaterforest, "Clearwater Forest" }, + { cpu_grandridge, "Grand Ridge" }, + { cpu_raptorcove, "Raptor Cove" }, + { cpu_redwoodcove, "Redwood Cove" }, + { cpu_emeraldrapids, "Emerald Rapids" }, + { cpu_graniterapids, "Granite Rapids" }, }; enum XSaveBits { @@ -168,10 +183,10 @@ static const uint64_t XSaveReq_AvxState = 0 | cpu_feature_avx512vbmi | cpu_feature_avx512vbmi2 | cpu_feature_vaes - | cpu_feature_avx512vnni | cpu_feature_avx512bitalg | cpu_feature_avx512vpopcntdq - | cpu_feature_avx512fp16; + | cpu_feature_avx512fp16 + | cpu_feature_avxifma; // List of features requiring XSave_Avx512State static const uint64_t XSaveReq_Avx512State = 0 @@ -183,7 +198,6 @@ static const uint64_t XSaveReq_Avx512State = 0 | cpu_feature_avx512vl | cpu_feature_avx512vbmi | cpu_feature_avx512vbmi2 - | cpu_feature_avx512vnni | cpu_feature_avx512bitalg | cpu_feature_avx512vpopcntdq | cpu_feature_avx512fp16; diff --git a/src/corelib/global/qsimd_x86_p.h b/src/corelib/global/qsimd_x86_p.h index 3e7427b0b1..1ec89d0c6c 100644 --- a/src/corelib/global/qsimd_x86_p.h +++ b/src/corelib/global/qsimd_x86_p.h @@ -1,5 +1,7 @@ // Copyright (C) 2022 Intel Corporation. // SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only +// This is a generated file. DO NOT EDIT. +// Please see util/x86simdgen/README.md // // W A R N I N G @@ -50,11 +52,11 @@ // in CPUID Leaf 7, Sub-leaf 0, ECX: #define cpu_feature_avx512vbmi (UINT64_C(1) << 23) -#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 24) -#define cpu_feature_shstk (UINT64_C(1) << 25) -#define cpu_feature_gfni (UINT64_C(1) << 26) -#define cpu_feature_vaes (UINT64_C(1) << 27) -#define cpu_feature_avx512vnni (UINT64_C(1) << 28) +#define cpu_feature_waitpkg (UINT64_C(1) << 24) +#define cpu_feature_avx512vbmi2 (UINT64_C(1) << 25) +#define cpu_feature_shstk (UINT64_C(1) << 26) +#define cpu_feature_gfni (UINT64_C(1) << 27) +#define cpu_feature_vaes (UINT64_C(1) << 28) #define cpu_feature_avx512bitalg (UINT64_C(1) << 29) #define cpu_feature_avx512vpopcntdq (UINT64_C(1) << 30) @@ -63,6 +65,12 @@ #define cpu_feature_ibt (UINT64_C(1) << 32) #define cpu_feature_avx512fp16 (UINT64_C(1) << 33) +// in CPUID Leaf 7, Sub-leaf 1, EAX: +#define cpu_feature_raoint (UINT64_C(1) << 34) +#define cpu_feature_cmpccxadd (UINT64_C(1) << 35) +#define cpu_feature_avxifma (UINT64_C(1) << 36) +#define cpu_feature_lam (UINT64_C(1) << 37) + // CPU architectures #define cpu_x86_64 (0 \ | cpu_feature_sse2) @@ -89,42 +97,65 @@ | cpu_feature_rdseed) #define cpu_bdx (cpu_bdw) #define cpu_skl (cpu_bdw) -#define cpu_adl (cpu_skl \ - | cpu_feature_gfni \ - | cpu_feature_vaes \ - | cpu_feature_shstk \ - | cpu_feature_ibt) #define cpu_skx (cpu_skl \ | cpu_feature_avx512f \ | cpu_feature_avx512dq \ | cpu_feature_avx512cd \ | cpu_feature_avx512bw \ | cpu_feature_avx512vl) -#define cpu_clx (cpu_skx \ - | cpu_feature_avx512vnni) +#define cpu_clx (cpu_skx) #define cpu_cpx (cpu_clx) -#define cpu_cnl (cpu_skx \ +#define cpu_plc (cpu_skx \ | cpu_feature_avx512ifma \ | cpu_feature_avx512vbmi) -#define cpu_icl (cpu_cnl \ +#define cpu_snc (cpu_plc \ | cpu_feature_avx512vbmi2 \ | cpu_feature_gfni \ | cpu_feature_vaes \ - | cpu_feature_avx512vnni \ | cpu_feature_avx512bitalg \ | cpu_feature_avx512vpopcntdq) -#define cpu_icx (cpu_icl) -#define cpu_tgl (cpu_icl \ +#define cpu_wlc (cpu_snc \ | cpu_feature_shstk \ | cpu_feature_ibt) -#define cpu_spr (cpu_tgl) +#define cpu_glc (cpu_wlc \ + | cpu_feature_waitpkg) +#define cpu_rpc (cpu_glc) +#define cpu_rwc (cpu_rpc) #define cpu_slm (cpu_wsm \ | cpu_feature_rdrnd \ | cpu_feature_movbe) #define cpu_glm (cpu_slm \ | cpu_feature_rdseed) #define cpu_tnt (cpu_glm \ - | cpu_feature_gfni) + | cpu_feature_gfni \ + | cpu_feature_waitpkg) +#define cpu_grt (cpu_skl \ + | cpu_feature_gfni \ + | cpu_feature_vaes \ + | cpu_feature_shstk \ + | cpu_feature_ibt \ + | cpu_feature_waitpkg) +#define cpu_cmt (cpu_grt \ + | cpu_feature_cmpccxadd \ + | cpu_feature_avxifma) +#define cpu_cnl (cpu_plc) +#define cpu_icl (cpu_snc) +#define cpu_tgl (cpu_wlc) +#define cpu_adl (cpu_grt) +#define cpu_rpl (cpu_grt) +#define cpu_mtl (cpu_cmt) +#define cpu_arl (cpu_cmt) +#define cpu_lnl (cpu_cmt) +#define cpu_icx (cpu_snc) +#define cpu_spr (cpu_glc) +#define cpu_emr (cpu_spr) +#define cpu_gnr (cpu_glc) +#define cpu_srf (cpu_cmt \ + | cpu_feature_cmpccxadd \ + | cpu_feature_avxifma) +#define cpu_grr (cpu_srf \ + | cpu_feature_raoint) +#define cpu_cwf (cpu_srf) #define cpu_nehalem (cpu_nhm) #define cpu_westmere (cpu_wsm) #define cpu_sandybridge (cpu_snb) @@ -135,15 +166,32 @@ #define cpu_skylake_avx512 (cpu_skx) #define cpu_cascadelake (cpu_clx) #define cpu_cooperlake (cpu_cpx) +#define cpu_palmcove (cpu_plc) #define cpu_cannonlake (cpu_cnl) +#define cpu_sunnycove (cpu_snc) #define cpu_icelake_client (cpu_icl) #define cpu_icelake_server (cpu_icx) -#define cpu_alderlake (cpu_adl) -#define cpu_sapphirerapids (cpu_spr) +#define cpu_willowcove (cpu_wlc) #define cpu_tigerlake (cpu_tgl) +#define cpu_goldencove (cpu_glc) +#define cpu_alderlake (cpu_adl) +#define cpu_raptorcove (cpu_rpc) +#define cpu_raptorlake (cpu_rpl) +#define cpu_redwoodcove (cpu_rwc) +#define cpu_meteorlake (cpu_mtl) +#define cpu_arrowlake (cpu_arl) +#define cpu_lunarlake (cpu_lnl) +#define cpu_sapphirerapids (cpu_spr) +#define cpu_emeraldrapids (cpu_emr) +#define cpu_graniterapids (cpu_gnr) #define cpu_silvermont (cpu_slm) #define cpu_goldmont (cpu_glm) #define cpu_tremont (cpu_tnt) +#define cpu_gracemont (cpu_grt) +#define cpu_crestmont (cpu_cmt) +#define cpu_grandridge (cpu_grr) +#define cpu_sierraforest (cpu_srf) +#define cpu_clearwaterforest (cpu_cwf) // __attribute__ target strings for GCC and Clang #define QT_FUNCTION_TARGET_STRING_SSE2 "sse2" @@ -170,16 +218,20 @@ #define QT_FUNCTION_TARGET_STRING_AVX512BW "avx512bw,avx512f" #define QT_FUNCTION_TARGET_STRING_AVX512VL "avx512vl,avx512f" #define QT_FUNCTION_TARGET_STRING_AVX512VBMI "avx512vbmi,avx512f" +#define QT_FUNCTION_TARGET_STRING_WAITPKG "waitpkg" #define QT_FUNCTION_TARGET_STRING_AVX512VBMI2 "avx512vbmi2,avx512f" #define QT_FUNCTION_TARGET_STRING_SHSTK "shstk" #define QT_FUNCTION_TARGET_STRING_GFNI "gfni" #define QT_FUNCTION_TARGET_STRING_VAES "vaes,avx2,avx,aes" -#define QT_FUNCTION_TARGET_STRING_AVX512VNNI "avx512vnni,avx512f" #define QT_FUNCTION_TARGET_STRING_AVX512BITALG "avx512bitalg,avx512f" #define QT_FUNCTION_TARGET_STRING_AVX512VPOPCNTDQ "avx512vpopcntdq,avx512f" #define QT_FUNCTION_TARGET_STRING_HYBRID "hybrid" #define QT_FUNCTION_TARGET_STRING_IBT "ibt" #define QT_FUNCTION_TARGET_STRING_AVX512FP16 "avx512fp16,avx512f,f16c" +#define QT_FUNCTION_TARGET_STRING_RAOINT "raoint" +#define QT_FUNCTION_TARGET_STRING_CMPCCXADD "cmpccxadd" +#define QT_FUNCTION_TARGET_STRING_AVXIFMA "avxifma,avx" +#define QT_FUNCTION_TARGET_STRING_LAM "lam" #define QT_FUNCTION_TARGET_STRING_ARCH_X86_64 "sse2" #define QT_FUNCTION_TARGET_STRING_ARCH_CORE2 QT_FUNCTION_TARGET_STRING_ARCH_X86_64 ",sse3,ssse3,cx16" #define QT_FUNCTION_TARGET_STRING_ARCH_NHM QT_FUNCTION_TARGET_STRING_ARCH_CORE2 ",sse4.1,sse4.2,popcnt" @@ -190,18 +242,35 @@ #define QT_FUNCTION_TARGET_STRING_ARCH_BDW QT_FUNCTION_TARGET_STRING_ARCH_HSW ",adx,rdseed" #define QT_FUNCTION_TARGET_STRING_ARCH_BDX QT_FUNCTION_TARGET_STRING_ARCH_BDW #define QT_FUNCTION_TARGET_STRING_ARCH_SKL QT_FUNCTION_TARGET_STRING_ARCH_BDW ",xsavec,xsaves" -#define QT_FUNCTION_TARGET_STRING_ARCH_ADL QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker" #define QT_FUNCTION_TARGET_STRING_ARCH_SKX QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avx512f,avx512dq,avx512cd,avx512bw,avx512vl" #define QT_FUNCTION_TARGET_STRING_ARCH_CLX QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512vnni" #define QT_FUNCTION_TARGET_STRING_ARCH_CPX QT_FUNCTION_TARGET_STRING_ARCH_CLX ",avx512bf16" -#define QT_FUNCTION_TARGET_STRING_ARCH_CNL QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512ifma,avx512vbmi" -#define QT_FUNCTION_TARGET_STRING_ARCH_ICL QT_FUNCTION_TARGET_STRING_ARCH_CNL ",avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq" -#define QT_FUNCTION_TARGET_STRING_ARCH_ICX QT_FUNCTION_TARGET_STRING_ARCH_ICL ",pconfig" -#define QT_FUNCTION_TARGET_STRING_ARCH_TGL QT_FUNCTION_TARGET_STRING_ARCH_ICL ",avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker" -#define QT_FUNCTION_TARGET_STRING_ARCH_SPR QT_FUNCTION_TARGET_STRING_ARCH_TGL ",avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr" +#define QT_FUNCTION_TARGET_STRING_ARCH_PLC QT_FUNCTION_TARGET_STRING_ARCH_SKX ",avx512ifma,avx512vbmi" +#define QT_FUNCTION_TARGET_STRING_ARCH_SNC QT_FUNCTION_TARGET_STRING_ARCH_PLC ",avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq" +#define QT_FUNCTION_TARGET_STRING_ARCH_WLC QT_FUNCTION_TARGET_STRING_ARCH_SNC ",shstk,movdiri,movdir64b,ibt,keylocker" +#define QT_FUNCTION_TARGET_STRING_ARCH_GLC QT_FUNCTION_TARGET_STRING_ARCH_WLC ",avx512bf16,avxvnni,cldemote,waitpkg,serialize,uintr" +#define QT_FUNCTION_TARGET_STRING_ARCH_RPC QT_FUNCTION_TARGET_STRING_ARCH_GLC +#define QT_FUNCTION_TARGET_STRING_ARCH_RWC QT_FUNCTION_TARGET_STRING_ARCH_RPC ",prefetchiti" #define QT_FUNCTION_TARGET_STRING_ARCH_SLM QT_FUNCTION_TARGET_STRING_ARCH_WSM ",rdrnd,movbe" #define QT_FUNCTION_TARGET_STRING_ARCH_GLM QT_FUNCTION_TARGET_STRING_ARCH_SLM ",fsgsbase,rdseed,lzcnt,xsavec,xsaves" #define QT_FUNCTION_TARGET_STRING_ARCH_TNT QT_FUNCTION_TARGET_STRING_ARCH_GLM ",clwb,gfni,cldemote,waitpkg,movdiri,movdir64b" +#define QT_FUNCTION_TARGET_STRING_ARCH_GRT QT_FUNCTION_TARGET_STRING_ARCH_SKL ",avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker" +#define QT_FUNCTION_TARGET_STRING_ARCH_CMT QT_FUNCTION_TARGET_STRING_ARCH_GRT ",cmpccxadd,avxifma,avxneconvert,avxvnniint8" +#define QT_FUNCTION_TARGET_STRING_ARCH_CNL QT_FUNCTION_TARGET_STRING_ARCH_PLC +#define QT_FUNCTION_TARGET_STRING_ARCH_ICL QT_FUNCTION_TARGET_STRING_ARCH_SNC +#define QT_FUNCTION_TARGET_STRING_ARCH_TGL QT_FUNCTION_TARGET_STRING_ARCH_WLC +#define QT_FUNCTION_TARGET_STRING_ARCH_ADL QT_FUNCTION_TARGET_STRING_ARCH_GRT +#define QT_FUNCTION_TARGET_STRING_ARCH_RPL QT_FUNCTION_TARGET_STRING_ARCH_GRT +#define QT_FUNCTION_TARGET_STRING_ARCH_MTL QT_FUNCTION_TARGET_STRING_ARCH_CMT +#define QT_FUNCTION_TARGET_STRING_ARCH_ARL QT_FUNCTION_TARGET_STRING_ARCH_CMT +#define QT_FUNCTION_TARGET_STRING_ARCH_LNL QT_FUNCTION_TARGET_STRING_ARCH_CMT +#define QT_FUNCTION_TARGET_STRING_ARCH_ICX QT_FUNCTION_TARGET_STRING_ARCH_SNC ",pconfig" +#define QT_FUNCTION_TARGET_STRING_ARCH_SPR QT_FUNCTION_TARGET_STRING_ARCH_GLC ",pconfig,amx-tile,amx-bf16,amx-int8" +#define QT_FUNCTION_TARGET_STRING_ARCH_EMR QT_FUNCTION_TARGET_STRING_ARCH_SPR +#define QT_FUNCTION_TARGET_STRING_ARCH_GNR QT_FUNCTION_TARGET_STRING_ARCH_GLC ",pconfig,amx-tile,amx-bf16,amx-int8,amx-fp16,amx-complex" +#define QT_FUNCTION_TARGET_STRING_ARCH_SRF QT_FUNCTION_TARGET_STRING_ARCH_CMT ",cmpccxadd,avxifma,avxneconvert,avxvnniint8" +#define QT_FUNCTION_TARGET_STRING_ARCH_GRR QT_FUNCTION_TARGET_STRING_ARCH_SRF ",raoint" +#define QT_FUNCTION_TARGET_STRING_ARCH_CWF QT_FUNCTION_TARGET_STRING_ARCH_SRF #define QT_FUNCTION_TARGET_STRING_ARCH_NEHALEM QT_FUNCTION_TARGET_STRING_ARCH_NHM #define QT_FUNCTION_TARGET_STRING_ARCH_WESTMERE QT_FUNCTION_TARGET_STRING_ARCH_WSM #define QT_FUNCTION_TARGET_STRING_ARCH_SANDYBRIDGE QT_FUNCTION_TARGET_STRING_ARCH_SNB @@ -212,15 +281,32 @@ #define QT_FUNCTION_TARGET_STRING_ARCH_SKYLAKE_AVX512 QT_FUNCTION_TARGET_STRING_ARCH_SKX #define QT_FUNCTION_TARGET_STRING_ARCH_CASCADELAKE QT_FUNCTION_TARGET_STRING_ARCH_CLX #define QT_FUNCTION_TARGET_STRING_ARCH_COOPERLAKE QT_FUNCTION_TARGET_STRING_ARCH_CPX +#define QT_FUNCTION_TARGET_STRING_ARCH_PALMCOVE QT_FUNCTION_TARGET_STRING_ARCH_PLC #define QT_FUNCTION_TARGET_STRING_ARCH_CANNONLAKE QT_FUNCTION_TARGET_STRING_ARCH_CNL +#define QT_FUNCTION_TARGET_STRING_ARCH_SUNNYCOVE QT_FUNCTION_TARGET_STRING_ARCH_SNC #define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_CLIENT QT_FUNCTION_TARGET_STRING_ARCH_ICL #define QT_FUNCTION_TARGET_STRING_ARCH_ICELAKE_SERVER QT_FUNCTION_TARGET_STRING_ARCH_ICX -#define QT_FUNCTION_TARGET_STRING_ARCH_ALDERLAKE QT_FUNCTION_TARGET_STRING_ARCH_ADL -#define QT_FUNCTION_TARGET_STRING_ARCH_SAPPHIRERAPIDS QT_FUNCTION_TARGET_STRING_ARCH_SPR +#define QT_FUNCTION_TARGET_STRING_ARCH_WILLOWCOVE QT_FUNCTION_TARGET_STRING_ARCH_WLC #define QT_FUNCTION_TARGET_STRING_ARCH_TIGERLAKE QT_FUNCTION_TARGET_STRING_ARCH_TGL +#define QT_FUNCTION_TARGET_STRING_ARCH_GOLDENCOVE QT_FUNCTION_TARGET_STRING_ARCH_GLC +#define QT_FUNCTION_TARGET_STRING_ARCH_ALDERLAKE QT_FUNCTION_TARGET_STRING_ARCH_ADL +#define QT_FUNCTION_TARGET_STRING_ARCH_RAPTORCOVE QT_FUNCTION_TARGET_STRING_ARCH_RPC +#define QT_FUNCTION_TARGET_STRING_ARCH_RAPTORLAKE QT_FUNCTION_TARGET_STRING_ARCH_RPL +#define QT_FUNCTION_TARGET_STRING_ARCH_REDWOODCOVE QT_FUNCTION_TARGET_STRING_ARCH_RWC +#define QT_FUNCTION_TARGET_STRING_ARCH_METEORLAKE QT_FUNCTION_TARGET_STRING_ARCH_MTL +#define QT_FUNCTION_TARGET_STRING_ARCH_ARROWLAKE QT_FUNCTION_TARGET_STRING_ARCH_ARL +#define QT_FUNCTION_TARGET_STRING_ARCH_LUNARLAKE QT_FUNCTION_TARGET_STRING_ARCH_LNL +#define QT_FUNCTION_TARGET_STRING_ARCH_SAPPHIRERAPIDS QT_FUNCTION_TARGET_STRING_ARCH_SPR +#define QT_FUNCTION_TARGET_STRING_ARCH_EMERALDRAPIDS QT_FUNCTION_TARGET_STRING_ARCH_EMR +#define QT_FUNCTION_TARGET_STRING_ARCH_GRANITERAPIDS QT_FUNCTION_TARGET_STRING_ARCH_GNR #define QT_FUNCTION_TARGET_STRING_ARCH_SILVERMONT QT_FUNCTION_TARGET_STRING_ARCH_SLM #define QT_FUNCTION_TARGET_STRING_ARCH_GOLDMONT QT_FUNCTION_TARGET_STRING_ARCH_GLM #define QT_FUNCTION_TARGET_STRING_ARCH_TREMONT QT_FUNCTION_TARGET_STRING_ARCH_TNT +#define QT_FUNCTION_TARGET_STRING_ARCH_GRACEMONT QT_FUNCTION_TARGET_STRING_ARCH_GRT +#define QT_FUNCTION_TARGET_STRING_ARCH_CRESTMONT QT_FUNCTION_TARGET_STRING_ARCH_CMT +#define QT_FUNCTION_TARGET_STRING_ARCH_GRANDRIDGE QT_FUNCTION_TARGET_STRING_ARCH_GRR +#define QT_FUNCTION_TARGET_STRING_ARCH_SIERRAFOREST QT_FUNCTION_TARGET_STRING_ARCH_SRF +#define QT_FUNCTION_TARGET_STRING_ARCH_CLEARWATERFOREST QT_FUNCTION_TARGET_STRING_ARCH_CWF static const uint64_t _compilerCpuFeatures = 0 #ifdef __SSE2__ @@ -295,6 +381,9 @@ static const uint64_t _compilerCpuFeatures = 0 #ifdef __AVX512VBMI__ | cpu_feature_avx512vbmi #endif +#ifdef __WAITPKG__ + | cpu_feature_waitpkg +#endif #ifdef __AVX512VBMI2__ | cpu_feature_avx512vbmi2 #endif @@ -307,9 +396,6 @@ static const uint64_t _compilerCpuFeatures = 0 #ifdef __VAES__ | cpu_feature_vaes #endif -#ifdef __AVX512VNNI__ - | cpu_feature_avx512vnni -#endif #ifdef __AVX512BITALG__ | cpu_feature_avx512bitalg #endif @@ -324,6 +410,18 @@ static const uint64_t _compilerCpuFeatures = 0 #endif #ifdef __AVX512FP16__ | cpu_feature_avx512fp16 +#endif +#ifdef __RAOINT__ + | cpu_feature_raoint +#endif +#ifdef __CMPCCXADD__ + | cpu_feature_cmpccxadd +#endif +#ifdef __AVXIFMA__ + | cpu_feature_avxifma +#endif +#ifdef __LAM__ + | cpu_feature_lam #endif ; @@ -353,16 +451,20 @@ enum X86CpuFeatures : uint64_t { CpuFeatureAVX512BW = cpu_feature_avx512bw, ///< AVX512 Byte & Word CpuFeatureAVX512VL = cpu_feature_avx512vl, ///< AVX512 Vector Length CpuFeatureAVX512VBMI = cpu_feature_avx512vbmi, ///< AVX512 Vector Byte Manipulation Instructions + CpuFeatureWAITPKG = cpu_feature_waitpkg, ///< User-Level Monitor / Wait CpuFeatureAVX512VBMI2 = cpu_feature_avx512vbmi2, ///< AVX512 Vector Byte Manipulation Instructions 2 CpuFeatureSHSTK = cpu_feature_shstk, ///< Control Flow Enforcement Technology Shadow Stack CpuFeatureGFNI = cpu_feature_gfni, ///< Galois Field new instructions CpuFeatureVAES = cpu_feature_vaes, ///< 256- and 512-bit AES - CpuFeatureAVX512VNNI = cpu_feature_avx512vnni, ///< AVX512 Vector Neural Network Instructions CpuFeatureAVX512BITALG = cpu_feature_avx512bitalg, ///< AVX512 Bit Algorithms CpuFeatureAVX512VPOPCNTDQ = cpu_feature_avx512vpopcntdq, ///< AVX512 Population Count CpuFeatureHYBRID = cpu_feature_hybrid, ///< Hybrid processor CpuFeatureIBT = cpu_feature_ibt, ///< Control Flow Enforcement Technology Indirect Branch Tracking CpuFeatureAVX512FP16 = cpu_feature_avx512fp16, ///< AVX512 16-bit Floating Point + CpuFeatureRAOINT = cpu_feature_raoint, ///< Remote Atomic Operations, Integer + CpuFeatureCMPCCXADD = cpu_feature_cmpccxadd, ///< CMPccXADD instructions + CpuFeatureAVXIFMA = cpu_feature_avxifma, ///< AVX-IFMA instructions + CpuFeatureLAM = cpu_feature_lam, ///< Linear Address Masking }; // enum X86CpuFeatures enum X86CpuArchitectures : uint64_t { @@ -372,22 +474,39 @@ enum X86CpuArchitectures : uint64_t { CpuArchWSM = cpu_wsm, CpuArchSNB = cpu_snb, CpuArchIVB = cpu_ivb, - CpuArchHSW = cpu_hsw, + CpuArchHSW = cpu_hsw, ///< hle,rtm CpuArchBDW = cpu_bdw, CpuArchBDX = cpu_bdx, CpuArchSKL = cpu_skl, - CpuArchADL = cpu_adl, - CpuArchSKX = cpu_skx, + CpuArchSKX = cpu_skx, ///< clwb CpuArchCLX = cpu_clx, CpuArchCPX = cpu_cpx, - CpuArchCNL = cpu_cnl, - CpuArchICL = cpu_icl, - CpuArchICX = cpu_icx, - CpuArchTGL = cpu_tgl, - CpuArchSPR = cpu_spr, + CpuArchPLC = cpu_plc, ///< sha + CpuArchSNC = cpu_snc, ///< fsrm,rdpid + CpuArchWLC = cpu_wlc, ///< avx512vp2intersect + CpuArchGLC = cpu_glc, ///< tsxldtrk + CpuArchRPC = cpu_rpc, + CpuArchRWC = cpu_rwc, CpuArchSLM = cpu_slm, CpuArchGLM = cpu_glm, CpuArchTNT = cpu_tnt, + CpuArchGRT = cpu_grt, ///< rdpid + CpuArchCMT = cpu_cmt, + CpuArchCNL = cpu_cnl, + CpuArchICL = cpu_icl, + CpuArchTGL = cpu_tgl, + CpuArchADL = cpu_adl, + CpuArchRPL = cpu_rpl, + CpuArchMTL = cpu_mtl, + CpuArchARL = cpu_arl, + CpuArchLNL = cpu_lnl, + CpuArchICX = cpu_icx, + CpuArchSPR = cpu_spr, + CpuArchEMR = cpu_emr, + CpuArchGNR = cpu_gnr, + CpuArchSRF = cpu_srf, + CpuArchGRR = cpu_grr, + CpuArchCWF = cpu_cwf, CpuArchNehalem = cpu_nehalem, ///< Intel Core i3/i5/i7 CpuArchWestmere = cpu_westmere, ///< Intel Core i3/i5/i7 CpuArchSandyBridge = cpu_sandybridge, ///< Second Generation Intel Core i3/i5/i7 @@ -398,15 +517,32 @@ enum X86CpuArchitectures : uint64_t { CpuArchSkylakeAvx512 = cpu_skylake_avx512, ///< Intel Xeon Scalable CpuArchCascadeLake = cpu_cascadelake, ///< Second Generation Intel Xeon Scalable CpuArchCooperLake = cpu_cooperlake, ///< Third Generation Intel Xeon Scalable + CpuArchPalmCove = cpu_palmcove, CpuArchCannonLake = cpu_cannonlake, ///< Intel Core i3-8121U + CpuArchSunnyCove = cpu_sunnycove, CpuArchIceLakeClient = cpu_icelake_client, ///< Tenth Generation Intel Core i3/i5/i7 CpuArchIceLakeServer = cpu_icelake_server, ///< Third Generation Intel Xeon Scalable - CpuArchAlderLake = cpu_alderlake, - CpuArchSapphireRapids = cpu_sapphirerapids, + CpuArchWillowCove = cpu_willowcove, CpuArchTigerLake = cpu_tigerlake, ///< Eleventh Generation Intel Core i3/i5/i7 + CpuArchGoldenCove = cpu_goldencove, + CpuArchAlderLake = cpu_alderlake, ///< Twelfth Generation Intel Core + CpuArchRaptorCove = cpu_raptorcove, + CpuArchRaptorLake = cpu_raptorlake, ///< Thirteenth Generation Intel Core + CpuArchRedwoodCove = cpu_redwoodcove, + CpuArchMeteorLake = cpu_meteorlake, + CpuArchArrowLake = cpu_arrowlake, + CpuArchLunarLake = cpu_lunarlake, + CpuArchSapphireRapids = cpu_sapphirerapids, ///< Fourth Generation Intel Xeon Scalable + CpuArchEmeraldRapids = cpu_emeraldrapids, ///< Fifth Generation Intel Xeon Scalable + CpuArchGraniteRapids = cpu_graniterapids, CpuArchSilvermont = cpu_silvermont, CpuArchGoldmont = cpu_goldmont, CpuArchTremont = cpu_tremont, + CpuArchGracemont = cpu_gracemont, + CpuArchCrestmont = cpu_crestmont, + CpuArchGrandRidge = cpu_grandridge, + CpuArchSierraForest = cpu_sierraforest, + CpuArchClearwaterForest = cpu_clearwaterforest, }; // enum X86cpuArchitectures #endif /* C++11 */ diff --git a/util/x86simdgen/3rdparty/simd-intel.conf b/util/x86simdgen/3rdparty/simd-intel.conf index 0db4259f97..3837855c19 100644 --- a/util/x86simdgen/3rdparty/simd-intel.conf +++ b/util/x86simdgen/3rdparty/simd-intel.conf @@ -50,13 +50,13 @@ avx512vl Leaf07_00EBX 31 avx512f # AVX512 Vector Length avx512vbmi Leaf07_00ECX 1 avx512f # AVX512 Vector Byte Manipulation Instructions #pku Leaf07_00ECX 3 # Protection Keys for User mode #ospke Leaf07_00ECX 4 # Protection Keys Enabled by OS -#waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait +waitpkg Leaf07_00ECX 5 # User-Level Monitor / Wait avx512vbmi2 Leaf07_00ECX 6 avx512f # AVX512 Vector Byte Manipulation Instructions 2 shstk Leaf07_00ECX 7 # Control Flow Enforcement Technology Shadow Stack gfni Leaf07_00ECX 8 # Galois Field new instructions vaes Leaf07_00ECX 9 avx2,avx,aes # 256- and 512-bit AES #vpclmulqdq Leaf07_00ECX 10 avx # 256- and 512-bit Carryless Multiply -avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions +#avx512vnni Leaf07_00ECX 11 avx512f # AVX512 Vector Neural Network Instructions avx512bitalg Leaf07_00ECX 12 avx512f # AVX512 Bit Algorithms avx512vpopcntdq Leaf07_00ECX 14 avx512f # AVX512 Population Count #la57 Leaf07_00ECX 16 # 5-level page tables @@ -78,16 +78,24 @@ hybrid Leaf07_00EDX 15 # Hybrid processor ibt Leaf07_00EDX 20 # Control Flow Enforcement Technology Indirect Branch Tracking #amxbf16 Leaf07_00EDX 22 amxtile # AMX Tile multiplication in BFloat16 avx512fp16 Leaf07_00EDX 23 avx512f,f16c # AVX512 16-bit Floating Point -#amxtile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support -#amxint8 Leaf07_00EDX 25 amxtile # AMX Tile multiplication for Int8 +#amx-tile Leaf07_00EDX 24 # Advanced Matrix Extensions Tile support +#amx-int8 Leaf07_00EDX 25 amx-tile # AMX Tile multiplication for Int8 +raoint Leaf07_01EAX 3 # Remote Atomic Operations, Integer #avxvnni Leaf07_01EAX 4 avx # AVX (VEX-encoded) versions of the Vector Neural Network Instructions #avx512bf16 Leaf07_01EAX 5 avx512f # AVX512 Brain Float16 +cmpccxadd Leaf07_01EAX 6 # CMPccXADD instructions #zlmovsb Leaf07_01EAX 10 # Zero-length MOVSB #fsrs Leaf07_01EAX 11 # Fast Short (REP?) STOSB #fsrc Leaf07_01EAX 12 # Fast Short (REP?) CMPSB, SCASB #fred Leaf07_01EAX 17 # Flexible Return and Event Delivery #lkgs Leaf07_01EAX 18 # Load into Kernel GS -#lam Leaf07_01EAX 26 # Linear Address Masking +#amx-fp16 Leaf07_01EAX 21 amx-tile # AMX Tile multiplication in FP16 +avxifma Leaf07_01EAX 23 avx # AVX-IFMA instructions +lam Leaf07_01EAX 26 # Linear Address Masking +#avxvnniint8 Leaf07_01EDX 4 avx # AVX Vector Neural Network Instructions, Int8 +#avxneconvert Leaf07_01EDX 5 avx # AVX Non-Exception BF16/FP16/FP32 Conversion instructions +#amx-complex Leaf07_01EDX 8 amx-tile # AMX Complex Matrix multiplication +#prefetchiti Leaf07_01EDX 14 # PREFETCHIT0/1 instructions #xsaveopt Leaf13_01EAX 0 # Optimized XSAVE #xsavec Leaf13_01EAX 1 # XSAVE with Compaction #xgetbv1 Leaf13_01EAX 2 # XGETBV with ECX=1 @@ -122,12 +130,12 @@ xsave=AvxState SseState|Ymm_Hi128 avx,fma,avx512f xsave=MPXState Bndregs|Bndcsr mpx xsave=Avx512State AvxState|OpMask|Zmm_Hi256|Hi16_Zmm avx512f xsave=CetState CetUState|CetSState shstk -xsave=AmxState Xtilecfg|Xtiledata amxtile +xsave=AmxState Xtilecfg|Xtiledata amx-tile # Processor/arch listing below this line # Source: Intel Instruction Set Extension manual, section 1.2 # Source: GCC gcc/config/i386/i386.h, i386-c.c, i386-builtins.c -# Architecture Based on New features Optional features +# Architecture Based on New features arch=x86_64 <> sse2 # Core line arch=Core2 x86_64 sse3,ssse3,cx16 @@ -135,26 +143,44 @@ arch=NHM Core2 sse4.1,sse4.2,popcnt arch=WSM NHM arch=SNB WSM avx arch=IVB SNB f16c,rdrnd,fsgsbase -arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe +arch=HSW IVB avx2,fma,bmi,bmi2,lzcnt,movbe # hle,rtm arch=BDW HSW adx,rdseed arch=BDX BDW arch=SKL BDW xsavec,xsaves -arch=ADL SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker rdpid -arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl clwb +arch=SKX SKL avx512f,avx512dq,avx512cd,avx512bw,avx512vl #clwb arch=CLX SKX avx512vnni arch=CPX CLX avx512bf16 -arch=CNL SKX avx512ifma,avx512vbmi sha -arch=ICL CNL avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq fsrm,rdpid -arch=ICX ICL pconfig -arch=TGL ICL avx512vp2intersect,shstk,,movdiri,movdir64b,ibt,keylocker -arch=SPR TGL avx512bf16,amxtile,amxbf16,amxint8,avxvnni,cldemote,pconfig,waitpkg,serialize,tsxldtrk,uintr +arch=PLC SKX avx512ifma,avx512vbmi #sha +arch=SNC PLC avx512vbmi2,gfni,vaes,vpclmulqdq,avx512vnni,avx512bitalg,avx512vpopcntdq #fsrm,rdpid +arch=WLC SNC shstk,movdiri,movdir64b,ibt,keylocker # avx512vp2intersect +arch=GLC WLC avx512bf16,avxvnni,cldemote,waitpkg,serialize,uintr # tsxldtrk +arch=RPC GLC +arch=RWC RPC prefetchiti # Atom line arch=SLM WSM rdrnd,movbe arch=GLM SLM fsgsbase,rdseed,lzcnt,xsavec,xsaves arch=TNT GLM clwb,gfni,cldemote,waitpkg,movdiri,movdir64b +arch=GRT SKL avxvnni,gfni,vaes,vpclmulqdq,serialize,shstk,cldemote,movdiri,movdir64b,ibt,waitpkg,keylocker # rdpid +arch=CMT GRT cmpccxadd,avxifma,avxneconvert,avxvnniint8 # Xeon Phi line #arch=KNL SKL avx512f,avx512er,avx512pf,avx512cd #arch=KNM KNL avx5124fmaps,avx5124vnniw,avx512vpopcntdq + # Hybrids and other names +arch=CNL PLC +arch=ICL SNC +arch=TGL WLC +arch=ADL GRT +arch=RPL GRT +arch=MTL CMT +arch=ARL CMT +arch=LNL CMT +arch=ICX SNC pconfig +arch=SPR GLC pconfig,amx-tile,amx-bf16,amx-int8 +arch=EMR SPR +arch=GNR GLC pconfig,amx-tile,amx-bf16,amx-int8,amx-fp16,amx-complex +arch=SRF CMT cmpccxadd,avxifma,avxneconvert,avxvnniint8 +arch=GRR SRF raoint +arch=CWF SRF # Longer names arch=Nehalem NHM # Intel Core i3/i5/i7 arch=Westmere WSM # Intel Core i3/i5/i7 @@ -166,14 +192,31 @@ arch=Skylake SKL # Sixth Generation Intel Core i3/i5/i7 arch=Skylake-Avx512 SKX # Intel Xeon Scalable arch=CascadeLake CLX # Second Generation Intel Xeon Scalable arch=CooperLake CPX # Third Generation Intel Xeon Scalable +arch=PalmCove PLC arch=CannonLake CNL # Intel Core i3-8121U +arch=SunnyCove SNC arch=IceLake-Client ICL # Tenth Generation Intel Core i3/i5/i7 arch=IceLake-Server ICX # Third Generation Intel Xeon Scalable -arch=AlderLake ADL -arch=SapphireRapids SPR +arch=WillowCove WLC arch=TigerLake TGL # Eleventh Generation Intel Core i3/i5/i7 +arch=GoldenCove GLC +arch=AlderLake ADL # Twelfth Generation Intel Core +arch=RaptorCove RPC +arch=RaptorLake RPL # Thirteenth Generation Intel Core +arch=RedwoodCove RWC +arch=MeteorLake MTL +arch=ArrowLake ARL +arch=LunarLake LNL +arch=SapphireRapids SPR # Fourth Generation Intel Xeon Scalable +arch=EmeraldRapids EMR # Fifth Generation Intel Xeon Scalable +arch=GraniteRapids GNR arch=Silvermont SLM arch=Goldmont GLM arch=Tremont TNT +arch=Gracemont GRT +arch=Crestmont CMT +arch=GrandRidge GRR +arch=SierraForest SRF +arch=ClearwaterForest CWF #arch=KnightsLanding KNL #arch=KnightsMill KNM diff --git a/util/x86simdgen/3rdparty/x86simd_generate.pl b/util/x86simdgen/3rdparty/x86simd_generate.pl index 99236b6eec..66bb1bbd9c 100755 --- a/util/x86simdgen/3rdparty/x86simd_generate.pl +++ b/util/x86simdgen/3rdparty/x86simd_generate.pl @@ -13,6 +13,7 @@ my %leaves = ( Leaf07_00ECX => "CPUID Leaf 7, Sub-leaf 0, ECX", Leaf07_00EDX => "CPUID Leaf 7, Sub-leaf 0, EDX", Leaf07_01EAX => "CPUID Leaf 7, Sub-leaf 1, EAX", + Leaf07_01EDX => "CPUID Leaf 7, Sub-leaf 1, EDX", Leaf13_01EAX => "CPUID Leaf 13, Sub-leaf 1, EAX", Leaf80000001hECX => "CPUID Leaf 80000001h, ECX", Leaf80000008hEBX => "CPUID Leaf 80000008h, EBX", @@ -258,7 +259,7 @@ print "\nenum X86CpuidLeaves {"; map { print " $_," } @leafNames; print " X86CpuidMaxLeaf\n};"; -my $type = scalar %leaves > 8 ? "uint16_t" : "uint8_t"; +my $type = scalar keys %leaves > 8 ? "uint16_t" : "uint8_t"; printf "\nstatic const %s x86_locators[] = {\n", $type, $type; for (my $j = 0; $j < scalar @features; ++$j) { @@ -283,7 +284,7 @@ struct X86Architecture }; static const struct X86Architecture x86_architectures[] = {|; -for (sort { $b <=> $a } keys %sorted_archs) { +for (sort keys %sorted_archs) { my $arch = $sorted_archs{$_}; next if $arch->{base} eq "<>"; printf " { cpu_%s, \"%s\" },\n", $arch->{id}, $arch->{prettyname};