x86: Add support for AVX10 preset and vec size in cpu-features

This commit adds support for the new AVX10 CPU features:
https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf

We add checks for:
    - `AVX10`: Check if AVX10 is present.
    - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support.

`make check` passes and cpuid output was checked against GNR/DMR on an
emulator.
This commit is contained in:
Noah Goldstein 2023-09-20 15:44:50 -05:00
parent 5f913506f4
commit d90b43a4ed
5 changed files with 83 additions and 3 deletions

View File

@ -222,6 +222,18 @@ Leaf (EAX = 23H).
@item @item
@code{AVX} -- The AVX instruction extensions. @code{AVX} -- The AVX instruction extensions.
@item
@code{AVX10} -- The AVX10 instruction extensions.
@item
@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
@item
@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
@item
@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
@item @item
@code{AVX2} -- The AVX2 instruction extensions. @code{AVX2} -- The AVX2 instruction extensions.

View File

@ -30,7 +30,8 @@ enum
CPUID_INDEX_80000008, CPUID_INDEX_80000008,
CPUID_INDEX_7_ECX_1, CPUID_INDEX_7_ECX_1,
CPUID_INDEX_19, CPUID_INDEX_19,
CPUID_INDEX_14_ECX_0 CPUID_INDEX_14_ECX_0,
CPUID_INDEX_24_ECX_0
}; };
struct cpuid_feature struct cpuid_feature
@ -312,6 +313,7 @@ enum
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5, x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8, x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14, x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21, x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
x86_cpu_index_19_ebx x86_cpu_index_19_ebx
@ -325,5 +327,13 @@ enum
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int) = (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)), + cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4 x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
x86_cpu_index_24_ecx_0_ebx
= (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
}; };

View File

@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK); CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
#endif #endif
enum
{
os_xmm = 1,
os_ymm = 2,
os_zmm = 4
} os_vector_size = os_xmm;
/* Can we call xgetbv? */ /* Can we call xgetbv? */
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE)) if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
{ {
unsigned int xcrlow; unsigned int xcrlow;
unsigned int xcrhigh; unsigned int xcrhigh;
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0)); asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
/* Is YMM and XMM state usable? */ /* Is YMM and XMM state usable? */
if ((xcrlow & (bit_YMM_state | bit_XMM_state)) if ((xcrlow & (bit_YMM_state | bit_XMM_state))
@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
/* Determine if AVX is usable. */ /* Determine if AVX is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX)) if (CPU_FEATURES_CPU_P (cpu_features, AVX))
{ {
os_vector_size |= os_ymm;
CPU_FEATURE_SET (cpu_features, AVX); CPU_FEATURE_SET (cpu_features, AVX);
/* The following features depend on AVX being usable. */ /* The following features depend on AVX being usable. */
/* Determine if AVX2 is usable. */ /* Determine if AVX2 is usable. */
@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
| bit_ZMM16_31_state)) | bit_ZMM16_31_state))
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state)) == (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
{ {
os_vector_size |= os_zmm;
/* Determine if AVX512F is usable. */ /* Determine if AVX512F is usable. */
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F)) if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
{ {
@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
} }
} }
if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
&& cpu_features->basic.max_cpuid >= 0x24)
{
__cpuid_count (
0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
if (os_vector_size & os_xmm)
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
if (os_vector_size & os_ymm)
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
if (os_vector_size & os_zmm)
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
}
/* Are XTILECFG and XTILEDATA states usable? */ /* Are XTILECFG and XTILEDATA states usable? */
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state)) if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
== (bit_XTILECFG_state | bit_XTILEDATA_state)) == (bit_XTILECFG_state | bit_XTILEDATA_state))

View File

@ -29,7 +29,7 @@
enum enum
{ {
CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1 CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
}; };
enum enum
@ -319,6 +319,7 @@ enum
#define bit_cpu_AVX_NE_CONVERT (1u << 5) #define bit_cpu_AVX_NE_CONVERT (1u << 5)
#define bit_cpu_AMX_COMPLEX (1u << 8) #define bit_cpu_AMX_COMPLEX (1u << 8)
#define bit_cpu_PREFETCHI (1u << 14) #define bit_cpu_PREFETCHI (1u << 14)
#define bit_cpu_AVX10 (1u << 19)
#define bit_cpu_APX_F (1u << 21) #define bit_cpu_APX_F (1u << 21)
/* CPUID_INDEX_19. */ /* CPUID_INDEX_19. */
@ -332,6 +333,13 @@ enum
/* EBX. */ /* EBX. */
#define bit_cpu_PTWRITE (1u << 4) #define bit_cpu_PTWRITE (1u << 4)
/* CPUID_INDEX_24_ECX_0. */
/* EBX. */
#define bit_cpu_AVX10_XMM (1u << 16)
#define bit_cpu_AVX10_YMM (1u << 17)
#define bit_cpu_AVX10_ZMM (1u << 18)
/* CPUID_INDEX_1. */ /* CPUID_INDEX_1. */
/* ECX. */ /* ECX. */
@ -563,6 +571,7 @@ enum
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1 #define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1 #define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1 #define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1 #define index_cpu_APX_F CPUID_INDEX_7_ECX_1
/* CPUID_INDEX_19. */ /* CPUID_INDEX_19. */
@ -576,6 +585,13 @@ enum
/* EBX. */ /* EBX. */
#define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0 #define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
/* CPUID_INDEX_24_ECX_0. */
/* EBX. */
#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0
#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0
#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0
/* CPUID_INDEX_1. */ /* CPUID_INDEX_1. */
/* ECX. */ /* ECX. */
@ -809,6 +825,7 @@ enum
#define reg_AVX_NE_CONVERT edx #define reg_AVX_NE_CONVERT edx
#define reg_AMX_COMPLEX edx #define reg_AMX_COMPLEX edx
#define reg_PREFETCHI edx #define reg_PREFETCHI edx
#define reg_AVX10 edx
#define reg_APX_F edx #define reg_APX_F edx
/* CPUID_INDEX_19. */ /* CPUID_INDEX_19. */
@ -822,6 +839,14 @@ enum
/* EBX. */ /* EBX. */
#define reg_PTWRITE ebx #define reg_PTWRITE ebx
/* CPUID_INDEX_24_ECX_0. */
/* EBX. */
#define reg_AVX10_XMM ebx
#define reg_AVX10_YMM ebx
#define reg_AVX10_ZMM ebx
/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values /* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
sequentially, then define the bit_arch* and index_arch_* lookup sequentially, then define the bit_arch* and index_arch_* lookup
constants. */ constants. */

View File

@ -219,6 +219,7 @@ do_test (void)
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT); CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX); CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
CHECK_CPU_FEATURE_PRESENT (PREFETCHI); CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
CHECK_CPU_FEATURE_PRESENT (AVX10);
CHECK_CPU_FEATURE_PRESENT (APX_F); CHECK_CPU_FEATURE_PRESENT (APX_F);
CHECK_CPU_FEATURE_PRESENT (AESKLE); CHECK_CPU_FEATURE_PRESENT (AESKLE);
CHECK_CPU_FEATURE_PRESENT (WIDE_KL); CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
@ -391,11 +392,18 @@ do_test (void)
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT); CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX); CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI); CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
CHECK_CPU_FEATURE_ACTIVE (AVX10);
CHECK_CPU_FEATURE_ACTIVE (APX_F); CHECK_CPU_FEATURE_ACTIVE (APX_F);
CHECK_CPU_FEATURE_ACTIVE (AESKLE); CHECK_CPU_FEATURE_ACTIVE (AESKLE);
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL); CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
CHECK_CPU_FEATURE_ACTIVE (PTWRITE); CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
if (CPU_FEATURE_ACTIVE (AVX10))
{
CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
}
return 0; return 0;
} }