x86: Set preferred CPU features on the KH-40000 and KX-7000 Zhaoxin processors

Fix code formatting under the Zhaoxin branch and add comments for
different Zhaoxin models.

Unaligned AVX load are slower on KH-40000 and KX-7000, so disable
the AVX_Fast_Unaligned_Load.

Enable Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to
use sse2_unaligned version of memset,strcpy and strcat.
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
This commit is contained in:
MayShao-oc 2024-06-29 11:58:26 +08:00 committed by H.J. Lu
parent 2f1f7a5f8a
commit 44d757eb9f

View File

@ -1023,39 +1023,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
model += extended_model;
if (family == 0x6)
{
if (model == 0xf || model == 0x19)
{
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
|= bit_arch_Slow_SSE4_2;
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
}
}
else if (family == 0x7)
{
if (model == 0x1b)
{
/* Tuning for older Zhaoxin processors. */
if (model == 0xf || model == 0x19)
{
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
|= bit_arch_Slow_SSE4_2;
|= bit_arch_Slow_SSE4_2;
/* Unaligned AVX loads are slower. */
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
}
}
else if (family == 0x7)
{
switch (model)
{
/* Wudaokou microarch tuning. */
case 0x1b:
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_Slow_SSE4_2]
|= bit_arch_Slow_SSE4_2;
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
}
else if (model == 0x3b)
{
&= ~bit_arch_AVX_Fast_Unaligned_Load;
break;
/* Lujiazui microarch tuning. */
case 0x3b:
CPU_FEATURE_UNSET (cpu_features, AVX);
CPU_FEATURE_UNSET (cpu_features, AVX2);
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
&= ~bit_arch_AVX_Fast_Unaligned_Load;
break;
/* Yongfeng and Shijidadao mircoarch tuning. */
case 0x5b:
case 0x6b:
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
&= ~bit_arch_AVX_Fast_Unaligned_Load;
/* To use sse2_unaligned versions of memset, strcpy and strcat.
*/
cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|= (bit_arch_Prefer_No_VZEROUPPER
| bit_arch_Fast_Unaligned_Load);
break;
}
}
}