x86-64: Only define used SSE/AVX/AVX512 run-time resolvers

When glibc is built with x86-64 ISA level v3, the SSE run-time resolvers
aren't used.  For an x86-64 ISA level v4 build, both the SSE and AVX
resolvers are unused.  Check the minimum x86-64 ISA level to exclude the
unused run-time resolvers.
This commit is contained in:
H.J. Lu 2022-06-27 11:36:28 -07:00
parent f56c497d2b
commit cfdc4df66c
3 changed files with 41 additions and 30 deletions

View File

@ -68,10 +68,12 @@
compile-time constant.. */ compile-time constant.. */
/* ISA level >= 4 guaranteed includes. */ /* ISA level >= 4 guaranteed includes. */
#define AVX512F_X86_ISA_LEVEL 4
#define AVX512VL_X86_ISA_LEVEL 4 #define AVX512VL_X86_ISA_LEVEL 4
#define AVX512BW_X86_ISA_LEVEL 4 #define AVX512BW_X86_ISA_LEVEL 4
/* ISA level >= 3 guaranteed includes. */ /* ISA level >= 3 guaranteed includes. */
#define AVX_X86_ISA_LEVEL 3
#define AVX2_X86_ISA_LEVEL 3 #define AVX2_X86_ISA_LEVEL 3
#define BMI2_X86_ISA_LEVEL 3 #define BMI2_X86_ISA_LEVEL 3

View File

@ -28,6 +28,7 @@
#include <dl-tlsdesc.h> #include <dl-tlsdesc.h>
#include <dl-static-tls.h> #include <dl-static-tls.h>
#include <dl-machine-rel.h> #include <dl-machine-rel.h>
#include <isa-level.h>
/* Return nonzero iff ELF header is compatible with the running host. */ /* Return nonzero iff ELF header is compatible with the running host. */
static inline int __attribute__ ((unused)) static inline int __attribute__ ((unused))
@ -86,6 +87,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* Identify this shared object. */ /* Identify this shared object. */
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l; *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
const struct cpu_features* cpu_features = __get_cpu_features ();
/* The got[2] entry contains the address of a function which gets /* The got[2] entry contains the address of a function which gets
called to get the address of a so far unresolved function and called to get the address of a so far unresolved function and
jump to it. The profiling extension of the dynamic linker allows jump to it. The profiling extension of the dynamic linker allows
@ -94,9 +97,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
end in this function. */ end in this function. */
if (__glibc_unlikely (profile)) if (__glibc_unlikely (profile))
{ {
if (CPU_FEATURE_USABLE (AVX512F)) if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512; *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
else if (CPU_FEATURE_USABLE (AVX)) else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx; *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
else else
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse; *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
@ -112,9 +115,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
/* This function will get called to fix up the GOT entry /* This function will get called to fix up the GOT entry
indicated by the offset on the stack, and then jump to indicated by the offset on the stack, and then jump to
the resolved address. */ the resolved address. */
if (GLRO(dl_x86_cpu_features).xsave_state_size != 0) if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
|| GLRO(dl_x86_cpu_features).xsave_state_size != 0)
*(ElfW(Addr) *) (got + 2) *(ElfW(Addr) *) (got + 2)
= (CPU_FEATURE_USABLE (XSAVEC) = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
? (ElfW(Addr)) &_dl_runtime_resolve_xsavec ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
: (ElfW(Addr)) &_dl_runtime_resolve_xsave); : (ElfW(Addr)) &_dl_runtime_resolve_xsave);
else else

View File

@ -20,6 +20,7 @@
#include <sysdep.h> #include <sysdep.h>
#include <cpu-features-offsets.h> #include <cpu-features-offsets.h>
#include <link-defines.h> #include <link-defines.h>
#include <isa-level.h>
#ifndef DL_STACK_ALIGNMENT #ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug: /* Due to GCC bug:
@ -62,35 +63,39 @@
#undef VMOVA #undef VMOVA
#undef VEC_SIZE #undef VEC_SIZE
#define VEC_SIZE 32 #if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
#define VMOVA vmovdqa # define VEC_SIZE 32
#define VEC(i) ymm##i # define VMOVA vmovdqa
#define _dl_runtime_profile _dl_runtime_profile_avx # define VEC(i) ymm##i
#include "dl-trampoline.h" # define _dl_runtime_profile _dl_runtime_profile_avx
#undef _dl_runtime_profile # include "dl-trampoline.h"
#undef VEC # undef _dl_runtime_profile
#undef VMOVA # undef VEC
#undef VEC_SIZE # undef VMOVA
# undef VEC_SIZE
#endif
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
/* movaps/movups is 1-byte shorter. */ /* movaps/movups is 1-byte shorter. */
#define VEC_SIZE 16 # define VEC_SIZE 16
#define VMOVA movaps # define VMOVA movaps
#define VEC(i) xmm##i # define VEC(i) xmm##i
#define _dl_runtime_profile _dl_runtime_profile_sse # define _dl_runtime_profile _dl_runtime_profile_sse
#undef RESTORE_AVX # undef RESTORE_AVX
#include "dl-trampoline.h" # include "dl-trampoline.h"
#undef _dl_runtime_profile # undef _dl_runtime_profile
#undef VEC # undef VEC
#undef VMOVA # undef VMOVA
#undef VEC_SIZE # undef VEC_SIZE
#define USE_FXSAVE # define USE_FXSAVE
#define STATE_SAVE_ALIGNMENT 16 # define STATE_SAVE_ALIGNMENT 16
#define _dl_runtime_resolve _dl_runtime_resolve_fxsave # define _dl_runtime_resolve _dl_runtime_resolve_fxsave
#include "dl-trampoline.h" # include "dl-trampoline.h"
#undef _dl_runtime_resolve # undef _dl_runtime_resolve
#undef USE_FXSAVE # undef USE_FXSAVE
#undef STATE_SAVE_ALIGNMENT # undef STATE_SAVE_ALIGNMENT
#endif
#define USE_XSAVE #define USE_XSAVE
#define STATE_SAVE_ALIGNMENT 64 #define STATE_SAVE_ALIGNMENT 64