mirror of
https://sourceware.org/git/glibc.git
synced 2025-01-12 12:10:16 +00:00
x86-64: Only define used SSE/AVX/AVX512 run-time resolvers
When glibc is built with x86-64 ISA level v3, the SSE run-time resolvers aren't used. For an x86-64 ISA level v4 build, both the SSE and AVX resolvers are unused. Check the minimum x86-64 ISA level to exclude the unused run-time resolvers.
This commit is contained in:
parent
f56c497d2b
commit
cfdc4df66c
@ -68,10 +68,12 @@
|
||||
compile-time constant.. */
|
||||
|
||||
/* ISA level >= 4 guaranteed includes. */
|
||||
#define AVX512F_X86_ISA_LEVEL 4
|
||||
#define AVX512VL_X86_ISA_LEVEL 4
|
||||
#define AVX512BW_X86_ISA_LEVEL 4
|
||||
|
||||
/* ISA level >= 3 guaranteed includes. */
|
||||
#define AVX_X86_ISA_LEVEL 3
|
||||
#define AVX2_X86_ISA_LEVEL 3
|
||||
#define BMI2_X86_ISA_LEVEL 3
|
||||
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <dl-tlsdesc.h>
|
||||
#include <dl-static-tls.h>
|
||||
#include <dl-machine-rel.h>
|
||||
#include <isa-level.h>
|
||||
|
||||
/* Return nonzero iff ELF header is compatible with the running host. */
|
||||
static inline int __attribute__ ((unused))
|
||||
@ -86,6 +87,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
/* Identify this shared object. */
|
||||
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
|
||||
|
||||
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||
|
||||
/* The got[2] entry contains the address of a function which gets
|
||||
called to get the address of a so far unresolved function and
|
||||
jump to it. The profiling extension of the dynamic linker allows
|
||||
@ -94,9 +97,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
end in this function. */
|
||||
if (__glibc_unlikely (profile))
|
||||
{
|
||||
if (CPU_FEATURE_USABLE (AVX512F))
|
||||
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
|
||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
|
||||
else if (CPU_FEATURE_USABLE (AVX))
|
||||
else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
|
||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
|
||||
else
|
||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
|
||||
@ -112,9 +115,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
||||
/* This function will get called to fix up the GOT entry
|
||||
indicated by the offset on the stack, and then jump to
|
||||
the resolved address. */
|
||||
if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
|
||||
if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
|
||||
|| GLRO(dl_x86_cpu_features).xsave_state_size != 0)
|
||||
*(ElfW(Addr) *) (got + 2)
|
||||
= (CPU_FEATURE_USABLE (XSAVEC)
|
||||
= (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
|
||||
? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
|
||||
: (ElfW(Addr)) &_dl_runtime_resolve_xsave);
|
||||
else
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <sysdep.h>
|
||||
#include <cpu-features-offsets.h>
|
||||
#include <link-defines.h>
|
||||
#include <isa-level.h>
|
||||
|
||||
#ifndef DL_STACK_ALIGNMENT
|
||||
/* Due to GCC bug:
|
||||
@ -62,35 +63,39 @@
|
||||
#undef VMOVA
|
||||
#undef VEC_SIZE
|
||||
|
||||
#define VEC_SIZE 32
|
||||
#define VMOVA vmovdqa
|
||||
#define VEC(i) ymm##i
|
||||
#define _dl_runtime_profile _dl_runtime_profile_avx
|
||||
#include "dl-trampoline.h"
|
||||
#undef _dl_runtime_profile
|
||||
#undef VEC
|
||||
#undef VMOVA
|
||||
#undef VEC_SIZE
|
||||
#if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
|
||||
# define VEC_SIZE 32
|
||||
# define VMOVA vmovdqa
|
||||
# define VEC(i) ymm##i
|
||||
# define _dl_runtime_profile _dl_runtime_profile_avx
|
||||
# include "dl-trampoline.h"
|
||||
# undef _dl_runtime_profile
|
||||
# undef VEC
|
||||
# undef VMOVA
|
||||
# undef VEC_SIZE
|
||||
#endif
|
||||
|
||||
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
|
||||
/* movaps/movups is 1-byte shorter. */
|
||||
#define VEC_SIZE 16
|
||||
#define VMOVA movaps
|
||||
#define VEC(i) xmm##i
|
||||
#define _dl_runtime_profile _dl_runtime_profile_sse
|
||||
#undef RESTORE_AVX
|
||||
#include "dl-trampoline.h"
|
||||
#undef _dl_runtime_profile
|
||||
#undef VEC
|
||||
#undef VMOVA
|
||||
#undef VEC_SIZE
|
||||
# define VEC_SIZE 16
|
||||
# define VMOVA movaps
|
||||
# define VEC(i) xmm##i
|
||||
# define _dl_runtime_profile _dl_runtime_profile_sse
|
||||
# undef RESTORE_AVX
|
||||
# include "dl-trampoline.h"
|
||||
# undef _dl_runtime_profile
|
||||
# undef VEC
|
||||
# undef VMOVA
|
||||
# undef VEC_SIZE
|
||||
|
||||
#define USE_FXSAVE
|
||||
#define STATE_SAVE_ALIGNMENT 16
|
||||
#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
|
||||
#include "dl-trampoline.h"
|
||||
#undef _dl_runtime_resolve
|
||||
#undef USE_FXSAVE
|
||||
#undef STATE_SAVE_ALIGNMENT
|
||||
# define USE_FXSAVE
|
||||
# define STATE_SAVE_ALIGNMENT 16
|
||||
# define _dl_runtime_resolve _dl_runtime_resolve_fxsave
|
||||
# include "dl-trampoline.h"
|
||||
# undef _dl_runtime_resolve
|
||||
# undef USE_FXSAVE
|
||||
# undef STATE_SAVE_ALIGNMENT
|
||||
#endif
|
||||
|
||||
#define USE_XSAVE
|
||||
#define STATE_SAVE_ALIGNMENT 64
|
||||
|
Loading…
Reference in New Issue
Block a user