mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-09 23:00:07 +00:00
x86-64: Only define used SSE/AVX/AVX512 run-time resolvers
When glibc is built with x86-64 ISA level v3, the SSE run-time resolvers aren't used. For an x86-64 ISA level v4 build, both the SSE and AVX resolvers are unused. Check the minimum x86-64 ISA level at build time to exclude the unused run-time resolvers.
This commit is contained in:
parent
f56c497d2b
commit
cfdc4df66c
@ -68,10 +68,12 @@
|
|||||||
compile-time constant.. */
|
compile-time constant.. */
|
||||||
|
|
||||||
/* ISA level >= 4 guaranteed includes. */
|
/* ISA level >= 4 guaranteed includes. */
|
||||||
|
#define AVX512F_X86_ISA_LEVEL 4
|
||||||
#define AVX512VL_X86_ISA_LEVEL 4
|
#define AVX512VL_X86_ISA_LEVEL 4
|
||||||
#define AVX512BW_X86_ISA_LEVEL 4
|
#define AVX512BW_X86_ISA_LEVEL 4
|
||||||
|
|
||||||
/* ISA level >= 3 guaranteed includes. */
|
/* ISA level >= 3 guaranteed includes. */
|
||||||
|
#define AVX_X86_ISA_LEVEL 3
|
||||||
#define AVX2_X86_ISA_LEVEL 3
|
#define AVX2_X86_ISA_LEVEL 3
|
||||||
#define BMI2_X86_ISA_LEVEL 3
|
#define BMI2_X86_ISA_LEVEL 3
|
||||||
|
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include <dl-tlsdesc.h>
|
#include <dl-tlsdesc.h>
|
||||||
#include <dl-static-tls.h>
|
#include <dl-static-tls.h>
|
||||||
#include <dl-machine-rel.h>
|
#include <dl-machine-rel.h>
|
||||||
|
#include <isa-level.h>
|
||||||
|
|
||||||
/* Return nonzero iff ELF header is compatible with the running host. */
|
/* Return nonzero iff ELF header is compatible with the running host. */
|
||||||
static inline int __attribute__ ((unused))
|
static inline int __attribute__ ((unused))
|
||||||
@ -86,6 +87,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
|||||||
/* Identify this shared object. */
|
/* Identify this shared object. */
|
||||||
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
|
*(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l;
|
||||||
|
|
||||||
|
const struct cpu_features* cpu_features = __get_cpu_features ();
|
||||||
|
|
||||||
/* The got[2] entry contains the address of a function which gets
|
/* The got[2] entry contains the address of a function which gets
|
||||||
called to get the address of a so far unresolved function and
|
called to get the address of a so far unresolved function and
|
||||||
jump to it. The profiling extension of the dynamic linker allows
|
jump to it. The profiling extension of the dynamic linker allows
|
||||||
@ -94,9 +97,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
|||||||
end in this function. */
|
end in this function. */
|
||||||
if (__glibc_unlikely (profile))
|
if (__glibc_unlikely (profile))
|
||||||
{
|
{
|
||||||
if (CPU_FEATURE_USABLE (AVX512F))
|
if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F))
|
||||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
|
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512;
|
||||||
else if (CPU_FEATURE_USABLE (AVX))
|
else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX))
|
||||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
|
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx;
|
||||||
else
|
else
|
||||||
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
|
*(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_sse;
|
||||||
@ -112,9 +115,10 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
|
|||||||
/* This function will get called to fix up the GOT entry
|
/* This function will get called to fix up the GOT entry
|
||||||
indicated by the offset on the stack, and then jump to
|
indicated by the offset on the stack, and then jump to
|
||||||
the resolved address. */
|
the resolved address. */
|
||||||
if (GLRO(dl_x86_cpu_features).xsave_state_size != 0)
|
if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL
|
||||||
|
|| GLRO(dl_x86_cpu_features).xsave_state_size != 0)
|
||||||
*(ElfW(Addr) *) (got + 2)
|
*(ElfW(Addr) *) (got + 2)
|
||||||
= (CPU_FEATURE_USABLE (XSAVEC)
|
= (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)
|
||||||
? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
|
? (ElfW(Addr)) &_dl_runtime_resolve_xsavec
|
||||||
: (ElfW(Addr)) &_dl_runtime_resolve_xsave);
|
: (ElfW(Addr)) &_dl_runtime_resolve_xsave);
|
||||||
else
|
else
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
#include <sysdep.h>
|
#include <sysdep.h>
|
||||||
#include <cpu-features-offsets.h>
|
#include <cpu-features-offsets.h>
|
||||||
#include <link-defines.h>
|
#include <link-defines.h>
|
||||||
|
#include <isa-level.h>
|
||||||
|
|
||||||
#ifndef DL_STACK_ALIGNMENT
|
#ifndef DL_STACK_ALIGNMENT
|
||||||
/* Due to GCC bug:
|
/* Due to GCC bug:
|
||||||
@ -62,35 +63,39 @@
|
|||||||
#undef VMOVA
|
#undef VMOVA
|
||||||
#undef VEC_SIZE
|
#undef VEC_SIZE
|
||||||
|
|
||||||
#define VEC_SIZE 32
|
#if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
|
||||||
#define VMOVA vmovdqa
|
# define VEC_SIZE 32
|
||||||
#define VEC(i) ymm##i
|
# define VMOVA vmovdqa
|
||||||
#define _dl_runtime_profile _dl_runtime_profile_avx
|
# define VEC(i) ymm##i
|
||||||
#include "dl-trampoline.h"
|
# define _dl_runtime_profile _dl_runtime_profile_avx
|
||||||
#undef _dl_runtime_profile
|
# include "dl-trampoline.h"
|
||||||
#undef VEC
|
# undef _dl_runtime_profile
|
||||||
#undef VMOVA
|
# undef VEC
|
||||||
#undef VEC_SIZE
|
# undef VMOVA
|
||||||
|
# undef VEC_SIZE
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
|
||||||
/* movaps/movups is 1-byte shorter. */
|
/* movaps/movups is 1-byte shorter. */
|
||||||
#define VEC_SIZE 16
|
# define VEC_SIZE 16
|
||||||
#define VMOVA movaps
|
# define VMOVA movaps
|
||||||
#define VEC(i) xmm##i
|
# define VEC(i) xmm##i
|
||||||
#define _dl_runtime_profile _dl_runtime_profile_sse
|
# define _dl_runtime_profile _dl_runtime_profile_sse
|
||||||
#undef RESTORE_AVX
|
# undef RESTORE_AVX
|
||||||
#include "dl-trampoline.h"
|
# include "dl-trampoline.h"
|
||||||
#undef _dl_runtime_profile
|
# undef _dl_runtime_profile
|
||||||
#undef VEC
|
# undef VEC
|
||||||
#undef VMOVA
|
# undef VMOVA
|
||||||
#undef VEC_SIZE
|
# undef VEC_SIZE
|
||||||
|
|
||||||
#define USE_FXSAVE
|
# define USE_FXSAVE
|
||||||
#define STATE_SAVE_ALIGNMENT 16
|
# define STATE_SAVE_ALIGNMENT 16
|
||||||
#define _dl_runtime_resolve _dl_runtime_resolve_fxsave
|
# define _dl_runtime_resolve _dl_runtime_resolve_fxsave
|
||||||
#include "dl-trampoline.h"
|
# include "dl-trampoline.h"
|
||||||
#undef _dl_runtime_resolve
|
# undef _dl_runtime_resolve
|
||||||
#undef USE_FXSAVE
|
# undef USE_FXSAVE
|
||||||
#undef STATE_SAVE_ALIGNMENT
|
# undef STATE_SAVE_ALIGNMENT
|
||||||
|
#endif
|
||||||
|
|
||||||
#define USE_XSAVE
|
#define USE_XSAVE
|
||||||
#define STATE_SAVE_ALIGNMENT 64
|
#define STATE_SAVE_ALIGNMENT 64
|
||||||
|
Loading…
Reference in New Issue
Block a user