glibc/sysdeps/x86_64/Makefile
H.J. Lu f3dcae82d5 Save and restore vector registers in x86-64 ld.so
This patch adds SSE, AVX and AVX512 versions of _dl_runtime_resolve
and _dl_runtime_profile, which save and restore the first 8 vector
registers used for parameter passing.  elf_machine_runtime_setup
selects the proper _dl_runtime_resolve or _dl_runtime_profile based
on _dl_x86_cpu_features.  It avoids race condition caused by
FOREIGN_CALL macros, which are only used for x86-64.

Performance impact of saving and restoring 8 vector registers are
negligible on Nehalem, Sandy Bridge, Ivy Bridge and Haswell when
ld.so is optimized with SSE2.

	[BZ #15128]
	* sysdeps/x86_64/Makefile [$(subdir) == elf] (tests): Add
	ifuncmain8.
	(modules-names): Add ifuncmod8.
	($(objpfx)ifuncmain8): New rule.
	* sysdeps/x86_64/dl-machine.h: Include <dl-procinfo.h> and
	<cpuid.h>.
	(elf_machine_runtime_setup): Use _dl_runtime_resolve_sse,
	_dl_runtime_resolve_avx, or _dl_runtime_resolve_avx512,
	_dl_runtime_profile_sse, _dl_runtime_profile_avx, or
	_dl_runtime_profile_avx512, based on HAS_ARCH_FEATURE.
	* sysdeps/x86_64/dl-trampoline.S: Rewrite.
	* sysdeps/x86_64/dl-trampoline.h: Likewise.
	* sysdeps/x86_64/ifuncmain8.c: New file.
	* sysdeps/x86_64/ifuncmod8.c: Likewise.
	* sysdeps/x86_64/nptl/tcb-offsets.sym (RTLD_SAVESPACE_SSE):
	Removed.
	* sysdeps/x86_64/nptl/tls.h (__128bits): Removed.
	(tcbhead_t): Change rtld_must_xmm_save to __glibc_unused1.
	Change rtld_savespace_sse to __glibc_unused2.
	(RTLD_CHECK_FOREIGN_CALL): Removed.
	(RTLD_ENABLE_FOREIGN_CALL): Likewise.
	(RTLD_PREPARE_FOREIGN_CALL): Likewise.
	(RTLD_FINALIZE_FOREIGN_CALL): Likewise.
2015-08-25 04:34:13 -07:00

106 lines
3.0 KiB
Makefile

# The i387 `long double' is a distinct type we support.
long-double-fcts = yes
ifeq ($(subdir),csu)
gen-as-const-headers += link-defines.sym
endif
ifeq ($(subdir),gmon)
sysdep_routines += _mcount
endif
ifeq ($(subdir),malloc)
tests += tst-mallocalign1
endif
ifeq ($(subdir),string)
sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
gen-as-const-headers += locale-defines.sym
endif
ifeq ($(subdir),elf)
sysdep-dl-routines += tlsdesc dl-tlsdesc
tests += ifuncmain8
modules-names += ifuncmod8
$(objpfx)ifuncmain8: $(objpfx)ifuncmod8.so
tests += tst-quad1 tst-quad2
modules-names += tst-quadmod1 tst-quadmod2
$(objpfx)tst-quad1: $(objpfx)tst-quadmod1.so
$(objpfx)tst-quad2: $(objpfx)tst-quadmod2.so
quad-pie-test += tst-quad1pie tst-quad2pie
tests += $(quad-pie-test)
tests-pie += $(quad-pie-test)
$(objpfx)tst-quad1pie: $(objpfx)tst-quadmod1pie.o
$(objpfx)tst-quad2pie: $(objpfx)tst-quadmod2pie.o
tests += tst-audit3 tst-audit4 tst-audit5 tst-audit10
ifeq (yes,$(config-cflags-avx))
tests += tst-audit6 tst-audit7
endif
tests += tst-split-dynreloc
LDFLAGS-tst-split-dynreloc = -Wl,-T,$(..)sysdeps/x86_64/tst-split-dynreloc.lds
tst-split-dynreloc-ENV = LD_BIND_NOW=1
modules-names += tst-auditmod3a tst-auditmod3b \
tst-auditmod4a tst-auditmod4b \
tst-auditmod5a tst-auditmod5b \
tst-auditmod6a tst-auditmod6b tst-auditmod6c \
tst-auditmod7a tst-auditmod7b \
tst-auditmod10a tst-auditmod10b
$(objpfx)tst-audit3: $(objpfx)tst-auditmod3a.so
$(objpfx)tst-audit3.out: $(objpfx)tst-auditmod3b.so
tst-audit3-ENV = LD_AUDIT=$(objpfx)tst-auditmod3b.so
$(objpfx)tst-audit4: $(objpfx)tst-auditmod4a.so
$(objpfx)tst-audit4.out: $(objpfx)tst-auditmod4b.so
tst-audit4-ENV = LD_AUDIT=$(objpfx)tst-auditmod4b.so
$(objpfx)tst-audit5: $(objpfx)tst-auditmod5a.so
$(objpfx)tst-audit5.out: $(objpfx)tst-auditmod5b.so
tst-audit5-ENV = LD_AUDIT=$(objpfx)tst-auditmod5b.so
$(objpfx)tst-audit6: $(objpfx)tst-auditmod6a.so
$(objpfx)tst-audit6.out: $(objpfx)tst-auditmod6b.so \
$(objpfx)tst-auditmod6c.so
tst-audit6-ENV = LD_AUDIT=$(objpfx)tst-auditmod6b.so:$(objpfx)tst-auditmod6c.so
$(objpfx)tst-audit7: $(objpfx)tst-auditmod7a.so
$(objpfx)tst-audit7.out: $(objpfx)tst-auditmod7b.so
tst-audit7-ENV = LD_AUDIT=$(objpfx)tst-auditmod7b.so
$(objpfx)tst-audit10: $(objpfx)tst-auditmod10a.so
$(objpfx)tst-audit10.out: $(objpfx)tst-auditmod10b.so
tst-audit10-ENV = LD_AUDIT=$(objpfx)tst-auditmod10b.so
ifeq (yes,$(config-cflags-avx))
AVX-CFLAGS=-mavx
ifeq (yes,$(config-cflags-novzeroupper))
AVX-CFLAGS+=-mno-vzeroupper
endif
CFLAGS-tst-audit4.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4a.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod4b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6b.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod6c.c += $(AVX-CFLAGS)
CFLAGS-tst-auditmod7b.c += $(AVX-CFLAGS)
endif
ifeq (yes,$(config-cflags-avx512))
AVX512-CFLAGS = -mavx512f
CFLAGS-tst-audit10.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10a.c += $(AVX512-CFLAGS)
CFLAGS-tst-auditmod10b.c += $(AVX512-CFLAGS)
endif
endif
ifeq ($(subdir),csu)
gen-as-const-headers += tlsdesc.sym
endif