mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-21 20:40:05 +00:00
x86-64: Allocate state buffer space for RDI, RSI and RBX
_dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and R11. Define TLSDESC_CALL_REGISTER_SAVE_AREA to allocate space for RDI, RSI and RBX to avoid clobbering saved RDI, RSI and RBX values on stack by xsave to STATE_SAVE_OFFSET(%rsp).

+==================+<- stack frame start aligned at 8 or 16 bytes
|                  |<- RDI saved in the red zone
|                  |<- RSI saved in the red zone
|                  |<- RBX saved in the red zone
|                  |<- paddings for stack realignment of 64 bytes
|------------------|<- xsave buffer end aligned at 64 bytes
|                  |<-
|                  |<-
|                  |<-
|------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|                  |<- 8-byte padding for 64-byte alignment
|                  |<- 8-byte padding for 64-byte alignment
|                  |<- R11
|                  |<- R10
|                  |<- R9
|                  |<- R8
|                  |<- RDX
|                  |<- RCX
+==================+<- RSP aligned at 64 bytes

Define TLSDESC_CALL_REGISTER_SAVE_AREA, the total register save area size for all integer registers by adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto stack without adjusting stack pointer first, using the red-zone.

This fixes BZ #31501.

Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
This commit is contained in:
parent
f44f3aed31
commit
717ebfa85c
@ -311,7 +311,7 @@ update_active (struct cpu_features *cpu_features)
|
||||
/* NB: On AMX capable processors, ebx always includes AMX
|
||||
states. */
|
||||
unsigned int xsave_state_full_size
|
||||
= ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64);
|
||||
= ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64);
|
||||
|
||||
cpu_features->xsave_state_size
|
||||
= xsave_state_full_size;
|
||||
@ -401,8 +401,10 @@ update_active (struct cpu_features *cpu_features)
|
||||
unsigned int amx_size
|
||||
= (xstate_amx_comp_offsets[31]
|
||||
+ xstate_amx_comp_sizes[31]);
|
||||
amx_size = ALIGN_UP (amx_size + STATE_SAVE_OFFSET,
|
||||
64);
|
||||
amx_size
|
||||
= ALIGN_UP ((amx_size
|
||||
+ TLSDESC_CALL_REGISTER_SAVE_AREA),
|
||||
64);
|
||||
/* Set xsave_state_full_size to the compact AMX
|
||||
state size for XSAVEC. NB: xsave_state_full_size
|
||||
is only used in _dl_tlsdesc_dynamic_xsave and
|
||||
@ -410,7 +412,8 @@ update_active (struct cpu_features *cpu_features)
|
||||
cpu_features->xsave_state_full_size = amx_size;
|
||||
#endif
|
||||
cpu_features->xsave_state_size
|
||||
= ALIGN_UP (size + STATE_SAVE_OFFSET, 64);
|
||||
= ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA,
|
||||
64);
|
||||
CPU_FEATURE_SET (cpu_features, XSAVEC);
|
||||
}
|
||||
}
|
||||
|
@ -38,14 +38,59 @@
|
||||
#ifdef __x86_64__
|
||||
/* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need
|
||||
space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be
|
||||
aligned to 16 bytes for fxsave and 64 bytes for xsave.
|
||||
aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero
|
||||
because MOV, instead of PUSH, is used to save registers onto stack.
|
||||
|
||||
NB: It is non-zero because of the 128-byte red-zone. Some registers
|
||||
are saved on stack without adjusting stack pointer first. When we
|
||||
update stack pointer to allocate more space, we need to take the
|
||||
red-zone into account. */
|
||||
+==================+<- stack frame start aligned at 8 or 16 bytes
|
||||
| |<- paddings for stack realignment of 64 bytes
|
||||
|------------------|<- xsave buffer end aligned at 64 bytes
|
||||
| |<-
|
||||
| |<-
|
||||
| |<-
|
||||
|------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|
||||
| |<- 8-byte padding for 64-byte alignment
|
||||
| |<- R9
|
||||
| |<- R8
|
||||
| |<- RDI
|
||||
| |<- RSI
|
||||
| |<- RDX
|
||||
| |<- RCX
|
||||
| |<- RAX
|
||||
+==================+<- RSP aligned at 64 bytes
|
||||
|
||||
*/
|
||||
# define STATE_SAVE_OFFSET (8 * 7 + 8)
|
||||
|
||||
/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
|
||||
stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and
|
||||
R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved
|
||||
RDI, RSI and RBX values on stack by xsave.
|
||||
|
||||
+==================+<- stack frame start aligned at 8 or 16 bytes
|
||||
| |<- RDI saved in the red zone
|
||||
| |<- RSI saved in the red zone
|
||||
| |<- RBX saved in the red zone
|
||||
| |<- paddings for stack realignment of 64 bytes
|
||||
|------------------|<- xsave buffer end aligned at 64 bytes
|
||||
| |<-
|
||||
| |<-
|
||||
| |<-
|
||||
|------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
|
||||
| |<- 8-byte padding for 64-byte alignment
|
||||
| |<- 8-byte padding for 64-byte alignment
|
||||
| |<- R11
|
||||
| |<- R10
|
||||
| |<- R9
|
||||
| |<- R8
|
||||
| |<- RDX
|
||||
| |<- RCX
|
||||
+==================+<- RSP aligned at 64 bytes
|
||||
|
||||
Define the total register save area size for all integer registers by
|
||||
adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
|
||||
stack without adjusting stack pointer first, using the red-zone. */
|
||||
# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
|
||||
|
||||
/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX
|
||||
registers are mutually exclusive. */
|
||||
# define STATE_SAVE_MASK \
|
||||
@ -66,8 +111,9 @@
|
||||
(STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
|
||||
#else
|
||||
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386
|
||||
doesn't have red-zone, use 0 here. */
|
||||
uses PUSH to save registers onto stack, use 0 here. */
|
||||
# define STATE_SAVE_OFFSET 0
|
||||
# define TLSDESC_CALL_REGISTER_SAVE_AREA 0
|
||||
|
||||
/* Save SSE, AVX, AVX512, mask and bound registers. */
|
||||
# define STATE_SAVE_MASK \
|
||||
|
87
sysdeps/x86_64/tst-gnu2-tls2mod1.S
Normal file
87
sysdeps/x86_64/tst-gnu2-tls2mod1.S
Normal file
@ -0,0 +1,87 @@
|
||||
/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx.
|
||||
Copyright (C) 2024 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec
|
||||
to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size
|
||||
is 960 bytes and this test didn't fail. It may be due to the unused
|
||||
last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and
|
||||
this test might fail without the fix. */
|
||||
/* OFFSET is the number of bytes apply_tls subtracts from the stack
   pointer right after aligning it to 64 bytes and before pushing %rbx.
   The #ifndef guard lets the build override the default of 40, for
   example with -DOFFSET=N.  */
#ifndef OFFSET
|
||||
# define OFFSET 40
|
||||
#endif
|
||||
|
||||
/* apply_tls: regression check that a TLS descriptor call preserves
   %rdi, %rsi and %rbx.

   In:   %rdi = pointer to a buffer; 32 bytes are read from it, at
         offsets 0 and 16.
   Out:  %rax = address of tls_var1 + 32 for the calling thread, after
         the 32 bytes read from the buffer have been stored there.
   NOTE(review): the C prototype is not visible in this file; the
   register usage suggests one pointer argument and a pointer return,
   to be confirmed against the C caller.

   The function executes HLT when one of the checked registers differs
   after the call.  HLT is a privileged instruction, so presumably this
   faults in user mode and terminates the test; confirm with the test
   driver.

   %xmm0 is loaded before the TLS descriptor call and stored after it,
   so the data copy also depends on the call leaving %xmm0 intact.  */
.text
|
||||
.p2align 4
|
||||
.globl apply_tls
|
||||
.type apply_tls, @function
|
||||
apply_tls:
|
||||
cfi_startproc
|
||||
_CET_ENDBR
|
||||
/* Frame-pointer prologue.  In the x86-64 DWARF numbering used by the
   CFI directives, register 6 is %rbp and register 7 is %rsp.  */
pushq %rbp
|
||||
cfi_def_cfa_offset (16)
|
||||
cfi_offset (6, -16)
|
||||
/* Load the first 16 bytes of the caller's buffer before the TLS call.  */
movdqu (%RDI_LP), %xmm0
|
||||
/* Point %rax at the TLS descriptor of tls_var1; the indirect call
   further down goes through it.  */
lea tls_var1@TLSDESC(%rip), %RAX_LP
|
||||
mov %RSP_LP, %RBP_LP
|
||||
cfi_def_cfa_register (6)
|
||||
/* Align stack to 64 bytes, then move it down by OFFSET and push %rbx.
   With the default OFFSET of 40 the stack pointer ends up 48 bytes
   below a 64-byte boundary, which is still 16-byte aligned at the call
   instruction.  */
|
||||
and $-64, %RSP_LP
|
||||
sub $OFFSET, %RSP_LP
|
||||
/* Save the caller's %rbx; it is used as a marker and as scratch below.  */
pushq %rbx
|
||||
/* Set %ebx and %esi to the marker value 0xbadbeef, and record the
   current %rdi and %rsi in memory for the comparison after the call.  */
|
||||
movl $0xbadbeef, %ebx
|
||||
movl $0xbadbeef, %esi
|
||||
movq %rdi, saved_rdi(%rip)
|
||||
movq %rsi, saved_rsi(%rip)
|
||||
/* TLS descriptor call; the result is returned in %rax.  */
call *tls_var1@TLSCALL(%RAX_LP)
|
||||
/* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */
|
||||
cmpq saved_rdi(%rip), %rdi
|
||||
jne L(hlt)
|
||||
cmpq saved_rsi(%rip), %rsi
|
||||
jne L(hlt)
|
||||
cmpl $0xbadbeef, %ebx
|
||||
jne L(hlt)
|
||||
/* Add the value at %fs:0 to the returned value to form the address of
   tls_var1 (under the TLS descriptor convention %rax holds an offset
   from the thread pointer).  */
add %fs:0, %RAX_LP
|
||||
/* Copy buffer bytes 0..15 to tls_var1 + 32 and bytes 16..31 to
   tls_var1 + 48.  */
movups %xmm0, 32(%RAX_LP)
|
||||
movdqu 16(%RDI_LP), %xmm1
|
||||
/* %rbx is scratch here; the caller's value is restored by the pop.  */
mov %RAX_LP, %RBX_LP
|
||||
movups %xmm1, 48(%RAX_LP)
|
||||
/* Return value: address of tls_var1 + 32.  */
lea 32(%RBX_LP), %RAX_LP
|
||||
pop %rbx
|
||||
/* leave restores %rsp from %rbp, undoing the alignment and OFFSET
   adjustment, and pops the saved %rbp.  */
leave
|
||||
cfi_def_cfa (7, 8)
|
||||
ret
|
||||
/* Failure path: one of %rdi, %rsi or %rbx changed across the call.  */
L(hlt):
|
||||
hlt
|
||||
cfi_endproc
|
||||
.size apply_tls, .-apply_tls
|
||||
/* tls_var1: 3200-byte, 16-byte aligned, zero-filled thread-local object
   placed in .tbss.  It is global but has hidden visibility.  */
.hidden tls_var1
|
||||
.globl tls_var1
|
||||
.section .tbss,"awT",@nobits
|
||||
.align 16
|
||||
.type tls_var1, @object
|
||||
.size tls_var1, 3200
|
||||
tls_var1:
|
||||
.zero 3200
|
||||
/* saved_rdi and saved_rsi: 8-byte, 8-byte aligned file-local slots where
   apply_tls records %rdi and %rsi before the TLS descriptor call.  They
   are declared with .local and .comm and accessed %rip-relative, so
   they are ordinary (non-TLS) storage even though the directives appear
   after the .tbss section switch.  */
.local saved_rdi
|
||||
.comm saved_rdi,8,8
|
||||
.local saved_rsi
|
||||
.comm saved_rsi,8,8
|
||||
/* Empty .note.GNU-stack section: the GNU toolchain convention for
   marking that this object does not need an executable stack.  */
.section .note.GNU-stack,"",@progbits
|
Loading…
Reference in New Issue
Block a user