mirror of
https://sourceware.org/git/glibc.git
synced 2024-12-31 23:11:09 +00:00
d2123d6827
In short: __tls_get_addr checks the global generation counter and, if the current dtv is older, then _dl_update_slotinfo updates the dtv up to the generation of the accessed module. So if the global generation is newer than the generation of the module, then __tls_get_addr keeps hitting the slow dtv update path. The dtv update path includes a number of checks to see if any update is needed, and this already causes a measurable TLS access slowdown after dlopen. It may be possible to detect an up-to-date dtv faster. But if there are many modules loaded (> TLS_SLOTINFO_SURPLUS), then this requires at least walking the slotinfo list. This patch tries to update the dtv to the global generation instead, so after a dlopen the TLS access slow path is only hit once. The modules with a larger generation than the accessed one were not necessarily synchronized before, so additional synchronization is needed. This patch uses acquire/release synchronization when accessing the generation counter. Note: in the x86_64 version of dl-tls.c the generation is only loaded once, since a relaxed MO load is not faster than an acquire MO load. I have not benchmarked this. Tested by Adhemerval Zanella on aarch64, powerpc, sparc, x86, who reported that it fixes the performance issue of bug 19924. Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
55 lines
1.7 KiB
C
55 lines
1.7 KiB
C
/* Thread-local storage handling in the ELF dynamic linker. x86-64 version.
|
|
Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
|
This file is part of the GNU C Library.
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU Lesser General Public
|
|
License as published by the Free Software Foundation; either
|
|
version 2.1 of the License, or (at your option) any later version.
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
License along with the GNU C Library; if not, see
|
|
<https://www.gnu.org/licenses/>. */
|
|
|
|
#ifdef SHARED
|
|
/* Work around GCC PR58066, due to which __tls_get_addr may be called
|
|
with an unaligned stack. The compat implementation is in
|
|
tls_get_addr-compat.S. */
|
|
|
|
# include <dl-tls.h>
|
|
|
|
/* Define __tls_get_addr within elf/dl-tls.c under a different name:
   while that file is included below, the macro __tls_get_addr expands
   to ___tls_get_addr, so the generic definition is compiled as
   ___tls_get_addr.  The macro is removed again right after the
   include.  */
|
|
/* Declare the renamed function with the same type as __tls_get_addr.  */
extern __typeof__ (__tls_get_addr) ___tls_get_addr;
|
|
|
|
# define __tls_get_addr ___tls_get_addr
|
|
# include <elf/dl-tls.c>
|
|
# undef __tls_get_addr
|
|
|
|
/* NOTE(review): hidden_ver is defined outside this file; presumably it
   binds the internal name ___tls_get_addr to __tls_get_addr for hidden
   references inside the library -- confirm against its definition.  */
hidden_ver (___tls_get_addr, __tls_get_addr)
|
|
|
|
/* Only handle slow paths for __tls_get_addr. */
|
|
attribute_hidden
|
|
void *
|
|
__tls_get_addr_slow (GET_ADDR_ARGS)
|
|
{
|
|
dtv_t *dtv = THREAD_DTV ();
|
|
|
|
size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
|
|
if (__glibc_unlikely (dtv[0].counter != gen))
|
|
return update_get_addr (GET_ADDR_PARAM, gen);
|
|
|
|
return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
|
|
}
|
|
#else
|
|
|
|
/* No compatibility symbol needed. */
|
|
# include <elf/dl-tls.c>
|
|
|
|
#endif
|