mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-09 23:00:07 +00:00
RFC elf: Fix slow tls access after dlopen [BZ #19924]
In short: __tls_get_addr checks the global generation counter, _dl_update_slotinfo updates up to the generation of the accessed module. If the global generation is newer than geneneration of the module then __tls_get_addr keeps hitting the slow path that updates the dtv. Possible approaches i can see: 1. update to global generation instead of module, 2. check the module generation in the fast path. This patch is 1.: it needs additional sync (load acquire) so the slotinfo list is up to date with the observed global generation. Approach 2. would require walking the slotinfo list at all times. I don't know how to make that fast with many modules. Note: in the x86_64 version of dl-tls.c the generation is only loaded once, since relaxed mo is not faster than acquire mo load. I have not benchmarked this yet.
This commit is contained in:
parent
f8ea2b9982
commit
b116855de7
@ -780,7 +780,7 @@ _dl_close_worker (struct link_map *map, bool force)
|
||||
if (__glibc_unlikely (newgen == 0))
|
||||
_dl_fatal_printf ("TLS generation counter wrapped! Please report as described in "REPORT_BUGS_TO".\n");
|
||||
/* Can be read concurrently. */
|
||||
atomic_store_relaxed (&GL(dl_tls_generation), newgen);
|
||||
atomic_store_release (&GL(dl_tls_generation), newgen);
|
||||
|
||||
if (tls_free_end == GL(dl_tls_static_used))
|
||||
GL(dl_tls_static_used) = tls_free_start;
|
||||
|
@ -400,7 +400,7 @@ update_tls_slotinfo (struct link_map *new)
|
||||
_dl_fatal_printf (N_("\
|
||||
TLS generation counter wrapped! Please report this."));
|
||||
/* Can be read concurrently. */
|
||||
atomic_store_relaxed (&GL(dl_tls_generation), newgen);
|
||||
atomic_store_release (&GL(dl_tls_generation), newgen);
|
||||
|
||||
/* We need a second pass for static tls data, because
|
||||
_dl_update_slotinfo must not be run while calls to
|
||||
@ -417,8 +417,8 @@ TLS generation counter wrapped! Please report this."));
|
||||
now, but we can delay updating the DTV. */
|
||||
imap->l_need_tls_init = 0;
|
||||
#ifdef SHARED
|
||||
/* Update the slot information data for at least the
|
||||
generation of the DSO we are allocating data for. */
|
||||
/* Update the slot information data for the current
|
||||
generation. */
|
||||
|
||||
/* FIXME: This can terminate the process on memory
|
||||
allocation failure. It is not possible to raise
|
||||
@ -426,7 +426,7 @@ TLS generation counter wrapped! Please report this."));
|
||||
_dl_update_slotinfo would have to be split into two
|
||||
operations, similar to resize_scopes and update_scopes
|
||||
above. This is related to bug 16134. */
|
||||
_dl_update_slotinfo (imap->l_tls_modid);
|
||||
_dl_update_slotinfo (imap->l_tls_modid, newgen);
|
||||
#endif
|
||||
|
||||
GL(dl_init_static_tls) (imap);
|
||||
|
@ -111,11 +111,10 @@ _dl_try_allocate_static_tls (struct link_map *map, bool optional)
|
||||
if (map->l_real->l_relocated)
|
||||
{
|
||||
#ifdef SHARED
|
||||
// TODO: it is not clear why we need to update the DTV here, add comment
|
||||
if (__builtin_expect (THREAD_DTV()[0].counter != GL(dl_tls_generation),
|
||||
0))
|
||||
/* Update the slot information data for at least the generation of
|
||||
the DSO we are allocating data for. */
|
||||
(void) _dl_update_slotinfo (map->l_tls_modid);
|
||||
(void) _dl_update_slotinfo (map->l_tls_modid, GL(dl_tls_generation));
|
||||
#endif
|
||||
|
||||
GL(dl_init_static_tls) (map);
|
||||
|
28
elf/dl-tls.c
28
elf/dl-tls.c
@ -701,7 +701,7 @@ allocate_and_init (struct link_map *map)
|
||||
|
||||
|
||||
struct link_map *
|
||||
_dl_update_slotinfo (unsigned long int req_modid)
|
||||
_dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
|
||||
{
|
||||
struct link_map *the_map = NULL;
|
||||
dtv_t *dtv = THREAD_DTV ();
|
||||
@ -718,19 +718,12 @@ _dl_update_slotinfo (unsigned long int req_modid)
|
||||
code and therefore add to the slotinfo list. This is a problem
|
||||
since we must not pick up any information about incomplete work.
|
||||
The solution to this is to ignore all dtv slots which were
|
||||
created after the one we are currently interested. We know that
|
||||
dynamic loading for this module is completed and this is the last
|
||||
load operation we know finished. */
|
||||
unsigned long int idx = req_modid;
|
||||
created after the generation we are interested in. We know that
|
||||
dynamic loading for this generation is completed and this is the
|
||||
last load operation we know finished. */
|
||||
struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
|
||||
|
||||
while (idx >= listp->len)
|
||||
{
|
||||
idx -= listp->len;
|
||||
listp = listp->next;
|
||||
}
|
||||
|
||||
if (dtv[0].counter < listp->slotinfo[idx].gen)
|
||||
if (dtv[0].counter < new_gen)
|
||||
{
|
||||
/* CONCURRENCY NOTES:
|
||||
|
||||
@ -751,7 +744,6 @@ _dl_update_slotinfo (unsigned long int req_modid)
|
||||
other entries are racy. However updating a non-relevant dtv
|
||||
entry does not affect correctness. For a relevant module m,
|
||||
max_modid >= modid of m. */
|
||||
size_t new_gen = listp->slotinfo[idx].gen;
|
||||
size_t total = 0;
|
||||
size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
|
||||
assert (max_modid >= req_modid);
|
||||
@ -894,9 +886,9 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
|
||||
|
||||
static struct link_map *
|
||||
__attribute_noinline__
|
||||
update_get_addr (GET_ADDR_ARGS)
|
||||
update_get_addr (GET_ADDR_ARGS, size_t gen)
|
||||
{
|
||||
struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
|
||||
struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
|
||||
dtv_t *dtv = THREAD_DTV ();
|
||||
|
||||
void *p = dtv[GET_ADDR_MODULE].pointer.val;
|
||||
@ -931,7 +923,11 @@ __tls_get_addr (GET_ADDR_ARGS)
|
||||
by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */
|
||||
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
|
||||
if (__glibc_unlikely (dtv[0].counter != gen))
|
||||
return update_get_addr (GET_ADDR_PARAM);
|
||||
{
|
||||
// TODO: needs comment update if we rely on consistent generation with slotinfo
|
||||
gen = atomic_load_acquire (&GL(dl_tls_generation));
|
||||
return update_get_addr (GET_ADDR_PARAM, gen);
|
||||
}
|
||||
|
||||
void *p = dtv[GET_ADDR_MODULE].pointer.val;
|
||||
|
||||
|
@ -1224,7 +1224,8 @@ extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add)
|
||||
|
||||
/* Update slot information data for at least the generation of the
|
||||
module with the given index. */
|
||||
extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid)
|
||||
extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid,
|
||||
size_t gen)
|
||||
attribute_hidden;
|
||||
|
||||
/* Look up the module's TLS block as for __tls_get_addr,
|
||||
|
@ -40,9 +40,9 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
|
||||
{
|
||||
dtv_t *dtv = THREAD_DTV ();
|
||||
|
||||
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
|
||||
size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
|
||||
if (__glibc_unlikely (dtv[0].counter != gen))
|
||||
return update_get_addr (GET_ADDR_PARAM);
|
||||
return update_get_addr (GET_ADDR_PARAM, gen);
|
||||
|
||||
return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user