RFC elf: Fix slow tls access after dlopen [BZ #19924]

In short: __tls_get_addr checks the global generation counter,
_dl_update_slotinfo updates up to the generation of the accessed
module. If the global generation is newer than geneneration of the
module then __tls_get_addr keeps hitting the slow path that updates
the dtv.

Possible approaches i can see:

1. update to global generation instead of module,
2. check the module generation in the fast path.

This patch is 1.: it needs additional sync (load acquire) so the
slotinfo list is up to date with the observed global generation.

Approach 2. would require walking the slotinfo list at all times.
I don't know how to make that fast with many modules.

Note: in the x86_64 version of dl-tls.c the generation is only loaded
once, since relaxed mo is not faster than acquire mo load.

I have not benchmarked this yet.
This commit is contained in:
Szabolcs Nagy 2021-02-16 12:55:13 +00:00
parent f8ea2b9982
commit b116855de7
6 changed files with 23 additions and 27 deletions

View File

@ -780,7 +780,7 @@ _dl_close_worker (struct link_map *map, bool force)
if (__glibc_unlikely (newgen == 0))
_dl_fatal_printf ("TLS generation counter wrapped! Please report as described in "REPORT_BUGS_TO".\n");
/* Can be read concurrently. */
atomic_store_relaxed (&GL(dl_tls_generation), newgen);
atomic_store_release (&GL(dl_tls_generation), newgen);
if (tls_free_end == GL(dl_tls_static_used))
GL(dl_tls_static_used) = tls_free_start;

View File

@ -400,7 +400,7 @@ update_tls_slotinfo (struct link_map *new)
_dl_fatal_printf (N_("\
TLS generation counter wrapped! Please report this."));
/* Can be read concurrently. */
atomic_store_relaxed (&GL(dl_tls_generation), newgen);
atomic_store_release (&GL(dl_tls_generation), newgen);
/* We need a second pass for static tls data, because
_dl_update_slotinfo must not be run while calls to
@ -417,8 +417,8 @@ TLS generation counter wrapped! Please report this."));
now, but we can delay updating the DTV. */
imap->l_need_tls_init = 0;
#ifdef SHARED
/* Update the slot information data for at least the
generation of the DSO we are allocating data for. */
/* Update the slot information data for the current
generation. */
/* FIXME: This can terminate the process on memory
allocation failure. It is not possible to raise
@ -426,7 +426,7 @@ TLS generation counter wrapped! Please report this."));
_dl_update_slotinfo would have to be split into two
operations, similar to resize_scopes and update_scopes
above. This is related to bug 16134. */
_dl_update_slotinfo (imap->l_tls_modid);
_dl_update_slotinfo (imap->l_tls_modid, newgen);
#endif
GL(dl_init_static_tls) (imap);

View File

@ -111,11 +111,10 @@ _dl_try_allocate_static_tls (struct link_map *map, bool optional)
if (map->l_real->l_relocated)
{
#ifdef SHARED
// TODO: it is not clear why we need to update the DTV here, add comment
if (__builtin_expect (THREAD_DTV()[0].counter != GL(dl_tls_generation),
0))
/* Update the slot information data for at least the generation of
the DSO we are allocating data for. */
(void) _dl_update_slotinfo (map->l_tls_modid);
(void) _dl_update_slotinfo (map->l_tls_modid, GL(dl_tls_generation));
#endif
GL(dl_init_static_tls) (map);

View File

@ -701,7 +701,7 @@ allocate_and_init (struct link_map *map)
struct link_map *
_dl_update_slotinfo (unsigned long int req_modid)
_dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
{
struct link_map *the_map = NULL;
dtv_t *dtv = THREAD_DTV ();
@ -718,19 +718,12 @@ _dl_update_slotinfo (unsigned long int req_modid)
code and therefore add to the slotinfo list. This is a problem
since we must not pick up any information about incomplete work.
The solution to this is to ignore all dtv slots which were
created after the one we are currently interested. We know that
dynamic loading for this module is completed and this is the last
load operation we know finished. */
unsigned long int idx = req_modid;
created after the generation we are interested in. We know that
dynamic loading for this generation is completed and this is the
last load operation we know finished. */
struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
while (idx >= listp->len)
{
idx -= listp->len;
listp = listp->next;
}
if (dtv[0].counter < listp->slotinfo[idx].gen)
if (dtv[0].counter < new_gen)
{
/* CONCURRENCY NOTES:
@ -751,7 +744,6 @@ _dl_update_slotinfo (unsigned long int req_modid)
other entries are racy. However updating a non-relevant dtv
entry does not affect correctness. For a relevant module m,
max_modid >= modid of m. */
size_t new_gen = listp->slotinfo[idx].gen;
size_t total = 0;
size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
assert (max_modid >= req_modid);
@ -894,9 +886,9 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
static struct link_map *
__attribute_noinline__
update_get_addr (GET_ADDR_ARGS)
update_get_addr (GET_ADDR_ARGS, size_t gen)
{
struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
dtv_t *dtv = THREAD_DTV ();
void *p = dtv[GET_ADDR_MODULE].pointer.val;
@ -931,7 +923,11 @@ __tls_get_addr (GET_ADDR_ARGS)
by user code, see CONCURRENCY NOTES in _dl_update_slotinfo. */
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
if (__glibc_unlikely (dtv[0].counter != gen))
return update_get_addr (GET_ADDR_PARAM);
{
// TODO: needs comment update if we rely on consistent generation with slotinfo
gen = atomic_load_acquire (&GL(dl_tls_generation));
return update_get_addr (GET_ADDR_PARAM, gen);
}
void *p = dtv[GET_ADDR_MODULE].pointer.val;

View File

@ -1224,7 +1224,8 @@ extern void _dl_add_to_slotinfo (struct link_map *l, bool do_add)
/* Update slot information data for at least the generation of the
module with the given index. */
extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid)
extern struct link_map *_dl_update_slotinfo (unsigned long int req_modid,
size_t gen)
attribute_hidden;
/* Look up the module's TLS block as for __tls_get_addr,

View File

@ -40,9 +40,9 @@ __tls_get_addr_slow (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
size_t gen = atomic_load_acquire (&GL(dl_tls_generation));
if (__glibc_unlikely (dtv[0].counter != gen))
return update_get_addr (GET_ADDR_PARAM);
return update_get_addr (GET_ADDR_PARAM, gen);
return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
}