elf: Fix fences in _dl_find_object_update (bug 28745)

As explained in Hans Boehm, Can Seqlocks Get Along with Programming
Language Memory Models?, an acquire fence is needed in
_dlfo_read_success.  The lack of a fence resulted in an observable
bug on powerpc64le compile-time load reordering.

The fence in _dlfo_mappings_begin_update has been reordered, turning
the fence/store sequence into a release MO store equivalent.

Relaxed MO loads are used on the reader side, and relaxed MO stores
on the writer side for the shared data, to avoid formal data races.
This is just to be conservative; it should not actually be necessary
given how the data is used.

This commit also fixes the test run time.  The intent was to run it
for 3 seconds, but 0.3 seconds was enough to uncover the bug very
occasionally (while 3 seconds did not reliably show the bug on every
test run).

Reviewed-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
This commit is contained in:
Florian Weimer 2022-01-07 13:21:57 +01:00
parent d5b0046e3d
commit acbaad31e8
3 changed files with 133 additions and 77 deletions

View File

@ -17,6 +17,7 @@
<https://www.gnu.org/licenses/>. */ <https://www.gnu.org/licenses/>. */
#include <assert.h> #include <assert.h>
#include <atomic.h>
#include <atomic_wide_counter.h> #include <atomic_wide_counter.h>
#include <dl-find_object.h> #include <dl-find_object.h>
#include <dlfcn.h> #include <dlfcn.h>
@ -80,13 +81,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
over all segments, even though the data is not stored in one over all segments, even though the data is not stored in one
contiguous array. contiguous array.
During updates, the segments are overwritten in place, and a During updates, the segments are overwritten in place. A software
software transactional memory construct (involving the transactional memory construct (involving the
_dlfo_loaded_mappings_version variable) is used to detect _dlfo_loaded_mappings_version variable) is used to detect
concurrent modification, and retry as necessary. The memory concurrent modification, and retry as necessary. (This approach is
allocations are never deallocated, but slots used for objects that similar to seqlocks, except that two copies are used, and there is
have been dlclose'd can be reused by dlopen. The memory can live only one writer, ever, due to the loader lock.) Technically,
in the regular C malloc heap. relaxed MO loads and stores need to be used for the shared TM data,
to avoid data races.
The memory allocations are never deallocated, but slots used for
objects that have been dlclose'd can be reused by dlopen. The
memory can live in the regular C malloc heap.
The segments are populated from the start of the list, with the The segments are populated from the start of the list, with the
mappings with the highest address. Only if this segment is full, mappings with the highest address. Only if this segment is full,
@ -101,17 +107,18 @@ static struct dl_find_object_internal *_dlfo_nodelete_mappings
needed. */ needed. */
struct dlfo_mappings_segment struct dlfo_mappings_segment
{ {
/* The previous segment has lower base addresses. */ /* The previous segment has lower base addresses. Constant after
initialization; read in the TM region. */
struct dlfo_mappings_segment *previous; struct dlfo_mappings_segment *previous;
/* Used by __libc_freeres to deallocate malloc'ed memory. */ /* Used by __libc_freeres to deallocate malloc'ed memory. */
void *to_free; void *to_free;
/* Count of array elements in use and allocated. */ /* Count of array elements in use and allocated. */
size_t size; size_t size; /* Read in the TM region. */
size_t allocated; size_t allocated;
struct dl_find_object_internal objects[]; struct dl_find_object_internal objects[]; /* Read in the TM region. */
}; };
/* To achieve async-signal-safety, two copies of the data structure /* To achieve async-signal-safety, two copies of the data structure
@ -240,7 +247,8 @@ static inline uint64_t
_dlfo_read_start_version (void) _dlfo_read_start_version (void)
{ {
/* Acquire MO load synchronizes with the fences at the beginning and /* Acquire MO load synchronizes with the fences at the beginning and
end of the TM update region. */ end of the TM update region in _dlfo_mappings_begin_update,
_dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch. */
return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version); return __atomic_wide_counter_load_acquire (&_dlfo_loaded_mappings_version);
} }
@ -258,34 +266,30 @@ _dlfo_read_version_locked (void)
static inline unsigned int static inline unsigned int
_dlfo_mappings_begin_update (void) _dlfo_mappings_begin_update (void)
{ {
unsigned int v /* The store synchronizes with loads in _dlfo_read_start_version
= __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, (also called from _dlfo_read_success). */
2);
/* Subsequent stores to the TM data must not be reordered before the
store above with the version update. */
atomic_thread_fence_release (); atomic_thread_fence_release ();
return v & 1; return __atomic_wide_counter_fetch_add_relaxed
(&_dlfo_loaded_mappings_version, 2);
} }
/* Installs the just-updated version as the active version. */ /* Installs the just-updated version as the active version. */
static inline void static inline void
_dlfo_mappings_end_update (void) _dlfo_mappings_end_update (void)
{ {
/* The previous writes to the TM data must not be reordered after /* The store synchronizes with loads in _dlfo_read_start_version
the version update below. */ (also called from _dlfo_read_success). */
atomic_thread_fence_release (); atomic_thread_fence_release ();
__atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 1);
1);
} }
/* Completes an in-place update without switching versions. */ /* Completes an in-place update without switching versions. */
static inline void static inline void
_dlfo_mappings_end_update_no_switch (void) _dlfo_mappings_end_update_no_switch (void)
{ {
/* The previous writes to the TM data must not be reordered after /* The store synchronizes with loads in _dlfo_read_start_version
the version update below. */ (also called from _dlfo_read_success). */
atomic_thread_fence_release (); atomic_thread_fence_release ();
__atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, __atomic_wide_counter_fetch_add_relaxed (&_dlfo_loaded_mappings_version, 2);
2);
} }
/* Return true if the read was successful, given the start /* Return true if the read was successful, given the start
@ -293,6 +297,19 @@ _dlfo_mappings_end_update_no_switch (void)
static inline bool static inline bool
_dlfo_read_success (uint64_t start_version) _dlfo_read_success (uint64_t start_version)
{ {
/* See Hans Boehm, Can Seqlocks Get Along with Programming Language
Memory Models?, Section 4. This is necessary so that loads in
the TM region are not ordered past the version check below. */
atomic_thread_fence_acquire ();
/* Synchronizes with stores in _dlfo_mappings_begin_update,
_dlfo_mappings_end_update, _dlfo_mappings_end_update_no_switch.
It is important that all stores from the last update have been
visible, and stores from the next updates are not.
Unlike with seqlocks, there is no check for odd versions here
because we have read the unmodified copy (confirmed to be
unmodified by the unchanged version). */
return _dlfo_read_start_version () == start_version; return _dlfo_read_start_version () == start_version;
} }
@ -318,7 +335,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
{ {
size_t half = size >> 1; size_t half = size >> 1;
struct dl_find_object_internal *middle = first + half; struct dl_find_object_internal *middle = first + half;
if (middle->map_start < pc) if (atomic_load_relaxed (&middle->map_start) < pc)
{ {
first = middle + 1; first = middle + 1;
size -= half + 1; size -= half + 1;
@ -327,9 +344,9 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
size = half; size = half;
} }
if (first != end && pc == first->map_start) if (first != end && pc == atomic_load_relaxed (&first->map_start))
{ {
if (pc < first->map_end) if (pc < atomic_load_relaxed (&first->map_end))
return first; return first;
else else
/* Zero-length mapping after dlclose. */ /* Zero-length mapping after dlclose. */
@ -339,7 +356,7 @@ _dlfo_lookup (uintptr_t pc, struct dl_find_object_internal *first1, size_t size)
{ {
/* Check to see if PC is in the previous mapping. */ /* Check to see if PC is in the previous mapping. */
--first; --first;
if (pc < first->map_end) if (pc < atomic_load_relaxed (&first->map_end))
/* pc >= first->map_start implied by the search above. */ /* pc >= first->map_start implied by the search above. */
return first; return first;
else else
@ -408,39 +425,47 @@ _dl_find_object (void *pc1, struct dl_find_object *result)
size on earlier unused segments. */ size on earlier unused segments. */
for (struct dlfo_mappings_segment *seg for (struct dlfo_mappings_segment *seg
= _dlfo_mappings_active_segment (start_version); = _dlfo_mappings_active_segment (start_version);
seg != NULL && seg->size > 0; seg = seg->previous) seg != NULL;
if (pc >= seg->objects[0].map_start) seg = atomic_load_acquire (&seg->previous))
{ {
/* PC may lie within this segment. If it is less than the size_t seg_size = atomic_load_relaxed (&seg->size);
segment start address, it can only lie in a previous if (seg_size == 0)
segment, due to the base address sorting. */ break;
struct dl_find_object_internal *obj
= _dlfo_lookup (pc, seg->objects, seg->size);
if (obj != NULL) if (pc >= atomic_load_relaxed (&seg->objects[0].map_start))
{ {
/* Found the right mapping. Copy out the data prior to /* PC may lie within this segment. If it is less than the
checking if the read transaction was successful. */ segment start address, it can only lie in a previous
struct dl_find_object_internal copy = *obj; segment, due to the base address sorting. */
if (_dlfo_read_success (start_version)) struct dl_find_object_internal *obj
{ = _dlfo_lookup (pc, seg->objects, seg_size);
_dl_find_object_to_external (&copy, result);
return 0; if (obj != NULL)
} {
else /* Found the right mapping. Copy out the data prior to
/* Read transaction failure. */ checking if the read transaction was successful. */
goto retry; struct dl_find_object_internal copy;
} _dl_find_object_internal_copy (obj, &copy);
else if (_dlfo_read_success (start_version))
{ {
/* PC is not covered by this mapping. */ _dl_find_object_to_external (&copy, result);
if (_dlfo_read_success (start_version)) return 0;
return -1; }
else else
/* Read transaction failure. */ /* Read transaction failure. */
goto retry; goto retry;
} }
} /* if: PC might lie within the current seg. */ else
{
/* PC is not covered by this mapping. */
if (_dlfo_read_success (start_version))
return -1;
else
/* Read transaction failure. */
goto retry;
}
} /* if: PC might lie within the current seg. */
}
/* PC is not covered by any segment. */ /* PC is not covered by any segment. */
if (_dlfo_read_success (start_version)) if (_dlfo_read_success (start_version))
@ -619,15 +644,19 @@ static inline size_t
_dlfo_update_init_seg (struct dlfo_mappings_segment *seg, _dlfo_update_init_seg (struct dlfo_mappings_segment *seg,
size_t remaining_to_add) size_t remaining_to_add)
{ {
size_t new_seg_size;
if (remaining_to_add < seg->allocated) if (remaining_to_add < seg->allocated)
/* Partially filled segment. */ /* Partially filled segment. */
seg->size = remaining_to_add; new_seg_size = remaining_to_add;
else else
seg->size = seg->allocated; new_seg_size = seg->allocated;
return seg->size; atomic_store_relaxed (&seg->size, new_seg_size);
return new_seg_size;
} }
/* Invoked from _dl_find_object_update after sorting. */ /* Invoked from _dl_find_object_update after sorting. Stores to the
shared data need to use relaxed MO. But plain loads can be used
because the loader lock prevents concurrent stores. */
static bool static bool
_dl_find_object_update_1 (struct link_map **loaded, size_t count) _dl_find_object_update_1 (struct link_map **loaded, size_t count)
{ {
@ -727,7 +756,8 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
{ {
/* Prefer mapping in current_seg. */ /* Prefer mapping in current_seg. */
assert (current_seg_index1 > 0); assert (current_seg_index1 > 0);
*dlfo = current_seg->objects[current_seg_index1 - 1]; _dl_find_object_internal_copy
(&current_seg->objects[current_seg_index1 - 1], dlfo);
--current_seg_index1; --current_seg_index1;
} }
else else
@ -753,7 +783,7 @@ _dl_find_object_update_1 (struct link_map **loaded, size_t count)
/* Prevent searching further into unused segments. */ /* Prevent searching further into unused segments. */
if (target_seg->previous != NULL) if (target_seg->previous != NULL)
target_seg->previous->size = 0; atomic_store_relaxed (&target_seg->previous->size, 0);
_dlfo_mappings_end_update (); _dlfo_mappings_end_update ();
return true; return true;

View File

@ -20,6 +20,7 @@
#define _DL_FIND_EH_FRAME_H #define _DL_FIND_EH_FRAME_H
#include <assert.h> #include <assert.h>
#include <atomic.h>
#include <dlfcn.h> #include <dlfcn.h>
#include <ldsodefs.h> #include <ldsodefs.h>
#include <stdbool.h> #include <stdbool.h>
@ -44,6 +45,30 @@ struct dl_find_object_internal
#endif #endif
}; };
/* Create a copy of *SOURCE in *COPY using relaxed MO loads and
stores. */
static inline void
_dl_find_object_internal_copy (const struct dl_find_object_internal *source,
struct dl_find_object_internal *copy)
{
atomic_store_relaxed (&copy->map_start,
atomic_load_relaxed (&source->map_start));
atomic_store_relaxed (&copy->map_end,
atomic_load_relaxed (&source->map_end));
atomic_store_relaxed (&copy->map,
atomic_load_relaxed (&source->map));
atomic_store_relaxed (&copy->eh_frame,
atomic_load_relaxed (&source->eh_frame));
#if DLFO_STRUCT_HAS_EH_DBASE
atomic_store_relaxed (&copy->eh_dbase,
atomic_load_relaxed (&source->eh_dbase));
#endif
#if DLFO_STRUCT_HAS_EH_COUNT
atomic_store_relaxed (&copy->eh_count,
atomic_load_relaxed (&source->eh_count));
#endif
}
static inline void static inline void
_dl_find_object_to_external (struct dl_find_object_internal *internal, _dl_find_object_to_external (struct dl_find_object_internal *internal,
struct dl_find_object *external) struct dl_find_object *external)
@ -62,34 +87,35 @@ _dl_find_object_to_external (struct dl_find_object_internal *internal,
} }
/* Extract the object location data from a link map and writes it to /* Extract the object location data from a link map and writes it to
*RESULT. */ *RESULT using relaxed MO stores. */
static void __attribute__ ((unused)) static void __attribute__ ((unused))
_dl_find_object_from_map (struct link_map *l, _dl_find_object_from_map (struct link_map *l,
struct dl_find_object_internal *result) struct dl_find_object_internal *result)
{ {
result->map_start = (uintptr_t) l->l_map_start; atomic_store_relaxed (&result->map_start, (uintptr_t) l->l_map_start);
result->map_end = (uintptr_t) l->l_map_end; atomic_store_relaxed (&result->map_end, (uintptr_t) l->l_map_end);
result->map = l; atomic_store_relaxed (&result->map, l);
#if DLFO_STRUCT_HAS_EH_DBASE #if DLFO_STRUCT_HAS_EH_DBASE
result->eh_dbase = (void *) l->l_info[DT_PLTGOT]; atomic_store_relaxed (&result->eh_dbase, (void *) l->l_info[DT_PLTGOT]);
#endif #endif
for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum; for (const ElfW(Phdr) *ph = l->l_phdr, *ph_end = l->l_phdr + l->l_phnum;
ph < ph_end; ++ph) ph < ph_end; ++ph)
if (ph->p_type == DLFO_EH_SEGMENT_TYPE) if (ph->p_type == DLFO_EH_SEGMENT_TYPE)
{ {
result->eh_frame = (void *) (ph->p_vaddr + l->l_addr); atomic_store_relaxed (&result->eh_frame,
(void *) (ph->p_vaddr + l->l_addr));
#if DLFO_STRUCT_HAS_EH_COUNT #if DLFO_STRUCT_HAS_EH_COUNT
result->eh_count = ph->p_memsz / 8; atomic_store_relaxed (&result->eh_count, ph->p_memsz / 8);
#endif #endif
return; return;
} }
/* Object has no exception handling segment. */ /* Object has no exception handling segment. */
result->eh_frame = NULL; atomic_store_relaxed (&result->eh_frame, NULL);
#if DLFO_STRUCT_HAS_EH_COUNT #if DLFO_STRUCT_HAS_EH_COUNT
result->eh_count = 0; atomic_store_relaxed (&result->eh_count, 0);
#endif #endif
} }

View File

@ -138,12 +138,12 @@ check (void *address, struct dl_find_object *expected, int line)
#endif #endif
} }
/* Request process termination after 3 seconds. */ /* Request process termination after 0.3 seconds. */
static bool exit_requested; static bool exit_requested;
static void * static void *
exit_thread (void *ignored) exit_thread (void *ignored)
{ {
usleep (3 * 100 * 1000); usleep (300 * 1000);
__atomic_store_n (&exit_requested, true, __ATOMIC_RELAXED); __atomic_store_n (&exit_requested, true, __ATOMIC_RELAXED);
return NULL; return NULL;
} }