strengthen some atomic operations for weak memory models
This commit is contained in:
parent
2159c22415
commit
5c7c106d62
@ -30,26 +30,32 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
// ------------------------------------------------------
|
||||
|
||||
// Atomically add a 64-bit value; returns the previous value.
|
||||
// Note: not using _Atomic(int64_t) as it is only used for stats.
|
||||
static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add);
|
||||
// Note: not using _Atomic(int64_t) as it is only used for statistics.
|
||||
static inline void mi_atomic_add64(volatile int64_t* p, int64_t add);
|
||||
|
||||
// Atomically add a value; returns the previous value.
|
||||
// Atomically add a value; returns the previous value. Memory ordering is relaxed.
|
||||
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add);
|
||||
|
||||
// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously.
|
||||
// Atomically compare and exchange a value; returns `true` if successful.
|
||||
// May fail spuriously. Memory ordering as release on success, and relaxed on failure.
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
|
||||
|
||||
// Atomically compare and exchange a value; returns `true` if successful.
|
||||
// Memory ordering is acquire-release
|
||||
// (Note: expected and desired are in opposite order from atomic_compare_exchange)
|
||||
static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
|
||||
|
||||
// Atomically exchange a value.
|
||||
// Atomically exchange a value. Memory ordering is acquire-release.
|
||||
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange);
|
||||
|
||||
// Atomically read a value
|
||||
// Atomically read a value. Memory ordering is relaxed.
|
||||
static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p);
|
||||
|
||||
// Atomically write a value
|
||||
// Atomically read a value. Memory ordering is acquire.
|
||||
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p);
|
||||
|
||||
// Atomically write a value. Memory ordering is release.
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x);
|
||||
|
||||
// Yield
|
||||
@ -76,11 +82,16 @@ static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) {
|
||||
return mi_atomic_subu(p, 1);
|
||||
}
|
||||
|
||||
// Atomically read a pointer
|
||||
// Atomically read a pointer; Memory order is relaxed.
|
||||
static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) {
|
||||
return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p);
|
||||
}
|
||||
|
||||
// Atomically read a pointer; Memory order is acquire.
|
||||
static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) {
|
||||
return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p);
|
||||
}
|
||||
|
||||
// Atomically write a pointer
|
||||
static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) {
|
||||
mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x );
|
||||
@ -127,18 +138,21 @@ static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t
|
||||
static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
|
||||
return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
|
||||
static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
|
||||
return *p;
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
|
||||
return mi_atomic_read(p);
|
||||
}
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
mi_atomic_exchange(p,x);
|
||||
}
|
||||
static inline void mi_atomic_yield(void) {
|
||||
YieldProcessor();
|
||||
}
|
||||
static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
|
||||
static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
|
||||
#ifdef _WIN64
|
||||
return mi_atomic_add(p,add);
|
||||
mi_atomic_add(p,add);
|
||||
#else
|
||||
int64_t current;
|
||||
int64_t sum;
|
||||
@ -146,7 +160,6 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add)
|
||||
current = *p;
|
||||
sum = current + add;
|
||||
} while (_InterlockedCompareExchange64(p, sum, current) != current);
|
||||
return current;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -156,9 +169,9 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add)
|
||||
#else
|
||||
#define MI_USING_STD
|
||||
#endif
|
||||
static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) {
|
||||
static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) {
|
||||
MI_USING_STD
|
||||
return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
|
||||
atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
|
||||
}
|
||||
static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
|
||||
MI_USING_STD
|
||||
@ -180,6 +193,10 @@ static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)
|
||||
MI_USING_STD
|
||||
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed);
|
||||
}
|
||||
static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) {
|
||||
MI_USING_STD
|
||||
return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire);
|
||||
}
|
||||
static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
|
||||
MI_USING_STD
|
||||
return atomic_store_explicit(p, x, memory_order_release);
|
||||
|
18
src/alloc.c
18
src/alloc.c
@ -118,22 +118,24 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
if (segment->page_kind==MI_PAGE_HUGE) {
|
||||
// huge page segments are always abandoned and can be freed immediately
|
||||
mi_assert_internal(segment->thread_id==0);
|
||||
mi_assert_internal(segment->abandoned_next==NULL);
|
||||
mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
|
||||
mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL);
|
||||
// claim it and free
|
||||
mi_block_set_next(page, block, page->free);
|
||||
page->free = block;
|
||||
page->used--;
|
||||
mi_heap_t* heap = mi_get_default_heap();
|
||||
segment->thread_id = heap->thread_id;
|
||||
_mi_segment_page_free(page,true,&heap->tld->segments);
|
||||
// paranoia: if this it the last reference, the cas should always succeed
|
||||
if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) {
|
||||
mi_block_set_next(page, block, page->free);
|
||||
page->free = block;
|
||||
page->used--;
|
||||
_mi_segment_page_free(page,true,&heap->tld->segments);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
tfree = page->thread_free;
|
||||
use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
|
||||
(mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
|
||||
(mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1) // data-race but ok, just optimizes early release of the page
|
||||
);
|
||||
if (mi_unlikely(use_delayed)) {
|
||||
// unlikely: this only happens on the first concurrent free in a page that is in the full list
|
||||
|
20
src/memory.c
20
src/memory.c
@ -131,7 +131,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
|
||||
mi_assert_internal(®ions[idx] == region);
|
||||
|
||||
// ensure the region is reserved
|
||||
void* start = mi_atomic_read_ptr_relaxed(®ion->start);
|
||||
void* start = mi_atomic_read_ptr(®ion->start);
|
||||
if (start == NULL)
|
||||
{
|
||||
start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
|
||||
@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
|
||||
// we assign it to a later slot instead (up to 4 tries).
|
||||
// note: we don't need to increment the region count, this will happen on another allocation
|
||||
for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
|
||||
void* s = mi_atomic_read_ptr_relaxed(®ions[idx+i].start);
|
||||
void* s = mi_atomic_read_ptr(®ions[idx+i].start);
|
||||
if (s == NULL) { // quick test
|
||||
if (mi_atomic_cas_ptr_weak(®ions[idx+i].start, start, s)) {
|
||||
if (mi_atomic_cas_ptr_strong(®ions[idx+i].start, start, NULL)) {
|
||||
start = NULL;
|
||||
break;
|
||||
}
|
||||
@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
|
||||
_mi_os_free(start, MI_REGION_SIZE, tld->stats);
|
||||
}
|
||||
// and continue with the memory at our index
|
||||
start = mi_atomic_read_ptr_relaxed(®ion->start);
|
||||
start = mi_atomic_read_ptr(®ion->start);
|
||||
}
|
||||
}
|
||||
mi_assert_internal(start == mi_atomic_read_ptr_relaxed(®ion->start));
|
||||
mi_assert_internal(start == mi_atomic_read_ptr(®ion->start));
|
||||
mi_assert_internal(start != NULL);
|
||||
|
||||
// Commit the blocks to memory
|
||||
@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
|
||||
|
||||
const uintptr_t mask = mi_region_block_mask(blocks, 0);
|
||||
const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
|
||||
uintptr_t map = mi_atomic_read_relaxed(®ion->map);
|
||||
uintptr_t map = mi_atomic_read(®ion->map);
|
||||
|
||||
#ifdef MI_HAVE_BITSCAN
|
||||
size_t bitidx = mi_bsf(~map); // quickly find the first zero bit if possible
|
||||
@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
|
||||
mi_assert_internal((m >> bitidx) == mask); // no overflow?
|
||||
uintptr_t newmap = map | m;
|
||||
mi_assert_internal((newmap^map) >> bitidx == mask);
|
||||
if (!mi_atomic_cas_strong(®ion->map, newmap, map)) {
|
||||
if (!mi_atomic_cas_weak(®ion->map, newmap, map)) {
|
||||
// no success, another thread claimed concurrently.. keep going
|
||||
map = mi_atomic_read_relaxed(®ion->map);
|
||||
map = mi_atomic_read(®ion->map);
|
||||
continue;
|
||||
}
|
||||
else {
|
||||
@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
|
||||
|
||||
// find a range of free blocks
|
||||
void* p = NULL;
|
||||
size_t count = mi_atomic_read_relaxed(®ions_count);
|
||||
size_t count = mi_atomic_read(®ions_count);
|
||||
size_t idx = tld->region_idx; // start index is per-thread to reduce contention
|
||||
for (size_t visited = 0; visited < count; visited++, idx++) {
|
||||
if (idx >= count) idx = 0; // wrap around
|
||||
@ -377,7 +377,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
|
||||
mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
|
||||
mem_region_t* region = ®ions[idx];
|
||||
mi_assert_internal((mi_atomic_read_relaxed(®ion->map) & mask) == mask ); // claimed?
|
||||
void* start = mi_atomic_read_ptr_relaxed(®ion->start);
|
||||
void* start = mi_atomic_read_ptr(®ion->start);
|
||||
mi_assert_internal(start != NULL);
|
||||
void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
|
||||
mi_assert_internal(blocks_start == p); // not a pointer in our area?
|
||||
|
@ -38,8 +38,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
|
||||
if (mi_is_in_main(stat))
|
||||
{
|
||||
// add atomically (for abandoned pages)
|
||||
int64_t current = mi_atomic_add64(&stat->current,amount);
|
||||
if (current > stat->peak) stat->peak = stat->current; // racing.. it's ok
|
||||
mi_atomic_add64(&stat->current,amount);
|
||||
if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
|
||||
if (amount > 0) {
|
||||
mi_atomic_add64(&stat->allocated,amount);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user