diff --git a/include/mimalloc-atomic.h b/include/mimalloc-atomic.h
index 739d051..3a289fe 100644
--- a/include/mimalloc-atomic.h
+++ b/include/mimalloc-atomic.h
@@ -30,26 +30,32 @@ terms of the MIT license. A copy of the license can be found in the file
 // ------------------------------------------------------
 
 // Atomically add a 64-bit value; returns the previous value.
-// Note: not using _Atomic(int64_t) as it is only used for stats.
-static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add);
+// Note: not using _Atomic(int64_t) as it is only used for statistics.
+static inline void mi_atomic_add64(volatile int64_t* p, int64_t add);
 
-// Atomically add a value; returns the previous value.
+// Atomically add a value; returns the previous value. Memory ordering is relaxed.
 static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add);
 
-// Atomically compare and exchange a value; returns `true` if successful. May fail spuriously.
+// Atomically compare and exchange a value; returns `true` if successful.
+// May fail spuriously. Memory ordering is release on success, and relaxed on failure.
 // (Note: expected and desired are in opposite order from atomic_compare_exchange)
 static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
 
 // Atomically compare and exchange a value; returns `true` if successful.
+// Memory ordering is acquire-release.
+// (Note: expected and desired are in opposite order from atomic_compare_exchange)
 static inline bool mi_atomic_cas_strong(volatile _Atomic(uintptr_t)* p, uintptr_t desired, uintptr_t expected);
 
-// Atomically exchange a value.
+// Atomically exchange a value. Memory ordering is acquire-release.
 static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange);
 
-// Atomically read a value
+// Atomically read a value. Memory ordering is relaxed.
 static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)* p);
 
-// Atomically write a value
+// Atomically read a value. Memory ordering is acquire.
+static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p);
+
+// Atomically write a value. Memory ordering is release.
 static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x);
 
 // Yield
@@ -76,11 +82,16 @@ static inline uintptr_t mi_atomic_decrement(volatile _Atomic(uintptr_t)* p) {
   return mi_atomic_subu(p, 1);
 }
 
-// Atomically read a pointer
+// Atomically read a pointer; Memory order is relaxed.
 static inline void* mi_atomic_read_ptr_relaxed(volatile _Atomic(void*) const * p) {
   return (void*)mi_atomic_read_relaxed((const volatile _Atomic(uintptr_t)*)p);
 }
 
+// Atomically read a pointer; Memory order is acquire.
+static inline void* mi_atomic_read_ptr(volatile _Atomic(void*) const * p) {
+  return (void*)mi_atomic_read((const volatile _Atomic(uintptr_t)*)p);
+}
+
 // Atomically write a pointer
 static inline void mi_atomic_write_ptr(volatile _Atomic(void*)* p, void* x) {
   mi_atomic_write((volatile _Atomic(uintptr_t)*)p, (uintptr_t)x );
@@ -127,18 +138,21 @@ static inline bool mi_atomic_cas_weak(volatile _Atomic(uintptr_t)* p, uintptr_t
 static inline uintptr_t mi_atomic_exchange(volatile _Atomic(uintptr_t)* p, uintptr_t exchange) {
   return (uintptr_t)RC64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
 }
-static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
+static inline uintptr_t mi_atomic_read(volatile _Atomic(uintptr_t) const* p) {
   return *p;
 }
+static inline uintptr_t mi_atomic_read_relaxed(volatile _Atomic(uintptr_t) const* p) {
+  return mi_atomic_read(p);
+}
 static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
   mi_atomic_exchange(p,x);
 }
 static inline void mi_atomic_yield(void) {
   YieldProcessor();
 }
-static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
+static inline void mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add) {
 #ifdef _WIN64
-  return mi_atomic_add(p,add);
+  mi_atomic_add(p,add);
 #else
   int64_t current;
   int64_t sum;
@@ -146,7 +160,6 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add)
     current = *p;
     sum = current + add;
   } while (_InterlockedCompareExchange64(p, sum, current) != current);
-  return current;
 #endif
 }
 
@@ -156,9 +169,9 @@ static inline int64_t mi_atomic_add64(volatile _Atomic(int64_t)* p, int64_t add)
 #else
 #define MI_USING_STD
 #endif
-static inline int64_t mi_atomic_add64(volatile int64_t* p, int64_t add) {
+static inline void mi_atomic_add64(volatile int64_t* p, int64_t add) {
   MI_USING_STD
-  return atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
+  atomic_fetch_add_explicit((volatile _Atomic(int64_t)*)p, add, memory_order_relaxed);
 }
 static inline intptr_t mi_atomic_add(volatile _Atomic(intptr_t)* p, intptr_t add) {
   MI_USING_STD
@@ -180,6 +193,10 @@ static inline uintptr_t mi_atomic_read_relaxed(const volatile _Atomic(uintptr_t)
   MI_USING_STD
   return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_relaxed);
 }
+static inline uintptr_t mi_atomic_read(const volatile _Atomic(uintptr_t)* p) {
+  MI_USING_STD
+  return atomic_load_explicit((volatile _Atomic(uintptr_t)*) p, memory_order_acquire);
+}
 static inline void mi_atomic_write(volatile _Atomic(uintptr_t)* p, uintptr_t x) {
   MI_USING_STD
   return atomic_store_explicit(p, x, memory_order_release);
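
The header changes above spell out the ordering contract of each primitive: mi_atomic_read is an acquire load, mi_atomic_write is a release store, and the *_relaxed variants keep their weaker guarantees. A minimal sketch of how such an acquire/release pair is meant to be used (illustrative only, not part of the patch; `payload` and `published` are made-up names):

    static int                         payload;    // ordinary, non-atomic data
    static volatile _Atomic(uintptr_t) published;  // publication flag

    static void producer(void) {
      payload = 42;                    // plain store
      mi_atomic_write(&published, 1);  // release: the store to payload cannot move after this
    }

    static int consumer(void) {
      if (mi_atomic_read(&published) == 1) {  // acquire: pairs with the release write
        return payload;                       // guaranteed to observe 42
      }
      return -1;                              // not published yet
    }
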
diff --git a/src/alloc.c b/src/alloc.c
index 97c5fcc..7e89a59 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -118,22 +118,24 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc
   mi_segment_t* segment = _mi_page_segment(page);
   if (segment->page_kind==MI_PAGE_HUGE) {
     // huge page segments are always abandoned and can be freed immediately
-    mi_assert_internal(segment->thread_id==0);
-    mi_assert_internal(segment->abandoned_next==NULL);
+    mi_assert_internal(mi_atomic_read_relaxed(&segment->thread_id)==0);
+    mi_assert_internal(mi_atomic_read_ptr_relaxed(mi_atomic_cast(void*,&segment->abandoned_next))==NULL);
     // claim it and free
-    mi_block_set_next(page, block, page->free);
-    page->free = block;
-    page->used--;
     mi_heap_t* heap = mi_get_default_heap();
-    segment->thread_id = heap->thread_id;
-    _mi_segment_page_free(page,true,&heap->tld->segments);
+    // paranoia: if this is the last reference, the cas should always succeed
+    if (mi_atomic_cas_strong(&segment->thread_id,heap->thread_id,0)) {
+      mi_block_set_next(page, block, page->free);
+      page->free = block;
+      page->used--;
+      _mi_segment_page_free(page,true,&heap->tld->segments);
+    }
     return;
   }
 
   do {
     tfree = page->thread_free;
     use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE ||
-                   (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == page->thread_freed+1)
+                   (mi_tf_delayed(tfree) == MI_NO_DELAYED_FREE && page->used == mi_atomic_read_relaxed(&page->thread_freed)+1)  // data-race but ok, just optimizes early release of the page
                    );
     if (mi_unlikely(use_delayed)) {
       // unlikely: this only happens on the first concurrent free in a page that is in the full list
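
In _mi_free_block_mt the plain `segment->thread_id = heap->thread_id` assignment becomes a strong compare-and-swap from 0, so only one of several racing frees can claim an abandoned huge-page segment and release it. A condensed sketch of that claim pattern (illustrative; `claim_abandoned` is a hypothetical helper, not in the patch):

    // Claim an abandoned segment: thread_id == 0 means abandoned, and only the
    // thread that swings it from 0 to its own id may free the segment.
    static bool claim_abandoned(volatile _Atomic(uintptr_t)* thread_id, uintptr_t my_id) {
      return mi_atomic_cas_strong(thread_id, my_id, 0);  // desired value, then expected value
    }
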
diff --git a/src/memory.c b/src/memory.c
index 1ea6ee1..268dc15 100644
--- a/src/memory.c
+++ b/src/memory.c
@@ -131,7 +131,7 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
   mi_assert_internal(&regions[idx] == region);
 
   // ensure the region is reserved
-  void* start = mi_atomic_read_ptr_relaxed(&region->start);
+  void* start = mi_atomic_read_ptr(&region->start);
   if (start == NULL) {
     start = _mi_os_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, mi_option_is_enabled(mi_option_eager_region_commit), tld);
@@ -154,9 +154,9 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
       // we assign it to a later slot instead (up to 4 tries).
       // note: we don't need to increment the region count, this will happen on another allocation
       for(size_t i = 1; i <= 4 && idx + i < MI_REGION_MAX; i++) {
-        void* s = mi_atomic_read_ptr_relaxed(&regions[idx+i].start);
+        void* s = mi_atomic_read_ptr(&regions[idx+i].start);
         if (s == NULL) { // quick test
-          if (mi_atomic_cas_ptr_weak(&regions[idx+i].start, start, s)) {
+          if (mi_atomic_cas_ptr_strong(&regions[idx+i].start, start, NULL)) {
             start = NULL;
             break;
           }
@@ -167,10 +167,10 @@ static bool mi_region_commit_blocks(mem_region_t* region, size_t idx, size_t bit
        _mi_os_free(start, MI_REGION_SIZE, tld->stats);
      }
      // and continue with the memory at our index
-     start = mi_atomic_read_ptr_relaxed(&region->start);
+     start = mi_atomic_read_ptr(&region->start);
    }
  }
-  mi_assert_internal(start == mi_atomic_read_ptr_relaxed(&region->start));
+  mi_assert_internal(start == mi_atomic_read_ptr(&region->start));
  mi_assert_internal(start != NULL);
 
  // Commit the blocks to memory
@@ -230,7 +230,7 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
   const uintptr_t mask = mi_region_block_mask(blocks, 0);
   const size_t bitidx_max = MI_REGION_MAP_BITS - blocks;
 
-  uintptr_t map = mi_atomic_read_relaxed(&region->map);
+  uintptr_t map = mi_atomic_read(&region->map);
 
 #ifdef MI_HAVE_BITSCAN
   size_t bitidx = mi_bsf(~map);    // quickly find the first zero bit if possible
@@ -245,9 +245,9 @@ static bool mi_region_alloc_blocks(mem_region_t* region, size_t idx, size_t bloc
     mi_assert_internal((m >> bitidx) == mask);  // no overflow?
     uintptr_t newmap = map | m;
     mi_assert_internal((newmap^map) >> bitidx == mask);
-    if (!mi_atomic_cas_strong(&region->map, newmap, map)) {
+    if (!mi_atomic_cas_weak(&region->map, newmap, map)) {
       // no success, another thread claimed concurrently.. keep going
-      map = mi_atomic_read_relaxed(&region->map);
+      map = mi_atomic_read(&region->map);
       continue;
     }
     else {
@@ -317,7 +317,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool commit, size_t*
 
   // find a range of free blocks
   void* p = NULL;
-  size_t count = mi_atomic_read_relaxed(&regions_count);
+  size_t count = mi_atomic_read(&regions_count);
   size_t idx = tld->region_idx; // start index is per-thread to reduce contention
   for (size_t visited = 0; visited < count; visited++, idx++) {
     if (idx >= count) idx = 0;  // wrap around
@@ -377,7 +377,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, mi_stats_t* stats) {
     mi_assert_internal(idx < MI_REGION_MAX); if (idx >= MI_REGION_MAX) return; // or `abort`?
     mem_region_t* region = &regions[idx];
     mi_assert_internal((mi_atomic_read_relaxed(&region->map) & mask) == mask );  // claimed?
-    void* start = mi_atomic_read_ptr_relaxed(&region->start);
+    void* start = mi_atomic_read_ptr(&region->start);
     mi_assert_internal(start != NULL);
     void* blocks_start = (uint8_t*)start + (bitidx * MI_SEGMENT_SIZE);
     mi_assert_internal(blocks_start == p); // not a pointer in our area?
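
The region-map update switches from a strong to a weak CAS because it already sits in a retry loop, where a spurious failure simply triggers another read of the map; only the one-shot claim of a region's start pointer keeps the strong variant. A minimal sketch of that loop shape (illustrative only; `atomic_set_bits` is not a function in the patch):

    // Set `mask` bits in `map`, retrying until the CAS succeeds.
    static uintptr_t atomic_set_bits(volatile _Atomic(uintptr_t)* map, uintptr_t mask) {
      uintptr_t old;
      do {
        old = mi_atomic_read(map);                         // re-read the current value on each attempt
      } while (!mi_atomic_cas_weak(map, old | mask, old)); // weak CAS: a spurious failure just loops
      return old;                                          // value observed before our update
    }
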
diff --git a/src/stats.c b/src/stats.c
index 2176ba1..4dddb4b 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -38,8 +38,8 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
   if (mi_is_in_main(stat))
   {
     // add atomically (for abandoned pages)
-    int64_t current = mi_atomic_add64(&stat->current,amount);
-    if (current > stat->peak) stat->peak = stat->current; // racing.. it's ok
+    mi_atomic_add64(&stat->current,amount);
+    if (stat->current > stat->peak) stat->peak = stat->current; // racing.. it's ok
     if (amount > 0) {
       mi_atomic_add64(&stat->allocated,amount);
     }
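
The statistics path now uses the void-returning, relaxed mi_atomic_add64 and reads the counter back non-atomically for the peak update; the race on the peak is benign since the counters are diagnostic only. A small sketch of that idiom (illustrative; `example_stat_t` is a hypothetical stand-in for mi_stat_count_t):

    typedef struct example_stat_s {
      volatile int64_t current;
      volatile int64_t peak;
    } example_stat_t;

    static void example_stat_increase(example_stat_t* stat, int64_t amount) {
      mi_atomic_add64(&stat->current, amount);                     // relaxed atomic increment
      if (stat->current > stat->peak) stat->peak = stat->current;  // racy peak update, acceptable for stats
    }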