diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index 037e380..a98e78b 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -100,7 +100,7 @@
       MI_DEBUG=3;%(PreprocessorDefinitions);
       CompileAsCpp
       false
-      stdcpp17
+      Default
@@ -119,7 +119,7 @@
       MI_DEBUG=3;%(PreprocessorDefinitions);
       CompileAsCpp
       false
-      stdcpp17
+      Default
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 3335414..902d2fd 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -20,16 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
+#define MI_CACHE_LINE          64
 #if defined(_MSC_VER)
 #pragma warning(disable:4127)   // suppress constant conditional warning (due to MI_SECURE paths)
 #define mi_decl_noinline        __declspec(noinline)
 #define mi_decl_thread          __declspec(thread)
+#define mi_decl_cache_align     __declspec(align(MI_CACHE_LINE))
 #elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
 #define mi_decl_noinline        __attribute__((noinline))
 #define mi_decl_thread          __thread
+#define mi_decl_cache_align     __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
 #define mi_decl_thread          __thread   // hope for the best :-)
+#define mi_decl_cache_align
 #endif
diff --git a/src/arena.c b/src/arena.c
index acb9224..ac599f3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 #define MI_MAX_ARENAS         (64)    // not more than 256 (since we use 8 bits in the memid)
 
 // A memory arena descriptor
-typedef struct mi_arena_s {
+typedef mi_decl_cache_align struct mi_arena_s {
   _Atomic(uint8_t*) start;    // the start of the memory area
   size_t block_count;         // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t field_count;         // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
@@ -70,8 +70,8 @@ typedef struct mi_arena_s {
 
 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t)   mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0
 
 /* -----------------------------------------------------------
diff --git a/src/os.c b/src/os.c
index 6e8c12d..b8dfaa7 100644
--- a/src/os.c
+++ b/src/os.c
@@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
 
 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 
 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
 
 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
diff --git a/src/segment.c b/src/segment.c
index 7aced87..a26ac44 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
 }
 
-
-
-
 /* -----------------------------------------------------------
    Segment size calculations
 ----------------------------------------------------------- */
@@ -829,13 +826,15 @@ reuse their pages and/or free them eventually
 
 We maintain a global list of abandoned segments that are
 reclaimed on demand. Since this is shared among threads
 the implementation needs to avoid the A-B-A problem on
-popping abandoned segments which is why tagged pointers are
-used.
+popping abandoned segments:
+We use tagged pointers to avoid accidentally identifying
+reused segments, much like stamped references in Java.
+Secondly, we maintain a reader counter to avoid resetting
+or decommitting segments that have a pending read operation.
 ----------------------------------------------------------- */
 
-// Use the bottom 20-bits (on 64-bit) of the aligned segment
-// pointers to put in a tag that increments on update to avoid
-// the A-B-A problem.
+// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
+// to put in a tag that increments on update to avoid the A-B-A problem.
 #define MI_TAGGED_MASK   MI_SEGMENT_MASK
 typedef uintptr_t        mi_tagged_segment_t;
 
@@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
 }
 
 // This is a list of visited abandoned pages that were full at the time.
-// this list migrates to `abandoned` when that becomes NULL.
-static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+// this list migrates to `abandoned` when that becomes NULL. The use of
+// this list reduces contention and the rate at which segments are visited.
+static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
 
-// The abandoned page list.
-static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+// The abandoned page list (tagged as it supports pop)
+static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
 
 // We also maintain a count of current readers of the abandoned list
 // in order to prevent resetting/decommitting segment memory if it might
 // still be read.
-static volatile _Atomic(uintptr_t) abandoned_readers; // = 0
+static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
 
 // Push on the visited list
 static void mi_abandoned_visited_push(mi_segment_t* segment) {
diff --git a/test/test-stress.c b/test/test-stress.c
index 72e4e85..19f1036 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a
 // static int THREADS = 8;   // more repeatable if THREADS <= #processors
 // static int SCALE = 100;   // scaling factor
 
-#define STRESS   // undefine for leak test
+// #define STRESS   // undefine for leak test
 
 static bool   allow_large_objects = true;    // allow very large objects?
-static size_t use_one_size = 1;              // use single object size of `N * sizeof(uintptr_t)`?
+static size_t use_one_size = 0;              // use single object size of `N * sizeof(uintptr_t)`?
 
 #ifdef USE_STD_MALLOC
@@ -198,7 +198,7 @@ static void test_stress(void) {
 
 static void leak(intptr_t tid) {
   uintptr_t r = (43*tid)^ticks();
-  void* p = alloc_items(pick(&r)%128, &r);
+  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
   if (chance(50, &r)) {
     intptr_t i = (pick(&r) % TRANSFERS);
     void* q = atomic_exchange_ptr(&transfer[i], p);
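
The recurring change in this patch is the new `mi_decl_cache_align` attribute on shared atomic globals: each of them is aligned to its own cache line so that threads hammering unrelated counters do not repeatedly invalidate each other's cache lines (false sharing). Below is a minimal standalone sketch of that idea; the two counters and the tick functions are hypothetical and the macro is only replicated from `mimalloc-internal.h` for illustration.

#include <stdatomic.h>
#include <stdint.h>

#define MI_CACHE_LINE  64
#if defined(_MSC_VER)
#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
#else
#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
#endif

// Hypothetical globals, analogous to `mi_arena_count` or `abandoned_readers`.
// Each starts on its own 64-byte cache line, so a thread updating `counter_a`
// does not evict the line holding `counter_b` from another core's cache.
static mi_decl_cache_align _Atomic(uintptr_t) counter_a;
static mi_decl_cache_align _Atomic(uintptr_t) counter_b;

void thread_a_tick(void) { atomic_fetch_add(&counter_a, 1); }
void thread_b_tick(void) { atomic_fetch_add(&counter_b, 1); }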
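The segment.c comments describe the abandoned-list pop in terms of tagged pointers: because segments are strongly aligned, the low bits of a segment pointer are always zero and can carry a counter that is bumped on every update, so a compare-and-swap notices when the list head was popped and re-pushed in between. A rough standalone sketch of that technique follows; the `node_t`/`list_pop` names and the alignment constant are hypothetical, not mimalloc's actual code.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define NODE_ALIGN  ((uintptr_t)1 << 20)   // assumed alignment of each node
#define TAG_MASK    (NODE_ALIGN - 1)       // low bits hold the update tag

typedef struct node_s { struct node_s* next; } node_t;  // stand-in for mi_segment_t
typedef uintptr_t tagged_t;                              // pointer | tag, like mi_tagged_segment_t

static _Atomic(tagged_t) list_head;        // tagged head of the lock-free list

static node_t* tagged_ptr(tagged_t ts) { return (node_t*)(ts & ~TAG_MASK); }

static tagged_t tagged_make(node_t* p, tagged_t prev) {
  tagged_t tag = (prev + 1) & TAG_MASK;    // increment the tag on every update
  return (tagged_t)p | tag;
}

// Pop the head; the tag makes the CAS fail if another thread popped this node
// and pushed it back in the meantime (the classic A-B-A scenario).
static node_t* list_pop(void) {
  tagged_t ts = atomic_load(&list_head);
  while (1) {
    node_t* head = tagged_ptr(ts);
    if (head == NULL) return NULL;
    tagged_t next = tagged_make(head->next, ts);
    if (atomic_compare_exchange_weak(&list_head, &ts, next)) return head;
    // on failure `ts` was reloaded with the current head; retry
  }
}

Note that `head->next` is still dereferenced before the CAS succeeds, which is why the patch also guards the list with a reader counter before segment memory may be reset or decommitted.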
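The reader counter mentioned in the same comment block addresses that remaining hazard: a thread may still be reading a segment it found on the list while another thread wants to reset or decommit its memory. A minimal sketch of such a guard, again with hypothetical names rather than mimalloc's actual helpers:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic(uintptr_t) list_readers;    // threads currently traversing the list

static void reader_enter(void) { atomic_fetch_add(&list_readers, 1); }
static void reader_leave(void) { atomic_fetch_sub(&list_readers, 1); }

// Destructive operations (resetting/decommitting a popped segment's memory)
// are only allowed when no reader can still be touching it; otherwise the
// caller simply skips the optimization and tries again later.
static bool may_reset_memory(void) {
  return atomic_load(&list_readers) == 0;
}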