diff --git a/ide/vs2019/mimalloc.vcxproj b/ide/vs2019/mimalloc.vcxproj
index 037e380..a98e78b 100644
--- a/ide/vs2019/mimalloc.vcxproj
+++ b/ide/vs2019/mimalloc.vcxproj
@@ -100,7 +100,7 @@
       MI_DEBUG=3;%(PreprocessorDefinitions);
       CompileAsCpp
       false
-      stdcpp17
+      Default
@@ -119,7 +119,7 @@
       MI_DEBUG=3;%(PreprocessorDefinitions);
       CompileAsCpp
       false
-      stdcpp17
+      Default
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 3335414..902d2fd 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -20,16 +20,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_trace_message(...)
 #endif
 
+#define MI_CACHE_LINE          64
 #if defined(_MSC_VER)
 #pragma warning(disable:4127)   // suppress constant conditional warning (due to MI_SECURE paths)
 #define mi_decl_noinline        __declspec(noinline)
 #define mi_decl_thread          __declspec(thread)
+#define mi_decl_cache_align     __declspec(align(MI_CACHE_LINE))
 #elif (defined(__GNUC__) && (__GNUC__>=3))  // includes clang and icc
 #define mi_decl_noinline        __attribute__((noinline))
 #define mi_decl_thread          __thread
+#define mi_decl_cache_align     __attribute__((aligned(MI_CACHE_LINE)))
 #else
 #define mi_decl_noinline
 #define mi_decl_thread          __thread   // hope for the best :-)
+#define mi_decl_cache_align
 #endif
diff --git a/src/arena.c b/src/arena.c
index acb9224..ac599f3 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -54,7 +54,7 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 #define MI_MAX_ARENAS         (64)    // not more than 256 (since we use 8 bits in the memid)
 
 // A memory arena descriptor
-typedef struct mi_arena_s {
+typedef mi_decl_cache_align struct mi_arena_s {
   _Atomic(uint8_t*) start;    // the start of the memory area
   size_t block_count;         // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
   size_t field_count;         // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
@@ -70,8 +70,8 @@ typedef struct mi_arena_s {
 
 // The available arenas
-static _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
-static _Atomic(uintptr_t)   mi_arena_count; // = 0
+static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
+static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0
 
 /* -----------------------------------------------------------
diff --git a/src/os.c b/src/os.c
index 6e8c12d..b8dfaa7 100644
--- a/src/os.c
+++ b/src/os.c
@@ -397,7 +397,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
 // On 64-bit systems, we can do efficient aligned allocation by using
 // the 4TiB to 30TiB area to allocate them.
 #if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED)))
-static volatile _Atomic(uintptr_t) aligned_base;
+static volatile mi_decl_cache_align _Atomic(uintptr_t) aligned_base;
 
 // Return a 4MiB aligned address that is probably available
 static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) {
@@ -905,7 +905,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
 
 #if (MI_INTPTR_SIZE >= 8)
 // To ensure proper alignment, use our own area for huge OS pages
-static _Atomic(uintptr_t) mi_huge_start; // = 0
+static mi_decl_cache_align _Atomic(uintptr_t) mi_huge_start; // = 0
 
 // Claim an aligned address range for huge pages
 static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) {
diff --git a/src/segment.c b/src/segment.c
index 7aced87..a26ac44 100644
--- a/src/segment.c
+++ b/src/segment.c
@@ -365,9 +365,6 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) {
 }
 
-
-
-
 /* -----------------------------------------------------------
    Segment size calculations
 ----------------------------------------------------------- */
@@ -829,13 +826,15 @@ reuse their pages and/or free them eventually
 
 We maintain a global list of abandoned segments that are
 reclaimed on demand. Since this is shared among threads
 the implementation needs to avoid the A-B-A problem on
-popping abandoned segments which is why tagged pointers are
-used.
+popping abandoned segments:
+We use tagged pointers to avoid accidentally identifying
+reused segments, much like stamped references in Java.
+Secondly, we maintain a reader counter to avoid resetting
+or decommitting segments that have a pending read operation.
 ----------------------------------------------------------- */
 
-// Use the bottom 20-bits (on 64-bit) of the aligned segment
-// pointers to put in a tag that increments on update to avoid
-// the A-B-A problem.
+// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers
+// to put in a tag that increments on update to avoid the A-B-A problem.
 #define MI_TAGGED_MASK   MI_SEGMENT_MASK
 typedef uintptr_t        mi_tagged_segment_t;
 
@@ -850,16 +849,17 @@ static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_se
 }
 
 // This is a list of visited abandoned pages that were full at the time.
-// this list migrates to `abandoned` when that becomes NULL.
-static volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
+// this list migrates to `abandoned` when that becomes NULL. The use of
+// this list reduces contention and the rate at which segments are visited.
+static mi_decl_cache_align volatile _Atomic(mi_segment_t*) abandoned_visited; // = NULL
 
-// The abandoned page list.
-static volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
+// The abandoned page list (tagged as it supports pop)
+static mi_decl_cache_align volatile _Atomic(mi_tagged_segment_t) abandoned; // = NULL
 
 // We also maintain a count of current readers of the abandoned list
 // in order to prevent resetting/decommitting segment memory if it might
 // still be read.
-static volatile _Atomic(uintptr_t) abandoned_readers; // = 0
+static mi_decl_cache_align volatile _Atomic(uintptr_t) abandoned_readers; // = 0
 
 // Push on the visited list
 static void mi_abandoned_visited_push(mi_segment_t* segment) {
diff --git a/test/test-stress.c b/test/test-stress.c
index 72e4e85..19f1036 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -32,10 +32,10 @@ static int ITER = 50; // N full iterations destructing and re-creating a
 // static int THREADS = 8;   // more repeatable if THREADS <= #processors
 // static int SCALE = 100;   // scaling factor
 
-#define STRESS   // undefine for leak test
+// #define STRESS   // undefine for leak test
 
 static bool   allow_large_objects = true;    // allow very large objects?
-static size_t use_one_size = 1;              // use single object size of `N * sizeof(uintptr_t)`?
+static size_t use_one_size = 0;              // use single object size of `N * sizeof(uintptr_t)`?
 
 #ifdef USE_STD_MALLOC
@@ -198,7 +198,7 @@ static void test_stress(void) {
 
 static void leak(intptr_t tid) {
   uintptr_t r = (43*tid)^ticks();
-  void* p = alloc_items(pick(&r)%128, &r);
+  void* p = alloc_items(1 /*pick(&r)%128*/, &r);
   if (chance(50, &r)) {
     intptr_t i = (pick(&r) % TRANSFERS);
     void* q = atomic_exchange_ptr(&transfer[i], p);
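
The recurring change in this patch is the new `mi_decl_cache_align` attribute on shared atomic globals: each of them is aligned to its own cache line so that threads hammering unrelated counters do not repeatedly invalidate each other's cache lines (false sharing). Below is a minimal standalone sketch of that idea; the two counters and the tick functions are hypothetical and the macro is only replicated from `mimalloc-internal.h` for illustration.

#include <stdatomic.h>
#include <stdint.h>

#define MI_CACHE_LINE  64
#if defined(_MSC_VER)
#define mi_decl_cache_align  __declspec(align(MI_CACHE_LINE))
#else
#define mi_decl_cache_align  __attribute__((aligned(MI_CACHE_LINE)))
#endif

// Hypothetical globals, analogous to `mi_arena_count` or `abandoned_readers`.
// Each starts on its own 64-byte cache line, so a thread updating `counter_a`
// does not evict the line holding `counter_b` from another core's cache.
static mi_decl_cache_align _Atomic(uintptr_t) counter_a;
static mi_decl_cache_align _Atomic(uintptr_t) counter_b;

void thread_a_tick(void) { atomic_fetch_add(&counter_a, 1); }
void thread_b_tick(void) { atomic_fetch_add(&counter_b, 1); }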
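The segment.c comments describe the abandoned-list pop in terms of tagged pointers: because segments are strongly aligned, the low bits of a segment pointer are always zero and can carry a counter that is bumped on every update, so a compare-and-swap notices when the list head was popped and re-pushed in between. A rough standalone sketch of that technique follows; the `node_t`/`list_pop` names and the alignment constant are hypothetical, not mimalloc's actual code.

#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define NODE_ALIGN  ((uintptr_t)1 << 20)   // assumed alignment of each node
#define TAG_MASK    (NODE_ALIGN - 1)       // low bits hold the update tag

typedef struct node_s { struct node_s* next; } node_t;  // stand-in for mi_segment_t
typedef uintptr_t tagged_t;                              // pointer | tag, like mi_tagged_segment_t

static _Atomic(tagged_t) list_head;        // tagged head of the lock-free list

static node_t* tagged_ptr(tagged_t ts) { return (node_t*)(ts & ~TAG_MASK); }

static tagged_t tagged_make(node_t* p, tagged_t prev) {
  tagged_t tag = (prev + 1) & TAG_MASK;    // increment the tag on every update
  return (tagged_t)p | tag;
}

// Pop the head; the tag makes the CAS fail if another thread popped this node
// and pushed it back in the meantime (the classic A-B-A scenario).
static node_t* list_pop(void) {
  tagged_t ts = atomic_load(&list_head);
  while (1) {
    node_t* head = tagged_ptr(ts);
    if (head == NULL) return NULL;
    tagged_t next = tagged_make(head->next, ts);
    if (atomic_compare_exchange_weak(&list_head, &ts, next)) return head;
    // on failure `ts` was reloaded with the current head; retry
  }
}

Note that `head->next` is still dereferenced before the CAS succeeds, which is why the patch also guards the list with a reader counter before segment memory may be reset or decommitted.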
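The reader counter mentioned in the same comment block addresses that remaining hazard: a thread may still be reading a segment it found on the list while another thread wants to reset or decommit its memory. A minimal sketch of such a guard, again with hypothetical names rather than mimalloc's actual helpers:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

static _Atomic(uintptr_t) list_readers;    // threads currently traversing the list

static void reader_enter(void) { atomic_fetch_add(&list_readers, 1); }
static void reader_leave(void) { atomic_fetch_sub(&list_readers, 1); }

// Destructive operations (resetting/decommitting a popped segment's memory)
// are only allowed when no reader can still be touching it; otherwise the
// caller simply skips the optimization and tries again later.
static bool may_reset_memory(void) {
  return atomic_load(&list_readers) == 0;
}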