update stress test for more realistic size distribution
This commit is contained in:
parent
fd3ce5dc7d
commit
94bfb47725
@ -7,7 +7,7 @@ terms of the MIT license.
|
||||
/* This is a stress test for the allocator, using multiple threads and
|
||||
transferring objects between threads. This is not a typical workload
|
||||
but uses a random linear size distribution. Timing can also depend on
|
||||
(random) thread scheduling. Do not use this test as a benchmark!
|
||||
(random) thread scheduling. Do not use this test as a benchmark!
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
@ -17,10 +17,12 @@ terms of the MIT license.
|
||||
#include <string.h>
|
||||
#include <mimalloc.h>
|
||||
|
||||
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
|
||||
//
|
||||
// argument defaults
|
||||
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
||||
static int SCALE = 12; // scaling factor
|
||||
static int ITER = 50; // N full iterations re-creating all threads
|
||||
static int THREADS = 32; // more repeatable if THREADS <= #processors
|
||||
static int SCALE = 50; // scaling factor
|
||||
static int ITER = 10; // N full iterations re-creating all threads
|
||||
|
||||
// static int THREADS = 8; // more repeatable if THREADS <= #processors
|
||||
// static int SCALE = 100; // scaling factor
|
||||
@ -56,21 +58,21 @@ typedef uintptr_t* random_t;
|
||||
|
||||
static uintptr_t pick(random_t r) {
|
||||
uintptr_t x = *r;
|
||||
#if (UINTPTR_MAX > UINT32_MAX)
|
||||
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
|
||||
#if (UINTPTR_MAX > UINT32_MAX)
|
||||
// by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
|
||||
x ^= x >> 30;
|
||||
x *= 0xbf58476d1ce4e5b9UL;
|
||||
x ^= x >> 27;
|
||||
x *= 0x94d049bb133111ebUL;
|
||||
x ^= x >> 31;
|
||||
#else
|
||||
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
|
||||
#else
|
||||
// by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
|
||||
x ^= x >> 16;
|
||||
x *= 0x7feb352dUL;
|
||||
x ^= x >> 15;
|
||||
x *= 0x846ca68bUL;
|
||||
x ^= x >> 16;
|
||||
#endif
|
||||
#endif
|
||||
*r = x;
|
||||
return x;
|
||||
}
|
||||
@ -81,13 +83,13 @@ static bool chance(size_t perc, random_t r) {
|
||||
|
||||
static void* alloc_items(size_t items, random_t r) {
|
||||
if (chance(1, r)) {
|
||||
if (chance(1, r) && allow_large_objects) items *= 1000; // 0.01% giant
|
||||
else if (chance(10, r) && allow_large_objects) items *= 100; // 0.1% huge
|
||||
else items *= 10; // 1% large objects;
|
||||
if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
|
||||
else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
|
||||
else items *= 100; // 1% large objects;
|
||||
}
|
||||
if (items==40) items++; // pthreads uses that size for stack increases
|
||||
if (use_one_size>0) items = (use_one_size/sizeof(uintptr_t));
|
||||
uintptr_t* p = (uintptr_t*)custom_malloc(items*sizeof(uintptr_t));
|
||||
if (items == 40) items++; // pthreads uses that size for stack increases
|
||||
if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
|
||||
uintptr_t* p = (uintptr_t*)custom_malloc(items * sizeof(uintptr_t));
|
||||
if (p != NULL) {
|
||||
for (uintptr_t i = 0; i < items; i++) p[i] = (items - i) ^ cookie;
|
||||
}
|
||||
@ -99,7 +101,7 @@ static void free_items(void* p) {
|
||||
uintptr_t* q = (uintptr_t*)p;
|
||||
uintptr_t items = (q[0] ^ cookie);
|
||||
for (uintptr_t i = 0; i < items; i++) {
|
||||
if ((q[i]^cookie) != items - i) {
|
||||
if ((q[i] ^ cookie) != items - i) {
|
||||
fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
|
||||
abort();
|
||||
}
|
||||
@ -111,30 +113,30 @@ static void free_items(void* p) {
|
||||
|
||||
static void stress(intptr_t tid) {
|
||||
//bench_start_thread();
|
||||
uintptr_t r = tid ^ 42;
|
||||
const size_t max_item = 128; // in words
|
||||
const size_t max_item_retained = 10*max_item;
|
||||
size_t allocs = 25*SCALE*(tid%8 + 1); // some threads do more
|
||||
size_t retain = allocs/2;
|
||||
uintptr_t r = tid * 43;
|
||||
const size_t max_item_shift = 5; // 128
|
||||
const size_t max_item_retained_shift = max_item_shift + 2;
|
||||
size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
|
||||
size_t retain = allocs / 2;
|
||||
void** data = NULL;
|
||||
size_t data_size = 0;
|
||||
size_t data_top = 0;
|
||||
void** retained = (void**)custom_malloc(retain*sizeof(void*));
|
||||
void** retained = (void**)custom_malloc(retain * sizeof(void*));
|
||||
size_t retain_top = 0;
|
||||
|
||||
while (allocs>0 || retain>0) {
|
||||
while (allocs > 0 || retain > 0) {
|
||||
if (retain == 0 || (chance(50, &r) && allocs > 0)) {
|
||||
// 50%+ alloc
|
||||
allocs--;
|
||||
if (data_top >= data_size) {
|
||||
data_size += 100000;
|
||||
data = (void**)custom_realloc(data, data_size*sizeof(void*));
|
||||
data = (void**)custom_realloc(data, data_size * sizeof(void*));
|
||||
}
|
||||
data[data_top++] = alloc_items((pick(&r) % max_item) + 1, &r);
|
||||
data[data_top++] = alloc_items( 1ULL << (pick(&r) % max_item_shift), &r);
|
||||
}
|
||||
else {
|
||||
// 25% retain
|
||||
retained[retain_top++] = alloc_items(10*((pick(&r) % max_item_retained) + 1), &r);
|
||||
retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
|
||||
retain--;
|
||||
}
|
||||
if (chance(66, &r) && data_top > 0) {
|
||||
@ -167,36 +169,45 @@ static void stress(intptr_t tid) {
|
||||
static void run_os_threads(size_t nthreads);
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc>=2) {
|
||||
// > mimalloc-test-stress [THREADS] [SCALE] [ITER]
|
||||
if (argc >= 2) {
|
||||
char* end;
|
||||
long n = strtol(argv[1], &end, 10);
|
||||
if (n > 0) THREADS = n;
|
||||
}
|
||||
if (argc>=3) {
|
||||
if (argc >= 3) {
|
||||
char* end;
|
||||
long n = (strtol(argv[2], &end, 10));
|
||||
if (n > 0) SCALE = n;
|
||||
}
|
||||
printf("start with %i threads with a %i%% load-per-thread\n", THREADS, SCALE);
|
||||
if (argc >= 4) {
|
||||
char* end;
|
||||
long n = (strtol(argv[3], &end, 10));
|
||||
if (n > 0) ITER = n;
|
||||
}
|
||||
printf("start with %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
|
||||
//int res = mi_reserve_huge_os_pages(4,1);
|
||||
//printf("(reserve huge: %i\n)", res);
|
||||
|
||||
//bench_start_program();
|
||||
//bench_start_program();
|
||||
|
||||
// Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
|
||||
mi_stats_reset();
|
||||
uintptr_t r = 43;
|
||||
uintptr_t r = 43 * 43;
|
||||
for (int n = 0; n < ITER; n++) {
|
||||
run_os_threads(THREADS);
|
||||
for (int i = 0; i < TRANSFERS; i++) {
|
||||
if (chance(50, &r) || n+1 == ITER) { // free all on last run, otherwise free half of the transfers
|
||||
if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
|
||||
void* p = atomic_exchange_ptr(&transfer[i], NULL);
|
||||
free_items(p);
|
||||
}
|
||||
}
|
||||
mi_collect(false);
|
||||
#ifndef NDEBUG
|
||||
if ((n + 1) % 10 == 0) { printf("- iterations: %3d\n", n + 1); }
|
||||
#endif
|
||||
}
|
||||
|
||||
mi_collect(false);
|
||||
mi_collect(true);
|
||||
mi_stats_print(NULL);
|
||||
//bench_end_program();
|
||||
@ -230,11 +241,11 @@ static void run_os_threads(size_t nthreads) {
|
||||
}
|
||||
|
||||
static void* atomic_exchange_ptr(volatile void** p, void* newval) {
|
||||
#if (INTPTR_MAX == UINT32_MAX)
|
||||
#if (INTPTR_MAX == UINT32_MAX)
|
||||
return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
|
||||
#else
|
||||
#else
|
||||
return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
|
||||
@ -247,8 +258,8 @@ static void* thread_entry(void* param) {
|
||||
}
|
||||
|
||||
static void run_os_threads(size_t nthreads) {
|
||||
pthread_t* threads = (pthread_t*)custom_malloc(nthreads*sizeof(pthread_t));
|
||||
memset(threads, 0, sizeof(pthread_t)*nthreads);
|
||||
pthread_t* threads = (pthread_t*)custom_malloc(nthreads * sizeof(pthread_t));
|
||||
memset(threads, 0, sizeof(pthread_t) * nthreads);
|
||||
//pthread_setconcurrency(nthreads);
|
||||
for (uintptr_t i = 0; i < nthreads; i++) {
|
||||
pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
|
||||
|
Loading…
Reference in New Issue
Block a user