small optimizations, use bitwise alignment
This commit is contained in:
parent
66b8c37ab3
commit
189ad0f81d
@ -87,6 +87,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
|
||||
if(CMAKE_C_COMPILER_ID MATCHES "GNU")
|
||||
list(APPEND mi_cflags -Wno-invalid-memory-model)
|
||||
list(APPEND mi_cflags -fvisibility=hidden)
|
||||
list(APPEND mi_cflags -fbranch-target-load-optimize )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -39,7 +39,6 @@ bool _mi_preloading(); // true while the C runtime is not ready
|
||||
|
||||
// os.c
|
||||
size_t _mi_os_page_size(void);
|
||||
uintptr_t _mi_align_up(uintptr_t sz, size_t alignment);
|
||||
void _mi_os_init(void); // called from process init
|
||||
void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
|
||||
void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
|
||||
@ -165,6 +164,20 @@ static inline bool mi_mul_overflow(size_t size, size_t count, size_t* total) {
|
||||
#endif
|
||||
}
|
||||
|
||||
// Align upwards
|
||||
// Test whether `x` has at most one bit set.
// Returns 1 when `x & (x - 1)` is zero and 0 otherwise.
// Note: x == 0 also passes this test, so zero is reported as a power of two.
static inline uintptr_t _mi_is_power_of_two(uintptr_t x) {
|
||||
return ((x & (x - 1)) == 0);
|
||||
}
|
||||
// Round `sz` up to a multiple of `alignment`.
//   sz        : value to round up
//   alignment : granularity; power-of-two values use a bit mask,
//               any other value uses a divide/multiply.
// Returns the rounded value. All arithmetic is unsigned, so `sz + mask`
// wraps around silently if it does not fit in a uintptr_t.
// NOTE(review): alignment == 0 is not rejected here -- confirm callers never pass it.
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
|
||||
// mask = alignment - 1; for a power-of-two alignment these are the bits below it
uintptr_t mask = alignment - 1;
|
||||
if ((alignment & mask) == 0) { // power of two?
|
||||
// add the mask, then clear the low bits
return ((sz + mask) & ~mask);
|
||||
}
|
||||
else {
|
||||
// general case: integer division truncates, multiplying back gives a multiple of `alignment`
return (((sz + mask)/alignment)*alignment);
|
||||
}
|
||||
}
|
||||
|
||||
// Align a byte size to a size in _machine words_,
|
||||
// i.e. byte size == `wsize*sizeof(void*)`.
|
||||
static inline size_t _mi_wsize_from_size(size_t size) {
|
||||
@ -324,12 +337,23 @@ static inline void mi_block_set_nextx(uintptr_t cookie, mi_block_t* block, mi_bl
|
||||
}
|
||||
|
||||
// Return the successor of `block` by delegating to `mi_block_nextx`.
// When MI_SECURE is set, the page's `cookie` is passed along;
// otherwise a cookie of 0 is passed and `page` is only marked as unused.
static inline mi_block_t* mi_block_next(mi_page_t* page, mi_block_t* block) {
|
||||
#if MI_SECURE
|
||||
return mi_block_nextx(page->cookie,block);
|
||||
#else
|
||||
// `page` is not needed in this configuration; UNUSED presumably silences the compiler warning
UNUSED(page);
|
||||
return mi_block_nextx(0, block);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Store `next` as the successor of `block` by delegating to `mi_block_set_nextx`.
// When MI_SECURE is set, the page's `cookie` is passed along;
// otherwise a cookie of 0 is passed and `page` is only marked as unused.
static inline void mi_block_set_next(mi_page_t* page, mi_block_t* block, mi_block_t* next) {
|
||||
#if MI_SECURE
|
||||
mi_block_set_nextx(page->cookie,block,next);
|
||||
#else
|
||||
// `page` is not needed in this configuration; UNUSED presumably silences the compiler warning
UNUSED(page);
|
||||
mi_block_set_nextx(0, block, next);
|
||||
#endif
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// Getting the thread id should be performant
|
||||
// as it is called in the fast path of `_mi_free`,
|
||||
|
@ -132,10 +132,9 @@ typedef union mi_page_flags_u {
|
||||
} mi_page_flags_t;
|
||||
|
||||
// Thread free list.
|
||||
// We use bottom 2 bits of the pointer for mi_delayed_t flags
|
||||
// We use the bottom 2 bits of the pointer for mi_delayed_t flags
|
||||
typedef uintptr_t mi_thread_free_t;
|
||||
|
||||
|
||||
// A page contains blocks of one specific size (`block_size`).
|
||||
// Each page has three list of free blocks:
|
||||
// `free` for blocks that can be allocated,
|
||||
@ -165,9 +164,11 @@ typedef struct mi_page_s {
|
||||
mi_page_flags_t flags;
|
||||
uint16_t capacity; // number of blocks committed
|
||||
uint16_t reserved; // number of blocks reserved in memory
|
||||
|
||||
|
||||
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
|
||||
#if MI_SECURE
|
||||
uintptr_t cookie; // random cookie to encode the free lists
|
||||
#endif
|
||||
size_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
|
||||
|
||||
mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
|
||||
@ -182,9 +183,9 @@ typedef struct mi_page_s {
|
||||
|
||||
// improve page index calculation
|
||||
#if MI_INTPTR_SIZE==8
|
||||
//void* padding[1]; // 10 words on 64-bit
|
||||
//void* padding[1]; // 12 words on 64-bit
|
||||
#elif MI_INTPTR_SIZE==4
|
||||
void* padding[1]; // 12 words on 32-bit
|
||||
void* padding[1]; // 12 words on 32-bit
|
||||
#endif
|
||||
} mi_page_t;
|
||||
|
||||
|
@ -52,8 +52,8 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#define mi_attr_alloc_size2(s1,s2)
|
||||
#else
|
||||
#define mi_attr_alloc_size(s) __attribute__((alloc_size(s)))
|
||||
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
|
||||
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
|
||||
#define mi_attr_alloc_size2(s1,s2) __attribute__((alloc_size(s1,s2)))
|
||||
#define mi_cdecl // leads to warnings... __attribute__((cdecl))
|
||||
#endif
|
||||
#else
|
||||
#define mi_decl_thread __thread
|
||||
@ -62,7 +62,7 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
#define mi_attr_malloc
|
||||
#define mi_attr_alloc_size(s)
|
||||
#define mi_attr_alloc_size2(s1,s2)
|
||||
#define mi_cdecl
|
||||
#define mi_cdecl
|
||||
#endif
|
||||
|
||||
// ------------------------------------------------------
|
||||
|
@ -237,9 +237,9 @@ void mi_free(void* p) mi_attr_noexcept
|
||||
#endif
|
||||
|
||||
// adjust if it might be an un-aligned block
|
||||
if (mi_likely(page->flags.value==0)) { // note: merging both tests (local | value) does not matter for performance
|
||||
if (mi_likely(page->flags.value==0)) { // not full or aligned
|
||||
mi_block_t* block = (mi_block_t*)p;
|
||||
if (mi_likely(local)) {
|
||||
if (mi_likely(local)) { // note: merging both tests (local | value) does not matter for performance
|
||||
// owning thread can free a block directly
|
||||
mi_block_set_next(page, block, page->local_free); // note: moving this write earlier does not matter for performance
|
||||
page->local_free = block;
|
||||
@ -248,7 +248,7 @@ void mi_free(void* p) mi_attr_noexcept
|
||||
}
|
||||
else {
|
||||
// use atomic operations for a multi-threaded free
|
||||
_mi_free_block_mt(page, block);
|
||||
_mi_free_block_mt(page, block);
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -12,9 +12,11 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
|
||||
// Empty page used to initialize the small free pages array
|
||||
const mi_page_t _mi_page_empty = {
|
||||
0, false, false, false, {0},
|
||||
0, 0,
|
||||
NULL, 0, 0, // free, used, cookie
|
||||
0, false, false, false, {0}, 0, 0,
|
||||
NULL, 0, // free, used
|
||||
#if MI_SECURE
|
||||
0,
|
||||
#endif
|
||||
NULL, 0, 0,
|
||||
0, NULL, NULL, NULL
|
||||
#if (MI_INTPTR_SIZE==4)
|
||||
|
7
src/os.c
7
src/os.c
@ -34,13 +34,6 @@ terms of the MIT license. A copy of the license can be found in the file
|
||||
----------------------------------------------------------- */
|
||||
bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
|
||||
|
||||
// Round `sz` up to a multiple of `alignment` using division, for any alignment value.
// Returns the rounded value, or 0 when adding `alignment` wraps around.
// NOTE(review): `alignment` is used as a divisor without a zero check -- confirm callers never pass 0.
uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
|
||||
// round down to a multiple of `alignment` first
uintptr_t x = (sz / alignment) * alignment;
|
||||
// if that fell below `sz`, step up to the next multiple
if (x < sz) x += alignment;
|
||||
// still below `sz` means the addition wrapped around
if (x < sz) return 0; // overflow
|
||||
return x;
|
||||
}
|
||||
|
||||
// Pointer variant of `_mi_align_up`: converts `p` to a uintptr_t, rounds it up
// to `alignment` via `_mi_align_up`, and converts the result back to `void*`.
static void* mi_align_up_ptr(void* p, size_t alignment) {
|
||||
return (void*)_mi_align_up((uintptr_t)p, alignment);
|
||||
}
|
||||
|
14
src/page.c
14
src/page.c
@ -93,7 +93,9 @@ static bool mi_page_is_valid_init(mi_page_t* page) {
|
||||
|
||||
bool _mi_page_is_valid(mi_page_t* page) {
|
||||
mi_assert_internal(mi_page_is_valid_init(page));
|
||||
#if MI_SECURE
|
||||
mi_assert_internal(page->cookie != 0);
|
||||
#endif
|
||||
if (page->heap!=NULL) {
|
||||
mi_segment_t* segment = _mi_page_segment(page);
|
||||
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == page->heap->thread_id);
|
||||
@ -119,7 +121,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay ) {
|
||||
else if (mi_unlikely(mi_tf_delayed(tfree) == MI_DELAYED_FREEING)) {
|
||||
mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done.
|
||||
continue; // and try again
|
||||
}
|
||||
}
|
||||
}
|
||||
while((mi_tf_delayed(tfreex) != mi_tf_delayed(tfree)) && // avoid atomic operation if already equal
|
||||
!mi_atomic_compare_exchange((volatile uintptr_t*)&page->thread_free, tfreex, tfree));
|
||||
@ -258,7 +260,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) {
|
||||
mi_block_t* next = mi_block_nextx(heap->cookie,block);
|
||||
// use internal free instead of regular one to keep stats etc correct
|
||||
if (!_mi_free_delayed_block(block)) {
|
||||
// we might already start delayed freeing while another thread has not yet
|
||||
// we might already start delayed freeing while another thread has not yet
|
||||
// reset the delayed_freeing flag; in that case delay it further by reinserting.
|
||||
mi_block_t* dfree;
|
||||
do {
|
||||
@ -498,7 +500,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_stats_t* st
|
||||
if (page->capacity >= page->reserved) return;
|
||||
|
||||
size_t page_size;
|
||||
_mi_page_start(_mi_page_segment(page), page, &page_size);
|
||||
_mi_page_start(_mi_page_segment(page), page, &page_size);
|
||||
_mi_stat_increase(&stats->pages_extended, 1);
|
||||
|
||||
// calculate the extend count
|
||||
@ -533,7 +535,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
||||
page->block_size = block_size;
|
||||
mi_assert_internal(page_size / block_size < (1L<<16));
|
||||
page->reserved = (uint16_t)(page_size / block_size);
|
||||
#if MI_SECURE
|
||||
page->cookie = _mi_heap_random(heap) | 1;
|
||||
#endif
|
||||
|
||||
mi_assert_internal(page->capacity == 0);
|
||||
mi_assert_internal(page->free == NULL);
|
||||
@ -543,7 +547,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
|
||||
mi_assert_internal(page->next == NULL);
|
||||
mi_assert_internal(page->prev == NULL);
|
||||
mi_assert_internal(page->flags.has_aligned == false);
|
||||
#if MI_SECURE
|
||||
mi_assert_internal(page->cookie != 0);
|
||||
#endif
|
||||
mi_assert_expensive(mi_page_is_valid_init(page));
|
||||
|
||||
// initialize an initial free list
|
||||
@ -683,7 +689,7 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) {
|
||||
mi_assert_internal(mi_page_immediate_available(page));
|
||||
mi_assert_internal(page->block_size == block_size);
|
||||
mi_heap_stat_increase( heap, huge, block_size);
|
||||
}
|
||||
}
|
||||
return page;
|
||||
}
|
||||
|
||||
|
@ -235,8 +235,8 @@ static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_se
|
||||
|
||||
|
||||
// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use,
|
||||
// and no more than 4.
|
||||
#define MI_SEGMENT_CACHE_MAX (4)
|
||||
// and no more than 2.
|
||||
#define MI_SEGMENT_CACHE_MAX (2)
|
||||
#define MI_SEGMENT_CACHE_FRACTION (8)
|
||||
|
||||
// note: returned segment may be partially reset
|
||||
@ -252,7 +252,7 @@ static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t
|
||||
}
|
||||
|
||||
static bool mi_segment_cache_full(mi_segments_tld_t* tld) {
|
||||
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
|
||||
if (tld->cache_count < MI_SEGMENT_CACHE_MAX &&
|
||||
tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { // always allow 1 element cache
|
||||
return false;
|
||||
}
|
||||
@ -318,7 +318,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind,
|
||||
size_t page_size = (page_kind == MI_PAGE_HUGE ? segment_size : (size_t)1 << page_shift);
|
||||
|
||||
// Try to get it from our thread local cache first
|
||||
bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM);
|
||||
bool commit = mi_option_is_enabled(mi_option_eager_commit) || (page_kind > MI_PAGE_MEDIUM);
|
||||
bool protection_still_good = false;
|
||||
mi_segment_t* segment = mi_segment_cache_pop(segment_size, tld);
|
||||
if (segment != NULL) {
|
||||
@ -702,10 +702,10 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld
|
||||
|
||||
mi_page_t* _mi_segment_page_alloc(size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
|
||||
mi_page_t* page;
|
||||
if (block_size <= (MI_SMALL_PAGE_SIZE/16)*3) {
|
||||
if (block_size <= (MI_SMALL_PAGE_SIZE/4)) {
|
||||
page = mi_segment_small_page_alloc(tld,os_tld);
|
||||
}
|
||||
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/16)*3) {
|
||||
else if (block_size <= (MI_MEDIUM_PAGE_SIZE/4)) {
|
||||
page = mi_segment_medium_page_alloc(tld, os_tld);
|
||||
}
|
||||
else if (block_size < (MI_LARGE_SIZE_MAX - sizeof(mi_segment_t))) {
|
||||
|
Loading…
Reference in New Issue
Block a user