[*] Further drop OS requirements

This commit is contained in:
Reece Wilson 2023-07-24 11:40:02 +01:00
parent 74d629643a
commit 18c0ff282b
4 changed files with 49 additions and 134 deletions

View File

@ -159,10 +159,6 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
size_t* current_rss, size_t* peak_rss,
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
// -------------------------------------------------------------------------------------
// Aligned allocation
// Note that `alignment` always follows `size` for consistency with unaligned

View File

@ -287,10 +287,10 @@ static void _mi_thread_done(mi_heap_t* default_heap);
#include <Windows.h>
#include <fibersapi.h>
#if (_WIN32_WINNT < 0x600) // before Windows Vista
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex);
// Thread-local-storage slot API, declared with the same signatures as the
// Windows SDK. Unlike FlsAlloc, TlsAlloc takes no arguments: a TLS slot has no
// per-thread cleanup callback, so nothing is invoked automatically when a
// thread exits.
WINBASEAPI DWORD WINAPI TlsAlloc( VOID );
WINBASEAPI LPVOID WINAPI TlsGetValue( _In_ DWORD dwTlsIndex );
WINBASEAPI BOOL WINAPI TlsSetValue( _In_ DWORD dwTlsIndex, _In_opt_ LPVOID lpTlsValue );
WINBASEAPI BOOL WINAPI TlsFree( _In_ DWORD dwTlsIndex );
#endif
static DWORD mi_fls_key = (DWORD)(-1);
static void NTAPI mi_fls_done(PVOID value) {
@ -318,7 +318,7 @@ static void mi_process_setup_auto_thread_done(void) {
#if defined(_WIN32) && defined(MI_SHARED_LIB)
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
mi_fls_key = FlsAlloc(&mi_fls_done);
mi_fls_key = TlsAlloc(&mi_fls_done);
#elif defined(MI_USE_PTHREADS)
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
@ -378,7 +378,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
// nothing to do as it is done in DllMain
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
mi_assert_internal(mi_fls_key != 0);
FlsSetValue(mi_fls_key, heap);
TlsSetValue(mi_fls_key, heap);
#elif defined(MI_USE_PTHREADS)
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
pthread_setspecific(_mi_heap_default_key, heap);
@ -491,8 +491,8 @@ static void mi_process_done(void) {
process_done = true;
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsSetValue(mi_fls_key, NULL); // don't call main-thread callback
FlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208
TlsSetValue(mi_fls_key, NULL); // don't call main-thread callback
TlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208
#endif
#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)

View File

@ -138,10 +138,16 @@ typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINI
static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
// Optional kernel32 entry points that are looked up with GetProcAddress instead
// of being linked statically. Each pointer stays NULL when the export is
// missing, so every use must be guarded by a NULL check.
static DWORD (__stdcall* pGetCurrentProcessorNumber)() = NULL;
static DWORD (__stdcall* pGetLargePageMinimum)() = NULL;
static BOOL (__stdcall* pGetNumaHighestNodeNumber)(PULONG HighestNodeNumber) = NULL;
static BOOL (__stdcall* pGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber) = NULL;
static BOOL (__stdcall* pGetNumaNodeProcessorMask)(UCHAR Node, PULONGLONG ProcessorMask) = NULL;
static bool mi_win_enable_large_os_pages()
{
if (large_os_page_size > 0) return true;
if (!pGetLargePageMinimum) return false;
// Try to see if large OS pages are supported
// To use large pages on Windows, we first need access permission
@ -161,7 +167,7 @@ static bool mi_win_enable_large_os_pages()
err = GetLastError();
ok = (err == ERROR_SUCCESS);
if (ok) {
large_os_page_size = GetLargePageMinimum();
large_os_page_size = pGetLargePageMinimum();
}
}
}
@ -187,13 +193,11 @@ void _mi_os_init(void) {
// use VirtualAlloc2FromApp if possible as it is available to Windows store apps
pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
FreeLibrary(hDll);
}
// NtAllocateVirtualMemoryEx is used for huge page allocation
hDll = LoadLibrary(TEXT("ntdll.dll"));
if (hDll != NULL) {
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
FreeLibrary(hDll);
}
// Try to use Win7+ numa API
hDll = LoadLibrary(TEXT("kernel32.dll"));
@ -201,7 +205,11 @@ void _mi_os_init(void) {
pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
FreeLibrary(hDll);
pGetCurrentProcessorNumber = (DWORD(__stdcall*)())GetProcAddress(hDll, "GetCurrentProcessorNumber");
pGetLargePageMinimum = (DWORD(__cdecl*)())GetProcAddress(hDll, "GetLargePageMinimum");
pGetNumaHighestNodeNumber = (BOOL(__stdcall*)(PULONG))GetProcAddress(hDll, "GetNumaHighestNodeNumber");
pGetNumaProcessorNode = (BOOL(__stdcall*)(UCHAR, PUCHAR))GetProcAddress(hDll, "GetNumaProcessorNode");
pGetNumaNodeProcessorMask = (BOOL(__stdcall*)(UCHAR, PULONGLONG))GetProcAddress(hDll, "GetNumaNodeProcessorMask");
}
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
mi_win_enable_large_os_pages();
@ -1109,6 +1117,25 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
#ifdef _WIN32
#if defined(AURORA_IS_64BIT)
// Returns the number of the processor the calling thread is running on, using
// the dynamically resolved `GetCurrentProcessorNumber`. Falls back to
// processor 0 when the entry point could not be resolved, instead of calling
// through a NULL pointer.
DWORD __declspec(dllexport) GetCurrentProcessorNumberXP(void)
{
  if (pGetCurrentProcessorNumber == NULL) {
    return 0;
  }
  return pGetCurrentProcessorNumber();
}
#else
// 32-bit variant that avoids the OS API altogether: it executes CPUID leaf 1
// and returns bits 31..24 of EBX (the initial APIC ID).
// NOTE(review): the APIC ID is presumably used as a stand-in for the Windows
// processor number; the two are not guaranteed to be identical - confirm.
DWORD __declspec(dllexport) __declspec(naked) GetCurrentProcessorNumberXP(void)
{
  __asm {
    push ebx        // EBX is callee-saved and CPUID overwrites it; a naked function gets no prolog to save it
    mov eax, 1      // CPUID leaf 1
    cpuid
    shr ebx, 24     // keep EBX[31:24]
    mov eax, ebx    // return value
    pop ebx
    ret
  }
}
#endif
static size_t mi_os_numa_nodex() {
USHORT numa_node = 0;
if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
@ -1119,11 +1146,13 @@ static size_t mi_os_numa_nodex() {
BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
if (ok) numa_node = nnode;
}
else {
else
{
// Vista or earlier, use older API that is limited to 64 processors. Issue #277
DWORD pnum = GetCurrentProcessorNumber();
DWORD pnum = GetCurrentProcessorNumberXP();
UCHAR nnode = 0;
BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode);
BOOL ok = pGetNumaProcessorNode &&
pGetNumaProcessorNode((UCHAR)pnum, &nnode);
if (ok) numa_node = nnode;
}
return numa_node;
@ -1131,7 +1160,9 @@ static size_t mi_os_numa_nodex() {
static size_t mi_os_numa_node_countx(void) {
ULONG numa_max = 0;
GetNumaHighestNodeNumber(&numa_max);
if (pGetNumaHighestNodeNumber) {
pGetNumaHighestNodeNumber(&numa_max);
}
// find the highest node number that has actual processors assigned to it. Issue #282
while(numa_max > 0) {
if (pGetNumaNodeProcessorMaskEx != NULL) {
@ -1144,7 +1175,8 @@ static size_t mi_os_numa_node_countx(void) {
else {
// Vista or earlier, use older API that is limited to 64 processors.
ULONGLONG mask;
if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
if (pGetNumaNodeProcessorMask &&
pGetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
if (mask != 0) break; // found the maximum non-empty node
};
}

View File

@ -278,8 +278,6 @@ static void mi_buffered_out(const char* msg, void* arg) {
// Print statistics
//------------------------------------------------------------
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults);
static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
// wrap the output function to be line buffered
char buf[256];
@ -330,7 +328,6 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
size_t current_commit;
size_t peak_commit;
size_t page_faults;
mi_stat_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults);
_mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
_mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process",
user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults );
@ -438,113 +435,3 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
mi_msecs_t end = _mi_clock_now();
return (end - start - mi_clock_diff);
}
// --------------------------------------------------------
// Basic process statistics
// --------------------------------------------------------
#if defined(AURORA_PLATFORM_WIN32)
#include <Windows.h>
#include <psapi.h>
#pragma comment(lib,"psapi.lib")
// Convert a FILETIME (a 64-bit count of 100-nanosecond units split into two
// 32-bit halves) into whole milliseconds.
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
  ULARGE_INTEGER ticks;
  ticks.HighPart = ftime->dwHighDateTime;
  ticks.LowPart  = ftime->dwLowDateTime;
  // 10000 units of 100ns make up one millisecond
  const mi_msecs_t result = (ticks.QuadPart / 10000);
  return result;
}
// Collect process statistics on Windows.
//   elapsed                     : time since `mi_process_start` as measured by `_mi_clock_end`
//   utime / stime               : user and kernel CPU time in milliseconds (GetProcessTimes)
//   current_rss / peak_rss      : working set size and its peak (GetProcessMemoryInfo)
//   current_commit / peak_commit: page file usage and its peak
//   page_faults                 : page fault count
// All out-parameters must be non-NULL. If one of the OS queries fails, the
// values that depend on it are reported as 0 rather than read from
// uninitialized stack memory.
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);

  // GetProcessTimes order: creation, exit, kernel, user
  FILETIME ct;
  FILETIME et;
  FILETIME st;
  FILETIME ut;
  if (GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut)) {
    *utime = filetime_msecs(&ut);
    *stime = filetime_msecs(&st);
  }
  else {
    *utime = 0;
    *stime = 0;
  }

  PROCESS_MEMORY_COUNTERS info = { 0 };
  const BOOL have_mem = GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
  *current_rss    = (have_mem ? (size_t)info.WorkingSetSize : 0);
  *peak_rss       = (have_mem ? (size_t)info.PeakWorkingSetSize : 0);
  *current_commit = (have_mem ? (size_t)info.PagefileUsage : 0);
  *peak_commit    = (have_mem ? (size_t)info.PeakPagefileUsage : 0);
  *page_faults    = (have_mem ? (size_t)info.PageFaultCount : 0);
}
#elif defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_APPLE)
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#if defined(__APPLE__) && defined(__MACH__)
#include <mach/mach.h>
#endif
#if defined(__HAIKU__)
#include <kernel/OS.h>
#endif
static mi_msecs_t timeval_secs(const struct timeval* tv) {
return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
}
// Collect process statistics on Linux and Apple platforms.
//   elapsed                     : time since `mi_process_start` as measured by `_mi_clock_end`
//   utime / stime               : user and system CPU time in milliseconds (getrusage)
//   page_faults                 : major page faults (getrusage)
//   current_commit / peak_commit: estimated from the allocator's own `committed` statistic
//   current_rss                 : estimated as the current commit
//   peak_rss                    : `ru_maxrss` from getrusage, converted to bytes
// All out-parameters must be non-NULL and every one of them is written on
// every path; if getrusage fails, the values that depend on it fall back to
// 0 (times, faults) or to the commit estimate (peak_rss).
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);

  struct rusage rusage;
  const int have_usage = (getrusage(RUSAGE_SELF, &rusage) == 0);
  *utime = (have_usage ? timeval_secs(&rusage.ru_utime) : 0);
  *stime = (have_usage ? timeval_secs(&rusage.ru_stime) : 0);
  *page_faults = (have_usage ? (size_t)rusage.ru_majflt : 0);

  // estimate commit using our stats
  *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
  *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
  *current_rss = *current_commit; // estimate

  #if defined(AURORA_PLATFORM_LINUX)
  *peak_rss = (have_usage ? (size_t)rusage.ru_maxrss * 1024 : *peak_commit); // Linux reports in KiB
  #else
  *peak_rss = (have_usage ? (size_t)rusage.ru_maxrss : *peak_commit);        // macOS reports in bytes
  #endif
}
#else
// Fallback for platforms without a process-statistics API: everything is
// derived from the allocator's own `committed` statistic. RSS values mirror
// the commit values, and CPU times and page faults are reported as 0.
// All out-parameters must be non-NULL.
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);

  const size_t commit_peak = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
  const size_t commit_now  = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));

  *peak_commit    = commit_peak;
  *current_commit = commit_now;
  *peak_rss       = commit_peak;
  *current_rss    = commit_now;

  *page_faults = 0;
  *utime = 0;
  *stime = 0;
}
#endif
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
{
mi_msecs_t elapsed = 0;
mi_msecs_t utime = 0;
mi_msecs_t stime = 0;
size_t current_rss0 = 0;
size_t peak_rss0 = 0;
size_t current_commit0 = 0;
size_t peak_commit0 = 0;
size_t page_faults0 = 0;
mi_stat_process_info(&elapsed,&utime, &stime, &current_rss0, &peak_rss0, &current_commit0, &peak_commit0, &page_faults0);
if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX));
if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX));
if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX));
if (current_rss!=NULL) *current_rss = current_rss0;
if (peak_rss!=NULL) *peak_rss = peak_rss0;
if (current_commit!=NULL) *current_commit = current_commit0;
if (peak_commit!=NULL) *peak_commit = peak_commit0;
if (page_faults!=NULL) *page_faults = page_faults0;
}