[*] Further drop OS requirements
This commit is contained in:
parent
74d629643a
commit
18c0ff282b
@ -159,10 +159,6 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
|
||||
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
|
||||
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
|
||||
|
||||
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
|
||||
size_t* current_rss, size_t* peak_rss,
|
||||
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
// Aligned allocation
|
||||
// Note that `alignment` always follows `size` for consistency with unaligned
|
||||
|
16
src/init.c
16
src/init.c
@ -287,10 +287,10 @@ static void _mi_thread_done(mi_heap_t* default_heap);
|
||||
#include <Windows.h>
|
||||
#include <fibersapi.h>
|
||||
#if (_WIN32_WINNT < 0x600) // before Windows Vista
|
||||
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
|
||||
WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
|
||||
WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
|
||||
WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex);
|
||||
WINBASEAPI DWORD WINAPI TlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
|
||||
WINBASEAPI PVOID WINAPI TlsGetValue( _In_ DWORD dwFlsIndex );
|
||||
WINBASEAPI BOOL WINAPI TlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
|
||||
WINBASEAPI BOOL WINAPI TlsFree(_In_ DWORD dwFlsIndex);
|
||||
#endif
|
||||
static DWORD mi_fls_key = (DWORD)(-1);
|
||||
static void NTAPI mi_fls_done(PVOID value) {
|
||||
@ -318,7 +318,7 @@ static void mi_process_setup_auto_thread_done(void) {
|
||||
#if defined(_WIN32) && defined(MI_SHARED_LIB)
|
||||
// nothing to do as it is done in DllMain
|
||||
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
|
||||
mi_fls_key = FlsAlloc(&mi_fls_done);
|
||||
mi_fls_key = TlsAlloc(&mi_fls_done);
|
||||
#elif defined(MI_USE_PTHREADS)
|
||||
mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1));
|
||||
pthread_key_create(&_mi_heap_default_key, &mi_pthread_done);
|
||||
@ -378,7 +378,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) {
|
||||
// nothing to do as it is done in DllMain
|
||||
#elif defined(_WIN32) && !defined(MI_SHARED_LIB)
|
||||
mi_assert_internal(mi_fls_key != 0);
|
||||
FlsSetValue(mi_fls_key, heap);
|
||||
TlsSetValue(mi_fls_key, heap);
|
||||
#elif defined(MI_USE_PTHREADS)
|
||||
if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD
|
||||
pthread_setspecific(_mi_heap_default_key, heap);
|
||||
@ -491,8 +491,8 @@ static void mi_process_done(void) {
|
||||
process_done = true;
|
||||
|
||||
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
|
||||
FlsSetValue(mi_fls_key, NULL); // don't call main-thread callback
|
||||
FlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208
|
||||
TlsSetValue(mi_fls_key, NULL); // don't call main-thread callback
|
||||
TlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208
|
||||
#endif
|
||||
|
||||
#if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
|
||||
|
50
src/os.c
50
src/os.c
@ -138,10 +138,16 @@ typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINI
|
||||
static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
|
||||
static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
|
||||
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
|
||||
// Optional kernel32 entry points, held as function pointers so the binary does
// not import them statically. Each one is NULL until it has been resolved at
// runtime, so every caller must check for NULL before calling through it.
static DWORD(__stdcall* pGetCurrentProcessorNumber)() = NULL;
static DWORD(__stdcall* pGetLargePageMinimum)() = NULL;
static BOOL(__stdcall* pGetNumaHighestNodeNumber)(PULONG HighestNodeNumber) = NULL;
static BOOL(__stdcall* pGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber) = NULL;
static BOOL(__stdcall* pGetNumaNodeProcessorMask)(UCHAR Processor, PULONGLONG ProcessorMask) = NULL;
|
||||
|
||||
static bool mi_win_enable_large_os_pages()
|
||||
{
|
||||
if (large_os_page_size > 0) return true;
|
||||
if (!pGetLargePageMinimum) return false;
|
||||
|
||||
// Try to see if large OS pages are supported
|
||||
// To use large pages on Windows, we first need access permission
|
||||
@ -161,7 +167,7 @@ static bool mi_win_enable_large_os_pages()
|
||||
err = GetLastError();
|
||||
ok = (err == ERROR_SUCCESS);
|
||||
if (ok) {
|
||||
large_os_page_size = GetLargePageMinimum();
|
||||
large_os_page_size = pGetLargePageMinimum();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -187,13 +193,11 @@ void _mi_os_init(void) {
|
||||
// use VirtualAlloc2FromApp if possible as it is available to Windows store apps
|
||||
pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp");
|
||||
if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2");
|
||||
FreeLibrary(hDll);
|
||||
}
|
||||
// NtAllocateVirtualMemoryEx is used for huge page allocation
|
||||
hDll = LoadLibrary(TEXT("ntdll.dll"));
|
||||
if (hDll != NULL) {
|
||||
pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx");
|
||||
FreeLibrary(hDll);
|
||||
}
|
||||
// Try to use Win7+ numa API
|
||||
hDll = LoadLibrary(TEXT("kernel32.dll"));
|
||||
@ -201,7 +205,11 @@ void _mi_os_init(void) {
|
||||
pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
|
||||
pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
|
||||
pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
|
||||
FreeLibrary(hDll);
|
||||
pGetCurrentProcessorNumber = (DWORD(__stdcall*)())GetProcAddress(hDll, "GetCurrentProcessorNumber");
|
||||
pGetLargePageMinimum = (DWORD(__cdecl*)())GetProcAddress(hDll, "GetLargePageMinimum");
|
||||
pGetNumaHighestNodeNumber = (BOOL(__stdcall*)(PULONG))GetProcAddress(hDll, "GetNumaHighestNodeNumber");
|
||||
pGetNumaProcessorNode = (BOOL(__stdcall*)(UCHAR, PUCHAR))GetProcAddress(hDll, "GetNumaProcessorNode");
|
||||
pGetNumaNodeProcessorMask = (BOOL(__stdcall*)(UCHAR, PULONGLONG))GetProcAddress(hDll, "GetNumaNodeProcessorMask");
|
||||
}
|
||||
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
|
||||
mi_win_enable_large_os_pages();
|
||||
@ -1109,6 +1117,25 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
|
||||
Support NUMA aware allocation
|
||||
-----------------------------------------------------------------------------*/
|
||||
#ifdef _WIN32
|
||||
|
||||
#if defined(AURORA_IS_64BIT)
|
||||
// 64-bit variant: forwards to the dynamically resolved GetCurrentProcessorNumber.
// Returns the current processor number, or 0 when the entry point could not be
// resolved (the pointer is still NULL), rather than calling through NULL.
DWORD __declspec(dllexport) GetCurrentProcessorNumberXP(void)
{
  if (pGetCurrentProcessorNumber == NULL) {
    return 0;
  }
  return pGetCurrentProcessorNumber();
}
|
||||
#else
|
||||
// 32-bit variant that avoids the OS API: executes CPUID leaf 1 and returns
// bits 31..24 of EBX (the initial APIC ID of the executing logical processor).
// NOTE(review): the APIC ID is not guaranteed to equal the OS processor index;
// confirm that callers only need an approximate processor identifier.
//
// The function is `naked`, so no prologue/epilogue is generated and every
// callee-saved register touched here must be preserved by hand.
DWORD __declspec(dllexport) __declspec(naked) GetCurrentProcessorNumberXP(void)
{
  __asm {
    push ebx          // cpuid overwrites ebx, which callers expect to be preserved
    mov eax, 1        // CPUID leaf 1: processor info and feature bits
    cpuid
    shr ebx, 24       // keep EBX[31:24]
    mov eax, ebx      // return value goes in eax
    pop ebx
    ret
  }
}
|
||||
#endif
|
||||
|
||||
static size_t mi_os_numa_nodex() {
|
||||
USHORT numa_node = 0;
|
||||
if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
|
||||
@ -1119,11 +1146,13 @@ static size_t mi_os_numa_nodex() {
|
||||
BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
|
||||
if (ok) numa_node = nnode;
|
||||
}
|
||||
else {
|
||||
else
|
||||
{
|
||||
// Vista or earlier, use older API that is limited to 64 processors. Issue #277
|
||||
DWORD pnum = GetCurrentProcessorNumber();
|
||||
DWORD pnum = GetCurrentProcessorNumberXP();
|
||||
UCHAR nnode = 0;
|
||||
BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode);
|
||||
BOOL ok = pGetNumaProcessorNode &&
|
||||
pGetNumaProcessorNode((UCHAR)pnum, &nnode);
|
||||
if (ok) numa_node = nnode;
|
||||
}
|
||||
return numa_node;
|
||||
@ -1131,7 +1160,9 @@ static size_t mi_os_numa_nodex() {
|
||||
|
||||
static size_t mi_os_numa_node_countx(void) {
|
||||
ULONG numa_max = 0;
|
||||
GetNumaHighestNodeNumber(&numa_max);
|
||||
if (pGetNumaHighestNodeNumber) {
|
||||
pGetNumaHighestNodeNumber(&numa_max);
|
||||
}
|
||||
// find the highest node number that has actual processors assigned to it. Issue #282
|
||||
while(numa_max > 0) {
|
||||
if (pGetNumaNodeProcessorMaskEx != NULL) {
|
||||
@ -1144,7 +1175,8 @@ static size_t mi_os_numa_node_countx(void) {
|
||||
else {
|
||||
// Vista or earlier, use older API that is limited to 64 processors.
|
||||
ULONGLONG mask;
|
||||
if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
|
||||
if (pGetNumaNodeProcessorMask &&
|
||||
pGetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) {
|
||||
if (mask != 0) break; // found the maximum non-empty node
|
||||
};
|
||||
}
|
||||
|
113
src/stats.c
113
src/stats.c
@ -278,8 +278,6 @@ static void mi_buffered_out(const char* msg, void* arg) {
|
||||
// Print statistics
|
||||
//------------------------------------------------------------
|
||||
|
||||
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults);
|
||||
|
||||
static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept {
|
||||
// wrap the output function to be line buffered
|
||||
char buf[256];
|
||||
@ -330,7 +328,6 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
|
||||
size_t current_commit;
|
||||
size_t peak_commit;
|
||||
size_t page_faults;
|
||||
mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults);
|
||||
_mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000);
|
||||
_mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process",
|
||||
user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults );
|
||||
@ -438,113 +435,3 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
|
||||
mi_msecs_t end = _mi_clock_now();
|
||||
return (end - start - mi_clock_diff);
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------
|
||||
// Basic process statistics
|
||||
// --------------------------------------------------------
|
||||
|
||||
#if defined(AURORA_PLATFORM_WIN32)
|
||||
#include <Windows.h>
|
||||
#include <psapi.h>
|
||||
#pragma comment(lib,"psapi.lib")
|
||||
|
||||
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
|
||||
ULARGE_INTEGER i;
|
||||
i.LowPart = ftime->dwLowDateTime;
|
||||
i.HighPart = ftime->dwHighDateTime;
|
||||
mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds
|
||||
return msecs;
|
||||
}
|
||||
|
||||
// Windows implementation: collects elapsed time, user/kernel CPU time and
// memory counters for the current process.
// All output pointers must be non-NULL. If an OS query fails, the outputs that
// depend on it are reported as 0 instead of uninitialized data.
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);

  // GetProcessTimes argument order: creation, exit, kernel, user
  FILETIME ct = { 0 };
  FILETIME et = { 0 };
  FILETIME st = { 0 };
  FILETIME ut = { 0 };
  if (GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut)) {
    *utime = filetime_msecs(&ut);
    *stime = filetime_msecs(&st);
  }
  else {
    *utime = 0;
    *stime = 0;
  }

  PROCESS_MEMORY_COUNTERS info = { 0 };
  if (GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info))) {
    *current_rss    = (size_t)info.WorkingSetSize;
    *peak_rss       = (size_t)info.PeakWorkingSetSize;
    *current_commit = (size_t)info.PagefileUsage;
    *peak_commit    = (size_t)info.PeakPagefileUsage;
    *page_faults    = (size_t)info.PageFaultCount;
  }
  else {
    *current_rss    = 0;
    *peak_rss       = 0;
    *current_commit = 0;
    *peak_commit    = 0;
    *page_faults    = 0;
  }
}
|
||||
|
||||
#elif defined(AURORA_PLATFORM_LINUX) || defined(AURORA_PLATFORM_APPLE)
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
#if defined(__APPLE__) && defined(__MACH__)
|
||||
#include <mach/mach.h>
|
||||
#endif
|
||||
|
||||
#if defined(__HAIKU__)
|
||||
#include <kernel/OS.h>
|
||||
#endif
|
||||
|
||||
static mi_msecs_t timeval_secs(const struct timeval* tv) {
|
||||
return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L);
|
||||
}
|
||||
|
||||
// getrusage-based implementation: collects elapsed time, user/system CPU time
// and major page faults from the OS, and estimates commit and current RSS from
// the allocator's own statistics.
// All output pointers must be non-NULL; every output is written on every path.
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);
  struct rusage rusage;
  getrusage(RUSAGE_SELF, &rusage);
  *utime = timeval_secs(&rusage.ru_utime);
  *stime = timeval_secs(&rusage.ru_stime);
  *page_faults = rusage.ru_majflt;

  // estimate commit using our stats
  *peak_commit    = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
  *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
  *current_rss    = *current_commit;  // estimate

  // ru_maxrss uses different units per platform
#if defined(AURORA_PLATFORM_LINUX)
  *peak_rss = (size_t)rusage.ru_maxrss * 1024;  // Linux reports in KiB; widen before scaling
#elif defined(AURORA_PLATFORM_APPLE)
  *peak_rss = (size_t)rusage.ru_maxrss;         // macOS reports in bytes
#else
  *peak_rss = 0;                                // unit unknown: report nothing rather than garbage
#endif
}
|
||||
|
||||
#else
|
||||
|
||||
// Fallback implementation for platforms without a process-statistics API:
// reports the elapsed time, mirrors the allocator's own commit statistics into
// both the commit and RSS outputs, and reports zero CPU time and page faults.
static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
  *elapsed = _mi_clock_end(mi_process_start);

  const size_t committed_peak = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
  const size_t committed_now  = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));

  // the RSS outputs are estimates taken directly from the commit numbers
  *peak_commit    = committed_peak;
  *current_commit = committed_now;
  *peak_rss       = committed_peak;
  *current_rss    = committed_now;

  // nothing to report for these without OS support
  *page_faults = 0;
  *utime = 0;
  *stime = 0;
}
|
||||
#endif
|
||||
|
||||
|
||||
mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept
|
||||
{
|
||||
mi_msecs_t elapsed = 0;
|
||||
mi_msecs_t utime = 0;
|
||||
mi_msecs_t stime = 0;
|
||||
size_t current_rss0 = 0;
|
||||
size_t peak_rss0 = 0;
|
||||
size_t current_commit0 = 0;
|
||||
size_t peak_commit0 = 0;
|
||||
size_t page_faults0 = 0;
|
||||
mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0);
|
||||
if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX));
|
||||
if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX));
|
||||
if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX));
|
||||
if (current_rss!=NULL) *current_rss = current_rss0;
|
||||
if (peak_rss!=NULL) *peak_rss = peak_rss0;
|
||||
if (current_commit!=NULL) *current_commit = current_commit0;
|
||||
if (peak_commit!=NULL) *peak_commit = peak_commit0;
|
||||
if (page_faults!=NULL) *page_faults = page_faults0;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user