AuroraRuntime/Source/Time/AuClock.cpp
Jamie Reece Wilson ad5ff2d783 [*] Simplified Win32 TLS callback overhead to prevent unwanted LoadLibraryW calls
(ive seen windows components do stupid things to result in crashes. time to harden.)
2023-12-07 19:04:30 +00:00

713 lines
30 KiB
C++

/***
Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved.
File: AuClock.cpp
Date: 2021-6-13
Author: Reece
***/
#include <Source/RuntimeInternal.hpp>
#include "AuClock.hpp"
#include "Time.hpp"
#if defined(AURORA_IS_POSIX_DERIVED)
#if defined(AURORA_IS_XNU_DERIVED)
#include <mach/mach_time.h>
// Returns the number of mach time ticks per second, derived once from
// mach_timebase_info as (denom * 1e9) / numer and cached for later calls.
AUKN_SYM unsigned long long _NTLikeQueryFrequency()
{
    static unsigned long long uTicksPerSecond {};
    static AuInitOnce gOnce;

    gOnce.Call([]
    {
        mach_timebase_info_data_t info;
        mach_timebase_info(&info);

        const double dScaledDenom = double(info.denom) * 1'000'000'000.0;
        uTicksPerSecond = dScaledDenom / double(info.numer);
    });

    return uTicksPerSecond;
}
// Returns the current raw tick count as reported by mach_continuous_time().
AUKN_SYM unsigned long long _NTLikeQueryCounter()
{
    const unsigned long long uTicks = mach_continuous_time();
    return uTicks;
}
#define GIB__GetSteadyTimeNS
#endif
#include <sys/resource.h>
#elif defined(AURORA_IS_MODERNNT_DERIVED)
#include <SWInfo/AuSWInfo.hpp>
// Active clock backend, as dispatched on by _NTLikeQueryFrequency / _NTLikeQueryCounter:
//   0 = _NT3_* (interrupt time read from the shared data page),
//   1 = _NT6_1_* (TSC based, configured by _NT6_1_Init),
//   2 = the _gNTFreq / _gNTQuery function pointers below.
static AuUInt8 _gNTClockMode {};
// Counter query used when _gNTClockMode == 2.
static long long(__cdecl *_gNTQuery)();
// Frequency query used when _gNTClockMode == 2.
static long long(__cdecl *_gNTFreq)();
// Three-word time value (low word plus two copies of the high word).
// The only constructor takes a volatile source and copies its fields in the
// order LowPart, High1Time, High2Time.
struct NTSystemTime
{
    unsigned long LowPart;
    long High1Time;
    long High2Time;

    // Members are initialised in declaration order, so the volatile source is
    // read as LowPart, then High1Time, then High2Time.
    NTSystemTime(volatile NTSystemTime &source) :
        LowPart(source.LowPart),
        High1Time(source.High1Time),
        High2Time(source.High2Time)
    {
    }
};
// One byte of QPC configuration, read from the shared data page via kKQPCData.
// The byte can be accessed whole (TscQpcData) or through its bit fields.
struct NTQPCoefficients
{
union
{
// Whole byte; _NT6_1_Init loads this atomically and then inspects the fields below.
UCHAR TscQpcData;
struct
{
// When clear, _NT6_1_Init falls back to _NTSetFallbackClock().
UCHAR TscQpcEnabled : 1;
UCHAR TscQpcSpareFlag : 1;
// Right-shift applied to the biased TSC value in _NT6_1_Query_Counter().
UCHAR TscQpcShift : 6;
};
};
};
// Forward declarations: both are defined further down but needed by the globals / init code below.
static unsigned long long _NT3_Query_Frequency();
static void _NTSetFallbackClock();
// Fixed base address of the shared data page all offsets below are relative to.
// NOTE(review): presumably the user-mode mapping of KUSER_SHARED_DATA - confirm.
static void * kKUSERShardDataOffset = AuReinterpretCast<void *>(0x7ffe0000);
// Interrupt time record at base + 8; volatile because it is read repeatedly by _NT3_Query_Counter().
static auto * kKInterruptTime = AuReinterpretCast<volatile NTSystemTime *>(AuReinterpretCast<AuUInt8 *>(kKUSERShardDataOffset) + 8);
// QPC configuration byte at base + 0x2ED.
static auto * kKQPCData = AuReinterpretCast<NTQPCoefficients *>(AuReinterpretCast<AuUInt8 *>(kKUSERShardDataOffset) + 0x2ED);
// Shift and bias applied to the TSC in mode 1; written by _NT6_1_Init().
static AuUInt64 _gNTTimeShift {};
static AuUInt64 _gNTTimeBias {};
// Frequency reported in mode 1; starts as the mode 0 frequency until _NT6_1_Init() overwrites it.
static AuUInt64 _gNTTimeFreq { _NT3_Query_Frequency() };
// Frequency of the mode 0 clock: ten million ticks per second (one tick = 100ns).
static unsigned long long _NT3_Query_Frequency()
{
    constexpr unsigned long long kTicksPerSecond = 10'000'000ull;
    return kTicksPerSecond;
}
static void _NT6_1_Init()
{
NTQPCoefficients now;
now.TscQpcData = AuAtomicLoad(&kKQPCData->TscQpcData);
if (!now.TscQpcEnabled)
{
_NTSetFallbackClock();
return;
}
_gNTTimeShift = now.TscQpcShift;
_gNTTimeBias = *AuReinterpretCast<ULONGLONG *>(AuReinterpretCast<AuUInt8 *>(kKUSERShardDataOffset) + 0x3B8);
long long uvalue {};
SysAssert(Aurora::pQueryPerformanceFrequency &&
Aurora::pQueryPerformanceFrequency(&uvalue), "no perf frequency");
_gNTTimeFreq = uvalue;
_gNTClockMode = 1;
}
// Frequency of the mode 1 clock, as stored in _gNTTimeFreq.
static long long _NT6_1_Query_Frequency()
{
    const long long llFrequency = _gNTTimeFreq;
    return llFrequency;
}
// Mode 1 counter: (TSC + bias) >> shift on x86 / x64, and 0 on any other architecture.
static unsigned long long _NT6_1_Query_Counter()
{
#if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86)
    const AuUInt64 uBiased = AuUInt64(__rdtsc()) + _gNTTimeBias;
    return uBiased >> _gNTTimeShift;
#else
    return 0;
#endif
}
static long long _NT3_Query_Counter()
{
auto interruptTime = *kKInterruptTime;
return (AuUInt64(kKInterruptTime->High1Time) << 32ull) | interruptTime.LowPart;
}
// Installs the mode 2 clock: both function pointers forward to the
// Aurora::pQueryPerformance* entry points and assert that the call succeeded.
static void _NTSetFallbackClock()
{
    _gNTFreq = []() -> long long
    {
        long long llTicksPerSecond {};
        SysAssert(Aurora::pQueryPerformanceFrequency &&
                  Aurora::pQueryPerformanceFrequency(&llTicksPerSecond), "no perf frequency");
        return llTicksPerSecond;
    };

    _gNTQuery = []() -> long long
    {
        long long llTicks {};
        SysAssert(Aurora::pQueryPerformanceCounter &&
                  Aurora::pQueryPerformanceCounter(&llTicks), "no perf counter");
        return llTicks;
    };

    _gNTClockMode = 2;
}
// Selects the clock backend exactly once (guarded by a function-local AuInitOnce).
// Result is recorded in _gNTClockMode and, for mode 2, in _gNTFreq / _gNTQuery.
static void _NTDoClockInit()
{
static AuInitOnce gInitOnce;
gInitOnce.Call([]
{
Aurora::InitNTAddressesForClock();
AuSwInfo::InitSwInfoEarly();
{
// Escape hatch, possibly useful for real-time applications on problematic motherboards and hypervisors.
// If the QPC fast path has been invalidated and Windows would otherwise make a system call on every query,
// setting this environment variable forces the userland-only mode 0 clock instead.
// Pretty much every Win32 environment and platform should support this.
wchar_t buffer[64];
if (GetEnvironmentVariableW(L"AURORA_STEADY_TIME_SHID_CPU_XP_MODE", buffer, AuArraySize(buffer)))
{
_gNTClockMode = 0;
// Returns from the init lambda only; no further backend selection happens.
return;
}
}
if constexpr (AuBuild::kCurrentPlatform == AuBuild::EPlatform::ePlatformWin32)
{
if (!AuSwInfo::IsWindows7OrGreater())
{
// Older than Windows 7: mode 0.
_gNTClockMode = 0;
}
else if (AuSwInfo::IsWindows7Any())
{
// Windows 7: try the TSC path (which itself may fall back to mode 2).
_NT6_1_Init();
}
else //if (AuSwInfo::IsWindows8OrGreater())
{
_NTSetFallbackClock();
}
}
else
{
// Non-Win32 platform value: mode 2 backed by _Query_perf_frequency / _Query_perf_counter.
_gNTClockMode = 2;
_gNTFreq = _Query_perf_frequency;
_gNTQuery = _Query_perf_counter;
}
});
}
// Returns the tick frequency of whichever backend _NTDoClockInit() selected.
// Yields 0 if the mode value is not one of the three known backends.
AUKN_SYM unsigned long long _NTLikeQueryFrequency()
{
    _NTDoClockInit();

    const auto uMode = _gNTClockMode;
    if (uMode == 0)
    {
        return _NT3_Query_Frequency();
    }
    if (uMode == 1)
    {
        return _NT6_1_Query_Frequency();
    }
    if (uMode == 2)
    {
        return _gNTFreq();
    }
    return 0;
}
// Returns the current tick count from the backend recorded in _gNTClockMode.
// Yields 0 if the mode value is not one of the three known backends.
// NOTE(review): unlike _NTLikeQueryFrequency(), this does not call
// _NTDoClockInit(); before the first frequency query the mode is still 0, so
// the mode 0 counter is returned. Presumably intentional to keep the hot path
// cheap - confirm with callers that query the counter first.
AUKN_SYM unsigned long long _NTLikeQueryCounter()
{
    const auto uMode = _gNTClockMode;
    if (uMode == 0)
    {
        return _NT3_Query_Counter();
    }
    if (uMode == 1)
    {
        return _NT6_1_Query_Counter();
    }
    if (uMode == 2)
    {
        return _gNTQuery();
    }
    return 0;
}
// benchmarking: https://github.com/microsoft/STL/issues/2085
#define GIB__GetSteadyTimeNS
// ~3.0741 seconds
// using high_res_clock = std::chrono::high_resolution_clock;
// ~6.07 seconds
// That is a large difference, so we're keeping this
// ~2x improvement
#else
using steady_clock = std::chrono::steady_clock;
#endif
#if defined(GIB__GetSteadyTimeNS)
// Converts the current _NTLikeQueryCounter() value to nanoseconds.
//
// The frequency is queried once and cached. Common power-of-ten frequencies
// take a multiply-only fast path; anything else goes through a split
// whole-seconds / remainder conversion so the intermediate product does not
// overflow for large counters.
//
// All arithmetic is unsigned (the counter and frequency are unsigned at the
// source), and a frequency of 0 - which _NTLikeQueryFrequency() can report for
// an unknown backend - yields 0 instead of dividing by zero.
static AuUInt64 _GetSteadyTimeNS()
{
    static const AuUInt64 uFreq = _NTLikeQueryFrequency();
    const AuUInt64 uCounter = _NTLikeQueryCounter();

    if (uFreq == 10000000ull)
    {
        return uCounter * 100ull;
    }
    else if (uFreq == 1000000ull)
    {
        return uCounter * 1000ull;
    }
    else if (uFreq == 100000ull)
    {
        return uCounter * 10000ull;
    }
    else if (uFreq == 100000000ull)
    {
        return uCounter * 10ull;
    }
    else if (uFreq == 1000000000ull)
    {
        return uCounter;
    }
    else
    {
        // A short if/else chain is kept on purpose: the handful of compares
        // above is expected to be cheaper than the division / modulus below.
        if (uFreq == 0)
        {
            // No usable clock; avoid the division by zero.
            return 0;
        }

        const AuUInt64 uWhole = (uCounter / uFreq) * 1'000'000'000ull;
        const AuUInt64 uPart = (uCounter % uFreq) * 1'000'000'000ull / uFreq;
        return uWhole + uPart;
    }
}
#endif
using sys_clock = std::chrono::system_clock; // more stds to remove
sys_clock::duration __NormalizeEpoch(sys_clock::duration sysEpoch);
// Current system_clock time, epoch-normalised, in the clock's native tick units.
static AuInt64 _CurrentClock()
{
    const auto sinceEpoch = sys_clock::now().time_since_epoch();
    const auto normalized = __NormalizeEpoch(sinceEpoch);
    return normalized.count();
}
// Current system_clock time, epoch-normalised, in milliseconds.
static AuInt64 _CurrentClockMS()
{
    const auto sinceEpoch = sys_clock::now().time_since_epoch();
    const auto normalized = __NormalizeEpoch(sinceEpoch);
    return std::chrono::duration_cast<std::chrono::milliseconds>(normalized).count();
}
// Current system_clock time, epoch-normalised, in nanoseconds.
static AuInt64 _CurrentClockNS()
{
    const auto sinceEpoch = sys_clock::now().time_since_epoch();
    const auto normalized = __NormalizeEpoch(sinceEpoch);
    return std::chrono::duration_cast<std::chrono::nanoseconds>(normalized).count();
}
namespace Aurora::Time
{
// Exported wall clock in native system_clock ticks; forwards to _CurrentClock().
AUKN_SYM AuInt64 CurrentClock()
{
    const AuInt64 iNow = _CurrentClock();
    return iNow;
}
// Exported wall clock in milliseconds; forwards to _CurrentClockMS().
AUKN_SYM AuInt64 CurrentClockMS()
{
    const AuInt64 iNowMS = _CurrentClockMS();
    return iNowMS;
}
// Exported wall clock in nanoseconds; forwards to _CurrentClockNS().
AUKN_SYM AuInt64 CurrentClockNS()
{
    const AuInt64 iNowNS = _CurrentClockNS();
    return iNowNS;
}
// Steady clock in native ticks (see SteadyClockFrequency()).
// NT / XNU builds return the raw counter; other builds derive ticks from
// SteadyClockNS() using whole nanoseconds per tick.
AUKN_SYM AuUInt64 SteadyClock()
{
#if defined(AURORA_IS_MODERNNT_DERIVED) || defined(AURORA_IS_XNU_DERIVED)
    return _NTLikeQueryCounter();
#else
    return SteadyClockNS() / (1000000000ull / SteadyClockFrequency());
#endif
}
// Steady clock in milliseconds.
// POSIX builds read CLOCK_MONOTONIC and return 0 if that read fails.
AUKN_SYM AuUInt64 SteadyClockMS()
{
#if defined(AURORA_IS_MODERNNT_DERIVED) || defined(AURORA_IS_XNU_DERIVED)
    const AuUInt64 uNowNS = _GetSteadyTimeNS();
    return AuNSToMS<AuUInt64>(uNowNS);
#elif defined(AURORA_IS_POSIX_DERIVED)
    ::timespec now {};
    if (::clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    {
        return 0;
    }
    return AuSToMS<AuUInt64>(now.tv_sec) + AuNSToMS<AuUInt64>(now.tv_nsec);
#else
    const auto sinceEpoch = steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count();
#endif
}
// Steady clock in nanoseconds.
// POSIX builds read CLOCK_MONOTONIC and return 0 if that read fails.
AUKN_SYM AuUInt64 SteadyClockNS()
{
#if defined(AURORA_IS_MODERNNT_DERIVED) || defined(AURORA_IS_XNU_DERIVED)
    const AuUInt64 uNowNS = _GetSteadyTimeNS();
    return uNowNS;
#elif defined(AURORA_IS_POSIX_DERIVED)
    ::timespec now {};
    if (::clock_gettime(CLOCK_MONOTONIC, &now) != 0)
    {
        return 0;
    }
    return AuMSToNS<AuUInt64>(AuSToMS<AuUInt64>(now.tv_sec)) + (AuUInt64)now.tv_nsec;
#else
    const auto sinceEpoch = steady_clock::now().time_since_epoch();
    return std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch).count();
#endif
}
// Ticks per second of SteadyClock(), cached after the first non-zero result.
// POSIX builds derive it from clock_getres(CLOCK_MONOTONIC) when the
// resolution is sub-second, and otherwise report 10,000,000.
AUKN_SYM AuUInt64 SteadyClockFrequency()
{
    static AuUInt64 gCached = 0;

    if (gCached == 0)
    {
#if defined(AURORA_IS_MODERNNT_DERIVED) || defined(AURORA_IS_XNU_DERIVED)
        gCached = _NTLikeQueryFrequency();
#elif defined(AURORA_IS_POSIX_DERIVED)
        ::timespec resolution {};
        const bool bHaveResolution = ::clock_getres(CLOCK_MONOTONIC, &resolution) == 0;
        if (bHaveResolution && resolution.tv_nsec && !resolution.tv_sec)
        {
            gCached = 1000000000ull / resolution.tv_nsec;
        }
        else
        {
            gCached = (1000000000ull / 100ull);
        }
#else
        gCached = static_cast<double>(steady_clock::period::den) / static_cast<double>(steady_clock::period::num);
#endif
    }

    return gCached;
}
// Converts a C time_t to epoch-normalised milliseconds.
AUKN_SYM AuInt64 CTimeToMS(time_t time)
{
    const auto sinceEpoch = sys_clock::from_time_t(time).time_since_epoch();
    const auto normalized = __NormalizeEpoch(sinceEpoch);
    return std::chrono::duration_cast<std::chrono::milliseconds>(normalized).count();
}
#if defined(AURORA_IS_POSIX_DERIVED)
// Selects which part of a getrusage() result GetPOSIXTimeEx() reports.
enum class EPseudoPosixClock
{
// User time (rusage::ru_utime).
eUser,
// Kernel time (rusage::ru_stime).
eKernel,
// Sum of kernel and user time.
eAll
};
// Extracts the requested CPU time from an already-filled rusage, in nanoseconds.
// eAll is the sum of the kernel and user values.
static AuUInt64 GetPOSIXTimeEx(struct rusage *usage, EPseudoPosixClock e)
{
    if (e == EPseudoPosixClock::eAll)
    {
        return GetPOSIXTimeEx(usage, EPseudoPosixClock::eKernel) +
               GetPOSIXTimeEx(usage, EPseudoPosixClock::eUser);
    }

    struct timeval *pTime {};
    if (e == EPseudoPosixClock::eUser)
    {
        pTime = &usage->ru_utime;
    }
    else if (e == EPseudoPosixClock::eKernel)
    {
        pTime = &usage->ru_stime;
    }

    // Seconds go through ms to ns; microseconds are scaled directly.
    const auto uSecondsAsMS = AuSToMS<AuUInt64>(pTime->tv_sec);
    return AuMSToNS<AuUInt64>(uSecondsAsMS) +
           pTime->tv_usec * 1'000ull;
}
// Queries getrusage() for the calling thread (bThread) or the whole process
// and returns the requested CPU time in nanoseconds.
// Returns 0 if getrusage() fails; previously the result was ignored and an
// uninitialised rusage would have been read in that case.
static AuUInt64 GetPOSIXTime(bool bThread, EPseudoPosixClock e)
{
    struct rusage usage {};

    if (::getrusage(bThread ? RUSAGE_THREAD : RUSAGE_SELF, &usage) != 0)
    {
        return 0;
    }

    return GetPOSIXTimeEx(&usage, e);
}
// Queries getrusage() once for the calling thread (bThread) or the whole
// process and returns { kernel ns, user ns }.
// Returns an empty pair if getrusage() fails; previously the result was
// ignored and an uninitialised rusage would have been read in that case.
static AuPair<AuUInt64, AuUInt64> GetPOSIXTimePair(bool bThread)
{
    struct rusage usage {};

    if (::getrusage(bThread ? RUSAGE_THREAD : RUSAGE_SELF, &usage) != 0)
    {
        return {};
    }

    return {
        GetPOSIXTimeEx(&usage, EPseudoPosixClock::eKernel),
        GetPOSIXTimeEx(&usage, EPseudoPosixClock::eUser)
    };
}
#if !defined(CLOCK_THREAD_CPUTIME_ID)
#define CLOCK_THREAD_CPUTIME_ID 0
#endif
#if !defined(CLOCK_PROCESS_CPUTIME_ID)
#define CLOCK_PROCESS_CPUTIME_ID 0
#endif
#endif
#if defined(AURORA_IS_MODERNNT_DERIVED)
// NT variant. Defines four exported functions for one clock family:
//   fn##ClockMS, fn##ClockNS, fn##Clock and fn##ClockFrequency.
// `type` is pasted into Get<type>Times / GetCurrent<type>, and `expr` is an
// expression over the local ullUser / ullKernel values selecting which times count.
// fn##ClockNS scales `expr` by 100 and fn##Clock returns it unscaled, matching the
// reported frequency of 1000000000 / 100 ticks per second.
// Both return 0 when Get<type>Times fails (fn##Clock via fn##ClockNS).
// posixId and posixCall are not used by this variant.
#define ADD_CLOCK_FAMILY(fn, type, expr, posixId, posixCall) \
AUKN_SYM AuUInt64 fn ## ClockFrequency(); \
\
AUKN_SYM AuUInt64 fn ## ClockMS() \
{ \
return AuNSToMS<AuUInt64>(fn ## ClockNS()); \
} \
\
AUKN_SYM AuUInt64 fn ## ClockNS() \
{ \
FILETIME creation, exit, kernel, user; \
if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user)) \
{ \
ULARGE_INTEGER ullUser; \
{ \
ullUser.LowPart = user.dwLowDateTime; \
ullUser.HighPart = user.dwHighDateTime; \
} \
\
ULARGE_INTEGER ullKernel; \
{ \
ullKernel.LowPart = kernel.dwLowDateTime; \
ullKernel.HighPart = kernel.dwHighDateTime; \
} \
return (expr) * 100ull; \
} \
return 0; \
} \
\
AUKN_SYM AuUInt64 fn ## Clock() \
{ \
FILETIME creation, exit, kernel, user; \
\
if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user)) \
{ \
ULARGE_INTEGER ullUser; \
{ \
ullUser.LowPart = user.dwLowDateTime; \
ullUser.HighPart = user.dwHighDateTime; \
} \
\
ULARGE_INTEGER ullKernel; \
{ \
ullKernel.LowPart = kernel.dwLowDateTime; \
ullKernel.HighPart = kernel.dwHighDateTime; \
} \
return expr; \
} \
\
return fn ##ClockNS() / (1000000000ull / fn ## ClockFrequency()); \
} \
\
AUKN_SYM AuUInt64 fn ## ClockFrequency() \
{ \
return 1000000000ull / 100u; \
}
#elif defined(AURORA_IS_POSIX_DERIVED)
// POSIX variant. Defines four exported functions for one clock family:
//   fn##ClockMS, fn##ClockNS, fn##Clock and fn##ClockFrequency.
// When posixId is 0 the family is served by GetPOSIXTime with the `posixCall`
// argument list (passed through AU_WHAT), ticks are microseconds and the
// frequency is 1,000,000.
// Otherwise posixId is used as the clock id for clock_gettime / clock_getres and
// the frequency is derived from the reported resolution (cached once non-zero).
// `type` and `expr` are not used by this variant.
#define ADD_CLOCK_FAMILY(fn, type, expr, posixId, posixCall) \
AUKN_SYM AuUInt64 fn ## ClockFrequency(); \
\
AUKN_SYM AuUInt64 fn ## ClockMS() \
{ \
if (!posixId) \
{ \
return AuNSToMS<AuUInt64>(GetPOSIXTime AU_WHAT(posixCall)); \
} \
return AuNSToMS<AuUInt64>(fn ## ClockNS()); \
} \
\
AUKN_SYM AuUInt64 fn ## ClockNS() \
{ \
if (!posixId) \
{ \
return GetPOSIXTime AU_WHAT(posixCall); \
} \
::timespec spec {}; \
if (::clock_gettime(posixId, &spec) == 0) \
{ \
return AuMSToNS<AuUInt64>(AuSToMS<AuUInt64>(spec.tv_sec)) + (AuUInt64)spec.tv_nsec; \
} \
return 0; \
} \
\
AUKN_SYM AuUInt64 fn ## Clock() \
{ \
if (!posixId) \
{ \
return fn ##ClockNS() / 1000ull; \
} \
return fn ##ClockNS() / (1000000000ull / fn ## ClockFrequency()); \
} \
\
AUKN_SYM AuUInt64 fn ## ClockFrequency() \
{ \
if (!posixId) \
{ \
return 1'000'000ull; \
} \
static AuUInt64 frequency = 0; \
if (frequency != 0) \
{ \
return frequency; \
} \
\
::timespec spec {}; \
if (::clock_getres(posixId, &spec) == 0) \
{ \
if (spec.tv_nsec && !spec.tv_sec) \
{ \
return frequency = 1000000000ull / spec.tv_nsec; \
} \
else \
{ \
SysUnreachable(); \
return 0; \
} \
} \
\
return 0; \
}
#else
// Unsupported-platform variant: every function of the family reports 0.
// This must be a macro like the NT / POSIX variants: the `fn ## ...` token
// pasting is only valid inside a #define, and the ADD_CLOCK_FAMILY(...)
// invocations that follow this conditional need the macro to exist.
// type, expr, posixId and posixCall are accepted for signature parity and ignored.
#define ADD_CLOCK_FAMILY(fn, type, expr, posixId, posixCall) \
    AUKN_SYM AuUInt64 fn ## ClockMS()                        \
    {                                                        \
        return 0;                                            \
    }                                                        \
                                                             \
    AUKN_SYM AuUInt64 fn ## ClockNS()                        \
    {                                                        \
        return 0;                                            \
    }                                                        \
                                                             \
    AUKN_SYM AuUInt64 fn ## Clock()                          \
    {                                                        \
        return 0;                                            \
    }                                                        \
                                                             \
    AUKN_SYM AuUInt64 fn ## ClockFrequency()                 \
    {                                                        \
        return 0;                                            \
    }
#endif
// Instantiate the exported Process* and Thread* clock families (total, kernel-only, user-only).
// Arguments: name prefix, Process/Thread selector, NT expression over ullUser / ullKernel,
// POSIX clock id, and the GetPOSIXTime argument list.
// Every family passes 0 as the POSIX clock id, so POSIX builds always use the GetPOSIXTime path.
ADD_CLOCK_FAMILY(Process, Process, (ullUser.QuadPart + ullKernel.QuadPart), /*CLOCK_PROCESS_CPUTIME_ID*/ 0, (false, EPseudoPosixClock::eAll));
ADD_CLOCK_FAMILY(ProcessKernel, Process, (ullKernel.QuadPart), 0, (false, EPseudoPosixClock::eKernel));
ADD_CLOCK_FAMILY(ProcessUser, Process, (ullUser.QuadPart), /*CLOCK_PROCESS_CPUTIME_ID*/0, (false, EPseudoPosixClock::eUser));
ADD_CLOCK_FAMILY(Thread, Thread, (ullUser.QuadPart + ullKernel.QuadPart), /*CLOCK_THREAD_CPUTIME_ID*/0, (true, EPseudoPosixClock::eAll));
ADD_CLOCK_FAMILY(ThreadKernel, Thread, (ullKernel.QuadPart), 0, (true, EPseudoPosixClock::eKernel));
ADD_CLOCK_FAMILY(ThreadUser, Thread, (ullUser.QuadPart), /*CLOCK_THREAD_CPUTIME_ID*/0, (true, EPseudoPosixClock::eUser));
// ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB) defines a file-local
// <type>ClockPairNS() returning a pair of nanosecond values, or an empty pair
// on failure / unsupported platforms.
//
// The final line of each macro body deliberately has no trailing backslash:
// a continuation there would splice the following #elif / #else / #endif
// directive into the macro body.
#if defined(AURORA_IS_MODERNNT_DERIVED)
// NT: one Get<type>Times call; exprA / exprB are expressions over the local
// ullUser / ullKernel values, each scaled by 100 to nanoseconds.
#define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB)                                    \
                                                                                                \
    static AuPair<AuUInt64, AuUInt64> type ## ClockPairNS()                                     \
    {                                                                                           \
        FILETIME creation, exit, kernel, user;                                                  \
        if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user))     \
        {                                                                                       \
            ULARGE_INTEGER ullUser;                                                             \
            {                                                                                   \
                ullUser.LowPart = user.dwLowDateTime;                                           \
                ullUser.HighPart = user.dwHighDateTime;                                         \
            }                                                                                   \
                                                                                                \
            ULARGE_INTEGER ullKernel;                                                           \
            {                                                                                   \
                ullKernel.LowPart = kernel.dwLowDateTime;                                       \
                ullKernel.HighPart = kernel.dwHighDateTime;                                     \
            }                                                                                   \
            return AuMakePair((exprA) * 100ull, (exprB) * 100ull);                              \
        }                                                                                       \
        return {};                                                                              \
    }
#elif defined(AURORA_IS_POSIX_DERIVED)
// POSIX: forwards to GetPOSIXTimePair(bIsThread); exprA / exprB are ignored.
#define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB)                                    \
                                                                                                \
    static AuPair<AuUInt64, AuUInt64> type ## ClockPairNS()                                     \
    {                                                                                           \
        return GetPOSIXTimePair(bIsThread);                                                     \
    }
#else
// Unsupported platform: always an empty pair.
#define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB)                                    \
                                                                                                \
    static AuPair<AuUInt64, AuUInt64> type ## ClockPairNS()                                     \
    {                                                                                           \
        return {};                                                                              \
    }
#endif
// Instantiate ThreadClockPairNS() and ProcessClockPairNS().
// The first expression is the kernel time and the second the user time, so the
// resulting pair is ordered (kernel, user).
ADD_CLOCK_FAMILY_PAIR(Thread, true, (ullKernel.QuadPart),
(ullUser.QuadPart));
ADD_CLOCK_FAMILY_PAIR(Process, false, (ullKernel.QuadPart),
(ullUser.QuadPart));
// Returns { user ns, kernel ns } for a process or thread clock.
// Any other clock pushes an argument error and yields an empty pair; an
// all-zero reading from the underlying pair helper is reported as a generic
// error and also yields an empty pair.
AUKN_SYM AuPair<AuUInt64, AuUInt64> GetClockUserAndKernelTimeNS(EClock clock)
{
    // The pair helpers report (kernel, user); the order is flipped on return.
    AuPair<AuUInt64, AuUInt64> kernelThenUser;

    switch (clock)
    {
    case EClock::eProcessTime:
    case EClock::eProcessUserTime:
    case EClock::eProcessKernelTime:
        kernelThenUser = ProcessClockPairNS();
        break;
    case EClock::eThreadTime:
    case EClock::eThreadUserTime:
    case EClock::eThreadKernelTime:
        kernelThenUser = ThreadClockPairNS();
        break;
    case EClock::eWall:
    case EClock::eSteady:
    default:
        SysPushErrorArg("Invalid clock");
        return {};
    }

    const bool bEmpty = !kernelThenUser.first && !kernelThenUser.second;
    if (bEmpty)
    {
        SysPushErrorGeneric();
        return {};
    }

    return { kernelThenUser.second, kernelThenUser.first };
}
}