/*** Copyright (C) 2021 J Reece Wilson (a/k/a "Reece"). All rights reserved. File: AuClock.cpp Date: 2021-6-13 Author: Reece ***/ #include #include "AuClock.hpp" #include "Time.hpp" #if defined(AURORA_IS_POSIX_DERIVED) #include #elif defined(AURORA_IS_MODERNNT_DERIVED) #include static AuUInt8 _gNTClockMode {}; static long long(__cdecl *_gNTQuery)(); static long long(__cdecl *_gNTFreq)(); struct NTSystemTime { NTSystemTime(volatile NTSystemTime &dumb) { this->LowPart = dumb.LowPart; this->High1Time = dumb.High1Time; this->High2Time = dumb.High2Time; } unsigned long LowPart; long High1Time; long High2Time; }; struct NTQPCoefficients { union { UCHAR TscQpcData; struct { UCHAR TscQpcEnabled : 1; UCHAR TscQpcSpareFlag : 1; UCHAR TscQpcShift : 6; }; }; }; static unsigned long long _NT3_Query_Frequency(); static void _NTSetFallbackClock(); static void * kKUSERShardDataOffset = AuReinterpretCast(0x7ffe0000); static auto * kKInterruptTime = AuReinterpretCast(AuReinterpretCast(kKUSERShardDataOffset) + 8); static auto * kKQPCData = AuReinterpretCast(AuReinterpretCast(kKUSERShardDataOffset) + 0x2ED); static AuUInt64 _gNTTimeShift {}; static AuUInt64 _gNTTimeBias {}; static AuUInt64 _gNTTimeFreq { _NT3_Query_Frequency() }; static unsigned long long _NT3_Query_Frequency() { return 1'0000'000ull; } static void _NT6_1_Init() { NTQPCoefficients now; now.TscQpcData = AuAtomicLoad(&kKQPCData->TscQpcData); if (!now.TscQpcEnabled) { _NTSetFallbackClock(); return; } _gNTTimeShift = now.TscQpcShift; _gNTTimeBias = *AuReinterpretCast(AuReinterpretCast(kKUSERShardDataOffset) + 0x3B8); long long uvalue {}; SysAssert(Aurora::pQueryPerformanceFrequency && Aurora::pQueryPerformanceFrequency(&uvalue), "no perf frequency"); _gNTTimeFreq = uvalue; _gNTClockMode = 1; } static long long _NT6_1_Query_Frequency() { return _gNTTimeFreq; } static unsigned long long _NT6_1_Query_Counter() { #if defined(AURORA_ARCH_X64) || defined(AURORA_ARCH_X86) AuUInt64 uTimeNow = __rdtsc(); uTimeNow += _gNTTimeBias; uTimeNow >>= _gNTTimeShift; return uTimeNow; #endif return 0; } static long long _NT3_Query_Counter() { auto interruptTime = *kKInterruptTime; return (AuUInt64(kKInterruptTime->High1Time) << 32ull) | interruptTime.LowPart; } static void _NTSetFallbackClock() { _gNTFreq = []() { long long uvalue {}; SysAssert(Aurora::pQueryPerformanceFrequency && Aurora::pQueryPerformanceFrequency(&uvalue), "no perf frequency"); return uvalue; }; _gNTQuery = []() { long long uvalue {}; SysAssert(Aurora::pQueryPerformanceCounter && Aurora::pQueryPerformanceCounter(&uvalue), "no perf counter"); return uvalue; }; _gNTClockMode = 2; } static void _NTDoClockInit() { static AuInitOnce gInitOnce; gInitOnce.Call([] { Aurora::InitNTAddresses(); AuSwInfo::InitSwInfoEarly(); { // Might be useful for real-time applications under busted motherboards and hypervisors. // Should the QPC fastpath be invalidated, and the kernel hit each time because of NT HAL retardation, // this can be used to enforce a userland-only clock query, when Windows would otherwise resort to a // system call. // Pretty much every Win32 environment and platform should support this wchar_t buffer[64]; if (GetEnvironmentVariableW(L"AURORA_STEADY_TIME_SHID_CPU_XP_MODE", buffer, AuArraySize(buffer))) { _gNTClockMode = 0; return; } } if constexpr (AuBuild::kCurrentPlatform == AuBuild::EPlatform::ePlatformWin32) { if (!AuSwInfo::IsWindows7OrGreater()) { _gNTClockMode = 0; } else if (AuSwInfo::IsWindows7Any()) { _NT6_1_Init(); } else //if (AuSwInfo::IsWindows8OrGreater()) { _NTSetFallbackClock(); } } else { _gNTClockMode = 2; _gNTFreq = _Query_perf_frequency; _gNTQuery = _Query_perf_counter; } }); } AUKN_SYM unsigned long long _NTLikeQueryFrequency() { _NTDoClockInit(); switch (_gNTClockMode) { case 0: return _NT3_Query_Frequency(); case 1: return _NT6_1_Query_Frequency(); case 2: return _gNTFreq(); default: return 0; } } AUKN_SYM unsigned long long _NTLikeQueryCounter() { switch (_gNTClockMode) { case 0: return _NT3_Query_Counter(); case 1: return _NT6_1_Query_Counter(); case 2: return _gNTQuery(); default: return 0; } } // benchmarking: https://github.com/microsoft/STL/issues/2085 static AuUInt64 _GetSteadyTimeNS() { static const long long gFreq = _NTLikeQueryFrequency(); const long long uCounter = _NTLikeQueryCounter(); if (gFreq == 10000000) { return uCounter * 100; } else if (gFreq == 1000000) { return uCounter * 1000; } else if (gFreq == 100000) { return uCounter * 10000; } else if (gFreq == 100000000) { return uCounter * 10; } else if (gFreq == 1000000000) { return uCounter; } else { // 6 branches: the default threshold for most jit and language compiler backends to decide to pick a jump table, if the values were in a close range // otherwise, back to a tree of paths. either way, im sure 6 if elses are faster than grug math with large numbers, modulus, division, and multiplication const long long uWhole = (uCounter / gFreq) * 1'000'000'000ull; const long long uPart = (uCounter % gFreq) * 1'000'000'000ull / gFreq; return uWhole + uPart; } } // ~3.0741 seconds // using high_res_clock = std::chrono::high_resolution_clock; // ~6.07 seconds // holy fuck, we're keeping this // ~2x improvement #else using steady_clock = std::chrono::steady_clock; #endif using sys_clock = std::chrono::system_clock; // more stds to remove sys_clock::duration __NormalizeEpoch(sys_clock::duration sysEpoch); static AuInt64 _CurrentClock() { return __NormalizeEpoch(sys_clock::now().time_since_epoch()).count(); } static AuInt64 _CurrentClockMS() { return std::chrono::duration_cast(__NormalizeEpoch(sys_clock::now().time_since_epoch())).count(); } static AuInt64 _CurrentClockNS() { return std::chrono::duration_cast(__NormalizeEpoch(sys_clock::now().time_since_epoch())).count(); } namespace Aurora::Time { AUKN_SYM AuInt64 CurrentClock() { return _CurrentClock(); } AUKN_SYM AuInt64 CurrentClockMS() { return _CurrentClockMS(); } AUKN_SYM AuInt64 CurrentClockNS() { return _CurrentClockNS(); } AUKN_SYM AuUInt64 SteadyClock() { #if defined(AURORA_IS_MODERNNT_DERIVED) return _NTLikeQueryCounter(); #else return SteadyClockNS() / (1000000000ull / SteadyClockJiffies()); #endif } AUKN_SYM AuUInt64 SteadyClockMS() { #if defined(AURORA_IS_POSIX_DERIVED) ::timespec spec {}; if (::clock_gettime(CLOCK_MONOTONIC, &spec) == 0) { return AuSToMS(spec.tv_sec) + AuNSToMS(spec.tv_nsec); } else { return 0; } #elif defined(AURORA_IS_MODERNNT_DERIVED) return AuNSToMS(_GetSteadyTimeNS()); #else return std::chrono::duration_cast(steady_clock::now().time_since_epoch()).count(); #endif } AUKN_SYM AuUInt64 SteadyClockNS() { #if defined(AURORA_IS_POSIX_DERIVED) ::timespec spec {}; if (::clock_gettime(CLOCK_MONOTONIC, &spec) == 0) { return AuMSToNS(AuSToMS(spec.tv_sec)) + (AuUInt64)spec.tv_nsec; } else { return 0; } #elif defined(AURORA_IS_MODERNNT_DERIVED) return _GetSteadyTimeNS(); #else return std::chrono::duration_cast(steady_clock::now().time_since_epoch()).count(); #endif } AUKN_SYM AuUInt64 SteadyClockJiffies() { static AuUInt64 gFrequency = 0; if (gFrequency != 0) { return gFrequency; } #if defined(AURORA_COMPILER_MSVC) return gFrequency = _NTLikeQueryFrequency(); #elif defined(AURORA_IS_POSIX_DERIVED) ::timespec spec {}; if (::clock_getres(CLOCK_MONOTONIC, &spec) == 0) { if (spec.tv_nsec && !spec.tv_sec) { return gFrequency = 1000000000ull / spec.tv_nsec; } } return gFrequency = (1000000000ull / 100ull); #else return gFrequency = static_cast(steady_clock::period::den) / static_cast(steady_clock::period::num); #endif } AUKN_SYM AuInt64 CTimeToMS(time_t time) { return std::chrono::duration_cast(__NormalizeEpoch(sys_clock::from_time_t(time).time_since_epoch())).count(); } #if defined(AURORA_IS_POSIX_DERIVED) enum class EPseudoPosixClock { eUser, eKernel, eAll }; static AuUInt64 GetPOSIXTimeEx(struct rusage *usage, EPseudoPosixClock e) { struct timeval *tv {}; switch (e) { case EPseudoPosixClock::eAll: { return GetPOSIXTimeEx(usage, EPseudoPosixClock::eKernel) + GetPOSIXTimeEx(usage, EPseudoPosixClock::eUser); } case EPseudoPosixClock::eUser: { tv = &usage->ru_utime; break; } case EPseudoPosixClock::eKernel: { tv = &usage->ru_stime; break; } }; auto uMS = AuSToMS(tv->tv_sec); auto uNS = AuMSToNS(uMS) + tv->tv_usec * 1'000ull; return uNS; } static AuUInt64 GetPOSIXTime(bool bThread, EPseudoPosixClock e) { struct rusage usage; getrusage(bThread ? RUSAGE_THREAD : RUSAGE_SELF, &usage); return GetPOSIXTimeEx(&usage, e); } static AuPair GetPOSIXTimePair(bool bThread) { struct rusage usage; getrusage(bThread ? RUSAGE_THREAD : RUSAGE_SELF, &usage); return { GetPOSIXTimeEx(&usage, EPseudoPosixClock::eKernel), GetPOSIXTimeEx(&usage, EPseudoPosixClock::eUser) }; } #if !defined(CLOCK_THREAD_CPUTIME_ID) #define CLOCK_THREAD_CPUTIME_ID 0 #endif #if !defined(CLOCK_PROCESS_CPUTIME_ID) #define CLOCK_PROCESS_CPUTIME_ID 0 #endif #endif #if defined(AURORA_IS_MODERNNT_DERIVED) #define ADD_CLOCK_FAMILY(fn, type, expr, posixId, posixCall) \ AUKN_SYM AuUInt64 fn ## ClockJiffies(); \ \ AUKN_SYM AuUInt64 fn ## ClockMS() \ { \ return AuNSToMS(fn ## ClockNS()); \ } \ \ AUKN_SYM AuUInt64 fn ## ClockNS() \ { \ FILETIME creation, exit, kernel, user; \ if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user)) \ { \ ULARGE_INTEGER ullUser; \ { \ ullUser.LowPart = user.dwLowDateTime; \ ullUser.HighPart = user.dwHighDateTime; \ } \ \ ULARGE_INTEGER ullKernel; \ { \ ullKernel.LowPart = kernel.dwLowDateTime; \ ullKernel.HighPart = kernel.dwHighDateTime; \ } \ return (expr) * 100ull; \ } \ return 0; \ } \ \ AUKN_SYM AuUInt64 fn ## Clock() \ { \ FILETIME creation, exit, kernel, user; \ \ if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user)) \ { \ ULARGE_INTEGER ullUser; \ { \ ullUser.LowPart = user.dwLowDateTime; \ ullUser.HighPart = user.dwHighDateTime; \ } \ \ ULARGE_INTEGER ullKernel; \ { \ ullKernel.LowPart = kernel.dwLowDateTime; \ ullKernel.HighPart = kernel.dwHighDateTime; \ } \ return expr; \ } \ \ return fn ##ClockNS() / (1000000000ull / fn ## ClockJiffies()); \ } \ \ AUKN_SYM AuUInt64 fn ## ClockJiffies() \ { \ return 1000000000ull / 100u; \ } #elif defined(AURORA_IS_POSIX_DERIVED) #define ADD_CLOCK_FAMILY(fn, type, expr, posixId, posixCall) \ AUKN_SYM AuUInt64 fn ## ClockJiffies(); \ \ AUKN_SYM AuUInt64 fn ## ClockMS() \ { \ if (!posixId) \ { \ return AuNSToMS(GetPOSIXTime AU_WHAT(posixCall)); \ } \ return AuNSToMS(fn ## ClockNS()); \ } \ \ AUKN_SYM AuUInt64 fn ## ClockNS() \ { \ if (!posixId) \ { \ return GetPOSIXTime AU_WHAT(posixCall); \ } \ ::timespec spec {}; \ if (::clock_gettime(posixId, &spec) == 0) \ { \ return AuMSToNS(AuSToMS(spec.tv_sec)) + (AuUInt64)spec.tv_nsec; \ } \ return 0; \ } \ \ AUKN_SYM AuUInt64 fn ## Clock() \ { \ if (!posixId) \ { \ return fn ##ClockNS() / 1000ull; \ } \ return fn ##ClockNS() / (1000000000ull / fn ## ClockJiffies()); \ } \ \ AUKN_SYM AuUInt64 fn ## ClockJiffies() \ { \ if (!posixId) \ { \ return 1'000'000ull; \ } \ static AuUInt64 frequency = 0; \ if (frequency != 0) \ { \ return frequency; \ } \ \ ::timespec spec {}; \ if (::clock_getres(posixId, &spec) == 0) \ { \ if (spec.tv_nsec && !spec.tv_sec) \ { \ return frequency = 1000000000ull / spec.tv_nsec; \ } \ else \ { \ SysUnreachable(); \ return 0; \ } \ } \ \ return 0; \ } #else AUKN_SYM AuUInt64 fn ## ClockMS() { return 0; } AUKN_SYM AuUInt64 fn ## ClockNS() { return 0; } AUKN_SYM AuUInt64 fn ## Clock() { return 0; } AUKN_SYM AuUInt64 fn ## ClockJiffies() { return 0; } #endif ADD_CLOCK_FAMILY(Process, Process, (ullUser.QuadPart + ullKernel.QuadPart), /*CLOCK_PROCESS_CPUTIME_ID*/ 0, (false, EPseudoPosixClock::eAll)); ADD_CLOCK_FAMILY(ProcessKernel, Process, (ullKernel.QuadPart), 0, (false, EPseudoPosixClock::eKernel)); ADD_CLOCK_FAMILY(ProcessUser, Process, (ullUser.QuadPart), /*CLOCK_PROCESS_CPUTIME_ID*/0, (false, EPseudoPosixClock::eUser)); ADD_CLOCK_FAMILY(Thread, Thread, (ullUser.QuadPart + ullKernel.QuadPart), /*CLOCK_THREAD_CPUTIME_ID*/0, (true, EPseudoPosixClock::eAll)); ADD_CLOCK_FAMILY(ThreadKernel, Thread, (ullKernel.QuadPart), 0, (true, EPseudoPosixClock::eKernel)); ADD_CLOCK_FAMILY(ThreadUser, Thread, (ullUser.QuadPart), /*CLOCK_THREAD_CPUTIME_ID*/0, (true, EPseudoPosixClock::eUser)); #if defined(AURORA_IS_MODERNNT_DERIVED) #define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB) \ \ static AuPair type ## ClockPairNS() \ { \ FILETIME creation, exit, kernel, user; \ if (::Get ## type ## Times(GetCurrent ## type(), &creation, &exit, &kernel, &user)) \ { \ ULARGE_INTEGER ullUser; \ { \ ullUser.LowPart = user.dwLowDateTime; \ ullUser.HighPart = user.dwHighDateTime; \ } \ \ ULARGE_INTEGER ullKernel; \ { \ ullKernel.LowPart = kernel.dwLowDateTime; \ ullKernel.HighPart = kernel.dwHighDateTime; \ } \ return AuMakePair((exprA) * 100ull, (exprB) * 100ull); \ } \ return {}; \ } \ #elif defined(AURORA_IS_POSIX_DERIVED) #define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB) \ \ static AuPair type ## ClockPairNS() \ { \ return GetPOSIXTimePair(bIsThread); \ } \ #else #define ADD_CLOCK_FAMILY_PAIR(type, bIsThread, exprA, exprB) \ \ static AuPair type ## ClockPairNS() \ { \ return {}; \ } \ #endif ADD_CLOCK_FAMILY_PAIR(Thread, true, (ullKernel.QuadPart), (ullUser.QuadPart)); ADD_CLOCK_FAMILY_PAIR(Process, false, (ullKernel.QuadPart), (ullUser.QuadPart)); AUKN_SYM AuPair GetClockUserAndKernelTimeNS(EClock clock) { AuPair swizzle; switch (clock) { case EClock::eWall: case EClock::eSteady: SysPushErrorArg("Invalid clock"); return {}; case EClock::eProcessTime: case EClock::eProcessUserTime: case EClock::eProcessKernelTime: swizzle = ProcessClockPairNS(); break; case EClock::eThreadTime: case EClock::eThreadUserTime: case EClock::eThreadKernelTime: swizzle = ThreadClockPairNS(); break; default: SysPushErrorArg("Invalid clock"); return {}; } if (swizzle == AuPair {}) { SysPushErrorGeneric(); return {}; } return { swizzle.second, swizzle.first }; } }