[*] Found a closed issue related to the MSVC STL clock. The proposed solution gave us a 2x boost in benchmarks. Using this over MSVC's clocks when possible (it's not even using kernel32/related APIs, it's using high-level APIs of the CRT and friends... The Microsoft STL has shit-tier performance for real-time usage, yeah, who fucking knew? Why was I writing this project again?)

This commit is contained in:
Reece Wilson 2022-04-01 18:40:56 +01:00
parent e281f63a3b
commit 8b72865586

View File

@ -15,7 +15,60 @@
#include <Source/RuntimeInternal.hpp>
#include "Clock.hpp"
#if defined(AURORA_IS_MODERNNT_DERIVED)
// TODO (Reece): ....
// Background and benchmarking for the faster clock defined below:
// https://github.com/microsoft/STL/issues/2085
struct steady_clock_fast
{ // wraps QueryPerformanceCounter
using rep = long long;
using period = std::nano;
using duration = std::chrono::nanoseconds;
using time_point = _CHRONO time_point<steady_clock_fast>;
static constexpr bool is_steady = true;
_NODISCARD static time_point now() noexcept
{ // get current time
static const long long _Freq = _Query_perf_frequency(); // doesn't change after system boot
const long long _Ctr = _Query_perf_counter();
static_assert(period::num == 1, "This assumes period::num == 1.");
// Instead of just having "(_Ctr * period::den) / _Freq",
// the algorithm below prevents overflow when _Ctr is sufficiently large.
// It assumes that _Freq * period::den does not overflow, which is currently true for nano period.
// It is not realistic for _Ctr to accumulate to large values from zero with this assumption,
// but the initial value of _Ctr could be large.
// 10 MHz is a very common QPC frequency on modern PCs. Optimizing for
// this specific frequency can double the performance of this function by
// avoiding the expensive frequency conversion path.
if (_Freq == 10000000)
{
return time_point(duration(_Ctr * 100));
}
else
{
const long long _Whole = (_Ctr / _Freq) * period::den;
const long long _Part = (_Ctr % _Freq) * period::den / _Freq;
return time_point(duration(_Whole + _Part));
}
}
};
// Benchmark result with steady_clock_fast: ~3.0741 seconds (workload not recorded here).
using high_res_clock = steady_clock_fast;
// Roughly a 2x improvement over the ~6.07 seconds noted for the fallback below,
// so this clock is kept.
#else
// Benchmark result with std::chrono::high_resolution_clock: ~6.07 seconds
// (presumably the same benchmark as the figure above; confirm before relying on it).
using high_res_clock = std::chrono::high_resolution_clock;
#endif
// Shorthand for the standard library's system (wall) clock.
using sys_clock = std::chrono::system_clock;
#if defined(AURORA_PLATFORM_WIN32)