[wasm] Remove thread timings (CPU time)
On Mac, we spend around 30% of lazy compile time in {ThreadTicks::Now}. On Linux it's less severe, but still around 10%. That's too much overhead for measurements that only show up in UKM, so remove that. The unused fields will be removed from the UKM events after the plumbing on the chromium side has been removed. R=ahaas@chromium.org, mlippautz@chromium.org CC=ecmziegler@chromium.org Bug: v8:13565 Change-Id: I2dad88d899482801888940499d2d1761ff075578 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4078966 Reviewed-by: Michael Lippautz <mlippautz@chromium.org> Reviewed-by: Andreas Haas <ahaas@chromium.org> Commit-Queue: Clemens Backes <clemensb@chromium.org> Cr-Commit-Position: refs/heads/main@{#84754}
This commit is contained in:
parent
f4fb8fc1f7
commit
068536f109
@ -12,6 +12,7 @@
|
||||
|
||||
#include "v8-internal.h" // NOLINT(build/include_directory)
|
||||
#include "v8-local-handle.h" // NOLINT(build/include_directory)
|
||||
#include "v8config.h" // NOLINT(build/include_directory)
|
||||
|
||||
namespace v8 {
|
||||
|
||||
@ -96,16 +97,88 @@ struct GarbageCollectionYoungCycle {
|
||||
};
|
||||
|
||||
struct WasmModuleDecoded {
|
||||
WasmModuleDecoded() = default;
|
||||
WasmModuleDecoded(bool async, bool streamed, bool success,
|
||||
size_t module_size_in_bytes, size_t function_count,
|
||||
int64_t wall_clock_duration_in_us)
|
||||
: async(async),
|
||||
streamed(streamed),
|
||||
success(success),
|
||||
module_size_in_bytes(module_size_in_bytes),
|
||||
function_count(function_count),
|
||||
wall_clock_duration_in_us(wall_clock_duration_in_us) {}
|
||||
|
||||
V8_DEPRECATE_SOON("Use the version without cpu_duration_in_us")
|
||||
WasmModuleDecoded(bool async, bool streamed, bool success,
|
||||
size_t module_size_in_bytes, size_t function_count,
|
||||
int64_t wall_clock_duration_in_us,
|
||||
int64_t cpu_duration_in_us)
|
||||
: async(async),
|
||||
streamed(streamed),
|
||||
success(success),
|
||||
module_size_in_bytes(module_size_in_bytes),
|
||||
function_count(function_count),
|
||||
wall_clock_duration_in_us(wall_clock_duration_in_us),
|
||||
cpu_duration_in_us(cpu_duration_in_us) {}
|
||||
|
||||
START_ALLOW_USE_DEPRECATED()
|
||||
// Copy constructor and copy assignment operator are allowed to copy the
|
||||
// {cpu_duration_in_us} field.
|
||||
WasmModuleDecoded(const WasmModuleDecoded&) = default;
|
||||
WasmModuleDecoded& operator=(const WasmModuleDecoded&) = default;
|
||||
END_ALLOW_USE_DEPRECATED()
|
||||
|
||||
bool async = false;
|
||||
bool streamed = false;
|
||||
bool success = false;
|
||||
size_t module_size_in_bytes = 0;
|
||||
size_t function_count = 0;
|
||||
int64_t wall_clock_duration_in_us = -1;
|
||||
V8_DEPRECATE_SOON("We do not collect cpu times any more")
|
||||
int64_t cpu_duration_in_us = -1;
|
||||
};
|
||||
|
||||
struct WasmModuleCompiled {
|
||||
WasmModuleCompiled() = default;
|
||||
|
||||
WasmModuleCompiled(bool async, bool streamed, bool cached, bool deserialized,
|
||||
bool lazy, bool success, size_t code_size_in_bytes,
|
||||
size_t liftoff_bailout_count,
|
||||
int64_t wall_clock_duration_in_us)
|
||||
: async(async),
|
||||
streamed(streamed),
|
||||
cached(cached),
|
||||
deserialized(deserialized),
|
||||
lazy(lazy),
|
||||
success(success),
|
||||
code_size_in_bytes(code_size_in_bytes),
|
||||
liftoff_bailout_count(liftoff_bailout_count),
|
||||
wall_clock_duration_in_us(wall_clock_duration_in_us) {}
|
||||
|
||||
V8_DEPRECATE_SOON("Use the version without cpu_duration_in_us")
|
||||
WasmModuleCompiled(bool async, bool streamed, bool cached, bool deserialized,
|
||||
bool lazy, bool success, size_t code_size_in_bytes,
|
||||
size_t liftoff_bailout_count,
|
||||
int64_t wall_clock_duration_in_us,
|
||||
int64_t cpu_duration_in_us)
|
||||
: async(async),
|
||||
streamed(streamed),
|
||||
cached(cached),
|
||||
deserialized(deserialized),
|
||||
lazy(lazy),
|
||||
success(success),
|
||||
code_size_in_bytes(code_size_in_bytes),
|
||||
liftoff_bailout_count(liftoff_bailout_count),
|
||||
wall_clock_duration_in_us(wall_clock_duration_in_us),
|
||||
cpu_duration_in_us(cpu_duration_in_us) {}
|
||||
|
||||
START_ALLOW_USE_DEPRECATED()
|
||||
// Copy constructor and copy assignment operator are allowed to copy the
|
||||
// {cpu_duration_in_us} field.
|
||||
WasmModuleCompiled(const WasmModuleCompiled&) = default;
|
||||
WasmModuleCompiled& operator=(const WasmModuleCompiled&) = default;
|
||||
END_ALLOW_USE_DEPRECATED()
|
||||
|
||||
bool async = false;
|
||||
bool streamed = false;
|
||||
bool cached = false;
|
||||
@ -115,6 +188,7 @@ struct WasmModuleCompiled {
|
||||
size_t code_size_in_bytes = 0;
|
||||
size_t liftoff_bailout_count = 0;
|
||||
int64_t wall_clock_duration_in_us = -1;
|
||||
V8_DEPRECATE_SOON("We do not collect cpu times any more")
|
||||
int64_t cpu_duration_in_us = -1;
|
||||
};
|
||||
|
||||
|
@ -1133,10 +1133,6 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance,
|
||||
|
||||
TRACE_LAZY("Compiling wasm-function#%d.\n", func_index);
|
||||
|
||||
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
|
||||
? base::ThreadTicks::Now()
|
||||
: base::ThreadTicks();
|
||||
|
||||
CompilationStateImpl* compilation_state =
|
||||
Impl(native_module->compilation_state());
|
||||
ExecutionTierPair tiers = GetLazyCompilationTiers(native_module, func_index);
|
||||
@ -1153,11 +1149,6 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance,
|
||||
&env, compilation_state->GetWireBytesStorage().get(), counters,
|
||||
assembler_buffer_cache, &detected_features);
|
||||
compilation_state->OnCompilationStopped(detected_features);
|
||||
if (!thread_ticks.IsNull()) {
|
||||
native_module->UpdateCPUDuration(
|
||||
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
|
||||
tiers.baseline_tier);
|
||||
}
|
||||
|
||||
// During lazy compilation, we can only get compilation errors when
|
||||
// {--wasm-lazy-validation} is enabled. Otherwise, the module was fully
|
||||
@ -1543,10 +1534,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
|
||||
|
||||
WasmFeatures detected_features = WasmFeatures::None();
|
||||
|
||||
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
|
||||
? base::ThreadTicks::Now()
|
||||
: base::ThreadTicks();
|
||||
|
||||
// Preparation (synchronized): Initialize the fields above and get the first
|
||||
// compilation unit.
|
||||
{
|
||||
@ -1607,11 +1594,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
|
||||
if (yield ||
|
||||
!(unit = compile_scope.compilation_state()->GetNextCompilationUnit(
|
||||
queue, baseline_only))) {
|
||||
if (!thread_ticks.IsNull()) {
|
||||
compile_scope.native_module()->UpdateCPUDuration(
|
||||
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
|
||||
current_tier);
|
||||
}
|
||||
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
|
||||
compile_scope.native_module()->AddCompiledCode(
|
||||
base::VectorOf(std::move(results_to_publish)));
|
||||
@ -1633,12 +1615,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
|
||||
bool liftoff_finished = unit->tier() != current_tier &&
|
||||
unit->tier() == ExecutionTier::kTurbofan;
|
||||
if (batch_full || liftoff_finished) {
|
||||
if (!thread_ticks.IsNull()) {
|
||||
base::ThreadTicks thread_ticks_now = base::ThreadTicks::Now();
|
||||
compile_scope.native_module()->UpdateCPUDuration(
|
||||
(thread_ticks_now - thread_ticks).InMicroseconds(), current_tier);
|
||||
thread_ticks = thread_ticks_now;
|
||||
}
|
||||
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
|
||||
compile_scope.native_module()->AddCompiledCode(
|
||||
base::VectorOf(std::move(results_to_publish)));
|
||||
@ -1801,9 +1777,7 @@ class CompilationTimeCallback : public CompilationEventCallback {
|
||||
true, // success
|
||||
native_module->liftoff_code_size(), // code_size_in_bytes
|
||||
native_module->liftoff_bailout_count(), // liftoff_bailout_count
|
||||
duration.InMicroseconds(), // wall_clock_duration_in_us
|
||||
static_cast<int64_t>( // cpu_time_duration_in_us
|
||||
native_module->baseline_compilation_cpu_duration())};
|
||||
duration.InMicroseconds()}; // wall_clock_duration_in_us
|
||||
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
|
||||
}
|
||||
if (compilation_event == CompilationEvent::kFailedCompilation) {
|
||||
@ -1816,9 +1790,7 @@ class CompilationTimeCallback : public CompilationEventCallback {
|
||||
false, // success
|
||||
native_module->liftoff_code_size(), // code_size_in_bytes
|
||||
native_module->liftoff_bailout_count(), // liftoff_bailout_count
|
||||
duration.InMicroseconds(), // wall_clock_duration_in_us
|
||||
static_cast<int64_t>( // cpu_time_duration_in_us
|
||||
native_module->baseline_compilation_cpu_duration())};
|
||||
duration.InMicroseconds()}; // wall_clock_duration_in_us
|
||||
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
|
||||
}
|
||||
}
|
||||
@ -2325,9 +2297,7 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) {
|
||||
!compilation_state->failed(), // success
|
||||
native_module_->turbofan_code_size(), // code_size_in_bytes
|
||||
native_module_->liftoff_bailout_count(), // liftoff_bailout_count
|
||||
duration.InMicroseconds(), // wall_clock_duration_in_us
|
||||
static_cast<int64_t>( // cpu_time_duration_in_us
|
||||
native_module_->baseline_compilation_cpu_duration())};
|
||||
duration.InMicroseconds()}; // wall_clock_duration_in_us
|
||||
isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_);
|
||||
}
|
||||
}
|
||||
|
@ -104,9 +104,6 @@ ModuleResult DecodeWasmModule(
|
||||
v8::metrics::WasmModuleDecoded metrics_event;
|
||||
base::ElapsedTimer timer;
|
||||
timer.Start();
|
||||
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
|
||||
? base::ThreadTicks::Now()
|
||||
: base::ThreadTicks();
|
||||
ModuleResult result = DecodeWasmModule(enabled_features, wire_bytes,
|
||||
validate_functions, origin);
|
||||
if (counters && result.ok()) {
|
||||
@ -119,10 +116,6 @@ ModuleResult DecodeWasmModule(
|
||||
// Record event metrics.
|
||||
metrics_event.wall_clock_duration_in_us = timer.Elapsed().InMicroseconds();
|
||||
timer.Stop();
|
||||
if (!thread_ticks.IsNull()) {
|
||||
metrics_event.cpu_duration_in_us =
|
||||
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds();
|
||||
}
|
||||
metrics_event.success = result.ok();
|
||||
metrics_event.async = decoding_method == DecodingMethod::kAsync ||
|
||||
decoding_method == DecodingMethod::kAsyncStream;
|
||||
|
@ -1722,15 +1722,6 @@ void NativeModule::SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes) {
|
||||
}
|
||||
}
|
||||
|
||||
void NativeModule::UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier) {
|
||||
if (!compilation_state_->baseline_compilation_finished()) {
|
||||
baseline_compilation_cpu_duration_.fetch_add(cpu_duration,
|
||||
std::memory_order_relaxed);
|
||||
} else if (tier == ExecutionTier::kTurbofan) {
|
||||
tier_up_cpu_duration_.fetch_add(cpu_duration, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
void NativeModule::AddLazyCompilationTimeSample(int64_t sample_in_micro_sec) {
|
||||
num_lazy_compilations_.fetch_add(1, std::memory_order_relaxed);
|
||||
sum_lazy_compilation_time_in_micro_sec_.fetch_add(sample_in_micro_sec,
|
||||
|
@ -767,12 +767,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
|
||||
size_t turbofan_code_size() const {
|
||||
return turbofan_code_size_.load(std::memory_order_relaxed);
|
||||
}
|
||||
size_t baseline_compilation_cpu_duration() const {
|
||||
return baseline_compilation_cpu_duration_.load();
|
||||
}
|
||||
size_t tier_up_cpu_duration() const {
|
||||
return tier_up_cpu_duration_.load(std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void AddLazyCompilationTimeSample(int64_t sample);
|
||||
|
||||
@ -805,7 +799,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
|
||||
}
|
||||
void SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes);
|
||||
|
||||
void UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier);
|
||||
void AddLiftoffBailout() {
|
||||
liftoff_bailout_count_.fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
@ -1035,8 +1028,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
|
||||
std::atomic<size_t> liftoff_bailout_count_{0};
|
||||
std::atomic<size_t> liftoff_code_size_{0};
|
||||
std::atomic<size_t> turbofan_code_size_{0};
|
||||
std::atomic<size_t> baseline_compilation_cpu_duration_{0};
|
||||
std::atomic<size_t> tier_up_cpu_duration_{0};
|
||||
|
||||
// Metrics for lazy compilation.
|
||||
std::atomic<int> num_lazy_compilations_{0};
|
||||
|
@ -312,9 +312,6 @@ COMPILE_TEST(TestEventMetrics) {
|
||||
recorder->module_decoded_.back().module_size_in_bytes);
|
||||
CHECK_EQ(1, recorder->module_decoded_.back().function_count);
|
||||
CHECK_LE(0, recorder->module_decoded_.back().wall_clock_duration_in_us);
|
||||
CHECK_IMPLIES(
|
||||
v8::base::ThreadTicks::IsSupported() && !i::v8_flags.wasm_test_streaming,
|
||||
recorder->module_decoded_.back().cpu_duration_in_us > 0);
|
||||
|
||||
CHECK_EQ(1, recorder->module_compiled_.size());
|
||||
CHECK(recorder->module_compiled_.back().success);
|
||||
@ -335,12 +332,6 @@ COMPILE_TEST(TestEventMetrics) {
|
||||
CHECK_GE(native_module->generated_code_size(),
|
||||
recorder->module_compiled_.back().code_size_in_bytes);
|
||||
CHECK_LE(0, recorder->module_compiled_.back().wall_clock_duration_in_us);
|
||||
CHECK_EQ(native_module->baseline_compilation_cpu_duration(),
|
||||
recorder->module_compiled_.back().cpu_duration_in_us);
|
||||
CHECK_IMPLIES(v8::base::ThreadTicks::IsSupported() &&
|
||||
!i::v8_flags.wasm_test_streaming &&
|
||||
!i::v8_flags.wasm_lazy_compilation,
|
||||
recorder->module_compiled_.back().cpu_duration_in_us > 0);
|
||||
|
||||
CHECK_EQ(1, recorder->module_instantiated_.size());
|
||||
CHECK(recorder->module_instantiated_.back().success);
|
||||
|
Loading…
Reference in New Issue
Block a user