[wasm] Remove thread timings (CPU time)

On Mac, we spend around 30% of lazy compile time in {ThreadTicks::Now}.
On Linux it's less severe, but still around 10%.
That's too much overhead for measurements that only show up in UKM, so
remove that.

The unused fields will be removed from the UKM events after the plumbing
on the chromium side has been removed.

R=ahaas@chromium.org, mlippautz@chromium.org
CC=​ecmziegler@chromium.org

Bug: v8:13565
Change-Id: I2dad88d899482801888940499d2d1761ff075578
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4078966
Reviewed-by: Michael Lippautz <mlippautz@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84754}
This commit is contained in:
Clemens Backes 2022-12-08 16:03:47 +01:00 committed by V8 LUCI CQ
parent f4fb8fc1f7
commit 068536f109
6 changed files with 77 additions and 67 deletions

View File

@ -12,6 +12,7 @@
#include "v8-internal.h" // NOLINT(build/include_directory)
#include "v8-local-handle.h" // NOLINT(build/include_directory)
#include "v8config.h" // NOLINT(build/include_directory)
namespace v8 {
@ -96,16 +97,88 @@ struct GarbageCollectionYoungCycle {
};
struct WasmModuleDecoded {
WasmModuleDecoded() = default;
WasmModuleDecoded(bool async, bool streamed, bool success,
size_t module_size_in_bytes, size_t function_count,
int64_t wall_clock_duration_in_us)
: async(async),
streamed(streamed),
success(success),
module_size_in_bytes(module_size_in_bytes),
function_count(function_count),
wall_clock_duration_in_us(wall_clock_duration_in_us) {}
V8_DEPRECATE_SOON("Use the version without cpu_duration_in_us")
WasmModuleDecoded(bool async, bool streamed, bool success,
size_t module_size_in_bytes, size_t function_count,
int64_t wall_clock_duration_in_us,
int64_t cpu_duration_in_us)
: async(async),
streamed(streamed),
success(success),
module_size_in_bytes(module_size_in_bytes),
function_count(function_count),
wall_clock_duration_in_us(wall_clock_duration_in_us),
cpu_duration_in_us(cpu_duration_in_us) {}
START_ALLOW_USE_DEPRECATED()
// Copy constructor and copy assignment operator are allowed to copy the
// {cpu_duration_in_us} field.
WasmModuleDecoded(const WasmModuleDecoded&) = default;
WasmModuleDecoded& operator=(const WasmModuleDecoded&) = default;
END_ALLOW_USE_DEPRECATED()
bool async = false;
bool streamed = false;
bool success = false;
size_t module_size_in_bytes = 0;
size_t function_count = 0;
int64_t wall_clock_duration_in_us = -1;
V8_DEPRECATE_SOON("We do not collect cpu times any more")
int64_t cpu_duration_in_us = -1;
};
struct WasmModuleCompiled {
WasmModuleCompiled() = default;
WasmModuleCompiled(bool async, bool streamed, bool cached, bool deserialized,
bool lazy, bool success, size_t code_size_in_bytes,
size_t liftoff_bailout_count,
int64_t wall_clock_duration_in_us)
: async(async),
streamed(streamed),
cached(cached),
deserialized(deserialized),
lazy(lazy),
success(success),
code_size_in_bytes(code_size_in_bytes),
liftoff_bailout_count(liftoff_bailout_count),
wall_clock_duration_in_us(wall_clock_duration_in_us) {}
V8_DEPRECATE_SOON("Use the version without cpu_duration_in_us")
WasmModuleCompiled(bool async, bool streamed, bool cached, bool deserialized,
bool lazy, bool success, size_t code_size_in_bytes,
size_t liftoff_bailout_count,
int64_t wall_clock_duration_in_us,
int64_t cpu_duration_in_us)
: async(async),
streamed(streamed),
cached(cached),
deserialized(deserialized),
lazy(lazy),
success(success),
code_size_in_bytes(code_size_in_bytes),
liftoff_bailout_count(liftoff_bailout_count),
wall_clock_duration_in_us(wall_clock_duration_in_us),
cpu_duration_in_us(cpu_duration_in_us) {}
START_ALLOW_USE_DEPRECATED()
// Copy constructor and copy assignment operator are allowed to copy the
// {cpu_duration_in_us} field.
WasmModuleCompiled(const WasmModuleCompiled&) = default;
WasmModuleCompiled& operator=(const WasmModuleCompiled&) = default;
END_ALLOW_USE_DEPRECATED()
bool async = false;
bool streamed = false;
bool cached = false;
@ -115,6 +188,7 @@ struct WasmModuleCompiled {
size_t code_size_in_bytes = 0;
size_t liftoff_bailout_count = 0;
int64_t wall_clock_duration_in_us = -1;
V8_DEPRECATE_SOON("We do not collect cpu times any more")
int64_t cpu_duration_in_us = -1;
};

View File

@ -1133,10 +1133,6 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance,
TRACE_LAZY("Compiling wasm-function#%d.\n", func_index);
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
CompilationStateImpl* compilation_state =
Impl(native_module->compilation_state());
ExecutionTierPair tiers = GetLazyCompilationTiers(native_module, func_index);
@ -1153,11 +1149,6 @@ bool CompileLazy(Isolate* isolate, WasmInstanceObject instance,
&env, compilation_state->GetWireBytesStorage().get(), counters,
assembler_buffer_cache, &detected_features);
compilation_state->OnCompilationStopped(detected_features);
if (!thread_ticks.IsNull()) {
native_module->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
tiers.baseline_tier);
}
// During lazy compilation, we can only get compilation errors when
// {--wasm-lazy-validation} is enabled. Otherwise, the module was fully
@ -1543,10 +1534,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
WasmFeatures detected_features = WasmFeatures::None();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
// Preparation (synchronized): Initialize the fields above and get the first
// compilation unit.
{
@ -1607,11 +1594,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
if (yield ||
!(unit = compile_scope.compilation_state()->GetNextCompilationUnit(
queue, baseline_only))) {
if (!thread_ticks.IsNull()) {
compile_scope.native_module()->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
current_tier);
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish)));
@ -1633,12 +1615,6 @@ CompilationExecutionResult ExecuteCompilationUnits(
bool liftoff_finished = unit->tier() != current_tier &&
unit->tier() == ExecutionTier::kTurbofan;
if (batch_full || liftoff_finished) {
if (!thread_ticks.IsNull()) {
base::ThreadTicks thread_ticks_now = base::ThreadTicks::Now();
compile_scope.native_module()->UpdateCPUDuration(
(thread_ticks_now - thread_ticks).InMicroseconds(), current_tier);
thread_ticks = thread_ticks_now;
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish)));
@ -1801,9 +1777,7 @@ class CompilationTimeCallback : public CompilationEventCallback {
true, // success
native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
duration.InMicroseconds()}; // wall_clock_duration_in_us
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
}
if (compilation_event == CompilationEvent::kFailedCompilation) {
@ -1816,9 +1790,7 @@ class CompilationTimeCallback : public CompilationEventCallback {
false, // success
native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
duration.InMicroseconds()}; // wall_clock_duration_in_us
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
}
}
@ -2325,9 +2297,7 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) {
!compilation_state->failed(), // success
native_module_->turbofan_code_size(), // code_size_in_bytes
native_module_->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module_->baseline_compilation_cpu_duration())};
duration.InMicroseconds()}; // wall_clock_duration_in_us
isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_);
}
}

View File

@ -104,9 +104,6 @@ ModuleResult DecodeWasmModule(
v8::metrics::WasmModuleDecoded metrics_event;
base::ElapsedTimer timer;
timer.Start();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
ModuleResult result = DecodeWasmModule(enabled_features, wire_bytes,
validate_functions, origin);
if (counters && result.ok()) {
@ -119,10 +116,6 @@ ModuleResult DecodeWasmModule(
// Record event metrics.
metrics_event.wall_clock_duration_in_us = timer.Elapsed().InMicroseconds();
timer.Stop();
if (!thread_ticks.IsNull()) {
metrics_event.cpu_duration_in_us =
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds();
}
metrics_event.success = result.ok();
metrics_event.async = decoding_method == DecodingMethod::kAsync ||
decoding_method == DecodingMethod::kAsyncStream;

View File

@ -1722,15 +1722,6 @@ void NativeModule::SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes) {
}
}
void NativeModule::UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier) {
if (!compilation_state_->baseline_compilation_finished()) {
baseline_compilation_cpu_duration_.fetch_add(cpu_duration,
std::memory_order_relaxed);
} else if (tier == ExecutionTier::kTurbofan) {
tier_up_cpu_duration_.fetch_add(cpu_duration, std::memory_order_relaxed);
}
}
void NativeModule::AddLazyCompilationTimeSample(int64_t sample_in_micro_sec) {
num_lazy_compilations_.fetch_add(1, std::memory_order_relaxed);
sum_lazy_compilation_time_in_micro_sec_.fetch_add(sample_in_micro_sec,

View File

@ -767,12 +767,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
size_t turbofan_code_size() const {
return turbofan_code_size_.load(std::memory_order_relaxed);
}
size_t baseline_compilation_cpu_duration() const {
return baseline_compilation_cpu_duration_.load();
}
size_t tier_up_cpu_duration() const {
return tier_up_cpu_duration_.load(std::memory_order_relaxed);
}
void AddLazyCompilationTimeSample(int64_t sample);
@ -805,7 +799,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
}
void SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes);
void UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier);
void AddLiftoffBailout() {
liftoff_bailout_count_.fetch_add(1, std::memory_order_relaxed);
}
@ -1035,8 +1028,6 @@ class V8_EXPORT_PRIVATE NativeModule final {
std::atomic<size_t> liftoff_bailout_count_{0};
std::atomic<size_t> liftoff_code_size_{0};
std::atomic<size_t> turbofan_code_size_{0};
std::atomic<size_t> baseline_compilation_cpu_duration_{0};
std::atomic<size_t> tier_up_cpu_duration_{0};
// Metrics for lazy compilation.
std::atomic<int> num_lazy_compilations_{0};

View File

@ -312,9 +312,6 @@ COMPILE_TEST(TestEventMetrics) {
recorder->module_decoded_.back().module_size_in_bytes);
CHECK_EQ(1, recorder->module_decoded_.back().function_count);
CHECK_LE(0, recorder->module_decoded_.back().wall_clock_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && !i::v8_flags.wasm_test_streaming,
recorder->module_decoded_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_compiled_.size());
CHECK(recorder->module_compiled_.back().success);
@ -335,12 +332,6 @@ COMPILE_TEST(TestEventMetrics) {
CHECK_GE(native_module->generated_code_size(),
recorder->module_compiled_.back().code_size_in_bytes);
CHECK_LE(0, recorder->module_compiled_.back().wall_clock_duration_in_us);
CHECK_EQ(native_module->baseline_compilation_cpu_duration(),
recorder->module_compiled_.back().cpu_duration_in_us);
CHECK_IMPLIES(v8::base::ThreadTicks::IsSupported() &&
!i::v8_flags.wasm_test_streaming &&
!i::v8_flags.wasm_lazy_compilation,
recorder->module_compiled_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_instantiated_.size());
CHECK(recorder->module_instantiated_.back().success);