[wasm] Add CPU time metrics (reland)

This is a reland of dcdaf42fa8. It adds
CPU time metrics to the WasmModuleDecoded (except for streaming),
WasmModuleCompiled and WasmModuleTieredUp events. This can later be used
to provide this information as UKMs or UMAs.

Bug: v8:11611
Change-Id: I813fc8de36d1445c6a887abf496ec10e1a803815
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2953296
Reviewed-by: Adam Klein <adamk@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Emanuel Ziegler <ecmziegler@chromium.org>
Cr-Commit-Position: refs/heads/master@{#75715}
This commit is contained in:
Emanuel Ziegler 2021-07-13 14:24:01 +02:00 committed by V8 LUCI CQ
parent 3e405e3e95
commit 905aafab8b
8 changed files with 98 additions and 15 deletions

View File

@ -78,6 +78,7 @@ struct WasmModuleDecoded {
size_t module_size_in_bytes = 0;
size_t function_count = 0;
int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
};
struct WasmModuleCompiled {
@ -90,6 +91,7 @@ struct WasmModuleCompiled {
size_t code_size_in_bytes = 0;
size_t liftoff_bailout_count = 0;
int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
};
struct WasmModuleInstantiated {
@ -103,6 +105,7 @@ struct WasmModuleTieredUp {
bool lazy = false;
size_t code_size_in_bytes = 0;
int64_t wall_clock_duration_in_us = -1;
int64_t cpu_duration_in_us = -1;
};
struct WasmModulesPerIsolate {

View File

@ -4,6 +4,7 @@
#include "src/wasm/function-compiler.h"
#include "src/base/platform/time.h"
#include "src/base/strings.h"
#include "src/codegen/compiler.h"
#include "src/codegen/macro-assembler-inl.h"

View File

@ -71,6 +71,7 @@ class V8_EXPORT_PRIVATE WasmCompilationUnit final {
WasmFeatures* detected);
ExecutionTier tier() const { return tier_; }
ForDebugging for_debugging() const { return for_debugging_; }
int func_index() const { return func_index_; }
static void CompileWasmFunction(Isolate*, NativeModule*,

View File

@ -1138,6 +1138,10 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object,
TRACE_LAZY("Compiling wasm-function#%d.\n", func_index);
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
CompilationStateImpl* compilation_state =
Impl(native_module->compilation_state());
ExecutionTierPair tiers =
@ -1154,6 +1158,11 @@ bool CompileLazy(Isolate* isolate, Handle<WasmModuleObject> module_object,
&env, compilation_state->GetWireBytesStorage().get(), counters,
&detected_features);
compilation_state->OnCompilationStopped(detected_features);
if (!thread_ticks.IsNull()) {
native_module->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
tiers.baseline_tier);
}
// During lazy compilation, we can only get compilation errors when
// {--wasm-lazy-validation} is enabled. Otherwise, the module was fully
@ -1312,6 +1321,10 @@ CompilationExecutionResult ExecuteCompilationUnits(
WasmFeatures detected_features = WasmFeatures::None();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
// Preparation (synchronized): Initialize the fields above and get the first
// compilation unit.
{
@ -1349,10 +1362,19 @@ CompilationExecutionResult ExecuteCompilationUnits(
return kNoMoreUnits;
}
if (!unit->for_debugging() && result.result_tier != current_tier) {
compile_scope.native_module()->AddLiftoffBailout();
}
// Yield or get next unit.
if (yield ||
!(unit = compile_scope.compilation_state()->GetNextCompilationUnit(
queue, baseline_only))) {
if (!thread_ticks.IsNull()) {
compile_scope.native_module()->UpdateCPUDuration(
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds(),
current_tier);
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish)));
@ -1374,6 +1396,12 @@ CompilationExecutionResult ExecuteCompilationUnits(
bool liftoff_finished = unit->tier() != current_tier &&
unit->tier() == ExecutionTier::kTurbofan;
if (batch_full || liftoff_finished) {
if (!thread_ticks.IsNull()) {
base::ThreadTicks thread_ticks_now = base::ThreadTicks::Now();
compile_scope.native_module()->UpdateCPUDuration(
(thread_ticks_now - thread_ticks).InMicroseconds(), current_tier);
thread_ticks = thread_ticks_now;
}
std::vector<std::unique_ptr<WasmCode>> unpublished_code =
compile_scope.native_module()->AddCompiledCode(
base::VectorOf(std::move(results_to_publish)));
@ -1519,7 +1547,6 @@ class CompilationTimeCallback {
histogram->AddSample(static_cast<int>(duration.InMicroseconds()));
}
// TODO(sartang@microsoft.com): Remove wall_clock_time_in_us field
v8::metrics::WasmModuleCompiled event{
(compile_mode_ != kSynchronous), // async
(compile_mode_ == kStreaming), // streamed
@ -1529,8 +1556,9 @@ class CompilationTimeCallback {
true, // success
native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us
};
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
}
if (event == CompilationEvent::kFinishedTopTierCompilation) {
@ -1540,8 +1568,9 @@ class CompilationTimeCallback {
v8::metrics::WasmModuleTieredUp event{
FLAG_wasm_lazy_compilation, // lazy
native_module->turbofan_code_size(), // code_size_in_bytes
duration.InMicroseconds() // wall_clock_duration_in_us
};
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module->tier_up_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
}
if (event == CompilationEvent::kFailedCompilation) {
@ -1554,8 +1583,9 @@ class CompilationTimeCallback {
false, // success
native_module->liftoff_code_size(), // code_size_in_bytes
native_module->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us
};
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module->baseline_compilation_cpu_duration())};
metrics_recorder_->DelayMainThreadEvent(event, context_id_);
}
}
@ -1954,10 +1984,11 @@ void AsyncCompileJob::FinishCompile(bool is_after_cache_hit) {
is_after_deserialization, // deserialized
wasm_lazy_compilation_, // lazy
!compilation_state->failed(), // success
native_module_->liftoff_code_size(), // code_size_in_bytes
native_module_->turbofan_code_size(), // code_size_in_bytes
native_module_->liftoff_bailout_count(), // liftoff_bailout_count
duration.InMicroseconds() // wall_clock_duration_in_us
};
duration.InMicroseconds(), // wall_clock_duration_in_us
static_cast<int64_t>( // cpu_time_duration_in_us
native_module_->baseline_compilation_cpu_duration())};
isolate_->metrics_recorder()->DelayMainThreadEvent(event, context_id_);
}
}

View File

@ -2068,12 +2068,19 @@ ModuleResult DecodeWasmModule(
v8::metrics::WasmModuleDecoded metrics_event;
base::ElapsedTimer timer;
timer.Start();
base::ThreadTicks thread_ticks = base::ThreadTicks::IsSupported()
? base::ThreadTicks::Now()
: base::ThreadTicks();
ModuleResult result =
decoder.DecodeModule(counters, allocator, verify_functions);
// Record event metrics.
metrics_event.wall_clock_duration_in_us = timer.Elapsed().InMicroseconds();
timer.Stop();
if (!thread_ticks.IsNull()) {
metrics_event.cpu_duration_in_us =
(base::ThreadTicks::Now() - thread_ticks).InMicroseconds();
}
metrics_event.success = decoder.ok() && result.ok();
metrics_event.async = decoding_method == DecodingMethod::kAsync ||
decoding_method == DecodingMethod::kAsyncStream;

View File

@ -1234,10 +1234,6 @@ WasmCode* NativeModule::PublishCodeLocked(
// {WasmCodeRefScope} though, so it cannot die here.
code->DecRefOnLiveCode();
}
if (!code->for_debugging() && tiering_state_ == kTieredDown &&
code->tier() == ExecutionTier::kTurbofan) {
liftoff_bailout_count_.fetch_add(1);
}
return code;
}
@ -1535,6 +1531,20 @@ void NativeModule::SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes) {
}
}
void NativeModule::UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier) {
if (tier == WasmCompilationUnit::GetBaselineExecutionTier(this->module())) {
if (!compilation_state_->baseline_compilation_finished()) {
baseline_compilation_cpu_duration_.fetch_add(
cpu_duration, std::memory_order::memory_order_relaxed);
}
} else if (tier == ExecutionTier::kTurbofan) {
if (!compilation_state_->top_tier_compilation_finished()) {
tier_up_cpu_duration_.fetch_add(cpu_duration,
std::memory_order::memory_order_relaxed);
}
}
}
void NativeModule::TransferNewOwnedCodeLocked() const {
allocation_mutex_.AssertHeld();
DCHECK(!new_owned_code_.empty());

View File

@ -698,6 +698,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
size_t liftoff_bailout_count() const { return liftoff_bailout_count_.load(); }
size_t liftoff_code_size() const { return liftoff_code_size_.load(); }
size_t turbofan_code_size() const { return turbofan_code_size_.load(); }
size_t baseline_compilation_cpu_duration() const {
return baseline_compilation_cpu_duration_.load();
}
size_t tier_up_cpu_duration() const { return tier_up_cpu_duration_.load(); }
bool HasWireBytes() const {
auto wire_bytes = std::atomic_load(&wire_bytes_);
@ -705,6 +709,12 @@ class V8_EXPORT_PRIVATE NativeModule final {
}
void SetWireBytes(base::OwnedVector<const uint8_t> wire_bytes);
void UpdateCPUDuration(size_t cpu_duration, ExecutionTier tier);
void AddLiftoffBailout() {
liftoff_bailout_count_.fetch_add(1,
std::memory_order::memory_order_relaxed);
}
WasmCode* Lookup(Address) const;
WasmImportWrapperCache* import_wrapper_cache() const {
@ -919,6 +929,8 @@ class V8_EXPORT_PRIVATE NativeModule final {
std::atomic<size_t> liftoff_bailout_count_{0};
std::atomic<size_t> liftoff_code_size_{0};
std::atomic<size_t> turbofan_code_size_{0};
std::atomic<size_t> baseline_compilation_cpu_duration_{0};
std::atomic<size_t> tier_up_cpu_duration_{0};
};
class V8_EXPORT_PRIVATE WasmCodeManager final {

View File

@ -7,6 +7,7 @@
#include "include/libplatform/libplatform.h"
#include "include/v8-metrics.h"
#include "src/api/api-inl.h"
#include "src/base/platform/time.h"
#include "src/wasm/wasm-engine.h"
#include "src/wasm/wasm-module-builder.h"
#include "test/cctest/cctest.h"
@ -271,6 +272,9 @@ COMPILE_TEST(TestEventMetrics) {
std::shared_ptr<MetricsRecorder> recorder =
std::make_shared<MetricsRecorder>();
reinterpret_cast<v8::Isolate*>(isolate)->SetMetricsRecorder(recorder);
if (v8::base::ThreadTicks::IsSupported()) {
v8::base::ThreadTicks::WaitUntilInitialized();
}
TestSignatures sigs;
v8::internal::AccountingAllocator allocator;
@ -313,6 +317,9 @@ COMPILE_TEST(TestEventMetrics) {
recorder->module_decoded_.back().module_size_in_bytes);
CHECK_EQ(1, recorder->module_decoded_.back().function_count);
CHECK_LE(0, recorder->module_decoded_.back().wall_clock_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && !i::FLAG_wasm_test_streaming,
recorder->module_decoded_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_compiled_.size());
CHECK(recorder->module_compiled_.back().success);
@ -331,8 +338,12 @@ COMPILE_TEST(TestEventMetrics) {
recorder->module_compiled_.back().code_size_in_bytes);
CHECK_GE(native_module->generated_code_size(),
recorder->module_compiled_.back().code_size_in_bytes);
CHECK_EQ(0, recorder->module_compiled_.back().liftoff_bailout_count);
CHECK_LE(0, recorder->module_compiled_.back().wall_clock_duration_in_us);
CHECK_EQ(native_module->baseline_compilation_cpu_duration(),
recorder->module_compiled_.back().cpu_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && !i::FLAG_wasm_test_streaming,
recorder->module_compiled_.back().cpu_duration_in_us > 0);
CHECK_EQ(1, recorder->module_instantiated_.size());
CHECK(recorder->module_instantiated_.back().success);
@ -351,6 +362,13 @@ COMPILE_TEST(TestEventMetrics) {
CHECK_GE(native_module->committed_code_space(),
recorder->module_tiered_up_.back().code_size_in_bytes);
CHECK_LE(0, recorder->module_tiered_up_.back().wall_clock_duration_in_us);
CHECK_EQ(native_module->tier_up_cpu_duration(),
recorder->module_tiered_up_.back().cpu_duration_in_us);
CHECK_IMPLIES(
v8::base::ThreadTicks::IsSupported() && i::FLAG_wasm_tier_up &&
i::FLAG_liftoff &&
recorder->module_compiled_.back().liftoff_bailout_count == 0,
recorder->module_tiered_up_.back().cpu_duration_in_us > 0);
}
} // namespace wasm