[wasm] Introduce a caching threshold

With dynamic tiering, typically not all functions of a WebAssembly
module get compiled with TurboFan. Code caching was only triggered once
top-tier compilation had finished, and would therefore never get
triggered. With this CL, code caching is triggered whenever more than
{FLAG_wasm_caching_threshold} bytes of TurboFan code have been generated
since the last caching event.

This new caching event is only triggered when --wasm-dynamic-tiering is
enabled.

R=clemensb@chromium.org

Bug: v8:12281
Change-Id: I939325aea7e4310aa76c936636799661c05d4079
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3202593
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77251}
This commit is contained in:
Andreas Haas 2021-10-06 08:06:07 +02:00 committed by V8 LUCI CQ
parent bab8254c32
commit af1b9a9333
6 changed files with 153 additions and 17 deletions

View File

@ -955,6 +955,9 @@ DEFINE_BOOL(wasm_tier_up, true,
"have an effect)") "have an effect)")
DEFINE_BOOL(wasm_dynamic_tiering, false, DEFINE_BOOL(wasm_dynamic_tiering, false,
"enable dynamic tier up to the optimizing compiler") "enable dynamic tier up to the optimizing compiler")
// Threshold, in bytes of generated top-tier (TurboFan) code, that has to be
// exceeded since the previous caching event before the next caching event is
// triggered. The byte counter is reset each time such an event fires.
DEFINE_INT(
wasm_caching_threshold, 1000000,
"the amount of wasm top tier code that triggers the next caching event")
DEFINE_DEBUG_BOOL(trace_wasm_decoder, false, "trace decoding of wasm code") DEFINE_DEBUG_BOOL(trace_wasm_decoder, false, "trace decoding of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_compiler, false, "trace compiling of wasm code") DEFINE_DEBUG_BOOL(trace_wasm_compiler, false, "trace compiling of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_interpreter, false, DEFINE_DEBUG_BOOL(trace_wasm_interpreter, false,

View File

@ -105,6 +105,7 @@ class WireBytesStorage {
enum class CompilationEvent : uint8_t { enum class CompilationEvent : uint8_t {
kFinishedBaselineCompilation, kFinishedBaselineCompilation,
kFinishedExportWrappers, kFinishedExportWrappers,
kFinishedCompilationChunk,
kFinishedTopTierCompilation, kFinishedTopTierCompilation,
kFailedCompilation, kFailedCompilation,
kFinishedRecompilation kFinishedRecompilation

View File

@ -746,6 +746,9 @@ class CompilationStateImpl {
int outstanding_baseline_units_ = 0; int outstanding_baseline_units_ = 0;
int outstanding_export_wrappers_ = 0; int outstanding_export_wrappers_ = 0;
int outstanding_top_tier_functions_ = 0; int outstanding_top_tier_functions_ = 0;
// The amount of generated top tier code since the last
// {kFinishedCompilationChunk} event.
size_t bytes_since_last_chunk = 0;
std::vector<uint8_t> compilation_progress_; std::vector<uint8_t> compilation_progress_;
int outstanding_recompilation_functions_ = 0; int outstanding_recompilation_functions_ = 0;
@ -2095,8 +2098,12 @@ class AsyncCompileJob::CompilationStateCallback {
: nullptr); : nullptr);
} }
break; break;
case CompilationEvent::kFinishedCompilationChunk:
DCHECK(CompilationEvent::kFinishedBaselineCompilation == last_event_ ||
CompilationEvent::kFinishedCompilationChunk == last_event_);
break;
case CompilationEvent::kFinishedTopTierCompilation: case CompilationEvent::kFinishedTopTierCompilation:
DCHECK_EQ(CompilationEvent::kFinishedBaselineCompilation, last_event_); DCHECK(CompilationEvent::kFinishedBaselineCompilation == last_event_);
// At this point, the job will already be gone, thus do not access it // At this point, the job will already be gone, thus do not access it
// here. // here.
break; break;
@ -3191,6 +3198,10 @@ void CompilationStateImpl::CommitTopTierCompilationUnit(
void CompilationStateImpl::AddTopTierPriorityCompilationUnit( void CompilationStateImpl::AddTopTierPriorityCompilationUnit(
WasmCompilationUnit unit, size_t priority) { WasmCompilationUnit unit, size_t priority) {
compilation_unit_queues_.AddTopTierPriorityUnit(unit, priority); compilation_unit_queues_.AddTopTierPriorityUnit(unit, priority);
{
base::MutexGuard guard(&callbacks_mutex_);
outstanding_top_tier_functions_++;
}
compile_job_->NotifyConcurrencyIncrease(); compile_job_->NotifyConcurrencyIncrease();
} }
@ -3303,6 +3314,9 @@ void CompilationStateImpl::OnFinishedUnits(
DCHECK_GT(outstanding_baseline_units_, 0); DCHECK_GT(outstanding_baseline_units_, 0);
outstanding_baseline_units_--; outstanding_baseline_units_--;
} }
if (code->tier() == ExecutionTier::kTurbofan) {
bytes_since_last_chunk += code->instructions().size();
}
if (reached_tier < required_top_tier && if (reached_tier < required_top_tier &&
required_top_tier <= code->tier()) { required_top_tier <= code->tier()) {
DCHECK_GT(outstanding_top_tier_functions_, 0); DCHECK_GT(outstanding_top_tier_functions_, 0);
@ -3356,12 +3370,17 @@ void CompilationStateImpl::TriggerCallbacks(
triggered_events.Add(CompilationEvent::kFinishedExportWrappers); triggered_events.Add(CompilationEvent::kFinishedExportWrappers);
if (outstanding_baseline_units_ == 0) { if (outstanding_baseline_units_ == 0) {
triggered_events.Add(CompilationEvent::kFinishedBaselineCompilation); triggered_events.Add(CompilationEvent::kFinishedBaselineCompilation);
if (outstanding_top_tier_functions_ == 0) { if (!FLAG_wasm_dynamic_tiering && outstanding_top_tier_functions_ == 0) {
triggered_events.Add(CompilationEvent::kFinishedTopTierCompilation); triggered_events.Add(CompilationEvent::kFinishedTopTierCompilation);
} }
} }
} }
if (static_cast<size_t>(FLAG_wasm_caching_threshold) <
bytes_since_last_chunk) {
triggered_events.Add(CompilationEvent::kFinishedCompilationChunk);
bytes_since_last_chunk = 0;
}
if (compile_failed_.load(std::memory_order_relaxed)) { if (compile_failed_.load(std::memory_order_relaxed)) {
// *Only* trigger the "failed" event. // *Only* trigger the "failed" event.
triggered_events = triggered_events =
@ -3372,9 +3391,11 @@ void CompilationStateImpl::TriggerCallbacks(
// Don't trigger past events again. // Don't trigger past events again.
triggered_events -= finished_events_; triggered_events -= finished_events_;
// Recompilation can happen multiple times, thus do not store this. // Recompilation can happen multiple times, thus do not store this. There can
finished_events_ |= // also be multiple compilation chunks.
triggered_events - CompilationEvent::kFinishedRecompilation; finished_events_ |= triggered_events -
CompilationEvent::kFinishedRecompilation -
CompilationEvent::kFinishedCompilationChunk;
for (auto event : for (auto event :
{std::make_pair(CompilationEvent::kFailedCompilation, {std::make_pair(CompilationEvent::kFailedCompilation,
@ -3385,6 +3406,8 @@ void CompilationStateImpl::TriggerCallbacks(
"wasm.BaselineFinished"), "wasm.BaselineFinished"),
std::make_pair(CompilationEvent::kFinishedTopTierCompilation, std::make_pair(CompilationEvent::kFinishedTopTierCompilation,
"wasm.TopTierFinished"), "wasm.TopTierFinished"),
std::make_pair(CompilationEvent::kFinishedCompilationChunk,
"wasm.CompilationChunkFinished"),
std::make_pair(CompilationEvent::kFinishedRecompilation, std::make_pair(CompilationEvent::kFinishedRecompilation,
"wasm.RecompilationFinished")}) { "wasm.RecompilationFinished")}) {
if (!triggered_events.contains(event.first)) continue; if (!triggered_events.contains(event.first)) continue;
@ -3395,7 +3418,11 @@ void CompilationStateImpl::TriggerCallbacks(
} }
} }
if (outstanding_baseline_units_ == 0 && outstanding_export_wrappers_ == 0 && // With dynamic tiering, we don't know if we can ever delete the callback.
// TODO(https://crbug.com/v8/12289): Release some callbacks also when dynamic
// tiering is enabled.
if (!FLAG_wasm_dynamic_tiering && outstanding_baseline_units_ == 0 &&
outstanding_export_wrappers_ == 0 &&
outstanding_top_tier_functions_ == 0 && outstanding_top_tier_functions_ == 0 &&
outstanding_recompilation_functions_ == 0) { outstanding_recompilation_functions_ == 0) {
// Clear the callbacks because no more events will be delivered. // Clear the callbacks because no more events will be delivered.

View File

@ -312,33 +312,29 @@ void AsyncStreamingDecoder::Abort() {
namespace { namespace {
class TopTierCompiledCallback { class CompilationChunkFinishedCallback {
public: public:
TopTierCompiledCallback( CompilationChunkFinishedCallback(
std::weak_ptr<NativeModule> native_module, std::weak_ptr<NativeModule> native_module,
AsyncStreamingDecoder::ModuleCompiledCallback callback) AsyncStreamingDecoder::ModuleCompiledCallback callback)
: native_module_(std::move(native_module)), : native_module_(std::move(native_module)),
callback_(std::move(callback)) {} callback_(std::move(callback)) {}
void operator()(CompilationEvent event) const { void operator()(CompilationEvent event) const {
if (event != CompilationEvent::kFinishedTopTierCompilation) return; if (event != CompilationEvent::kFinishedCompilationChunk &&
event != CompilationEvent::kFinishedTopTierCompilation) {
return;
}
// If the native module is still alive, get back a shared ptr and call the // If the native module is still alive, get back a shared ptr and call the
// callback. // callback.
if (std::shared_ptr<NativeModule> native_module = native_module_.lock()) { if (std::shared_ptr<NativeModule> native_module = native_module_.lock()) {
callback_(native_module); callback_(native_module);
} }
#ifdef DEBUG
DCHECK(!called_);
called_ = true;
#endif
} }
private: private:
const std::weak_ptr<NativeModule> native_module_; const std::weak_ptr<NativeModule> native_module_;
const AsyncStreamingDecoder::ModuleCompiledCallback callback_; const AsyncStreamingDecoder::ModuleCompiledCallback callback_;
#ifdef DEBUG
mutable bool called_ = false;
#endif
}; };
} // namespace } // namespace
@ -347,7 +343,7 @@ void AsyncStreamingDecoder::NotifyNativeModuleCreated(
const std::shared_ptr<NativeModule>& native_module) { const std::shared_ptr<NativeModule>& native_module) {
if (!module_compiled_callback_) return; if (!module_compiled_callback_) return;
auto* comp_state = native_module->compilation_state(); auto* comp_state = native_module->compilation_state();
comp_state->AddCallback(TopTierCompiledCallback{ comp_state->AddCallback(CompilationChunkFinishedCallback{
std::move(native_module), std::move(module_compiled_callback_)}); std::move(native_module), std::move(module_compiled_callback_)});
module_compiled_callback_ = {}; module_compiled_callback_ = {};
} }

View File

@ -1240,4 +1240,13 @@
'test-calls-with-arraylike-or-spread/*': [SKIP], 'test-calls-with-arraylike-or-spread/*': [SKIP],
}], }],
################################################################################
['variant == stress', {
# The 'stress' variant sets the '--stress-opt' d8 flag, which executes 2 runs
# in debug mode and 5 runs in release mode. Hence the module is cached
# between runs, and the correct caching behavior can no longer be observed in
# the later runs.
'test-streaming-compilation/AsyncTestIncrementalCaching': [SKIP],
'test-streaming-compilation/SingleThreadedTestIncrementalCaching': [SKIP],
}],
] ]

View File

@ -20,6 +20,7 @@
#include "test/common/wasm/flag-utils.h" #include "test/common/wasm/flag-utils.h"
#include "test/common/wasm/test-signatures.h" #include "test/common/wasm/test-signatures.h"
#include "test/common/wasm/wasm-macro-gen.h" #include "test/common/wasm/wasm-macro-gen.h"
#include "test/common/wasm/wasm-module-runner.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
@ -1108,6 +1109,105 @@ STREAM_TEST(TestModuleWithImportedFunction) {
CHECK(tester.IsPromiseFulfilled()); CHECK(tester.IsPromiseFulfilled());
} }
// Verifies incremental code caching under dynamic tiering: the module-compiled
// callback registered on the stream must not fire while all functions are
// still Liftoff code, and must fire once for each function that subsequently
// tiers up, with the serialized module growing after the second tier-up.
STREAM_TEST(TestIncrementalCaching) {
  FLAG_VALUE_SCOPE(wasm_dynamic_tiering, true);
  FLAG_VALUE_SCOPE(wasm_tier_up, false);
  // Use a very small caching threshold; each function body below repeats a
  // store {threshold} times with the intent of exceeding it on tier-up.
  constexpr int threshold = 10;
  FlagScope<int> caching_threshold(&FLAG_wasm_caching_threshold, threshold);
  StreamTester tester(isolate);

  // Counts how often the stream reports that (a chunk of) the module has been
  // compiled, i.e. how often a caching event was delivered.
  int call_cache_counter = 0;
  tester.stream()->SetModuleCompiledCallback(
      [&call_cache_counter](
          const std::shared_ptr<i::wasm::NativeModule>& native_module) {
        call_cache_counter++;
      });

  // Build a module with three exported functions "f0", "f1" and "f2".
  ZoneBuffer buffer(tester.zone());
  TestSignatures sigs;
  WasmModuleBuilder builder(tester.zone());
  builder.SetMinMemorySize(1);

  base::Vector<const char> function_names[] = {
      base::CStrVector("f0"), base::CStrVector("f1"), base::CStrVector("f2")};
  for (int i = 0; i < 3; ++i) {
    WasmFunctionBuilder* f = builder.AddFunction(sigs.v_v());

    constexpr int64_t val = 0x123456789abc;
    constexpr int index = 0x1234;
    uint8_t store_mem[] = {
        WASM_STORE_MEM(MachineType::Int64(), WASM_I32V(index), WASM_I64V(val))};
    // The encoded size of a single store is hard-coded so that the size of
    // {code} can be a compile-time constant; verify that it still matches.
    constexpr uint32_t kStoreLength = 20;
    CHECK_EQ(kStoreLength, arraysize(store_mem));

    // Produce a store {threshold} many times to reach the caching threshold.
    // The extra byte holds the terminating {kExprEnd}.
    constexpr uint32_t kCodeLength = kStoreLength * threshold + 1;
    uint8_t code[kCodeLength];
    for (int j = 0; j < threshold; ++j) {
      memcpy(code + (j * kStoreLength), store_mem, kStoreLength);
    }
    code[kCodeLength - 1] = WasmOpcode::kExprEnd;
    f->EmitCode(code, kCodeLength);
    builder.AddExport(function_names[i], f);
  }
  builder.WriteTo(&buffer);

  // Stream the complete module and run the compilation to completion.
  tester.OnBytesReceived(buffer.begin(), buffer.end() - buffer.begin());
  tester.FinishStream();
  tester.RunCompilerTasks();
  CHECK(tester.IsPromiseFulfilled());

  // Wrap the native module in a module object and instantiate it, so that the
  // exported functions can be called below.
  constexpr base::Vector<const char> kNoSourceUrl{"", 0};
  Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
  Handle<Script> script = GetWasmEngine()->GetOrCreateScript(
      i_isolate, tester.native_module(), kNoSourceUrl);
  Handle<FixedArray> export_wrappers = i_isolate->factory()->NewFixedArray(3);
  Handle<WasmModuleObject> module_object = WasmModuleObject::New(
      i_isolate, tester.native_module(), script, export_wrappers);
  ErrorThrower thrower(i_isolate, "Instantiation");
  Handle<WasmInstanceObject> instance =
      GetWasmEngine()
          ->SyncInstantiate(i_isolate, &thrower, module_object, {}, {})
          .ToHandleChecked();
  CHECK(!thrower.error());

  WasmCodeRefScope code_scope;
  CHECK(tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  // No TurboFan compilation happened yet, and therefore no call to the cache.
  CHECK_EQ(0, call_cache_counter);

  // NOTE(review): {exception} is passed to every call below but never
  // inspected afterwards -- confirm whether a CHECK(!exception) is wanted.
  bool exception = false;
  // The tier-up threshold is hard-coded right now.
  constexpr int tier_up_threshold = 4;

  // Call "f0" often enough to tier it up; this must produce the first caching
  // event while "f1" and "f2" remain Liftoff code.
  for (int i = 0; i < tier_up_threshold; ++i) {
    testing::CallWasmFunctionForTesting(i_isolate, instance, "f0", 0, nullptr,
                                        &exception);
  }
  tester.RunCompilerTasks();
  CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  CHECK_EQ(1, call_cache_counter);

  // Remember the serialized size after the first tier-up as a baseline.
  size_t serialized_size;
  {
    i::wasm::WasmSerializer serializer(tester.native_module().get());
    serialized_size = serializer.GetSerializedNativeModuleSize();
  }

  // Tier up "f1" as well; this must produce a second caching event.
  for (int i = 0; i < tier_up_threshold; ++i) {
    testing::CallWasmFunctionForTesting(i_isolate, instance, "f1", 0, nullptr,
                                        &exception);
  }
  tester.RunCompilerTasks();
  CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(!tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  CHECK_EQ(2, call_cache_counter);
  // The serialized module must have grown compared to the baseline.
  {
    i::wasm::WasmSerializer serializer(tester.native_module().get());
    CHECK_LT(serialized_size, serializer.GetSerializedNativeModuleSize());
  }
}
STREAM_TEST(TestModuleWithErrorAfterDataSection) { STREAM_TEST(TestModuleWithErrorAfterDataSection) {
StreamTester tester(isolate); StreamTester tester(isolate);