[wasm][turbofan] Improve inlining heuristics

This CL improves wasm inlining heuristics in TurboFan, for an average
8.5% performance improvement in selected benchmarks.

Changes:
- In WasmInliner::Reduce(), only collect inlining candidates into a
  priority queue, ordered by WasmInliner::LexicographicOrdering.
  Move actual inlining to Finalize().
- Remove the InlineFirstFew heuristic. Add two limits to inlining: a
  maximum relative size increase (inversely proportional to the caller's
  size) and a maximum absolute size increase (see the sketch after this
  list).
- Pass information about call frequency from Liftoff-collected feedback
  to the WasmInliner through the wasm module.
- Run wasm inlining alongside the other optimizations in the pipeline.
- Split inlining and speculative inlining tests.
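
The two limits combine into a single node budget. Below is a minimal
standalone sketch of that arithmetic with the default flag values added
in this CL; size_limit and any_inlining_impossible mirror the new code
in wasm-inliner.h, while the constants and main() are illustrative
stand-ins for the flags:

#include <algorithm>
#include <cstddef>
#include <cstdio>

constexpr size_t kBudgetFactor = 100000;  // FLAG_wasm_inlining_budget_factor
constexpr size_t kMaxSize = 1250;         // FLAG_wasm_inlining_max_size

// A limit to the size of the inlined graph as a function of its initial
// size: small callers may grow a lot, large callers barely at all.
size_t size_limit(size_t initial_graph_size) {
  return initial_graph_size +
         std::min(kMaxSize, kBudgetFactor / initial_graph_size);
}

// True if not even a minimal callee (start + instance parameter + end,
// i.e. 3 nodes) can fit into the remaining budget.
bool any_inlining_impossible(size_t initial_graph_size) {
  return size_limit(initial_graph_size) - initial_graph_size < 3;
}

int main() {
  printf("%zu\n", size_limit(50));    // 50 + min(1250, 2000) = 1300
  printf("%zu\n", size_limit(2000));  // 2000 + min(1250, 50) = 2050
  printf("%d\n", any_inlining_impossible(40000));  // 100000/40000 = 2 < 3
}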

Bug: v8:7748, v8:12166
Change-Id: Iccee22093db765981889a24451fb458dfce1f1a6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3222764
Reviewed-by: Nico Hartmann <nicohartmann@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77428}
Authored by Manos Koukoutos, 2021-10-15 13:51:34 +00:00; committed by V8 LUCI CQ
parent e4dba97006
commit bce4410837
10 changed files with 418 additions and 216 deletions


@@ -1685,28 +1685,6 @@ struct WasmLoopUnrollingPhase {
}
}
};
struct WasmInliningPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmInlining)
void Run(PipelineData* data, Zone* temp_zone, wasm::CompilationEnv* env,
const wasm::WireBytesStorage* wire_bytes) {
GraphReducer graph_reducer(
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(), data->observe_node_manager());
DeadCodeElimination dead(&graph_reducer, data->graph(),
data->mcgraph()->common(), temp_zone);
// For now, inline the first few functions;
InlineFirstFew heuristics(FLAG_wasm_inlining_budget);
WasmInliner inliner(&graph_reducer, env, data->source_positions(),
data->node_origins(), data->mcgraph(), wire_bytes,
&heuristics);
AddReducer(data, &graph_reducer, &dead);
AddReducer(data, &graph_reducer, &inliner);
graph_reducer.ReduceGraph();
}
};
#endif // V8_ENABLE_WEBASSEMBLY
struct LoopExitEliminationPhase {
@@ -2010,12 +1988,14 @@ struct ScheduledEffectControlLinearizationPhase {
struct WasmOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(WasmOptimization)
void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan) {
void Run(PipelineData* data, Zone* temp_zone, bool allow_signalling_nan,
wasm::CompilationEnv* env, uint32_t function_index,
const wasm::WireBytesStorage* wire_bytes) {
// Run optimizations in two rounds: First one around load elimination and
// then one around branch elimination. This is because those two
// optimizations sometimes display quadratic complexity when run together.
// We only need load elimination for managed objects.
if (FLAG_experimental_wasm_gc) {
if (FLAG_experimental_wasm_gc || FLAG_wasm_inlining) {
GraphReducer graph_reducer(temp_zone, data->graph(),
&data->info()->tick_counter(), data->broker(),
data->jsgraph()->Dead(),
@@ -2030,11 +2010,20 @@ struct WasmOptimizationPhase {
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
CsaLoadElimination load_elimination(&graph_reducer, data->jsgraph(),
temp_zone);
WasmInliner inliner(&graph_reducer, env, function_index,
data->source_positions(), data->node_origins(),
data->mcgraph(), wire_bytes);
AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &common_reducer);
AddReducer(data, &graph_reducer, &value_numbering);
AddReducer(data, &graph_reducer, &load_elimination);
if (FLAG_experimental_wasm_gc) {
AddReducer(data, &graph_reducer, &load_elimination);
}
if (FLAG_wasm_inlining &&
!WasmInliner::any_inlining_impossible(data->graph()->NodeCount())) {
AddReducer(data, &graph_reducer, &inliner);
}
graph_reducer.ReduceGraph();
}
{
@@ -3236,14 +3225,11 @@ void Pipeline::GenerateCodeForWasmFunction(
pipeline.Run<WasmLoopUnrollingPhase>(loop_info);
pipeline.RunPrintAndVerify(WasmLoopUnrollingPhase::phase_name(), true);
}
if (FLAG_wasm_inlining) {
pipeline.Run<WasmInliningPhase>(env, wire_bytes_storage);
pipeline.RunPrintAndVerify(WasmInliningPhase::phase_name(), true);
}
const bool is_asm_js = is_asmjs_module(module);
if (FLAG_wasm_opt || is_asm_js) {
pipeline.Run<WasmOptimizationPhase>(is_asm_js);
pipeline.Run<WasmOptimizationPhase>(is_asm_js, env, function_index,
wire_bytes_storage);
pipeline.RunPrintAndVerify(WasmOptimizationPhase::phase_name(), true);
} else {
pipeline.Run<WasmBaseOptimizationPhase>();


@@ -27,25 +27,35 @@ Reduction WasmInliner::Reduce(Node* node) {
}
}
#define TRACE(...) \
if (FLAG_trace_wasm_speculative_inlining) { \
PrintF(__VA_ARGS__); \
}
#define TRACE(...) \
if (FLAG_trace_wasm_inlining) PrintF(__VA_ARGS__);
// TODO(12166): Save inlined frames for trap/--trace-wasm purposes. Consider
// tail calls.
// TODO(12166): Inline indirect calls/call_ref.
Reduction WasmInliner::ReduceCall(Node* call) {
DCHECK(call->opcode() == IrOpcode::kCall ||
call->opcode() == IrOpcode::kTailCall);
if (seen_.find(call) != seen_.end()) {
TRACE("function %d: have already seen node %d, skipping\n", function_index_,
call->id());
return NoChange();
}
seen_.insert(call);
Node* callee = NodeProperties::GetValueInput(call, 0);
IrOpcode::Value reloc_opcode = mcgraph_->machine()->Is32()
? IrOpcode::kRelocatableInt32Constant
: IrOpcode::kRelocatableInt64Constant;
if (callee->opcode() != reloc_opcode) return NoChange();
if (callee->opcode() != reloc_opcode) {
TRACE("[function %d: considering node %d... not a relocatable constant]\n",
function_index_, call->id());
return NoChange();
}
auto info = OpParameter<RelocatablePtrConstantInfo>(callee->op());
uint32_t inlinee_index = static_cast<uint32_t>(info.value());
TRACE("[considering call to %d... ", inlinee_index)
TRACE("[function %d: considering node %d, call to %d... ", function_index_,
call->id(), inlinee_index)
if (info.rmode() != RelocInfo::WASM_CALL) {
TRACE("not a wasm call]\n")
return NoChange();
@@ -54,46 +64,112 @@ Reduction WasmInliner::ReduceCall(Node* call) {
TRACE("imported function]\n")
return NoChange();
}
if (!heuristics_->DoInline(source_positions_->GetSourcePosition(call),
inlinee_index)) {
TRACE("heuristics say no]\n")
if (inlinee_index == function_index_) {
TRACE("recursive call]\n")
return NoChange();
}
TRACE("inlining!]\n")
TRACE("adding to inlining candidates!]\n")
bool is_speculative_call_ref = false;
int call_count = 0;
if (FLAG_wasm_speculative_inlining) {
base::MutexGuard guard(&module()->type_feedback.mutex);
auto maybe_feedback =
module()->type_feedback.feedback_for_function.find(function_index_);
if (maybe_feedback != module()->type_feedback.feedback_for_function.end()) {
wasm::FunctionTypeFeedback feedback = maybe_feedback->second;
wasm::WasmCodePosition position =
source_positions_->GetSourcePosition(call).ScriptOffset();
DCHECK_NE(position, wasm::kNoCodePosition);
auto index_in_feedback_vector = feedback.positions.find(position);
if (index_in_feedback_vector != feedback.positions.end()) {
is_speculative_call_ref = true;
call_count = feedback.feedback_vector[index_in_feedback_vector->second]
.absolute_call_frequency;
}
}
}
CHECK_LT(inlinee_index, module()->functions.size());
const wasm::WasmFunction* inlinee = &module()->functions[inlinee_index];
base::Vector<const byte> function_bytes = wire_bytes_->GetCode(inlinee->code);
const wasm::FunctionBody inlinee_body(inlinee->sig, inlinee->code.offset(),
function_bytes.begin(),
function_bytes.end());
wasm::WasmFeatures detected;
WasmGraphBuilder builder(env_, zone(), mcgraph_, inlinee_body.sig,
source_positions_);
std::vector<WasmLoopInfo> infos;
CandidateInfo candidate{call, inlinee_index, is_speculative_call_ref,
call_count, function_bytes.length()};
size_t subgraph_min_node_id = graph()->NodeCount();
wasm::DecodeResult result;
Node* inlinee_start;
Node* inlinee_end;
{
Graph::SubgraphScope scope(graph());
result = wasm::BuildTFGraph(zone()->allocator(), env_->enabled_features,
module(), &builder, &detected, inlinee_body,
&infos, node_origins_, inlinee_index,
wasm::kInlinedFunction);
inlinee_start = graph()->start();
inlinee_end = graph()->end();
inlining_candidates_.push(candidate);
return NoChange();
}
void WasmInliner::Finalize() {
TRACE("function %d: going though inlining candidates...\n", function_index_);
while (!inlining_candidates_.empty()) {
CandidateInfo candidate = inlining_candidates_.top();
inlining_candidates_.pop();
Node* call = candidate.node;
TRACE(
" [function %d: considering candidate {@%d, index=%d, type=%s, "
"count=%d, size=%d}... ",
function_index_, call->id(), candidate.inlinee_index,
candidate.is_speculative_call_ref ? "ref" : "direct",
candidate.call_count, candidate.wire_byte_size);
if (call->IsDead()) {
TRACE("dead node]\n");
continue;
}
const wasm::WasmFunction* inlinee =
&module()->functions[candidate.inlinee_index];
base::Vector<const byte> function_bytes =
wire_bytes_->GetCode(inlinee->code);
const wasm::FunctionBody inlinee_body(inlinee->sig, inlinee->code.offset(),
function_bytes.begin(),
function_bytes.end());
wasm::WasmFeatures detected;
WasmGraphBuilder builder(env_, zone(), mcgraph_, inlinee_body.sig,
source_positions_);
std::vector<WasmLoopInfo> infos;
size_t subgraph_min_node_id = graph()->NodeCount();
wasm::DecodeResult result;
Node* inlinee_start;
Node* inlinee_end;
{
Graph::SubgraphScope scope(graph());
result = wasm::BuildTFGraph(
zone()->allocator(), env_->enabled_features, module(), &builder,
&detected, inlinee_body, &infos, node_origins_,
candidate.inlinee_index, wasm::kInlinedFunction);
inlinee_start = graph()->start();
inlinee_end = graph()->end();
}
if (result.failed()) {
// This can happen if the inlinee has never been compiled before and is
// invalid. Return, as there is no point in optimizing further.
TRACE("failed to compile]\n")
return;
}
size_t additional_nodes = graph()->NodeCount() - subgraph_min_node_id;
if (current_graph_size_ + additional_nodes >
size_limit(initial_graph_size_)) {
// This is not based on the accurate graph size, as it may have been
// shrunk by other optimizations. We could recompute the accurate size
// with a traversal, but it is most probably not worth the time.
TRACE("not enough inlining budget]\n");
continue;
}
TRACE("inlining!]\n");
current_graph_size_ += additional_nodes;
if (call->opcode() == IrOpcode::kCall) {
InlineCall(call, inlinee_start, inlinee_end, inlinee->sig,
subgraph_min_node_id);
} else {
InlineTailCall(call, inlinee_start, inlinee_end);
}
// Continue with the remaining candidates in the priority queue.
}
if (result.failed()) return NoChange();
return call->opcode() == IrOpcode::kCall
? InlineCall(call, inlinee_start, inlinee_end, inlinee->sig,
subgraph_min_node_id)
: InlineTailCall(call, inlinee_start, inlinee_end);
}
/* Rewire callee formal parameters to the call-site real parameters. Rewire
@@ -121,13 +197,14 @@ void WasmInliner::RewireFunctionEntry(Node* call, Node* callee_start) {
} else {
UNREACHABLE();
}
Revisit(edge.from());
break;
}
}
}
Reduction WasmInliner::InlineTailCall(Node* call, Node* callee_start,
Node* callee_end) {
void WasmInliner::InlineTailCall(Node* call, Node* callee_start,
Node* callee_end) {
DCHECK(call->opcode() == IrOpcode::kTailCall);
// 1) Rewire function entry.
RewireFunctionEntry(call, callee_start);
@@ -136,16 +213,19 @@ Reduction WasmInliner::InlineTailCall(Node* call, Node* callee_start,
for (Node* const input : callee_end->inputs()) {
DCHECK(IrOpcode::IsGraphTerminator(input->opcode()));
NodeProperties::MergeControlToEnd(graph(), common(), input);
Revisit(graph()->end());
}
for (Edge edge_to_end : call->use_edges()) {
DCHECK_EQ(edge_to_end.from(), graph()->end());
edge_to_end.UpdateTo(mcgraph()->Dead());
}
callee_end->Kill();
return Replace(mcgraph()->Dead());
call->Kill();
Revisit(graph()->end());
}
Reduction WasmInliner::InlineCall(Node* call, Node* callee_start,
Node* callee_end,
const wasm::FunctionSig* inlinee_sig,
size_t subgraph_min_node_id) {
void WasmInliner::InlineCall(Node* call, Node* callee_start, Node* callee_end,
const wasm::FunctionSig* inlinee_sig,
size_t subgraph_min_node_id) {
DCHECK(call->opcode() == IrOpcode::kCall);
// 0) Before doing anything, if {call} has an exception handler, collect all
@@ -312,17 +392,17 @@ Reduction WasmInliner::InlineCall(Node* call, Node* callee_start,
// Dead() as a dummy for value replacement.
ReplaceWithValue(call, mcgraph()->Dead(), effect_output, control_output);
}
return Replace(mcgraph()->Dead());
} else {
// The callee can never return. The call node and all its uses are dead.
ReplaceWithValue(call, mcgraph()->Dead(), mcgraph()->Dead(),
mcgraph()->Dead());
return Changed(call);
}
}
const wasm::WasmModule* WasmInliner::module() const { return env_->module; }
#undef TRACE
} // namespace compiler
} // namespace internal
} // namespace v8


@@ -30,23 +30,6 @@ namespace compiler {
class NodeOriginTable;
class SourcePositionTable;
// Parent class for classes that provide heuristics on how to inline in wasm.
class WasmInliningHeuristics {
public:
virtual bool DoInline(SourcePosition position, uint32_t function_index) = 0;
};
class InlineFirstFew : public WasmInliningHeuristics {
public:
explicit InlineFirstFew(int count) : count_(count) {}
bool DoInline(SourcePosition position, uint32_t function_index) override {
return count_-- > 0;
}
private:
int count_;
};
// The WasmInliner provides the core graph inlining machinery for WebAssembly
// graphs. Note that this class only deals with the mechanics of how to inline
// one graph into another; heuristics that decide what and how much to inline
@@ -54,43 +37,118 @@ class InlineFirstFew : public WasmInliningHeuristics {
class WasmInliner final : public AdvancedReducer {
public:
WasmInliner(Editor* editor, wasm::CompilationEnv* env,
SourcePositionTable* source_positions,
uint32_t function_index, SourcePositionTable* source_positions,
NodeOriginTable* node_origins, MachineGraph* mcgraph,
const wasm::WireBytesStorage* wire_bytes,
WasmInliningHeuristics* heuristics)
const wasm::WireBytesStorage* wire_bytes)
: AdvancedReducer(editor),
env_(env),
function_index_(function_index),
source_positions_(source_positions),
node_origins_(node_origins),
mcgraph_(mcgraph),
wire_bytes_(wire_bytes),
heuristics_(heuristics) {}
initial_graph_size_(mcgraph->graph()->NodeCount()),
current_graph_size_(initial_graph_size_),
inlining_candidates_() {}
const char* reducer_name() const override { return "WasmInliner"; }
Reduction Reduce(Node* node) final;
void Finalize() final;
static bool any_inlining_impossible(size_t initial_graph_size) {
return size_limit(initial_graph_size) - initial_graph_size <
kMinimumFunctionNodeCount;
}
private:
struct CandidateInfo {
Node* node;
uint32_t inlinee_index;
bool is_speculative_call_ref;
int call_count;
int wire_byte_size;
};
struct LexicographicOrdering {
// Returns whether c1 should be prioritized lower than c2.
bool operator()(CandidateInfo& c1, CandidateInfo& c2) {
if (c1.is_speculative_call_ref && !c2.is_speculative_call_ref) {
return false;
}
if (c2.is_speculative_call_ref && !c1.is_speculative_call_ref) {
return true;
}
if (c1.call_count > c2.call_count) return false;
if (c2.call_count > c1.call_count) return true;
return c1.wire_byte_size > c2.wire_byte_size;
}
};
// TODO(manoskouk): This has not been found to be useful, but something
// similar may be tried again in the future.
// struct AdvancedOrdering {
// // Returns whether c1 should be prioritized lower than c2.
// bool operator()(CandidateInfo& c1, CandidateInfo& c2) {
// if (c1.is_speculative_call_ref && c2.is_speculative_call_ref) {
// if (c1.call_count > c2.call_count) return false;
// if (c2.call_count > c1.call_count) return true;
// return c1.wire_byte_size > c2.wire_byte_size;
// }
// if (!c1.is_speculative_call_ref && !c2.is_speculative_call_ref) {
// return c1.wire_byte_size > c2.wire_byte_size;
// }
//
// constexpr int kAssumedCallCountForDirectCalls = 3;
//
// int c1_call_count = c1.is_speculative_call_ref
// ? c1.call_count
// : kAssumedCallCountForDirectCalls;
// int c2_call_count = c2.is_speculative_call_ref
// ? c2.call_count
// : kAssumedCallCountForDirectCalls;
//
// return static_cast<float>(c1_call_count) / c1.wire_byte_size <
// static_cast<float>(c2_call_count) / c2.wire_byte_size;
// }
//};
Zone* zone() const { return mcgraph_->zone(); }
CommonOperatorBuilder* common() const { return mcgraph_->common(); }
Graph* graph() const { return mcgraph_->graph(); }
MachineGraph* mcgraph() const { return mcgraph_; }
const wasm::WasmModule* module() const;
const wasm::WasmFunction* inlinee() const;
// A limit to the size of the inlined graph as a function of its initial size.
static size_t size_limit(size_t initial_graph_size) {
return initial_graph_size +
std::min(FLAG_wasm_inlining_max_size,
FLAG_wasm_inlining_budget_factor / initial_graph_size);
}
// The smallest size in TF nodes any meaningful wasm function can have
// (start, instance parameter, end).
static constexpr size_t kMinimumFunctionNodeCount = 3;
Reduction ReduceCall(Node* call);
Reduction InlineCall(Node* call, Node* callee_start, Node* callee_end,
const wasm::FunctionSig* inlinee_sig,
size_t subgraph_min_node_id);
Reduction InlineTailCall(Node* call, Node* callee_start, Node* callee_end);
void InlineCall(Node* call, Node* callee_start, Node* callee_end,
const wasm::FunctionSig* inlinee_sig,
size_t subgraph_min_node_id);
void InlineTailCall(Node* call, Node* callee_start, Node* callee_end);
void RewireFunctionEntry(Node* call, Node* callee_start);
wasm::CompilationEnv* const env_;
uint32_t function_index_;
SourcePositionTable* const source_positions_;
NodeOriginTable* const node_origins_;
MachineGraph* const mcgraph_;
const wasm::WireBytesStorage* const wire_bytes_;
WasmInliningHeuristics* heuristics_;
const size_t initial_graph_size_;
size_t current_graph_size_;
std::priority_queue<CandidateInfo, std::vector<CandidateInfo>,
LexicographicOrdering>
inlining_candidates_;
std::unordered_set<Node*> seen_;
};
} // namespace compiler
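
A standalone sketch of how this ordering ranks candidates; CandidateInfo
is trimmed to the fields the comparator reads, and the sample candidates
in main() are made up:

#include <cstdint>
#include <cstdio>
#include <queue>
#include <vector>

struct CandidateInfo {
  uint32_t inlinee_index;
  bool is_speculative_call_ref;
  int call_count;
  int wire_byte_size;
};

// Same ordering as WasmInliner::LexicographicOrdering above: call_ref
// candidates with feedback come first, then higher call counts, then
// smaller wire-byte bodies.
struct LexicographicOrdering {
  bool operator()(const CandidateInfo& c1, const CandidateInfo& c2) {
    if (c1.is_speculative_call_ref && !c2.is_speculative_call_ref) return false;
    if (c2.is_speculative_call_ref && !c1.is_speculative_call_ref) return true;
    if (c1.call_count > c2.call_count) return false;
    if (c2.call_count > c1.call_count) return true;
    return c1.wire_byte_size > c2.wire_byte_size;
  }
};

int main() {
  std::priority_queue<CandidateInfo, std::vector<CandidateInfo>,
                      LexicographicOrdering>
      candidates;
  candidates.push({0, false, 0, 40});   // direct call, tiny body
  candidates.push({1, true, 10, 300});  // call_ref, hot
  candidates.push({2, true, 2, 20});    // call_ref, cold
  while (!candidates.empty()) {
    printf("function %u\n", candidates.top().inlinee_index);  // 1, 2, 0
    candidates.pop();
  }
}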


@@ -1052,10 +1052,14 @@ DEFINE_BOOL(wasm_math_intrinsics, true,
DEFINE_BOOL(
wasm_inlining, false,
"enable inlining of wasm functions into wasm functions (experimental)")
DEFINE_INT(wasm_inlining_budget, 3,
"maximum number of call targets to inline into a Wasm function")
DEFINE_SIZE_T(
wasm_inlining_budget_factor, 100000,
"maximum allowed size to inline a function is given by {n / caller size}")
DEFINE_SIZE_T(wasm_inlining_max_size, 1250,
"maximum size of a function that can be inlined, in TF nodes")
DEFINE_BOOL(wasm_speculative_inlining, false,
"enable speculative inlining of call_ref targets (experimental)")
DEFINE_BOOL(trace_wasm_inlining, false, "trace wasm inlining")
DEFINE_BOOL(trace_wasm_speculative_inlining, false,
"trace wasm speculative inlining")
DEFINE_IMPLICATION(wasm_speculative_inlining, experimental_wasm_typed_funcref)


@@ -5945,6 +5945,12 @@ class LiftoffCompiler {
LiftoffAssembler::VarState vector_var(kPointerKind, vector, 0);
LiftoffRegister index = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
uintptr_t vector_slot = num_call_ref_instructions_ * 2;
{
base::MutexGuard mutex_guard(&decoder->module_->type_feedback.mutex);
decoder->module_->type_feedback.feedback_for_function[func_index_]
.positions[decoder->position()] =
static_cast<int>(num_call_ref_instructions_);
}
num_call_ref_instructions_++;
__ LoadConstant(index, WasmValue::ForUintPtr(vector_slot));
LiftoffAssembler::VarState index_var(kIntPtrKind, index, 0);
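
This position bookkeeping is one half of a handshake with the inliner
(the other half is the lookup in WasmInliner::ReduceCall above). A
simplified model of both halves, with WasmCodePosition reduced to a
plain int and all other names local to the sketch:

#include <cstdint>
#include <map>
#include <mutex>
#include <vector>

struct CallSiteFeedback {
  int function_index;
  int absolute_call_frequency;
};

struct FunctionTypeFeedback {
  std::vector<CallSiteFeedback> feedback_vector;
  std::map<int, int> positions;  // wasm code position -> feedback slot
};

std::mutex mutex;  // stands in for TypeFeedbackStorage::mutex
std::map<uint32_t, FunctionTypeFeedback> feedback_for_function;

// Liftoff side: at each call_ref, remember which feedback-vector slot
// belongs to which source position.
void RecordCallRef(uint32_t func_index, int position, int slot) {
  std::lock_guard<std::mutex> guard(mutex);
  feedback_for_function[func_index].positions[position] = slot;
}

// Inliner side: translate a call's source position back into a call count.
int CallCountAt(uint32_t func_index, int position) {
  std::lock_guard<std::mutex> guard(mutex);
  auto it = feedback_for_function.find(func_index);
  if (it == feedback_for_function.end()) return 0;
  auto slot = it->second.positions.find(position);
  if (slot == it->second.positions.end() ||
      slot->second >= static_cast<int>(it->second.feedback_vector.size())) {
    return 0;
  }
  return it->second.feedback_vector[slot->second].absolute_call_frequency;
}

int main() {
  RecordCallRef(7, 12, 0);
  feedback_for_function[7].feedback_vector.push_back({3, 42});
  return CallCountAt(7, 12) == 42 ? 0 : 1;
}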


@@ -127,13 +127,10 @@ class WasmGraphBuildingInterface {
base::MutexGuard mutex_guard(&feedbacks.mutex);
auto feedback = feedbacks.feedback_for_function.find(func_index_);
if (feedback != feedbacks.feedback_for_function.end()) {
type_feedback_ = std::move(feedback->second);
// Erasing the map entry means that if the same function later gets
// inlined, its inlined copy won't have any type feedback available.
// However, if we don't erase the entry now, we'll be stuck with it
// forever.
type_feedback_ = feedback->second.feedback_vector;
// We need to keep the feedback in the module to inline later. However,
// this means we are stuck with it forever.
// TODO(jkummerow): Reconsider our options here.
feedbacks.feedback_for_function.erase(func_index_);
}
}
// The first '+ 1' is needed by TF Start node, the second '+ 1' is for the
@@ -675,7 +672,8 @@ class WasmGraphBuildingInterface {
// we won't have any for inlined functions. Figure out how to change that.
if (FLAG_wasm_speculative_inlining && type_feedback_.size() > 0) {
DCHECK_LT(feedback_instruction_index_, type_feedback_.size());
maybe_feedback = type_feedback_[feedback_instruction_index_];
maybe_feedback =
type_feedback_[feedback_instruction_index_].function_index;
feedback_instruction_index_++;
}
if (maybe_feedback == -1) {
@@ -742,9 +740,10 @@
const FunctionSig* sig, uint32_t sig_index,
const Value args[]) {
int maybe_feedback = -1;
if (FLAG_wasm_speculative_inlining) {
DCHECK_LE(feedback_instruction_index_, type_feedback_.size());
maybe_feedback = type_feedback_[feedback_instruction_index_];
if (FLAG_wasm_speculative_inlining && type_feedback_.size() > 0) {
DCHECK_LT(feedback_instruction_index_, type_feedback_.size());
maybe_feedback =
type_feedback_[feedback_instruction_index_].function_index;
feedback_instruction_index_++;
}
if (maybe_feedback == -1) {
@@ -1291,7 +1290,7 @@
// The entries in {type_feedback_} are indexed by the position of feedback-
// consuming instructions (currently only call_ref).
int feedback_instruction_index_ = 0;
std::vector<int> type_feedback_;
std::vector<CallSiteFeedback> type_feedback_;
TFNode* effect() { return builder_->effect(); }
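
The consuming side indexes this vector purely by decode order: every
call_ref visited consumes the next slot. A toy model of that cursor;
the names are local to the sketch, and -1 keeps its meaning of "no
usable feedback":

#include <cstddef>
#include <utility>
#include <vector>

struct CallSiteFeedback {
  int function_index;
  int absolute_call_frequency;
};

class FeedbackCursor {
 public:
  explicit FeedbackCursor(std::vector<CallSiteFeedback> feedback)
      : feedback_(std::move(feedback)) {}
  // Called once per call_ref, in the order the decoder visits them.
  int NextTargetIndex() {
    if (feedback_.empty()) return -1;  // no feedback: plain call_ref
    return feedback_[index_++].function_index;
  }
 private:
  std::vector<CallSiteFeedback> feedback_;
  size_t index_ = 0;
};

int main() {
  FeedbackCursor cursor({{5, 10}, {-1, -1}});
  return cursor.NextTargetIndex() == 5 && cursor.NextTargetIndex() == -1 ? 0
                                                                         : 1;
}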


@@ -1241,14 +1241,13 @@ bool CompileLazy(Isolate* isolate, Handle<WasmInstanceObject> instance,
return true;
}
std::vector<int> ProcessTypeFeedback(Isolate* isolate,
Handle<WasmInstanceObject> instance,
int func_index) {
std::vector<CallSiteFeedback> ProcessTypeFeedback(
Isolate* isolate, Handle<WasmInstanceObject> instance, int func_index) {
int which_vector = declared_function_index(instance->module(), func_index);
Object maybe_feedback = instance->feedback_vectors().get(which_vector);
if (!maybe_feedback.IsFixedArray()) return {};
FixedArray feedback = FixedArray::cast(maybe_feedback);
std::vector<int> result(feedback.length() / 2);
std::vector<CallSiteFeedback> result(feedback.length() / 2);
int imported_functions =
static_cast<int>(instance->module()->num_imported_functions);
for (int i = 0; i < feedback.length(); i += 2) {
@@ -1263,7 +1262,9 @@ std::vector<int> ProcessTypeFeedback(Isolate* isolate,
PrintF("[Function #%d call_ref #%d inlineable (monomorphic)]\n",
func_index, i / 2);
}
result[i / 2] = target.function_index();
CallRefData data = CallRefData::cast(feedback.get(i + 1));
result[i / 2] = {target.function_index(),
static_cast<int>(data.count())};
continue;
}
} else if (value.IsFixedArray()) {
@@ -1276,6 +1277,7 @@ std::vector<int> ProcessTypeFeedback(Isolate* isolate,
total_count += CallRefData::cast(polymorphic.get(j + 1)).count();
}
int found_target = -1;
int found_count = -1;
double best_frequency = 0;
for (int j = 0; j < polymorphic.length(); j += 2) {
uint32_t this_count = CallRefData::cast(polymorphic.get(j + 1)).count();
@@ -1293,6 +1295,7 @@ std::vector<int> ProcessTypeFeedback(Isolate* isolate,
continue;
}
found_target = target.function_index();
found_count = static_cast<int>(this_count);
if (FLAG_trace_wasm_speculative_inlining) {
PrintF("[Function #%d call_ref #%d inlineable (polymorphic %f)]\n",
func_index, i / 2, frequency);
@@ -1300,7 +1303,7 @@ std::vector<int> ProcessTypeFeedback(Isolate* isolate,
break;
}
if (found_target >= 0) {
result[i / 2] = found_target;
result[i / 2] = {found_target, found_count};
continue;
} else if (FLAG_trace_wasm_speculative_inlining) {
PrintF("[Function #%d call_ref #%d: best frequency %f]\n", func_index,
@@ -1310,7 +1313,7 @@ std::vector<int> ProcessTypeFeedback(Isolate* isolate,
// If we fall through to here, then this call isn't eligible for inlining.
// Possible reasons: uninitialized or megamorphic feedback; or monomorphic
// or polymorphic that didn't meet our requirements.
result[i / 2] = -1;
result[i / 2] = {-1, -1};
}
return result;
}
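
Condensed, the selection above picks the call target whose count
dominates the total. A sketch of that rule; the hunk elides the actual
frequency cutoff, so kMinFrequency below is an assumption, not a value
from the CL:

#include <utility>
#include <vector>

constexpr double kMinFrequency = 0.8;  // assumed cutoff, not from the CL

// Each entry is one observed target: {function_index, count}. Returns
// {found_target, found_count}, or {-1, -1} if the site is not eligible.
std::pair<int, int> SelectTarget(
    const std::vector<std::pair<int, int>>& targets) {
  int total_count = 0;
  for (const auto& target : targets) total_count += target.second;
  if (total_count == 0) return {-1, -1};  // uninitialized feedback
  for (const auto& target : targets) {
    double frequency = static_cast<double>(target.second) / total_count;
    if (frequency >= kMinFrequency) return target;
  }
  return {-1, -1};  // megamorphic, or no dominant target
}

int main() {
  // 90% of calls went to function 4: inlineable.
  return SelectTarget({{4, 90}, {7, 10}}).first == 4 ? 0 : 1;
}
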
@@ -1329,7 +1332,7 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
// TODO(jkummerow): we could have collisions here if two different instances
// of the same module schedule tier-ups of the same function at the same
// time. If that ever becomes a problem, figure out a solution.
module->type_feedback.feedback_for_function[func_index] =
module->type_feedback.feedback_for_function[func_index].feedback_vector =
std::move(feedback);
}


@@ -262,8 +262,16 @@ struct V8_EXPORT_PRIVATE WasmDebugSymbols {
WireBytesRef external_url;
};
struct CallSiteFeedback {
int function_index;
int absolute_call_frequency;
};
struct FunctionTypeFeedback {
std::vector<CallSiteFeedback> feedback_vector;
std::map<WasmCodePosition, int> positions;
};
struct TypeFeedbackStorage {
std::map<uint32_t, std::vector<int>> feedback_for_function;
std::map<uint32_t, FunctionTypeFeedback> feedback_for_function;
// Accesses to {feedback_for_function} are guarded by this mutex.
base::Mutex mutex;
};


@@ -3,7 +3,6 @@
// found in the LICENSE file.
// Flags: --wasm-inlining --no-liftoff --experimental-wasm-return-call
// Flags: --experimental-wasm-typed-funcref
d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
@@ -11,6 +10,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
// output, or implementing testing infrastructure with --allow-natives-syntax.
(function SimpleInliningTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = x - 1
@@ -27,6 +27,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function MultiReturnTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = (x - 1, x + 1)
@@ -43,6 +44,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function NoReturnTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let global = builder.addGlobal(kWasmI32, true);
@@ -60,6 +62,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function InfiniteLoopTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let callee = builder.addFunction("callee", kSig_i_i)
@@ -78,6 +81,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function TailCallInCalleeTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = g(x - 1)
@@ -98,6 +102,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function MultipleCallAndReturnSitesTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = x >= 0 ? x - 1 : x + 1
@@ -121,6 +126,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function TailCallInCallerTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = x > 0 ? g(x) + 1: g(x - 1);
@@ -148,6 +154,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function HandledInHandledTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let tag = builder.addTag(kSig_v_i);
@@ -173,6 +180,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function HandledInUnhandledTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let tag = builder.addTag(kSig_v_i);
@@ -194,6 +202,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
})();
(function UnhandledInUnhandledTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let tag = builder.addTag(kSig_v_i);
@@ -213,6 +222,7 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
// the unhandled calls in the callee (including the 'throw' builtin) to the
// handler in the caller.
(function UnhandledInHandledTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
let tag = builder.addTag(kSig_v_i);
@@ -241,96 +251,9 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
assertEquals(20, instance.exports.main(10, 20));
})();
(function CallRefSpecSucceededTest() {
let builder = new WasmModuleBuilder();
// f(x) = x - 1
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(0), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5) + x
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprGlobalGet, global.index, kExprCallRef,
kExprLocalGet, 0, kExprI32Add])
.exportAs("main");
let instance = builder.instantiate();
assertEquals(14, instance.exports.main(10));
})();
(function CallRefSpecFailedTest() {
let builder = new WasmModuleBuilder();
// h(x) = x - 1
builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
// f(x) = x - 2
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 2, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5) + x
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprGlobalGet, global.index, kExprCallRef,
kExprLocalGet, 0, kExprI32Add])
.exportAs("main");
let instance = builder.instantiate();
assertEquals(13, instance.exports.main(10));
})();
(function CallReturnRefSpecSucceededTest() {
let builder = new WasmModuleBuilder();
// f(x) = x - 1
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(0), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5 + x)
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprLocalGet, 0, kExprI32Add,
kExprGlobalGet, global.index, kExprReturnCallRef])
.exportAs("main");
let instance = builder.instantiate();
assertEquals(14, instance.exports.main(10));
})();
(function CallReturnRefSpecFailedTest() {
let builder = new WasmModuleBuilder();
// h(x) = x - 1
builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
// f(x) = x - 2
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 2, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5 + x)
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprLocalGet, 0, kExprI32Add,
kExprGlobalGet, global.index, kExprReturnCallRef])
.exportAs("main");
let instance = builder.instantiate();
assertEquals(13, instance.exports.main(10));
})();
// Tests that no LoopExits are emitted in the inlined function.
(function LoopUnrollingTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x, y) = { do { y += 1; x -= 1; } while (x > 0); return y; }


@@ -0,0 +1,135 @@
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --wasm-speculative-inlining --experimental-wasm-return-call
// Flags: --experimental-wasm-typed-funcref
d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
(function CallRefSpecSucceededTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = x - 1
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(0), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5) + x
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprGlobalGet, global.index, kExprCallRef,
kExprLocalGet, 0, kExprI32Add])
.exportAs("main");
let instance = builder.instantiate();
// Run it 10 times to trigger tier-up.
for (var i = 0; i < 10; i++) assertEquals(14, instance.exports.main(10));
})();
(function CallRefSpecFailedTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// h(x) = x - 1
let callee0 = builder.addFunction("callee0", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
// f(x) = x - 2
let callee1 = builder.addFunction("callee1", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 2, kExprI32Sub]);
let global0 = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee0.index));
let global1 = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee1.index));
// g(x, y) = if (y) { h(5) + x } else { f(7) + x }
builder.addFunction("main", kSig_i_ii)
.addBody([
kExprLocalGet, 1,
kExprIf, kWasmI32,
kExprI32Const, 5, kExprGlobalGet, global0.index, kExprCallRef,
kExprLocalGet, 0, kExprI32Add,
kExprElse,
kExprI32Const, 7, kExprGlobalGet, global1.index, kExprCallRef,
kExprLocalGet, 0, kExprI32Add,
kExprEnd])
.exportAs("main");
let instance = builder.instantiate();
// Run main 10 times with the same function reference to trigger tier-up.
// This will speculatively inline a call to function {h}.
for (var i = 0; i < 10; i++) assertEquals(14, instance.exports.main(10, 1));
// If tier-up is done, "callee0" should be inlined in the trace.
assertEquals(14, instance.exports.main(10, 1));
// Now, run main with {f} instead. The correct reference should still be
// called, i.e., "callee1".
assertEquals(15, instance.exports.main(10, 0));
})();
// TODO(manoskouk): Fix the following tests.
(function CallReturnRefSpecSucceededTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// f(x) = x - 1
let callee = builder.addFunction("callee", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
let global = builder.addGlobal(wasmRefType(0), false,
WasmInitExpr.RefFunc(callee.index));
// g(x) = f(5 + x)
builder.addFunction("main", kSig_i_i)
.addBody([kExprI32Const, 5, kExprLocalGet, 0, kExprI32Add,
kExprGlobalGet, global.index, kExprReturnCallRef])
.exportAs("main");
let instance = builder.instantiate();
// Run it 10 times to trigger tier-up.
for (var i = 0; i < 10; i++) assertEquals(14, instance.exports.main(10));
})();
(function CallReturnRefSpecFailedTest() {
print(arguments.callee.name);
let builder = new WasmModuleBuilder();
// h(x) = x - 1
let callee0 = builder.addFunction("callee0", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub]);
// f(x) = x - 2
let callee1 = builder.addFunction("callee1", kSig_i_i)
.addBody([kExprLocalGet, 0, kExprI32Const, 2, kExprI32Sub]);
let global0 = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee0.index));
let global1 = builder.addGlobal(wasmRefType(1), false,
WasmInitExpr.RefFunc(callee1.index));
// g(x, y) = if (y) { h(x) } else { f(x) }
builder.addFunction("main", kSig_i_ii)
.addBody([
kExprLocalGet, 1,
kExprIf, kWasmI32,
kExprLocalGet, 0, kExprGlobalGet, global0.index, kExprReturnCallRef,
kExprElse,
kExprLocalGet, 0, kExprGlobalGet, global1.index, kExprReturnCallRef,
kExprEnd])
.exportAs("main");
let instance = builder.instantiate();
// Run main 10 times with the same function reference to trigger tier-up.
// This will speculatively inline a call to function {h}.
for (var i = 0; i < 10; i++) assertEquals(9, instance.exports.main(10, 1));
// If tier-up is done, "callee0" should be inlined in the trace.
assertEquals(9, instance.exports.main(10, 1));
// Now, run main with {f} instead. The correct reference should still be
// called, i.e., "callee1".
assertEquals(8, instance.exports.main(10, 0));
})();