[wasm] Implement loop peeling
We implement loop peeling for wasm, currently available behind a flag (wasm_loop_peeling, disabled by default). Loops are peeled regardless of size.

Bug: v8:11510
Change-Id: Ia4c883abdee83df632b2611584d608c44e3295c8
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3367615
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Manos Koukoutos <manoskouk@chromium.org>
Cr-Commit-Position: refs/heads/main@{#78496}
This commit is contained in:
parent
457827106a
commit
8e9d8e1783
@ -2249,6 +2249,7 @@ filegroup(
|
||||
"src/compiler/wasm-compiler.h",
|
||||
"src/compiler/wasm-escape-analysis.h",
|
||||
"src/compiler/wasm-inlining.h",
|
||||
"src/compiler/wasm-loop-peeling.h",
|
||||
"src/debug/debug-wasm-objects.cc",
|
||||
"src/debug/debug-wasm-objects.h",
|
||||
"src/debug/debug-wasm-objects-inl.h",
|
||||
@ -2634,6 +2635,7 @@ filegroup(
|
||||
":is_v8_enable_webassembly": [
|
||||
"src/compiler/int64-lowering.cc",
|
||||
"src/compiler/wasm-compiler.cc",
|
||||
"src/compiler/wasm-loop-peeling.cc",
|
||||
"src/compiler/wasm-escape-analysis.cc",
|
||||
"src/compiler/wasm-inlining.cc",
|
||||
],
|
||||
|
2
BUILD.gn
2
BUILD.gn
@ -3435,6 +3435,7 @@ v8_header_set("v8_internal_headers") {
|
||||
"src/compiler/wasm-compiler.h",
|
||||
"src/compiler/wasm-escape-analysis.h",
|
||||
"src/compiler/wasm-inlining.h",
|
||||
"src/compiler/wasm-loop-peeling.h",
|
||||
"src/debug/debug-wasm-objects-inl.h",
|
||||
"src/debug/debug-wasm-objects.h",
|
||||
"src/trap-handler/trap-handler-internal.h",
|
||||
@ -3907,6 +3908,7 @@ if (v8_enable_webassembly) {
|
||||
"src/compiler/wasm-compiler.cc",
|
||||
"src/compiler/wasm-escape-analysis.cc",
|
||||
"src/compiler/wasm-inlining.cc",
|
||||
"src/compiler/wasm-loop-peeling.cc",
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -551,7 +551,7 @@ LoopTree* LoopFinder::BuildLoopTree(Graph* graph, TickCounter* tick_counter,
|
||||
#if V8_ENABLE_WEBASSEMBLY
|
||||
// static
|
||||
ZoneUnorderedSet<Node*>* LoopFinder::FindSmallInnermostLoopFromHeader(
|
||||
Node* loop_header, Zone* zone, size_t max_size) {
|
||||
Node* loop_header, Zone* zone, size_t max_size, bool calls_are_large) {
|
||||
auto* visited = zone->New<ZoneUnorderedSet<Node*>>(zone);
|
||||
std::vector<Node*> queue;
|
||||
|
||||
@ -594,13 +594,19 @@ ZoneUnorderedSet<Node*>* LoopFinder::FindSmallInnermostLoopFromHeader(
|
||||
}
|
||||
// All uses are outside the loop, do nothing.
|
||||
break;
|
||||
// If {calls_are_large}, call nodes are considered to have unbounded size,
|
||||
// i.e. >max_size, with the exception of certain wasm builtins.
|
||||
case IrOpcode::kTailCall:
|
||||
case IrOpcode::kJSWasmCall:
|
||||
case IrOpcode::kJSCall:
|
||||
// Call nodes are considered to have unbounded size, i.e. >max_size,
|
||||
// with the exception of certain wasm builtins.
|
||||
return nullptr;
|
||||
if (calls_are_large) return nullptr;
|
||||
ENQUEUE_USES(use, true)
|
||||
break;
|
||||
case IrOpcode::kCall: {
|
||||
if (!calls_are_large) {
|
||||
ENQUEUE_USES(use, true);
|
||||
break;
|
||||
}
|
||||
Node* callee = node->InputAt(0);
|
||||
if (callee->opcode() != IrOpcode::kRelocatableInt32Constant &&
|
||||
callee->opcode() != IrOpcode::kRelocatableInt64Constant) {
|
||||
|
@ -186,11 +186,11 @@ class V8_EXPORT_PRIVATE LoopFinder {
|
||||
// marked with LoopExit, LoopExitEffect, LoopExitValue, or End nodes.
|
||||
// Returns {nullptr} if
|
||||
// 1) the loop size (in graph nodes) exceeds {max_size},
|
||||
// 2) a function call is found in the loop, excluding calls to a set of wasm
|
||||
// builtins,
|
||||
// 2) {calls_are_large} and a function call is found in the loop, excluding
|
||||
// calls to a set of wasm builtins,
|
||||
// 3) a nested loop is found in the loop.
|
||||
static ZoneUnorderedSet<Node*>* FindSmallInnermostLoopFromHeader(
|
||||
Node* loop_header, Zone* zone, size_t max_size);
|
||||
Node* loop_header, Zone* zone, size_t max_size, bool calls_are_large);
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -198,7 +198,7 @@ class V8_EXPORT_PRIVATE LoopFinder {
|
||||
class NodeCopier {
|
||||
public:
|
||||
// {max}: The maximum number of nodes that this copier will track, including
|
||||
// The original nodes and all copies.
|
||||
// the original nodes and all copies.
|
||||
// {p}: A vector that holds the original nodes and all copies.
|
||||
// {copy_count}: How many times the nodes should be copied.
|
||||
NodeCopier(Graph* graph, uint32_t max, NodeVector* p, uint32_t copy_count)
|
||||
|
@ -99,6 +99,7 @@
|
||||
#include "src/compiler/wasm-compiler.h"
|
||||
#include "src/compiler/wasm-escape-analysis.h"
|
||||
#include "src/compiler/wasm-inlining.h"
|
||||
#include "src/compiler/wasm-loop-peeling.h"
|
||||
#include "src/wasm/function-body-decoder.h"
|
||||
#include "src/wasm/function-compiler.h"
|
||||
#include "src/wasm/wasm-engine.h"
|
||||
@ -1680,6 +1681,24 @@ struct WasmInliningPhase {
|
||||
}
|
||||
};
|
||||
|
||||
namespace {
|
||||
// Removes the LoopExit nodes hanging off the header of every loop listed in
// {loop_infos} by handing each of them to LoopPeeler::EliminateLoopExit.
void EliminateLoopExits(std::vector<compiler::WasmLoopInfo>* loop_infos) {
  for (WasmLoopInfo& info : *loop_infos) {
    // Snapshot the exits before eliminating any of them: the header's uses()
    // must not be mutated while they are being iterated.
    std::unordered_set<Node*> pending_exits;
    for (Node* header_use : info.header->uses()) {
      if (header_use->opcode() != IrOpcode::kLoopExit) continue;
      pending_exits.insert(header_use);
    }
    for (Node* loop_exit : pending_exits) {
      LoopPeeler::EliminateLoopExit(loop_exit);
    }
  }
}
|
||||
} // namespace
|
||||
|
||||
struct WasmLoopUnrollingPhase {
|
||||
DECL_PIPELINE_PHASE_CONSTANTS(WasmLoopUnrolling)
|
||||
|
||||
@ -1692,7 +1711,7 @@ struct WasmLoopUnrollingPhase {
|
||||
loop_info.header, temp_zone,
|
||||
// Only discover the loop until its size is the maximum unrolled
|
||||
// size for its depth.
|
||||
maximum_unrollable_size(loop_info.nesting_depth));
|
||||
maximum_unrollable_size(loop_info.nesting_depth), true);
|
||||
if (loop == nullptr) continue;
|
||||
UnrollLoop(loop_info.header, loop, loop_info.nesting_depth,
|
||||
data->graph(), data->common(), temp_zone,
|
||||
@ -1700,19 +1719,28 @@ struct WasmLoopUnrollingPhase {
|
||||
}
|
||||
}
|
||||
|
||||
EliminateLoopExits(loop_infos);
|
||||
}
|
||||
};
|
||||
|
||||
struct WasmLoopPeelingPhase {
|
||||
DECL_PIPELINE_PHASE_CONSTANTS(WasmLoopPeeling)
|
||||
|
||||
void Run(PipelineData* data, Zone* temp_zone,
|
||||
std::vector<compiler::WasmLoopInfo>* loop_infos) {
|
||||
for (WasmLoopInfo& loop_info : *loop_infos) {
|
||||
std::unordered_set<Node*> loop_exits;
|
||||
// We collect exits into a set first because we are not allowed to mutate
|
||||
// them while iterating uses().
|
||||
for (Node* use : loop_info.header->uses()) {
|
||||
if (use->opcode() == IrOpcode::kLoopExit) {
|
||||
loop_exits.insert(use);
|
||||
}
|
||||
}
|
||||
for (Node* use : loop_exits) {
|
||||
LoopPeeler::EliminateLoopExit(use);
|
||||
if (loop_info.can_be_innermost) {
|
||||
ZoneUnorderedSet<Node*>* loop =
|
||||
LoopFinder::FindSmallInnermostLoopFromHeader(
|
||||
loop_info.header, temp_zone, std::numeric_limits<size_t>::max(),
|
||||
false);
|
||||
if (loop == nullptr) continue;
|
||||
PeelWasmLoop(loop_info.header, loop, data->graph(), data->common(),
|
||||
temp_zone, data->source_positions(), data->node_origins());
|
||||
}
|
||||
}
|
||||
// If we are going to unroll later, keep loop exits.
|
||||
if (!FLAG_wasm_loop_unrolling) EliminateLoopExits(loop_infos);
|
||||
}
|
||||
};
|
||||
#endif // V8_ENABLE_WEBASSEMBLY
|
||||
@ -3249,6 +3277,10 @@ void Pipeline::GenerateCodeForWasmFunction(
|
||||
loop_info);
|
||||
pipeline.RunPrintAndVerify(WasmInliningPhase::phase_name(), true);
|
||||
}
|
||||
if (FLAG_wasm_loop_peeling) {
|
||||
pipeline.Run<WasmLoopPeelingPhase>(loop_info);
|
||||
pipeline.RunPrintAndVerify(WasmLoopPeelingPhase::phase_name(), true);
|
||||
}
|
||||
if (FLAG_wasm_loop_unrolling) {
|
||||
pipeline.Run<WasmLoopUnrollingPhase>(loop_info);
|
||||
pipeline.RunPrintAndVerify(WasmLoopUnrollingPhase::phase_name(), true);
|
||||
|
133
src/compiler/wasm-loop-peeling.cc
Normal file
133
src/compiler/wasm-loop-peeling.cc
Normal file
@ -0,0 +1,133 @@
|
||||
// Copyright 2021 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/compiler/wasm-loop-peeling.h"
|
||||
|
||||
#include "src/base/small-vector.h"
|
||||
#include "src/codegen/tick-counter.h"
|
||||
#include "src/compiler/common-operator.h"
|
||||
#include "src/compiler/loop-analysis.h"
|
||||
#include "src/compiler/loop-peeling.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
// Peels one iteration off the loop headed by {loop_node}.
//
// Every node in {loop} is copied once. The copy (the "peeled iteration") runs
// first and then flows into the original ("main") loop. Each LoopExit of the
// main loop is joined with the matching exit of the peeled iteration through a
// new Merge; LoopExitEffect / LoopExitValue markers are joined through new
// phis. Returns without touching the graph if {loop_node} has fewer than two
// inputs.
void PeelWasmLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, Graph* graph,
                  CommonOperatorBuilder* common, Zone* tmp_zone,
                  SourcePositionTable* source_positions,
                  NodeOriginTable* node_origins) {
  DCHECK_EQ(loop_node->opcode(), IrOpcode::kLoop);
  DCHECK_NOT_NULL(loop);
  // A header with no back-jump is not really a loop; there is nothing to peel.
  if (loop_node->InputCount() < 2) return;

  // The copier tracks every original node plus its single copy.
  const uint32_t tracked_node_limit = static_cast<uint32_t>(loop->size()) * 2;
  NodeVector tracked_nodes(tmp_zone);
  NodeCopier copier(graph, tracked_node_limit, &tracked_nodes, 1);

  source_positions->AddDecorator();
  copier.CopyNodes(graph, tmp_zone, graph->NewNode(common->Dead()),
                   base::make_iterator_range(loop->begin(), loop->end()),
                   source_positions, node_origins);
  source_positions->RemoveDecorator();

  Node* const peeled_header = copier.map(loop_node);

  // Graph terminators in the copies that have no use yet must be connected to
  // the graph's end node. Terminate nodes are left alone, as they will be
  // deleted anyway.
  for (Node* candidate : tracked_nodes) {
    const auto candidate_opcode = candidate->opcode();
    if (candidate_opcode == IrOpcode::kTerminate) continue;
    if (!IrOpcode::IsGraphTerminator(candidate_opcode)) continue;
    if (candidate->UseCount() != 0) continue;
    NodeProperties::MergeControlToEnd(graph, common, candidate);
  }

  // Step 1: Create merges for loop exits.
  for (Node* header_use : loop_node->uses()) {
    const auto use_opcode = header_use->opcode();
    if (use_opcode == IrOpcode::kTerminate) {
      // The peeled iteration does not need its copy of the Terminate node.
      copier.map(header_use)->Kill();
      continue;
    }
    if (use_opcode != IrOpcode::kLoopExit) continue;

    Node* const main_exit = header_use;
    DCHECK_EQ(main_exit->InputAt(1), loop_node);
    // Merge the main loop's exit with the peeled iteration's exit. The copied
    // LoopExit itself is bypassed: its control input feeds the merge directly.
    Node* const exit_merge = graph->NewNode(common->Merge(2), main_exit,
                                            copier.map(main_exit)->InputAt(0));

    // Redirect everything that used the loop exit to the merge.
    for (Edge exit_edge : main_exit->use_edges()) {
      Node* const exit_user = exit_edge.from();
      const bool user_is_in_loop = loop->count(exit_user) == 1;
      if (!user_is_in_loop) {
        // Uses outside the loop are simply re-pointed at the merge (the merge
        // itself must of course keep the loop exit as its input).
        if (exit_user != exit_merge) {
          exit_user->ReplaceInput(exit_edge.index(), exit_merge);
        }
        continue;
      }

      // Uses inside the loop are LoopExitEffect / LoopExitValue markers, which
      // in turn are consumed outside the loop. Their consumers need a phi
      // that selects between the main loop and the peeled iteration.
      DCHECK(exit_user->opcode() == IrOpcode::kLoopExitEffect ||
             exit_user->opcode() == IrOpcode::kLoopExitValue);
      const Operator* join_operator;
      if (exit_user->opcode() == IrOpcode::kLoopExitEffect) {
        join_operator = common->EffectPhi(2);
      } else {
        join_operator =
            common->Phi(LoopExitValueRepresentationOf(exit_user->op()), 2);
      }
      Node* const join =
          graph->NewNode(join_operator, exit_user,
                         copier.map(exit_user)->InputAt(0), exit_merge);
      exit_user->ReplaceUses(join);
      // ReplaceUses() also rewired the new phi's own input 0, since the phi is
      // itself a use of the marker. Point it back at the marker.
      join->ReplaceInput(0, exit_user);
      copier.map(exit_user)->Kill();
    }
    copier.map(main_exit)->Kill();
  }

  // Step 2: The peeled iteration is not a loop anymore. Control uses of its
  // header are redirected to the non-recursive input of the loop, and uses of
  // its phis are redirected to the value coming from outside the loop.
  for (Edge header_edge : peeled_header->use_edges()) {
    Node* const header_user = header_edge.from();
    if (!NodeProperties::IsPhi(header_user)) {
      header_edge.UpdateTo(loop_node->InputAt(0));
      continue;
    }
    header_user->ReplaceUses(header_user->InputAt(0));
  }

  // What remains of the peeled Loop node and its phis is now an unconnected
  // subgraph.

  // Step 3: Rewire the peeled iteration to flow into the main loop.

  // The peeled Loop node and its phis are recycled as the Merge and phis
  // through which the peeled iteration enters the main loop. Start by dropping
  // the header's non-recursive input and turning it into a Merge.
  peeled_header->RemoveInput(0);
  NodeProperties::ChangeOp(peeled_header,
                           common->Merge(peeled_header->InputCount()));

  // Likewise drop the non-recursive input of each phi on the peeled header and
  // shrink the phi's operator to match.
  for (Edge header_edge : peeled_header->use_edges()) {
    Node* const peeled_phi = header_edge.from();
    DCHECK(NodeProperties::IsPhi(peeled_phi));
    peeled_phi->RemoveInput(0);
    const Operator* resized_operator = common->ResizeMergeOrPhi(
        peeled_phi->op(), peeled_phi->InputCount() - /* control input */ 1);
    NodeProperties::ChangeOp(peeled_phi, resized_operator);
  }

  // Finally, make the main loop and its phis take their entry inputs from the
  // merge and phis prepared above.
  loop_node->ReplaceInput(0, peeled_header);
  for (Edge loop_edge : loop_node->use_edges()) {
    Node* const loop_user = loop_edge.from();
    if (!NodeProperties::IsPhi(loop_user)) continue;
    loop_user->ReplaceInput(0, copier.map(loop_user));
  }
}
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
33
src/compiler/wasm-loop-peeling.h
Normal file
33
src/compiler/wasm-loop-peeling.h
Normal file
@ -0,0 +1,33 @@
|
||||
// Copyright 2021 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#if !V8_ENABLE_WEBASSEMBLY
|
||||
#error This header should only be included if WebAssembly is enabled.
|
||||
#endif // !V8_ENABLE_WEBASSEMBLY
|
||||
|
||||
#ifndef V8_COMPILER_WASM_LOOP_PEELING_H_
|
||||
#define V8_COMPILER_WASM_LOOP_PEELING_H_
|
||||
|
||||
#include "src/compiler/common-operator.h"
|
||||
#include "src/compiler/loop-analysis.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
namespace compiler {
|
||||
|
||||
// Loop peeling is an optimization that copies the body of a loop, creating
// a new copy of the body called the "peeled iteration" that represents the
// first iteration. It enables a kind of loop hoisting: side-effect-free
// computations that are repeated in the body of the loop can be computed once,
// in the peeled iteration, and reused by all subsequent iterations.
// {loop_node} must be the Loop node heading the loop, and {loop} the (non-null)
// set of nodes that make up the loop.
|
||||
void PeelWasmLoop(Node* loop_node, ZoneUnorderedSet<Node*>* loop, Graph* graph,
|
||||
CommonOperatorBuilder* common, Zone* tmp_zone,
|
||||
SourcePositionTable* source_positions,
|
||||
NodeOriginTable* node_origins);
|
||||
|
||||
} // namespace compiler
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_COMPILER_WASM_LOOP_PEELING_H_
|
@ -1104,6 +1104,7 @@ DEFINE_NEG_IMPLICATION(liftoff_only, wasm_speculative_inlining)
|
||||
|
||||
DEFINE_BOOL(wasm_loop_unrolling, true,
|
||||
"enable loop unrolling for wasm functions")
|
||||
DEFINE_BOOL(wasm_loop_peeling, false, "enable loop peeling for wasm functions")
|
||||
DEFINE_BOOL(wasm_fuzzer_gen_test, false,
|
||||
"generate a test case when running a wasm fuzzer")
|
||||
DEFINE_IMPLICATION(wasm_fuzzer_gen_test, single_threaded)
|
||||
|
@ -372,6 +372,7 @@ class RuntimeCallTimer final {
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, VerifyGraph) \
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmBaseOptimization) \
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmInlining) \
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmLoopPeeling) \
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmLoopUnrolling) \
|
||||
ADD_THREAD_SPECIFIC_COUNTER(V, Optimize, WasmOptimization) \
|
||||
\
|
||||
|
@ -1310,7 +1310,9 @@ class WasmGraphBuildingInterface {
|
||||
// different nodes during inlining. These are Return and TailCall nodes.
|
||||
// - After IfFailure nodes.
|
||||
// - When exiting a loop through Delegate.
|
||||
bool emit_loop_exits() { return FLAG_wasm_loop_unrolling; }
|
||||
bool emit_loop_exits() {
|
||||
return FLAG_wasm_loop_unrolling || FLAG_wasm_loop_peeling;
|
||||
}
|
||||
|
||||
void GetNodes(TFNode** nodes, Value* values, size_t count) {
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
|
@ -62,6 +62,49 @@ d8.file.execute("test/mjsunit/wasm/wasm-module-builder.js");
|
||||
assertEquals(10, instance.exports.main(10));
|
||||
})();
|
||||
|
||||
// A loop whose body calls a function that itself contains a loop:
// main(n) sums fact(n) + fact(n - 1) + ... + fact(1).
(function LoopInLoopTest() {
  print(arguments.callee.name);
  const builder = new WasmModuleBuilder();

  // Local 0 is the input, local 1 is the running result.
  const factBody = [
    // result = 1;
    kExprI32Const, 1, kExprLocalSet, 1,
    kExprLoop, kWasmVoid,
      kExprLocalGet, 1,
      // if input == 1 return result;
      kExprLocalGet, 0, kExprI32Const, 1, kExprI32Eq, kExprBrIf, 1,
      // result *= input;
      kExprLocalGet, 0, kExprI32Mul, kExprLocalSet, 1,
      // input -= 1;
      kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub,
      kExprLocalSet, 0,
      kExprBr, 0,
    kExprEnd,
    kExprUnreachable
  ];
  const fact = builder.addFunction("fact", kSig_i_i)
                   .addLocals(kWasmI32, 1)
                   .addBody(factBody);

  // Local 0 is the input, local 1 is the running sum.
  const mainBody = [
    kExprLoop, kWasmVoid,
      kExprLocalGet, 1,
      // if input == 0 return sum;
      kExprLocalGet, 0, kExprI32Const, 0, kExprI32Eq, kExprBrIf, 1,
      // sum += fact(input);
      kExprLocalGet, 0, kExprCallFunction, fact.index,
      kExprI32Add, kExprLocalSet, 1,
      // input -= 1;
      kExprLocalGet, 0, kExprI32Const, 1, kExprI32Sub,
      kExprLocalSet, 0,
      kExprBr, 0,
    kExprEnd,
    kExprUnreachable
  ];
  builder.addFunction("main", kSig_i_i)
      .addLocals(kWasmI32, 1)
      .addBody(mainBody)
      .exportAs("main");

  const instance = builder.instantiate();
  // 4! + 3! + 2! + 1! = 24 + 6 + 2 + 1 = 33.
  assertEquals(33, instance.exports.main(4));
})();
|
||||
|
||||
(function InfiniteLoopTest() {
|
||||
print(arguments.callee.name);
|
||||
let builder = new WasmModuleBuilder();
|
||||
|
Loading…
Reference in New Issue
Block a user