[turbofan] Use heavy-handed graph duplication to do loop peeling for OSR.

BUG=

Review URL: https://codereview.chromium.org/898353002

Cr-Commit-Position: refs/heads/master@{#26576}
This commit is contained in:
titzer 2015-02-11 05:26:35 -08:00 committed by Commit bot
parent 12e1b959d2
commit 31637fb396
12 changed files with 559 additions and 15 deletions

View File

@ -462,6 +462,16 @@ LoopTree* LoopFinder::BuildLoopTree(Graph* graph, Zone* zone) {
return loop_tree;
}
// Returns the kLoop control node that heads {loop}. The first entry of
// HeaderNodes(loop) is either that kLoop node itself or a phi; in the phi
// case the loop node is the phi's control input.
Node* LoopTree::HeaderNode(Loop* loop) {
  Node* const candidate = *HeaderNodes(loop).begin();
  if (candidate->opcode() != IrOpcode::kLoop) {
    // Not the loop node itself, so it has to be a phi attached to it.
    DCHECK(IrOpcode::IsPhiOpcode(candidate->opcode()));
    Node* const control = NodeProperties::GetControlInput(candidate);
    DCHECK_EQ(IrOpcode::kLoop, control->opcode());
    return control;
  }
  return candidate;
}
} // namespace compiler
} // namespace internal
} // namespace v8

View File

@ -89,6 +89,9 @@ class LoopTree : public ZoneObject {
&loop_nodes_[0] + loop->body_start_);
}
// Return the header control node for a loop.
Node* HeaderNode(Loop* loop);
// Return a range which can iterate over the body nodes of {loop}.
NodeRange BodyNodes(Loop* loop) {
return NodeRange(&loop_nodes_[0] + loop->body_start_,

View File

@ -3,10 +3,12 @@
// found in the LICENSE file.
#include "src/compiler.h"
#include "src/compiler/all-nodes.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/control-reducer.h"
#include "src/compiler/frame.h"
#include "src/compiler/graph.h"
#include "src/compiler/graph-visualizer.h"
#include "src/compiler/js-graph.h"
#include "src/compiler/loop-analysis.h"
#include "src/compiler/node.h"
@ -24,6 +26,154 @@ OsrHelper::OsrHelper(CompilationInfo* info)
info->osr_expr_stack_height()) {}
// Peel outer loops and rewire the graph so that control reduction can
// produce a properly formed graph.
//
// For every loop that encloses {osr_loop}, walking from the innermost parent
// outwards, one copy of every live non-leaf node is appended to {graph}. In
// each copy the OSR entries and the headers of the loops further out are
// mapped to {dead}, OsrValue nodes are shared with the original, and the
// entry edge (input 0) of the copied loop header nodes is fed from the
// backedges of the previous copy (or of the original graph for the first
// copy). Finally the outer loop headers of the original graph are replaced by
// {dead} and the control inputs of End from all copies are merged.
//
//   graph            - graph that is extended in place.
//   common           - operator builder used for the new Merge/Phi operators.
//   tmp_zone         - zone backing all temporary vectors and node mappings.
//   dead             - node substituted for control that is dead in a copy.
//   loop_tree        - loop tree describing {graph}.
//   osr_loop         - loop tree entry of the loop being entered via OSR.
//   osr_normal_entry - OsrNormalEntry node; mapped to {dead} in every copy.
//   osr_loop_entry   - OsrLoopEntry node; mapped to {dead} in every copy.
static void PeelOuterLoopsForOsr(Graph* graph, CommonOperatorBuilder* common,
                                 Zone* tmp_zone, Node* dead,
                                 LoopTree* loop_tree, LoopTree::Loop* osr_loop,
                                 Node* osr_normal_entry, Node* osr_loop_entry) {
  const int original_count = graph->NodeCount();
  AllNodes all(tmp_zone, graph);
  NodeVector tmp_inputs(tmp_zone);
  // Marks "no copy yet" in the mappings below. It is created after
  // {original_count} was read, so it is not one of the nodes being mapped.
  Node* sentinel = graph->NewNode(dead->op());

  // Make a copy of the graph for each outer loop.
  ZoneVector<NodeVector*> copies(tmp_zone);
  for (LoopTree::Loop* loop = osr_loop->parent(); loop; loop = loop->parent()) {
    // mapping[id] is the copy of the original node with that id.
    void* stuff = tmp_zone->New(sizeof(NodeVector));
    NodeVector* mapping =
        new (stuff) NodeVector(original_count, sentinel, tmp_zone);
    copies.push_back(mapping);

    // Prepare the mapping for OSR values and the OSR loop entry.
    mapping->at(osr_normal_entry->id()) = dead;
    mapping->at(osr_loop_entry->id()) = dead;

    // Don't duplicate the OSR values.
    for (Node* use : osr_loop_entry->uses()) {
      if (use->opcode() == IrOpcode::kOsrValue) mapping->at(use->id()) = use;
    }

    // The outer loops are dead in this copy.
    for (LoopTree::Loop* outer = loop->parent(); outer;
         outer = outer->parent()) {
      for (Node* node : loop_tree->HeaderNodes(outer)) {
        mapping->at(node->id()) = dead;
      }
    }

    // Copy all nodes.
    for (Node* orig : all.live) {
      Node* copy = mapping->at(orig->id());
      if (copy != sentinel) {
        // Mapping already exists.
        continue;
      }
      if (orig->InputCount() == 0) {
        // No need to copy leaf nodes.
        mapping->at(orig->id()) = orig;
        continue;
      }

      // Copy the node. Inputs that have not been copied yet are still
      // {sentinel} at this point and are patched in the next pass.
      tmp_inputs.clear();
      for (Node* input : orig->inputs()) {
        tmp_inputs.push_back(mapping->at(input->id()));
      }
      copy = graph->NewNode(orig->op(), orig->InputCount(), &tmp_inputs[0]);
      if (NodeProperties::IsTyped(orig)) {
        NodeProperties::SetBounds(copy, NodeProperties::GetBounds(orig));
      }
      mapping->at(orig->id()) = copy;
    }

    // Fix missing inputs.
    for (Node* orig : all.live) {
      Node* copy = mapping->at(orig->id());
      for (int j = 0; j < copy->InputCount(); j++) {
        Node* input = copy->InputAt(j);
        if (input == sentinel)
          copy->ReplaceInput(j, mapping->at(orig->InputAt(j)->id()));
      }
    }

    // Construct the transfer from the previous graph copies to the new copy.
    Node* loop_header = loop_tree->HeaderNode(loop);
    NodeVector* previous =
        copies.size() > 1 ? copies[copies.size() - 2] : nullptr;
    const int backedges = loop_header->op()->ControlInputCount() - 1;
    if (backedges == 1) {
      // Simple case. Map the incoming edges to the loop to the previous copy.
      for (Node* node : loop_tree->HeaderNodes(loop)) {
        Node* copy = mapping->at(node->id());
        // A header node that is not in {all.live} was never copied and still
        // maps to {sentinel}, which has no inputs to replace.
        if (copy == sentinel) continue;
        Node* backedge = node->InputAt(1);
        if (previous) backedge = previous->at(backedge->id());
        copy->ReplaceInput(0, backedge);
      }
    } else {
      // Complex case. Multiple backedges. Introduce a merge for incoming edges.
      tmp_inputs.clear();
      for (int i = 0; i < backedges; i++) {
        Node* backedge = loop_header->InputAt(i + 1);
        if (previous) backedge = previous->at(backedge->id());
        tmp_inputs.push_back(backedge);
      }
      Node* merge =
          graph->NewNode(common->Merge(backedges), backedges, &tmp_inputs[0]);
      for (Node* node : loop_tree->HeaderNodes(loop)) {
        Node* copy = mapping->at(node->id());
        // Same as above: skip header nodes that were never copied.
        if (copy == sentinel) continue;
        if (node == loop_header) {
          // The entry to the loop is the merge.
          copy->ReplaceInput(0, merge);
        } else {
          // Merge inputs to the phi at the loop entry.
          tmp_inputs.clear();
          for (int i = 0; i < backedges; i++) {
            Node* backedge = node->InputAt(i + 1);
            if (previous) backedge = previous->at(backedge->id());
            tmp_inputs.push_back(backedge);
          }
          tmp_inputs.push_back(merge);
          Node* phi =
              graph->NewNode(common->ResizeMergeOrPhi(node->op(), backedges),
                             backedges + 1, &tmp_inputs[0]);
          copy->ReplaceInput(0, phi);
        }
      }
    }
  }

  // Kill the outer loops in the original graph.
  for (LoopTree::Loop* outer = osr_loop->parent(); outer;
       outer = outer->parent()) {
    loop_tree->HeaderNode(outer)->ReplaceUses(dead);
  }

  // Merge the ends of the graph copies. Index -1 stands for the original
  // graph; a Merge feeding End is flattened into its individual inputs.
  Node* end = graph->end();
  tmp_inputs.clear();
  for (int i = -1; i < static_cast<int>(copies.size()); i++) {
    Node* input = end->InputAt(0);
    if (i >= 0) input = copies[i]->at(input->id());
    if (input->opcode() == IrOpcode::kMerge) {
      for (Node* node : input->inputs()) tmp_inputs.push_back(node);
    } else {
      tmp_inputs.push_back(input);
    }
  }
  int count = static_cast<int>(tmp_inputs.size());
  Node* merge = graph->NewNode(common->Merge(count), count, &tmp_inputs[0]);
  end->ReplaceInput(0, merge);

  if (FLAG_trace_turbo_graph) {  // Simple textual RPO.
    OFStream os(stdout);
    os << "-- Graph after OSR duplication -- " << std::endl;
    os << AsRPO(*graph);
  }
}
bool OsrHelper::Deconstruct(JSGraph* jsgraph, CommonOperatorBuilder* common,
Zone* tmp_zone) {
Graph* graph = jsgraph->graph();
@ -57,14 +207,16 @@ bool OsrHelper::Deconstruct(JSGraph* jsgraph, CommonOperatorBuilder* common,
// Analyze the graph to determine how deeply nested the OSR loop is.
LoopTree* loop_tree = LoopFinder::BuildLoopTree(graph, tmp_zone);
Node* dead = graph->NewNode(common->Dead());
LoopTree::Loop* loop = loop_tree->ContainingLoop(osr_loop);
if (loop->depth() > 0) return false; // cannot OSR inner loops yet.
// TODO(titzer): perform loop peeling or graph duplication.
if (loop->depth() > 0) {
PeelOuterLoopsForOsr(graph, common, tmp_zone, dead, loop_tree, loop,
osr_normal_entry, osr_loop_entry);
}
// Replace the normal entry with {Dead} and the loop entry with {Start}
// and run the control reducer to clean up the graph.
osr_normal_entry->ReplaceUses(graph->NewNode(common->Dead()));
osr_normal_entry->ReplaceUses(dead);
osr_loop_entry->ReplaceUses(graph->start());
ControlReducer::ReduceGraph(tmp_zone, jsgraph, common);

View File

@ -755,8 +755,8 @@ struct PrintGraphPhase {
fclose(json_file);
}
OFStream os(stdout);
if (FLAG_trace_turbo_graph) { // Simple textual RPO.
OFStream os(stdout);
os << "-- Graph after " << phase << " -- " << std::endl;
os << AsRPO(*graph);
}

View File

@ -49,8 +49,8 @@ class OsrDeconstructorTester : public HandleAndZoneScope {
start(graph.NewNode(common.Start(1))),
p0(graph.NewNode(common.Parameter(0), start)),
end(graph.NewNode(common.End(), start)),
osr_normal_entry(graph.NewNode(common.OsrNormalEntry(), start)),
osr_loop_entry(graph.NewNode(common.OsrLoopEntry(), start)),
osr_normal_entry(graph.NewNode(common.OsrNormalEntry(), start, start)),
osr_loop_entry(graph.NewNode(common.OsrLoopEntry(), start, start)),
self(graph.NewNode(common.Int32Constant(0xaabbccdd))) {
CHECK(num_values <= kMaxOsrValues);
graph.SetStart(start);
@ -90,20 +90,28 @@ class OsrDeconstructorTester : public HandleAndZoneScope {
return graph.NewNode(common.Phi(kMachAnyTagged, count), count + 1, inputs);
}
Node* NewOsrLoop(int num_backedges, Node* entry = NULL) {
Node* NewLoop(bool is_osr, int num_backedges, Node* entry = NULL) {
CHECK_LT(num_backedges, 4);
CHECK_GE(num_backedges, 0);
int count = 2 + num_backedges;
int count = 1 + num_backedges;
if (entry == NULL) entry = osr_normal_entry;
Node* inputs[5] = {entry, osr_loop_entry, self, self, self};
Node* inputs[5] = {entry, self, self, self, self};
if (is_osr) {
count = 2 + num_backedges;
inputs[1] = osr_loop_entry;
}
Node* loop = graph.NewNode(common.Loop(count), count, inputs);
for (int i = 0; i < num_backedges; i++) {
loop->ReplaceInput(2 + i, loop);
for (int i = 0; i < loop->InputCount(); i++) {
if (loop->InputAt(i) == self) loop->ReplaceInput(i, loop);
}
return loop;
}
// Convenience wrapper: builds a loop via NewLoop with the OSR flag set.
Node* NewOsrLoop(int num_backedges, Node* entry = NULL) {
  const bool is_osr = true;
  return NewLoop(is_osr, num_backedges, entry);
}
};
@ -272,3 +280,178 @@ TEST(Deconstruct_osr_with_body3) {
T.jsgraph.ZeroConstant(), loop);
CheckInputs(ret, osr_phi, T.start, if_false2);
}
struct While {
OsrDeconstructorTester& t;
Node* branch;
Node* if_true;
Node* exit;
Node* loop;
While(OsrDeconstructorTester& R, Node* cond, bool is_osr, int backedges = 1)
: t(R) {
loop = t.NewLoop(is_osr, backedges);
branch = t.graph.NewNode(t.common.Branch(), cond, loop);
if_true = t.graph.NewNode(t.common.IfTrue(), branch);
exit = t.graph.NewNode(t.common.IfFalse(), branch);
loop->ReplaceInput(loop->InputCount() - 1, if_true);
}
void Nest(While& that) {
that.loop->ReplaceInput(that.loop->InputCount() - 1, exit);
this->loop->ReplaceInput(0, that.if_true);
}
Node* Phi(Node* i1, Node* i2, Node* i3) {
if (loop->InputCount() == 2) {
return t.graph.NewNode(t.common.Phi(kMachAnyTagged, 2), i1, i2, loop);
} else {
return t.graph.NewNode(t.common.Phi(kMachAnyTagged, 3), i1, i2, i3, loop);
}
}
};
// Returns the first use of {node} whose opcode equals {opcode}. The caller
// expects such a use to exist; not finding one hits UNREACHABLE().
static Node* FindSuccessor(Node* node, IrOpcode::Value opcode) {
  Node* match = nullptr;
  for (Node* candidate : node->uses()) {
    if (candidate->opcode() != opcode) continue;
    match = candidate;
    break;
  }
  if (match != nullptr) return match;
  UNREACHABLE();  // should have been found.
  return nullptr;
}
// Deconstructs a graph in which the OSR loop ({inner}) is nested inside one
// ordinary loop ({outer}) with a single backedge, then checks that the OSR
// loop is entered from Start and that its exit leads into a fresh copy of the
// outer loop rather than the original one.
TEST(Deconstruct_osr_nested1) {
OsrDeconstructorTester T(1);
// {outer} is a plain loop, {inner} is the OSR loop.
While outer(T, T.p0, false);
While inner(T, T.p0, true);
// Puts {inner} into the body of {outer}: {inner} is entered from
// outer.if_true and inner.exit becomes the last input of outer.loop.
inner.Nest(outer);
// outer.loop has two inputs, so Phi() takes the two-value form and ignores
// the third argument.
Node* outer_phi = outer.Phi(T.p0, T.p0, nullptr);
outer.branch->ReplaceInput(0, outer_phi);
Node* osr_phi = inner.Phi(T.jsgraph.OneConstant(), T.osr_values[0],
T.jsgraph.ZeroConstant());
inner.branch->ReplaceInput(0, osr_phi);
// The value flowing around the outer loop is the inner loop's phi.
outer_phi->ReplaceInput(1, osr_phi);
Node* ret =
T.graph.NewNode(T.common.Return(), outer_phi, T.start, outer.exit);
Node* end = T.graph.NewNode(T.common.End(), ret);
T.graph.SetEnd(end);
OsrHelper helper(0, 0);
helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone());
// Check structure of deconstructed graph.
// Check inner OSR loop is directly connected to start.
CheckInputs(inner.loop, T.start, inner.if_true);
CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), inner.loop);
// Check control transfer to copy of outer loop.
Node* new_outer_loop = FindSuccessor(inner.exit, IrOpcode::kLoop);
Node* new_outer_phi = FindSuccessor(new_outer_loop, IrOpcode::kPhi);
CHECK_NE(new_outer_loop, outer.loop);
CHECK_NE(new_outer_phi, outer_phi);
CheckInputs(new_outer_loop, inner.exit, new_outer_loop->InputAt(1));
// Check structure of outer loop.
Node* new_outer_branch = FindSuccessor(new_outer_loop, IrOpcode::kBranch);
CHECK_NE(new_outer_branch, outer.branch);
CheckInputs(new_outer_branch, new_outer_phi, new_outer_loop);
Node* new_outer_exit = FindSuccessor(new_outer_branch, IrOpcode::kIfFalse);
Node* new_outer_if_true = FindSuccessor(new_outer_branch, IrOpcode::kIfTrue);
// Check structure of return.
end = T.graph.end();
Node* new_ret = end->InputAt(0);
CHECK_EQ(IrOpcode::kReturn, new_ret->opcode());
CheckInputs(new_ret, new_outer_phi, T.start, new_outer_exit);
// Check structure of inner loop.
// The copied inner loop has no OSR value input any more: its phi only has
// the normal-entry value and the backedge value.
Node* new_inner_loop = FindSuccessor(new_outer_if_true, IrOpcode::kLoop);
Node* new_inner_phi = FindSuccessor(new_inner_loop, IrOpcode::kPhi);
CheckInputs(new_inner_phi, T.jsgraph.OneConstant(), T.jsgraph.ZeroConstant(),
new_inner_loop);
CheckInputs(new_outer_phi, osr_phi, new_inner_phi, new_outer_loop);
}
// Same scenario as a singly nested OSR loop, but the enclosing loop has two
// backedges. Checks that the deconstructed graph funnels both backedges
// through a new Merge (plus an entry phi) before entering the copy of the
// outer loop.
TEST(Deconstruct_osr_nested2) {
OsrDeconstructorTester T(1);
// Test multiple backedge outer loop.
While outer(T, T.p0, false, 2);
While inner(T, T.p0, true);
// {inner} is entered from outer.if_true; inner.exit temporarily becomes the
// last input of outer.loop (overwritten below).
inner.Nest(outer);
Node* outer_phi = outer.Phi(T.p0, T.p0, T.p0);
outer.branch->ReplaceInput(0, outer_phi);
Node* osr_phi = inner.Phi(T.jsgraph.OneConstant(), T.osr_values[0],
T.jsgraph.ZeroConstant());
inner.branch->ReplaceInput(0, osr_phi);
// Values carried by the two outer backedges.
outer_phi->ReplaceInput(1, osr_phi);
outer_phi->ReplaceInput(2, T.jsgraph.ZeroConstant());
// Split the inner loop's exit into two control paths and use them as the
// two backedges of the outer loop.
Node* x_branch = T.graph.NewNode(T.common.Branch(), osr_phi, inner.exit);
Node* x_true = T.graph.NewNode(T.common.IfTrue(), x_branch);
Node* x_false = T.graph.NewNode(T.common.IfFalse(), x_branch);
outer.loop->ReplaceInput(1, x_true);
outer.loop->ReplaceInput(2, x_false);
Node* ret =
T.graph.NewNode(T.common.Return(), outer_phi, T.start, outer.exit);
Node* end = T.graph.NewNode(T.common.End(), ret);
T.graph.SetEnd(end);
OsrHelper helper(0, 0);
helper.Deconstruct(&T.jsgraph, &T.common, T.main_zone());
// Check structure of deconstructed graph.
// Check inner OSR loop is directly connected to start.
CheckInputs(inner.loop, T.start, inner.if_true);
CheckInputs(osr_phi, T.osr_values[0], T.jsgraph.ZeroConstant(), inner.loop);
// Check control transfer to copy of outer loop.
Node* new_merge = FindSuccessor(x_true, IrOpcode::kMerge);
CHECK_EQ(new_merge, FindSuccessor(x_false, IrOpcode::kMerge));
CheckInputs(new_merge, x_true, x_false);
Node* new_outer_loop = FindSuccessor(new_merge, IrOpcode::kLoop);
Node* new_outer_phi = FindSuccessor(new_outer_loop, IrOpcode::kPhi);
CHECK_NE(new_outer_loop, outer.loop);
CHECK_NE(new_outer_phi, outer_phi);
// The phi on the new merge selects between the two backedge values.
Node* new_entry_phi = FindSuccessor(new_merge, IrOpcode::kPhi);
CheckInputs(new_entry_phi, osr_phi, T.jsgraph.ZeroConstant(), new_merge);
CHECK_EQ(new_merge, new_outer_loop->InputAt(0));
// Check structure of outer loop.
Node* new_outer_branch = FindSuccessor(new_outer_loop, IrOpcode::kBranch);
CHECK_NE(new_outer_branch, outer.branch);
CheckInputs(new_outer_branch, new_outer_phi, new_outer_loop);
Node* new_outer_exit = FindSuccessor(new_outer_branch, IrOpcode::kIfFalse);
Node* new_outer_if_true = FindSuccessor(new_outer_branch, IrOpcode::kIfTrue);
// Check structure of return.
end = T.graph.end();
Node* new_ret = end->InputAt(0);
CHECK_EQ(IrOpcode::kReturn, new_ret->opcode());
CheckInputs(new_ret, new_outer_phi, T.start, new_outer_exit);
// Check structure of inner loop.
Node* new_inner_loop = FindSuccessor(new_outer_if_true, IrOpcode::kLoop);
Node* new_inner_phi = FindSuccessor(new_inner_loop, IrOpcode::kPhi);
CheckInputs(new_inner_phi, T.jsgraph.OneConstant(), T.jsgraph.ZeroConstant(),
new_inner_loop);
CheckInputs(new_outer_phi, new_entry_phi, new_inner_phi,
T.jsgraph.ZeroConstant(), new_outer_loop);
}

View File

@ -0,0 +1,31 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --use-osr --turbo-osr
// Outer `while (true)` loop with eight `continue` backedges wrapped around an
// inner `for` loop that contains the OSR trigger. Returns the final value of
// the function-scoped `j`.
function foo(a) {
var i = a | 0;
while (true) {
// Each match bumps `i` by one and takes another backedge of the outer loop.
if (i == 0) { i = 1; continue; }
if (i == 1) { i = 2; continue; }
if (i == 2) { i = 3; continue; }
if (i == 3) { i = 4; continue; }
if (i == 4) { i = 5; continue; }
if (i == 5) { i = 6; continue; }
if (i == 6) { i = 7; continue; }
if (i == 7) { i = 8; continue; }
// NOTE(review): this point is only reached when `i` is none of 0..7 (the
// `i == 5` case continues above), so `i == 5` is never true here and
// %OptimizeOsr() is never called -- confirm this is intended.
for (var j = 0; j < 10; j++) { if (i == 5) %OptimizeOsr(); }
break;
}
return j;
}
// Calls {func} twice with 0 (expecting {tv}) and then twice with 9
// (expecting {fv}).
function test(func, tv, fv) {
  var cases = [[tv, 0], [tv, 0], [fv, 9], [fv, 9]];
  for (var n = 0; n < cases.length; n++) {
    var expected = cases[n][0];
    var argument = cases[n][1];
    assertEquals(expected, func(argument));
  }
}
// foo is expected to return 10 for both inputs exercised by test().
test(foo, 10, 10);

View File

@ -0,0 +1,52 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --use-osr --turbo-deoptimization
// Runs a fixed set of sequential and nested while loops. `count` is bumped
// once per loop-body entry, and the body whose pre-increment `count` equals
// {goal} calls %OptimizeOsr(), so {goal} selects which loop requests OSR.
// Prints the final `count` and returns `sum`.
function bar(goal) {
var count = 0;
var sum = 11;
var i = 35;
while (i-- > 33) {
if (count++ == goal) %OptimizeOsr();
sum = sum + i;
}
while (i-- > 31) {
if (count++ == goal) %OptimizeOsr();
// `j` is assigned without `var`, so it is not a local of bar.
j = 9;
while (j-- > 7) {
if (count++ == goal) %OptimizeOsr();
sum = sum + j * 3;
}
while (j-- > 5) {
if (count++ == goal) %OptimizeOsr();
sum = sum + j * 5;
}
}
while (i-- > 29) {
if (count++ == goal) %OptimizeOsr();
while (j-- > 3) {
// `var k` is function-scoped; the `k-- > 6` loop further down reads the
// same variable.
var k = 10;
if (count++ == goal) %OptimizeOsr();
while (k-- > 8) {
if (count++ == goal) %OptimizeOsr();
sum = sum + k * 11;
}
}
while (j-- > 1) {
if (count++ == goal) %OptimizeOsr();
while (k-- > 6) {
if (count++ == goal) %OptimizeOsr();
sum = sum + j * 13;
}
}
}
print(count);
return sum;
}
// Run bar once per goal 0..12, deoptimizing first so every run starts from
// unoptimized code; the result must be 348 regardless of where OSR happens.
for (var i = 0; i < 13; i++) {
%DeoptimizeFunction(bar);
assertEquals(348, bar(i));
}

View File

@ -0,0 +1,63 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --use-osr --turbo-deoptimization
// Template function: its source text is read with toString() by gen(), which
// renames it and replaces each `LOOPn();` placeholder either with
// `%OptimizeOsr();` or with nothing. The LOOPn functions themselves are not
// defined in this file, so bar is not meant to be called directly.
function bar() {
var sum = 11;
var i = 35;
while (i-- > 31) {
LOOP1();
// `j` is assigned without `var`, so it is not a local of this function.
j = 9;
while (j-- > 7) {
LOOP2();
sum = sum + j * 5;
var k = 7;
while (k-- > 5) {
LOOP3();
sum = sum + j * 5;
}
}
}
while (i-- > 29) {
LOOP4();
while (j-- > 3) {
LOOP5();
var k = 10;
while (k-- > 8) {
LOOP6();
sum = sum + k * 11;
}
}
while (j-- > 1) {
LOOP7();
var k = 8;
while (k-- > 6) {
LOOP8();
var m = 9;
while (m-- > 6) {
LOOP9();
sum = sum + k * 13;
}
}
}
}
return sum;
}
// Builds variant number {i} of bar from its source text: the function is
// renamed to "bar<i>", the placeholder `LOOP<i>();` becomes `%OptimizeOsr();`
// and all other `LOOP<j>();` placeholders (j in 1..9) are removed. Returns
// the function produced by evaluating the rewritten source.
function gen(i) {
var body = bar.toString();
// Non-global regexp: only the first "bar" (the function name) is replaced.
body = body.replace(new RegExp("bar"), "bar" + i);
for (var j = 1; j < 10; j++) {
var r = new RegExp("LOOP" + j + "\\(\\);");
if (i == j) body = body.replace(r, "%OptimizeOsr();");
else body = body.replace(r, "");
}
// Direct eval: the generated function is created inside gen's scope, so the
// bare `j = 9` assignment in the template resolves to gen's local `j`.
return eval("(" + body + ")");
}
// Generate one variant per placeholder (OSR requested in loop 1..9) and check
// that each computes the same sum.
for (var i = 1; i < 10; i++) {
var f = gen(i);
assertEquals(1979, f());
}

View File

@ -14,7 +14,6 @@ function f() {
sum += z;
if (i == 21) %OptimizeOsr();
}
if (true) break;
}
return sum;
}

View File

@ -0,0 +1,25 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --use-osr --turbo-osr
// Doubly nested loop: the outer loop runs a single iteration (i = 5, then an
// unconditional break), the inner loop runs 1000 times and adds i + 10 to
// `sum` on each iteration.
function f() {
var sum = 0;
for (var i = 5; i < 6; i++) {
for (var j = 0; j < 1000; j++) {
var x = i + 2;
var y = x + 5;
var z = y + 3;
sum += z;
// NOTE(review): `i` is 5 for the whole inner loop, so `i == 25` is never
// true and %OptimizeOsr() is never called -- confirm this is intended.
if (i == 25) %OptimizeOsr();
}
// Always leaves the outer loop after the first iteration.
if (true) break;
}
return sum;
}
// 1000 iterations * (5 + 2 + 5 + 3) = 15000; repeated three times.
assertEquals(15000, f());
assertEquals(15000, f());
assertEquals(15000, f());

View File

@ -15,9 +15,7 @@ function f() {
sum += z;
if (i == 19) %OptimizeOsr();
}
if (true) break;
}
if (true) break;
}
return sum;
}

View File

@ -0,0 +1,28 @@
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --use-osr --turbo-osr
// Triply nested loop: the two outer loops each run a single iteration
// (m = 99 and i = 5, each followed by an unconditional break), the innermost
// loop runs 1000 times and adds i + 10 to `sum` on each iteration.
function f() {
var sum = 0;
for (var m = 99; m < 100; m++) {
for (var i = 5; i < 6; i++) {
for (var j = 0; j < 1000; j++) {
var x = i + 2;
var y = x + 5;
var z = y + 3;
sum += z;
// NOTE(review): `i` is 5 for the whole inner loop, so `i == 25` is never
// true and %OptimizeOsr() is never called -- confirm this is intended.
if (i == 25) %OptimizeOsr();
}
// Always leaves the `i` loop after the first iteration.
if (true) break;
}
// Always leaves the `m` loop after the first iteration.
if (true) break;
}
return sum;
}
// 1000 iterations * (5 + 2 + 5 + 3) = 15000; repeated three times.
assertEquals(15000, f());
assertEquals(15000, f());
assertEquals(15000, f());