[turboshaft] Port SelectLowering to turboshaft

Drive-by fix: reorder members of RandomAccessStackDominatorNode to
save 8 bytes on the total size of Block.

Bug: v8:12783
Change-Id: I4923490b0d2f4de22ea001eeba44c950c6451633
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3893853
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#83679}
This commit is contained in:
Darius M 2022-10-13 12:07:33 +02:00 committed by V8 LUCI CQ
parent 36366b8d3e
commit 73b070b74f
11 changed files with 194 additions and 67 deletions

View File

@ -2897,6 +2897,7 @@ filegroup(
"src/compiler/turboshaft/recreate-schedule.h", "src/compiler/turboshaft/recreate-schedule.h",
"src/compiler/turboshaft/representations.cc", "src/compiler/turboshaft/representations.cc",
"src/compiler/turboshaft/representations.h", "src/compiler/turboshaft/representations.h",
"src/compiler/turboshaft/select-lowering-assembler.h",
"src/compiler/turboshaft/sidetable.h", "src/compiler/turboshaft/sidetable.h",
"src/compiler/turboshaft/simplify-tf-loops.cc", "src/compiler/turboshaft/simplify-tf-loops.cc",
"src/compiler/turboshaft/simplify-tf-loops.h", "src/compiler/turboshaft/simplify-tf-loops.h",

View File

@ -2927,6 +2927,7 @@ v8_header_set("v8_internal_headers") {
"src/compiler/turboshaft/optimization-phase.h", "src/compiler/turboshaft/optimization-phase.h",
"src/compiler/turboshaft/recreate-schedule.h", "src/compiler/turboshaft/recreate-schedule.h",
"src/compiler/turboshaft/representations.h", "src/compiler/turboshaft/representations.h",
"src/compiler/turboshaft/select-lowering-assembler.h",
"src/compiler/turboshaft/sidetable.h", "src/compiler/turboshaft/sidetable.h",
"src/compiler/turboshaft/simplify-tf-loops.h", "src/compiler/turboshaft/simplify-tf-loops.h",
"src/compiler/turboshaft/utils.h", "src/compiler/turboshaft/utils.h",

View File

@ -86,6 +86,7 @@
#include "src/compiler/turboshaft/machine-optimization-assembler.h" #include "src/compiler/turboshaft/machine-optimization-assembler.h"
#include "src/compiler/turboshaft/optimization-phase.h" #include "src/compiler/turboshaft/optimization-phase.h"
#include "src/compiler/turboshaft/recreate-schedule.h" #include "src/compiler/turboshaft/recreate-schedule.h"
#include "src/compiler/turboshaft/select-lowering-assembler.h"
#include "src/compiler/turboshaft/simplify-tf-loops.h" #include "src/compiler/turboshaft/simplify-tf-loops.h"
#include "src/compiler/turboshaft/value-numbering-assembler.h" #include "src/compiler/turboshaft/value-numbering-assembler.h"
#include "src/compiler/type-narrowing-reducer.h" #include "src/compiler/type-narrowing-reducer.h"
@ -1959,30 +1960,43 @@ struct LateOptimizationPhase {
DECL_PIPELINE_PHASE_CONSTANTS(LateOptimization) DECL_PIPELINE_PHASE_CONSTANTS(LateOptimization)
void Run(PipelineData* data, Zone* temp_zone) { void Run(PipelineData* data, Zone* temp_zone) {
GraphReducer graph_reducer( if (data->HasTurboshaftGraph()) {
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(), // TODO(dmercadier,tebbi): add missing assemblers (LateEscapeAnalysis,
data->jsgraph()->Dead(), data->observe_node_manager()); // BranchElimination, MachineOperatorReducer, CommonOperatorReducer).
LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(), turboshaft::OptimizationPhase<turboshaft::LivenessAnalyzer,
data->common(), temp_zone); turboshaft::SelectLoweringAssembler<
BranchElimination branch_condition_elimination(&graph_reducer, turboshaft::ValueNumberingAssembler>>::
data->jsgraph(), temp_zone); Run(&data->turboshaft_graph(), temp_zone, data->node_origins(),
DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(), turboshaft::VisitOrder::kDominator);
data->common(), temp_zone); } else {
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone()); GraphReducer graph_reducer(temp_zone, data->graph(),
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph()); &data->info()->tick_counter(), data->broker(),
CommonOperatorReducer common_reducer( data->jsgraph()->Dead(),
&graph_reducer, data->graph(), data->broker(), data->common(), data->observe_node_manager());
data->machine(), temp_zone, BranchSemantics::kMachine); LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(),
JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone); data->common(), temp_zone);
SelectLowering select_lowering(&graph_assembler, data->graph()); BranchElimination branch_condition_elimination(
AddReducer(data, &graph_reducer, &escape_analysis); &graph_reducer, data->jsgraph(), temp_zone);
AddReducer(data, &graph_reducer, &branch_condition_elimination); DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(),
AddReducer(data, &graph_reducer, &dead_code_elimination); data->common(), temp_zone);
AddReducer(data, &graph_reducer, &machine_reducer); ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
AddReducer(data, &graph_reducer, &common_reducer); MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph());
AddReducer(data, &graph_reducer, &select_lowering); CommonOperatorReducer common_reducer(
AddReducer(data, &graph_reducer, &value_numbering); &graph_reducer, data->graph(), data->broker(), data->common(),
graph_reducer.ReduceGraph(); data->machine(), temp_zone, BranchSemantics::kMachine);
JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone);
SelectLowering select_lowering(&graph_assembler, data->graph());
AddReducer(data, &graph_reducer, &escape_analysis);
AddReducer(data, &graph_reducer, &branch_condition_elimination);
AddReducer(data, &graph_reducer, &dead_code_elimination);
AddReducer(data, &graph_reducer, &machine_reducer);
AddReducer(data, &graph_reducer, &common_reducer);
if (!FLAG_turboshaft) {
AddReducer(data, &graph_reducer, &select_lowering);
}
AddReducer(data, &graph_reducer, &value_numbering);
graph_reducer.ReduceGraph();
}
} }
}; };
@ -3005,6 +3019,8 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
} }
Run<PrintTurboshaftGraphPhase>(BuildTurboshaftPhase::phase_name()); Run<PrintTurboshaftGraphPhase>(BuildTurboshaftPhase::phase_name());
Run<LateOptimizationPhase>();
Run<OptimizeTurboshaftPhase>(); Run<OptimizeTurboshaftPhase>();
Run<PrintTurboshaftGraphPhase>(OptimizeTurboshaftPhase::phase_name()); Run<PrintTurboshaftGraphPhase>(OptimizeTurboshaftPhase::phase_name());

View File

@ -391,15 +391,6 @@ class AssemblerInterface : public Superclass {
#undef DECL_SINGLE_REP_UNARY #undef DECL_SINGLE_REP_UNARY
#undef DECL_MULTI_REP_UNARY #undef DECL_MULTI_REP_UNARY
OpIndex Word32Select(OpIndex condition, OpIndex left, OpIndex right) {
return subclass().Select(condition, left, right,
WordRepresentation::Word32());
}
OpIndex Word64Select(OpIndex condition, OpIndex left, OpIndex right) {
return subclass().Select(condition, left, right,
WordRepresentation::Word64());
}
OpIndex Word32Constant(uint32_t value) { OpIndex Word32Constant(uint32_t value) {
return subclass().Constant(ConstantOp::Kind::kWord32, uint64_t{value}); return subclass().Constant(ConstantOp::Kind::kWord32, uint64_t{value});
} }

View File

@ -567,12 +567,26 @@ OpIndex GraphBuilder::Process(
RegisterRepresentation::PointerSized(), RegisterRepresentation::PointerSized(),
RegisterRepresentation::Tagged()); RegisterRepresentation::Tagged());
case IrOpcode::kSelect: {
OpIndex cond = Map(node->InputAt(0));
OpIndex vtrue = Map(node->InputAt(1));
OpIndex vfalse = Map(node->InputAt(2));
const SelectParameters& params = SelectParametersOf(op);
return assembler.Select(cond, vtrue, vfalse,
RegisterRepresentation::FromMachineRepresentation(
params.representation()),
params.hint(), SelectOp::Implementation::kBranch);
}
case IrOpcode::kWord32Select: case IrOpcode::kWord32Select:
return assembler.Word32Select( return assembler.Select(
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2))); Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)),
RegisterRepresentation::Word32(), BranchHint::kNone,
SelectOp::Implementation::kCMove);
case IrOpcode::kWord64Select: case IrOpcode::kWord64Select:
return assembler.Word64Select( return assembler.Select(
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2))); Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)),
RegisterRepresentation::Word64(), BranchHint::kNone,
SelectOp::Implementation::kCMove);
case IrOpcode::kLoad: case IrOpcode::kLoad:
case IrOpcode::kLoadImmutable: case IrOpcode::kLoadImmutable:

View File

@ -253,13 +253,14 @@ class RandomAccessStackDominatorNode
friend class Block; friend class Block;
#endif #endif
int len_ = 0;
Derived* nxt_ = nullptr;
Derived* jmp_ = nullptr;
// Myers' original datastructure requires to often check jmp_->len_, which is // Myers' original datastructure requires to often check jmp_->len_, which is
// not so great on modern computers (memory access, caches & co). To speed up // not so great on modern computers (memory access, caches & co). To speed up
// things a bit, we store here jmp_len_. // things a bit, we store here jmp_len_.
int jmp_len_ = 0; int jmp_len_ = 0;
int len_ = 0;
Derived* nxt_ = nullptr;
Derived* jmp_ = nullptr;
}; };
// A basic block // A basic block
@ -479,15 +480,17 @@ class Graph {
V8_INLINE bool Add(Block* block) { V8_INLINE bool Add(Block* block) {
DCHECK_EQ(block->graph_generation_, generation_); DCHECK_EQ(block->graph_generation_, generation_);
if (!bound_blocks_.empty() && !block->HasPredecessors()) return false; if (!bound_blocks_.empty() && !block->HasPredecessors()) return false;
bool deferred = true; if (!block->IsDeferred()) {
for (Block* pred = block->last_predecessor_; pred != nullptr; bool deferred = true;
pred = pred->neighboring_predecessor_) { for (Block* pred = block->last_predecessor_; pred != nullptr;
if (!pred->IsDeferred()) { pred = pred->neighboring_predecessor_) {
deferred = false; if (!pred->IsDeferred()) {
break; deferred = false;
break;
}
} }
block->SetDeferred(deferred);
} }
block->SetDeferred(deferred);
DCHECK(!block->begin_.valid()); DCHECK(!block->begin_.valid());
block->begin_ = next_operation_index(); block->begin_ = next_operation_index();
DCHECK_EQ(block->index_, BlockIndex::Invalid()); DCHECK_EQ(block->index_, BlockIndex::Invalid());

View File

@ -254,6 +254,15 @@ std::ostream& operator<<(std::ostream& os, Float64InsertWord32Op::Kind kind) {
} }
} }
std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind) {
switch (kind) {
case SelectOp::Implementation::kBranch:
return os << "Branch";
case SelectOp::Implementation::kCMove:
return os << "CMove";
}
}
std::ostream& operator<<(std::ostream& os, FrameConstantOp::Kind kind) { std::ostream& operator<<(std::ostream& os, FrameConstantOp::Kind kind) {
switch (kind) { switch (kind) {
case FrameConstantOp::Kind::kStackCheckOffset: case FrameConstantOp::Kind::kStackCheckOffset:

View File

@ -20,6 +20,7 @@
#include "src/base/vector.h" #include "src/base/vector.h"
#include "src/codegen/external-reference.h" #include "src/codegen/external-reference.h"
#include "src/common/globals.h" #include "src/common/globals.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/globals.h" #include "src/compiler/globals.h"
#include "src/compiler/turboshaft/fast-hash.h" #include "src/compiler/turboshaft/fast-hash.h"
#include "src/compiler/turboshaft/representations.h" #include "src/compiler/turboshaft/representations.h"
@ -1080,23 +1081,33 @@ struct TaggedBitcastOp : FixedArityOperationT<1, TaggedBitcastOp> {
}; };
struct SelectOp : FixedArityOperationT<3, SelectOp> { struct SelectOp : FixedArityOperationT<3, SelectOp> {
// TODO(12783): Support all register reps. enum class Implementation : uint8_t { kBranch, kCMove };
WordRepresentation rep;
static constexpr OpProperties properties = OpProperties::Pure(); static constexpr OpProperties properties = OpProperties::Pure();
RegisterRepresentation rep;
BranchHint hint;
Implementation implem;
OpIndex condition() const { return Base::input(0); } SelectOp(OpIndex cond, OpIndex vtrue, OpIndex vfalse,
OpIndex left() const { return Base::input(1); } RegisterRepresentation rep, BranchHint hint, Implementation implem)
OpIndex right() const { return Base::input(2); } : Base(cond, vtrue, vfalse), rep(rep), hint(hint), implem(implem) {
#ifdef DEBUG
SelectOp(OpIndex condition, OpIndex left, OpIndex right, if (implem == Implementation::kCMove) {
WordRepresentation rep) DCHECK((rep == RegisterRepresentation::Word32() &&
: Base(condition, left, right), rep(rep) { SupportedOperations::word32_select()) ||
DCHECK(rep == WordRepresentation::Word32() (rep == RegisterRepresentation::Word64() &&
? SupportedOperations::word32_select() SupportedOperations::word64_select()));
: SupportedOperations::word64_select()); }
#endif
} }
auto options() const { return std::tuple{rep}; }
OpIndex cond() const { return input(0); }
OpIndex vtrue() const { return input(1); }
OpIndex vfalse() const { return input(2); }
auto options() const { return std::tuple{rep, hint, implem}; }
}; };
std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind);
struct PhiOp : OperationT<PhiOp> { struct PhiOp : OperationT<PhiOp> {
RegisterRepresentation rep; RegisterRepresentation rep;

View File

@ -465,9 +465,9 @@ struct OptimizationPhase<Analyzer, Assembler>::Impl {
return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to); return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to);
} }
OpIndex ReduceSelect(const SelectOp& op) { OpIndex ReduceSelect(const SelectOp& op) {
return assembler.Select(MapToNewGraph(op.condition()), return assembler.Select(MapToNewGraph(op.cond()), MapToNewGraph(op.vtrue()),
MapToNewGraph(op.left()), MapToNewGraph(op.right()), MapToNewGraph(op.vfalse()), op.rep, op.hint,
op.rep); op.implem);
} }
OpIndex ReduceConstant(const ConstantOp& op) { OpIndex ReduceConstant(const ConstantOp& op) {
return assembler.Constant(op.kind, op.storage); return assembler.Constant(op.kind, op.storage);

View File

@ -835,13 +835,21 @@ Node* ScheduleBuilder::ProcessOperation(const TaggedBitcastOp& op) {
return AddNode(o, {GetNode(op.input())}); return AddNode(o, {GetNode(op.input())});
} }
Node* ScheduleBuilder::ProcessOperation(const SelectOp& op) { Node* ScheduleBuilder::ProcessOperation(const SelectOp& op) {
const Operator* o = op.rep == WordRepresentation::Word32() // If there is a Select, then it should only be one that is supported by the
// machine, and it should be meant to be implemented with cmove.
DCHECK_EQ(op.implem, SelectOp::Implementation::kCMove);
DCHECK((op.rep == RegisterRepresentation::Word32() &&
SupportedOperations::word32_select()) ||
(op.rep == RegisterRepresentation::Word64() &&
SupportedOperations::word64_select()));
const Operator* o = op.rep == RegisterRepresentation::Word32()
? machine.Word32Select().op() ? machine.Word32Select().op()
: machine.Word64Select().op(); : machine.Word64Select().op();
return AddNode(
o, {GetNode(op.condition()), GetNode(op.left()), GetNode(op.right())});
}
return AddNode(
o, {GetNode(op.cond()), GetNode(op.vtrue()), GetNode(op.vfalse())});
}
Node* ScheduleBuilder::ProcessOperation(const PendingLoopPhiOp& op) { Node* ScheduleBuilder::ProcessOperation(const PendingLoopPhiOp& op) {
UNREACHABLE(); UNREACHABLE();
} }

View File

@ -0,0 +1,73 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_
#define V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_
#include "src/base/vector.h"
#include "src/compiler/common-operator.h"
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/operations.h"
namespace v8::internal::compiler::turboshaft {
// Lowers Select operations to diamonds.
//
// A Select is conceptually somewhat similar to a ternary if:
//
// res = Select(cond, val_true, val_false)
//
// means:
//
// res = cond ? val_true : val_false
//
// SelectLoweringAssembler lowers such operations into:
//
// if (cond) {
// res = val_true
// } else {
// res = val_false
// }
template <class Base>
class SelectLoweringAssembler
    : public AssemblerInterface<SelectLoweringAssembler<Base>, Base> {
 public:
  SelectLoweringAssembler(Graph* graph, Zone* phase_zone)
      : AssemblerInterface<SelectLoweringAssembler, Base>(graph, phase_zone) {}

  // Emits a Select, or its branch/merge/phi expansion.
  //
  // Selects tagged with Implementation::kCMove are passed through unchanged to
  // the next assembler in the stack. Any other Select is replaced by a Branch
  // on {cond} to two empty blocks that both jump to a common merge block; the
  // returned value is a Phi of {vtrue, vfalse} in that merge block.
  OpIndex Select(OpIndex cond, OpIndex vtrue, OpIndex vfalse,
                 RegisterRepresentation rep, BranchHint hint,
                 SelectOp::Implementation implem) {
    const bool keep_as_select = implem == SelectOp::Implementation::kCMove;
    if (keep_as_select) {
      // CMove selects are not lowered here.
      return Base::Select(cond, vtrue, vfalse, rep, hint, implem);
    }

    Block* then_arm = this->NewBlock(Block::Kind::kBranchTarget);
    Block* else_arm = this->NewBlock(Block::Kind::kBranchTarget);
    Block* join = this->NewBlock(Block::Kind::kMerge);

    // The arm that the hint says is not expected to run is marked deferred.
    // Without a hint, neither arm is marked.
    switch (hint) {
      case BranchHint::kTrue:
        else_arm->SetDeferred(true);
        break;
      case BranchHint::kFalse:
        then_arm->SetDeferred(true);
        break;
      default:
        break;
    }

    this->Branch(cond, then_arm, else_arm);

    // Both arms are empty: each one only jumps to the merge block.
    this->Bind(then_arm);
    this->Goto(join);

    this->Bind(else_arm);
    this->Goto(join);

    // Phi inputs are listed in the same order in which the arms jumped to
    // the merge block: true arm first, then false arm.
    this->Bind(join);
    return this->Phi(base::VectorOf({vtrue, vfalse}), rep);
  }
};
} // namespace v8::internal::compiler::turboshaft
#endif // V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_