[turboshaft] Port SelectLowering to turboshaft
Drive-by fix: reorder members of RandomAccessStackDominatorNode to save 8 bytes on the total size of Block. Bug: v8:12783 Change-Id: I4923490b0d2f4de22ea001eeba44c950c6451633 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3893853 Reviewed-by: Tobias Tebbi <tebbi@chromium.org> Commit-Queue: Darius Mercadier <dmercadier@chromium.org> Cr-Commit-Position: refs/heads/main@{#83679}
This commit is contained in:
parent
36366b8d3e
commit
73b070b74f
@ -2897,6 +2897,7 @@ filegroup(
|
||||
"src/compiler/turboshaft/recreate-schedule.h",
|
||||
"src/compiler/turboshaft/representations.cc",
|
||||
"src/compiler/turboshaft/representations.h",
|
||||
"src/compiler/turboshaft/select-lowering-assembler.h",
|
||||
"src/compiler/turboshaft/sidetable.h",
|
||||
"src/compiler/turboshaft/simplify-tf-loops.cc",
|
||||
"src/compiler/turboshaft/simplify-tf-loops.h",
|
||||
|
1
BUILD.gn
1
BUILD.gn
@ -2927,6 +2927,7 @@ v8_header_set("v8_internal_headers") {
|
||||
"src/compiler/turboshaft/optimization-phase.h",
|
||||
"src/compiler/turboshaft/recreate-schedule.h",
|
||||
"src/compiler/turboshaft/representations.h",
|
||||
"src/compiler/turboshaft/select-lowering-assembler.h",
|
||||
"src/compiler/turboshaft/sidetable.h",
|
||||
"src/compiler/turboshaft/simplify-tf-loops.h",
|
||||
"src/compiler/turboshaft/utils.h",
|
||||
|
@ -86,6 +86,7 @@
|
||||
#include "src/compiler/turboshaft/machine-optimization-assembler.h"
|
||||
#include "src/compiler/turboshaft/optimization-phase.h"
|
||||
#include "src/compiler/turboshaft/recreate-schedule.h"
|
||||
#include "src/compiler/turboshaft/select-lowering-assembler.h"
|
||||
#include "src/compiler/turboshaft/simplify-tf-loops.h"
|
||||
#include "src/compiler/turboshaft/value-numbering-assembler.h"
|
||||
#include "src/compiler/type-narrowing-reducer.h"
|
||||
@ -1959,30 +1960,43 @@ struct LateOptimizationPhase {
|
||||
DECL_PIPELINE_PHASE_CONSTANTS(LateOptimization)
|
||||
|
||||
void Run(PipelineData* data, Zone* temp_zone) {
|
||||
GraphReducer graph_reducer(
|
||||
temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(),
|
||||
data->jsgraph()->Dead(), data->observe_node_manager());
|
||||
LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(),
|
||||
data->common(), temp_zone);
|
||||
BranchElimination branch_condition_elimination(&graph_reducer,
|
||||
data->jsgraph(), temp_zone);
|
||||
DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(),
|
||||
data->common(), temp_zone);
|
||||
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
|
||||
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph());
|
||||
CommonOperatorReducer common_reducer(
|
||||
&graph_reducer, data->graph(), data->broker(), data->common(),
|
||||
data->machine(), temp_zone, BranchSemantics::kMachine);
|
||||
JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone);
|
||||
SelectLowering select_lowering(&graph_assembler, data->graph());
|
||||
AddReducer(data, &graph_reducer, &escape_analysis);
|
||||
AddReducer(data, &graph_reducer, &branch_condition_elimination);
|
||||
AddReducer(data, &graph_reducer, &dead_code_elimination);
|
||||
AddReducer(data, &graph_reducer, &machine_reducer);
|
||||
AddReducer(data, &graph_reducer, &common_reducer);
|
||||
AddReducer(data, &graph_reducer, &select_lowering);
|
||||
AddReducer(data, &graph_reducer, &value_numbering);
|
||||
graph_reducer.ReduceGraph();
|
||||
if (data->HasTurboshaftGraph()) {
|
||||
// TODO(dmercadier,tebbi): add missing assemblers (LateEscapeAnalysis,
|
||||
// BranchElimination, MachineOperatorReducer, CommonOperatorReducer).
|
||||
turboshaft::OptimizationPhase<turboshaft::LivenessAnalyzer,
|
||||
turboshaft::SelectLoweringAssembler<
|
||||
turboshaft::ValueNumberingAssembler>>::
|
||||
Run(&data->turboshaft_graph(), temp_zone, data->node_origins(),
|
||||
turboshaft::VisitOrder::kDominator);
|
||||
} else {
|
||||
GraphReducer graph_reducer(temp_zone, data->graph(),
|
||||
&data->info()->tick_counter(), data->broker(),
|
||||
data->jsgraph()->Dead(),
|
||||
data->observe_node_manager());
|
||||
LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(),
|
||||
data->common(), temp_zone);
|
||||
BranchElimination branch_condition_elimination(
|
||||
&graph_reducer, data->jsgraph(), temp_zone);
|
||||
DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(),
|
||||
data->common(), temp_zone);
|
||||
ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone());
|
||||
MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph());
|
||||
CommonOperatorReducer common_reducer(
|
||||
&graph_reducer, data->graph(), data->broker(), data->common(),
|
||||
data->machine(), temp_zone, BranchSemantics::kMachine);
|
||||
JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone);
|
||||
SelectLowering select_lowering(&graph_assembler, data->graph());
|
||||
AddReducer(data, &graph_reducer, &escape_analysis);
|
||||
AddReducer(data, &graph_reducer, &branch_condition_elimination);
|
||||
AddReducer(data, &graph_reducer, &dead_code_elimination);
|
||||
AddReducer(data, &graph_reducer, &machine_reducer);
|
||||
AddReducer(data, &graph_reducer, &common_reducer);
|
||||
if (!FLAG_turboshaft) {
|
||||
AddReducer(data, &graph_reducer, &select_lowering);
|
||||
}
|
||||
AddReducer(data, &graph_reducer, &value_numbering);
|
||||
graph_reducer.ReduceGraph();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -3005,6 +3019,8 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
|
||||
}
|
||||
Run<PrintTurboshaftGraphPhase>(BuildTurboshaftPhase::phase_name());
|
||||
|
||||
Run<LateOptimizationPhase>();
|
||||
|
||||
Run<OptimizeTurboshaftPhase>();
|
||||
Run<PrintTurboshaftGraphPhase>(OptimizeTurboshaftPhase::phase_name());
|
||||
|
||||
|
@ -391,15 +391,6 @@ class AssemblerInterface : public Superclass {
|
||||
#undef DECL_SINGLE_REP_UNARY
|
||||
#undef DECL_MULTI_REP_UNARY
|
||||
|
||||
OpIndex Word32Select(OpIndex condition, OpIndex left, OpIndex right) {
|
||||
return subclass().Select(condition, left, right,
|
||||
WordRepresentation::Word32());
|
||||
}
|
||||
OpIndex Word64Select(OpIndex condition, OpIndex left, OpIndex right) {
|
||||
return subclass().Select(condition, left, right,
|
||||
WordRepresentation::Word64());
|
||||
}
|
||||
|
||||
OpIndex Word32Constant(uint32_t value) {
|
||||
return subclass().Constant(ConstantOp::Kind::kWord32, uint64_t{value});
|
||||
}
|
||||
|
@ -567,12 +567,26 @@ OpIndex GraphBuilder::Process(
|
||||
RegisterRepresentation::PointerSized(),
|
||||
RegisterRepresentation::Tagged());
|
||||
|
||||
case IrOpcode::kSelect: {
|
||||
OpIndex cond = Map(node->InputAt(0));
|
||||
OpIndex vtrue = Map(node->InputAt(1));
|
||||
OpIndex vfalse = Map(node->InputAt(2));
|
||||
const SelectParameters& params = SelectParametersOf(op);
|
||||
return assembler.Select(cond, vtrue, vfalse,
|
||||
RegisterRepresentation::FromMachineRepresentation(
|
||||
params.representation()),
|
||||
params.hint(), SelectOp::Implementation::kBranch);
|
||||
}
|
||||
case IrOpcode::kWord32Select:
|
||||
return assembler.Word32Select(
|
||||
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)));
|
||||
return assembler.Select(
|
||||
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)),
|
||||
RegisterRepresentation::Word32(), BranchHint::kNone,
|
||||
SelectOp::Implementation::kCMove);
|
||||
case IrOpcode::kWord64Select:
|
||||
return assembler.Word64Select(
|
||||
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)));
|
||||
return assembler.Select(
|
||||
Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)),
|
||||
RegisterRepresentation::Word64(), BranchHint::kNone,
|
||||
SelectOp::Implementation::kCMove);
|
||||
|
||||
case IrOpcode::kLoad:
|
||||
case IrOpcode::kLoadImmutable:
|
||||
|
@ -253,13 +253,14 @@ class RandomAccessStackDominatorNode
|
||||
friend class Block;
|
||||
#endif
|
||||
|
||||
int len_ = 0;
|
||||
Derived* nxt_ = nullptr;
|
||||
Derived* jmp_ = nullptr;
|
||||
// Myers' original data structure often needs to check jmp_->len_, which is
|
||||
// not so great on modern computers (memory access, caches & co). To speed up
|
||||
// things a bit, we store jmp_len_ here.
|
||||
int jmp_len_ = 0;
|
||||
|
||||
int len_ = 0;
|
||||
Derived* nxt_ = nullptr;
|
||||
Derived* jmp_ = nullptr;
|
||||
};
|
||||
|
||||
// A basic block
|
||||
@ -479,15 +480,17 @@ class Graph {
|
||||
V8_INLINE bool Add(Block* block) {
|
||||
DCHECK_EQ(block->graph_generation_, generation_);
|
||||
if (!bound_blocks_.empty() && !block->HasPredecessors()) return false;
|
||||
bool deferred = true;
|
||||
for (Block* pred = block->last_predecessor_; pred != nullptr;
|
||||
pred = pred->neighboring_predecessor_) {
|
||||
if (!pred->IsDeferred()) {
|
||||
deferred = false;
|
||||
break;
|
||||
if (!block->IsDeferred()) {
|
||||
bool deferred = true;
|
||||
for (Block* pred = block->last_predecessor_; pred != nullptr;
|
||||
pred = pred->neighboring_predecessor_) {
|
||||
if (!pred->IsDeferred()) {
|
||||
deferred = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
block->SetDeferred(deferred);
|
||||
}
|
||||
block->SetDeferred(deferred);
|
||||
DCHECK(!block->begin_.valid());
|
||||
block->begin_ = next_operation_index();
|
||||
DCHECK_EQ(block->index_, BlockIndex::Invalid());
|
||||
|
@ -254,6 +254,15 @@ std::ostream& operator<<(std::ostream& os, Float64InsertWord32Op::Kind kind) {
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind) {
|
||||
switch (kind) {
|
||||
case SelectOp::Implementation::kBranch:
|
||||
return os << "Branch";
|
||||
case SelectOp::Implementation::kCMove:
|
||||
return os << "CMove";
|
||||
}
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, FrameConstantOp::Kind kind) {
|
||||
switch (kind) {
|
||||
case FrameConstantOp::Kind::kStackCheckOffset:
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "src/base/vector.h"
|
||||
#include "src/codegen/external-reference.h"
|
||||
#include "src/common/globals.h"
|
||||
#include "src/compiler/common-operator.h"
|
||||
#include "src/compiler/globals.h"
|
||||
#include "src/compiler/turboshaft/fast-hash.h"
|
||||
#include "src/compiler/turboshaft/representations.h"
|
||||
@ -1080,23 +1081,33 @@ struct TaggedBitcastOp : FixedArityOperationT<1, TaggedBitcastOp> {
|
||||
};
|
||||
|
||||
struct SelectOp : FixedArityOperationT<3, SelectOp> {
|
||||
// TODO(12783): Support all register reps.
|
||||
WordRepresentation rep;
|
||||
enum class Implementation : uint8_t { kBranch, kCMove };
|
||||
|
||||
static constexpr OpProperties properties = OpProperties::Pure();
|
||||
RegisterRepresentation rep;
|
||||
BranchHint hint;
|
||||
Implementation implem;
|
||||
|
||||
OpIndex condition() const { return Base::input(0); }
|
||||
OpIndex left() const { return Base::input(1); }
|
||||
OpIndex right() const { return Base::input(2); }
|
||||
|
||||
SelectOp(OpIndex condition, OpIndex left, OpIndex right,
|
||||
WordRepresentation rep)
|
||||
: Base(condition, left, right), rep(rep) {
|
||||
DCHECK(rep == WordRepresentation::Word32()
|
||||
? SupportedOperations::word32_select()
|
||||
: SupportedOperations::word64_select());
|
||||
SelectOp(OpIndex cond, OpIndex vtrue, OpIndex vfalse,
|
||||
RegisterRepresentation rep, BranchHint hint, Implementation implem)
|
||||
: Base(cond, vtrue, vfalse), rep(rep), hint(hint), implem(implem) {
|
||||
#ifdef DEBUG
|
||||
if (implem == Implementation::kCMove) {
|
||||
DCHECK((rep == RegisterRepresentation::Word32() &&
|
||||
SupportedOperations::word32_select()) ||
|
||||
(rep == RegisterRepresentation::Word64() &&
|
||||
SupportedOperations::word64_select()));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
auto options() const { return std::tuple{rep}; }
|
||||
|
||||
OpIndex cond() const { return input(0); }
|
||||
OpIndex vtrue() const { return input(1); }
|
||||
OpIndex vfalse() const { return input(2); }
|
||||
|
||||
auto options() const { return std::tuple{rep, hint, implem}; }
|
||||
};
|
||||
std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind);
|
||||
|
||||
struct PhiOp : OperationT<PhiOp> {
|
||||
RegisterRepresentation rep;
|
||||
|
@ -465,9 +465,9 @@ struct OptimizationPhase<Analyzer, Assembler>::Impl {
|
||||
return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to);
|
||||
}
|
||||
OpIndex ReduceSelect(const SelectOp& op) {
|
||||
return assembler.Select(MapToNewGraph(op.condition()),
|
||||
MapToNewGraph(op.left()), MapToNewGraph(op.right()),
|
||||
op.rep);
|
||||
return assembler.Select(MapToNewGraph(op.cond()), MapToNewGraph(op.vtrue()),
|
||||
MapToNewGraph(op.vfalse()), op.rep, op.hint,
|
||||
op.implem);
|
||||
}
|
||||
OpIndex ReduceConstant(const ConstantOp& op) {
|
||||
return assembler.Constant(op.kind, op.storage);
|
||||
|
@ -835,13 +835,21 @@ Node* ScheduleBuilder::ProcessOperation(const TaggedBitcastOp& op) {
|
||||
return AddNode(o, {GetNode(op.input())});
|
||||
}
|
||||
Node* ScheduleBuilder::ProcessOperation(const SelectOp& op) {
|
||||
const Operator* o = op.rep == WordRepresentation::Word32()
|
||||
// If there is a Select, then it should only be one that is supported by the
|
||||
// machine, and it should be meant to be implemented with cmove.
|
||||
DCHECK_EQ(op.implem, SelectOp::Implementation::kCMove);
|
||||
DCHECK((op.rep == RegisterRepresentation::Word32() &&
|
||||
SupportedOperations::word32_select()) ||
|
||||
(op.rep == RegisterRepresentation::Word64() &&
|
||||
SupportedOperations::word64_select()));
|
||||
|
||||
const Operator* o = op.rep == RegisterRepresentation::Word32()
|
||||
? machine.Word32Select().op()
|
||||
: machine.Word64Select().op();
|
||||
return AddNode(
|
||||
o, {GetNode(op.condition()), GetNode(op.left()), GetNode(op.right())});
|
||||
}
|
||||
|
||||
return AddNode(
|
||||
o, {GetNode(op.cond()), GetNode(op.vtrue()), GetNode(op.vfalse())});
|
||||
}
|
||||
Node* ScheduleBuilder::ProcessOperation(const PendingLoopPhiOp& op) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
73
src/compiler/turboshaft/select-lowering-assembler.h
Normal file
73
src/compiler/turboshaft/select-lowering-assembler.h
Normal file
@ -0,0 +1,73 @@
|
||||
// Copyright 2022 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_
|
||||
#define V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_
|
||||
|
||||
#include "src/base/vector.h"
|
||||
#include "src/compiler/common-operator.h"
|
||||
#include "src/compiler/turboshaft/assembler.h"
|
||||
#include "src/compiler/turboshaft/operations.h"
|
||||
|
||||
namespace v8::internal::compiler::turboshaft {
|
||||
|
||||
// Lowers Select operations to diamonds.
|
||||
//
|
||||
// A Select is conceptually somewhat similar to a ternary if:
|
||||
//
|
||||
// res = Select(cond, val_true, val_false)
|
||||
//
|
||||
// means:
|
||||
//
|
||||
// res = cond ? val_true : val_false
|
||||
//
|
||||
// SelectLoweringAssembler lowers such operations into:
|
||||
//
|
||||
// if (cond) {
|
||||
// res = val_true
|
||||
// } else {
|
||||
// res = val_false
|
||||
// }
|
||||
|
||||
template <class Base>
|
||||
class SelectLoweringAssembler
|
||||
: public AssemblerInterface<SelectLoweringAssembler<Base>, Base> {
|
||||
public:
|
||||
SelectLoweringAssembler(Graph* graph, Zone* phase_zone)
|
||||
: AssemblerInterface<SelectLoweringAssembler, Base>(graph, phase_zone) {}
|
||||
|
||||
OpIndex Select(OpIndex cond, OpIndex vtrue, OpIndex vfalse,
|
||||
RegisterRepresentation rep, BranchHint hint,
|
||||
SelectOp::Implementation implem) {
|
||||
if (implem == SelectOp::Implementation::kCMove) {
|
||||
// We do not lower Select operations that should be implemented with
|
||||
// CMove.
|
||||
return Base::Select(cond, vtrue, vfalse, rep, hint, implem);
|
||||
}
|
||||
Block* true_block = this->NewBlock(Block::Kind::kBranchTarget);
|
||||
Block* false_block = this->NewBlock(Block::Kind::kBranchTarget);
|
||||
Block* merge_block = this->NewBlock(Block::Kind::kMerge);
|
||||
|
||||
if (hint == BranchHint::kTrue) {
|
||||
false_block->SetDeferred(true);
|
||||
} else if (hint == BranchHint::kFalse) {
|
||||
true_block->SetDeferred(true);
|
||||
}
|
||||
|
||||
this->Branch(cond, true_block, false_block);
|
||||
|
||||
this->Bind(true_block);
|
||||
this->Goto(merge_block);
|
||||
|
||||
this->Bind(false_block);
|
||||
this->Goto(merge_block);
|
||||
|
||||
this->Bind(merge_block);
|
||||
return this->Phi(base::VectorOf({vtrue, vfalse}), rep);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace v8::internal::compiler::turboshaft
|
||||
|
||||
#endif // V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_
|
Loading…
Reference in New Issue
Block a user