diff --git a/BUILD.bazel b/BUILD.bazel index f216a1811d..a1d45e7f86 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -2897,6 +2897,7 @@ filegroup( "src/compiler/turboshaft/recreate-schedule.h", "src/compiler/turboshaft/representations.cc", "src/compiler/turboshaft/representations.h", + "src/compiler/turboshaft/select-lowering-assembler.h", "src/compiler/turboshaft/sidetable.h", "src/compiler/turboshaft/simplify-tf-loops.cc", "src/compiler/turboshaft/simplify-tf-loops.h", diff --git a/BUILD.gn b/BUILD.gn index 29eb0dc060..edee25577d 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -2927,6 +2927,7 @@ v8_header_set("v8_internal_headers") { "src/compiler/turboshaft/optimization-phase.h", "src/compiler/turboshaft/recreate-schedule.h", "src/compiler/turboshaft/representations.h", + "src/compiler/turboshaft/select-lowering-assembler.h", "src/compiler/turboshaft/sidetable.h", "src/compiler/turboshaft/simplify-tf-loops.h", "src/compiler/turboshaft/utils.h", diff --git a/src/compiler/pipeline.cc b/src/compiler/pipeline.cc index d6aa877209..5665c4ad4c 100644 --- a/src/compiler/pipeline.cc +++ b/src/compiler/pipeline.cc @@ -86,6 +86,7 @@ #include "src/compiler/turboshaft/machine-optimization-assembler.h" #include "src/compiler/turboshaft/optimization-phase.h" #include "src/compiler/turboshaft/recreate-schedule.h" +#include "src/compiler/turboshaft/select-lowering-assembler.h" #include "src/compiler/turboshaft/simplify-tf-loops.h" #include "src/compiler/turboshaft/value-numbering-assembler.h" #include "src/compiler/type-narrowing-reducer.h" @@ -1959,30 +1960,43 @@ struct LateOptimizationPhase { DECL_PIPELINE_PHASE_CONSTANTS(LateOptimization) void Run(PipelineData* data, Zone* temp_zone) { - GraphReducer graph_reducer( - temp_zone, data->graph(), &data->info()->tick_counter(), data->broker(), - data->jsgraph()->Dead(), data->observe_node_manager()); - LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(), - data->common(), temp_zone); - BranchElimination 
branch_condition_elimination(&graph_reducer, - data->jsgraph(), temp_zone); - DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(), - data->common(), temp_zone); - ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone()); - MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph()); - CommonOperatorReducer common_reducer( - &graph_reducer, data->graph(), data->broker(), data->common(), - data->machine(), temp_zone, BranchSemantics::kMachine); - JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone); - SelectLowering select_lowering(&graph_assembler, data->graph()); - AddReducer(data, &graph_reducer, &escape_analysis); - AddReducer(data, &graph_reducer, &branch_condition_elimination); - AddReducer(data, &graph_reducer, &dead_code_elimination); - AddReducer(data, &graph_reducer, &machine_reducer); - AddReducer(data, &graph_reducer, &common_reducer); - AddReducer(data, &graph_reducer, &select_lowering); - AddReducer(data, &graph_reducer, &value_numbering); - graph_reducer.ReduceGraph(); + if (data->HasTurboshaftGraph()) { + // TODO(dmercadier,tebbi): add missing assemblers (LateEscapeAnalysis, + // BranchElimination, MachineOperatorReducer, CommonOperatorReducer). 
+ turboshaft::OptimizationPhase>:: + Run(&data->turboshaft_graph(), temp_zone, data->node_origins(), + turboshaft::VisitOrder::kDominator); + } else { + GraphReducer graph_reducer(temp_zone, data->graph(), + &data->info()->tick_counter(), data->broker(), + data->jsgraph()->Dead(), + data->observe_node_manager()); + LateEscapeAnalysis escape_analysis(&graph_reducer, data->graph(), + data->common(), temp_zone); + BranchElimination branch_condition_elimination( + &graph_reducer, data->jsgraph(), temp_zone); + DeadCodeElimination dead_code_elimination(&graph_reducer, data->graph(), + data->common(), temp_zone); + ValueNumberingReducer value_numbering(temp_zone, data->graph()->zone()); + MachineOperatorReducer machine_reducer(&graph_reducer, data->jsgraph()); + CommonOperatorReducer common_reducer( + &graph_reducer, data->graph(), data->broker(), data->common(), + data->machine(), temp_zone, BranchSemantics::kMachine); + JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone); + SelectLowering select_lowering(&graph_assembler, data->graph()); + AddReducer(data, &graph_reducer, &escape_analysis); + AddReducer(data, &graph_reducer, &branch_condition_elimination); + AddReducer(data, &graph_reducer, &dead_code_elimination); + AddReducer(data, &graph_reducer, &machine_reducer); + AddReducer(data, &graph_reducer, &common_reducer); + if (!FLAG_turboshaft) { + AddReducer(data, &graph_reducer, &select_lowering); + } + AddReducer(data, &graph_reducer, &value_numbering); + graph_reducer.ReduceGraph(); + } } }; @@ -3005,6 +3019,8 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) { } Run(BuildTurboshaftPhase::phase_name()); + Run(); + Run(); Run(OptimizeTurboshaftPhase::phase_name()); diff --git a/src/compiler/turboshaft/assembler.h b/src/compiler/turboshaft/assembler.h index f862d16c63..6a72da8fe5 100644 --- a/src/compiler/turboshaft/assembler.h +++ b/src/compiler/turboshaft/assembler.h @@ -391,15 +391,6 @@ class AssemblerInterface : public Superclass { #undef 
DECL_SINGLE_REP_UNARY #undef DECL_MULTI_REP_UNARY - OpIndex Word32Select(OpIndex condition, OpIndex left, OpIndex right) { - return subclass().Select(condition, left, right, - WordRepresentation::Word32()); - } - OpIndex Word64Select(OpIndex condition, OpIndex left, OpIndex right) { - return subclass().Select(condition, left, right, - WordRepresentation::Word64()); - } - OpIndex Word32Constant(uint32_t value) { return subclass().Constant(ConstantOp::Kind::kWord32, uint64_t{value}); } diff --git a/src/compiler/turboshaft/graph-builder.cc b/src/compiler/turboshaft/graph-builder.cc index 7c5d2ba5d0..c55c901529 100644 --- a/src/compiler/turboshaft/graph-builder.cc +++ b/src/compiler/turboshaft/graph-builder.cc @@ -567,12 +567,26 @@ OpIndex GraphBuilder::Process( RegisterRepresentation::PointerSized(), RegisterRepresentation::Tagged()); + case IrOpcode::kSelect: { + OpIndex cond = Map(node->InputAt(0)); + OpIndex vtrue = Map(node->InputAt(1)); + OpIndex vfalse = Map(node->InputAt(2)); + const SelectParameters& params = SelectParametersOf(op); + return assembler.Select(cond, vtrue, vfalse, + RegisterRepresentation::FromMachineRepresentation( + params.representation()), + params.hint(), SelectOp::Implementation::kBranch); + } case IrOpcode::kWord32Select: - return assembler.Word32Select( - Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2))); + return assembler.Select( + Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)), + RegisterRepresentation::Word32(), BranchHint::kNone, + SelectOp::Implementation::kCMove); case IrOpcode::kWord64Select: - return assembler.Word64Select( - Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2))); + return assembler.Select( + Map(node->InputAt(0)), Map(node->InputAt(1)), Map(node->InputAt(2)), + RegisterRepresentation::Word64(), BranchHint::kNone, + SelectOp::Implementation::kCMove); case IrOpcode::kLoad: case IrOpcode::kLoadImmutable: diff --git a/src/compiler/turboshaft/graph.h 
b/src/compiler/turboshaft/graph.h index 0685a4a0a6..060fd642ab 100644 --- a/src/compiler/turboshaft/graph.h +++ b/src/compiler/turboshaft/graph.h @@ -253,13 +253,14 @@ class RandomAccessStackDominatorNode friend class Block; #endif - int len_ = 0; - Derived* nxt_ = nullptr; - Derived* jmp_ = nullptr; // Myers' original datastructure requires to often check jmp_->len_, which is // not so great on modern computers (memory access, caches & co). To speed up // things a bit, we store here jmp_len_. int jmp_len_ = 0; + + int len_ = 0; + Derived* nxt_ = nullptr; + Derived* jmp_ = nullptr; }; // A basic block @@ -479,15 +480,17 @@ class Graph { V8_INLINE bool Add(Block* block) { DCHECK_EQ(block->graph_generation_, generation_); if (!bound_blocks_.empty() && !block->HasPredecessors()) return false; - bool deferred = true; - for (Block* pred = block->last_predecessor_; pred != nullptr; - pred = pred->neighboring_predecessor_) { - if (!pred->IsDeferred()) { - deferred = false; - break; + if (!block->IsDeferred()) { + bool deferred = true; + for (Block* pred = block->last_predecessor_; pred != nullptr; + pred = pred->neighboring_predecessor_) { + if (!pred->IsDeferred()) { + deferred = false; + break; + } } + block->SetDeferred(deferred); } - block->SetDeferred(deferred); DCHECK(!block->begin_.valid()); block->begin_ = next_operation_index(); DCHECK_EQ(block->index_, BlockIndex::Invalid()); diff --git a/src/compiler/turboshaft/operations.cc b/src/compiler/turboshaft/operations.cc index 50e9ec2dbe..7cf11f43dc 100644 --- a/src/compiler/turboshaft/operations.cc +++ b/src/compiler/turboshaft/operations.cc @@ -254,6 +254,15 @@ std::ostream& operator<<(std::ostream& os, Float64InsertWord32Op::Kind kind) { } } +std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind) { + switch (kind) { + case SelectOp::Implementation::kBranch: + return os << "Branch"; + case SelectOp::Implementation::kCMove: + return os << "CMove"; + } +} + std::ostream& operator<<(std::ostream& os, 
FrameConstantOp::Kind kind) { switch (kind) { case FrameConstantOp::Kind::kStackCheckOffset: diff --git a/src/compiler/turboshaft/operations.h b/src/compiler/turboshaft/operations.h index d7f49e9bc5..163e30beeb 100644 --- a/src/compiler/turboshaft/operations.h +++ b/src/compiler/turboshaft/operations.h @@ -20,6 +20,7 @@ #include "src/base/vector.h" #include "src/codegen/external-reference.h" #include "src/common/globals.h" +#include "src/compiler/common-operator.h" #include "src/compiler/globals.h" #include "src/compiler/turboshaft/fast-hash.h" #include "src/compiler/turboshaft/representations.h" @@ -1080,23 +1081,33 @@ struct TaggedBitcastOp : FixedArityOperationT<1, TaggedBitcastOp> { }; struct SelectOp : FixedArityOperationT<3, SelectOp> { - // TODO(12783): Support all register reps. - WordRepresentation rep; + enum class Implementation : uint8_t { kBranch, kCMove }; + static constexpr OpProperties properties = OpProperties::Pure(); + RegisterRepresentation rep; + BranchHint hint; + Implementation implem; - OpIndex condition() const { return Base::input(0); } - OpIndex left() const { return Base::input(1); } - OpIndex right() const { return Base::input(2); } - - SelectOp(OpIndex condition, OpIndex left, OpIndex right, - WordRepresentation rep) - : Base(condition, left, right), rep(rep) { - DCHECK(rep == WordRepresentation::Word32() - ? 
SupportedOperations::word32_select() - : SupportedOperations::word64_select()); + SelectOp(OpIndex cond, OpIndex vtrue, OpIndex vfalse, + RegisterRepresentation rep, BranchHint hint, Implementation implem) + : Base(cond, vtrue, vfalse), rep(rep), hint(hint), implem(implem) { +#ifdef DEBUG + if (implem == Implementation::kCMove) { + DCHECK((rep == RegisterRepresentation::Word32() && + SupportedOperations::word32_select()) || + (rep == RegisterRepresentation::Word64() && + SupportedOperations::word64_select())); + } +#endif } - auto options() const { return std::tuple{rep}; } + + OpIndex cond() const { return input(0); } + OpIndex vtrue() const { return input(1); } + OpIndex vfalse() const { return input(2); } + + auto options() const { return std::tuple{rep, hint, implem}; } }; +std::ostream& operator<<(std::ostream& os, SelectOp::Implementation kind); struct PhiOp : OperationT { RegisterRepresentation rep; diff --git a/src/compiler/turboshaft/optimization-phase.h b/src/compiler/turboshaft/optimization-phase.h index 421612982a..bc68328b7b 100644 --- a/src/compiler/turboshaft/optimization-phase.h +++ b/src/compiler/turboshaft/optimization-phase.h @@ -465,9 +465,9 @@ struct OptimizationPhase::Impl { return assembler.TaggedBitcast(MapToNewGraph(op.input()), op.from, op.to); } OpIndex ReduceSelect(const SelectOp& op) { - return assembler.Select(MapToNewGraph(op.condition()), - MapToNewGraph(op.left()), MapToNewGraph(op.right()), - op.rep); + return assembler.Select(MapToNewGraph(op.cond()), MapToNewGraph(op.vtrue()), + MapToNewGraph(op.vfalse()), op.rep, op.hint, + op.implem); } OpIndex ReduceConstant(const ConstantOp& op) { return assembler.Constant(op.kind, op.storage); diff --git a/src/compiler/turboshaft/recreate-schedule.cc b/src/compiler/turboshaft/recreate-schedule.cc index e8c2563909..455fb52c62 100644 --- a/src/compiler/turboshaft/recreate-schedule.cc +++ b/src/compiler/turboshaft/recreate-schedule.cc @@ -835,13 +835,21 @@ Node* 
ScheduleBuilder::ProcessOperation(const TaggedBitcastOp& op) { return AddNode(o, {GetNode(op.input())}); } Node* ScheduleBuilder::ProcessOperation(const SelectOp& op) { - const Operator* o = op.rep == WordRepresentation::Word32() + // If there is a Select, then it should only be one that is supported by the + // machine, and it should be meant to be implemented with cmove. + DCHECK_EQ(op.implem, SelectOp::Implementation::kCMove); + DCHECK((op.rep == RegisterRepresentation::Word32() && + SupportedOperations::word32_select()) || + (op.rep == RegisterRepresentation::Word64() && + SupportedOperations::word64_select())); + + const Operator* o = op.rep == RegisterRepresentation::Word32() ? machine.Word32Select().op() : machine.Word64Select().op(); - return AddNode( - o, {GetNode(op.condition()), GetNode(op.left()), GetNode(op.right())}); -} + return AddNode( + o, {GetNode(op.cond()), GetNode(op.vtrue()), GetNode(op.vfalse())}); +} Node* ScheduleBuilder::ProcessOperation(const PendingLoopPhiOp& op) { UNREACHABLE(); } diff --git a/src/compiler/turboshaft/select-lowering-assembler.h b/src/compiler/turboshaft/select-lowering-assembler.h new file mode 100644 index 0000000000..397484405d --- /dev/null +++ b/src/compiler/turboshaft/select-lowering-assembler.h @@ -0,0 +1,73 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_ +#define V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_ + +#include "src/base/vector.h" +#include "src/compiler/common-operator.h" +#include "src/compiler/turboshaft/assembler.h" +#include "src/compiler/turboshaft/operations.h" + +namespace v8::internal::compiler::turboshaft { + +// Lowers Select operations to diamonds. +// +// A Select is conceptually somewhat similar to a ternary if: +// +// res = Select(cond, val_true, val_false) +// +// means: +// +// res = cond ? 
val_true : val_false +// +// SelectLoweringAssembler lowers such operations into: +// +// if (cond) { +// res = val_true +// } else { +// res = val_false +// } + +template +class SelectLoweringAssembler + : public AssemblerInterface, Base> { + public: + SelectLoweringAssembler(Graph* graph, Zone* phase_zone) + : AssemblerInterface(graph, phase_zone) {} + + OpIndex Select(OpIndex cond, OpIndex vtrue, OpIndex vfalse, + RegisterRepresentation rep, BranchHint hint, + SelectOp::Implementation implem) { + if (implem == SelectOp::Implementation::kCMove) { + // We do not lower Select operations that should be implemented with + // CMove. + return Base::Select(cond, vtrue, vfalse, rep, hint, implem); + } + Block* true_block = this->NewBlock(Block::Kind::kBranchTarget); + Block* false_block = this->NewBlock(Block::Kind::kBranchTarget); + Block* merge_block = this->NewBlock(Block::Kind::kMerge); + + if (hint == BranchHint::kTrue) { + false_block->SetDeferred(true); + } else if (hint == BranchHint::kFalse) { + true_block->SetDeferred(true); + } + + this->Branch(cond, true_block, false_block); + + this->Bind(true_block); + this->Goto(merge_block); + + this->Bind(false_block); + this->Goto(merge_block); + + this->Bind(merge_block); + return this->Phi(base::VectorOf({vtrue, vfalse}), rep); + } +}; + +} // namespace v8::internal::compiler::turboshaft + +#endif // V8_COMPILER_TURBOSHAFT_SELECT_LOWERING_ASSEMBLER_H_