From 596e56868d4cbd0becbe227764fbf7b8fbc10107 Mon Sep 17 00:00:00 2001 From: Maya Lekova Date: Tue, 31 Jan 2023 14:09:19 +0100 Subject: [PATCH] [turboshaft] Implement structural optimization reducer This CL adds a new Turboshaft reducer that is suitable for changing the graph in a way that doesn't reduce individual operations, rather changes the structure of the graph. The first such reduction we support is transforming if-else cascades that check if a given value is equal to any constant from a given set into a switch with cases corresponding to the constants in the set. Bug: v8:12783 Change-Id: Iee1e5581a334c3dc255d673d2178f76706e6dae2 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4106752 Commit-Queue: Maya Lekova Reviewed-by: Tobias Tebbi Reviewed-by: Clemens Backes Cr-Commit-Position: refs/heads/main@{#85561} --- BUILD.bazel | 1 + BUILD.gn | 1 + src/base/iterator.h | 27 +- src/compiler/pipeline.cc | 4 +- .../dead-code-elimination-reducer.h | 4 +- src/compiler/turboshaft/graph.h | 2 +- src/compiler/turboshaft/operations.h | 2 +- src/compiler/turboshaft/optimization-phase.h | 14 +- .../structural-optimization-reducer.h | 233 ++++++++++++++++++ 9 files changed, 279 insertions(+), 9 deletions(-) create mode 100644 src/compiler/turboshaft/structural-optimization-reducer.h diff --git a/BUILD.bazel b/BUILD.bazel index 13c79bee9a..c9acd32672 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -2936,6 +2936,7 @@ filegroup( "src/compiler/turboshaft/simplify-tf-loops.cc", "src/compiler/turboshaft/simplify-tf-loops.h", "src/compiler/turboshaft/snapshot-table.h", + "src/compiler/turboshaft/structural-optimization-reducer.h", "src/compiler/turboshaft/type-inference-reducer.h", "src/compiler/turboshaft/type-parser.cc", "src/compiler/turboshaft/type-parser.h", diff --git a/BUILD.gn b/BUILD.gn index 480d68d646..b154729f7b 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -3013,6 +3013,7 @@ v8_header_set("v8_internal_headers") { "src/compiler/turboshaft/sidetable.h", "src/compiler/turboshaft/simplify-tf-loops.h", "src/compiler/turboshaft/snapshot-table.h", + "src/compiler/turboshaft/structural-optimization-reducer.h", "src/compiler/turboshaft/type-inference-reducer.h", "src/compiler/turboshaft/type-parser.h", "src/compiler/turboshaft/typed-optimizations-reducer.h", diff --git a/src/base/iterator.h b/src/base/iterator.h index d0dff13330..b10240649b 100644 --- a/src/base/iterator.h +++ b/src/base/iterator.h @@ -93,7 +93,7 @@ struct DerefPtrIterator : base::iterator { // The signature avoids binding to temporaries (T&& / const T&) on purpose. The // lifetime of a temporary would not extend to a range-based for loop using it. template -auto Reversed(T& t) { // NOLINT(runtime/references): match {rbegin} and {rend} +auto Reversed(T& t) { return make_iterator_range(std::rbegin(t), std::rend(t)); } @@ -105,6 +105,31 @@ auto Reversed(const iterator_range& t) { return make_iterator_range(std::rbegin(t), std::rend(t)); } +// {IterateWithoutLast} returns a container adapter usable in a range-based +// "for" statement for iterating all elements without the last in a forward +// order. It performs a check whether the container is empty. +// +// Example: +// +// std::vector v = ...; +// for (int i : base::IterateWithoutLast(v)) { +// // iterates through v front to --back +// } +// +// The signature avoids binding to temporaries, see the remark in {Reversed}. 
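// (Illustrative sketch only, not part of this patch: given
//    std::vector<int> v = {1, 2, 3};
//    for (int i : base::IterateWithoutLast(v)) { /* sees 1 and 2, never 3 */ }
//  the adapter yields every element except the last; passing an empty
//  container trips the DCHECK in the implementation below.)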
+template +auto IterateWithoutLast(T& t) { + DCHECK_NE(std::begin(t), std::end(t)); + auto new_end = std::end(t); + return make_iterator_range(std::begin(t), --new_end); +} + +template +auto IterateWithoutLast(const iterator_range& t) { + iterator_range range_copy = {t.begin(), t.end()}; + return IterateWithoutLast(range_copy); +} + } // namespace base } // namespace v8 diff --git a/src/compiler/pipeline.cc b/src/compiler/pipeline.cc index d1c5e27779..50624d128a 100644 --- a/src/compiler/pipeline.cc +++ b/src/compiler/pipeline.cc @@ -96,6 +96,7 @@ #include "src/compiler/turboshaft/recreate-schedule.h" #include "src/compiler/turboshaft/select-lowering-reducer.h" #include "src/compiler/turboshaft/simplify-tf-loops.h" +#include "src/compiler/turboshaft/structural-optimization-reducer.h" #include "src/compiler/turboshaft/type-inference-reducer.h" #include "src/compiler/turboshaft/typed-optimizations-reducer.h" #include "src/compiler/turboshaft/types.h" @@ -2132,6 +2133,7 @@ struct OptimizeTurboshaftPhase { UnparkedScopeIfNeeded scope(data->broker(), v8_flags.turboshaft_trace_reduction); turboshaft::OptimizationPhase< + turboshaft::StructuralOptimizationReducer, turboshaft::LateEscapeAnalysisReducer, turboshaft::MemoryOptimizationReducer, turboshaft::VariableReducer, turboshaft::MachineOptimizationReducerSignallingNanImpossible, @@ -3116,7 +3118,7 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) { } // Optimize control flow. - if (v8_flags.turbo_cf_optimization) { + if (v8_flags.turbo_cf_optimization && !v8_flags.turboshaft) { Run(); RunPrintAndVerify(ControlFlowOptimizationPhase::phase_name(), true); } diff --git a/src/compiler/turboshaft/dead-code-elimination-reducer.h b/src/compiler/turboshaft/dead-code-elimination-reducer.h index 715cd17456..7b2244c3fe 100644 --- a/src/compiler/turboshaft/dead-code-elimination-reducer.h +++ b/src/compiler/turboshaft/dead-code-elimination-reducer.h @@ -25,7 +25,7 @@ namespace v8::internal::compiler::turboshaft { // // OperationState reflects the liveness of operations. An operation is live if // -// 1) The operation has the `is_required_when_unused` property +// 1) The operation has the `observable_when_unused` property // 2) Any of its outputs is live (is used in a live operation). // // If the operation is not live, it is dead and can be eliminated. @@ -279,7 +279,7 @@ class DeadCodeAnalysis { // state, so we skip them here. liveness_[index] = OperationState::kLive; continue; - } else if (op.Properties().is_required_when_unused) { + } else if (op.Properties().observable_when_unused) { op_state = OperationState::kLive; } else if (op.Is()) { has_live_phis = has_live_phis || (op_state == OperationState::kLive); diff --git a/src/compiler/turboshaft/graph.h b/src/compiler/turboshaft/graph.h index f7696e8335..48dfe44471 100644 --- a/src/compiler/turboshaft/graph.h +++ b/src/compiler/turboshaft/graph.h @@ -510,7 +510,7 @@ class Graph { Op& op = Op::New(this, args...); IncrementInputUses(op); - if (op.Properties().is_required_when_unused) { + if (op.Properties().observable_when_unused) { // Once the graph is built, an operation with a `saturated_use_count` of 0 // is guaranteed to be unused and can be removed. 
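// (For instance, a pure arithmetic op such as a Word32 addition whose result
// is never consumed keeps a use count of 0 and is skipped by
// ShouldSkipOperation; this example is illustrative, not from the patch.)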
Thus, to avoid removing // operations that never have uses (such as Goto or Branch), we set the diff --git a/src/compiler/turboshaft/operations.h b/src/compiler/turboshaft/operations.h index 8401612fd8..2bf036e950 100644 --- a/src/compiler/turboshaft/operations.h +++ b/src/compiler/turboshaft/operations.h @@ -152,7 +152,7 @@ struct OpProperties { // guaranteed to be derived. const bool is_pure_no_allocation = !(can_read || can_write || can_allocate || can_abort || is_block_terminator); - const bool is_required_when_unused = + const bool observable_when_unused = can_write || can_abort || is_block_terminator; // Operations that don't read, write, allocate and aren't block terminators // can be eliminated via value numbering, which means that if there are two diff --git a/src/compiler/turboshaft/optimization-phase.h b/src/compiler/turboshaft/optimization-phase.h index d3b45af59e..ba5747b482 100644 --- a/src/compiler/turboshaft/optimization-phase.h +++ b/src/compiler/turboshaft/optimization-phase.h @@ -47,8 +47,7 @@ struct AnalyzerBase { void Run() {} bool OpIsUsed(OpIndex i) const { const Operation& op = graph.Get(i); - return op.saturated_use_count > 0 || - op.Properties().is_required_when_unused; + return op.saturated_use_count > 0 || op.Properties().observable_when_unused; } explicit AnalyzerBase(const Graph& graph, Zone* phase_zone) @@ -59,7 +58,7 @@ struct AnalyzerBase { // Analyzers modify the input graph in-place when they want to mark some // Operations as removeable. In order to make that work for operations that have // no uses such as Goto and Branch, all operations that have the property -// `is_required_when_unused` have a non-zero `saturated_use_count`. +// `observable_when_unused` have a non-zero `saturated_use_count`. V8_INLINE bool ShouldSkipOperation(const Operation& op) { return op.saturated_use_count == 0; } @@ -245,6 +244,14 @@ class GraphVisitor { } } + // {InlineOp} introduces two limitations unlike {CloneAndInlineBlock}: + // 1. The input operation must not be emitted anymore as part of its + // regular input block; + // 2. {InlineOp} must not be used multiple times for the same input op. + bool InlineOp(OpIndex index, const Block* input_block) { + return VisitOp(index, input_block); + } + template OpIndex MapToNewGraph(OpIndex old_index, int predecessor_index = -1) { DCHECK(old_index.valid()); @@ -760,6 +767,7 @@ class GraphVisitor { assembler().Set(*var, new_index); return; } + DCHECK(!op_mapping_[old_index.id()].valid()); op_mapping_[old_index.id()] = new_index; } diff --git a/src/compiler/turboshaft/structural-optimization-reducer.h b/src/compiler/turboshaft/structural-optimization-reducer.h new file mode 100644 index 0000000000..2a0598426c --- /dev/null +++ b/src/compiler/turboshaft/structural-optimization-reducer.h @@ -0,0 +1,233 @@ +// Copyright 2022 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_ +#define V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_ + +#include + +#include "src/compiler/turboshaft/assembler.h" +#include "src/compiler/turboshaft/index.h" +#include "src/zone/zone.h" + +// The StructuralOptimizationReducer reducer is suitable for changing the +// graph in a way that doesn't reduce individual operations, rather changes +// the structure of the graph. 
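// (Usage sketch, mirroring the pipeline change in this CL: the reducer is
//  stacked with the other Turboshaft reducers, roughly
//    OptimizationPhase<StructuralOptimizationReducer,
//                      LateEscapeAnalysisReducer,
//                      MemoryOptimizationReducer, ...>::Run(...);
//  so it sees each input-graph Branch via ReduceInputGraphBranch below.)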
+// +// We currently support a reduction which transforms if-else cascades +// that check if a given value is equal to a 32-bit constant from a given set +// into a switch with cases corresponding to the constants in the set. +// +// So for example code like: +// [only pure ops 1] +// if (x == 3) { +// B1; +// } else { +// [only pure ops 2] +// if (x == 5) { +// B2; +// } else { +// B3; +// } +// } +// +// will be transformed to: +// [only pure ops 1] +// [only pure ops 2] +// switch (x) { +// case 3: +// B1; +// case 5: +// B2; +// default: +// B3; +// } +// +// Or represented graphically: +// [only pure ops 1] +// [only pure ops 1] [only pure ops 2] +// x == 3 Switch(x) +// Branch | | | +// | | ----- | ------ +// ----- ------ case 3 | | | default +// | | | | | +// T | | F v | | +// v v B1 | v +// B1 [only pure ops 2] becomes | B3 +// x == 5 ======> case 5 | +// Branch v +// | | B2 +// ----- ------ +// | | +// T | | F +// v v +// B2 B3 +// + +// TODO(mslekova): Introduce a flag and move to a common graph place. +// #define TRACE_REDUCTIONS +#ifdef TRACE_REDUCTIONS +#define TRACE(str, ...) \ + { PrintF(str, ##__VA_ARGS__); } +#else // TRACE_REDUCTIONS +#define TRACE(str, ...) + +#endif // TRACE_REDUCTIONS + +namespace v8::internal::compiler::turboshaft { + +template +class StructuralOptimizationReducer : public Next { + public: + using Next::Asm; + template + explicit StructuralOptimizationReducer(const std::tuple& args) + : Next(args) {} + + OpIndex ReduceInputGraphBranch(OpIndex input_index, const BranchOp& branch) { + LABEL_BLOCK(no_change) { + return Next::ReduceInputGraphBranch(input_index, branch); + } + + TRACE("[structural] Calling ReduceInputGraphBranch for index: %u\n", + static_cast(input_index.id())); + + base::SmallVector cases; + base::SmallVector false_blocks; + + Block* current_if_false; + const BranchOp* current_branch = &branch; + BranchHint default_hint = BranchHint::kNone; + + OpIndex switch_var = OpIndex::Invalid(); + while (true) { + Block* current_if_true = current_branch->if_true; + current_if_false = current_branch->if_false; + + DCHECK(current_if_true && current_if_false); + + // If we encounter a condition that is not equality, we can't turn it + // into a switch case. + const EqualOp* equal = Asm() + .input_graph() + .Get(current_branch->condition()) + .template TryCast(); + if (!equal || equal->rep != RegisterRepresentation::Word32()) { + TRACE( + "\t [bailout] Branch with different condition than Word32 " + "Equal.\n"); + break; + } + + // MachineOptimizationReducer should normalize equality to put constants + // right. + const Operation& right_op = Asm().input_graph().Get(equal->right()); + if (!right_op.Is()) { + TRACE("\t [bailout] No constant on the right side of Equal.\n"); + break; + } + + // We can only turn Word32 constant equals to switch cases. + const ConstantOp& const_op = right_op.Cast(); + if (const_op.kind != ConstantOp::Kind::kWord32) { + TRACE("\t [bailout] Constant is not of type Word32.\n"); + break; + } + + // If we encounter equal to a different value, we can't introduce + // a switch. + OpIndex current_var = equal->left(); + if (!switch_var.valid()) { + switch_var = current_var; + } else if (switch_var != current_var) { + TRACE("\t [bailout] Not all branches compare the same variable.\n"); + break; + } + + // The current_if_true block becomes the corresponding switch case block. 
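// (Each entry pushed below pairs the 32-bit constant with the mapped
//  destination block and the branch hint, e.g. `if (x == 3) { B1; }`
//  contributes a case {3, MapToNewGraph(B1), hint}; the exact shape of the
//  case struct is inferred from the emplace_back call and is illustrative.)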
+ uint32_t value = const_op.word32(); + cases.emplace_back(value, Asm().MapToNewGraph(current_if_true->index()), + current_branch->hint); + + // All pure ops from the if_false block should be executed before + // the switch, except the last Branch operation (which we drop). + false_blocks.push_back(current_if_false); + + // If we encounter a if_false block that doesn't end with a Branch, + // this means we've reached the end of the cascade. + const Operation& maybe_branch = + current_if_false->LastOperation(Asm().input_graph()); + if (!maybe_branch.Is()) { + TRACE("\t [break] Reached end of the if-else cascade.\n"); + break; + } + + default_hint = current_branch->hint; + + // Iterate to the next if_false block in the cascade. + current_branch = &maybe_branch.template Cast(); + + // As long as the else blocks contain only pure ops, we can keep + // traversing the if-else cascade. + if (!ContainsOnlyPureOps(current_branch->if_false, Asm().input_graph())) { + TRACE("\t [break] End of only-pure-ops cascade reached.\n"); + break; + } + } + + // Probably better to keep short if-else cascades as they are. + if (cases.size() <= 2) { + TRACE("\t [bailout] Cascade with less than 2 levels of nesting.\n"); + goto no_change; + } + CHECK_EQ(cases.size(), false_blocks.size()); + + // We're skipping the last false block, as it becomes the default block. + for (size_t i = 0; i < false_blocks.size() - 1; ++i) { + const Block* block = false_blocks[i]; + InlineAllOperationsWithoutLast(block); + } + + TRACE("[reduce] Successfully emit a Switch with %z cases.", cases.size()); + + // The last current_if_true block that ends the cascade becomes the default + // case. + Block* default_block = current_if_false; + Asm().Switch( + Asm().MapToNewGraph(switch_var), + Asm().output_graph().graph_zone()->CloneVector(base::VectorOf(cases)), + Asm().MapToNewGraph(default_block->index()), default_hint); + return OpIndex::Invalid(); + } + + private: + static bool ContainsOnlyPureOps(const Block* block, const Graph& graph) { + for (const auto& op : base::IterateWithoutLast(graph.operations(*block))) { + OpProperties props = op.Properties(); + // It's fine to allow allocations and reads. Writes and + // aborting should be disallowed though. + if (props.observable_when_unused) { + return false; + } + } + return true; + } + + // Visits and emits {input_block} right now (ie, in the current block) + // until the one before the last operation is reached. + void InlineAllOperationsWithoutLast(const Block* input_block) { + base::iterator_range all_ops = + Asm().input_graph().OperationIndices(*input_block); + + for (OpIndex op : base::IterateWithoutLast(all_ops)) { + Asm().InlineOp(op, input_block); + } + } +}; + +} // namespace v8::internal::compiler::turboshaft + +#undef TRACE + +#endif // V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
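For readers skimming the reduction above, here is a minimal standalone sketch of the cascade walk. It uses hypothetical stand-in types (CascadeLevel, Case) rather than the real Turboshaft BranchOp/EqualOp/ConstantOp machinery, and only models the bailout conditions (non-equality condition, different compared variable, non-pure else block, fewer than three cases); it is not the implementation from this CL.

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical stand-in for one level of the if-else cascade:
//   if (x == constant) { case_block } else { ...next level or default... }
struct CascadeLevel {
  bool condition_is_word32_equal = false;  // condition has the form `x == c`
  int compared_var = -1;                   // id of the compared value x
  uint32_t constant = 0;                   // the 32-bit constant c
  int case_block = -1;                     // block id of the then-branch
  bool else_only_pure_ops = true;          // else block is safe to hoist
  const CascadeLevel* next = nullptr;      // next `if` inside the else block
};

struct Case {
  uint32_t value;
  int destination_block;
};

// Walks the cascade and collects switch cases, or returns nullopt when the
// cascade should be left as branches (mirroring the bailouts above).
std::optional<std::vector<Case>> CollectCascade(const CascadeLevel* level) {
  std::vector<Case> cases;
  int switch_var = -1;
  for (; level != nullptr; level = level->next) {
    if (!level->condition_is_word32_equal) break;  // not `x == constant`
    if (switch_var == -1) {
      switch_var = level->compared_var;            // first level fixes x
    } else if (level->compared_var != switch_var) {
      break;                                       // compares something else
    }
    cases.push_back({level->constant, level->case_block});
    if (!level->else_only_pure_ops) break;         // else can't be hoisted
  }
  if (cases.size() <= 2) return std::nullopt;      // keep short cascades as-is
  return cases;
}

The real reducer additionally inlines the pure operations of every skipped else block before emitting the Switch (via InlineAllOperationsWithoutLast) and maps the final else block to the default case.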