[turboshaft] Implement structural optimization reducer

This CL adds a new Turboshaft reducer that is suitable for changing the
graph in a way that doesn't reduce individual operations, rather changes
the structure of the graph. The first such reduction we support is
transforming if-else cascades that check if a given value is equal to
any constant from a given set into a switch with cases corresponding to
the constants in the set.

Bug: v8:12783
Change-Id: Iee1e5581a334c3dc255d673d2178f76706e6dae2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4106752
Commit-Queue: Maya Lekova <mslekova@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85561}
This commit is contained in:
Maya Lekova 2023-01-31 14:09:19 +01:00 committed by V8 LUCI CQ
parent 961868decf
commit 596e56868d
9 changed files with 279 additions and 9 deletions

View File

@ -2936,6 +2936,7 @@ filegroup(
"src/compiler/turboshaft/simplify-tf-loops.cc",
"src/compiler/turboshaft/simplify-tf-loops.h",
"src/compiler/turboshaft/snapshot-table.h",
"src/compiler/turboshaft/structural-optimization-reducer.h",
"src/compiler/turboshaft/type-inference-reducer.h",
"src/compiler/turboshaft/type-parser.cc",
"src/compiler/turboshaft/type-parser.h",

View File

@ -3013,6 +3013,7 @@ v8_header_set("v8_internal_headers") {
"src/compiler/turboshaft/sidetable.h",
"src/compiler/turboshaft/simplify-tf-loops.h",
"src/compiler/turboshaft/snapshot-table.h",
"src/compiler/turboshaft/structural-optimization-reducer.h",
"src/compiler/turboshaft/type-inference-reducer.h",
"src/compiler/turboshaft/type-parser.h",
"src/compiler/turboshaft/typed-optimizations-reducer.h",

View File

@ -93,7 +93,7 @@ struct DerefPtrIterator : base::iterator<std::bidirectional_iterator_tag, T> {
// The signature avoids binding to temporaries (T&& / const T&) on purpose. The
// lifetime of a temporary would not extend to a range-based for loop using it.
template <typename T>
auto Reversed(T& t) { // NOLINT(runtime/references): match {rbegin} and {rend}
auto Reversed(T& t) {
return make_iterator_range(std::rbegin(t), std::rend(t));
}
@ -105,6 +105,31 @@ auto Reversed(const iterator_range<T>& t) {
return make_iterator_range(std::rbegin(t), std::rend(t));
}
// {IterateWithoutLast} returns a container adapter usable in a range-based
// "for" statement for iterating over all elements except the last one, in
// forward order. The container must not be empty; this is DCHECKed.
//
// Example:
//
// std::vector<int> v = ...;
// for (int i : base::IterateWithoutLast(v)) {
// // iterates through v front to --back
// }
//
// The signature avoids binding to temporaries, see the remark in {Reversed}.
// Builds a range spanning [begin(t), end(t) - 1), i.e. every element of {t}
// but the last. {t} must be non-empty, which is asserted in debug builds.
template <typename T>
auto IterateWithoutLast(T& t) {
  DCHECK_NE(std::begin(t), std::end(t));
  auto first = std::begin(t);
  auto one_before_end = std::end(t);
  // Step the end iterator back by one so that the last element is excluded.
  --one_before_end;
  return make_iterator_range(first, one_before_end);
}
template <typename T>
auto IterateWithoutLast(const iterator_range<T>& t) {
iterator_range<T> range_copy = {t.begin(), t.end()};
return IterateWithoutLast(range_copy);
}
} // namespace base
} // namespace v8

View File

@ -96,6 +96,7 @@
#include "src/compiler/turboshaft/recreate-schedule.h"
#include "src/compiler/turboshaft/select-lowering-reducer.h"
#include "src/compiler/turboshaft/simplify-tf-loops.h"
#include "src/compiler/turboshaft/structural-optimization-reducer.h"
#include "src/compiler/turboshaft/type-inference-reducer.h"
#include "src/compiler/turboshaft/typed-optimizations-reducer.h"
#include "src/compiler/turboshaft/types.h"
@ -2132,6 +2133,7 @@ struct OptimizeTurboshaftPhase {
UnparkedScopeIfNeeded scope(data->broker(),
v8_flags.turboshaft_trace_reduction);
turboshaft::OptimizationPhase<
turboshaft::StructuralOptimizationReducer,
turboshaft::LateEscapeAnalysisReducer,
turboshaft::MemoryOptimizationReducer, turboshaft::VariableReducer,
turboshaft::MachineOptimizationReducerSignallingNanImpossible,
@ -3116,7 +3118,7 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
}
// Optimize control flow.
if (v8_flags.turbo_cf_optimization) {
if (v8_flags.turbo_cf_optimization && !v8_flags.turboshaft) {
Run<ControlFlowOptimizationPhase>();
RunPrintAndVerify(ControlFlowOptimizationPhase::phase_name(), true);
}

View File

@ -25,7 +25,7 @@ namespace v8::internal::compiler::turboshaft {
//
// OperationState reflects the liveness of operations. An operation is live if
//
// 1) The operation has the `is_required_when_unused` property
// 1) The operation has the `observable_when_unused` property
// 2) Any of its outputs is live (is used in a live operation).
//
// If the operation is not live, it is dead and can be eliminated.
@ -279,7 +279,7 @@ class DeadCodeAnalysis {
// state, so we skip them here.
liveness_[index] = OperationState::kLive;
continue;
} else if (op.Properties().is_required_when_unused) {
} else if (op.Properties().observable_when_unused) {
op_state = OperationState::kLive;
} else if (op.Is<PhiOp>()) {
has_live_phis = has_live_phis || (op_state == OperationState::kLive);

View File

@ -510,7 +510,7 @@ class Graph {
Op& op = Op::New(this, args...);
IncrementInputUses(op);
if (op.Properties().is_required_when_unused) {
if (op.Properties().observable_when_unused) {
// Once the graph is built, an operation with a `saturated_use_count` of 0
// is guaranteed to be unused and can be removed. Thus, to avoid removing
// operations that never have uses (such as Goto or Branch), we set the

View File

@ -152,7 +152,7 @@ struct OpProperties {
// guaranteed to be derived.
const bool is_pure_no_allocation = !(can_read || can_write || can_allocate ||
can_abort || is_block_terminator);
const bool is_required_when_unused =
const bool observable_when_unused =
can_write || can_abort || is_block_terminator;
// Operations that don't read, write, allocate and aren't block terminators
// can be eliminated via value numbering, which means that if there are two

View File

@ -47,8 +47,7 @@ struct AnalyzerBase {
void Run() {}
bool OpIsUsed(OpIndex i) const {
const Operation& op = graph.Get(i);
return op.saturated_use_count > 0 ||
op.Properties().is_required_when_unused;
return op.saturated_use_count > 0 || op.Properties().observable_when_unused;
}
explicit AnalyzerBase(const Graph& graph, Zone* phase_zone)
@ -59,7 +58,7 @@ struct AnalyzerBase {
// Analyzers modify the input graph in-place when they want to mark some
// Operations as removeable. In order to make that work for operations that have
// no uses such as Goto and Branch, all operations that have the property
// `is_required_when_unused` have a non-zero `saturated_use_count`.
// `observable_when_unused` have a non-zero `saturated_use_count`.
// Returns true iff {op} has a `saturated_use_count` of zero.
V8_INLINE bool ShouldSkipOperation(const Operation& op) {
  const bool has_zero_use_count = op.saturated_use_count == 0;
  return has_zero_use_count;
}
@ -245,6 +244,14 @@ class GraphVisitor {
}
}
// {InlineOp} visits the single input operation {index} of {input_block} by
// forwarding to {VisitOp}, and returns whatever {VisitOp} returns. Unlike
// {CloneAndInlineBlock}, it comes with two limitations:
//   1. The input operation must not be emitted anymore as part of its
//      regular input block;
//   2. {InlineOp} must not be used multiple times for the same input op.
bool InlineOp(OpIndex index, const Block* input_block) {
  const bool visit_result = VisitOp<false>(index, input_block);
  return visit_result;
}
template <bool can_be_invalid = false>
OpIndex MapToNewGraph(OpIndex old_index, int predecessor_index = -1) {
DCHECK(old_index.valid());
@ -760,6 +767,7 @@ class GraphVisitor {
assembler().Set(*var, new_index);
return;
}
DCHECK(!op_mapping_[old_index.id()].valid());
op_mapping_[old_index.id()] = new_index;
}

View File

@ -0,0 +1,233 @@
// Copyright 2022 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
#define V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
#include <cstdio>
#include "src/compiler/turboshaft/assembler.h"
#include "src/compiler/turboshaft/index.h"
#include "src/zone/zone.h"
// The StructuralOptimizationReducer is suitable for changing the graph in a
// way that does not reduce individual operations, but rather changes the
// structure of the graph.
//
// We currently support a reduction which transforms if-else cascades
// that check if a given value is equal to a 32-bit constant from a given set
// into a switch with cases corresponding to the constants in the set.
//
// So for example code like:
// [only pure ops 1]
// if (x == 3) {
// B1;
// } else {
// [only pure ops 2]
// if (x == 5) {
// B2;
// } else {
// B3;
// }
// }
//
// will be transformed to:
// [only pure ops 1]
// [only pure ops 2]
// switch (x) {
// case 3:
// B1;
// case 5:
// B2;
// default:
// B3;
// }
//
// Or represented graphically:
// [only pure ops 1]
// [only pure ops 1] [only pure ops 2]
// x == 3 Switch(x)
// Branch | | |
// | | ----- | ------
// ----- ------ case 3 | | | default
// | | | | |
// T | | F v | |
// v v B1 | v
// B1 [only pure ops 2] becomes | B3
// x == 5 ======> case 5 |
// Branch v
// | | B2
// ----- ------
// | |
// T | | F
// v v
// B2 B3
//
// TODO(mslekova): Introduce a flag and move to a common graph place.
// #define TRACE_REDUCTIONS
// TRACE(str, ...) forwards its printf-style arguments to PrintF when
// TRACE_REDUCTIONS is defined and expands to nothing otherwise. The macro is
// #undef'ed again at the end of this header.
#ifdef TRACE_REDUCTIONS
#define TRACE(str, ...) \
{ PrintF(str, ##__VA_ARGS__); }
#else // TRACE_REDUCTIONS
#define TRACE(str, ...)
#endif // TRACE_REDUCTIONS
namespace v8::internal::compiler::turboshaft {
// Reducer that turns if-else cascades, in which every branch compares the
// same value for Word32 equality against a constant, into a single Switch.
// The operations of the intermediate else-blocks are hoisted in front of the
// Switch, which is only done for else-blocks that pass
// {ContainsOnlyPureOps}.
template <class Next>
class StructuralOptimizationReducer : public Next {
 public:
  using Next::Asm;

  template <class... Args>
  explicit StructuralOptimizationReducer(const std::tuple<Args...>& args)
      : Next(args) {}

  // Tries to replace the cascade starting at {branch} by a Switch.
  //
  // Walks along the chain of `if_false` successors as long as each branch
  // compares the same variable against a Word32 constant. If more than two
  // cases are collected, the hoistable else-blocks are inlined into the
  // current block, a Switch is emitted and an invalid OpIndex is returned.
  // Otherwise the branch is forwarded unchanged to the next reducer and its
  // result is returned.
  OpIndex ReduceInputGraphBranch(OpIndex input_index, const BranchOp& branch) {
    LABEL_BLOCK(no_change) {
      return Next::ReduceInputGraphBranch(input_index, branch);
    }

    TRACE("[structural] Calling ReduceInputGraphBranch for index: %u\n",
          static_cast<unsigned int>(input_index.id()));

    base::SmallVector<SwitchOp::Case, 16> cases;
    base::SmallVector<const Block*, 16> false_blocks;

    // The `if_false` successor of the last branch that was accepted as a
    // switch case. It must only be updated once a branch has passed all
    // bailout checks; otherwise a rejected branch would be dropped and its
    // false successor would wrongly become the default target.
    Block* default_block = nullptr;
    const BranchOp* current_branch = &branch;
    BranchHint default_hint = BranchHint::kNone;

    OpIndex switch_var = OpIndex::Invalid();
    while (true) {
      // If we encounter a condition that is not equality, we can't turn it
      // into a switch case.
      const EqualOp* equal = Asm()
                                 .input_graph()
                                 .Get(current_branch->condition())
                                 .template TryCast<EqualOp>();
      if (!equal || equal->rep != RegisterRepresentation::Word32()) {
        TRACE(
            "\t [bailout] Branch with different condition than Word32 "
            "Equal.\n");
        break;
      }

      // MachineOptimizationReducer should normalize equality to put constants
      // right.
      const Operation& right_op = Asm().input_graph().Get(equal->right());
      if (!right_op.Is<ConstantOp>()) {
        TRACE("\t [bailout] No constant on the right side of Equal.\n");
        break;
      }

      // We can only turn Word32 constant equals to switch cases.
      const ConstantOp& const_op = right_op.Cast<ConstantOp>();
      if (const_op.kind != ConstantOp::Kind::kWord32) {
        TRACE("\t [bailout] Constant is not of type Word32.\n");
        break;
      }

      // If we encounter equal to a different value, we can't introduce
      // a switch.
      OpIndex current_var = equal->left();
      if (!switch_var.valid()) {
        switch_var = current_var;
      } else if (switch_var != current_var) {
        TRACE("\t [bailout] Not all branches compare the same variable.\n");
        break;
      }

      // From here on the branch is accepted as a switch case.
      Block* current_if_true = current_branch->if_true;
      Block* current_if_false = current_branch->if_false;
      DCHECK(current_if_true && current_if_false);

      // The current_if_true block becomes the corresponding switch case block.
      uint32_t value = const_op.word32();
      cases.emplace_back(value, Asm().MapToNewGraph(current_if_true->index()),
                         current_branch->hint);

      // All pure ops from the if_false block should be executed before
      // the switch, except the last Branch operation (which we drop).
      false_blocks.push_back(current_if_false);
      default_block = current_if_false;

      // If we encounter a if_false block that doesn't end with a Branch,
      // this means we've reached the end of the cascade.
      const Operation& maybe_branch =
          current_if_false->LastOperation(Asm().input_graph());
      if (!maybe_branch.Is<BranchOp>()) {
        TRACE("\t [break] Reached end of the if-else cascade.\n");
        break;
      }

      default_hint = current_branch->hint;

      // Iterate to the next if_false block in the cascade.
      current_branch = &maybe_branch.template Cast<BranchOp>();

      // {current_if_false} gets hoisted in front of the switch if the next
      // branch is accepted as a case, so it is this block (and not the
      // successor of the next branch) that has to consist of pure ops only.
      if (!ContainsOnlyPureOps(current_if_false, Asm().input_graph())) {
        TRACE("\t [break] End of only-pure-ops cascade reached.\n");
        break;
      }
    }

    // Probably better to keep short if-else cascades as they are.
    if (cases.size() <= 2) {
      TRACE("\t [bailout] Cascade with less than 2 levels of nesting.\n");
      goto no_change;
    }
    CHECK_EQ(cases.size(), false_blocks.size());
    DCHECK_NOT_NULL(default_block);

    // We're skipping the last false block, as it becomes the default block.
    for (size_t i = 0; i < false_blocks.size() - 1; ++i) {
      const Block* block = false_blocks[i];
      InlineAllOperationsWithoutLast(block);
    }

    TRACE("[reduce] Successfully emit a Switch with %zu cases.\n",
          cases.size());

    // The if_false block of the last accepted branch becomes the default
    // case.
    Asm().Switch(
        Asm().MapToNewGraph(switch_var),
        Asm().output_graph().graph_zone()->CloneVector(base::VectorOf(cases)),
        Asm().MapToNewGraph(default_block->index()), default_hint);
    return OpIndex::Invalid();
  }

 private:
  // Returns true if no operation of {block}, ignoring its last one, has the
  // `observable_when_unused` property.
  static bool ContainsOnlyPureOps(const Block* block, const Graph& graph) {
    for (const auto& op : base::IterateWithoutLast(graph.operations(*block))) {
      // It's fine to allow allocations and reads. Writes and
      // aborting should be disallowed though.
      if (op.Properties().observable_when_unused) {
        return false;
      }
    }
    return true;
  }

  // Visits and emits {input_block} right now (ie, in the current block)
  // until the one before the last operation is reached.
  void InlineAllOperationsWithoutLast(const Block* input_block) {
    base::iterator_range<Graph::OpIndexIterator> all_ops =
        Asm().input_graph().OperationIndices(*input_block);

    for (OpIndex op : base::IterateWithoutLast(all_ops)) {
      Asm().InlineOp(op, input_block);
    }
  }
};
} // namespace v8::internal::compiler::turboshaft
#undef TRACE
#endif // V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_