Revert "[turboshaft] Implement structural optimization reducer"
This reverts commit 596e56868d.
Reason for revert: Breaks JetStream2 on perf bots.
Original change's description:
> [turboshaft] Implement structural optimization reducer
>
> This CL adds a new Turboshaft reducer that is suitable for changing the
> graph in a way that doesn't reduce individual operations, rather changes
> the structure of the graph. The first such reduction we support is
> transforming if-else cascades that check if a given value is equal to
> any constant from a given set into a switch with cases corresponding to
> the constants in the set.
>
> Bug: v8:12783
> Change-Id: Iee1e5581a334c3dc255d673d2178f76706e6dae2
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4106752
> Commit-Queue: Maya Lekova <mslekova@chromium.org>
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Clemens Backes <clemensb@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#85561}
Bug: v8:12783
Change-Id: Ic9ded7f4e18258346f547600cb541b2fa094fb8d
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4210088
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Auto-Submit: Maya Lekova <mslekova@chromium.org>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#85566}
This commit is contained in:
parent
614f62d8a1
commit
8ce2c87338
@ -2936,7 +2936,6 @@ filegroup(
|
||||
"src/compiler/turboshaft/simplify-tf-loops.cc",
|
||||
"src/compiler/turboshaft/simplify-tf-loops.h",
|
||||
"src/compiler/turboshaft/snapshot-table.h",
|
||||
"src/compiler/turboshaft/structural-optimization-reducer.h",
|
||||
"src/compiler/turboshaft/type-inference-reducer.h",
|
||||
"src/compiler/turboshaft/type-parser.cc",
|
||||
"src/compiler/turboshaft/type-parser.h",
|
||||
|
1
BUILD.gn
1
BUILD.gn
@ -3013,7 +3013,6 @@ v8_header_set("v8_internal_headers") {
|
||||
"src/compiler/turboshaft/sidetable.h",
|
||||
"src/compiler/turboshaft/simplify-tf-loops.h",
|
||||
"src/compiler/turboshaft/snapshot-table.h",
|
||||
"src/compiler/turboshaft/structural-optimization-reducer.h",
|
||||
"src/compiler/turboshaft/type-inference-reducer.h",
|
||||
"src/compiler/turboshaft/type-parser.h",
|
||||
"src/compiler/turboshaft/typed-optimizations-reducer.h",
|
||||
|
@ -93,7 +93,7 @@ struct DerefPtrIterator : base::iterator<std::bidirectional_iterator_tag, T> {
|
||||
// The signature avoids binding to temporaries (T&& / const T&) on purpose. The
|
||||
// lifetime of a temporary would not extend to a range-based for loop using it.
|
||||
template <typename T>
|
||||
auto Reversed(T& t) {
|
||||
auto Reversed(T& t) { // NOLINT(runtime/references): match {rbegin} and {rend}
|
||||
return make_iterator_range(std::rbegin(t), std::rend(t));
|
||||
}
|
||||
|
||||
@ -105,31 +105,6 @@ auto Reversed(const iterator_range<T>& t) {
|
||||
return make_iterator_range(std::rbegin(t), std::rend(t));
|
||||
}
|
||||
|
||||
// {IterateWithoutLast} returns a container adapter usable in a range-based
|
||||
// "for" statement for iterating all elements without the last in a forward
|
||||
// order. It performs a check whether the container is empty.
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// std::vector<int> v = ...;
|
||||
// for (int i : base::IterateWithoutLast(v)) {
|
||||
// // iterates through v front to --back
|
||||
// }
|
||||
//
|
||||
// The signature avoids binding to temporaries, see the remark in {Reversed}.
|
||||
template <typename T>
|
||||
auto IterateWithoutLast(T& t) {
|
||||
DCHECK_NE(std::begin(t), std::end(t));
|
||||
auto new_end = std::end(t);
|
||||
return make_iterator_range(std::begin(t), --new_end);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto IterateWithoutLast(const iterator_range<T>& t) {
|
||||
iterator_range<T> range_copy = {t.begin(), t.end()};
|
||||
return IterateWithoutLast(range_copy);
|
||||
}
|
||||
|
||||
} // namespace base
|
||||
} // namespace v8
|
||||
|
||||
|
@ -96,7 +96,6 @@
|
||||
#include "src/compiler/turboshaft/recreate-schedule.h"
|
||||
#include "src/compiler/turboshaft/select-lowering-reducer.h"
|
||||
#include "src/compiler/turboshaft/simplify-tf-loops.h"
|
||||
#include "src/compiler/turboshaft/structural-optimization-reducer.h"
|
||||
#include "src/compiler/turboshaft/type-inference-reducer.h"
|
||||
#include "src/compiler/turboshaft/typed-optimizations-reducer.h"
|
||||
#include "src/compiler/turboshaft/types.h"
|
||||
@ -2133,7 +2132,6 @@ struct OptimizeTurboshaftPhase {
|
||||
UnparkedScopeIfNeeded scope(data->broker(),
|
||||
v8_flags.turboshaft_trace_reduction);
|
||||
turboshaft::OptimizationPhase<
|
||||
turboshaft::StructuralOptimizationReducer,
|
||||
turboshaft::LateEscapeAnalysisReducer,
|
||||
turboshaft::MemoryOptimizationReducer, turboshaft::VariableReducer,
|
||||
turboshaft::MachineOptimizationReducerSignallingNanImpossible,
|
||||
@ -3118,7 +3116,7 @@ bool PipelineImpl::OptimizeGraph(Linkage* linkage) {
|
||||
}
|
||||
|
||||
// Optimize control flow.
|
||||
if (v8_flags.turbo_cf_optimization && !v8_flags.turboshaft) {
|
||||
if (v8_flags.turbo_cf_optimization) {
|
||||
Run<ControlFlowOptimizationPhase>();
|
||||
RunPrintAndVerify(ControlFlowOptimizationPhase::phase_name(), true);
|
||||
}
|
||||
|
@ -25,7 +25,7 @@ namespace v8::internal::compiler::turboshaft {
|
||||
//
|
||||
// OperationState reflects the liveness of operations. An operation is live if
|
||||
//
|
||||
// 1) The operation has the `observable_when_unused` property
|
||||
// 1) The operation has the `is_required_when_unused` property
|
||||
// 2) Any of its outputs is live (is used in a live operation).
|
||||
//
|
||||
// If the operation is not live, it is dead and can be eliminated.
|
||||
@ -279,7 +279,7 @@ class DeadCodeAnalysis {
|
||||
// state, so we skip them here.
|
||||
liveness_[index] = OperationState::kLive;
|
||||
continue;
|
||||
} else if (op.Properties().observable_when_unused) {
|
||||
} else if (op.Properties().is_required_when_unused) {
|
||||
op_state = OperationState::kLive;
|
||||
} else if (op.Is<PhiOp>()) {
|
||||
has_live_phis = has_live_phis || (op_state == OperationState::kLive);
|
||||
|
@ -510,7 +510,7 @@ class Graph {
|
||||
Op& op = Op::New(this, args...);
|
||||
IncrementInputUses(op);
|
||||
|
||||
if (op.Properties().observable_when_unused) {
|
||||
if (op.Properties().is_required_when_unused) {
|
||||
// Once the graph is built, an operation with a `saturated_use_count` of 0
|
||||
// is guaranteed to be unused and can be removed. Thus, to avoid removing
|
||||
// operations that never have uses (such as Goto or Branch), we set the
|
||||
|
@ -152,7 +152,7 @@ struct OpProperties {
|
||||
// guaranteed to be derived.
|
||||
const bool is_pure_no_allocation = !(can_read || can_write || can_allocate ||
|
||||
can_abort || is_block_terminator);
|
||||
const bool observable_when_unused =
|
||||
const bool is_required_when_unused =
|
||||
can_write || can_abort || is_block_terminator;
|
||||
// Operations that don't read, write, allocate and aren't block terminators
|
||||
// can be eliminated via value numbering, which means that if there are two
|
||||
|
@ -47,7 +47,8 @@ struct AnalyzerBase {
|
||||
void Run() {}
|
||||
bool OpIsUsed(OpIndex i) const {
|
||||
const Operation& op = graph.Get(i);
|
||||
return op.saturated_use_count > 0 || op.Properties().observable_when_unused;
|
||||
return op.saturated_use_count > 0 ||
|
||||
op.Properties().is_required_when_unused;
|
||||
}
|
||||
|
||||
explicit AnalyzerBase(const Graph& graph, Zone* phase_zone)
|
||||
@ -58,7 +59,7 @@ struct AnalyzerBase {
|
||||
// Analyzers modify the input graph in-place when they want to mark some
|
||||
// Operations as removeable. In order to make that work for operations that have
|
||||
// no uses such as Goto and Branch, all operations that have the property
|
||||
// `observable_when_unused` have a non-zero `saturated_use_count`.
|
||||
// `is_required_when_unused` have a non-zero `saturated_use_count`.
|
||||
V8_INLINE bool ShouldSkipOperation(const Operation& op) {
|
||||
return op.saturated_use_count == 0;
|
||||
}
|
||||
@ -244,14 +245,6 @@ class GraphVisitor {
|
||||
}
|
||||
}
|
||||
|
||||
// {InlineOp} introduces two limitations unlike {CloneAndInlineBlock}:
|
||||
// 1. The input operation must not be emitted anymore as part of its
|
||||
// regular input block;
|
||||
// 2. {InlineOp} must not be used multiple times for the same input op.
|
||||
bool InlineOp(OpIndex index, const Block* input_block) {
|
||||
return VisitOp<false>(index, input_block);
|
||||
}
|
||||
|
||||
template <bool can_be_invalid = false>
|
||||
OpIndex MapToNewGraph(OpIndex old_index, int predecessor_index = -1) {
|
||||
DCHECK(old_index.valid());
|
||||
@ -767,7 +760,6 @@ class GraphVisitor {
|
||||
assembler().Set(*var, new_index);
|
||||
return;
|
||||
}
|
||||
DCHECK(!op_mapping_[old_index.id()].valid());
|
||||
op_mapping_[old_index.id()] = new_index;
|
||||
}
|
||||
|
||||
|
@ -1,233 +0,0 @@
|
||||
// Copyright 2022 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
|
||||
#define V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include "src/compiler/turboshaft/assembler.h"
|
||||
#include "src/compiler/turboshaft/index.h"
|
||||
#include "src/zone/zone.h"
|
||||
|
||||
// The StructuralOptimizationReducer reducer is suitable for changing the
|
||||
// graph in a way that doesn't reduce individual operations, rather changes
|
||||
// the structure of the graph.
|
||||
//
|
||||
// We currently support a reduction which transforms if-else cascades
|
||||
// that check if a given value is equal to a 32-bit constant from a given set
|
||||
// into a switch with cases corresponding to the constants in the set.
|
||||
//
|
||||
// So for example code like:
|
||||
// [only pure ops 1]
|
||||
// if (x == 3) {
|
||||
// B1;
|
||||
// } else {
|
||||
// [only pure ops 2]
|
||||
// if (x == 5) {
|
||||
// B2;
|
||||
// } else {
|
||||
// B3;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// will be transformed to:
|
||||
// [only pure ops 1]
|
||||
// [only pure ops 2]
|
||||
// switch (x) {
|
||||
// case 3:
|
||||
// B1;
|
||||
// case 5:
|
||||
// B2;
|
||||
// default:
|
||||
// B3;
|
||||
// }
|
||||
//
|
||||
// Or represented graphically:
|
||||
// [only pure ops 1]
|
||||
// [only pure ops 1] [only pure ops 2]
|
||||
// x == 3 Switch(x)
|
||||
// Branch | | |
|
||||
// | | ----- | ------
|
||||
// ----- ------ case 3 | | | default
|
||||
// | | | | |
|
||||
// T | | F v | |
|
||||
// v v B1 | v
|
||||
// B1 [only pure ops 2] becomes | B3
|
||||
// x == 5 ======> case 5 |
|
||||
// Branch v
|
||||
// | | B2
|
||||
// ----- ------
|
||||
// | |
|
||||
// T | | F
|
||||
// v v
|
||||
// B2 B3
|
||||
//
|
||||
|
||||
// TODO(mslekova): Introduce a flag and move to a common graph place.
|
||||
// #define TRACE_REDUCTIONS
|
||||
#ifdef TRACE_REDUCTIONS
|
||||
#define TRACE(str, ...) \
|
||||
{ PrintF(str, ##__VA_ARGS__); }
|
||||
#else // TRACE_REDUCTIONS
|
||||
#define TRACE(str, ...)
|
||||
|
||||
#endif // TRACE_REDUCTIONS
|
||||
|
||||
namespace v8::internal::compiler::turboshaft {
|
||||
|
||||
template <class Next>
|
||||
class StructuralOptimizationReducer : public Next {
|
||||
public:
|
||||
using Next::Asm;
|
||||
template <class... Args>
|
||||
explicit StructuralOptimizationReducer(const std::tuple<Args...>& args)
|
||||
: Next(args) {}
|
||||
|
||||
OpIndex ReduceInputGraphBranch(OpIndex input_index, const BranchOp& branch) {
|
||||
LABEL_BLOCK(no_change) {
|
||||
return Next::ReduceInputGraphBranch(input_index, branch);
|
||||
}
|
||||
|
||||
TRACE("[structural] Calling ReduceInputGraphBranch for index: %u\n",
|
||||
static_cast<unsigned int>(input_index.id()));
|
||||
|
||||
base::SmallVector<SwitchOp::Case, 16> cases;
|
||||
base::SmallVector<const Block*, 16> false_blocks;
|
||||
|
||||
Block* current_if_false;
|
||||
const BranchOp* current_branch = &branch;
|
||||
BranchHint default_hint = BranchHint::kNone;
|
||||
|
||||
OpIndex switch_var = OpIndex::Invalid();
|
||||
while (true) {
|
||||
Block* current_if_true = current_branch->if_true;
|
||||
current_if_false = current_branch->if_false;
|
||||
|
||||
DCHECK(current_if_true && current_if_false);
|
||||
|
||||
// If we encounter a condition that is not equality, we can't turn it
|
||||
// into a switch case.
|
||||
const EqualOp* equal = Asm()
|
||||
.input_graph()
|
||||
.Get(current_branch->condition())
|
||||
.template TryCast<EqualOp>();
|
||||
if (!equal || equal->rep != RegisterRepresentation::Word32()) {
|
||||
TRACE(
|
||||
"\t [bailout] Branch with different condition than Word32 "
|
||||
"Equal.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// MachineOptimizationReducer should normalize equality to put constants
|
||||
// right.
|
||||
const Operation& right_op = Asm().input_graph().Get(equal->right());
|
||||
if (!right_op.Is<ConstantOp>()) {
|
||||
TRACE("\t [bailout] No constant on the right side of Equal.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// We can only turn Word32 constant equals to switch cases.
|
||||
const ConstantOp& const_op = right_op.Cast<ConstantOp>();
|
||||
if (const_op.kind != ConstantOp::Kind::kWord32) {
|
||||
TRACE("\t [bailout] Constant is not of type Word32.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// If we encounter equal to a different value, we can't introduce
|
||||
// a switch.
|
||||
OpIndex current_var = equal->left();
|
||||
if (!switch_var.valid()) {
|
||||
switch_var = current_var;
|
||||
} else if (switch_var != current_var) {
|
||||
TRACE("\t [bailout] Not all branches compare the same variable.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
// The current_if_true block becomes the corresponding switch case block.
|
||||
uint32_t value = const_op.word32();
|
||||
cases.emplace_back(value, Asm().MapToNewGraph(current_if_true->index()),
|
||||
current_branch->hint);
|
||||
|
||||
// All pure ops from the if_false block should be executed before
|
||||
// the switch, except the last Branch operation (which we drop).
|
||||
false_blocks.push_back(current_if_false);
|
||||
|
||||
// If we encounter a if_false block that doesn't end with a Branch,
|
||||
// this means we've reached the end of the cascade.
|
||||
const Operation& maybe_branch =
|
||||
current_if_false->LastOperation(Asm().input_graph());
|
||||
if (!maybe_branch.Is<BranchOp>()) {
|
||||
TRACE("\t [break] Reached end of the if-else cascade.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
default_hint = current_branch->hint;
|
||||
|
||||
// Iterate to the next if_false block in the cascade.
|
||||
current_branch = &maybe_branch.template Cast<BranchOp>();
|
||||
|
||||
// As long as the else blocks contain only pure ops, we can keep
|
||||
// traversing the if-else cascade.
|
||||
if (!ContainsOnlyPureOps(current_branch->if_false, Asm().input_graph())) {
|
||||
TRACE("\t [break] End of only-pure-ops cascade reached.\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Probably better to keep short if-else cascades as they are.
|
||||
if (cases.size() <= 2) {
|
||||
TRACE("\t [bailout] Cascade with less than 2 levels of nesting.\n");
|
||||
goto no_change;
|
||||
}
|
||||
CHECK_EQ(cases.size(), false_blocks.size());
|
||||
|
||||
// We're skipping the last false block, as it becomes the default block.
|
||||
for (size_t i = 0; i < false_blocks.size() - 1; ++i) {
|
||||
const Block* block = false_blocks[i];
|
||||
InlineAllOperationsWithoutLast(block);
|
||||
}
|
||||
|
||||
TRACE("[reduce] Successfully emit a Switch with %z cases.", cases.size());
|
||||
|
||||
// The last current_if_false block that ends the cascade becomes the default
|
||||
// case.
|
||||
Block* default_block = current_if_false;
|
||||
Asm().Switch(
|
||||
Asm().MapToNewGraph(switch_var),
|
||||
Asm().output_graph().graph_zone()->CloneVector(base::VectorOf(cases)),
|
||||
Asm().MapToNewGraph(default_block->index()), default_hint);
|
||||
return OpIndex::Invalid();
|
||||
}
|
||||
|
||||
private:
|
||||
static bool ContainsOnlyPureOps(const Block* block, const Graph& graph) {
|
||||
for (const auto& op : base::IterateWithoutLast(graph.operations(*block))) {
|
||||
OpProperties props = op.Properties();
|
||||
// It's fine to allow allocations and reads. Writes and
|
||||
// aborting should be disallowed though.
|
||||
if (props.observable_when_unused) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Visits and emits {input_block} right now (ie, in the current block)
|
||||
// until the one before the last operation is reached.
|
||||
void InlineAllOperationsWithoutLast(const Block* input_block) {
|
||||
base::iterator_range<Graph::OpIndexIterator> all_ops =
|
||||
Asm().input_graph().OperationIndices(*input_block);
|
||||
|
||||
for (OpIndex op : base::IterateWithoutLast(all_ops)) {
|
||||
Asm().InlineOp(op, input_block);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace v8::internal::compiler::turboshaft
|
||||
|
||||
#undef TRACE
|
||||
|
||||
#endif // V8_COMPILER_TURBOSHAFT_STRUCTURAL_OPTIMIZATION_REDUCER_H_
|
Loading…
Reference in New Issue
Block a user