Reland "[turboshaft] Port LateEscapeAnalysis"
This reverts commit 0bd121f8e6.
MemoryAnalyzer wasn't calling LateEscapeAnalysisReducer's
ShouldSkipOperation method; because the call site used a generic
Operation, the call resolved to BaseReducer's method instead. As a
result, MemoryAnalyzer planned some allocation folding that never
actually happened, which led to memory corruption.
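For illustration only, the failure mode resembles this simplified sketch
(hypothetical class and method names, not the actual turboshaft code): a
non-virtual method looked up through a base-typed reference statically
resolves to the base implementation, so the derived reducer's skip logic
never runs.

#include <iostream>

struct Operation { int saturated_use_count = 1; };

struct BaseReducer {
  // Generic fallback: never skips anything.
  bool ShouldSkipOperation(const Operation&) const { return false; }
};

struct EscapeAnalysisReducer : BaseReducer {
  // Intended behavior: skip operations whose uses were all removed.
  bool ShouldSkipOperation(const Operation& op) const {
    return op.saturated_use_count == 0;
  }
};

// The analyzer sees the reducer through its base type, so the call
// statically resolves to BaseReducer::ShouldSkipOperation and removed
// operations are still processed (folding decisions go stale).
bool Skips(const BaseReducer& r, const Operation& op) {
  return r.ShouldSkipOperation(op);
}

int main() {
  EscapeAnalysisReducer reducer;
  Operation removed_op;
  removed_op.saturated_use_count = 0;
  std::cout << Skips(reducer, removed_op) << "\n";  // prints 0: not skipped!
}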
Original change's description:
> [turboshaft] Port LateEscapeAnalysis
>
> Bug: v8:12783
> Change-Id: Id5fa026d103dc67e05322b725f34186124bc5936
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4054621
> Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#84603}
Bug: v8:12783
Change-Id: I103eb2f518943c0c57bc3e10471d1c47f5262599
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4075724
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Darius Mercadier <dmercadier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#84660}
Parent: 8d00d52680
Commit: 7795179da6
@@ -2898,6 +2898,8 @@ filegroup(
         "src/compiler/turboshaft/index.h",
         "src/compiler/turboshaft/graph-visualizer.cc",
         "src/compiler/turboshaft/graph-visualizer.h",
+        "src/compiler/turboshaft/late-escape-analysis-reducer.h",
+        "src/compiler/turboshaft/late-escape-analysis-reducer.cc",
         "src/compiler/turboshaft/layered-hash-map.h",
         "src/compiler/turboshaft/machine-optimization-reducer.h",
         "src/compiler/turboshaft/memory-optimization.cc",
BUILD.gn (2 changes)

@@ -2958,6 +2958,7 @@ v8_header_set("v8_internal_headers") {
     "src/compiler/turboshaft/graph-visualizer.h",
     "src/compiler/turboshaft/graph.h",
     "src/compiler/turboshaft/index.h",
+    "src/compiler/turboshaft/late-escape-analysis-reducer.h",
     "src/compiler/turboshaft/layered-hash-map.h",
     "src/compiler/turboshaft/machine-optimization-reducer.h",
     "src/compiler/turboshaft/memory-optimization.h",

@@ -4293,6 +4294,7 @@ v8_source_set("v8_turboshaft") {
     "src/compiler/turboshaft/graph-builder.cc",
     "src/compiler/turboshaft/graph-visualizer.cc",
     "src/compiler/turboshaft/graph.cc",
+    "src/compiler/turboshaft/late-escape-analysis-reducer.cc",
     "src/compiler/turboshaft/memory-optimization.cc",
     "src/compiler/turboshaft/operations.cc",
     "src/compiler/turboshaft/optimization-phase.cc",
@@ -85,6 +85,7 @@
 #include "src/compiler/turboshaft/graph-builder.h"
 #include "src/compiler/turboshaft/graph-visualizer.h"
 #include "src/compiler/turboshaft/graph.h"
+#include "src/compiler/turboshaft/late-escape-analysis-reducer.h"
 #include "src/compiler/turboshaft/machine-optimization-reducer.h"
 #include "src/compiler/turboshaft/memory-optimization.h"
 #include "src/compiler/turboshaft/optimization-phase.h"

@@ -1964,8 +1965,7 @@ struct LateOptimizationPhase {

   void Run(PipelineData* data, Zone* temp_zone) {
     if (data->HasTurboshaftGraph()) {
-      // TODO(dmercadier,tebbi): port missing reducers (LateEscapeAnalysis and
-      // CommonOperatorReducer) to turboshaft.
+      // TODO(dmercadier,tebbi): add missing CommonOperatorReducer.
       turboshaft::OptimizationPhase<
           turboshaft::VariableReducer, turboshaft::BranchEliminationReducer,
           turboshaft::SelectLoweringReducer,

@@ -1994,8 +1994,8 @@ struct LateOptimizationPhase {
     JSGraphAssembler graph_assembler(data->jsgraph(), temp_zone,
                                      BranchSemantics::kMachine);
     SelectLowering select_lowering(&graph_assembler, data->graph());
-    AddReducer(data, &graph_reducer, &escape_analysis);
     if (!v8_flags.turboshaft) {
+      AddReducer(data, &graph_reducer, &escape_analysis);
       AddReducer(data, &graph_reducer, &branch_condition_elimination);
     }
     AddReducer(data, &graph_reducer, &dead_code_elimination);

@@ -2094,6 +2094,7 @@ struct OptimizeTurboshaftPhase {
     UnparkedScopeIfNeeded scope(data->broker(),
                                 v8_flags.turboshaft_trace_reduction);
     turboshaft::OptimizationPhase<
+        turboshaft::LateEscapeAnalysisReducer,
         turboshaft::MemoryOptimizationReducer, turboshaft::VariableReducer,
         turboshaft::MachineOptimizationReducerSignallingNanImpossible,
         turboshaft::ValueNumberingReducer>::
@@ -53,6 +53,7 @@ class ReducerStack<Assembler, FirstReducer, Reducers...>
 template <class Assembler>
 class ReducerStack<Assembler> {
  public:
+  using AssemblerType = Assembler;
   Assembler& Asm() { return *static_cast<Assembler*>(this); }
 };

@@ -454,6 +454,8 @@ class Graph {
     return *ptr;
   }

+  void MarkAsUnused(OpIndex i) { Get(i).saturated_use_count = 0; }
+
   const Block& StartBlock() const { return Get(BlockIndex(0)); }

   Block& Get(BlockIndex i) {

@@ -488,6 +490,16 @@ class Graph {
 #endif  // DEBUG
     Op& op = Op::New(this, args...);
     IncrementInputUses(op);
+
+    if (op.Properties().is_required_when_unused) {
+      // Once the graph is built, an operation with a `saturated_use_count` of 0
+      // is guaranteed to be unused and can be removed. Thus, to avoid removing
+      // operations that never have uses (such as Goto or Branch), we set the
+      // `saturated_use_count` of Operations that are `required_when_unused`
+      // to 1.
+      op.saturated_use_count = 1;
+    }
+
     DCHECK_EQ(result, Index(op));
 #ifdef DEBUG
     for (OpIndex input : op.inputs()) {
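As a rough sketch of the invariant this enforces (toy types, not the V8
code): terminators like Goto and Branch produce no value and thus have
zero uses, so without the sentinel any use_count == 0 heuristic would
treat them as dead.

#include <cassert>

struct OpProperties { bool is_required_when_unused = false; };

struct ToyOp {
  OpProperties props;
  int saturated_use_count = 0;
};

// Mirrors the logic added to Graph::Add above: pin operations that must
// survive even though nothing consumes their (non-existent) result.
ToyOp MakeOp(OpProperties props) {
  ToyOp op{props};
  if (props.is_required_when_unused) op.saturated_use_count = 1;
  return op;
}

bool ShouldSkipOperation(const ToyOp& op) {
  return op.saturated_use_count == 0;
}

int main() {
  ToyOp goto_op = MakeOp({/*is_required_when_unused=*/true});
  ToyOp dead_add = MakeOp({/*is_required_when_unused=*/false});
  assert(!ShouldSkipOperation(goto_op));  // kept despite having no uses
  assert(ShouldSkipOperation(dead_add));  // removable: nothing uses it
}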
src/compiler/turboshaft/late-escape-analysis-reducer.cc (new file, 101 lines)
@@ -0,0 +1,101 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/turboshaft/late-escape-analysis-reducer.h"
+
+namespace v8::internal::compiler::turboshaft {
+
+void LateEscapeAnalysisAnalyzer::Run() {
+  CollectUsesAndAllocations();
+  FindRemovableAllocations();
+}
+
+void LateEscapeAnalysisAnalyzer::RecordAllocateUse(OpIndex alloc, OpIndex use) {
+  auto [it, new_entry] = alloc_uses_.try_emplace(alloc, phase_zone_);
+  auto& uses = it->second;
+  if (new_entry) {
+    uses.reserve(graph_.Get(alloc).saturated_use_count);
+  }
+  uses.push_back(use);
+}
+
+// Collects the Allocate Operations and their uses.
+void LateEscapeAnalysisAnalyzer::CollectUsesAndAllocations() {
+  for (auto& op : graph_.AllOperations()) {
+    if (ShouldSkipOperation(op)) continue;
+    OpIndex op_index = graph_.Index(op);
+    for (OpIndex input : op.inputs()) {
+      if (graph_.Get(input).Is<AllocateOp>()) {
+        RecordAllocateUse(input, op_index);
+      }
+    }
+    if (op.Is<AllocateOp>()) {
+      allocs_.push_back(op_index);
+    }
+  }
+}
+
+void LateEscapeAnalysisAnalyzer::FindRemovableAllocations() {
+  while (!allocs_.empty()) {
+    OpIndex current_alloc = allocs_.back();
+    allocs_.pop_back();
+
+    if (ShouldSkipOperation(graph_.Get(current_alloc))) {
+      // We are re-visiting an allocation that we've actually already removed.
+      continue;
+    }
+
+    if (!AllocationIsEscaping(current_alloc)) {
+      MarkToRemove(current_alloc);
+    }
+  }
+}
+
+bool LateEscapeAnalysisAnalyzer::AllocationIsEscaping(OpIndex alloc) {
+  if (alloc_uses_.find(alloc) == alloc_uses_.end()) return false;
+  for (OpIndex use : alloc_uses_.at(alloc)) {
+    if (EscapesThroughUse(alloc, use)) return true;
+  }
+  // We haven't found any non-store use.
+  return false;
+}
+
+// Returns true if {using_op_idx} is an operation that forces {alloc} to be
+// emitted.
+bool LateEscapeAnalysisAnalyzer::EscapesThroughUse(OpIndex alloc,
+                                                   OpIndex using_op_idx) {
+  if (ShouldSkipOperation(graph_.Get(alloc))) {
+    // {using_op_idx} is an Allocate itself, which has been removed.
+    return false;
+  }
+  const Operation& op = graph_.Get(using_op_idx);
+  if (const StoreOp* store_op = op.TryCast<StoreOp>()) {
+    // A StoreOp only makes {alloc} escape if it uses {alloc} as the {value} or
+    // the {index}. Put otherwise, StoreOp makes {alloc} escape if it writes
+    // {alloc}, but not if it writes **to** {alloc}.
+    return store_op->value() == alloc;
+  }
+  return true;
+}
+
+void LateEscapeAnalysisAnalyzer::MarkToRemove(OpIndex alloc) {
+  graph_.MarkAsUnused(alloc);
+  if (alloc_uses_.find(alloc) == alloc_uses_.end()) {
+    return;
+  }
+
+  // The uses of {alloc} should also be skipped.
+  for (OpIndex use : alloc_uses_.at(alloc)) {
+    graph_.MarkAsUnused(use);
+    const StoreOp& store = graph_.Get(use).Cast<StoreOp>();
+    if (graph_.Get(store.value()).Is<AllocateOp>()) {
+      // This store was storing the result of an allocation. Because we now
+      // removed this store, we might be able to remove the other allocation
+      // as well.
+      allocs_.push_back(store.value());
+    }
+  }
+}
+
+}  // namespace v8::internal::compiler::turboshaft
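To make the cascade in FindRemovableAllocations/MarkToRemove concrete,
here is a self-contained toy model (hypothetical types; the real analysis
works on turboshaft's Graph). Given `a1 = Allocate; a2 = Allocate;
Store(object=a1, value=a2)`, a2 escapes only through the store into a1;
once a1 is removed, the store goes with it and a2 becomes removable on a
later worklist pass.

#include <iostream>
#include <vector>

enum class Kind { kAllocate, kStore };

struct Op {
  Kind kind = Kind::kAllocate;
  int store_object = -1;  // for kStore: index of the op written to
  int store_value = -1;   // for kStore: index of the op being written
  int use_count = 1;      // 0 plays the role of saturated_use_count == 0
};

// Mirrors EscapesThroughUse: only a live store *of* the allocation's value
// makes it escape; a store *to* it does not. (Non-store uses are omitted
// from this toy; in the real analysis any other use means escape.)
bool Escapes(const std::vector<Op>& g, int alloc) {
  for (const Op& op : g) {
    if (op.use_count == 0 || op.kind != Kind::kStore) continue;
    if (op.store_value == alloc) return true;
  }
  return false;
}

int main() {
  // 0: a1 = Allocate
  // 1: a2 = Allocate
  // 2: Store(object = a1, value = a2)
  std::vector<Op> g(3);
  g[2] = Op{Kind::kStore, 0, 1, 1};

  // Worklist pass, mirroring FindRemovableAllocations + MarkToRemove.
  std::vector<int> worklist = {1, 0};
  while (!worklist.empty()) {
    int a = worklist.back();
    worklist.pop_back();
    if (g[a].use_count == 0) continue;  // re-visiting a removed allocation
    if (Escapes(g, a)) continue;        // some live use keeps it alive
    g[a].use_count = 0;                 // MarkAsUnused(alloc)
    for (Op& op : g) {                  // drop its initializing stores
      if (op.kind == Kind::kStore && op.use_count != 0 &&
          op.store_object == a) {
        op.use_count = 0;
        // The stored value might now be removable too: cascade.
        if (g[op.store_value].kind == Kind::kAllocate)
          worklist.push_back(op.store_value);
      }
    }
  }
  std::cout << "a1 removed: " << (g[0].use_count == 0) << "\n"
            << "a2 removed: " << (g[1].use_count == 0) << "\n";  // both 1
}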
src/compiler/turboshaft/late-escape-analysis-reducer.h (new file, 67 lines)
@@ -0,0 +1,67 @@
+// Copyright 2022 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_
+#define V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_
+
+#include "src/compiler/turboshaft/assembler.h"
+#include "src/compiler/turboshaft/graph.h"
+#include "src/compiler/turboshaft/utils.h"
+#include "src/zone/zone-containers.h"
+#include "src/zone/zone.h"
+
+namespace v8::internal::compiler::turboshaft {
+
+// LateEscapeAnalysis removes allocations that have no uses besides the
+// stores initializing the object.
+
+class LateEscapeAnalysisAnalyzer {
+ public:
+  LateEscapeAnalysisAnalyzer(Graph& graph, Zone* zone)
+      : graph_(graph), phase_zone_(zone), alloc_uses_(zone), allocs_(zone) {}
+
+  void Run();
+
+ private:
+  void RecordAllocateUse(OpIndex alloc, OpIndex use);
+
+  void CollectUsesAndAllocations();
+  void FindRemovableAllocations();
+  bool AllocationIsEscaping(OpIndex alloc);
+  bool EscapesThroughUse(OpIndex alloc, OpIndex using_op_idx);
+  void MarkToRemove(OpIndex alloc);
+
+  Graph& graph_;
+  Zone* phase_zone_;
+
+  // {alloc_uses_} records all the uses of each AllocateOp.
+  ZoneUnorderedMap<OpIndex, ZoneVector<OpIndex>> alloc_uses_;
+  // {allocs_} is filled with all of the AllocateOp of the graph, and then
+  // iterated upon to determine which allocations can be removed and which
+  // cannot.
+  ZoneVector<OpIndex> allocs_;
+};
+
+template <class Next>
+class LateEscapeAnalysisReducer : public Next {
+ public:
+  using Next::Asm;
+
+  template <class... Args>
+  explicit LateEscapeAnalysisReducer(const std::tuple<Args...>& args)
+      : Next(args),
+        analyzer_(Asm().modifiable_input_graph(), Asm().phase_zone()) {}
+
+  void Analyze() {
+    analyzer_.Run();
+    Next::Analyze();
+  }
+
+ private:
+  LateEscapeAnalysisAnalyzer analyzer_;
+};
+
+}  // namespace v8::internal::compiler::turboshaft
+
+#endif  // V8_COMPILER_TURBOSHAFT_LATE_ESCAPE_ANALYSIS_REDUCER_H_
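The reducer slots into turboshaft's statically composed reducer stack
(see the ReducerStack hunk and the OptimizeTurboshaftPhase hunk above).
A rough sketch of the chaining idea, with made-up minimal types instead
of the real assembler machinery:

#include <iostream>
#include <tuple>

// Base of the stack: ends the Analyze() chain.
struct StackBase {
  template <class... Args>
  explicit StackBase(const std::tuple<Args...>&) {}
  void Analyze() {}  // nothing left to do
};

// Each reducer wraps the Next one and may run its own analysis first,
// mirroring LateEscapeAnalysisReducer::Analyze() above.
template <class Next>
struct ToyEscapeAnalysis : Next {
  template <class... Args>
  explicit ToyEscapeAnalysis(const std::tuple<Args...>& args) : Next(args) {}
  void Analyze() {
    std::cout << "escape analysis runs first\n";
    Next::Analyze();  // then hand off to the rest of the stack
  }
};

template <class Next>
struct ToyMemoryOptimization : Next {
  template <class... Args>
  explicit ToyMemoryOptimization(const std::tuple<Args...>& args)
      : Next(args) {}
  void Analyze() {
    std::cout << "memory optimization runs second\n";
    Next::Analyze();
  }
};

int main() {
  // Order mimics OptimizeTurboshaftPhase: LateEscapeAnalysis before
  // MemoryOptimization, so folding never sees removed allocations.
  ToyEscapeAnalysis<ToyMemoryOptimization<StackBase>> stack(std::make_tuple());
  stack.Analyze();
}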
@@ -7,7 +7,7 @@
 #include "src/codegen/interface-descriptors-inl.h"
 #include "src/compiler/linkage.h"

-namespace v8 ::internal::compiler::turboshaft {
+namespace v8::internal::compiler::turboshaft {

 const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone) {
   return TSCallDescriptor::Create(
@@ -36,6 +36,10 @@ void MemoryAnalyzer::Run() {
 }

 void MemoryAnalyzer::Process(const Operation& op) {
+  if (ShouldSkipOperation(op)) {
+    return;
+  }
+
   if (auto* alloc = op.TryCast<AllocateOp>()) {
     ProcessAllocation(*alloc);
     return;
@@ -78,7 +82,7 @@ void MemoryAnalyzer::ProcessBlockTerminator(const Operation& op) {
     // speculation resulting in processing the loop twice.
     for (const Operation& op :
          input_graph.operations(*goto_op->destination)) {
-      if (op.Properties().can_allocate) {
+      if (op.Properties().can_allocate && !ShouldSkipOperation(op)) {
         state = BlockState();
         break;
       }
@@ -96,7 +100,8 @@ void MemoryAnalyzer::ProcessBlockTerminator(const Operation& op) {
 void MemoryAnalyzer::ProcessAllocation(const AllocateOp& alloc) {
   if (ShouldSkipOptimizationStep()) return;
   base::Optional<uint64_t> new_size;
-  if (auto* size = input_graph.Get(alloc.size()).TryCast<ConstantOp>()) {
+  if (auto* size =
+          input_graph.Get(alloc.size()).template TryCast<ConstantOp>()) {
     new_size = size->integral();
   }
   // If the new allocation has a static size and is of the same type, then we
@@ -141,10 +146,7 @@ void MemoryAnalyzer::MergeCurrentStateIntoSuccessor(const Block* successor) {
     return;
   }
   // All predecessors need to have the same last allocation for us to continue
-  // folding into it. This is only true when all the predecessors don't do any
-  // allocations and have the same ancestor that does an allocation (and there
-  // is no allocation on the path from the predecessors to their allocating
-  // common ancestor).
+  // folding into it.
   if (target_state->last_allocation != state.last_allocation) {
     target_state = BlockState();
     return;
@@ -11,7 +11,7 @@
 #include "src/compiler/turboshaft/assembler.h"
 #include "src/compiler/turboshaft/utils.h"

-namespace v8 ::internal::compiler::turboshaft {
+namespace v8::internal::compiler::turboshaft {

 const TSCallDescriptor* CreateAllocateBuiltinDescriptor(Zone* zone);

@@ -66,12 +66,13 @@ struct MemoryAnalyzer {
   }

   bool IsFoldedAllocation(OpIndex op) {
-    return folded_into.count(input_graph.Get(op).TryCast<AllocateOp>());
+    return folded_into.count(
+        input_graph.Get(op).template TryCast<AllocateOp>());
   }

   base::Optional<uint32_t> ReservedSize(OpIndex alloc) {
-    if (auto it =
-            reserved_size.find(input_graph.Get(alloc).TryCast<AllocateOp>());
+    if (auto it = reserved_size.find(
+            input_graph.Get(alloc).template TryCast<AllocateOp>());
         it != reserved_size.end()) {
       return it->second;
     }

@@ -79,6 +80,7 @@ struct MemoryAnalyzer {
   }

   void Run();

+  void Process(const Operation& op);
   void ProcessBlockTerminator(const Operation& op);
   void ProcessAllocation(const AllocateOp& alloc);
@@ -47,6 +47,15 @@ struct AnalyzerBase {
       : phase_zone(phase_zone), graph(graph) {}
 };

+// All operations whose `saturated_use_count` is 0 are unused and can be
+// skipped. Analyzers modify the input graph in-place when they want to mark
+// some Operations as removable. In order to make that work for operations
+// that have no uses such as Goto and Branch, all operations that have the
+// property `is_required_when_unused` have a non-zero `saturated_use_count`.
+V8_INLINE bool ShouldSkipOperation(const Operation& op) {
+  return op.saturated_use_count == 0;
+}
+
 // TODO(dmercadier, tebbi): transform this analyzer into a reducer, and plug in
 // into some reducer stacks.
 struct LivenessAnalyzer : AnalyzerBase {
@@ -194,6 +203,10 @@ class GraphVisitor {
   Zone* phase_zone() { return phase_zone_; }
   const Block* current_input_block() { return current_input_block_; }

+  // Analyzers set Operations' saturated_use_count to zero when they are
+  // unused, and thus need to have a non-const input graph.
+  Graph& modifiable_input_graph() const { return input_graph_; }
+
   // Visits and emits {input_block} right now (ie, in the current block).
   void CloneAndInlineBlock(const Block* input_block) {
     // Computing which input of Phi operations to use when visiting
@@ -312,12 +325,11 @@ class GraphVisitor {
         assembler().output_graph().next_operation_index();
     USE(first_output_index);
     const Operation& op = input_graph().Get(index);
-    if (op.saturated_use_count == 0 &&
-        !op.Properties().is_required_when_unused) {
-      if constexpr (trace_reduction) TraceOperationUnused();
+    if constexpr (trace_reduction) TraceReductionStart(index);
+    if (ShouldSkipOperation(op)) {
+      if constexpr (trace_reduction) TraceOperationSkipped();
       return true;
     }
-    if constexpr (trace_reduction) TraceReductionStart(index);
     OpIndex new_index;
     if (input_block->IsLoop() && op.Is<PhiOp>()) {
       const PhiOp& phi = op.Cast<PhiOp>();
@@ -351,7 +363,7 @@ class GraphVisitor {
         << PaddingSpace{5 - CountDecimalDigits(index.id())}
         << OperationPrintStyle{input_graph().Get(index), "#o"} << "\n";
   }
-  void TraceOperationUnused() { std::cout << "╰─> unused\n\n"; }
+  void TraceOperationSkipped() { std::cout << "╰─> skipped\n\n"; }
   void TraceBlockUnreachable() { std::cout << "╰─> unreachable\n\n"; }
   void TraceReductionResult(Block* current_block, OpIndex first_output_index,
                             OpIndex new_index) {